From d54385ce68cd18ab002b46f61246ad197cec92de Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 30 Apr 2015 14:53:54 -0700 Subject: etherdev: Process is_multicast_ether_addr at same size as other operations This change makes it so that we process the address in is_multicast_ether_addr at the same size as the other calls. This allows us to avoid duplicate reads when used with other calls such as is_zero_ether_addr or eth_addr_copy. In addition I have added a 64 bit version of the function so in eth_type_trans we can process the destination address as a 64 bit value throughout. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/etherdevice.h | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 606563ef8a72..c4a10f991fe0 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -110,7 +110,29 @@ static inline bool is_zero_ether_addr(const u8 *addr) */ static inline bool is_multicast_ether_addr(const u8 *addr) { - return 0x01 & addr[0]; +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) + u32 a = *(const u32 *)addr; +#else + u16 a = *(const u16 *)addr; +#endif +#ifdef __BIG_ENDIAN + return 0x01 & (a >> ((sizeof(a) * 8) - 8)); +#else + return 0x01 & a; +#endif +} + +static inline bool is_multicast_ether_addr_64bits(const u8 addr[6+2]) +{ +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 +#ifdef __BIG_ENDIAN + return 0x01 & ((*(const u64 *)addr) >> 56); +#else + return 0x01 & (*(const u64 *)addr); +#endif +#else + return is_multicast_ether_addr(addr); +#endif } /** -- cgit v1.2.3 From 50fb799289501c2eab9f43fc9af513027e1e994f Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Fri, 1 May 2015 11:30:12 -0700 Subject: net: Add skb_get_hash_perturb This calls flow_disect and __skb_get_hash to procure a hash for a packet. Input includes a key to initialize jhash. This function does not set skb->hash. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 ++ net/core/flow_dissector.c | 38 ++++++++++++++++++++++++++++++-------- 2 files changed, 32 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 66e374d62f64..acb83e249e3f 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -927,6 +927,8 @@ static inline __u32 skb_get_hash(struct sk_buff *skb) return skb->hash; } +__u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb); + static inline __u32 skb_get_hash_raw(const struct sk_buff *skb) { return skb->hash; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 2c35c02a931e..b1df995ef06c 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -267,13 +267,12 @@ static __always_inline void __flow_hash_secret_init(void) net_get_random_once(&hashrnd, sizeof(hashrnd)); } -static __always_inline u32 __flow_hash_3words(u32 a, u32 b, u32 c) +static __always_inline u32 __flow_hash_3words(u32 a, u32 b, u32 c, u32 keyval) { - __flow_hash_secret_init(); - return jhash_3words(a, b, c, hashrnd); + return jhash_3words(a, b, c, keyval); } -static inline u32 __flow_hash_from_keys(struct flow_keys *keys) +static inline u32 __flow_hash_from_keys(struct flow_keys *keys, u32 keyval) { u32 hash; @@ -287,7 +286,8 @@ static inline u32 __flow_hash_from_keys(struct flow_keys *keys) hash = __flow_hash_3words((__force u32)keys->dst, (__force u32)keys->src, - (__force u32)keys->ports); + (__force u32)keys->ports, + keyval); if (!hash) hash = 1; @@ -296,10 +296,20 @@ static inline u32 __flow_hash_from_keys(struct flow_keys *keys) u32 flow_hash_from_keys(struct flow_keys *keys) { - return __flow_hash_from_keys(keys); + __flow_hash_secret_init(); + return __flow_hash_from_keys(keys, hashrnd); } EXPORT_SYMBOL(flow_hash_from_keys); +static inline u32 ___skb_get_hash(const struct sk_buff *skb, + struct flow_keys *keys, u32 keyval) +{ + if (!skb_flow_dissect(skb, keys)) + return 0; + + return __flow_hash_from_keys(keys, keyval); +} + /* * __skb_get_hash: calculate a flow hash based on src/dst addresses * and src/dst port numbers. Sets hash in skb to non-zero hash value @@ -309,8 +319,12 @@ EXPORT_SYMBOL(flow_hash_from_keys); void __skb_get_hash(struct sk_buff *skb) { struct flow_keys keys; + u32 hash; - if (!skb_flow_dissect(skb, &keys)) + __flow_hash_secret_init(); + + hash = ___skb_get_hash(skb, &keys, hashrnd); + if (!hash) return; if (keys.ports) @@ -318,10 +332,18 @@ void __skb_get_hash(struct sk_buff *skb) skb->sw_hash = 1; - skb->hash = __flow_hash_from_keys(&keys); + skb->hash = hash; } EXPORT_SYMBOL(__skb_get_hash); +__u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb) +{ + struct flow_keys keys; + + return ___skb_get_hash(skb, &keys, perturb); +} +EXPORT_SYMBOL(skb_get_hash_perturb); + /* * Returns a Tx hash based on the given packet descriptor a Tx queues' number * to be used as a distribution range. -- cgit v1.2.3 From 9afd85c9e4552b276e2f4cfefd622bdeeffbbf26 Mon Sep 17 00:00:00 2001 From: Linus Lüssing Date: Sat, 2 May 2015 14:01:07 +0200 Subject: net: Export IGMP/MLD message validation code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With this patch, the IGMP and MLD message validation functions are moved from the bridge code to IPv4/IPv6 multicast files. Some small refactoring was done to enhance readibility and to iron out some differences in behaviour between the IGMP and MLD parsing code (e.g. the skb-cloning of MLD messages is now only done if necessary, just like the IGMP part always did). Finally, these IGMP and MLD message validation functions are exported so that not only the bridge can use it but batman-adv later, too. Signed-off-by: Linus Lüssing Signed-off-by: David S. Miller --- include/linux/igmp.h | 1 + include/linux/skbuff.h | 3 + include/net/addrconf.h | 1 + net/bridge/br_multicast.c | 218 +++++++--------------------------------------- net/core/skbuff.c | 87 ++++++++++++++++++ net/ipv4/igmp.c | 162 ++++++++++++++++++++++++++++++++++ net/ipv6/Makefile | 1 + net/ipv6/mcast_snoop.c | 213 ++++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 498 insertions(+), 188 deletions(-) create mode 100644 net/ipv6/mcast_snoop.c (limited to 'include/linux') diff --git a/include/linux/igmp.h b/include/linux/igmp.h index 2c677afeea47..193ad488d3e2 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -130,5 +130,6 @@ extern void ip_mc_unmap(struct in_device *); extern void ip_mc_remap(struct in_device *); extern void ip_mc_dec_group(struct in_device *in_dev, __be32 addr); extern void ip_mc_inc_group(struct in_device *in_dev, __be32 addr); +int ip_mc_check_igmp(struct sk_buff *skb, struct sk_buff **skb_trimmed); #endif diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index acb83e249e3f..9c2f793573fa 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3419,6 +3419,9 @@ static inline void skb_checksum_none_assert(const struct sk_buff *skb) bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off); int skb_checksum_setup(struct sk_buff *skb, bool recalculate); +struct sk_buff *skb_checksum_trimmed(struct sk_buff *skb, + unsigned int transport_len, + __sum16(*skb_chkf)(struct sk_buff *skb)); u32 skb_get_poff(const struct sk_buff *skb); u32 __skb_get_poff(const struct sk_buff *skb, void *data, diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 80456f72d70a..def59d3a34d5 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -142,6 +142,7 @@ void ipv6_mc_unmap(struct inet6_dev *idev); void ipv6_mc_remap(struct inet6_dev *idev); void ipv6_mc_init_dev(struct inet6_dev *idev); void ipv6_mc_destroy_dev(struct inet6_dev *idev); +int ipv6_mc_check_mld(struct sk_buff *skb, struct sk_buff **skb_trimmed); void addrconf_dad_failure(struct inet6_ifaddr *ifp); bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group, diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index b52f4cb8aee9..2d69d5cab52f 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -975,9 +975,6 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br, int err = 0; __be32 group; - if (!pskb_may_pull(skb, sizeof(*ih))) - return -EINVAL; - ih = igmpv3_report_hdr(skb); num = ntohs(ih->ngrec); len = sizeof(*ih); @@ -1248,25 +1245,14 @@ static int br_ip4_multicast_query(struct net_bridge *br, max_delay = 10 * HZ; group = 0; } - } else { - if (!pskb_may_pull(skb, sizeof(struct igmpv3_query))) { - err = -EINVAL; - goto out; - } - + } else if (skb->len >= sizeof(*ih3)) { ih3 = igmpv3_query_hdr(skb); if (ih3->nsrcs) goto out; max_delay = ih3->code ? IGMPV3_MRC(ih3->code) * (HZ / IGMP_TIMER_SCALE) : 1; - } - - /* RFC2236+RFC3376 (IGMPv2+IGMPv3) require the multicast link layer - * all-systems destination addresses (224.0.0.1) for general queries - */ - if (!group && iph->daddr != htonl(INADDR_ALLHOSTS_GROUP)) { - err = -EINVAL; + } else { goto out; } @@ -1329,12 +1315,6 @@ static int br_ip6_multicast_query(struct net_bridge *br, (port && port->state == BR_STATE_DISABLED)) goto out; - /* RFC2710+RFC3810 (MLDv1+MLDv2) require link-local source addresses */ - if (!(ipv6_addr_type(&ip6h->saddr) & IPV6_ADDR_LINKLOCAL)) { - err = -EINVAL; - goto out; - } - if (skb->len == sizeof(*mld)) { if (!pskb_may_pull(skb, sizeof(*mld))) { err = -EINVAL; @@ -1358,14 +1338,6 @@ static int br_ip6_multicast_query(struct net_bridge *br, is_general_query = group && ipv6_addr_any(group); - /* RFC2710+RFC3810 (MLDv1+MLDv2) require the multicast link layer - * all-nodes destination address (ff02::1) for general queries - */ - if (is_general_query && !ipv6_addr_is_ll_all_nodes(&ip6h->daddr)) { - err = -EINVAL; - goto out; - } - if (is_general_query) { saddr.proto = htons(ETH_P_IPV6); saddr.u.ip6 = ip6h->saddr; @@ -1557,66 +1529,22 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, struct sk_buff *skb, u16 vid) { - struct sk_buff *skb2 = skb; - const struct iphdr *iph; + struct sk_buff *skb_trimmed = NULL; struct igmphdr *ih; - unsigned int len; - unsigned int offset; int err; - /* We treat OOM as packet loss for now. */ - if (!pskb_may_pull(skb, sizeof(*iph))) - return -EINVAL; - - iph = ip_hdr(skb); - - if (iph->ihl < 5 || iph->version != 4) - return -EINVAL; - - if (!pskb_may_pull(skb, ip_hdrlen(skb))) - return -EINVAL; - - iph = ip_hdr(skb); + err = ip_mc_check_igmp(skb, &skb_trimmed); - if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) - return -EINVAL; - - if (iph->protocol != IPPROTO_IGMP) { - if (!ipv4_is_local_multicast(iph->daddr)) + if (err == -ENOMSG) { + if (!ipv4_is_local_multicast(ip_hdr(skb)->daddr)) BR_INPUT_SKB_CB(skb)->mrouters_only = 1; return 0; + } else if (err < 0) { + return err; } - len = ntohs(iph->tot_len); - if (skb->len < len || len < ip_hdrlen(skb)) - return -EINVAL; - - if (skb->len > len) { - skb2 = skb_clone(skb, GFP_ATOMIC); - if (!skb2) - return -ENOMEM; - - err = pskb_trim_rcsum(skb2, len); - if (err) - goto err_out; - } - - len -= ip_hdrlen(skb2); - offset = skb_network_offset(skb2) + ip_hdrlen(skb2); - __skb_pull(skb2, offset); - skb_reset_transport_header(skb2); - - err = -EINVAL; - if (!pskb_may_pull(skb2, sizeof(*ih))) - goto out; - - if (skb_checksum_simple_validate(skb2)) - goto out; - - err = 0; - BR_INPUT_SKB_CB(skb)->igmp = 1; - ih = igmp_hdr(skb2); + ih = igmp_hdr(skb); switch (ih->type) { case IGMP_HOST_MEMBERSHIP_REPORT: @@ -1625,21 +1553,19 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, err = br_ip4_multicast_add_group(br, port, ih->group, vid); break; case IGMPV3_HOST_MEMBERSHIP_REPORT: - err = br_ip4_multicast_igmp3_report(br, port, skb2, vid); + err = br_ip4_multicast_igmp3_report(br, port, skb_trimmed, vid); break; case IGMP_HOST_MEMBERSHIP_QUERY: - err = br_ip4_multicast_query(br, port, skb2, vid); + err = br_ip4_multicast_query(br, port, skb_trimmed, vid); break; case IGMP_HOST_LEAVE_MESSAGE: br_ip4_multicast_leave_group(br, port, ih->group, vid); break; } -out: - __skb_push(skb2, offset); -err_out: - if (skb2 != skb) - kfree_skb(skb2); + if (skb_trimmed) + kfree_skb(skb_trimmed); + return err; } @@ -1649,126 +1575,42 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, struct sk_buff *skb, u16 vid) { - struct sk_buff *skb2; - const struct ipv6hdr *ip6h; - u8 icmp6_type; - u8 nexthdr; - __be16 frag_off; - unsigned int len; - int offset; + struct sk_buff *skb_trimmed = NULL; + struct mld_msg *mld; int err; - if (!pskb_may_pull(skb, sizeof(*ip6h))) - return -EINVAL; + err = ipv6_mc_check_mld(skb, &skb_trimmed); - ip6h = ipv6_hdr(skb); - - /* - * We're interested in MLD messages only. - * - Version is 6 - * - MLD has always Router Alert hop-by-hop option - * - But we do not support jumbrograms. - */ - if (ip6h->version != 6) - return 0; - - /* Prevent flooding this packet if there is no listener present */ - if (!ipv6_addr_is_ll_all_nodes(&ip6h->daddr)) - BR_INPUT_SKB_CB(skb)->mrouters_only = 1; - - if (ip6h->nexthdr != IPPROTO_HOPOPTS || - ip6h->payload_len == 0) - return 0; - - len = ntohs(ip6h->payload_len) + sizeof(*ip6h); - if (skb->len < len) - return -EINVAL; - - nexthdr = ip6h->nexthdr; - offset = ipv6_skip_exthdr(skb, sizeof(*ip6h), &nexthdr, &frag_off); - - if (offset < 0 || nexthdr != IPPROTO_ICMPV6) + if (err == -ENOMSG) { + if (!ipv6_addr_is_ll_all_nodes(&ipv6_hdr(skb)->daddr)) + BR_INPUT_SKB_CB(skb)->mrouters_only = 1; return 0; - - /* Okay, we found ICMPv6 header */ - skb2 = skb_clone(skb, GFP_ATOMIC); - if (!skb2) - return -ENOMEM; - - err = -EINVAL; - if (!pskb_may_pull(skb2, offset + sizeof(struct icmp6hdr))) - goto out; - - len -= offset - skb_network_offset(skb2); - - __skb_pull(skb2, offset); - skb_reset_transport_header(skb2); - skb_postpull_rcsum(skb2, skb_network_header(skb2), - skb_network_header_len(skb2)); - - icmp6_type = icmp6_hdr(skb2)->icmp6_type; - - switch (icmp6_type) { - case ICMPV6_MGM_QUERY: - case ICMPV6_MGM_REPORT: - case ICMPV6_MGM_REDUCTION: - case ICMPV6_MLD2_REPORT: - break; - default: - err = 0; - goto out; - } - - /* Okay, we found MLD message. Check further. */ - if (skb2->len > len) { - err = pskb_trim_rcsum(skb2, len); - if (err) - goto out; - err = -EINVAL; + } else if (err < 0) { + return err; } - ip6h = ipv6_hdr(skb2); - - if (skb_checksum_validate(skb2, IPPROTO_ICMPV6, ip6_compute_pseudo)) - goto out; - - err = 0; - BR_INPUT_SKB_CB(skb)->igmp = 1; + mld = (struct mld_msg *)skb_transport_header(skb); - switch (icmp6_type) { + switch (mld->mld_type) { case ICMPV6_MGM_REPORT: - { - struct mld_msg *mld; - if (!pskb_may_pull(skb2, sizeof(*mld))) { - err = -EINVAL; - goto out; - } - mld = (struct mld_msg *)skb_transport_header(skb2); BR_INPUT_SKB_CB(skb)->mrouters_only = 1; err = br_ip6_multicast_add_group(br, port, &mld->mld_mca, vid); break; - } case ICMPV6_MLD2_REPORT: - err = br_ip6_multicast_mld2_report(br, port, skb2, vid); + err = br_ip6_multicast_mld2_report(br, port, skb_trimmed, vid); break; case ICMPV6_MGM_QUERY: - err = br_ip6_multicast_query(br, port, skb2, vid); + err = br_ip6_multicast_query(br, port, skb_trimmed, vid); break; case ICMPV6_MGM_REDUCTION: - { - struct mld_msg *mld; - if (!pskb_may_pull(skb2, sizeof(*mld))) { - err = -EINVAL; - goto out; - } - mld = (struct mld_msg *)skb_transport_header(skb2); br_ip6_multicast_leave_group(br, port, &mld->mld_mca, vid); - } + break; } -out: - kfree_skb(skb2); + if (skb_trimmed) + kfree_skb(skb_trimmed); + return err; } #endif diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 3cfff2a3d651..1e4278a4dd7e 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4030,6 +4030,93 @@ int skb_checksum_setup(struct sk_buff *skb, bool recalculate) } EXPORT_SYMBOL(skb_checksum_setup); +/** + * skb_checksum_maybe_trim - maybe trims the given skb + * @skb: the skb to check + * @transport_len: the data length beyond the network header + * + * Checks whether the given skb has data beyond the given transport length. + * If so, returns a cloned skb trimmed to this transport length. + * Otherwise returns the provided skb. Returns NULL in error cases + * (e.g. transport_len exceeds skb length or out-of-memory). + * + * Caller needs to set the skb transport header and release the returned skb. + * Provided skb is consumed. + */ +static struct sk_buff *skb_checksum_maybe_trim(struct sk_buff *skb, + unsigned int transport_len) +{ + struct sk_buff *skb_chk; + unsigned int len = skb_transport_offset(skb) + transport_len; + int ret; + + if (skb->len < len) { + kfree_skb(skb); + return NULL; + } else if (skb->len == len) { + return skb; + } + + skb_chk = skb_clone(skb, GFP_ATOMIC); + kfree_skb(skb); + + if (!skb_chk) + return NULL; + + ret = pskb_trim_rcsum(skb_chk, len); + if (ret) { + kfree_skb(skb_chk); + return NULL; + } + + return skb_chk; +} + +/** + * skb_checksum_trimmed - validate checksum of an skb + * @skb: the skb to check + * @transport_len: the data length beyond the network header + * @skb_chkf: checksum function to use + * + * Applies the given checksum function skb_chkf to the provided skb. + * Returns a checked and maybe trimmed skb. Returns NULL on error. + * + * If the skb has data beyond the given transport length, then a + * trimmed & cloned skb is checked and returned. + * + * Caller needs to set the skb transport header and release the returned skb. + * Provided skb is consumed. + */ +struct sk_buff *skb_checksum_trimmed(struct sk_buff *skb, + unsigned int transport_len, + __sum16(*skb_chkf)(struct sk_buff *skb)) +{ + struct sk_buff *skb_chk; + unsigned int offset = skb_transport_offset(skb); + int ret; + + skb_chk = skb_checksum_maybe_trim(skb, transport_len); + if (!skb_chk) + return NULL; + + if (!pskb_may_pull(skb_chk, offset)) { + kfree_skb(skb_chk); + return NULL; + } + + __skb_pull(skb_chk, offset); + ret = skb_chkf(skb_chk); + __skb_push(skb_chk, offset); + + if (ret) { + kfree_skb(skb_chk); + return NULL; + } + + return skb_chk; +} +EXPORT_SYMBOL(skb_checksum_trimmed); + void __skb_warn_lro_forwarding(const struct sk_buff *skb) { net_warn_ratelimited("%s: received packets cannot be forwarded while LRO is enabled\n", diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index a3a697f5ffba..651cdf648ec4 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1339,6 +1339,168 @@ out: } EXPORT_SYMBOL(ip_mc_inc_group); +static int ip_mc_check_iphdr(struct sk_buff *skb) +{ + const struct iphdr *iph; + unsigned int len; + unsigned int offset = skb_network_offset(skb) + sizeof(*iph); + + if (!pskb_may_pull(skb, offset)) + return -EINVAL; + + iph = ip_hdr(skb); + + if (iph->version != 4 || ip_hdrlen(skb) < sizeof(*iph)) + return -EINVAL; + + offset += ip_hdrlen(skb) - sizeof(*iph); + + if (!pskb_may_pull(skb, offset)) + return -EINVAL; + + iph = ip_hdr(skb); + + if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) + return -EINVAL; + + len = skb_network_offset(skb) + ntohs(iph->tot_len); + if (skb->len < len || len < offset) + return -EINVAL; + + skb_set_transport_header(skb, offset); + + return 0; +} + +static int ip_mc_check_igmp_reportv3(struct sk_buff *skb) +{ + unsigned int len = skb_transport_offset(skb); + + len += sizeof(struct igmpv3_report); + + return pskb_may_pull(skb, len) ? 0 : -EINVAL; +} + +static int ip_mc_check_igmp_query(struct sk_buff *skb) +{ + unsigned int len = skb_transport_offset(skb); + + len += sizeof(struct igmphdr); + if (skb->len < len) + return -EINVAL; + + /* IGMPv{1,2}? */ + if (skb->len != len) { + /* or IGMPv3? */ + len += sizeof(struct igmpv3_query) - sizeof(struct igmphdr); + if (skb->len < len || !pskb_may_pull(skb, len)) + return -EINVAL; + } + + /* RFC2236+RFC3376 (IGMPv2+IGMPv3) require the multicast link layer + * all-systems destination addresses (224.0.0.1) for general queries + */ + if (!igmp_hdr(skb)->group && + ip_hdr(skb)->daddr != htonl(INADDR_ALLHOSTS_GROUP)) + return -EINVAL; + + return 0; +} + +static int ip_mc_check_igmp_msg(struct sk_buff *skb) +{ + switch (igmp_hdr(skb)->type) { + case IGMP_HOST_LEAVE_MESSAGE: + case IGMP_HOST_MEMBERSHIP_REPORT: + case IGMPV2_HOST_MEMBERSHIP_REPORT: + /* fall through */ + return 0; + case IGMPV3_HOST_MEMBERSHIP_REPORT: + return ip_mc_check_igmp_reportv3(skb); + case IGMP_HOST_MEMBERSHIP_QUERY: + return ip_mc_check_igmp_query(skb); + default: + return -ENOMSG; + } +} + +static inline __sum16 ip_mc_validate_checksum(struct sk_buff *skb) +{ + return skb_checksum_simple_validate(skb); +} + +static int __ip_mc_check_igmp(struct sk_buff *skb, struct sk_buff **skb_trimmed) + +{ + struct sk_buff *skb_chk; + unsigned int transport_len; + unsigned int len = skb_transport_offset(skb) + sizeof(struct igmphdr); + int ret; + + transport_len = ntohs(ip_hdr(skb)->tot_len) - ip_hdrlen(skb); + + skb_get(skb); + skb_chk = skb_checksum_trimmed(skb, transport_len, + ip_mc_validate_checksum); + if (!skb_chk) + return -EINVAL; + + if (!pskb_may_pull(skb_chk, len)) { + kfree_skb(skb_chk); + return -EINVAL; + } + + ret = ip_mc_check_igmp_msg(skb_chk); + if (ret) { + kfree_skb(skb_chk); + return ret; + } + + if (skb_trimmed) + *skb_trimmed = skb_chk; + else + kfree_skb(skb_chk); + + return 0; +} + +/** + * ip_mc_check_igmp - checks whether this is a sane IGMP packet + * @skb: the skb to validate + * @skb_trimmed: to store an skb pointer trimmed to IPv4 packet tail (optional) + * + * Checks whether an IPv4 packet is a valid IGMP packet. If so sets + * skb network and transport headers accordingly and returns zero. + * + * -EINVAL: A broken packet was detected, i.e. it violates some internet + * standard + * -ENOMSG: IP header validation succeeded but it is not an IGMP packet. + * -ENOMEM: A memory allocation failure happened. + * + * Optionally, an skb pointer might be provided via skb_trimmed (or set it + * to NULL): After parsing an IGMP packet successfully it will point to + * an skb which has its tail aligned to the IP packet end. This might + * either be the originally provided skb or a trimmed, cloned version if + * the skb frame had data beyond the IP packet. A cloned skb allows us + * to leave the original skb and its full frame unchanged (which might be + * desirable for layer 2 frame jugglers). + * + * The caller needs to release a reference count from any returned skb_trimmed. + */ +int ip_mc_check_igmp(struct sk_buff *skb, struct sk_buff **skb_trimmed) +{ + int ret = ip_mc_check_iphdr(skb); + + if (ret < 0) + return ret; + + if (ip_hdr(skb)->protocol != IPPROTO_IGMP) + return -ENOMSG; + + return __ip_mc_check_igmp(skb, skb_trimmed); +} +EXPORT_SYMBOL(ip_mc_check_igmp); + /* * Resend IGMP JOIN report; used by netdev notifier. */ diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 2e8c06108ab9..0f3f1999719a 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -48,4 +48,5 @@ obj-$(subst m,y,$(CONFIG_IPV6)) += inet6_hashtables.o ifneq ($(CONFIG_IPV6),) obj-$(CONFIG_NET_UDP_TUNNEL) += ip6_udp_tunnel.o +obj-y += mcast_snoop.o endif diff --git a/net/ipv6/mcast_snoop.c b/net/ipv6/mcast_snoop.c new file mode 100644 index 000000000000..1a2cbc13a7d3 --- /dev/null +++ b/net/ipv6/mcast_snoop.c @@ -0,0 +1,213 @@ +/* Copyright (C) 2010: YOSHIFUJI Hideaki + * Copyright (C) 2015: Linus Lüssing + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + * + * + * Based on the MLD support added to br_multicast.c by YOSHIFUJI Hideaki. + */ + +#include +#include +#include +#include +#include + +static int ipv6_mc_check_ip6hdr(struct sk_buff *skb) +{ + const struct ipv6hdr *ip6h; + unsigned int len; + unsigned int offset = skb_network_offset(skb) + sizeof(*ip6h); + + if (!pskb_may_pull(skb, offset)) + return -EINVAL; + + ip6h = ipv6_hdr(skb); + + if (ip6h->version != 6) + return -EINVAL; + + len = offset + ntohs(ip6h->payload_len); + if (skb->len < len || len <= offset) + return -EINVAL; + + return 0; +} + +static int ipv6_mc_check_exthdrs(struct sk_buff *skb) +{ + const struct ipv6hdr *ip6h; + unsigned int offset; + u8 nexthdr; + __be16 frag_off; + + ip6h = ipv6_hdr(skb); + + if (ip6h->nexthdr != IPPROTO_HOPOPTS) + return -ENOMSG; + + nexthdr = ip6h->nexthdr; + offset = skb_network_offset(skb) + sizeof(*ip6h); + offset = ipv6_skip_exthdr(skb, offset, &nexthdr, &frag_off); + + if (offset < 0) + return -EINVAL; + + if (nexthdr != IPPROTO_ICMPV6) + return -ENOMSG; + + skb_set_transport_header(skb, offset); + + return 0; +} + +static int ipv6_mc_check_mld_reportv2(struct sk_buff *skb) +{ + unsigned int len = skb_transport_offset(skb); + + len += sizeof(struct mld2_report); + + return pskb_may_pull(skb, len) ? 0 : -EINVAL; +} + +static int ipv6_mc_check_mld_query(struct sk_buff *skb) +{ + struct mld_msg *mld; + unsigned int len = skb_transport_offset(skb); + + /* RFC2710+RFC3810 (MLDv1+MLDv2) require link-local source addresses */ + if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) + return -EINVAL; + + len += sizeof(struct mld_msg); + if (skb->len < len) + return -EINVAL; + + /* MLDv1? */ + if (skb->len != len) { + /* or MLDv2? */ + len += sizeof(struct mld2_query) - sizeof(struct mld_msg); + if (skb->len < len || !pskb_may_pull(skb, len)) + return -EINVAL; + } + + mld = (struct mld_msg *)skb_transport_header(skb); + + /* RFC2710+RFC3810 (MLDv1+MLDv2) require the multicast link layer + * all-nodes destination address (ff02::1) for general queries + */ + if (ipv6_addr_any(&mld->mld_mca) && + !ipv6_addr_is_ll_all_nodes(&ipv6_hdr(skb)->daddr)) + return -EINVAL; + + return 0; +} + +static int ipv6_mc_check_mld_msg(struct sk_buff *skb) +{ + struct mld_msg *mld = (struct mld_msg *)skb_transport_header(skb); + + switch (mld->mld_type) { + case ICMPV6_MGM_REDUCTION: + case ICMPV6_MGM_REPORT: + /* fall through */ + return 0; + case ICMPV6_MLD2_REPORT: + return ipv6_mc_check_mld_reportv2(skb); + case ICMPV6_MGM_QUERY: + return ipv6_mc_check_mld_query(skb); + default: + return -ENOMSG; + } +} + +static inline __sum16 ipv6_mc_validate_checksum(struct sk_buff *skb) +{ + return skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo); +} + +static int __ipv6_mc_check_mld(struct sk_buff *skb, + struct sk_buff **skb_trimmed) + +{ + struct sk_buff *skb_chk = NULL; + unsigned int transport_len; + unsigned int len = skb_transport_offset(skb) + sizeof(struct mld_msg); + int ret; + + transport_len = ntohs(ipv6_hdr(skb)->payload_len); + transport_len -= skb_transport_offset(skb) - sizeof(struct ipv6hdr); + + skb_get(skb); + skb_chk = skb_checksum_trimmed(skb, transport_len, + ipv6_mc_validate_checksum); + if (!skb_chk) + return -EINVAL; + + if (!pskb_may_pull(skb_chk, len)) { + kfree_skb(skb_chk); + return -EINVAL; + } + + ret = ipv6_mc_check_mld_msg(skb_chk); + if (ret) { + kfree_skb(skb_chk); + return ret; + } + + if (skb_trimmed) + *skb_trimmed = skb_chk; + else + kfree_skb(skb_chk); + + return 0; +} + +/** + * ipv6_mc_check_mld - checks whether this is a sane MLD packet + * @skb: the skb to validate + * @skb_trimmed: to store an skb pointer trimmed to IPv6 packet tail (optional) + * + * Checks whether an IPv6 packet is a valid MLD packet. If so sets + * skb network and transport headers accordingly and returns zero. + * + * -EINVAL: A broken packet was detected, i.e. it violates some internet + * standard + * -ENOMSG: IP header validation succeeded but it is not an MLD packet. + * -ENOMEM: A memory allocation failure happened. + * + * Optionally, an skb pointer might be provided via skb_trimmed (or set it + * to NULL): After parsing an MLD packet successfully it will point to + * an skb which has its tail aligned to the IP packet end. This might + * either be the originally provided skb or a trimmed, cloned version if + * the skb frame had data beyond the IP packet. A cloned skb allows us + * to leave the original skb and its full frame unchanged (which might be + * desirable for layer 2 frame jugglers). + * + * The caller needs to release a reference count from any returned skb_trimmed. + */ +int ipv6_mc_check_mld(struct sk_buff *skb, struct sk_buff **skb_trimmed) +{ + int ret; + + ret = ipv6_mc_check_ip6hdr(skb); + if (ret < 0) + return ret; + + ret = ipv6_mc_check_exthdrs(skb); + if (ret < 0) + return ret; + + return __ipv6_mc_check_mld(skb, skb_trimmed); +} +EXPORT_SYMBOL(ipv6_mc_check_mld); -- cgit v1.2.3 From cd8ae85299d54155702a56811b2e035e63064d3d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 3 May 2015 21:34:46 -0700 Subject: tcp: provide SYN headers for passive connections This patch allows a server application to get the TCP SYN headers for its passive connections. This is useful if the server is doing fingerprinting of clients based on SYN packet contents. Two socket options are added: TCP_SAVE_SYN and TCP_SAVED_SYN. The first is used on a socket to enable saving the SYN headers for child connections. This can be set before or after the listen() call. The latter is used to retrieve the SYN headers for passive connections, if the parent listener has enabled TCP_SAVE_SYN. TCP_SAVED_SYN is read once, it frees the saved SYN headers. The data returned in TCP_SAVED_SYN are network (IPv4/IPv6) and TCP headers. Original patch was written by Tom Herbert, I changed it to not hold a full skb (and associated dst and conntracking reference). We have used such patch for about 3 years at Google. Signed-off-by: Eric Dumazet Acked-by: Neal Cardwell Tested-by: Neal Cardwell Signed-off-by: David S. Miller --- include/linux/tcp.h | 8 ++++++++ include/net/request_sock.h | 4 +++- include/uapi/linux/tcp.h | 2 ++ net/ipv4/tcp.c | 35 +++++++++++++++++++++++++++++++++++ net/ipv4/tcp_input.c | 18 ++++++++++++++++++ net/ipv4/tcp_ipv4.c | 1 + net/ipv4/tcp_minisocks.c | 3 +++ 7 files changed, 70 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 3b2911502a8c..e6fb5df22db1 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -199,6 +199,7 @@ struct tcp_sock { syn_fastopen:1, /* SYN includes Fast Open option */ syn_fastopen_exp:1,/* SYN includes Fast Open exp. option */ syn_data_acked:1,/* data in SYN is acked by SYN-ACK */ + save_syn:1, /* Save headers of SYN packet */ is_cwnd_limited:1;/* forward progress limited by snd_cwnd? */ u32 tlp_high_seq; /* snd_nxt at the time of TLP retransmit. */ @@ -326,6 +327,7 @@ struct tcp_sock { * socket. Used to retransmit SYNACKs etc. */ struct request_sock *fastopen_rsk; + u32 *saved_syn; }; enum tsq_flags { @@ -393,4 +395,10 @@ static inline int fastopen_init_queue(struct sock *sk, int backlog) return 0; } +static inline void tcp_saved_syn_free(struct tcp_sock *tp) +{ + kfree(tp->saved_syn); + tp->saved_syn = NULL; +} + #endif /* _LINUX_TCP_H */ diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 9f4265ce8892..87935cad2f7b 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -64,6 +64,7 @@ struct request_sock { struct timer_list rsk_timer; const struct request_sock_ops *rsk_ops; struct sock *sk; + u32 *saved_syn; u32 secid; u32 peer_secid; }; @@ -77,7 +78,7 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener) req->rsk_ops = ops; sock_hold(sk_listener); req->rsk_listener = sk_listener; - + req->saved_syn = NULL; /* Following is temporary. It is coupled with debugging * helpers in reqsk_put() & reqsk_free() */ @@ -104,6 +105,7 @@ static inline void reqsk_free(struct request_sock *req) req->rsk_ops->destructor(req); if (req->rsk_listener) sock_put(req->rsk_listener); + kfree(req->saved_syn); kmem_cache_free(req->rsk_ops->slab, req); } diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index faa72f4fa547..51ebedba577f 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -113,6 +113,8 @@ enum { #define TCP_TIMESTAMP 24 #define TCP_NOTSENT_LOWAT 25 /* limit number of unsent bytes in write queue */ #define TCP_CC_INFO 26 /* Get Congestion Control (optional) info */ +#define TCP_SAVE_SYN 27 /* Record SYN headers for new connections */ +#define TCP_SAVED_SYN 28 /* Get SYN headers recorded for connection */ struct tcp_repair_opt { __u32 opt_code; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 46efa03d2b11..ecccfdc50d76 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2482,6 +2482,13 @@ static int do_tcp_setsockopt(struct sock *sk, int level, icsk->icsk_syn_retries = val; break; + case TCP_SAVE_SYN: + if (val < 0 || val > 1) + err = -EINVAL; + else + tp->save_syn = val; + break; + case TCP_LINGER2: if (val < 0) tp->linger2 = -1; @@ -2818,6 +2825,34 @@ static int do_tcp_getsockopt(struct sock *sk, int level, case TCP_NOTSENT_LOWAT: val = tp->notsent_lowat; break; + case TCP_SAVE_SYN: + val = tp->save_syn; + break; + case TCP_SAVED_SYN: { + if (get_user(len, optlen)) + return -EFAULT; + + lock_sock(sk); + if (tp->saved_syn) { + len = min_t(unsigned int, tp->saved_syn[0], len); + if (put_user(len, optlen)) { + release_sock(sk); + return -EFAULT; + } + if (copy_to_user(optval, tp->saved_syn + 1, len)) { + release_sock(sk); + return -EFAULT; + } + tcp_saved_syn_free(tp); + release_sock(sk); + } else { + release_sock(sk); + len = 0; + if (put_user(len, optlen)) + return -EFAULT; + } + return 0; + } default: return -ENOPROTOOPT; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 09bdc4abfcbb..df2ca615cd0c 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6060,6 +6060,23 @@ static bool tcp_syn_flood_action(struct sock *sk, return want_cookie; } +static void tcp_reqsk_record_syn(const struct sock *sk, + struct request_sock *req, + const struct sk_buff *skb) +{ + if (tcp_sk(sk)->save_syn) { + u32 len = skb_network_header_len(skb) + tcp_hdrlen(skb); + u32 *copy; + + copy = kmalloc(len + sizeof(u32), GFP_ATOMIC); + if (copy) { + copy[0] = len; + memcpy(©[1], skb_network_header(skb), len); + req->saved_syn = copy; + } + } +} + int tcp_conn_request(struct request_sock_ops *rsk_ops, const struct tcp_request_sock_ops *af_ops, struct sock *sk, struct sk_buff *skb) @@ -6192,6 +6209,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, tcp_rsk(req)->tfo_listener = false; af_ops->queue_hash_add(sk, req, TCP_TIMEOUT_INIT); } + tcp_reqsk_record_syn(sk, req, skb); return 0; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index fc1c658ec6c1..91cb4768a860 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1802,6 +1802,7 @@ void tcp_v4_destroy_sock(struct sock *sk) /* If socket is aborted during connect operation */ tcp_free_fastopen_req(tp); + tcp_saved_syn_free(tp); sk_sockets_allocated_dec(sk); sock_release_memcg(sk); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index e5d7649136fc..ebe2ab2596ed 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -536,6 +536,9 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->fastopen_rsk = NULL; newtp->syn_data_acked = 0; + newtp->saved_syn = req->saved_syn; + req->saved_syn = NULL; + TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_PASSIVEOPENS); } return newsk; -- cgit v1.2.3 From 2c7a88c252bf3381958cf716f31b6b2e0f2f3fa7 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 4 May 2015 14:33:48 -0700 Subject: etherdev: Fix sparse error, make test usable by other functions This change does two things. First it fixes a sparse error for the fact that the __be16 degrades to an integer. Since that is actually what I am kind of doing I am simply working around that by forcing both sides of the comparison to u16. Also I realized on some compilers I was generating another instruction for big endian systems such as PowerPC since it was masking the value before doing the comparison. So to resolve that I have simply pulled the mask out and wrapped it in an #ifndef __BIG_ENDIAN. Lastly I pulled this all out into its own function. I notices there are similar checks in a number of other places so this function can be reused there to help reduce overhead in these paths as well. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/etherdevice.h | 18 ++++++++++++++++++ net/ethernet/eth.c | 2 +- 2 files changed, 19 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index c4a10f991fe0..9012f8775208 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -190,6 +190,24 @@ static inline bool is_valid_ether_addr(const u8 *addr) return !is_multicast_ether_addr(addr) && !is_zero_ether_addr(addr); } +/** + * eth_proto_is_802_3 - Determine if a given Ethertype/length is a protocol + * @proto: Ethertype/length value to be tested + * + * Check that the value from the Ethertype/length field is a valid Ethertype. + * + * Return true if the valid is an 802.3 supported Ethertype. + */ +static inline bool eth_proto_is_802_3(__be16 proto) +{ +#ifndef __BIG_ENDIAN + /* if CPU is little endian mask off bits representing LSB */ + proto &= htons(0xFF00); +#endif + /* cast both to u16 and compare since LSB can be ignored */ + return (__force u16)proto >= (__force u16)htons(ETH_P_802_3_MIN); +} + /** * eth_random_addr - Generate software assigned random Ethernet address * @addr: Pointer to a six-byte array containing the Ethernet address diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 314e4c5a5a5e..9045e2a1108f 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -179,7 +179,7 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) if (unlikely(netdev_uses_dsa(dev))) return htons(ETH_P_XDSA); - if (likely((eth->h_proto & htons(0xFF00)) >= htons(ETH_P_802_3_MIN))) + if (likely(eth_proto_is_802_3(eth->h_proto))) return eth->h_proto; /* -- cgit v1.2.3 From 9545b22da647cf6fbbac9c5a48c50fd72d892b11 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 4 May 2015 14:34:10 -0700 Subject: vlan: Use eth_proto_is_802_3 Replace "ntohs(proto) >= ETH_P_802_3_MIN" w/ eth_proto_is_802_3(proto). Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 920e4457ce6e..b9ab677c0c0a 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -539,7 +539,7 @@ static inline void vlan_set_encap_proto(struct sk_buff *skb, */ proto = vhdr->h_vlan_encapsulated_proto; - if (ntohs(proto) >= ETH_P_802_3_MIN) { + if (eth_proto_is_802_3(proto)) { skb->protocol = proto; return; } -- cgit v1.2.3 From 4ae92bc77ac8e620f7c8d59b5882a4cb0d1c4ef1 Mon Sep 17 00:00:00 2001 From: Nicolas Schichan Date: Wed, 6 May 2015 16:12:27 +0200 Subject: net: filter: add a callback to allow classic post-verifier transformations This is in preparation for use by the seccomp code, the rationale is not to duplicate additional code within the seccomp layer, but instead, have it abstracted and hidden within the classic BPF API. As an interim step, this now also makes bpf_prepare_filter() visible (not as exported symbol though), so that seccomp can reuse that code path instead of reimplementing it. Joint work with Daniel Borkmann. Signed-off-by: Nicolas Schichan Signed-off-by: Daniel Borkmann Cc: Alexei Starovoitov Cc: Kees Cook Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 6 ++++++ net/core/filter.c | 18 +++++++++++++++--- 2 files changed, 21 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index fa11b3a367be..91996247cb55 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -384,7 +384,13 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); int sk_attach_bpf(u32 ufd, struct sock *sk); int sk_detach_filter(struct sock *sk); +typedef int (*bpf_aux_classic_check_t)(struct sock_filter *filter, + unsigned int flen); + int bpf_check_classic(const struct sock_filter *filter, unsigned int flen); +struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp, + bpf_aux_classic_check_t trans); + int sk_get_filter(struct sock *sk, struct sock_filter __user *filter, unsigned int len); diff --git a/net/core/filter.c b/net/core/filter.c index bf831a85c315..e670494f1d83 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -988,7 +988,8 @@ out_err: return ERR_PTR(err); } -static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp) +struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp, + bpf_aux_classic_check_t trans) { int err; @@ -1001,6 +1002,17 @@ static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp) return ERR_PTR(err); } + /* There might be additional checks and transformations + * needed on classic filters, f.e. in case of seccomp. + */ + if (trans) { + err = trans(fp->insns, fp->len); + if (err) { + __bpf_prog_release(fp); + return ERR_PTR(err); + } + } + /* Probe if we can JIT compile the filter and if so, do * the compilation of the filter. */ @@ -1050,7 +1062,7 @@ int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog) /* bpf_prepare_filter() already takes care of freeing * memory in case something goes wrong. */ - fp = bpf_prepare_filter(fp); + fp = bpf_prepare_filter(fp, NULL); if (IS_ERR(fp)) return PTR_ERR(fp); @@ -1135,7 +1147,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) /* bpf_prepare_filter() already takes care of freeing * memory in case something goes wrong. */ - prog = bpf_prepare_filter(prog); + prog = bpf_prepare_filter(prog, NULL); if (IS_ERR(prog)) return PTR_ERR(prog); -- cgit v1.2.3 From d9e12f42e58da475379b9080708b94f2095904af Mon Sep 17 00:00:00 2001 From: Nicolas Schichan Date: Wed, 6 May 2015 16:12:28 +0200 Subject: seccomp: simplify seccomp_prepare_filter and reuse bpf_prepare_filter Remove the calls to bpf_check_classic(), bpf_convert_filter() and bpf_migrate_runtime() and let bpf_prepare_filter() take care of that instead. seccomp_check_filter() is passed to bpf_prepare_filter() so that it gets called from there, after bpf_check_classic(). We can now remove exposure of two internal classic BPF functions previously used by seccomp. The export of bpf_check_classic() symbol, previously known as sk_chk_filter(), was there since pre git times, and no in-tree module was using it, therefore remove it. Joint work with Daniel Borkmann. Signed-off-by: Nicolas Schichan Signed-off-by: Daniel Borkmann Cc: Alexei Starovoitov Cc: Kees Cook Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 4 --- kernel/seccomp.c | 68 ++++++++++++++++---------------------------------- net/core/filter.c | 8 +++--- 3 files changed, 26 insertions(+), 54 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 91996247cb55..0dcb44bcfc5f 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -363,9 +363,6 @@ int sk_filter(struct sock *sk, struct sk_buff *skb); void bpf_prog_select_runtime(struct bpf_prog *fp); void bpf_prog_free(struct bpf_prog *fp); -int bpf_convert_filter(struct sock_filter *prog, int len, - struct bpf_insn *new_prog, int *new_len); - struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags); struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size, gfp_t gfp_extra_flags); @@ -387,7 +384,6 @@ int sk_detach_filter(struct sock *sk); typedef int (*bpf_aux_classic_check_t)(struct sock_filter *filter, unsigned int flen); -int bpf_check_classic(const struct sock_filter *filter, unsigned int flen); struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp, bpf_aux_classic_check_t trans); diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 4f44028943e6..93d40f7f3683 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -347,15 +347,14 @@ static inline void seccomp_sync_threads(void) static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog) { struct seccomp_filter *filter; - unsigned long fp_size; - struct sock_filter *fp; - int new_len; - long ret; + struct bpf_prog *prog; + unsigned long fsize; if (fprog->len == 0 || fprog->len > BPF_MAXINSNS) return ERR_PTR(-EINVAL); + BUG_ON(INT_MAX / fprog->len < sizeof(struct sock_filter)); - fp_size = fprog->len * sizeof(struct sock_filter); + fsize = bpf_classic_proglen(fprog); /* * Installing a seccomp filter requires that the task has @@ -368,60 +367,37 @@ static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog) CAP_SYS_ADMIN) != 0) return ERR_PTR(-EACCES); - fp = kzalloc(fp_size, GFP_KERNEL|__GFP_NOWARN); - if (!fp) + prog = bpf_prog_alloc(bpf_prog_size(fprog->len), 0); + if (!prog) return ERR_PTR(-ENOMEM); /* Copy the instructions from fprog. */ - ret = -EFAULT; - if (copy_from_user(fp, fprog->filter, fp_size)) - goto free_prog; - - /* Check and rewrite the fprog via the skb checker */ - ret = bpf_check_classic(fp, fprog->len); - if (ret) - goto free_prog; + if (copy_from_user(prog->insns, fprog->filter, fsize)) { + __bpf_prog_free(prog); + return ERR_PTR(-EFAULT); + } - /* Check and rewrite the fprog for seccomp use */ - ret = seccomp_check_filter(fp, fprog->len); - if (ret) - goto free_prog; + prog->len = fprog->len; - /* Convert 'sock_filter' insns to 'bpf_insn' insns */ - ret = bpf_convert_filter(fp, fprog->len, NULL, &new_len); - if (ret) - goto free_prog; + /* bpf_prepare_filter() already takes care of freeing + * memory in case something goes wrong. + */ + prog = bpf_prepare_filter(prog, seccomp_check_filter); + if (IS_ERR(prog)) + return ERR_CAST(prog); /* Allocate a new seccomp_filter */ - ret = -ENOMEM; filter = kzalloc(sizeof(struct seccomp_filter), GFP_KERNEL|__GFP_NOWARN); - if (!filter) - goto free_prog; - - filter->prog = bpf_prog_alloc(bpf_prog_size(new_len), __GFP_NOWARN); - if (!filter->prog) - goto free_filter; - - ret = bpf_convert_filter(fp, fprog->len, filter->prog->insnsi, &new_len); - if (ret) - goto free_filter_prog; + if (!filter) { + bpf_prog_destroy(prog); + return ERR_PTR(-ENOMEM); + } - kfree(fp); + filter->prog = prog; atomic_set(&filter->usage, 1); - filter->prog->len = new_len; - - bpf_prog_select_runtime(filter->prog); return filter; - -free_filter_prog: - __bpf_prog_free(filter->prog); -free_filter: - kfree(filter); -free_prog: - kfree(fp); - return ERR_PTR(ret); } /** diff --git a/net/core/filter.c b/net/core/filter.c index e670494f1d83..f887084740cd 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -355,8 +355,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp, * for socket filters: ctx == 'struct sk_buff *', for seccomp: * ctx == 'struct seccomp_data *'. */ -int bpf_convert_filter(struct sock_filter *prog, int len, - struct bpf_insn *new_prog, int *new_len) +static int bpf_convert_filter(struct sock_filter *prog, int len, + struct bpf_insn *new_prog, int *new_len) { int new_flen = 0, pass = 0, target, i; struct bpf_insn *new_insn; @@ -751,7 +751,8 @@ static bool chk_code_allowed(u16 code_to_probe) * * Returns 0 if the rule set is legal or -EINVAL if not. */ -int bpf_check_classic(const struct sock_filter *filter, unsigned int flen) +static int bpf_check_classic(const struct sock_filter *filter, + unsigned int flen) { bool anc_found; int pc; @@ -825,7 +826,6 @@ int bpf_check_classic(const struct sock_filter *filter, unsigned int flen) return -EINVAL; } -EXPORT_SYMBOL(bpf_check_classic); static int bpf_prog_store_orig_filter(struct bpf_prog *fp, const struct sock_fprog *fprog) -- cgit v1.2.3 From ac67eb2c5347bd9976308c0e0cf1d9e7ca690342 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 6 May 2015 16:12:30 +0200 Subject: seccomp, filter: add and use bpf_prog_create_from_user from seccomp Seccomp has always been a special candidate when it comes to preparation of its filters in seccomp_prepare_filter(). Due to the extra checks and filter rewrite it partially duplicates code and has BPF internals exposed. This patch adds a generic API inside the BPF code code that seccomp can use and thus keep it's filter preparation code minimal and better maintainable. The other side-effect is that now classic JITs can add seccomp support as well by only providing a BPF_LDX | BPF_W | BPF_ABS translation. Tested with seccomp and BPF test suites. Signed-off-by: Daniel Borkmann Cc: Nicolas Schichan Cc: Alexei Starovoitov Cc: Kees Cook Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 12 +++++------- kernel/seccomp.c | 42 ++++++++++++----------------------------- net/core/filter.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 66 insertions(+), 39 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 0dcb44bcfc5f..3c03a6085b82 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -374,19 +374,17 @@ static inline void bpf_prog_unlock_free(struct bpf_prog *fp) __bpf_prog_free(fp); } +typedef int (*bpf_aux_classic_check_t)(struct sock_filter *filter, + unsigned int flen); + int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog); +int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog, + bpf_aux_classic_check_t trans); void bpf_prog_destroy(struct bpf_prog *fp); int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); int sk_attach_bpf(u32 ufd, struct sock *sk); int sk_detach_filter(struct sock *sk); - -typedef int (*bpf_aux_classic_check_t)(struct sock_filter *filter, - unsigned int flen); - -struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp, - bpf_aux_classic_check_t trans); - int sk_get_filter(struct sock *sk, struct sock_filter __user *filter, unsigned int len); diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 93d40f7f3683..245df6b32b81 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -346,15 +346,13 @@ static inline void seccomp_sync_threads(void) */ static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog) { - struct seccomp_filter *filter; - struct bpf_prog *prog; - unsigned long fsize; + struct seccomp_filter *sfilter; + int ret; if (fprog->len == 0 || fprog->len > BPF_MAXINSNS) return ERR_PTR(-EINVAL); BUG_ON(INT_MAX / fprog->len < sizeof(struct sock_filter)); - fsize = bpf_classic_proglen(fprog); /* * Installing a seccomp filter requires that the task has @@ -367,37 +365,21 @@ static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog) CAP_SYS_ADMIN) != 0) return ERR_PTR(-EACCES); - prog = bpf_prog_alloc(bpf_prog_size(fprog->len), 0); - if (!prog) - return ERR_PTR(-ENOMEM); - - /* Copy the instructions from fprog. */ - if (copy_from_user(prog->insns, fprog->filter, fsize)) { - __bpf_prog_free(prog); - return ERR_PTR(-EFAULT); - } - - prog->len = fprog->len; - - /* bpf_prepare_filter() already takes care of freeing - * memory in case something goes wrong. - */ - prog = bpf_prepare_filter(prog, seccomp_check_filter); - if (IS_ERR(prog)) - return ERR_CAST(prog); - /* Allocate a new seccomp_filter */ - filter = kzalloc(sizeof(struct seccomp_filter), - GFP_KERNEL|__GFP_NOWARN); - if (!filter) { - bpf_prog_destroy(prog); + sfilter = kzalloc(sizeof(*sfilter), GFP_KERNEL | __GFP_NOWARN); + if (!sfilter) return ERR_PTR(-ENOMEM); + + ret = bpf_prog_create_from_user(&sfilter->prog, fprog, + seccomp_check_filter); + if (ret < 0) { + kfree(sfilter); + return ERR_PTR(ret); } - filter->prog = prog; - atomic_set(&filter->usage, 1); + atomic_set(&sfilter->usage, 1); - return filter; + return sfilter; } /** diff --git a/net/core/filter.c b/net/core/filter.c index 45c015d6b974..a831f193e2c7 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -991,8 +991,8 @@ out_err: return ERR_PTR(err); } -struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp, - bpf_aux_classic_check_t trans) +static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp, + bpf_aux_classic_check_t trans) { int err; @@ -1074,6 +1074,53 @@ int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog) } EXPORT_SYMBOL_GPL(bpf_prog_create); +/** + * bpf_prog_create_from_user - create an unattached filter from user buffer + * @pfp: the unattached filter that is created + * @fprog: the filter program + * @trans: post-classic verifier transformation handler + * + * This function effectively does the same as bpf_prog_create(), only + * that it builds up its insns buffer from user space provided buffer. + * It also allows for passing a bpf_aux_classic_check_t handler. + */ +int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog, + bpf_aux_classic_check_t trans) +{ + unsigned int fsize = bpf_classic_proglen(fprog); + struct bpf_prog *fp; + + /* Make sure new filter is there and in the right amounts. */ + if (fprog->filter == NULL) + return -EINVAL; + + fp = bpf_prog_alloc(bpf_prog_size(fprog->len), 0); + if (!fp) + return -ENOMEM; + + if (copy_from_user(fp->insns, fprog->filter, fsize)) { + __bpf_prog_free(fp); + return -EFAULT; + } + + fp->len = fprog->len; + /* Since unattached filters are not copied back to user + * space through sk_get_filter(), we do not need to hold + * a copy here, and can spare us the work. + */ + fp->orig_prog = NULL; + + /* bpf_prepare_filter() already takes care of freeing + * memory in case something goes wrong. + */ + fp = bpf_prepare_filter(fp, trans); + if (IS_ERR(fp)) + return PTR_ERR(fp); + + *pfp = fp; + return 0; +} + void bpf_prog_destroy(struct bpf_prog *fp) { __bpf_prog_release(fp); -- cgit v1.2.3 From 59324cf35aba5336b611074028777838a963d03b Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 7 May 2015 11:02:53 +0200 Subject: netlink: allow to listen "all" netns More accurately, listen all netns that have a nsid assigned into the netns where the netlink socket is opened. For this purpose, a netlink socket option is added: NETLINK_LISTEN_ALL_NSID. When this option is set on a netlink socket, this socket will receive netlink notifications from all netns that have a nsid assigned into the netns where the socket has been opened. The nsid is sent to userland via an anscillary data. With this patch, a daemon needs only one socket to listen many netns. This is useful when the number of netns is high. Because 0 is a valid value for a nsid, the field nsid_is_set indicates if the field nsid is valid or not. skb->cb is initialized to 0 on skb allocation, thus we are sure that we will never send a nsid 0 by error to the userland. Signed-off-by: Nicolas Dichtel Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/netlink.h | 2 ++ include/net/net_namespace.h | 2 ++ include/uapi/linux/netlink.h | 1 + net/core/net_namespace.c | 10 ++++++++- net/netlink/af_netlink.c | 52 +++++++++++++++++++++++++++++++++++++++----- 5 files changed, 61 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 6835c1279df7..9120edb650a0 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -28,6 +28,8 @@ struct netlink_skb_parms { __u32 dst_group; __u32 flags; struct sock *sk; + bool nsid_is_set; + int nsid; }; #define NETLINK_CB(skb) (*(struct netlink_skb_parms*)&((skb)->cb)) diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 6d1e2eae32fb..3f850acc844e 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -272,6 +272,8 @@ static inline struct net *read_pnet(const possible_net_t *pnet) #endif int peernet2id_alloc(struct net *net, struct net *peer); +int peernet2id(struct net *net, struct net *peer); +bool peernet_has_id(struct net *net, struct net *peer); struct net *get_net_ns_by_id(struct net *net, int id); struct pernet_operations { diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h index 1a85940f8ab7..3e34b7d702f8 100644 --- a/include/uapi/linux/netlink.h +++ b/include/uapi/linux/netlink.h @@ -108,6 +108,7 @@ struct nlmsgerr { #define NETLINK_NO_ENOBUFS 5 #define NETLINK_RX_RING 6 #define NETLINK_TX_RING 7 +#define NETLINK_LISTEN_ALL_NSID 8 struct nl_pktinfo { __u32 group; diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index ae5008b097de..a665bf490c88 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -229,7 +229,7 @@ int peernet2id_alloc(struct net *net, struct net *peer) EXPORT_SYMBOL(peernet2id_alloc); /* This function returns, if assigned, the id of a peer netns. */ -static int peernet2id(struct net *net, struct net *peer) +int peernet2id(struct net *net, struct net *peer) { unsigned long flags; int id; @@ -240,6 +240,14 @@ static int peernet2id(struct net *net, struct net *peer) return id; } +/* This function returns true is the peer netns has an id assigned into the + * current netns. + */ +bool peernet_has_id(struct net *net, struct net *peer) +{ + return peernet2id(net, peer) >= 0; +} + struct net *get_net_ns_by_id(struct net *net, int id) { unsigned long flags; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index bf7f56d7a9aa..a5fff75accf8 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -83,6 +83,7 @@ struct listeners { #define NETLINK_F_RECV_PKTINFO 0x2 #define NETLINK_F_BROADCAST_SEND_ERROR 0x4 #define NETLINK_F_RECV_NO_ENOBUFS 0x8 +#define NETLINK_F_LISTEN_ALL_NSID 0x10 static inline int netlink_is_kernel(struct sock *sk) { @@ -1932,8 +1933,17 @@ static void do_one_broadcast(struct sock *sk, !test_bit(p->group - 1, nlk->groups)) return; - if (!net_eq(sock_net(sk), p->net)) - return; + if (!net_eq(sock_net(sk), p->net)) { + if (!(nlk->flags & NETLINK_F_LISTEN_ALL_NSID)) + return; + + if (!peernet_has_id(sock_net(sk), p->net)) + return; + + if (!file_ns_capable(sk->sk_socket->file, p->net->user_ns, + CAP_NET_BROADCAST)) + return; + } if (p->failure) { netlink_overrun(sk); @@ -1959,13 +1969,22 @@ static void do_one_broadcast(struct sock *sk, p->failure = 1; if (nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR) p->delivery_failure = 1; - } else if (p->tx_filter && p->tx_filter(sk, p->skb2, p->tx_data)) { + goto out; + } + if (p->tx_filter && p->tx_filter(sk, p->skb2, p->tx_data)) { kfree_skb(p->skb2); p->skb2 = NULL; - } else if (sk_filter(sk, p->skb2)) { + goto out; + } + if (sk_filter(sk, p->skb2)) { kfree_skb(p->skb2); p->skb2 = NULL; - } else if ((val = netlink_broadcast_deliver(sk, p->skb2)) < 0) { + goto out; + } + NETLINK_CB(p->skb2).nsid = peernet2id(sock_net(sk), p->net); + NETLINK_CB(p->skb2).nsid_is_set = true; + val = netlink_broadcast_deliver(sk, p->skb2); + if (val < 0) { netlink_overrun(sk); if (nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR) p->delivery_failure = 1; @@ -1974,6 +1993,7 @@ static void do_one_broadcast(struct sock *sk, p->delivered = 1; p->skb2 = NULL; } +out: sock_put(sk); } @@ -2202,6 +2222,16 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, break; } #endif /* CONFIG_NETLINK_MMAP */ + case NETLINK_LISTEN_ALL_NSID: + if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_BROADCAST)) + return -EPERM; + + if (val) + nlk->flags |= NETLINK_F_LISTEN_ALL_NSID; + else + nlk->flags &= ~NETLINK_F_LISTEN_ALL_NSID; + err = 0; + break; default: err = -ENOPROTOOPT; } @@ -2268,6 +2298,16 @@ static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb) put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info); } +static void netlink_cmsg_listen_all_nsid(struct sock *sk, struct msghdr *msg, + struct sk_buff *skb) +{ + if (!NETLINK_CB(skb).nsid_is_set) + return; + + put_cmsg(msg, SOL_NETLINK, NETLINK_LISTEN_ALL_NSID, sizeof(int), + &NETLINK_CB(skb).nsid); +} + static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; @@ -2421,6 +2461,8 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, if (nlk->flags & NETLINK_F_RECV_PKTINFO) netlink_cmsg_recv_pktinfo(msg, skb); + if (nlk->flags & NETLINK_F_LISTEN_ALL_NSID) + netlink_cmsg_listen_all_nsid(sk, msg, skb); memset(&scm, 0, sizeof(scm)); scm.creds = *NETLINK_CREDS(skb); -- cgit v1.2.3 From 140e807da12988e2a925fe029336e7bb67a8d4de Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 8 May 2015 21:07:08 -0500 Subject: tun: Utilize the normal socket network namespace refcounting. There is no need for tun to do the weird network namespace refcounting. The existing network namespace refcounting in tfile has almost exactly the same lifetime. So rewrite the code to use the struct sock network namespace refcounting and remove the unnecessary hand rolled network namespace refcounting and the unncesary tfile->net. This change allows the tun code to directly call sock_put bypassing sock_release and making SOCK_EXTERNALLY_ALLOCATED unnecessary. Remove the now unncessary tun_release so that if anything tries to use the sock_release code path the kernel will oops, and let us know about the bug. The macvtap code already uses it's internal socket this way. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- drivers/net/tun.c | 24 ++++-------------------- include/linux/net.h | 1 - net/socket.c | 3 --- 3 files changed, 4 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/tun.c b/drivers/net/tun.c index e470ae59d405..3262f3e2b8b2 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -146,7 +146,6 @@ struct tun_file { struct socket socket; struct socket_wq wq; struct tun_struct __rcu *tun; - struct net *net; struct fasync_struct *fasync; /* only used for fasnyc */ unsigned int flags; @@ -493,10 +492,7 @@ static void __tun_detach(struct tun_file *tfile, bool clean) tun->dev->reg_state == NETREG_REGISTERED) unregister_netdevice(tun->dev); } - - BUG_ON(!test_bit(SOCK_EXTERNALLY_ALLOCATED, - &tfile->socket.flags)); - sk_release_kernel(&tfile->sk); + sock_put(&tfile->sk); } } @@ -1492,18 +1488,10 @@ out: return ret; } -static int tun_release(struct socket *sock) -{ - if (sock->sk) - sock_put(sock->sk); - return 0; -} - /* Ops structure to mimic raw sockets with tun */ static const struct proto_ops tun_socket_ops = { .sendmsg = tun_sendmsg, .recvmsg = tun_recvmsg, - .release = tun_release, }; static struct proto tun_proto = { @@ -1865,7 +1853,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, if (cmd == TUNSETIFF && !tun) { ifr.ifr_name[IFNAMSIZ-1] = '\0'; - ret = tun_set_iff(tfile->net, file, &ifr); + ret = tun_set_iff(sock_net(&tfile->sk), file, &ifr); if (ret) goto unlock; @@ -2154,16 +2142,16 @@ out: static int tun_chr_open(struct inode *inode, struct file * file) { + struct net *net = current->nsproxy->net_ns; struct tun_file *tfile; DBG1(KERN_INFO, "tunX: tun_chr_open\n"); - tfile = (struct tun_file *)sk_alloc(&init_net, AF_UNSPEC, GFP_KERNEL, + tfile = (struct tun_file *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL, &tun_proto); if (!tfile) return -ENOMEM; RCU_INIT_POINTER(tfile->tun, NULL); - tfile->net = get_net(current->nsproxy->net_ns); tfile->flags = 0; tfile->ifindex = 0; @@ -2174,13 +2162,11 @@ static int tun_chr_open(struct inode *inode, struct file * file) tfile->socket.ops = &tun_socket_ops; sock_init_data(&tfile->socket, &tfile->sk); - sk_change_net(&tfile->sk, tfile->net); tfile->sk.sk_write_space = tun_sock_write_space; tfile->sk.sk_sndbuf = INT_MAX; file->private_data = tfile; - set_bit(SOCK_EXTERNALLY_ALLOCATED, &tfile->socket.flags); INIT_LIST_HEAD(&tfile->next); sock_set_flag(&tfile->sk, SOCK_ZEROCOPY); @@ -2191,10 +2177,8 @@ static int tun_chr_open(struct inode *inode, struct file * file) static int tun_chr_close(struct inode *inode, struct file *file) { struct tun_file *tfile = file->private_data; - struct net *net = tfile->net; tun_detach(tfile, true); - put_net(net); return 0; } diff --git a/include/linux/net.h b/include/linux/net.h index 738ea48be889..8a5e81d2bdf7 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -38,7 +38,6 @@ struct net; #define SOCK_NOSPACE 2 #define SOCK_PASSCRED 3 #define SOCK_PASSSEC 4 -#define SOCK_EXTERNALLY_ALLOCATED 5 #ifndef ARCH_HAS_SOCKET_TYPES /** diff --git a/net/socket.c b/net/socket.c index 884e32997698..b5f1f43ed8f4 100644 --- a/net/socket.c +++ b/net/socket.c @@ -576,9 +576,6 @@ void sock_release(struct socket *sock) if (rcu_dereference_protected(sock->wq, 1)->fasync_list) pr_err("%s: fasync list not empty!\n", __func__); - if (test_bit(SOCK_EXTERNALLY_ALLOCATED, &sock->flags)) - return; - this_cpu_sub(sockets_in_use, 1); if (!sock->file) { iput(SOCK_INODE(sock)); -- cgit v1.2.3 From eeb1bd5c40edb0e2fd925c8535e2fdebdbc5cef2 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 8 May 2015 21:08:05 -0500 Subject: net: Add a struct net parameter to sock_create_kern This is long overdue, and is part of cleaning up how we allocate kernel sockets that don't reference count struct net. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- drivers/block/drbd/drbd_receiver.c | 4 ++-- fs/afs/rxrpc.c | 2 +- fs/dlm/lowcomms.c | 16 ++++++++-------- include/linux/net.h | 2 +- net/bluetooth/rfcomm/core.c | 2 +- net/ceph/messenger.c | 4 ++-- net/ipv4/af_inet.c | 2 +- net/ipv4/udp_tunnel.c | 2 +- net/ipv6/ip6_udp_tunnel.c | 2 +- net/l2tp/l2tp_core.c | 4 ++-- net/netfilter/ipvs/ip_vs_sync.c | 4 ++-- net/rxrpc/ar-local.c | 4 ++-- net/socket.c | 4 ++-- 13 files changed, 26 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index cee20354ac37..c097909c589c 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -598,7 +598,7 @@ static struct socket *drbd_try_connect(struct drbd_connection *connection) memcpy(&peer_in6, &connection->peer_addr, peer_addr_len); what = "sock_create_kern"; - err = sock_create_kern(((struct sockaddr *)&src_in6)->sa_family, + err = sock_create_kern(&init_net, ((struct sockaddr *)&src_in6)->sa_family, SOCK_STREAM, IPPROTO_TCP, &sock); if (err < 0) { sock = NULL; @@ -693,7 +693,7 @@ static int prepare_listen_socket(struct drbd_connection *connection, struct acce memcpy(&my_addr, &connection->my_addr, my_addr_len); what = "sock_create_kern"; - err = sock_create_kern(((struct sockaddr *)&my_addr)->sa_family, + err = sock_create_kern(&init_net, ((struct sockaddr *)&my_addr)->sa_family, SOCK_STREAM, IPPROTO_TCP, &s_listen); if (err) { s_listen = NULL; diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 3a57a1b0fb51..b50642870a43 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -85,7 +85,7 @@ int afs_open_socket(void) return -ENOMEM; } - ret = sock_create_kern(AF_RXRPC, SOCK_DGRAM, PF_INET, &socket); + ret = sock_create_kern(&init_net, AF_RXRPC, SOCK_DGRAM, PF_INET, &socket); if (ret < 0) { destroy_workqueue(afs_async_calls); _leave(" = %d [socket]", ret); diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index d08e079ea5d3..754fd6c0b747 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -921,8 +921,8 @@ static int tcp_accept_from_sock(struct connection *con) mutex_unlock(&connections_lock); memset(&peeraddr, 0, sizeof(peeraddr)); - result = sock_create_kern(dlm_local_addr[0]->ss_family, SOCK_STREAM, - IPPROTO_TCP, &newsock); + result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family, + SOCK_STREAM, IPPROTO_TCP, &newsock); if (result < 0) return -ENOMEM; @@ -1173,8 +1173,8 @@ static void tcp_connect_to_sock(struct connection *con) goto out; /* Create a socket to communicate with */ - result = sock_create_kern(dlm_local_addr[0]->ss_family, SOCK_STREAM, - IPPROTO_TCP, &sock); + result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family, + SOCK_STREAM, IPPROTO_TCP, &sock); if (result < 0) goto out_err; @@ -1258,8 +1258,8 @@ static struct socket *tcp_create_listen_sock(struct connection *con, addr_len = sizeof(struct sockaddr_in6); /* Create a socket to communicate with */ - result = sock_create_kern(dlm_local_addr[0]->ss_family, SOCK_STREAM, - IPPROTO_TCP, &sock); + result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family, + SOCK_STREAM, IPPROTO_TCP, &sock); if (result < 0) { log_print("Can't create listening comms socket"); goto create_out; @@ -1365,8 +1365,8 @@ static int sctp_listen_for_all(void) log_print("Using SCTP for communications"); - result = sock_create_kern(dlm_local_addr[0]->ss_family, SOCK_SEQPACKET, - IPPROTO_SCTP, &sock); + result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family, + SOCK_SEQPACKET, IPPROTO_SCTP, &sock); if (result < 0) { log_print("Can't create comms socket, check SCTP is loaded"); goto out; diff --git a/include/linux/net.h b/include/linux/net.h index 8a5e81d2bdf7..04aa06852771 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -207,7 +207,7 @@ void sock_unregister(int family); int __sock_create(struct net *net, int family, int type, int proto, struct socket **res, int kern); int sock_create(int family, int type, int proto, struct socket **res); -int sock_create_kern(int family, int type, int proto, struct socket **res); +int sock_create_kern(struct net *net, int family, int type, int proto, struct socket **res); int sock_create_lite(int family, int type, int proto, struct socket **res); void sock_release(struct socket *sock); int sock_sendmsg(struct socket *sock, struct msghdr *msg); diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 4fea24275b17..29709fbfd1f5 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -200,7 +200,7 @@ static int rfcomm_l2sock_create(struct socket **sock) BT_DBG(""); - err = sock_create_kern(PF_BLUETOOTH, SOCK_SEQPACKET, BTPROTO_L2CAP, sock); + err = sock_create_kern(&init_net, PF_BLUETOOTH, SOCK_SEQPACKET, BTPROTO_L2CAP, sock); if (!err) { struct sock *sk = (*sock)->sk; sk->sk_data_ready = rfcomm_l2data_ready; diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 967080a9f043..073262fea6dd 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -480,8 +480,8 @@ static int ceph_tcp_connect(struct ceph_connection *con) int ret; BUG_ON(con->sock); - ret = sock_create_kern(con->peer_addr.in_addr.ss_family, SOCK_STREAM, - IPPROTO_TCP, &sock); + ret = sock_create_kern(&init_net, con->peer_addr.in_addr.ss_family, + SOCK_STREAM, IPPROTO_TCP, &sock); if (ret) return ret; sock->sk->sk_allocation = GFP_NOFS; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 8b47a4d79d04..09f4d024dfe5 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1430,7 +1430,7 @@ int inet_ctl_sock_create(struct sock **sk, unsigned short family, struct net *net) { struct socket *sock; - int rc = sock_create_kern(family, type, protocol, &sock); + int rc = sock_create_kern(&init_net, family, type, protocol, &sock); if (rc == 0) { *sk = sock->sk; diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c index 6bb98cc193c9..4e2837476967 100644 --- a/net/ipv4/udp_tunnel.c +++ b/net/ipv4/udp_tunnel.c @@ -15,7 +15,7 @@ int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg, struct socket *sock = NULL; struct sockaddr_in udp_addr; - err = sock_create_kern(AF_INET, SOCK_DGRAM, 0, &sock); + err = sock_create_kern(&init_net, AF_INET, SOCK_DGRAM, 0, &sock); if (err < 0) goto error; diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c index bba8903e871f..478576b61214 100644 --- a/net/ipv6/ip6_udp_tunnel.c +++ b/net/ipv6/ip6_udp_tunnel.c @@ -19,7 +19,7 @@ int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg, int err; struct socket *sock = NULL; - err = sock_create_kern(AF_INET6, SOCK_DGRAM, 0, &sock); + err = sock_create_kern(&init_net, AF_INET6, SOCK_DGRAM, 0, &sock); if (err < 0) goto error; diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index a29a504492af..ae513a2fe7f3 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1399,7 +1399,7 @@ static int l2tp_tunnel_sock_create(struct net *net, if (cfg->local_ip6 && cfg->peer_ip6) { struct sockaddr_l2tpip6 ip6_addr = {0}; - err = sock_create_kern(AF_INET6, SOCK_DGRAM, + err = sock_create_kern(&init_net, AF_INET6, SOCK_DGRAM, IPPROTO_L2TP, &sock); if (err < 0) goto out; @@ -1429,7 +1429,7 @@ static int l2tp_tunnel_sock_create(struct net *net, { struct sockaddr_l2tpip ip_addr = {0}; - err = sock_create_kern(AF_INET, SOCK_DGRAM, + err = sock_create_kern(&init_net, AF_INET, SOCK_DGRAM, IPPROTO_L2TP, &sock); if (err < 0) goto out; diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 19b9cce6c210..2e9a5b5d1239 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -1458,7 +1458,7 @@ static struct socket *make_send_sock(struct net *net, int id) int result; /* First create a socket move it to right name space later */ - result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); + result = sock_create_kern(&init_net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); if (result < 0) { pr_err("Error during creation of socket; terminating\n"); return ERR_PTR(result); @@ -1518,7 +1518,7 @@ static struct socket *make_receive_sock(struct net *net, int id) int result; /* First create a socket */ - result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); + result = sock_create_kern(&init_net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); if (result < 0) { pr_err("Error during creation of socket; terminating\n"); return ERR_PTR(result); diff --git a/net/rxrpc/ar-local.c b/net/rxrpc/ar-local.c index ca904ed5400a..78483b4602bf 100644 --- a/net/rxrpc/ar-local.c +++ b/net/rxrpc/ar-local.c @@ -73,8 +73,8 @@ static int rxrpc_create_local(struct rxrpc_local *local) _enter("%p{%d}", local, local->srx.transport_type); /* create a socket to represent the local endpoint */ - ret = sock_create_kern(PF_INET, local->srx.transport_type, IPPROTO_UDP, - &local->socket); + ret = sock_create_kern(&init_net, PF_INET, local->srx.transport_type, + IPPROTO_UDP, &local->socket); if (ret < 0) { _leave(" = %d [socket]", ret); return ret; diff --git a/net/socket.c b/net/socket.c index b5f1f43ed8f4..9963a0b53a64 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1210,9 +1210,9 @@ int sock_create(int family, int type, int protocol, struct socket **res) } EXPORT_SYMBOL(sock_create); -int sock_create_kern(int family, int type, int protocol, struct socket **res) +int sock_create_kern(struct net *net, int family, int type, int protocol, struct socket **res) { - return __sock_create(&init_net, family, type, protocol, res, 1); + return __sock_create(net, family, type, protocol, res, 1); } EXPORT_SYMBOL(sock_create_kern); -- cgit v1.2.3 From 11aa9c28b4209242a9de0a661a7b3405adb568a0 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 8 May 2015 21:09:13 -0500 Subject: net: Pass kern from net_proto_family.create to sk_alloc In preparation for changing how struct net is refcounted on kernel sockets pass the knowledge that we are creating a kernel socket from sock_create_kern through to sk_alloc. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- crypto/af_alg.c | 4 ++-- drivers/isdn/mISDN/socket.c | 12 ++++++------ drivers/net/macvtap.c | 2 +- drivers/net/ppp/pppoe.c | 4 ++-- drivers/net/ppp/pppox.c | 2 +- drivers/net/ppp/pptp.c | 4 ++-- drivers/net/tun.c | 2 +- include/linux/if_pppox.h | 2 +- include/net/af_vsock.h | 2 +- include/net/llc_conn.h | 2 +- include/net/sock.h | 2 +- net/appletalk/ddp.c | 2 +- net/atm/common.c | 4 ++-- net/atm/common.h | 2 +- net/atm/pvc.c | 2 +- net/atm/svc.c | 2 +- net/ax25/af_ax25.c | 4 ++-- net/bluetooth/bnep/sock.c | 2 +- net/bluetooth/cmtp/sock.c | 2 +- net/bluetooth/hci_sock.c | 2 +- net/bluetooth/hidp/sock.c | 2 +- net/bluetooth/l2cap_sock.c | 10 +++++----- net/bluetooth/rfcomm/sock.c | 8 ++++---- net/bluetooth/sco.c | 8 ++++---- net/caif/caif_socket.c | 2 +- net/can/af_can.c | 2 +- net/core/sock.c | 3 ++- net/decnet/af_decnet.c | 8 ++++---- net/ieee802154/socket.c | 2 +- net/ipv4/af_inet.c | 2 +- net/ipv6/af_inet6.c | 2 +- net/ipx/af_ipx.c | 2 +- net/irda/af_irda.c | 2 +- net/iucv/af_iucv.c | 10 +++++----- net/key/af_key.c | 2 +- net/l2tp/l2tp_ppp.c | 4 ++-- net/llc/af_llc.c | 2 +- net/llc/llc_conn.c | 6 +++--- net/netlink/af_netlink.c | 11 +++++------ net/netrom/af_netrom.c | 4 ++-- net/nfc/af_nfc.c | 2 +- net/nfc/llcp.h | 2 +- net/nfc/llcp_core.c | 2 +- net/nfc/llcp_sock.c | 8 ++++---- net/nfc/nfc.h | 2 +- net/nfc/rawsock.c | 4 ++-- net/packet/af_packet.c | 2 +- net/phonet/af_phonet.c | 2 +- net/phonet/pep.c | 2 +- net/rds/af_rds.c | 2 +- net/rose/af_rose.c | 4 ++-- net/rxrpc/af_rxrpc.c | 2 +- net/sctp/ipv6.c | 2 +- net/sctp/protocol.c | 2 +- net/tipc/socket.c | 2 +- net/unix/af_unix.c | 8 ++++---- net/vmw_vsock/af_vsock.c | 7 ++++--- net/vmw_vsock/vmci_transport.c | 2 +- net/x25/af_x25.c | 8 ++++---- 59 files changed, 109 insertions(+), 108 deletions(-) (limited to 'include/linux') diff --git a/crypto/af_alg.c b/crypto/af_alg.c index f22cc56fd1b3..5ad0d5354535 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -244,7 +244,7 @@ int af_alg_accept(struct sock *sk, struct socket *newsock) if (!type) goto unlock; - sk2 = sk_alloc(sock_net(sk), PF_ALG, GFP_KERNEL, &alg_proto); + sk2 = sk_alloc(sock_net(sk), PF_ALG, GFP_KERNEL, &alg_proto, 0); err = -ENOMEM; if (!sk2) goto unlock; @@ -324,7 +324,7 @@ static int alg_create(struct net *net, struct socket *sock, int protocol, return -EPROTONOSUPPORT; err = -ENOMEM; - sk = sk_alloc(net, PF_ALG, GFP_KERNEL, &alg_proto); + sk = sk_alloc(net, PF_ALG, GFP_KERNEL, &alg_proto, kern); if (!sk) goto out; diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c index 8dc7290089bb..0d29b5a6356d 100644 --- a/drivers/isdn/mISDN/socket.c +++ b/drivers/isdn/mISDN/socket.c @@ -601,14 +601,14 @@ static const struct proto_ops data_sock_ops = { }; static int -data_sock_create(struct net *net, struct socket *sock, int protocol) +data_sock_create(struct net *net, struct socket *sock, int protocol, int kern) { struct sock *sk; if (sock->type != SOCK_DGRAM) return -ESOCKTNOSUPPORT; - sk = sk_alloc(net, PF_ISDN, GFP_KERNEL, &mISDN_proto); + sk = sk_alloc(net, PF_ISDN, GFP_KERNEL, &mISDN_proto, kern); if (!sk) return -ENOMEM; @@ -756,14 +756,14 @@ static const struct proto_ops base_sock_ops = { static int -base_sock_create(struct net *net, struct socket *sock, int protocol) +base_sock_create(struct net *net, struct socket *sock, int protocol, int kern) { struct sock *sk; if (sock->type != SOCK_RAW) return -ESOCKTNOSUPPORT; - sk = sk_alloc(net, PF_ISDN, GFP_KERNEL, &mISDN_proto); + sk = sk_alloc(net, PF_ISDN, GFP_KERNEL, &mISDN_proto, kern); if (!sk) return -ENOMEM; @@ -785,7 +785,7 @@ mISDN_sock_create(struct net *net, struct socket *sock, int proto, int kern) switch (proto) { case ISDN_P_BASE: - err = base_sock_create(net, sock, proto); + err = base_sock_create(net, sock, proto, kern); break; case ISDN_P_TE_S0: case ISDN_P_NT_S0: @@ -799,7 +799,7 @@ mISDN_sock_create(struct net *net, struct socket *sock, int proto, int kern) case ISDN_P_B_L2DTMF: case ISDN_P_B_L2DSP: case ISDN_P_B_L2DSPHDLC: - err = data_sock_create(net, sock, proto); + err = data_sock_create(net, sock, proto, kern); break; default: return err; diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 8c350c5d54ad..0398631a3c24 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -476,7 +476,7 @@ static int macvtap_open(struct inode *inode, struct file *file) err = -ENOMEM; q = (struct macvtap_queue *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL, - &macvtap_proto); + &macvtap_proto, 0); if (!q) goto out; diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index aa1dd926623a..f86c5ab334aa 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -546,11 +546,11 @@ static struct proto pppoe_sk_proto __read_mostly = { * Initialize a new struct sock. * **********************************************************************/ -static int pppoe_create(struct net *net, struct socket *sock) +static int pppoe_create(struct net *net, struct socket *sock, int kern) { struct sock *sk; - sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pppoe_sk_proto); + sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pppoe_sk_proto, kern); if (!sk) return -ENOMEM; diff --git a/drivers/net/ppp/pppox.c b/drivers/net/ppp/pppox.c index 2940e9fe351b..0e1b30622477 100644 --- a/drivers/net/ppp/pppox.c +++ b/drivers/net/ppp/pppox.c @@ -118,7 +118,7 @@ static int pppox_create(struct net *net, struct socket *sock, int protocol, !try_module_get(pppox_protos[protocol]->owner)) goto out; - rc = pppox_protos[protocol]->create(net, sock); + rc = pppox_protos[protocol]->create(net, sock, kern); module_put(pppox_protos[protocol]->owner); out: diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c index e3bfbd4d0136..14839bc0aaf5 100644 --- a/drivers/net/ppp/pptp.c +++ b/drivers/net/ppp/pptp.c @@ -561,14 +561,14 @@ static void pptp_sock_destruct(struct sock *sk) skb_queue_purge(&sk->sk_receive_queue); } -static int pptp_create(struct net *net, struct socket *sock) +static int pptp_create(struct net *net, struct socket *sock, int kern) { int error = -ENOMEM; struct sock *sk; struct pppox_sock *po; struct pptp_opt *opt; - sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pptp_sk_proto); + sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pptp_sk_proto, kern); if (!sk) goto out; diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 3262f3e2b8b2..1a1c4f7b3ec5 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -2148,7 +2148,7 @@ static int tun_chr_open(struct inode *inode, struct file * file) DBG1(KERN_INFO, "tunX: tun_chr_open\n"); tfile = (struct tun_file *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL, - &tun_proto); + &tun_proto, 0); if (!tfile) return -ENOMEM; RCU_INIT_POINTER(tfile->tun, NULL); diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h index 66a7d7600f43..b49cf923becc 100644 --- a/include/linux/if_pppox.h +++ b/include/linux/if_pppox.h @@ -74,7 +74,7 @@ static inline struct sock *sk_pppox(struct pppox_sock *po) struct module; struct pppox_proto { - int (*create)(struct net *net, struct socket *sock); + int (*create)(struct net *net, struct socket *sock, int kern); int (*ioctl)(struct socket *sock, unsigned int cmd, unsigned long arg); struct module *owner; diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index 172632dd9930..db639a4c5ab8 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -74,7 +74,7 @@ void vsock_pending_work(struct work_struct *work); struct sock *__vsock_create(struct net *net, struct socket *sock, struct sock *parent, - gfp_t priority, unsigned short type); + gfp_t priority, unsigned short type, int kern); /**** TRANSPORT ****/ diff --git a/include/net/llc_conn.h b/include/net/llc_conn.h index 0134681acc4c..fe994d2e5286 100644 --- a/include/net/llc_conn.h +++ b/include/net/llc_conn.h @@ -96,7 +96,7 @@ static __inline__ char llc_backlog_type(struct sk_buff *skb) } struct sock *llc_sk_alloc(struct net *net, int family, gfp_t priority, - struct proto *prot); + struct proto *prot, int kern); void llc_sk_free(struct sock *sk); void llc_sk_reset(struct sock *sk); diff --git a/include/net/sock.h b/include/net/sock.h index 3a4898ec8c67..d8dcf91732b0 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1514,7 +1514,7 @@ static inline void unlock_sock_fast(struct sock *sk, bool slow) struct sock *sk_alloc(struct net *net, int family, gfp_t priority, - struct proto *prot); + struct proto *prot, int kern); void sk_free(struct sock *sk); void sk_release_kernel(struct sock *sk); struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority); diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 3b7ad43c7dad..d5871ac493eb 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1030,7 +1030,7 @@ static int atalk_create(struct net *net, struct socket *sock, int protocol, if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM) goto out; rc = -ENOMEM; - sk = sk_alloc(net, PF_APPLETALK, GFP_KERNEL, &ddp_proto); + sk = sk_alloc(net, PF_APPLETALK, GFP_KERNEL, &ddp_proto, kern); if (!sk) goto out; rc = 0; diff --git a/net/atm/common.c b/net/atm/common.c index ed0466637e13..49a872db7e42 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -141,7 +141,7 @@ static struct proto vcc_proto = { .release_cb = vcc_release_cb, }; -int vcc_create(struct net *net, struct socket *sock, int protocol, int family) +int vcc_create(struct net *net, struct socket *sock, int protocol, int family, int kern) { struct sock *sk; struct atm_vcc *vcc; @@ -149,7 +149,7 @@ int vcc_create(struct net *net, struct socket *sock, int protocol, int family) sock->sk = NULL; if (sock->type == SOCK_STREAM) return -EINVAL; - sk = sk_alloc(net, family, GFP_KERNEL, &vcc_proto); + sk = sk_alloc(net, family, GFP_KERNEL, &vcc_proto, kern); if (!sk) return -ENOMEM; sock_init_data(sock, sk); diff --git a/net/atm/common.h b/net/atm/common.h index 4d6f5b2068ac..959436b87182 100644 --- a/net/atm/common.h +++ b/net/atm/common.h @@ -10,7 +10,7 @@ #include /* for poll_table */ -int vcc_create(struct net *net, struct socket *sock, int protocol, int family); +int vcc_create(struct net *net, struct socket *sock, int protocol, int family, int kern); int vcc_release(struct socket *sock); int vcc_connect(struct socket *sock, int itf, short vpi, int vci); int vcc_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, diff --git a/net/atm/pvc.c b/net/atm/pvc.c index ae0324021407..040207ec399f 100644 --- a/net/atm/pvc.c +++ b/net/atm/pvc.c @@ -136,7 +136,7 @@ static int pvc_create(struct net *net, struct socket *sock, int protocol, return -EAFNOSUPPORT; sock->ops = &pvc_proto_ops; - return vcc_create(net, sock, protocol, PF_ATMPVC); + return vcc_create(net, sock, protocol, PF_ATMPVC, kern); } static const struct net_proto_family pvc_family_ops = { diff --git a/net/atm/svc.c b/net/atm/svc.c index 1ba23f5018e7..3fa0a9ee98d1 100644 --- a/net/atm/svc.c +++ b/net/atm/svc.c @@ -660,7 +660,7 @@ static int svc_create(struct net *net, struct socket *sock, int protocol, return -EAFNOSUPPORT; sock->ops = &svc_proto_ops; - error = vcc_create(net, sock, protocol, AF_ATMSVC); + error = vcc_create(net, sock, protocol, AF_ATMSVC, kern); if (error) return error; ATM_SD(sock)->local.sas_family = AF_ATMSVC; diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 330c1f4a5a0b..4273533d22b1 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -855,7 +855,7 @@ static int ax25_create(struct net *net, struct socket *sock, int protocol, return -ESOCKTNOSUPPORT; } - sk = sk_alloc(net, PF_AX25, GFP_ATOMIC, &ax25_proto); + sk = sk_alloc(net, PF_AX25, GFP_ATOMIC, &ax25_proto, kern); if (sk == NULL) return -ENOMEM; @@ -881,7 +881,7 @@ struct sock *ax25_make_new(struct sock *osk, struct ax25_dev *ax25_dev) struct sock *sk; ax25_cb *ax25, *oax25; - sk = sk_alloc(sock_net(osk), PF_AX25, GFP_ATOMIC, osk->sk_prot); + sk = sk_alloc(sock_net(osk), PF_AX25, GFP_ATOMIC, osk->sk_prot, 0); if (sk == NULL) return NULL; diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c index bde2bdd9e929..b5116fa9835e 100644 --- a/net/bluetooth/bnep/sock.c +++ b/net/bluetooth/bnep/sock.c @@ -202,7 +202,7 @@ static int bnep_sock_create(struct net *net, struct socket *sock, int protocol, if (sock->type != SOCK_RAW) return -ESOCKTNOSUPPORT; - sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &bnep_proto); + sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &bnep_proto, kern); if (!sk) return -ENOMEM; diff --git a/net/bluetooth/cmtp/sock.c b/net/bluetooth/cmtp/sock.c index d82787d417bd..ce86a7bae844 100644 --- a/net/bluetooth/cmtp/sock.c +++ b/net/bluetooth/cmtp/sock.c @@ -205,7 +205,7 @@ static int cmtp_sock_create(struct net *net, struct socket *sock, int protocol, if (sock->type != SOCK_RAW) return -ESOCKTNOSUPPORT; - sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &cmtp_proto); + sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &cmtp_proto, kern); if (!sk) return -ENOMEM; diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 56f9edbf3d05..5b14dcafcd08 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -1377,7 +1377,7 @@ static int hci_sock_create(struct net *net, struct socket *sock, int protocol, sock->ops = &hci_sock_ops; - sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &hci_sk_proto); + sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &hci_sk_proto, kern); if (!sk) return -ENOMEM; diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c index cb3fdde1968a..008ba439bd62 100644 --- a/net/bluetooth/hidp/sock.c +++ b/net/bluetooth/hidp/sock.c @@ -235,7 +235,7 @@ static int hidp_sock_create(struct net *net, struct socket *sock, int protocol, if (sock->type != SOCK_RAW) return -ESOCKTNOSUPPORT; - sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &hidp_proto); + sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &hidp_proto, kern); if (!sk) return -ENOMEM; diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index a7278f05eafb..244287706f91 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -43,7 +43,7 @@ static struct bt_sock_list l2cap_sk_list = { static const struct proto_ops l2cap_sock_ops; static void l2cap_sock_init(struct sock *sk, struct sock *parent); static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, - int proto, gfp_t prio); + int proto, gfp_t prio, int kern); bool l2cap_is_socket(struct socket *sock) { @@ -1193,7 +1193,7 @@ static struct l2cap_chan *l2cap_sock_new_connection_cb(struct l2cap_chan *chan) } sk = l2cap_sock_alloc(sock_net(parent), NULL, BTPROTO_L2CAP, - GFP_ATOMIC); + GFP_ATOMIC, 0); if (!sk) { release_sock(parent); return NULL; @@ -1523,12 +1523,12 @@ static struct proto l2cap_proto = { }; static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, - int proto, gfp_t prio) + int proto, gfp_t prio, int kern) { struct sock *sk; struct l2cap_chan *chan; - sk = sk_alloc(net, PF_BLUETOOTH, prio, &l2cap_proto); + sk = sk_alloc(net, PF_BLUETOOTH, prio, &l2cap_proto, kern); if (!sk) return NULL; @@ -1574,7 +1574,7 @@ static int l2cap_sock_create(struct net *net, struct socket *sock, int protocol, sock->ops = &l2cap_sock_ops; - sk = l2cap_sock_alloc(net, sock, protocol, GFP_ATOMIC); + sk = l2cap_sock_alloc(net, sock, protocol, GFP_ATOMIC, kern); if (!sk) return -ENOMEM; diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 825e8fb5114b..b2338e971b33 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -269,12 +269,12 @@ static struct proto rfcomm_proto = { .obj_size = sizeof(struct rfcomm_pinfo) }; -static struct sock *rfcomm_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio) +static struct sock *rfcomm_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio, int kern) { struct rfcomm_dlc *d; struct sock *sk; - sk = sk_alloc(net, PF_BLUETOOTH, prio, &rfcomm_proto); + sk = sk_alloc(net, PF_BLUETOOTH, prio, &rfcomm_proto, kern); if (!sk) return NULL; @@ -324,7 +324,7 @@ static int rfcomm_sock_create(struct net *net, struct socket *sock, sock->ops = &rfcomm_sock_ops; - sk = rfcomm_sock_alloc(net, sock, protocol, GFP_ATOMIC); + sk = rfcomm_sock_alloc(net, sock, protocol, GFP_ATOMIC, kern); if (!sk) return -ENOMEM; @@ -969,7 +969,7 @@ int rfcomm_connect_ind(struct rfcomm_session *s, u8 channel, struct rfcomm_dlc * goto done; } - sk = rfcomm_sock_alloc(sock_net(parent), NULL, BTPROTO_RFCOMM, GFP_ATOMIC); + sk = rfcomm_sock_alloc(sock_net(parent), NULL, BTPROTO_RFCOMM, GFP_ATOMIC, 0); if (!sk) goto done; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 4322c833e748..6b6e59dc54cf 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -460,11 +460,11 @@ static struct proto sco_proto = { .obj_size = sizeof(struct sco_pinfo) }; -static struct sock *sco_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio) +static struct sock *sco_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio, int kern) { struct sock *sk; - sk = sk_alloc(net, PF_BLUETOOTH, prio, &sco_proto); + sk = sk_alloc(net, PF_BLUETOOTH, prio, &sco_proto, kern); if (!sk) return NULL; @@ -501,7 +501,7 @@ static int sco_sock_create(struct net *net, struct socket *sock, int protocol, sock->ops = &sco_sock_ops; - sk = sco_sock_alloc(net, sock, protocol, GFP_ATOMIC); + sk = sco_sock_alloc(net, sock, protocol, GFP_ATOMIC, kern); if (!sk) return -ENOMEM; @@ -1026,7 +1026,7 @@ static void sco_conn_ready(struct sco_conn *conn) bh_lock_sock(parent); sk = sco_sock_alloc(sock_net(parent), NULL, - BTPROTO_SCO, GFP_ATOMIC); + BTPROTO_SCO, GFP_ATOMIC, 0); if (!sk) { bh_unlock_sock(parent); sco_conn_unlock(conn); diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 4ec0c803aef1..78a04ebb113c 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -1047,7 +1047,7 @@ static int caif_create(struct net *net, struct socket *sock, int protocol, * is really not used at all in the net/core or socket.c but the * initialization makes sure that sock->state is not uninitialized. */ - sk = sk_alloc(net, PF_CAIF, GFP_KERNEL, &prot); + sk = sk_alloc(net, PF_CAIF, GFP_KERNEL, &prot, kern); if (!sk) return -ENOMEM; diff --git a/net/can/af_can.c b/net/can/af_can.c index 32d710eaf1fc..d4d404bdfc9a 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -179,7 +179,7 @@ static int can_create(struct net *net, struct socket *sock, int protocol, sock->ops = cp->ops; - sk = sk_alloc(net, PF_CAN, GFP_KERNEL, cp->prot); + sk = sk_alloc(net, PF_CAN, GFP_KERNEL, cp->prot, kern); if (!sk) { err = -ENOMEM; goto errout; diff --git a/net/core/sock.c b/net/core/sock.c index e891bcf325ca..cbc3789b830c 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1396,9 +1396,10 @@ EXPORT_SYMBOL_GPL(sock_update_netprioidx); * @family: protocol family * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc) * @prot: struct proto associated with this new sock instance + * @kern: is this to be a kernel socket? */ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, - struct proto *prot) + struct proto *prot, int kern) { struct sock *sk; diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 754484b3cd0e..675cf94e04f8 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -468,10 +468,10 @@ static struct proto dn_proto = { .obj_size = sizeof(struct dn_sock), }; -static struct sock *dn_alloc_sock(struct net *net, struct socket *sock, gfp_t gfp) +static struct sock *dn_alloc_sock(struct net *net, struct socket *sock, gfp_t gfp, int kern) { struct dn_scp *scp; - struct sock *sk = sk_alloc(net, PF_DECnet, gfp, &dn_proto); + struct sock *sk = sk_alloc(net, PF_DECnet, gfp, &dn_proto, kern); if (!sk) goto out; @@ -693,7 +693,7 @@ static int dn_create(struct net *net, struct socket *sock, int protocol, } - if ((sk = dn_alloc_sock(net, sock, GFP_KERNEL)) == NULL) + if ((sk = dn_alloc_sock(net, sock, GFP_KERNEL, kern)) == NULL) return -ENOBUFS; sk->sk_protocol = protocol; @@ -1096,7 +1096,7 @@ static int dn_accept(struct socket *sock, struct socket *newsock, int flags) cb = DN_SKB_CB(skb); sk->sk_ack_backlog--; - newsk = dn_alloc_sock(sock_net(sk), newsock, sk->sk_allocation); + newsk = dn_alloc_sock(sock_net(sk), newsock, sk->sk_allocation, 0); if (newsk == NULL) { release_sock(sk); kfree_skb(skb); diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c index b60c65f70346..7aaaf967df58 100644 --- a/net/ieee802154/socket.c +++ b/net/ieee802154/socket.c @@ -1014,7 +1014,7 @@ static int ieee802154_create(struct net *net, struct socket *sock, } rc = -ENOMEM; - sk = sk_alloc(net, PF_IEEE802154, GFP_KERNEL, proto); + sk = sk_alloc(net, PF_IEEE802154, GFP_KERNEL, proto, kern); if (!sk) goto out; rc = 0; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 09f4d024dfe5..e2dd9cb99d61 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -317,7 +317,7 @@ lookup_protocol: WARN_ON(!answer_prot->slab); err = -ENOBUFS; - sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot); + sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot, kern); if (!sk) goto out; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 4632afa57e05..f3866c0b6cfe 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -167,7 +167,7 @@ lookup_protocol: WARN_ON(!answer_prot->slab); err = -ENOBUFS; - sk = sk_alloc(net, PF_INET6, GFP_KERNEL, answer_prot); + sk = sk_alloc(net, PF_INET6, GFP_KERNEL, answer_prot, kern); if (!sk) goto out; diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index 4ea5d7497b5f..48d0dc89b58d 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -1347,7 +1347,7 @@ static int ipx_create(struct net *net, struct socket *sock, int protocol, goto out; rc = -ENOMEM; - sk = sk_alloc(net, PF_IPX, GFP_KERNEL, &ipx_proto); + sk = sk_alloc(net, PF_IPX, GFP_KERNEL, &ipx_proto, kern); if (!sk) goto out; diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index ee0ea25c8e7a..fae6822cc367 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -1100,7 +1100,7 @@ static int irda_create(struct net *net, struct socket *sock, int protocol, } /* Allocate networking socket */ - sk = sk_alloc(net, PF_IRDA, GFP_KERNEL, &irda_proto); + sk = sk_alloc(net, PF_IRDA, GFP_KERNEL, &irda_proto, kern); if (sk == NULL) return -ENOMEM; diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 6daa52a18d40..918151c11348 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -535,12 +535,12 @@ static void iucv_sock_init(struct sock *sk, struct sock *parent) sk->sk_type = parent->sk_type; } -static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio) +static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio, int kern) { struct sock *sk; struct iucv_sock *iucv; - sk = sk_alloc(&init_net, PF_IUCV, prio, &iucv_proto); + sk = sk_alloc(&init_net, PF_IUCV, prio, &iucv_proto, kern); if (!sk) return NULL; iucv = iucv_sk(sk); @@ -602,7 +602,7 @@ static int iucv_sock_create(struct net *net, struct socket *sock, int protocol, return -ESOCKTNOSUPPORT; } - sk = iucv_sock_alloc(sock, protocol, GFP_KERNEL); + sk = iucv_sock_alloc(sock, protocol, GFP_KERNEL, kern); if (!sk) return -ENOMEM; @@ -1723,7 +1723,7 @@ static int iucv_callback_connreq(struct iucv_path *path, } /* Create the new socket */ - nsk = iucv_sock_alloc(NULL, sk->sk_type, GFP_ATOMIC); + nsk = iucv_sock_alloc(NULL, sk->sk_type, GFP_ATOMIC, 0); if (!nsk) { err = pr_iucv->path_sever(path, user_data); iucv_path_free(path); @@ -1933,7 +1933,7 @@ static int afiucv_hs_callback_syn(struct sock *sk, struct sk_buff *skb) goto out; } - nsk = iucv_sock_alloc(NULL, sk->sk_type, GFP_ATOMIC); + nsk = iucv_sock_alloc(NULL, sk->sk_type, GFP_ATOMIC, 0); bh_lock_sock(sk); if ((sk->sk_state != IUCV_LISTEN) || sk_acceptq_is_full(sk) || diff --git a/net/key/af_key.c b/net/key/af_key.c index f0d52d721b3a..9e834ec475a9 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -149,7 +149,7 @@ static int pfkey_create(struct net *net, struct socket *sock, int protocol, return -EPROTONOSUPPORT; err = -ENOMEM; - sk = sk_alloc(net, PF_KEY, GFP_KERNEL, &key_proto); + sk = sk_alloc(net, PF_KEY, GFP_KERNEL, &key_proto, kern); if (sk == NULL) goto out; diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index e9b0dec56b8e..f56c9f69e9f2 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -542,12 +542,12 @@ static int pppol2tp_backlog_recv(struct sock *sk, struct sk_buff *skb) /* socket() handler. Initialize a new struct sock. */ -static int pppol2tp_create(struct net *net, struct socket *sock) +static int pppol2tp_create(struct net *net, struct socket *sock, int kern) { int error = -ENOMEM; struct sock *sk; - sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pppol2tp_sk_proto); + sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pppol2tp_sk_proto, kern); if (!sk) goto out; diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 17a8dff06090..8fd9febaa5ba 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -168,7 +168,7 @@ static int llc_ui_create(struct net *net, struct socket *sock, int protocol, if (likely(sock->type == SOCK_DGRAM || sock->type == SOCK_STREAM)) { rc = -ENOMEM; - sk = llc_sk_alloc(net, PF_LLC, GFP_KERNEL, &llc_proto); + sk = llc_sk_alloc(net, PF_LLC, GFP_KERNEL, &llc_proto, kern); if (sk) { rc = 0; llc_ui_sk_init(sock, sk); diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index 81a61fce3afb..3e821daf9dd4 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -768,7 +768,7 @@ static struct sock *llc_create_incoming_sock(struct sock *sk, struct llc_addr *daddr) { struct sock *newsk = llc_sk_alloc(sock_net(sk), sk->sk_family, GFP_ATOMIC, - sk->sk_prot); + sk->sk_prot, 0); struct llc_sock *newllc, *llc = llc_sk(sk); if (!newsk) @@ -931,9 +931,9 @@ static void llc_sk_init(struct sock *sk) * Allocates a LLC sock and initializes it. Returns the new LLC sock * or %NULL if there's no memory available for one */ -struct sock *llc_sk_alloc(struct net *net, int family, gfp_t priority, struct proto *prot) +struct sock *llc_sk_alloc(struct net *net, int family, gfp_t priority, struct proto *prot, int kern) { - struct sock *sk = sk_alloc(net, family, priority, prot); + struct sock *sk = sk_alloc(net, family, priority, prot, kern); if (!sk) goto out; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index a5fff75accf8..be6665ab7f40 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1119,14 +1119,15 @@ static struct proto netlink_proto = { }; static int __netlink_create(struct net *net, struct socket *sock, - struct mutex *cb_mutex, int protocol) + struct mutex *cb_mutex, int protocol, + int kern) { struct sock *sk; struct netlink_sock *nlk; sock->ops = &netlink_ops; - sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto); + sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto, kern); if (!sk) return -ENOMEM; @@ -1188,7 +1189,7 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol, if (err < 0) goto out; - err = __netlink_create(net, sock, cb_mutex, protocol); + err = __netlink_create(net, sock, cb_mutex, protocol, kern); if (err < 0) goto out_module; @@ -2515,14 +2516,12 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module, if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock)) return NULL; - /* * We have to just have a reference on the net from sk, but don't * get_net it. Besides, we cannot get and then put the net here. * So we create one inside init_net and the move it to net. */ - - if (__netlink_create(&init_net, sock, cb_mutex, unit) < 0) + if (__netlink_create(&init_net, sock, cb_mutex, unit, 0) < 0) goto out_sock_release_nosk; sk = sock->sk; diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index b987fd56c3c5..ed212ffc1d9d 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -433,7 +433,7 @@ static int nr_create(struct net *net, struct socket *sock, int protocol, if (sock->type != SOCK_SEQPACKET || protocol != 0) return -ESOCKTNOSUPPORT; - sk = sk_alloc(net, PF_NETROM, GFP_ATOMIC, &nr_proto); + sk = sk_alloc(net, PF_NETROM, GFP_ATOMIC, &nr_proto, kern); if (sk == NULL) return -ENOMEM; @@ -476,7 +476,7 @@ static struct sock *nr_make_new(struct sock *osk) if (osk->sk_type != SOCK_SEQPACKET) return NULL; - sk = sk_alloc(sock_net(osk), PF_NETROM, GFP_ATOMIC, osk->sk_prot); + sk = sk_alloc(sock_net(osk), PF_NETROM, GFP_ATOMIC, osk->sk_prot, 0); if (sk == NULL) return NULL; diff --git a/net/nfc/af_nfc.c b/net/nfc/af_nfc.c index 2277276f52bc..54e40fa47822 100644 --- a/net/nfc/af_nfc.c +++ b/net/nfc/af_nfc.c @@ -40,7 +40,7 @@ static int nfc_sock_create(struct net *net, struct socket *sock, int proto, read_lock(&proto_tab_lock); if (proto_tab[proto] && try_module_get(proto_tab[proto]->owner)) { - rc = proto_tab[proto]->create(net, sock, proto_tab[proto]); + rc = proto_tab[proto]->create(net, sock, proto_tab[proto], kern); module_put(proto_tab[proto]->owner); } read_unlock(&proto_tab_lock); diff --git a/net/nfc/llcp.h b/net/nfc/llcp.h index de1789e3cc82..1f68724d44d3 100644 --- a/net/nfc/llcp.h +++ b/net/nfc/llcp.h @@ -225,7 +225,7 @@ void nfc_llcp_send_to_raw_sock(struct nfc_llcp_local *local, struct sk_buff *skb, u8 direction); /* Sock API */ -struct sock *nfc_llcp_sock_alloc(struct socket *sock, int type, gfp_t gfp); +struct sock *nfc_llcp_sock_alloc(struct socket *sock, int type, gfp_t gfp, int kern); void nfc_llcp_sock_free(struct nfc_llcp_sock *sock); void nfc_llcp_accept_unlink(struct sock *sk); void nfc_llcp_accept_enqueue(struct sock *parent, struct sock *sk); diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c index b18f07ccb504..98876274a1ee 100644 --- a/net/nfc/llcp_core.c +++ b/net/nfc/llcp_core.c @@ -934,7 +934,7 @@ static void nfc_llcp_recv_connect(struct nfc_llcp_local *local, sock->ssap = ssap; } - new_sk = nfc_llcp_sock_alloc(NULL, parent->sk_type, GFP_ATOMIC); + new_sk = nfc_llcp_sock_alloc(NULL, parent->sk_type, GFP_ATOMIC, 0); if (new_sk == NULL) { reason = LLCP_DM_REJ; release_sock(&sock->sk); diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index 9578bd6a4f3e..b7de0da46acd 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -942,12 +942,12 @@ static void llcp_sock_destruct(struct sock *sk) } } -struct sock *nfc_llcp_sock_alloc(struct socket *sock, int type, gfp_t gfp) +struct sock *nfc_llcp_sock_alloc(struct socket *sock, int type, gfp_t gfp, int kern) { struct sock *sk; struct nfc_llcp_sock *llcp_sock; - sk = sk_alloc(&init_net, PF_NFC, gfp, &llcp_sock_proto); + sk = sk_alloc(&init_net, PF_NFC, gfp, &llcp_sock_proto, kern); if (!sk) return NULL; @@ -993,7 +993,7 @@ void nfc_llcp_sock_free(struct nfc_llcp_sock *sock) } static int llcp_sock_create(struct net *net, struct socket *sock, - const struct nfc_protocol *nfc_proto) + const struct nfc_protocol *nfc_proto, int kern) { struct sock *sk; @@ -1009,7 +1009,7 @@ static int llcp_sock_create(struct net *net, struct socket *sock, else sock->ops = &llcp_sock_ops; - sk = nfc_llcp_sock_alloc(sock, sock->type, GFP_ATOMIC); + sk = nfc_llcp_sock_alloc(sock, sock->type, GFP_ATOMIC, kern); if (sk == NULL) return -ENOMEM; diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h index a8ce80b47720..5c93e8412a26 100644 --- a/net/nfc/nfc.h +++ b/net/nfc/nfc.h @@ -30,7 +30,7 @@ struct nfc_protocol { struct proto *proto; struct module *owner; int (*create)(struct net *net, struct socket *sock, - const struct nfc_protocol *nfc_proto); + const struct nfc_protocol *nfc_proto, int kern); }; struct nfc_rawsock { diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c index 82b4e8024778..e9a91488fe3d 100644 --- a/net/nfc/rawsock.c +++ b/net/nfc/rawsock.c @@ -334,7 +334,7 @@ static void rawsock_destruct(struct sock *sk) } static int rawsock_create(struct net *net, struct socket *sock, - const struct nfc_protocol *nfc_proto) + const struct nfc_protocol *nfc_proto, int kern) { struct sock *sk; @@ -348,7 +348,7 @@ static int rawsock_create(struct net *net, struct socket *sock, else sock->ops = &rawsock_ops; - sk = sk_alloc(net, PF_NFC, GFP_ATOMIC, nfc_proto->proto); + sk = sk_alloc(net, PF_NFC, GFP_ATOMIC, nfc_proto->proto, kern); if (!sk) return -ENOMEM; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 5102c3cc4eec..94713276a1d9 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2832,7 +2832,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, sock->state = SS_UNCONNECTED; err = -ENOBUFS; - sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto); + sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto, kern); if (sk == NULL) goto out; diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index 32ab87d34828..10d42f3220ab 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -97,7 +97,7 @@ static int pn_socket_create(struct net *net, struct socket *sock, int protocol, goto out; } - sk = sk_alloc(net, PF_PHONET, GFP_KERNEL, pnp->prot); + sk = sk_alloc(net, PF_PHONET, GFP_KERNEL, pnp->prot, kern); if (sk == NULL) { err = -ENOMEM; goto out; diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 6de2aeb98a1f..850a86cde0b3 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -845,7 +845,7 @@ static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp) } /* Create a new to-be-accepted sock */ - newsk = sk_alloc(sock_net(sk), PF_PHONET, GFP_KERNEL, sk->sk_prot); + newsk = sk_alloc(sock_net(sk), PF_PHONET, GFP_KERNEL, sk->sk_prot, 0); if (!newsk) { pep_reject_conn(sk, skb, PN_PIPE_ERR_OVERLOAD, GFP_KERNEL); err = -ENOBUFS; diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index 10443377fb9d..3d83641f2861 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -440,7 +440,7 @@ static int rds_create(struct net *net, struct socket *sock, int protocol, if (sock->type != SOCK_SEQPACKET || protocol) return -ESOCKTNOSUPPORT; - sk = sk_alloc(net, AF_RDS, GFP_ATOMIC, &rds_proto); + sk = sk_alloc(net, AF_RDS, GFP_ATOMIC, &rds_proto, kern); if (!sk) return -ENOMEM; diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 8ae603069a1a..36dbc2da3661 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -520,7 +520,7 @@ static int rose_create(struct net *net, struct socket *sock, int protocol, if (sock->type != SOCK_SEQPACKET || protocol != 0) return -ESOCKTNOSUPPORT; - sk = sk_alloc(net, PF_ROSE, GFP_ATOMIC, &rose_proto); + sk = sk_alloc(net, PF_ROSE, GFP_ATOMIC, &rose_proto, kern); if (sk == NULL) return -ENOMEM; @@ -559,7 +559,7 @@ static struct sock *rose_make_new(struct sock *osk) if (osk->sk_type != SOCK_SEQPACKET) return NULL; - sk = sk_alloc(sock_net(osk), PF_ROSE, GFP_ATOMIC, &rose_proto); + sk = sk_alloc(sock_net(osk), PF_ROSE, GFP_ATOMIC, &rose_proto, 0); if (sk == NULL) return NULL; diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 0095b9a0b779..25d60ed15284 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -632,7 +632,7 @@ static int rxrpc_create(struct net *net, struct socket *sock, int protocol, sock->ops = &rxrpc_rpc_ops; sock->state = SS_UNCONNECTED; - sk = sk_alloc(net, PF_RXRPC, GFP_KERNEL, &rxrpc_proto); + sk = sk_alloc(net, PF_RXRPC, GFP_KERNEL, &rxrpc_proto, kern); if (!sk) return -ENOMEM; diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 0e4198ee2370..e703ff7fed40 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -635,7 +635,7 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk, struct ipv6_pinfo *newnp, *np = inet6_sk(sk); struct sctp6_sock *newsctp6sk; - newsk = sk_alloc(sock_net(sk), PF_INET6, GFP_KERNEL, sk->sk_prot); + newsk = sk_alloc(sock_net(sk), PF_INET6, GFP_KERNEL, sk->sk_prot, 0); if (!newsk) goto out; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 53b7acde9aa3..59e80356672b 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -550,7 +550,7 @@ static struct sock *sctp_v4_create_accept_sk(struct sock *sk, struct sctp_association *asoc) { struct sock *newsk = sk_alloc(sock_net(sk), PF_INET, GFP_KERNEL, - sk->sk_prot); + sk->sk_prot, 0); struct inet_sock *newinet; if (!newsk) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 9074b5cede38..8f3c8e2cef8e 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -342,7 +342,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock, } /* Allocate socket's protocol area */ - sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto); + sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto, kern); if (sk == NULL) return -ENOMEM; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 5266ea7b922b..941b3d26e3bf 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -620,7 +620,7 @@ static struct proto unix_proto = { */ static struct lock_class_key af_unix_sk_receive_queue_lock_key; -static struct sock *unix_create1(struct net *net, struct socket *sock) +static struct sock *unix_create1(struct net *net, struct socket *sock, int kern) { struct sock *sk = NULL; struct unix_sock *u; @@ -629,7 +629,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock) if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) goto out; - sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto); + sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern); if (!sk) goto out; @@ -688,7 +688,7 @@ static int unix_create(struct net *net, struct socket *sock, int protocol, return -ESOCKTNOSUPPORT; } - return unix_create1(net, sock) ? 0 : -ENOMEM; + return unix_create1(net, sock, kern) ? 0 : -ENOMEM; } static int unix_release(struct socket *sock) @@ -1088,7 +1088,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, err = -ENOMEM; /* create new sock for complete connection */ - newsk = unix_create1(sock_net(sk), NULL); + newsk = unix_create1(sock_net(sk), NULL, 0); if (newsk == NULL) goto out; diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 2ec86e652a19..df5fc6b340f1 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -581,13 +581,14 @@ struct sock *__vsock_create(struct net *net, struct socket *sock, struct sock *parent, gfp_t priority, - unsigned short type) + unsigned short type, + int kern) { struct sock *sk; struct vsock_sock *psk; struct vsock_sock *vsk; - sk = sk_alloc(net, AF_VSOCK, priority, &vsock_proto); + sk = sk_alloc(net, AF_VSOCK, priority, &vsock_proto, kern); if (!sk) return NULL; @@ -1866,7 +1867,7 @@ static int vsock_create(struct net *net, struct socket *sock, sock->state = SS_UNCONNECTED; - return __vsock_create(net, sock, NULL, GFP_KERNEL, 0) ? 0 : -ENOMEM; + return __vsock_create(net, sock, NULL, GFP_KERNEL, 0, kern) ? 0 : -ENOMEM; } static const struct net_proto_family vsock_family_ops = { diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index c294da095461..1f63daff3965 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -1022,7 +1022,7 @@ static int vmci_transport_recv_listen(struct sock *sk, } pending = __vsock_create(sock_net(sk), NULL, sk, GFP_KERNEL, - sk->sk_type); + sk->sk_type, 0); if (!pending) { vmci_transport_send_reset(sk, pkt); return -ENOMEM; diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index c3ab230e4493..a750f330b8dd 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -515,10 +515,10 @@ static struct proto x25_proto = { .obj_size = sizeof(struct x25_sock), }; -static struct sock *x25_alloc_socket(struct net *net) +static struct sock *x25_alloc_socket(struct net *net, int kern) { struct x25_sock *x25; - struct sock *sk = sk_alloc(net, AF_X25, GFP_ATOMIC, &x25_proto); + struct sock *sk = sk_alloc(net, AF_X25, GFP_ATOMIC, &x25_proto, kern); if (!sk) goto out; @@ -553,7 +553,7 @@ static int x25_create(struct net *net, struct socket *sock, int protocol, goto out; rc = -ENOBUFS; - if ((sk = x25_alloc_socket(net)) == NULL) + if ((sk = x25_alloc_socket(net, kern)) == NULL) goto out; x25 = x25_sk(sk); @@ -602,7 +602,7 @@ static struct sock *x25_make_new(struct sock *osk) if (osk->sk_type != SOCK_SEQPACKET) goto out; - if ((sk = x25_alloc_socket(sock_net(osk))) == NULL) + if ((sk = x25_alloc_socket(sock_net(osk), 0)) == NULL) goto out; x25 = x25_sk(sk); -- cgit v1.2.3 From d2788d34885d4ce5ba17a8996fd95d28942e574e Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 9 May 2015 22:51:32 +0200 Subject: net: sched: further simplify handle_ing Ingress qdisc has no other purpose than calling into tc_classify() that executes attached classifier(s) and action(s). It has a 1:1 relationship to dev->ingress_queue. After having commit 087c1a601ad7 ("net: sched: run ingress qdisc without locks") removed the central ingress lock, one major contention point is gone. The extra indirection layers however, are not necessary for calling into ingress qdisc. pktgen calling locally into netif_receive_skb() with a dummy u32, single CPU result on a Supermicro X10SLM-F, Xeon E3-1240: before ~21,1 Mpps, after patch ~22,9 Mpps. We can redirect the private classifier list to the netdev directly, without changing any classifier API bits (!) and execute on that from handle_ing() side. The __QDISC_STATE_DEACTIVATE test can be removed, ingress qdisc doesn't have a queue and thus dev_deactivate_queue() is also not applicable, ingress_cl_list provides similar behaviour. In other words, ingress qdisc acts like TCQ_F_BUILTIN qdisc. One next possible step is the removal of the dev's ingress (dummy) netdev_queue, and to only have the list member in the netdevice itself. Note, the filter chain is RCU protected and individual filter elements are being kfree'd by sched subsystem after RCU grace period. RCU read lock is being held by __netif_receive_skb_core(). Joint work with Alexei Starovoitov. Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 ++++ net/core/dev.c | 30 ++++++++++++++---------- net/sched/sch_ingress.c | 58 ++++++++--------------------------------------- 3 files changed, 31 insertions(+), 61 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1899c74a7127..c4e1caf6056f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1655,7 +1655,11 @@ struct net_device { rx_handler_func_t __rcu *rx_handler; void __rcu *rx_handler_data; +#if CONFIG_NET_CLS_ACT + struct tcf_proto __rcu *ingress_cl_list; +#endif struct netdev_queue __rcu *ingress_queue; + unsigned char broadcast[MAX_ADDR_LEN]; #ifdef CONFIG_RFS_ACCEL struct cpu_rmap *rx_cpu_rmap; diff --git a/net/core/dev.c b/net/core/dev.c index 8a757464bfa2..e5f77c40bbd1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3525,31 +3525,37 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, struct net_device *orig_dev) { - struct netdev_queue *rxq = rcu_dereference(skb->dev->ingress_queue); - struct Qdisc *q; + struct tcf_proto *cl = rcu_dereference_bh(skb->dev->ingress_cl_list); + struct tcf_result cl_res; /* If there's at least one ingress present somewhere (so * we get here via enabled static key), remaining devices * that are not configured with an ingress qdisc will bail - * out w/o the rcu_dereference(). + * out here. */ - if (!rxq || (q = rcu_dereference(rxq->qdisc)) == &noop_qdisc) + if (!cl) return skb; - if (*pt_prev) { *ret = deliver_skb(skb, *pt_prev, orig_dev); *pt_prev = NULL; } + qdisc_bstats_update_cpu(cl->q, skb); skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS); - if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) { - switch (qdisc_enqueue_root(skb, q)) { - case TC_ACT_SHOT: - case TC_ACT_STOLEN: - kfree_skb(skb); - return NULL; - } + switch (tc_classify(skb, cl, &cl_res)) { + case TC_ACT_OK: + case TC_ACT_RECLASSIFY: + skb->tc_index = TC_H_MIN(cl_res.classid); + break; + case TC_ACT_SHOT: + qdisc_qstats_drop_cpu(cl->q); + case TC_ACT_STOLEN: + case TC_ACT_QUEUED: + kfree_skb(skb); + return NULL; + default: + break; } return skb; diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index a89cc3278bfb..e7c648fa9dc3 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -12,16 +12,10 @@ #include #include #include + #include #include - -struct ingress_qdisc_data { - struct tcf_proto __rcu *filter_list; -}; - -/* ------------------------- Class/flow operations ------------------------- */ - static struct Qdisc *ingress_leaf(struct Qdisc *sch, unsigned long arg) { return NULL; @@ -49,45 +43,11 @@ static void ingress_walk(struct Qdisc *sch, struct qdisc_walker *walker) static struct tcf_proto __rcu **ingress_find_tcf(struct Qdisc *sch, unsigned long cl) { - struct ingress_qdisc_data *p = qdisc_priv(sch); - - return &p->filter_list; -} - -/* --------------------------- Qdisc operations ---------------------------- */ + struct net_device *dev = qdisc_dev(sch); -static int ingress_enqueue(struct sk_buff *skb, struct Qdisc *sch) -{ - struct ingress_qdisc_data *p = qdisc_priv(sch); - struct tcf_result res; - struct tcf_proto *fl = rcu_dereference_bh(p->filter_list); - int result; - - result = tc_classify(skb, fl, &res); - - qdisc_bstats_update_cpu(sch, skb); - switch (result) { - case TC_ACT_SHOT: - result = TC_ACT_SHOT; - qdisc_qstats_drop_cpu(sch); - break; - case TC_ACT_STOLEN: - case TC_ACT_QUEUED: - result = TC_ACT_STOLEN; - break; - case TC_ACT_RECLASSIFY: - case TC_ACT_OK: - skb->tc_index = TC_H_MIN(res.classid); - default: - result = TC_ACT_OK; - break; - } - - return result; + return &dev->ingress_cl_list; } -/* ------------------------------------------------------------- */ - static int ingress_init(struct Qdisc *sch, struct nlattr *opt) { net_inc_ingress_queue(); @@ -98,9 +58,9 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt) static void ingress_destroy(struct Qdisc *sch) { - struct ingress_qdisc_data *p = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); - tcf_destroy_chain(&p->filter_list); + tcf_destroy_chain(&dev->ingress_cl_list); net_dec_ingress_queue(); } @@ -111,6 +71,7 @@ static int ingress_dump(struct Qdisc *sch, struct sk_buff *skb) nest = nla_nest_start(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; + return nla_nest_end(skb, nest); nla_put_failure: @@ -131,8 +92,6 @@ static const struct Qdisc_class_ops ingress_class_ops = { static struct Qdisc_ops ingress_qdisc_ops __read_mostly = { .cl_ops = &ingress_class_ops, .id = "ingress", - .priv_size = sizeof(struct ingress_qdisc_data), - .enqueue = ingress_enqueue, .init = ingress_init, .destroy = ingress_destroy, .dump = ingress_dump, @@ -149,6 +108,7 @@ static void __exit ingress_module_exit(void) unregister_qdisc(&ingress_qdisc_ops); } -module_init(ingress_module_init) -module_exit(ingress_module_exit) +module_init(ingress_module_init); +module_exit(ingress_module_exit); + MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 4cda01e86f68dacc758b2daf6e8809f2ce915b85 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 11 May 2015 19:28:49 +0200 Subject: net: sched: fix typo in net_device ifdef This should have been #ifdef not #if. Reported-by: Fengguang Wu Fixes: d2788d34885d ("net: sched: further simplify handle_ing") Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c4e1caf6056f..a6d706b2a947 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1655,7 +1655,7 @@ struct net_device { rx_handler_func_t __rcu *rx_handler; void __rcu *rx_handler_data; -#if CONFIG_NET_CLS_ACT +#ifdef CONFIG_NET_CLS_ACT struct tcf_proto __rcu *ingress_cl_list; #endif struct netdev_queue __rcu *ingress_queue; -- cgit v1.2.3 From 0e39250845c0f91acc64264709b25f7f9b85c2c3 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 6 May 2015 21:11:51 -0700 Subject: net: Store virtual address instead of page in netdev_alloc_cache This change makes it so that we store the virtual address of the page in the netdev_alloc_cache instead of the page pointer. The idea behind this is to avoid multiple calls to page_address since the virtual address is required for every access, but the page pointer is only needed at allocation or reset of the page. While I was at it I also reordered the netdev_alloc_cache structure a bit so that the size is always 16 bytes by dropping size in the case where PAGE_SIZE is greater than or equal to 32KB. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/skbuff.h | 5 ++--- net/core/skbuff.c | 55 +++++++++++++++++++++++++++++--------------------- 2 files changed, 34 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 9c2f793573fa..8b9a2c35a9d7 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2128,9 +2128,8 @@ static inline void __skb_queue_purge(struct sk_buff_head *list) kfree_skb(skb); } -#define NETDEV_FRAG_PAGE_MAX_ORDER get_order(32768) -#define NETDEV_FRAG_PAGE_MAX_SIZE (PAGE_SIZE << NETDEV_FRAG_PAGE_MAX_ORDER) -#define NETDEV_PAGECNT_MAX_BIAS NETDEV_FRAG_PAGE_MAX_SIZE +#define NETDEV_FRAG_PAGE_MAX_SIZE __ALIGN_MASK(32768, ~PAGE_MASK) +#define NETDEV_FRAG_PAGE_MAX_ORDER get_order(NETDEV_FRAG_PAGE_MAX_SIZE) void *netdev_alloc_frag(unsigned int fragsz); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index d6851ca32598..a3062ec341c3 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -348,7 +348,13 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size) EXPORT_SYMBOL(build_skb); struct netdev_alloc_cache { - struct page_frag frag; + void * va; +#if (PAGE_SIZE < NETDEV_FRAG_PAGE_MAX_SIZE) + __u16 offset; + __u16 size; +#else + __u32 offset; +#endif /* we maintain a pagecount bias, so that we dont dirty cache line * containing page->_count every time we allocate a fragment. */ @@ -361,21 +367,20 @@ static DEFINE_PER_CPU(struct netdev_alloc_cache, napi_alloc_cache); static struct page *__page_frag_refill(struct netdev_alloc_cache *nc, gfp_t gfp_mask) { - const unsigned int order = NETDEV_FRAG_PAGE_MAX_ORDER; struct page *page = NULL; gfp_t gfp = gfp_mask; - if (order) { - gfp_mask |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY | - __GFP_NOMEMALLOC; - page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, order); - nc->frag.size = PAGE_SIZE << (page ? order : 0); - } - +#if (PAGE_SIZE < NETDEV_FRAG_PAGE_MAX_SIZE) + gfp_mask |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY | + __GFP_NOMEMALLOC; + page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, + NETDEV_FRAG_PAGE_MAX_ORDER); + nc->size = page ? NETDEV_FRAG_PAGE_MAX_SIZE : PAGE_SIZE; +#endif if (unlikely(!page)) page = alloc_pages_node(NUMA_NO_NODE, gfp, 0); - nc->frag.page = page; + nc->va = page ? page_address(page) : NULL; return page; } @@ -383,19 +388,20 @@ static struct page *__page_frag_refill(struct netdev_alloc_cache *nc, static void *__alloc_page_frag(struct netdev_alloc_cache *nc, unsigned int fragsz, gfp_t gfp_mask) { - struct page *page = nc->frag.page; - unsigned int size; + unsigned int size = PAGE_SIZE; + struct page *page; int offset; - if (unlikely(!page)) { + if (unlikely(!nc->va)) { refill: page = __page_frag_refill(nc, gfp_mask); if (!page) return NULL; - /* if size can vary use frag.size else just use PAGE_SIZE */ - size = NETDEV_FRAG_PAGE_MAX_ORDER ? nc->frag.size : PAGE_SIZE; - +#if (PAGE_SIZE < NETDEV_FRAG_PAGE_MAX_SIZE) + /* if size can vary use size else just use PAGE_SIZE */ + size = nc->size; +#endif /* Even if we own the page, we do not use atomic_set(). * This would break get_page_unless_zero() users. */ @@ -404,17 +410,20 @@ refill: /* reset page count bias and offset to start of new frag */ nc->pfmemalloc = page->pfmemalloc; nc->pagecnt_bias = size; - nc->frag.offset = size; + nc->offset = size; } - offset = nc->frag.offset - fragsz; + offset = nc->offset - fragsz; if (unlikely(offset < 0)) { + page = virt_to_page(nc->va); + if (!atomic_sub_and_test(nc->pagecnt_bias, &page->_count)) goto refill; - /* if size can vary use frag.size else just use PAGE_SIZE */ - size = NETDEV_FRAG_PAGE_MAX_ORDER ? nc->frag.size : PAGE_SIZE; - +#if (PAGE_SIZE < NETDEV_FRAG_PAGE_MAX_SIZE) + /* if size can vary use size else just use PAGE_SIZE */ + size = nc->size; +#endif /* OK, page count is 0, we can safely set it */ atomic_set(&page->_count, size); @@ -424,9 +433,9 @@ refill: } nc->pagecnt_bias--; - nc->frag.offset = offset; + nc->offset = offset; - return page_address(page) + offset; + return nc->va + offset; } static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) -- cgit v1.2.3 From b63ae8ca096dfdbfeef6a209c30a93a966518853 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 6 May 2015 21:11:57 -0700 Subject: mm/net: Rename and move page fragment handling from net/ to mm/ This change moves the __alloc_page_frag functionality out of the networking stack and into the page allocation portion of mm. The idea it so help make this maintainable by placing it with other page allocation functions. Since we are moving it from skbuff.c to page_alloc.c I have also renamed the basic defines and structure from netdev_alloc_cache to page_frag_cache to reflect that this is now part of a different kernel subsystem. I have also added a simple __free_page_frag function which can handle freeing the frags based on the skb->head pointer. The model for this is based off of __free_pages since we don't actually need to deal with all of the cases that put_page handles. I incorporated the virt_to_head_page call and compound_order into the function as it actually allows for a signficant size reduction by reducing code duplication. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/gfp.h | 5 +++ include/linux/mm_types.h | 18 +++++++++ include/linux/skbuff.h | 3 -- mm/page_alloc.c | 98 ++++++++++++++++++++++++++++++++++++++++++++++ net/core/skbuff.c | 100 +++-------------------------------------------- 5 files changed, 127 insertions(+), 97 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 97a9373e61e8..70a7fee1efb3 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -366,6 +366,11 @@ extern void free_pages(unsigned long addr, unsigned int order); extern void free_hot_cold_page(struct page *page, bool cold); extern void free_hot_cold_page_list(struct list_head *list, bool cold); +struct page_frag_cache; +extern void *__alloc_page_frag(struct page_frag_cache *nc, + unsigned int fragsz, gfp_t gfp_mask); +extern void __free_page_frag(void *addr); + extern void __free_kmem_pages(struct page *page, unsigned int order); extern void free_kmem_pages(unsigned long addr, unsigned int order); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 8d37e26a1007..0038ac7466fd 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -226,6 +226,24 @@ struct page_frag { #endif }; +#define PAGE_FRAG_CACHE_MAX_SIZE __ALIGN_MASK(32768, ~PAGE_MASK) +#define PAGE_FRAG_CACHE_MAX_ORDER get_order(PAGE_FRAG_CACHE_MAX_SIZE) + +struct page_frag_cache { + void * va; +#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) + __u16 offset; + __u16 size; +#else + __u32 offset; +#endif + /* we maintain a pagecount bias, so that we dont dirty cache line + * containing page->_count every time we allocate a fragment. + */ + unsigned int pagecnt_bias; + bool pfmemalloc; +}; + typedef unsigned long __nocast vm_flags_t; /* diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 8b9a2c35a9d7..0039fcc45b3b 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2128,9 +2128,6 @@ static inline void __skb_queue_purge(struct sk_buff_head *list) kfree_skb(skb); } -#define NETDEV_FRAG_PAGE_MAX_SIZE __ALIGN_MASK(32768, ~PAGE_MASK) -#define NETDEV_FRAG_PAGE_MAX_ORDER get_order(NETDEV_FRAG_PAGE_MAX_SIZE) - void *netdev_alloc_frag(unsigned int fragsz); struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int length, diff --git a/mm/page_alloc.c b/mm/page_alloc.c index ebffa0e4a9c0..2fd31aebef30 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2966,6 +2966,104 @@ void free_pages(unsigned long addr, unsigned int order) EXPORT_SYMBOL(free_pages); +/* + * Page Fragment: + * An arbitrary-length arbitrary-offset area of memory which resides + * within a 0 or higher order page. Multiple fragments within that page + * are individually refcounted, in the page's reference counter. + * + * The page_frag functions below provide a simple allocation framework for + * page fragments. This is used by the network stack and network device + * drivers to provide a backing region of memory for use as either an + * sk_buff->head, or to be used in the "frags" portion of skb_shared_info. + */ +static struct page *__page_frag_refill(struct page_frag_cache *nc, + gfp_t gfp_mask) +{ + struct page *page = NULL; + gfp_t gfp = gfp_mask; + +#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) + gfp_mask |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY | + __GFP_NOMEMALLOC; + page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, + PAGE_FRAG_CACHE_MAX_ORDER); + nc->size = page ? PAGE_FRAG_CACHE_MAX_SIZE : PAGE_SIZE; +#endif + if (unlikely(!page)) + page = alloc_pages_node(NUMA_NO_NODE, gfp, 0); + + nc->va = page ? page_address(page) : NULL; + + return page; +} + +void *__alloc_page_frag(struct page_frag_cache *nc, + unsigned int fragsz, gfp_t gfp_mask) +{ + unsigned int size = PAGE_SIZE; + struct page *page; + int offset; + + if (unlikely(!nc->va)) { +refill: + page = __page_frag_refill(nc, gfp_mask); + if (!page) + return NULL; + +#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) + /* if size can vary use size else just use PAGE_SIZE */ + size = nc->size; +#endif + /* Even if we own the page, we do not use atomic_set(). + * This would break get_page_unless_zero() users. + */ + atomic_add(size - 1, &page->_count); + + /* reset page count bias and offset to start of new frag */ + nc->pfmemalloc = page->pfmemalloc; + nc->pagecnt_bias = size; + nc->offset = size; + } + + offset = nc->offset - fragsz; + if (unlikely(offset < 0)) { + page = virt_to_page(nc->va); + + if (!atomic_sub_and_test(nc->pagecnt_bias, &page->_count)) + goto refill; + +#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) + /* if size can vary use size else just use PAGE_SIZE */ + size = nc->size; +#endif + /* OK, page count is 0, we can safely set it */ + atomic_set(&page->_count, size); + + /* reset page count bias and offset to start of new frag */ + nc->pagecnt_bias = size; + offset = size - fragsz; + } + + nc->pagecnt_bias--; + nc->offset = offset; + + return nc->va + offset; +} +EXPORT_SYMBOL(__alloc_page_frag); + +/* + * Frees a page fragment allocated out of either a compound or order 0 page. + */ +void __free_page_frag(void *addr) +{ + struct page *page = virt_to_head_page(addr); + + if (unlikely(put_page_testzero(page))) + __free_pages_ok(page, compound_order(page)); +} +EXPORT_SYMBOL(__free_page_frag); + /* * alloc_kmem_pages charges newly allocated pages to the kmem resource counter * of the current memory cgroup. diff --git a/net/core/skbuff.c b/net/core/skbuff.c index a3062ec341c3..dcc0e07abf47 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -347,100 +347,12 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size) } EXPORT_SYMBOL(build_skb); -struct netdev_alloc_cache { - void * va; -#if (PAGE_SIZE < NETDEV_FRAG_PAGE_MAX_SIZE) - __u16 offset; - __u16 size; -#else - __u32 offset; -#endif - /* we maintain a pagecount bias, so that we dont dirty cache line - * containing page->_count every time we allocate a fragment. - */ - unsigned int pagecnt_bias; - bool pfmemalloc; -}; -static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache); -static DEFINE_PER_CPU(struct netdev_alloc_cache, napi_alloc_cache); - -static struct page *__page_frag_refill(struct netdev_alloc_cache *nc, - gfp_t gfp_mask) -{ - struct page *page = NULL; - gfp_t gfp = gfp_mask; - -#if (PAGE_SIZE < NETDEV_FRAG_PAGE_MAX_SIZE) - gfp_mask |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY | - __GFP_NOMEMALLOC; - page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, - NETDEV_FRAG_PAGE_MAX_ORDER); - nc->size = page ? NETDEV_FRAG_PAGE_MAX_SIZE : PAGE_SIZE; -#endif - if (unlikely(!page)) - page = alloc_pages_node(NUMA_NO_NODE, gfp, 0); - - nc->va = page ? page_address(page) : NULL; - - return page; -} - -static void *__alloc_page_frag(struct netdev_alloc_cache *nc, - unsigned int fragsz, gfp_t gfp_mask) -{ - unsigned int size = PAGE_SIZE; - struct page *page; - int offset; - - if (unlikely(!nc->va)) { -refill: - page = __page_frag_refill(nc, gfp_mask); - if (!page) - return NULL; - -#if (PAGE_SIZE < NETDEV_FRAG_PAGE_MAX_SIZE) - /* if size can vary use size else just use PAGE_SIZE */ - size = nc->size; -#endif - /* Even if we own the page, we do not use atomic_set(). - * This would break get_page_unless_zero() users. - */ - atomic_add(size - 1, &page->_count); - - /* reset page count bias and offset to start of new frag */ - nc->pfmemalloc = page->pfmemalloc; - nc->pagecnt_bias = size; - nc->offset = size; - } - - offset = nc->offset - fragsz; - if (unlikely(offset < 0)) { - page = virt_to_page(nc->va); - - if (!atomic_sub_and_test(nc->pagecnt_bias, &page->_count)) - goto refill; - -#if (PAGE_SIZE < NETDEV_FRAG_PAGE_MAX_SIZE) - /* if size can vary use size else just use PAGE_SIZE */ - size = nc->size; -#endif - /* OK, page count is 0, we can safely set it */ - atomic_set(&page->_count, size); - - /* reset page count bias and offset to start of new frag */ - nc->pagecnt_bias = size; - offset = size - fragsz; - } - - nc->pagecnt_bias--; - nc->offset = offset; - - return nc->va + offset; -} +static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache); +static DEFINE_PER_CPU(struct page_frag_cache, napi_alloc_cache); static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) { - struct netdev_alloc_cache *nc; + struct page_frag_cache *nc; unsigned long flags; void *data; @@ -466,7 +378,7 @@ EXPORT_SYMBOL(netdev_alloc_frag); static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) { - struct netdev_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); + struct page_frag_cache *nc = this_cpu_ptr(&napi_alloc_cache); return __alloc_page_frag(nc, fragsz, gfp_mask); } @@ -493,7 +405,7 @@ EXPORT_SYMBOL(napi_alloc_frag); struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len, gfp_t gfp_mask) { - struct netdev_alloc_cache *nc; + struct page_frag_cache *nc; unsigned long flags; struct sk_buff *skb; bool pfmemalloc; @@ -556,7 +468,7 @@ EXPORT_SYMBOL(__netdev_alloc_skb); struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, gfp_t gfp_mask) { - struct netdev_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); + struct page_frag_cache *nc = this_cpu_ptr(&napi_alloc_cache); struct sk_buff *skb; void *data; -- cgit v1.2.3 From 181edb2bfa22b50817684135ab6430ed2808abf0 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 6 May 2015 21:12:03 -0700 Subject: net: Add skb_free_frag to replace use of put_page in freeing skb->head This change adds a function called skb_free_frag which is meant to compliment the function netdev_alloc_frag. The general idea is to enable a more lightweight version of page freeing since we don't actually need all the overhead of a put_page, and we don't quite fit the model of __free_pages. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/skbuff.h | 5 +++++ net/core/skbuff.c | 10 ++++++---- 2 files changed, 11 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 0039fcc45b3b..c0b574a414e7 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2182,6 +2182,11 @@ static inline struct sk_buff *netdev_alloc_skb_ip_align(struct net_device *dev, return __netdev_alloc_skb_ip_align(dev, length, GFP_ATOMIC); } +static inline void skb_free_frag(void *addr) +{ + __free_page_frag(addr); +} + void *napi_alloc_frag(unsigned int fragsz); struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int length, gfp_t gfp_mask); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index dcc0e07abf47..d67e612bf0ef 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -436,7 +436,7 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len, skb = __build_skb(data, len); if (unlikely(!skb)) { - put_page(virt_to_head_page(data)); + skb_free_frag(data); return NULL; } @@ -490,7 +490,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, skb = __build_skb(data, len); if (unlikely(!skb)) { - put_page(virt_to_head_page(data)); + skb_free_frag(data); return NULL; } @@ -549,10 +549,12 @@ static void skb_clone_fraglist(struct sk_buff *skb) static void skb_free_head(struct sk_buff *skb) { + unsigned char *head = skb->head; + if (skb->head_frag) - put_page(virt_to_head_page(skb->head)); + skb_free_frag(head); else - kfree(skb->head); + kfree(head); } static void skb_release_data(struct sk_buff *skb) -- cgit v1.2.3 From 9d47c0a2d958e06322c88245749278633d333cca Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sun, 10 May 2015 09:47:47 -0700 Subject: switchdev: s/swdev_/switchdev_/ Turned out that "switchdev" sticks. So just unify all related terms to use this prefix. Signed-off-by: Jiri Pirko Signed-off-by: Scott Feldman Acked-by: Roopa Prabhu Acked-by: Andy Gospodarek Signed-off-by: David S. Miller --- drivers/net/ethernet/rocker/rocker.c | 37 +++++++++++++++++---------------- include/linux/netdevice.h | 2 +- include/net/switchdev.h | 30 +++++++++++++-------------- net/dsa/slave.c | 8 ++++---- net/switchdev/switchdev.c | 40 ++++++++++++++++++------------------ 5 files changed, 59 insertions(+), 58 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index 418f62b19545..7473874daf1f 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -4221,8 +4221,8 @@ static const struct net_device_ops rocker_port_netdev_ops = { * swdev interface ********************/ -static int rocker_port_swdev_parent_id_get(struct net_device *dev, - struct netdev_phys_item_id *psid) +static int rocker_port_switchdev_parent_id_get(struct net_device *dev, + struct netdev_phys_item_id *psid) { struct rocker_port *rocker_port = netdev_priv(dev); struct rocker *rocker = rocker_port->rocker; @@ -4232,18 +4232,19 @@ static int rocker_port_swdev_parent_id_get(struct net_device *dev, return 0; } -static int rocker_port_swdev_port_stp_update(struct net_device *dev, u8 state) +static int rocker_port_switchdev_port_stp_update(struct net_device *dev, + u8 state) { struct rocker_port *rocker_port = netdev_priv(dev); return rocker_port_stp_update(rocker_port, state); } -static int rocker_port_swdev_fib_ipv4_add(struct net_device *dev, - __be32 dst, int dst_len, - struct fib_info *fi, - u8 tos, u8 type, - u32 nlflags, u32 tb_id) +static int rocker_port_switchdev_fib_ipv4_add(struct net_device *dev, + __be32 dst, int dst_len, + struct fib_info *fi, + u8 tos, u8 type, + u32 nlflags, u32 tb_id) { struct rocker_port *rocker_port = netdev_priv(dev); int flags = 0; @@ -4252,10 +4253,10 @@ static int rocker_port_swdev_fib_ipv4_add(struct net_device *dev, fi, tb_id, flags); } -static int rocker_port_swdev_fib_ipv4_del(struct net_device *dev, - __be32 dst, int dst_len, - struct fib_info *fi, - u8 tos, u8 type, u32 tb_id) +static int rocker_port_switchdev_fib_ipv4_del(struct net_device *dev, + __be32 dst, int dst_len, + struct fib_info *fi, + u8 tos, u8 type, u32 tb_id) { struct rocker_port *rocker_port = netdev_priv(dev); int flags = ROCKER_OP_FLAG_REMOVE; @@ -4264,11 +4265,11 @@ static int rocker_port_swdev_fib_ipv4_del(struct net_device *dev, fi, tb_id, flags); } -static const struct swdev_ops rocker_port_swdev_ops = { - .swdev_parent_id_get = rocker_port_swdev_parent_id_get, - .swdev_port_stp_update = rocker_port_swdev_port_stp_update, - .swdev_fib_ipv4_add = rocker_port_swdev_fib_ipv4_add, - .swdev_fib_ipv4_del = rocker_port_swdev_fib_ipv4_del, +static const struct switchdev_ops rocker_port_switchdev_ops = { + .switchdev_parent_id_get = rocker_port_switchdev_parent_id_get, + .switchdev_port_stp_update = rocker_port_switchdev_port_stp_update, + .switchdev_fib_ipv4_add = rocker_port_switchdev_fib_ipv4_add, + .switchdev_fib_ipv4_del = rocker_port_switchdev_fib_ipv4_del, }; /******************** @@ -4623,7 +4624,7 @@ static int rocker_probe_port(struct rocker *rocker, unsigned int port_number) rocker_port_dev_addr_init(rocker, rocker_port); dev->netdev_ops = &rocker_port_netdev_ops; dev->ethtool_ops = &rocker_port_ethtool_ops; - dev->swdev_ops = &rocker_port_swdev_ops; + dev->switchdev_ops = &rocker_port_switchdev_ops; netif_napi_add(dev, &rocker_port->napi_tx, rocker_port_poll_tx, NAPI_POLL_WEIGHT); netif_napi_add(dev, &rocker_port->napi_rx, rocker_port_poll_rx, diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a6d706b2a947..2b39235b9f13 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1567,7 +1567,7 @@ struct net_device { const struct net_device_ops *netdev_ops; const struct ethtool_ops *ethtool_ops; #ifdef CONFIG_NET_SWITCHDEV - const struct swdev_ops *swdev_ops; + const struct switchdev_ops *switchdev_ops; #endif const struct header_ops *header_ops; diff --git a/include/net/switchdev.h b/include/net/switchdev.h index cd921fa0d63a..97b556daeef7 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -19,28 +19,28 @@ struct fib_info; /** * struct switchdev_ops - switchdev operations * - * @swdev_parent_id_get: Called to get an ID of the switch chip this port + * @switchdev_parent_id_get: Called to get an ID of the switch chip this port * is part of. If driver implements this, it indicates that it * represents a port of a switch chip. * - * @swdev_port_stp_update: Called to notify switch device port of bridge + * @switchdev_port_stp_update: Called to notify switch device port of bridge * port STP state change. * - * @swdev_fib_ipv4_add: Called to add/modify IPv4 route to switch device. + * @switchdev_fib_ipv4_add: Called to add/modify IPv4 route to switch device. * - * @swdev_fib_ipv4_del: Called to delete IPv4 route from switch device. + * @switchdev_fib_ipv4_del: Called to delete IPv4 route from switch device. */ -struct swdev_ops { - int (*swdev_parent_id_get)(struct net_device *dev, - struct netdev_phys_item_id *psid); - int (*swdev_port_stp_update)(struct net_device *dev, u8 state); - int (*swdev_fib_ipv4_add)(struct net_device *dev, __be32 dst, - int dst_len, struct fib_info *fi, - u8 tos, u8 type, u32 nlflags, - u32 tb_id); - int (*swdev_fib_ipv4_del)(struct net_device *dev, __be32 dst, - int dst_len, struct fib_info *fi, - u8 tos, u8 type, u32 tb_id); +struct switchdev_ops { + int (*switchdev_parent_id_get)(struct net_device *dev, + struct netdev_phys_item_id *psid); + int (*switchdev_port_stp_update)(struct net_device *dev, u8 state); + int (*switchdev_fib_ipv4_add)(struct net_device *dev, __be32 dst, + int dst_len, struct fib_info *fi, + u8 tos, u8 type, u32 nlflags, + u32 tb_id); + int (*switchdev_fib_ipv4_del)(struct net_device *dev, __be32 dst, + int dst_len, struct fib_info *fi, + u8 tos, u8 type, u32 tb_id); }; enum switchdev_notifier_type { diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 03e041addea3..1546acf6ebd3 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -675,9 +675,9 @@ static const struct net_device_ops dsa_slave_netdev_ops = { .ndo_get_iflink = dsa_slave_get_iflink, }; -static const struct swdev_ops dsa_slave_swdev_ops = { - .swdev_parent_id_get = dsa_slave_parent_id_get, - .swdev_port_stp_update = dsa_slave_stp_update, +static const struct switchdev_ops dsa_slave_switchdev_ops = { + .switchdev_parent_id_get = dsa_slave_parent_id_get, + .switchdev_port_stp_update = dsa_slave_stp_update, }; static void dsa_slave_adjust_link(struct net_device *dev) @@ -866,7 +866,7 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent, eth_hw_addr_inherit(slave_dev, master); slave_dev->tx_queue_len = 0; slave_dev->netdev_ops = &dsa_slave_netdev_ops; - slave_dev->swdev_ops = &dsa_slave_swdev_ops; + slave_dev->switchdev_ops = &dsa_slave_switchdev_ops; netdev_for_each_tx_queue(slave_dev, dsa_slave_set_lockdep_class_one, NULL); diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 52613ed49a8c..b7f44a23def5 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -28,11 +28,11 @@ int switchdev_parent_id_get(struct net_device *dev, struct netdev_phys_item_id *psid) { - const struct swdev_ops *ops = dev->swdev_ops; + const struct switchdev_ops *ops = dev->switchdev_ops; - if (!ops || !ops->swdev_parent_id_get) + if (!ops || !ops->switchdev_parent_id_get) return -EOPNOTSUPP; - return ops->swdev_parent_id_get(dev, psid); + return ops->switchdev_parent_id_get(dev, psid); } EXPORT_SYMBOL_GPL(switchdev_parent_id_get); @@ -46,13 +46,13 @@ EXPORT_SYMBOL_GPL(switchdev_parent_id_get); */ int switchdev_port_stp_update(struct net_device *dev, u8 state) { - const struct swdev_ops *ops = dev->swdev_ops; + const struct switchdev_ops *ops = dev->switchdev_ops; struct net_device *lower_dev; struct list_head *iter; int err = -EOPNOTSUPP; - if (ops && ops->swdev_port_stp_update) - return ops->swdev_port_stp_update(dev, state); + if (ops && ops->switchdev_port_stp_update) + return ops->switchdev_port_stp_update(dev, state); netdev_for_each_lower_dev(dev, lower_dev, iter) { err = switchdev_port_stp_update(lower_dev, state); @@ -239,17 +239,17 @@ EXPORT_SYMBOL_GPL(ndo_dflt_switchdev_port_bridge_dellink); static struct net_device *switchdev_get_lowest_dev(struct net_device *dev) { - const struct swdev_ops *ops = dev->swdev_ops; + const struct switchdev_ops *ops = dev->switchdev_ops; struct net_device *lower_dev; struct net_device *port_dev; struct list_head *iter; /* Recusively search down until we find a sw port dev. - * (A sw port dev supports swdev_parent_id_get). + * (A sw port dev supports switchdev_parent_id_get). */ if (dev->features & NETIF_F_HW_SWITCH_OFFLOAD && - ops && ops->swdev_parent_id_get) + ops && ops->switchdev_parent_id_get) return dev; netdev_for_each_lower_dev(dev, lower_dev, iter) { @@ -313,7 +313,7 @@ int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, u8 tos, u8 type, u32 nlflags, u32 tb_id) { struct net_device *dev; - const struct swdev_ops *ops; + const struct switchdev_ops *ops; int err = 0; /* Don't offload route if using custom ip rules or if @@ -331,12 +331,12 @@ int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, dev = switchdev_get_dev_by_nhs(fi); if (!dev) return 0; - ops = dev->swdev_ops; + ops = dev->switchdev_ops; - if (ops->swdev_fib_ipv4_add) { - err = ops->swdev_fib_ipv4_add(dev, htonl(dst), dst_len, - fi, tos, type, nlflags, - tb_id); + if (ops->switchdev_fib_ipv4_add) { + err = ops->switchdev_fib_ipv4_add(dev, htonl(dst), dst_len, + fi, tos, type, nlflags, + tb_id); if (!err) fi->fib_flags |= RTNH_F_EXTERNAL; } @@ -361,7 +361,7 @@ int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, u8 tos, u8 type, u32 tb_id) { struct net_device *dev; - const struct swdev_ops *ops; + const struct switchdev_ops *ops; int err = 0; if (!(fi->fib_flags & RTNH_F_EXTERNAL)) @@ -370,11 +370,11 @@ int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, dev = switchdev_get_dev_by_nhs(fi); if (!dev) return 0; - ops = dev->swdev_ops; + ops = dev->switchdev_ops; - if (ops->swdev_fib_ipv4_del) { - err = ops->swdev_fib_ipv4_del(dev, htonl(dst), dst_len, - fi, tos, type, tb_id); + if (ops->switchdev_fib_ipv4_del) { + err = ops->switchdev_fib_ipv4_del(dev, htonl(dst), dst_len, + fi, tos, type, tb_id); if (!err) fi->fib_flags &= ~RTNH_F_EXTERNAL; } -- cgit v1.2.3 From 7889cbee8357aaed85898d028829dfb4f75bae2c Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Sun, 10 May 2015 09:48:07 -0700 Subject: switchdev: remove NETIF_F_HW_SWITCH_OFFLOAD feature flag Roopa said remove the feature flag for this series and she'll work on bringing it back if needed at a later date. Signed-off-by: Scott Feldman Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 5 +---- drivers/net/ethernet/rocker/rocker.c | 3 +-- drivers/net/team/team.c | 2 +- include/linux/netdev_features.h | 5 +---- net/core/ethtool.c | 1 - 5 files changed, 4 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 147642dae3d0..a2e25de98bde 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1015,10 +1015,7 @@ static netdev_features_t bond_fix_features(struct net_device *dev, netdev_features_t mask; struct slave *slave; - /* If any slave has the offload feature flag set, - * set the offload flag on the bond. - */ - mask = features | NETIF_F_HW_SWITCH_OFFLOAD; + mask = features; features &= ~NETIF_F_ONE_FOR_ALL; features |= NETIF_F_ALL_FOR_ALL; diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index 1c906504cfe2..9715e3322706 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -4906,8 +4906,7 @@ static int rocker_probe_port(struct rocker *rocker, unsigned int port_number) rocker_carrier_init(rocker_port); dev->features |= NETIF_F_NETNS_LOCAL | - NETIF_F_HW_VLAN_CTAG_FILTER | - NETIF_F_HW_SWITCH_OFFLOAD; + NETIF_F_HW_VLAN_CTAG_FILTER; err = register_netdev(dev); if (err) { diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index bda32be596ef..1ec035a53c3d 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1924,7 +1924,7 @@ static netdev_features_t team_fix_features(struct net_device *dev, struct team *team = netdev_priv(dev); netdev_features_t mask; - mask = features | NETIF_F_HW_SWITCH_OFFLOAD; + mask = features; features &= ~NETIF_F_ONE_FOR_ALL; features |= NETIF_F_ALL_FOR_ALL; diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 7d59dc6ab789..9672781c593d 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -66,7 +66,6 @@ enum { NETIF_F_HW_VLAN_STAG_FILTER_BIT,/* Receive filtering on VLAN STAGs */ NETIF_F_HW_L2FW_DOFFLOAD_BIT, /* Allow L2 Forwarding in Hardware */ NETIF_F_BUSY_POLL_BIT, /* Busy poll */ - NETIF_F_HW_SWITCH_OFFLOAD_BIT, /* HW switch offload */ /* * Add your fresh new feature above and remember to update @@ -125,7 +124,6 @@ enum { #define NETIF_F_HW_VLAN_STAG_TX __NETIF_F(HW_VLAN_STAG_TX) #define NETIF_F_HW_L2FW_DOFFLOAD __NETIF_F(HW_L2FW_DOFFLOAD) #define NETIF_F_BUSY_POLL __NETIF_F(BUSY_POLL) -#define NETIF_F_HW_SWITCH_OFFLOAD __NETIF_F(HW_SWITCH_OFFLOAD) /* Features valid for ethtool to change */ /* = all defined minus driver/device-class-related */ @@ -161,8 +159,7 @@ enum { */ #define NETIF_F_ONE_FOR_ALL (NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ROBUST | \ NETIF_F_SG | NETIF_F_HIGHDMA | \ - NETIF_F_FRAGLIST | NETIF_F_VLAN_CHALLENGED | \ - NETIF_F_HW_SWITCH_OFFLOAD) + NETIF_F_FRAGLIST | NETIF_F_VLAN_CHALLENGED) /* * If one device doesn't support one of these features, then disable it diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 1d00b8922902..eb0c3ace7458 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -98,7 +98,6 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_RXALL_BIT] = "rx-all", [NETIF_F_HW_L2FW_DOFFLOAD_BIT] = "l2-fwd-offload", [NETIF_F_BUSY_POLL_BIT] = "busy-poll", - [NETIF_F_HW_SWITCH_OFFLOAD_BIT] = "hw-switch-offload", }; static const char -- cgit v1.2.3 From a2029240e5836e73ebcc1a8ddb8c22d636f89c9a Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Mon, 11 May 2015 21:17:53 +0200 Subject: net: deinline netif_tx_stop_all_queues(), remove WARN_ON in netif_tx_stop_queue() These functions compile to 60 bytes of machine code each. With this .config: http://busybox.net/~vda/kernel_config there are 617 calls of netif_tx_stop_queue() and 49 calls of netif_tx_stop_all_queues() in vmlinux. To fix this, remove WARN_ON in netif_tx_stop_queue() as suggested by davem, and deinline netif_tx_stop_all_queues(). Change in code size is about 20k: text data bss dec hex filename 82426986 22255416 20627456 125309858 77813a2 vmlinux.before 82406248 22255416 20627456 125289120 777c2a0 vmlinux gcc-4.7.2 still creates deinlined version of netif_tx_stop_queue sometimes: $ nm --size-sort vmlinux | grep netif_tx_stop_queue | wc -l 190 ffffffff81b558a8 : ffffffff81b558a8: 55 push %rbp ffffffff81b558a9: 48 89 e5 mov %rsp,%rbp ffffffff81b558ac: f0 80 8f e0 01 00 00 lock orb $0x1,0x1e0(%rdi) ffffffff81b558b3: 01 ffffffff81b558b4: 5d pop %rbp ffffffff81b558b5: c3 retq This needs additional fixing. Signed-off-by: Denys Vlasenko CC: Alexei Starovoitov CC: Alexander Duyck CC: Joe Perches CC: David S. Miller CC: Jiri Pirko CC: linux-kernel@vger.kernel.org CC: netdev@vger.kernel.org CC: netfilter-devel@vger.kernel.org Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/netdevice.h | 14 +------------- net/core/dev.c | 11 +++++++++++ 2 files changed, 12 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2b39235b9f13..fa57915f440c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2559,10 +2559,6 @@ static inline void netif_tx_wake_all_queues(struct net_device *dev) static inline void netif_tx_stop_queue(struct netdev_queue *dev_queue) { - if (WARN_ON(!dev_queue)) { - pr_info("netif_stop_queue() cannot be called before register_netdev()\n"); - return; - } set_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state); } @@ -2578,15 +2574,7 @@ static inline void netif_stop_queue(struct net_device *dev) netif_tx_stop_queue(netdev_get_tx_queue(dev, 0)); } -static inline void netif_tx_stop_all_queues(struct net_device *dev) -{ - unsigned int i; - - for (i = 0; i < dev->num_tx_queues; i++) { - struct netdev_queue *txq = netdev_get_tx_queue(dev, i); - netif_tx_stop_queue(txq); - } -} +void netif_tx_stop_all_queues(struct net_device *dev); static inline bool netif_tx_queue_stopped(const struct netdev_queue *dev_queue) { diff --git a/net/core/dev.c b/net/core/dev.c index e5f77c40bbd1..fd012bbe0486 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6301,6 +6301,17 @@ static int netif_alloc_netdev_queues(struct net_device *dev) return 0; } +void netif_tx_stop_all_queues(struct net_device *dev) +{ + unsigned int i; + + for (i = 0; i < dev->num_tx_queues; i++) { + struct netdev_queue *txq = netdev_get_tx_queue(dev, i); + netif_tx_stop_queue(txq); + } +} +EXPORT_SYMBOL(netif_tx_stop_all_queues); + /** * register_netdevice - register a network device * @dev: device to register -- cgit v1.2.3 From cffc642d93f9324a06dfbd7da9af29652952a248 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Mon, 11 May 2015 22:22:44 -0700 Subject: test_bpf: add 173 new testcases for eBPF add an exhaustive set of eBPF tests bringing total to: test_bpf: Summary: 233 PASSED, 0 FAILED, [0/226 JIT'ed] Signed-off-by: Michael Holzheu Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/filter.h | 10 + lib/test_bpf.c | 2192 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 2202 insertions(+) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 3c03a6085b82..ce1d72d34382 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -207,6 +207,16 @@ struct bpf_prog_aux; .off = OFF, \ .imm = 0 }) +/* Atomic memory add, *(uint *)(dst_reg + off16) += src_reg */ + +#define BPF_STX_XADD(SIZE, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_STX | BPF_SIZE(SIZE) | BPF_XADD, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + /* Memory store, *(uint *) (dst_reg + off16) = imm32 */ #define BPF_ST_MEM(SIZE, DST, OFF, IMM) \ diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 3c41049d72d8..8bca780e3613 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -1806,6 +1806,2198 @@ static struct bpf_test tests[] = { 0x10, 0xbf, 0x48, 0xd6, 0x43, 0xd6}, { { 38, 256 } } }, + /* BPF_ALU | BPF_MOV | BPF_X */ + { + "ALU_MOV_X: dst = 2", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R1, 2), + BPF_ALU32_REG(BPF_MOV, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU_MOV_X: dst = 4294967295", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R1, 4294967295), + BPF_ALU32_REG(BPF_MOV, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 4294967295 } }, + }, + { + "ALU64_MOV_X: dst = 2", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R1, 2), + BPF_ALU64_REG(BPF_MOV, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU64_MOV_X: dst = 4294967295", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R1, 4294967295), + BPF_ALU64_REG(BPF_MOV, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 4294967295 } }, + }, + /* BPF_ALU | BPF_MOV | BPF_K */ + { + "ALU_MOV_K: dst = 2", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU_MOV_K: dst = 4294967295", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 4294967295), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 4294967295 } }, + }, + { + "ALU_MOV_K: 0x0000ffffffff0000 = 0x00000000ffffffff", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x0000ffffffff0000), + BPF_LD_IMM64(R3, 0x00000000ffffffff), + BPF_ALU32_IMM(BPF_MOV, R2, 0xffffffff), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + { + "ALU64_MOV_K: dst = 2", + .u.insns_int = { + BPF_ALU64_IMM(BPF_MOV, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU64_MOV_K: dst = 2147483647", + .u.insns_int = { + BPF_ALU64_IMM(BPF_MOV, R0, 2147483647), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2147483647 } }, + }, + { + "ALU64_OR_K: dst = 0x0", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x0000ffffffff0000), + BPF_LD_IMM64(R3, 0x0), + BPF_ALU64_IMM(BPF_MOV, R2, 0x0), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + { + "ALU64_MOV_K: dst = -1", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x0000ffffffff0000), + BPF_LD_IMM64(R3, 0xffffffffffffffff), + BPF_ALU64_IMM(BPF_MOV, R2, 0xffffffff), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + /* BPF_ALU | BPF_ADD | BPF_X */ + { + "ALU_ADD_X: 1 + 2 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_MOV, R1, 2), + BPF_ALU32_REG(BPF_ADD, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU_ADD_X: 1 + 4294967294 = 4294967295", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_MOV, R1, 4294967294), + BPF_ALU32_REG(BPF_ADD, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 4294967295 } }, + }, + { + "ALU64_ADD_X: 1 + 2 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_MOV, R1, 2), + BPF_ALU64_REG(BPF_ADD, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU64_ADD_X: 1 + 4294967294 = 4294967295", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_MOV, R1, 4294967294), + BPF_ALU64_REG(BPF_ADD, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 4294967295 } }, + }, + /* BPF_ALU | BPF_ADD | BPF_K */ + { + "ALU_ADD_K: 1 + 2 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_ADD, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU_ADD_K: 3 + 0 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU32_IMM(BPF_ADD, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU_ADD_K: 1 + 4294967294 = 4294967295", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_ADD, R0, 4294967294), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 4294967295 } }, + }, + { + "ALU_ADD_K: 0 + (-1) = 0x00000000ffffffff", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x0), + BPF_LD_IMM64(R3, 0x00000000ffffffff), + BPF_ALU32_IMM(BPF_ADD, R2, 0xffffffff), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + { + "ALU64_ADD_K: 1 + 2 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU64_IMM(BPF_ADD, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU64_ADD_K: 3 + 0 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU64_IMM(BPF_ADD, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU64_ADD_K: 1 + 2147483646 = 2147483647", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU64_IMM(BPF_ADD, R0, 2147483646), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2147483647 } }, + }, + { + "ALU64_ADD_K: 2147483646 + -2147483647 = -1", + .u.insns_int = { + BPF_LD_IMM64(R0, 2147483646), + BPF_ALU64_IMM(BPF_ADD, R0, -2147483647), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -1 } }, + }, + { + "ALU64_ADD_K: 1 + 0 = 1", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x1), + BPF_LD_IMM64(R3, 0x1), + BPF_ALU64_IMM(BPF_ADD, R2, 0x0), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + { + "ALU64_ADD_K: 0 + (-1) = 0xffffffffffffffff", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x0), + BPF_LD_IMM64(R3, 0xffffffffffffffff), + BPF_ALU64_IMM(BPF_ADD, R2, 0xffffffff), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + /* BPF_ALU | BPF_SUB | BPF_X */ + { + "ALU_SUB_X: 3 - 1 = 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU32_IMM(BPF_MOV, R1, 1), + BPF_ALU32_REG(BPF_SUB, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU_SUB_X: 4294967295 - 4294967294 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 4294967295), + BPF_ALU32_IMM(BPF_MOV, R1, 4294967294), + BPF_ALU32_REG(BPF_SUB, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU64_SUB_X: 3 - 1 = 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU32_IMM(BPF_MOV, R1, 1), + BPF_ALU64_REG(BPF_SUB, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU64_SUB_X: 4294967295 - 4294967294 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 4294967295), + BPF_ALU32_IMM(BPF_MOV, R1, 4294967294), + BPF_ALU64_REG(BPF_SUB, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_ALU | BPF_SUB | BPF_K */ + { + "ALU_SUB_K: 3 - 1 = 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU32_IMM(BPF_SUB, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU_SUB_K: 3 - 0 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU32_IMM(BPF_SUB, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU_SUB_K: 4294967295 - 4294967294 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 4294967295), + BPF_ALU32_IMM(BPF_SUB, R0, 4294967294), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU64_SUB_K: 3 - 1 = 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU64_IMM(BPF_SUB, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU64_SUB_K: 3 - 0 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU64_IMM(BPF_SUB, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU64_SUB_K: 4294967294 - 4294967295 = -1", + .u.insns_int = { + BPF_LD_IMM64(R0, 4294967294), + BPF_ALU64_IMM(BPF_SUB, R0, 4294967295), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -1 } }, + }, + { + "ALU64_ADD_K: 2147483646 - 2147483647 = -1", + .u.insns_int = { + BPF_LD_IMM64(R0, 2147483646), + BPF_ALU64_IMM(BPF_SUB, R0, 2147483647), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -1 } }, + }, + /* BPF_ALU | BPF_MUL | BPF_X */ + { + "ALU_MUL_X: 2 * 3 = 6", + .u.insns_int = { + BPF_LD_IMM64(R0, 2), + BPF_ALU32_IMM(BPF_MOV, R1, 3), + BPF_ALU32_REG(BPF_MUL, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 6 } }, + }, + { + "ALU_MUL_X: 2 * 0x7FFFFFF8 = 0xFFFFFFF0", + .u.insns_int = { + BPF_LD_IMM64(R0, 2), + BPF_ALU32_IMM(BPF_MOV, R1, 0x7FFFFFF8), + BPF_ALU32_REG(BPF_MUL, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xFFFFFFF0 } }, + }, + { + "ALU_MUL_X: -1 * -1 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, -1), + BPF_ALU32_IMM(BPF_MOV, R1, -1), + BPF_ALU32_REG(BPF_MUL, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU64_MUL_X: 2 * 3 = 6", + .u.insns_int = { + BPF_LD_IMM64(R0, 2), + BPF_ALU32_IMM(BPF_MOV, R1, 3), + BPF_ALU64_REG(BPF_MUL, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 6 } }, + }, + { + "ALU64_MUL_X: 1 * 2147483647 = 2147483647", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_MOV, R1, 2147483647), + BPF_ALU64_REG(BPF_MUL, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2147483647 } }, + }, + /* BPF_ALU | BPF_MUL | BPF_K */ + { + "ALU_MUL_K: 2 * 3 = 6", + .u.insns_int = { + BPF_LD_IMM64(R0, 2), + BPF_ALU32_IMM(BPF_MUL, R0, 3), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 6 } }, + }, + { + "ALU_MUL_K: 3 * 1 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU32_IMM(BPF_MUL, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU_MUL_K: 2 * 0x7FFFFFF8 = 0xFFFFFFF0", + .u.insns_int = { + BPF_LD_IMM64(R0, 2), + BPF_ALU32_IMM(BPF_MUL, R0, 0x7FFFFFF8), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xFFFFFFF0 } }, + }, + { + "ALU_MUL_K: 1 * (-1) = 0x00000000ffffffff", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x1), + BPF_LD_IMM64(R3, 0x00000000ffffffff), + BPF_ALU32_IMM(BPF_MUL, R2, 0xffffffff), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + { + "ALU64_MUL_K: 2 * 3 = 6", + .u.insns_int = { + BPF_LD_IMM64(R0, 2), + BPF_ALU64_IMM(BPF_MUL, R0, 3), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 6 } }, + }, + { + "ALU64_MUL_K: 3 * 1 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU64_IMM(BPF_MUL, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU64_MUL_K: 1 * 2147483647 = 2147483647", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU64_IMM(BPF_MUL, R0, 2147483647), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2147483647 } }, + }, + { + "ALU64_MUL_K: 1 * -2147483647 = -2147483647", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU64_IMM(BPF_MUL, R0, -2147483647), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -2147483647 } }, + }, + { + "ALU64_MUL_K: 1 * (-1) = 0xffffffffffffffff", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x1), + BPF_LD_IMM64(R3, 0xffffffffffffffff), + BPF_ALU64_IMM(BPF_MUL, R2, 0xffffffff), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + /* BPF_ALU | BPF_DIV | BPF_X */ + { + "ALU_DIV_X: 6 / 2 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 6), + BPF_ALU32_IMM(BPF_MOV, R1, 2), + BPF_ALU32_REG(BPF_DIV, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU_DIV_X: 4294967295 / 4294967295 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 4294967295), + BPF_ALU32_IMM(BPF_MOV, R1, 4294967295), + BPF_ALU32_REG(BPF_DIV, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU64_DIV_X: 6 / 2 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 6), + BPF_ALU32_IMM(BPF_MOV, R1, 2), + BPF_ALU64_REG(BPF_DIV, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU64_DIV_X: 2147483647 / 2147483647 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 2147483647), + BPF_ALU32_IMM(BPF_MOV, R1, 2147483647), + BPF_ALU64_REG(BPF_DIV, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU64_DIV_X: 0xffffffffffffffff / (-1) = 0x0000000000000001", + .u.insns_int = { + BPF_LD_IMM64(R2, 0xffffffffffffffffUL), + BPF_LD_IMM64(R4, 0xffffffffffffffffUL), + BPF_LD_IMM64(R3, 0x0000000000000001UL), + BPF_ALU64_REG(BPF_DIV, R2, R4), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + /* BPF_ALU | BPF_DIV | BPF_K */ + { + "ALU_DIV_K: 6 / 2 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 6), + BPF_ALU32_IMM(BPF_DIV, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU_DIV_K: 3 / 1 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU32_IMM(BPF_DIV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU_DIV_K: 4294967295 / 4294967295 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 4294967295), + BPF_ALU32_IMM(BPF_DIV, R0, 4294967295), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU_DIV_K: 0xffffffffffffffff / (-1) = 0x1", + .u.insns_int = { + BPF_LD_IMM64(R2, 0xffffffffffffffffUL), + BPF_LD_IMM64(R3, 0x1UL), + BPF_ALU32_IMM(BPF_DIV, R2, 0xffffffff), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + { + "ALU64_DIV_K: 6 / 2 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 6), + BPF_ALU64_IMM(BPF_DIV, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU64_DIV_K: 3 / 1 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU64_IMM(BPF_DIV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU64_DIV_K: 2147483647 / 2147483647 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 2147483647), + BPF_ALU64_IMM(BPF_DIV, R0, 2147483647), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU64_DIV_K: 0xffffffffffffffff / (-1) = 0x0000000000000001", + .u.insns_int = { + BPF_LD_IMM64(R2, 0xffffffffffffffffUL), + BPF_LD_IMM64(R3, 0x0000000000000001UL), + BPF_ALU64_IMM(BPF_DIV, R2, 0xffffffff), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + /* BPF_ALU | BPF_MOD | BPF_X */ + { + "ALU_MOD_X: 3 % 2 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU32_IMM(BPF_MOV, R1, 2), + BPF_ALU32_REG(BPF_MOD, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU_MOD_X: 4294967295 % 4294967293 = 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 4294967295), + BPF_ALU32_IMM(BPF_MOV, R1, 4294967293), + BPF_ALU32_REG(BPF_MOD, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU64_MOD_X: 3 % 2 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU32_IMM(BPF_MOV, R1, 2), + BPF_ALU64_REG(BPF_MOD, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU64_MOD_X: 2147483647 % 2147483645 = 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 2147483647), + BPF_ALU32_IMM(BPF_MOV, R1, 2147483645), + BPF_ALU64_REG(BPF_MOD, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + /* BPF_ALU | BPF_MOD | BPF_K */ + { + "ALU_MOD_K: 3 % 2 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU32_IMM(BPF_MOD, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU_MOD_K: 3 % 1 = 0", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU32_IMM(BPF_MOD, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0 } }, + }, + { + "ALU_MOD_K: 4294967295 % 4294967293 = 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 4294967295), + BPF_ALU32_IMM(BPF_MOD, R0, 4294967293), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU64_MOD_K: 3 % 2 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU64_IMM(BPF_MOD, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU64_MOD_K: 3 % 1 = 0", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU64_IMM(BPF_MOD, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0 } }, + }, + { + "ALU64_MOD_K: 2147483647 % 2147483645 = 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 2147483647), + BPF_ALU64_IMM(BPF_MOD, R0, 2147483645), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + /* BPF_ALU | BPF_AND | BPF_X */ + { + "ALU_AND_X: 3 & 2 = 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU32_IMM(BPF_MOV, R1, 2), + BPF_ALU32_REG(BPF_AND, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU_AND_X: 0xffffffff & 0xffffffff = 0xffffffff", + .u.insns_int = { + BPF_LD_IMM64(R0, 0xffffffff), + BPF_ALU32_IMM(BPF_MOV, R1, 0xffffffff), + BPF_ALU32_REG(BPF_AND, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffffffff } }, + }, + { + "ALU64_AND_X: 3 & 2 = 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU32_IMM(BPF_MOV, R1, 2), + BPF_ALU64_REG(BPF_AND, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU64_AND_X: 0xffffffff & 0xffffffff = 0xffffffff", + .u.insns_int = { + BPF_LD_IMM64(R0, 0xffffffff), + BPF_ALU32_IMM(BPF_MOV, R1, 0xffffffff), + BPF_ALU64_REG(BPF_AND, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffffffff } }, + }, + /* BPF_ALU | BPF_AND | BPF_K */ + { + "ALU_AND_K: 3 & 2 = 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU32_IMM(BPF_AND, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU_AND_K: 0xffffffff & 0xffffffff = 0xffffffff", + .u.insns_int = { + BPF_LD_IMM64(R0, 0xffffffff), + BPF_ALU32_IMM(BPF_AND, R0, 0xffffffff), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffffffff } }, + }, + { + "ALU64_AND_K: 3 & 2 = 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU64_IMM(BPF_AND, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU64_AND_K: 0xffffffff & 0xffffffff = 0xffffffff", + .u.insns_int = { + BPF_LD_IMM64(R0, 0xffffffff), + BPF_ALU64_IMM(BPF_AND, R0, 0xffffffff), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffffffff } }, + }, + { + "ALU64_AND_K: 0x0000ffffffff0000 & 0x0 = 0x0000ffff00000000", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x0000ffffffff0000), + BPF_LD_IMM64(R3, 0x0000000000000000), + BPF_ALU64_IMM(BPF_AND, R2, 0x0), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + { + "ALU64_AND_K: 0x0000ffffffff0000 & -1 = 0x0000ffffffffffff", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x0000ffffffff0000), + BPF_LD_IMM64(R3, 0x0000ffffffff0000), + BPF_ALU64_IMM(BPF_AND, R2, 0xffffffff), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + { + "ALU64_AND_K: 0xffffffffffffffff & -1 = 0xffffffffffffffff", + .u.insns_int = { + BPF_LD_IMM64(R2, 0xffffffffffffffff), + BPF_LD_IMM64(R3, 0xffffffffffffffff), + BPF_ALU64_IMM(BPF_AND, R2, 0xffffffff), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + /* BPF_ALU | BPF_OR | BPF_X */ + { + "ALU_OR_X: 1 | 2 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_MOV, R1, 2), + BPF_ALU32_REG(BPF_OR, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU_OR_X: 0x0 | 0xffffffff = 0xffffffff", + .u.insns_int = { + BPF_LD_IMM64(R0, 0), + BPF_ALU32_IMM(BPF_MOV, R1, 0xffffffff), + BPF_ALU32_REG(BPF_OR, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffffffff } }, + }, + { + "ALU64_OR_X: 1 | 2 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_MOV, R1, 2), + BPF_ALU64_REG(BPF_OR, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU64_OR_X: 0 | 0xffffffff = 0xffffffff", + .u.insns_int = { + BPF_LD_IMM64(R0, 0), + BPF_ALU32_IMM(BPF_MOV, R1, 0xffffffff), + BPF_ALU64_REG(BPF_OR, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffffffff } }, + }, + /* BPF_ALU | BPF_OR | BPF_K */ + { + "ALU_OR_K: 1 | 2 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_OR, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU_OR_K: 0 & 0xffffffff = 0xffffffff", + .u.insns_int = { + BPF_LD_IMM64(R0, 0), + BPF_ALU32_IMM(BPF_OR, R0, 0xffffffff), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffffffff } }, + }, + { + "ALU64_OR_K: 1 | 2 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU64_IMM(BPF_OR, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU64_OR_K: 0 & 0xffffffff = 0xffffffff", + .u.insns_int = { + BPF_LD_IMM64(R0, 0), + BPF_ALU64_IMM(BPF_OR, R0, 0xffffffff), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffffffff } }, + }, + { + "ALU64_OR_K: 0x0000ffffffff0000 | 0x0 = 0x0000ffff00000000", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x0000ffffffff0000), + BPF_LD_IMM64(R3, 0x0000ffffffff0000), + BPF_ALU64_IMM(BPF_OR, R2, 0x0), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + { + "ALU64_OR_K: 0x0000ffffffff0000 | -1 = 0xffffffffffffffff", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x0000ffffffff0000), + BPF_LD_IMM64(R3, 0xffffffffffffffff), + BPF_ALU64_IMM(BPF_OR, R2, 0xffffffff), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + { + "ALU64_OR_K: 0x000000000000000 | -1 = 0xffffffffffffffff", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x0000000000000000), + BPF_LD_IMM64(R3, 0xffffffffffffffff), + BPF_ALU64_IMM(BPF_OR, R2, 0xffffffff), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + /* BPF_ALU | BPF_XOR | BPF_X */ + { + "ALU_XOR_X: 5 ^ 6 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 5), + BPF_ALU32_IMM(BPF_MOV, R1, 6), + BPF_ALU32_REG(BPF_XOR, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU_XOR_X: 0x1 ^ 0xffffffff = 0xfffffffe", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_MOV, R1, 0xffffffff), + BPF_ALU32_REG(BPF_XOR, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xfffffffe } }, + }, + { + "ALU64_XOR_X: 5 ^ 6 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 5), + BPF_ALU32_IMM(BPF_MOV, R1, 6), + BPF_ALU64_REG(BPF_XOR, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU64_XOR_X: 1 ^ 0xffffffff = 0xfffffffe", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_MOV, R1, 0xffffffff), + BPF_ALU64_REG(BPF_XOR, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xfffffffe } }, + }, + /* BPF_ALU | BPF_XOR | BPF_K */ + { + "ALU_XOR_K: 5 ^ 6 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 5), + BPF_ALU32_IMM(BPF_XOR, R0, 6), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU_XOR_K: 1 ^ 0xffffffff = 0xfffffffe", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_XOR, R0, 0xffffffff), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xfffffffe } }, + }, + { + "ALU64_XOR_K: 5 ^ 6 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 5), + BPF_ALU64_IMM(BPF_XOR, R0, 6), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU64_XOR_K: 1 & 0xffffffff = 0xfffffffe", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU64_IMM(BPF_XOR, R0, 0xffffffff), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xfffffffe } }, + }, + { + "ALU64_XOR_K: 0x0000ffffffff0000 ^ 0x0 = 0x0000ffffffff0000", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x0000ffffffff0000), + BPF_LD_IMM64(R3, 0x0000ffffffff0000), + BPF_ALU64_IMM(BPF_XOR, R2, 0x0), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + { + "ALU64_XOR_K: 0x0000ffffffff0000 ^ -1 = 0xffff00000000ffff", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x0000ffffffff0000), + BPF_LD_IMM64(R3, 0xffff00000000ffff), + BPF_ALU64_IMM(BPF_XOR, R2, 0xffffffff), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + { + "ALU64_XOR_K: 0x000000000000000 ^ -1 = 0xffffffffffffffff", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x0000000000000000), + BPF_LD_IMM64(R3, 0xffffffffffffffff), + BPF_ALU64_IMM(BPF_XOR, R2, 0xffffffff), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + /* BPF_ALU | BPF_LSH | BPF_X */ + { + "ALU_LSH_X: 1 << 1 = 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_MOV, R1, 1), + BPF_ALU32_REG(BPF_LSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU_LSH_X: 1 << 31 = 0x80000000", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_MOV, R1, 31), + BPF_ALU32_REG(BPF_LSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x80000000 } }, + }, + { + "ALU64_LSH_X: 1 << 1 = 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_MOV, R1, 1), + BPF_ALU64_REG(BPF_LSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU64_LSH_X: 1 << 31 = 0x80000000", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_MOV, R1, 31), + BPF_ALU64_REG(BPF_LSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x80000000 } }, + }, + /* BPF_ALU | BPF_LSH | BPF_K */ + { + "ALU_LSH_K: 1 << 1 = 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_LSH, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU_LSH_K: 1 << 31 = 0x80000000", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_LSH, R0, 31), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x80000000 } }, + }, + { + "ALU64_LSH_K: 1 << 1 = 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU64_IMM(BPF_LSH, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU64_LSH_K: 1 << 31 = 0x80000000", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU64_IMM(BPF_LSH, R0, 31), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x80000000 } }, + }, + /* BPF_ALU | BPF_RSH | BPF_X */ + { + "ALU_RSH_X: 2 >> 1 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 2), + BPF_ALU32_IMM(BPF_MOV, R1, 1), + BPF_ALU32_REG(BPF_RSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU_RSH_X: 0x80000000 >> 31 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x80000000), + BPF_ALU32_IMM(BPF_MOV, R1, 31), + BPF_ALU32_REG(BPF_RSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU64_RSH_X: 2 >> 1 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 2), + BPF_ALU32_IMM(BPF_MOV, R1, 1), + BPF_ALU64_REG(BPF_RSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU64_RSH_X: 0x80000000 >> 31 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x80000000), + BPF_ALU32_IMM(BPF_MOV, R1, 31), + BPF_ALU64_REG(BPF_RSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_ALU | BPF_RSH | BPF_K */ + { + "ALU_RSH_K: 2 >> 1 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 2), + BPF_ALU32_IMM(BPF_RSH, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU_RSH_K: 0x80000000 >> 31 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x80000000), + BPF_ALU32_IMM(BPF_RSH, R0, 31), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU64_RSH_K: 2 >> 1 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 2), + BPF_ALU64_IMM(BPF_RSH, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU64_RSH_K: 0x80000000 >> 31 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x80000000), + BPF_ALU64_IMM(BPF_RSH, R0, 31), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_ALU | BPF_ARSH | BPF_X */ + { + "ALU_ARSH_X: 0xff00ff0000000000 >> 40 = 0xffffffffffff00ff", + .u.insns_int = { + BPF_LD_IMM64(R0, 0xff00ff0000000000LL), + BPF_ALU32_IMM(BPF_MOV, R1, 40), + BPF_ALU64_REG(BPF_ARSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffff00ff } }, + }, + /* BPF_ALU | BPF_ARSH | BPF_K */ + { + "ALU_ARSH_K: 0xff00ff0000000000 >> 40 = 0xffffffffffff00ff", + .u.insns_int = { + BPF_LD_IMM64(R0, 0xff00ff0000000000LL), + BPF_ALU64_IMM(BPF_ARSH, R0, 40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffff00ff } }, + }, + /* BPF_ALU | BPF_NEG */ + { + "ALU_NEG: -(3) = -3", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 3), + BPF_ALU32_IMM(BPF_NEG, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -3 } }, + }, + { + "ALU_NEG: -(-3) = 3", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -3), + BPF_ALU32_IMM(BPF_NEG, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU64_NEG: -(3) = -3", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU64_IMM(BPF_NEG, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -3 } }, + }, + { + "ALU64_NEG: -(-3) = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, -3), + BPF_ALU64_IMM(BPF_NEG, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + /* BPF_ALU | BPF_END | BPF_FROM_BE */ + { + "ALU_END_FROM_BE 16: 0x0123456789abcdef -> 0xcdef", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ENDIAN(BPF_FROM_BE, R0, 16), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, cpu_to_be16(0xcdef) } }, + }, + { + "ALU_END_FROM_BE 32: 0x0123456789abcdef -> 0x89abcdef", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ENDIAN(BPF_FROM_BE, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, cpu_to_be32(0x89abcdef) } }, + }, + { + "ALU_END_FROM_BE 64: 0x0123456789abcdef -> 0x89abcdef", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ENDIAN(BPF_FROM_BE, R0, 64), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, (u32) cpu_to_be64(0x0123456789abcdefLL) } }, + }, + /* BPF_ALU | BPF_END | BPF_FROM_LE */ + { + "ALU_END_FROM_LE 16: 0x0123456789abcdef -> 0xefcd", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ENDIAN(BPF_FROM_LE, R0, 16), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, cpu_to_le16(0xcdef) } }, + }, + { + "ALU_END_FROM_LE 32: 0x0123456789abcdef -> 0xefcdab89", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ENDIAN(BPF_FROM_LE, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, cpu_to_le32(0x89abcdef) } }, + }, + { + "ALU_END_FROM_LE 64: 0x0123456789abcdef -> 0x67452301", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ENDIAN(BPF_FROM_LE, R0, 64), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, (u32) cpu_to_le64(0x0123456789abcdefLL) } }, + }, + /* BPF_ST(X) | BPF_MEM | BPF_B/H/W/DW */ + { + "ST_MEM_B: Store/Load byte: max negative", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_ST_MEM(BPF_B, R10, -40, 0xff), + BPF_LDX_MEM(BPF_B, R0, R10, -40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xff } }, + }, + { + "ST_MEM_B: Store/Load byte: max positive", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_ST_MEM(BPF_H, R10, -40, 0x7f), + BPF_LDX_MEM(BPF_H, R0, R10, -40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x7f } }, + }, + { + "STX_MEM_B: Store/Load byte: max negative", + .u.insns_int = { + BPF_LD_IMM64(R0, 0), + BPF_LD_IMM64(R1, 0xffLL), + BPF_STX_MEM(BPF_B, R10, R1, -40), + BPF_LDX_MEM(BPF_B, R0, R10, -40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xff } }, + }, + { + "ST_MEM_H: Store/Load half word: max negative", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_ST_MEM(BPF_H, R10, -40, 0xffff), + BPF_LDX_MEM(BPF_H, R0, R10, -40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffff } }, + }, + { + "ST_MEM_H: Store/Load half word: max positive", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_ST_MEM(BPF_H, R10, -40, 0x7fff), + BPF_LDX_MEM(BPF_H, R0, R10, -40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x7fff } }, + }, + { + "STX_MEM_H: Store/Load half word: max negative", + .u.insns_int = { + BPF_LD_IMM64(R0, 0), + BPF_LD_IMM64(R1, 0xffffLL), + BPF_STX_MEM(BPF_H, R10, R1, -40), + BPF_LDX_MEM(BPF_H, R0, R10, -40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffff } }, + }, + { + "ST_MEM_W: Store/Load word: max negative", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_ST_MEM(BPF_W, R10, -40, 0xffffffff), + BPF_LDX_MEM(BPF_W, R0, R10, -40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffffffff } }, + }, + { + "ST_MEM_W: Store/Load word: max positive", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_ST_MEM(BPF_W, R10, -40, 0x7fffffff), + BPF_LDX_MEM(BPF_W, R0, R10, -40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x7fffffff } }, + }, + { + "STX_MEM_W: Store/Load word: max negative", + .u.insns_int = { + BPF_LD_IMM64(R0, 0), + BPF_LD_IMM64(R1, 0xffffffffLL), + BPF_STX_MEM(BPF_W, R10, R1, -40), + BPF_LDX_MEM(BPF_W, R0, R10, -40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffffffff } }, + }, + { + "ST_MEM_DW: Store/Load double word: max negative", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_ST_MEM(BPF_DW, R10, -40, 0xffffffff), + BPF_LDX_MEM(BPF_DW, R0, R10, -40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffffffff } }, + }, + { + "ST_MEM_DW: Store/Load double word: max negative 2", + .u.insns_int = { + BPF_LD_IMM64(R2, 0xffff00000000ffff), + BPF_LD_IMM64(R3, 0xffffffffffffffff), + BPF_ST_MEM(BPF_DW, R10, -40, 0xffffffff), + BPF_LDX_MEM(BPF_DW, R2, R10, -40), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + { + "ST_MEM_DW: Store/Load double word: max positive", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_ST_MEM(BPF_DW, R10, -40, 0x7fffffff), + BPF_LDX_MEM(BPF_DW, R0, R10, -40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x7fffffff } }, + }, + { + "STX_MEM_DW: Store/Load double word: max negative", + .u.insns_int = { + BPF_LD_IMM64(R0, 0), + BPF_LD_IMM64(R1, 0xffffffffffffffffLL), + BPF_STX_MEM(BPF_W, R10, R1, -40), + BPF_LDX_MEM(BPF_W, R0, R10, -40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffffffff } }, + }, + /* BPF_STX | BPF_XADD | BPF_W/DW */ + { + "STX_XADD_W: Test: 0x12 + 0x10 = 0x22", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0x12), + BPF_ST_MEM(BPF_W, R10, -40, 0x10), + BPF_STX_XADD(BPF_W, R10, R0, -40), + BPF_LDX_MEM(BPF_W, R0, R10, -40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x22 } }, + }, + { + "STX_XADD_DW: Test: 0x12 + 0x10 = 0x22", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0x12), + BPF_ST_MEM(BPF_DW, R10, -40, 0x10), + BPF_STX_XADD(BPF_DW, R10, R0, -40), + BPF_LDX_MEM(BPF_DW, R0, R10, -40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x22 } }, + }, + /* BPF_JMP | BPF_EXIT */ + { + "JMP_EXIT", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0x4711), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 0x4712), + }, + INTERNAL, + { }, + { { 0, 0x4711 } }, + }, + /* BPF_JMP | BPF_JA */ + { + "JMP_JA: Unconditional jump: if (true) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_JMP | BPF_JSGT | BPF_K */ + { + "JMP_JSGT_K: Signed jump: if (-1 > -2) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 0xffffffffffffffffLL), + BPF_JMP_IMM(BPF_JSGT, R1, -2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "JMP_JSGT_K: Signed jump: if (-1 > -1) return 0", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_LD_IMM64(R1, 0xffffffffffffffffLL), + BPF_JMP_IMM(BPF_JSGT, R1, -1, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_JMP | BPF_JSGE | BPF_K */ + { + "JMP_JSGE_K: Signed jump: if (-1 >= -2) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 0xffffffffffffffffLL), + BPF_JMP_IMM(BPF_JSGE, R1, -2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "JMP_JSGE_K: Signed jump: if (-1 >= -1) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 0xffffffffffffffffLL), + BPF_JMP_IMM(BPF_JSGE, R1, -1, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_JMP | BPF_JGT | BPF_K */ + { + "JMP_JGT_K: if (3 > 2) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_JMP_IMM(BPF_JGT, R1, 2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_JMP | BPF_JGE | BPF_K */ + { + "JMP_JGE_K: if (3 >= 2) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_JMP_IMM(BPF_JGE, R1, 2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "JMP_JGE_K: if (3 >= 3) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_JMP_IMM(BPF_JGE, R1, 3, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_JMP | BPF_JNE | BPF_K */ + { + "JMP_JNE_K: if (3 != 2) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_JMP_IMM(BPF_JNE, R1, 2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_JMP | BPF_JEQ | BPF_K */ + { + "JMP_JEQ_K: if (3 == 3) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_JMP_IMM(BPF_JEQ, R1, 3, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_JMP | BPF_JSET | BPF_K */ + { + "JMP_JSET_K: if (0x3 & 0x2) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_JMP_IMM(BPF_JNE, R1, 2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "JMP_JSET_K: if (0x3 & 0xffffffff) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_JMP_IMM(BPF_JNE, R1, 0xffffffff, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_JMP | BPF_JSGT | BPF_X */ + { + "JMP_JSGT_X: Signed jump: if (-1 > -2) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, -1), + BPF_LD_IMM64(R2, -2), + BPF_JMP_REG(BPF_JSGT, R1, R2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "JMP_JSGT_X: Signed jump: if (-1 > -1) return 0", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_LD_IMM64(R1, -1), + BPF_LD_IMM64(R2, -1), + BPF_JMP_REG(BPF_JSGT, R1, R2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_JMP | BPF_JSGE | BPF_X */ + { + "JMP_JSGE_X: Signed jump: if (-1 >= -2) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, -1), + BPF_LD_IMM64(R2, -2), + BPF_JMP_REG(BPF_JSGE, R1, R2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "JMP_JSGE_X: Signed jump: if (-1 >= -1) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, -1), + BPF_LD_IMM64(R2, -1), + BPF_JMP_REG(BPF_JSGE, R1, R2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_JMP | BPF_JGT | BPF_X */ + { + "JMP_JGT_X: if (3 > 2) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_LD_IMM64(R2, 2), + BPF_JMP_REG(BPF_JGT, R1, R2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_JMP | BPF_JGE | BPF_X */ + { + "JMP_JGE_X: if (3 >= 2) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_LD_IMM64(R2, 2), + BPF_JMP_REG(BPF_JGE, R1, R2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "JMP_JGE_X: if (3 >= 3) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_LD_IMM64(R2, 3), + BPF_JMP_REG(BPF_JGE, R1, R2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_JMP | BPF_JNE | BPF_X */ + { + "JMP_JNE_X: if (3 != 2) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_LD_IMM64(R2, 2), + BPF_JMP_REG(BPF_JNE, R1, R2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_JMP | BPF_JEQ | BPF_X */ + { + "JMP_JEQ_X: if (3 == 3) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_LD_IMM64(R2, 3), + BPF_JMP_REG(BPF_JEQ, R1, R2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_JMP | BPF_JSET | BPF_X */ + { + "JMP_JSET_X: if (0x3 & 0x2) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_LD_IMM64(R2, 2), + BPF_JMP_REG(BPF_JNE, R1, R2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "JMP_JSET_X: if (0x3 & 0xffffffff) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_LD_IMM64(R2, 0xffffffff), + BPF_JMP_REG(BPF_JNE, R1, R2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, }; static struct net_device dev; -- cgit v1.2.3 From 275e2bc0f25d5eb99c99ebb7293fc3722533124b Mon Sep 17 00:00:00 2001 From: Sergey Popovich Date: Sat, 2 May 2015 19:28:17 +0200 Subject: netfilter: ipset: Fix ext_*() macros So pointers returned by these macros could be referenced with -> directly. Signed-off-by: Sergey Popovich Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/ipset/ip_set.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 34b172301558..f88be7258e5f 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -122,13 +122,13 @@ struct ip_set_skbinfo { struct ip_set; #define ext_timeout(e, s) \ -(unsigned long *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_TIMEOUT]) +((unsigned long *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_TIMEOUT])) #define ext_counter(e, s) \ -(struct ip_set_counter *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_COUNTER]) +((struct ip_set_counter *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_COUNTER])) #define ext_comment(e, s) \ -(struct ip_set_comment *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_COMMENT]) +((struct ip_set_comment *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_COMMENT])) #define ext_skbinfo(e, s) \ -(struct ip_set_skbinfo *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_SKBINFO]) +((struct ip_set_skbinfo *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_SKBINFO])) typedef int (*ipset_adtfn)(struct ip_set *set, void *value, const struct ip_set_ext *ext, -- cgit v1.2.3 From 1bd758eb1cab2fa5b71a23f9e5d3c8076f4ed650 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 12 May 2015 14:56:07 +0200 Subject: net: change name of flow_dissector header to match the .c file name add couple of empty lines on the way. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 2 +- drivers/net/ethernet/cisco/enic/enic_clsf.c | 2 +- include/linux/skbuff.h | 2 +- include/net/flow_dissector.h | 68 +++++++++++++++++++++++++++++ include/net/flow_keys.h | 61 -------------------------- include/net/ip.h | 2 +- include/net/ipv6.h | 2 +- net/core/flow_dissector.c | 2 +- net/sched/cls_flow.c | 2 +- net/sched/sch_choke.c | 2 +- 10 files changed, 76 insertions(+), 69 deletions(-) create mode 100644 include/net/flow_dissector.h delete mode 100644 include/net/flow_keys.h (limited to 'include/linux') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index a2e25de98bde..fe7c38721775 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -76,7 +76,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/net/ethernet/cisco/enic/enic_clsf.c b/drivers/net/ethernet/cisco/enic/enic_clsf.c index 0be6850be8a2..380f04903a36 100644 --- a/drivers/net/ethernet/cisco/enic/enic_clsf.c +++ b/drivers/net/ethernet/cisco/enic/enic_clsf.c @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include "enic_res.h" #include "enic_clsf.h" diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c0b574a414e7..e35c2b13d434 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -34,7 +34,7 @@ #include #include #include -#include +#include /* A. Checksumming of received packets by device. * diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h new file mode 100644 index 000000000000..5e99a7b3963c --- /dev/null +++ b/include/net/flow_dissector.h @@ -0,0 +1,68 @@ +#ifndef _NET_FLOW_DISSECTOR_H +#define _NET_FLOW_DISSECTOR_H + +/* struct flow_keys: + * @src: source ip address in case of IPv4 + * For IPv6 it contains 32bit hash of src address + * @dst: destination ip address in case of IPv4 + * For IPv6 it contains 32bit hash of dst address + * @ports: port numbers of Transport header + * port16[0]: src port number + * port16[1]: dst port number + * @thoff: Transport header offset + * @n_proto: Network header protocol (eg. IPv4/IPv6) + * @ip_proto: Transport header protocol (eg. TCP/UDP) + * All the members, except thoff, are in network byte order. + */ +struct flow_keys { + /* (src,dst) must be grouped, in the same way than in IP header */ + __be32 src; + __be32 dst; + union { + __be32 ports; + __be16 port16[2]; + }; + u16 thoff; + __be16 n_proto; + u8 ip_proto; +}; + +bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow, + void *data, __be16 proto, int nhoff, int hlen); + +static inline bool skb_flow_dissect(const struct sk_buff *skb, + struct flow_keys *flow) +{ + return __skb_flow_dissect(skb, flow, NULL, 0, 0, 0); +} + +__be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto, + void *data, int hlen_proto); + +static inline __be32 skb_flow_get_ports(const struct sk_buff *skb, + int thoff, u8 ip_proto) +{ + return __skb_flow_get_ports(skb, thoff, ip_proto, NULL, 0); +} + +u32 flow_hash_from_keys(struct flow_keys *keys); + +unsigned int flow_get_hlen(const unsigned char *data, unsigned int max_len, + __be16 protocol); + +/* struct flow_keys_digest: + * + * This structure is used to hold a digest of the full flow keys. This is a + * larger "hash" of a flow to allow definitively matching specific flows where + * the 32 bit skb->hash is not large enough. The size is limited to 16 bytes so + * that it can by used in CB of skb (see sch_choke for an example). + */ +#define FLOW_KEYS_DIGEST_LEN 16 +struct flow_keys_digest { + u8 data[FLOW_KEYS_DIGEST_LEN]; +}; + +void make_flow_keys_digest(struct flow_keys_digest *digest, + const struct flow_keys *flow); + +#endif diff --git a/include/net/flow_keys.h b/include/net/flow_keys.h deleted file mode 100644 index 6d6ef626811a..000000000000 --- a/include/net/flow_keys.h +++ /dev/null @@ -1,61 +0,0 @@ -#ifndef _NET_FLOW_KEYS_H -#define _NET_FLOW_KEYS_H - -/* struct flow_keys: - * @src: source ip address in case of IPv4 - * For IPv6 it contains 32bit hash of src address - * @dst: destination ip address in case of IPv4 - * For IPv6 it contains 32bit hash of dst address - * @ports: port numbers of Transport header - * port16[0]: src port number - * port16[1]: dst port number - * @thoff: Transport header offset - * @n_proto: Network header protocol (eg. IPv4/IPv6) - * @ip_proto: Transport header protocol (eg. TCP/UDP) - * All the members, except thoff, are in network byte order. - */ -struct flow_keys { - /* (src,dst) must be grouped, in the same way than in IP header */ - __be32 src; - __be32 dst; - union { - __be32 ports; - __be16 port16[2]; - }; - u16 thoff; - __be16 n_proto; - u8 ip_proto; -}; - -bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow, - void *data, __be16 proto, int nhoff, int hlen); -static inline bool skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow) -{ - return __skb_flow_dissect(skb, flow, NULL, 0, 0, 0); -} -__be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto, - void *data, int hlen_proto); -static inline __be32 skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto) -{ - return __skb_flow_get_ports(skb, thoff, ip_proto, NULL, 0); -} -u32 flow_hash_from_keys(struct flow_keys *keys); -unsigned int flow_get_hlen(const unsigned char *data, unsigned int max_len, - __be16 protocol); - -/* struct flow_keys_digest: - * - * This structure is used to hold a digest of the full flow keys. This is a - * larger "hash" of a flow to allow definitively matching specific flows where - * the 32 bit skb->hash is not large enough. The size is limited to 16 bytes so - * that it can by used in CB of skb (see sch_choke for an example). - */ -#define FLOW_KEYS_DIGEST_LEN 16 -struct flow_keys_digest { - u8 data[FLOW_KEYS_DIGEST_LEN]; -}; - -void make_flow_keys_digest(struct flow_keys_digest *digest, - const struct flow_keys *flow); - -#endif diff --git a/include/net/ip.h b/include/net/ip.h index d14af7edd197..562eb653aebc 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -31,7 +31,7 @@ #include #include #include -#include +#include struct sock; diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 53d25ef1699a..9932b86394a9 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #define SIN6_LEN_RFC2133 24 diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index d3acc4dff4ae..eaa86b539221 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include /* copy saddr & daddr, possibly using 64bit load/store diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index a620c4e288a5..4c5a92298906 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) #include diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index dfe3da75594c..2624e991a2d1 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include /* CHOKe stateless AQM for fair bandwidth allocation -- cgit v1.2.3 From 10b89ee43e849544eddfe34e535341fc077464ec Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 12 May 2015 14:56:09 +0200 Subject: net: move *skb_get_poff declarations into correct header Since these functions are defined in flow_dissector.c, move header declarations from skbuff.h into flow_dissector.h Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/skbuff.h | 4 ---- include/net/flow_dissector.h | 3 +++ net/core/filter.c | 1 + net/ethernet/eth.c | 1 + 4 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index e35c2b13d434..17607ab9e7a2 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3424,10 +3424,6 @@ struct sk_buff *skb_checksum_trimmed(struct sk_buff *skb, unsigned int transport_len, __sum16(*skb_chkf)(struct sk_buff *skb)); -u32 skb_get_poff(const struct sk_buff *skb); -u32 __skb_get_poff(const struct sk_buff *skb, void *data, - const struct flow_keys *keys, int hlen); - /** * skb_head_is_locked - Determine if the skb->head is locked down * @skb: skb to check diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 118ae6926a72..4570ccafe59d 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -46,6 +46,9 @@ static inline __be32 skb_flow_get_ports(const struct sk_buff *skb, } u32 flow_hash_from_keys(struct flow_keys *keys); +u32 skb_get_poff(const struct sk_buff *skb); +u32 __skb_get_poff(const struct sk_buff *skb, void *data, + const struct flow_keys *keys, int hlen); /* struct flow_keys_digest: * diff --git a/net/core/filter.c b/net/core/filter.c index a831f193e2c7..6805717be614 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 9045e2a1108f..9332a0ab0698 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -58,6 +58,7 @@ #include #include #include +#include #include __setup("ether=", netdev_boot_setup); -- cgit v1.2.3 From 9c684b5083bc191c4b7b189c73d75587e167a474 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 12 May 2015 14:56:11 +0200 Subject: net: move __skb_get_hash function declaration to flow_dissector.h Since the definition of the function is in flow_dissector.c, it makes sense to have the declaration in flow_dissector.h Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/skbuff.h | 1 - include/net/flow_dissector.h | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 17607ab9e7a2..ae2d1b7769d8 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -918,7 +918,6 @@ skb_set_hash(struct sk_buff *skb, __u32 hash, enum pkt_hash_types type) skb->hash = hash; } -void __skb_get_hash(struct sk_buff *skb); static inline __u32 skb_get_hash(struct sk_buff *skb) { if (!skb->l4_hash && !skb->sw_hash) diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 4570ccafe59d..e4ee761ca8c0 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -46,6 +46,7 @@ static inline __be32 skb_flow_get_ports(const struct sk_buff *skb, } u32 flow_hash_from_keys(struct flow_keys *keys); +void __skb_get_hash(struct sk_buff *skb); u32 skb_get_poff(const struct sk_buff *skb); u32 __skb_get_poff(const struct sk_buff *skb, void *data, const struct flow_keys *keys, int hlen); -- cgit v1.2.3 From 5605c76240aadc823e3d46ac9afde2f26fbcf019 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 12 May 2015 14:56:12 +0200 Subject: net: move __skb_tx_hash to dev.c __skb_tx_hash function has no relation to flow_dissect so just move it to dev.c Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +++ include/linux/skbuff.h | 3 --- net/core/dev.c | 28 ++++++++++++++++++++++++++++ net/core/flow_dissector.c | 28 ---------------------------- 4 files changed, 31 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index cd0951c1893d..d3ed01c18247 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2832,6 +2832,9 @@ static inline int netif_set_xps_queue(struct net_device *dev, } #endif +u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb, + unsigned int num_tx_queues); + /* * Returns a Tx hash for the given packet when dev->real_num_tx_queues is used * as a distribution range limit for the returned value. diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index ae2d1b7769d8..b01c7fba7c17 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3299,9 +3299,6 @@ static inline bool skb_rx_queue_recorded(const struct sk_buff *skb) return skb->queue_mapping != 0; } -u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb, - unsigned int num_tx_queues); - static inline struct sec_path *skb_sec_path(struct sk_buff *skb) { #ifdef CONFIG_XFRM diff --git a/net/core/dev.c b/net/core/dev.c index 90a568a150b4..d044d2f8532b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2350,6 +2350,34 @@ void netif_device_attach(struct net_device *dev) } EXPORT_SYMBOL(netif_device_attach); +/* + * Returns a Tx hash based on the given packet descriptor a Tx queues' number + * to be used as a distribution range. + */ +u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb, + unsigned int num_tx_queues) +{ + u32 hash; + u16 qoffset = 0; + u16 qcount = num_tx_queues; + + if (skb_rx_queue_recorded(skb)) { + hash = skb_get_rx_queue(skb); + while (unlikely(hash >= num_tx_queues)) + hash -= num_tx_queues; + return hash; + } + + if (dev->num_tc) { + u8 tc = netdev_get_prio_tc_map(dev, skb->priority); + qoffset = dev->tc_to_txq[tc].offset; + qcount = dev->tc_to_txq[tc].count; + } + + return (u16) reciprocal_scale(skb_get_hash(skb), qcount) + qoffset; +} +EXPORT_SYMBOL(__skb_tx_hash); + static void skb_warn_bad_offload(const struct sk_buff *skb) { static const netdev_features_t null_features = 0; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 0d9bc3a586ba..07ca11d06339 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -371,34 +371,6 @@ __u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb) } EXPORT_SYMBOL(skb_get_hash_perturb); -/* - * Returns a Tx hash based on the given packet descriptor a Tx queues' number - * to be used as a distribution range. - */ -u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb, - unsigned int num_tx_queues) -{ - u32 hash; - u16 qoffset = 0; - u16 qcount = num_tx_queues; - - if (skb_rx_queue_recorded(skb)) { - hash = skb_get_rx_queue(skb); - while (unlikely(hash >= num_tx_queues)) - hash -= num_tx_queues; - return hash; - } - - if (dev->num_tc) { - u8 tc = netdev_get_prio_tc_map(dev, skb->priority); - qoffset = dev->tc_to_txq[tc].offset; - qcount = dev->tc_to_txq[tc].count; - } - - return (u16) reciprocal_scale(skb_get_hash(skb), qcount) + qoffset; -} -EXPORT_SYMBOL(__skb_tx_hash); - u32 __skb_get_poff(const struct sk_buff *skb, void *data, const struct flow_keys *keys, int hlen) { -- cgit v1.2.3 From 06635a35d13d42b95422bba6633f175245cc644e Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 12 May 2015 14:56:16 +0200 Subject: flow_dissect: use programable dissector in skb_flow_dissect and friends Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 18 +-- drivers/net/ethernet/cisco/enic/enic_clsf.c | 27 ++-- drivers/net/ethernet/cisco/enic/enic_ethtool.c | 10 +- drivers/net/hyperv/netvsc_drv.c | 8 +- include/linux/skbuff.h | 4 +- include/net/flow_dissector.h | 63 ++++---- include/net/ip.h | 8 +- include/net/ipv6.h | 8 +- net/core/flow_dissector.c | 199 +++++++++++++++++-------- net/ethernet/eth.c | 6 +- net/sched/cls_flow.c | 20 +-- net/sched/sch_choke.c | 4 +- 12 files changed, 229 insertions(+), 146 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index fe7c38721775..ef26e0147050 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3051,16 +3051,16 @@ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb, int noff, proto = -1; if (bond->params.xmit_policy > BOND_XMIT_POLICY_LAYER23) - return skb_flow_dissect(skb, fk); + return skb_flow_dissect_flow_keys(skb, fk); - fk->ports = 0; + fk->ports.ports = 0; noff = skb_network_offset(skb); if (skb->protocol == htons(ETH_P_IP)) { if (unlikely(!pskb_may_pull(skb, noff + sizeof(*iph)))) return false; iph = ip_hdr(skb); - fk->src = iph->saddr; - fk->dst = iph->daddr; + fk->addrs.src = iph->saddr; + fk->addrs.dst = iph->daddr; noff += iph->ihl << 2; if (!ip_is_fragment(iph)) proto = iph->protocol; @@ -3068,15 +3068,15 @@ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb, if (unlikely(!pskb_may_pull(skb, noff + sizeof(*iph6)))) return false; iph6 = ipv6_hdr(skb); - fk->src = (__force __be32)ipv6_addr_hash(&iph6->saddr); - fk->dst = (__force __be32)ipv6_addr_hash(&iph6->daddr); + fk->addrs.src = (__force __be32)ipv6_addr_hash(&iph6->saddr); + fk->addrs.dst = (__force __be32)ipv6_addr_hash(&iph6->daddr); noff += sizeof(*iph6); proto = iph6->nexthdr; } else { return false; } if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34 && proto >= 0) - fk->ports = skb_flow_get_ports(skb, noff, proto); + fk->ports.ports = skb_flow_get_ports(skb, noff, proto); return true; } @@ -3102,8 +3102,8 @@ u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb) bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP23) hash = bond_eth_hash(skb); else - hash = (__force u32)flow.ports; - hash ^= (__force u32)flow.dst ^ (__force u32)flow.src; + hash = (__force u32)flow.ports.ports; + hash ^= (__force u32)flow.addrs.dst ^ (__force u32)flow.addrs.src; hash ^= (hash >> 16); hash ^= (hash >> 8); diff --git a/drivers/net/ethernet/cisco/enic/enic_clsf.c b/drivers/net/ethernet/cisco/enic/enic_clsf.c index 380f04903a36..d3d25c7e5b96 100644 --- a/drivers/net/ethernet/cisco/enic/enic_clsf.c +++ b/drivers/net/ethernet/cisco/enic/enic_clsf.c @@ -22,7 +22,7 @@ int enic_addfltr_5t(struct enic *enic, struct flow_keys *keys, u16 rq) int res; struct filter data; - switch (keys->ip_proto) { + switch (keys->basic.ip_proto) { case IPPROTO_TCP: data.u.ipv4.protocol = PROTO_TCP; break; @@ -33,10 +33,10 @@ int enic_addfltr_5t(struct enic *enic, struct flow_keys *keys, u16 rq) return -EPROTONOSUPPORT; }; data.type = FILTER_IPV4_5TUPLE; - data.u.ipv4.src_addr = ntohl(keys->src); - data.u.ipv4.dst_addr = ntohl(keys->dst); - data.u.ipv4.src_port = ntohs(keys->port16[0]); - data.u.ipv4.dst_port = ntohs(keys->port16[1]); + data.u.ipv4.src_addr = ntohl(keys->addrs.src); + data.u.ipv4.dst_addr = ntohl(keys->addrs.dst); + data.u.ipv4.src_port = ntohs(keys->ports.port16[0]); + data.u.ipv4.dst_port = ntohs(keys->ports.port16[1]); data.u.ipv4.flags = FILTER_FIELDS_IPV4_5TUPLE; spin_lock_bh(&enic->devcmd_lock); @@ -158,11 +158,11 @@ static struct enic_rfs_fltr_node *htbl_key_search(struct hlist_head *h, struct enic_rfs_fltr_node *tpos; hlist_for_each_entry(tpos, h, node) - if (tpos->keys.src == k->src && - tpos->keys.dst == k->dst && - tpos->keys.ports == k->ports && - tpos->keys.ip_proto == k->ip_proto && - tpos->keys.n_proto == k->n_proto) + if (tpos->keys.addrs.src == k->addrs.src && + tpos->keys.addrs.dst == k->addrs.dst && + tpos->keys.ports.ports == k->ports.ports && + tpos->keys.basic.ip_proto == k->basic.ip_proto && + tpos->keys.basic.n_proto == k->basic.n_proto) return tpos; return NULL; } @@ -177,9 +177,10 @@ int enic_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, int res, i; enic = netdev_priv(dev); - res = skb_flow_dissect(skb, &keys); - if (!res || keys.n_proto != htons(ETH_P_IP) || - (keys.ip_proto != IPPROTO_TCP && keys.ip_proto != IPPROTO_UDP)) + res = skb_flow_dissect_flow_keys(skb, &keys); + if (!res || keys.basic.n_proto != htons(ETH_P_IP) || + (keys.basic.ip_proto != IPPROTO_TCP && + keys.basic.ip_proto != IPPROTO_UDP)) return -EPROTONOSUPPORT; tbl_idx = skb_get_hash_raw(skb) & ENIC_RFS_FLW_MASK; diff --git a/drivers/net/ethernet/cisco/enic/enic_ethtool.c b/drivers/net/ethernet/cisco/enic/enic_ethtool.c index 28d9ca675a27..7588f8dcc890 100644 --- a/drivers/net/ethernet/cisco/enic/enic_ethtool.c +++ b/drivers/net/ethernet/cisco/enic/enic_ethtool.c @@ -334,7 +334,7 @@ static int enic_grxclsrule(struct enic *enic, struct ethtool_rxnfc *cmd) n = htbl_fltr_search(enic, (u16)fsp->location); if (!n) return -EINVAL; - switch (n->keys.ip_proto) { + switch (n->keys.basic.ip_proto) { case IPPROTO_TCP: fsp->flow_type = TCP_V4_FLOW; break; @@ -346,16 +346,16 @@ static int enic_grxclsrule(struct enic *enic, struct ethtool_rxnfc *cmd) break; } - fsp->h_u.tcp_ip4_spec.ip4src = n->keys.src; + fsp->h_u.tcp_ip4_spec.ip4src = n->keys.addrs.src; fsp->m_u.tcp_ip4_spec.ip4src = (__u32)~0; - fsp->h_u.tcp_ip4_spec.ip4dst = n->keys.dst; + fsp->h_u.tcp_ip4_spec.ip4dst = n->keys.addrs.dst; fsp->m_u.tcp_ip4_spec.ip4dst = (__u32)~0; - fsp->h_u.tcp_ip4_spec.psrc = n->keys.port16[0]; + fsp->h_u.tcp_ip4_spec.psrc = n->keys.ports.port16[0]; fsp->m_u.tcp_ip4_spec.psrc = (__u16)~0; - fsp->h_u.tcp_ip4_spec.pdst = n->keys.port16[1]; + fsp->h_u.tcp_ip4_spec.pdst = n->keys.ports.port16[1]; fsp->m_u.tcp_ip4_spec.pdst = (__u16)~0; fsp->ring_cookie = n->rq_id; diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 5993c7e2d723..8e5fe888a0ec 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -196,12 +196,12 @@ static bool netvsc_set_hash(u32 *hash, struct sk_buff *skb) struct flow_keys flow; int data_len; - if (!skb_flow_dissect(skb, &flow) || - !(flow.n_proto == htons(ETH_P_IP) || - flow.n_proto == htons(ETH_P_IPV6))) + if (!skb_flow_dissect_flow_keys(skb, &flow) || + !(flow.basic.n_proto == htons(ETH_P_IP) || + flow.basic.n_proto == htons(ETH_P_IPV6))) return false; - if (flow.ip_proto == IPPROTO_TCP) + if (flow.basic.ip_proto == IPPROTO_TCP) data_len = 12; else data_len = 8; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index b01c7fba7c17..f83aa6568cbf 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1935,8 +1935,8 @@ static inline void skb_probe_transport_header(struct sk_buff *skb, if (skb_transport_header_was_set(skb)) return; - else if (skb_flow_dissect(skb, &keys)) - skb_set_transport_header(skb, keys.thoff); + else if (skb_flow_dissect_flow_keys(skb, &keys)) + skb_set_transport_header(skb, keys.basic.thoff); else skb_set_transport_header(skb, offset_hint); } diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 20239e807f77..0c8d406fb730 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -59,42 +59,47 @@ struct flow_dissector { unsigned short int offset[FLOW_DISSECTOR_KEY_MAX]; }; -/* struct flow_keys: - * @src: source ip address in case of IPv4 - * For IPv6 it contains 32bit hash of src address - * @dst: destination ip address in case of IPv4 - * For IPv6 it contains 32bit hash of dst address - * @ports: port numbers of Transport header - * port16[0]: src port number - * port16[1]: dst port number - * @thoff: Transport header offset - * @n_proto: Network header protocol (eg. IPv4/IPv6) - * @ip_proto: Transport header protocol (eg. TCP/UDP) - * All the members, except thoff, are in network byte order. - */ -struct flow_keys { - /* (src,dst) must be grouped, in the same way than in IP header */ - __be32 src; - __be32 dst; - union { - __be32 ports; - __be16 port16[2]; - }; - u16 thoff; - __be16 n_proto; - u8 ip_proto; -}; - void skb_flow_dissector_init(struct flow_dissector *flow_dissector, const struct flow_dissector_key *key, unsigned int key_count); -bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow, + +bool __skb_flow_dissect(const struct sk_buff *skb, + struct flow_dissector *flow_dissector, + void *target_container, void *data, __be16 proto, int nhoff, int hlen); static inline bool skb_flow_dissect(const struct sk_buff *skb, - struct flow_keys *flow) + struct flow_dissector *flow_dissector, + void *target_container) +{ + return __skb_flow_dissect(skb, flow_dissector, target_container, + NULL, 0, 0, 0); +} + +struct flow_keys { + struct flow_dissector_key_addrs addrs; + struct flow_dissector_key_ports ports; + struct flow_dissector_key_basic basic; +}; + +extern struct flow_dissector flow_keys_dissector; +extern struct flow_dissector flow_keys_buf_dissector; + +static inline bool skb_flow_dissect_flow_keys(const struct sk_buff *skb, + struct flow_keys *flow) +{ + memset(flow, 0, sizeof(*flow)); + return __skb_flow_dissect(skb, &flow_keys_dissector, flow, + NULL, 0, 0, 0); +} + +static inline bool skb_flow_dissect_flow_keys_buf(struct flow_keys *flow, + void *data, __be16 proto, + int nhoff, int hlen) { - return __skb_flow_dissect(skb, flow, NULL, 0, 0, 0); + memset(flow, 0, sizeof(*flow)); + return __skb_flow_dissect(NULL, &flow_keys_buf_dissector, flow, + data, proto, nhoff, hlen); } __be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto, diff --git a/include/net/ip.h b/include/net/ip.h index 562eb653aebc..b0443d4fe13f 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -360,10 +360,10 @@ static inline void inet_set_txhash(struct sock *sk) struct inet_sock *inet = inet_sk(sk); struct flow_keys keys; - keys.src = inet->inet_saddr; - keys.dst = inet->inet_daddr; - keys.port16[0] = inet->inet_sport; - keys.port16[1] = inet->inet_dport; + keys.addrs.src = inet->inet_saddr; + keys.addrs.dst = inet->inet_daddr; + keys.ports.port16[0] = inet->inet_sport; + keys.ports.port16[1] = inet->inet_dport; sk->sk_txhash = flow_hash_from_keys(&keys); } diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 9932b86394a9..9eed9761dfce 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -698,10 +698,10 @@ static inline void ip6_set_txhash(struct sock *sk) struct ipv6_pinfo *np = inet6_sk(sk); struct flow_keys keys; - keys.src = (__force __be32)ipv6_addr_hash(&np->saddr); - keys.dst = (__force __be32)ipv6_addr_hash(&sk->sk_v6_daddr); - keys.port16[0] = inet->inet_sport; - keys.port16[1] = inet->inet_dport; + keys.addrs.src = (__force __be32)ipv6_addr_hash(&np->saddr); + keys.addrs.dst = (__force __be32)ipv6_addr_hash(&sk->sk_v6_daddr); + keys.ports.port16[0] = inet->inet_sport; + keys.ports.port16[1] = inet->inet_dport; sk->sk_txhash = flow_hash_from_keys(&keys); } diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 6883e22b8fc4..6a49acaa6651 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -63,17 +64,6 @@ void skb_flow_dissector_init(struct flow_dissector *flow_dissector, } EXPORT_SYMBOL(skb_flow_dissector_init); -/* copy saddr & daddr, possibly using 64bit load/store - * Equivalent to : flow->src = iph->saddr; - * flow->dst = iph->daddr; - */ -static void iph_to_flow_copy_addrs(struct flow_keys *flow, const struct iphdr *iph) -{ - BUILD_BUG_ON(offsetof(typeof(*flow), dst) != - offsetof(typeof(*flow), src) + sizeof(flow->src)); - memcpy(&flow->src, &iph->saddr, sizeof(flow->src) + sizeof(flow->dst)); -} - /** * __skb_flow_get_ports - extract the upper layer ports and return them * @skb: sk_buff to extract the ports from @@ -111,17 +101,27 @@ EXPORT_SYMBOL(__skb_flow_get_ports); /** * __skb_flow_dissect - extract the flow_keys struct and return it * @skb: sk_buff to extract the flow from, can be NULL if the rest are specified + * @flow_dissector: list of keys to dissect + * @target_container: target structure to put dissected values into * @data: raw buffer pointer to the packet, if NULL use skb->data * @proto: protocol for which to get the flow, if @data is NULL use skb->protocol * @nhoff: network header offset, if @data is NULL use skb_network_offset(skb) * @hlen: packet header length, if @data is NULL use skb_headlen(skb) * - * The function will try to retrieve the struct flow_keys from either the skbuff - * or a raw buffer specified by the rest parameters + * The function will try to retrieve individual keys into target specified + * by flow_dissector from either the skbuff or a raw buffer specified by the + * rest parameters. + * + * Caller must take care of zeroing target container memory. */ -bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow, +bool __skb_flow_dissect(const struct sk_buff *skb, + struct flow_dissector *flow_dissector, + void *target_container, void *data, __be16 proto, int nhoff, int hlen) { + struct flow_dissector_key_basic *key_basic; + struct flow_dissector_key_addrs *key_addrs; + struct flow_dissector_key_ports *key_ports; u8 ip_proto; if (!data) { @@ -131,7 +131,12 @@ bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow, hlen = skb_headlen(skb); } - memset(flow, 0, sizeof(*flow)); + /* It is ensured by skb_flow_dissector_init() that basic key will + * be always present. + */ + key_basic = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_BASIC, + target_container); again: switch (proto) { @@ -148,14 +153,13 @@ ip: if (ip_is_fragment(iph)) ip_proto = 0; - /* skip the address processing if skb is NULL. The assumption - * here is that if there is no skb we are not looking for flow - * info but lengths and protocols. - */ - if (!skb) + if (!skb_flow_dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_IPV4_ADDRS)) break; - - iph_to_flow_copy_addrs(flow, iph); + key_addrs = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_IPV4_ADDRS, + target_container); + memcpy(key_addrs, &iph->saddr, sizeof(*key_addrs)); break; } case htons(ETH_P_IPV6): { @@ -171,12 +175,15 @@ ipv6: ip_proto = iph->nexthdr; nhoff += sizeof(struct ipv6hdr); - /* see comment above in IPv4 section */ - if (!skb) + if (!skb_flow_dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS)) break; + key_addrs = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS, + target_container); - flow->src = (__force __be32)ipv6_addr_hash(&iph->saddr); - flow->dst = (__force __be32)ipv6_addr_hash(&iph->daddr); + key_addrs->src = (__force __be32)ipv6_addr_hash(&iph->saddr); + key_addrs->dst = (__force __be32)ipv6_addr_hash(&iph->daddr); flow_label = ip6_flowlabel(iph); if (flow_label) { @@ -184,10 +191,18 @@ ipv6: * use that to represent the ports without any * further dissection. */ - flow->n_proto = proto; - flow->ip_proto = ip_proto; - flow->ports = flow_label; - flow->thoff = (u16)nhoff; + + key_basic->n_proto = proto; + key_basic->ip_proto = ip_proto; + key_basic->thoff = (u16)nhoff; + + if (!skb_flow_dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_PORTS)) + break; + key_ports = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_PORTS, + target_container); + key_ports->ports = flow_label; return true; } @@ -234,14 +249,22 @@ ipv6: hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr); if (!hdr) return false; - flow->src = hdr->srcnode; - flow->dst = 0; - flow->n_proto = proto; - flow->thoff = (u16)nhoff; + key_basic->n_proto = proto; + key_basic->thoff = (u16)nhoff; + + if (skb_flow_dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS)) { + return true; + key_addrs = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS, + target_container); + key_addrs->src = hdr->srcnode; + key_addrs->dst = 0; + } return true; } case htons(ETH_P_FCOE): - flow->thoff = (u16)(nhoff + FCOE_HEADER_LEN); + key_basic->thoff = (u16)(nhoff + FCOE_HEADER_LEN); /* fall through */ default: return false; @@ -296,14 +319,24 @@ ipv6: break; } - flow->n_proto = proto; - flow->ip_proto = ip_proto; - flow->thoff = (u16) nhoff; - - /* unless skb is set we don't need to record port info */ - if (skb) - flow->ports = __skb_flow_get_ports(skb, nhoff, ip_proto, - data, hlen); + /* It is ensured by skb_flow_dissector_init() that basic key will + * be always present. + */ + key_basic = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_BASIC, + target_container); + key_basic->n_proto = proto; + key_basic->ip_proto = ip_proto; + key_basic->thoff = (u16) nhoff; + + if (skb_flow_dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_PORTS)) { + key_ports = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_PORTS, + target_container); + key_ports->ports = __skb_flow_get_ports(skb, nhoff, ip_proto, + data, hlen); + } return true; } @@ -325,16 +358,16 @@ static inline u32 __flow_hash_from_keys(struct flow_keys *keys, u32 keyval) u32 hash; /* get a consistent hash (same value on both flow directions) */ - if (((__force u32)keys->dst < (__force u32)keys->src) || - (((__force u32)keys->dst == (__force u32)keys->src) && - ((__force u16)keys->port16[1] < (__force u16)keys->port16[0]))) { - swap(keys->dst, keys->src); - swap(keys->port16[0], keys->port16[1]); + if (((__force u32)keys->addrs.dst < (__force u32)keys->addrs.src) || + (((__force u32)keys->addrs.dst == (__force u32)keys->addrs.src) && + ((__force u16)keys->ports.port16[1] < (__force u16)keys->ports.port16[0]))) { + swap(keys->addrs.dst, keys->addrs.src); + swap(keys->ports.port16[0], keys->ports.port16[1]); } - hash = __flow_hash_3words((__force u32)keys->dst, - (__force u32)keys->src, - (__force u32)keys->ports, + hash = __flow_hash_3words((__force u32)keys->addrs.dst, + (__force u32)keys->addrs.src, + (__force u32)keys->ports.ports, keyval); if (!hash) hash = 1; @@ -352,7 +385,7 @@ EXPORT_SYMBOL(flow_hash_from_keys); static inline u32 ___skb_get_hash(const struct sk_buff *skb, struct flow_keys *keys, u32 keyval) { - if (!skb_flow_dissect(skb, keys)) + if (!skb_flow_dissect_flow_keys(skb, keys)) return 0; return __flow_hash_from_keys(keys, keyval); @@ -377,11 +410,11 @@ void make_flow_keys_digest(struct flow_keys_digest *digest, memset(digest, 0, sizeof(*digest)); - data->n_proto = flow->n_proto; - data->ip_proto = flow->ip_proto; - data->ports = flow->ports; - data->src = flow->src; - data->dst = flow->dst; + data->n_proto = flow->basic.n_proto; + data->ip_proto = flow->basic.ip_proto; + data->ports = flow->ports.ports; + data->src = flow->addrs.src; + data->dst = flow->addrs.dst; } EXPORT_SYMBOL(make_flow_keys_digest); @@ -404,7 +437,7 @@ void __skb_get_hash(struct sk_buff *skb) hash = ___skb_get_hash(skb, &keys, hashrnd); if (!hash) return; - if (keys.ports) + if (keys.ports.ports) skb->l4_hash = 1; skb->sw_hash = 1; skb->hash = hash; @@ -422,9 +455,9 @@ EXPORT_SYMBOL(skb_get_hash_perturb); u32 __skb_get_poff(const struct sk_buff *skb, void *data, const struct flow_keys *keys, int hlen) { - u32 poff = keys->thoff; + u32 poff = keys->basic.thoff; - switch (keys->ip_proto) { + switch (keys->basic.ip_proto) { case IPPROTO_TCP: { /* access doff as u8 to avoid unaligned access */ const u8 *doff; @@ -478,8 +511,52 @@ u32 skb_get_poff(const struct sk_buff *skb) { struct flow_keys keys; - if (!skb_flow_dissect(skb, &keys)) + if (!skb_flow_dissect_flow_keys(skb, &keys)) return 0; return __skb_get_poff(skb, skb->data, &keys, skb_headlen(skb)); } + +static const struct flow_dissector_key flow_keys_dissector_keys[] = { + { + .key_id = FLOW_DISSECTOR_KEY_BASIC, + .offset = offsetof(struct flow_keys, basic), + }, + { + .key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS, + .offset = offsetof(struct flow_keys, addrs), + }, + { + .key_id = FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS, + .offset = offsetof(struct flow_keys, addrs), + }, + { + .key_id = FLOW_DISSECTOR_KEY_PORTS, + .offset = offsetof(struct flow_keys, ports), + }, +}; + +static const struct flow_dissector_key flow_keys_buf_dissector_keys[] = { + { + .key_id = FLOW_DISSECTOR_KEY_BASIC, + .offset = offsetof(struct flow_keys, basic), + }, +}; + +struct flow_dissector flow_keys_dissector __read_mostly; +EXPORT_SYMBOL(flow_keys_dissector); + +struct flow_dissector flow_keys_buf_dissector __read_mostly; + +static int __init init_default_flow_dissectors(void) +{ + skb_flow_dissector_init(&flow_keys_dissector, + flow_keys_dissector_keys, + ARRAY_SIZE(flow_keys_dissector_keys)); + skb_flow_dissector_init(&flow_keys_buf_dissector, + flow_keys_buf_dissector_keys, + ARRAY_SIZE(flow_keys_buf_dissector_keys)); + return 0; +} + +late_initcall_sync(init_default_flow_dissectors); diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 9332a0ab0698..c3325bd2f3fb 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -131,9 +131,9 @@ u32 eth_get_headlen(void *data, unsigned int len) return len; /* parse any remaining L2/L3 headers, check for L4 */ - if (!__skb_flow_dissect(NULL, &keys, data, - eth->h_proto, sizeof(*eth), len)) - return max_t(u32, keys.thoff, sizeof(*eth)); + if (!skb_flow_dissect_flow_keys_buf(&keys, data, eth->h_proto, + sizeof(*eth), len)) + return max_t(u32, keys.basic.thoff, sizeof(*eth)); /* parse for any L4 headers */ return min_t(u32, __skb_get_poff(NULL, data, &keys, len), len); diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 4c5a92298906..4b3e3e30bf4d 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -68,35 +68,35 @@ static inline u32 addr_fold(void *addr) static u32 flow_get_src(const struct sk_buff *skb, const struct flow_keys *flow) { - if (flow->src) - return ntohl(flow->src); + if (flow->addrs.src) + return ntohl(flow->addrs.src); return addr_fold(skb->sk); } static u32 flow_get_dst(const struct sk_buff *skb, const struct flow_keys *flow) { - if (flow->dst) - return ntohl(flow->dst); + if (flow->addrs.dst) + return ntohl(flow->addrs.dst); return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb); } static u32 flow_get_proto(const struct sk_buff *skb, const struct flow_keys *flow) { - return flow->ip_proto; + return flow->basic.ip_proto; } static u32 flow_get_proto_src(const struct sk_buff *skb, const struct flow_keys *flow) { - if (flow->ports) - return ntohs(flow->port16[0]); + if (flow->ports.ports) + return ntohs(flow->ports.port16[0]); return addr_fold(skb->sk); } static u32 flow_get_proto_dst(const struct sk_buff *skb, const struct flow_keys *flow) { - if (flow->ports) - return ntohs(flow->port16[1]); + if (flow->ports.ports) + return ntohs(flow->ports.port16[1]); return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb); } @@ -295,7 +295,7 @@ static int flow_classify(struct sk_buff *skb, const struct tcf_proto *tp, keymask = f->keymask; if (keymask & FLOW_KEYS_NEEDED) - skb_flow_dissect(skb, &flow_keys); + skb_flow_dissect_flow_keys(skb, &flow_keys); for (n = 0; n < f->nkeys; n++) { key = ffs(keymask) - 1; diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index 2624e991a2d1..93d5742dc7e0 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -170,13 +170,13 @@ static bool choke_match_flow(struct sk_buff *skb1, if (!choke_skb_cb(skb1)->keys_valid) { choke_skb_cb(skb1)->keys_valid = 1; - skb_flow_dissect(skb1, &temp); + skb_flow_dissect_flow_keys(skb1, &temp); make_flow_keys_digest(&choke_skb_cb(skb1)->keys, &temp); } if (!choke_skb_cb(skb2)->keys_valid) { choke_skb_cb(skb2)->keys_valid = 1; - skb_flow_dissect(skb2, &temp); + skb_flow_dissect_flow_keys(skb2, &temp); make_flow_keys_digest(&choke_skb_cb(skb2)->keys, &temp); } -- cgit v1.2.3 From f0b5e8a42f37a880b8467e59dc814f4f21581d3d Mon Sep 17 00:00:00 2001 From: Pablo Neira Date: Tue, 12 May 2015 20:28:07 +0200 Subject: net: kill useless net_*_ingress_queue() definitions when NET_CLS_ACT is unset This fixes 4577139b2dabf589 ("net: use jump label patching for ingress qdisc in __netif_receive_skb_core"). The only client of this is sch_ingress and it depends on NET_CLS_ACT. So there is no way these definition can be of any help. Cc: Daniel Borkmann Signed-off-by: Pablo Neira Ayuso Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 7b8e260c4a27..bd29ab4b0941 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -82,14 +82,6 @@ struct netdev_queue *dev_ingress_queue_create(struct net_device *dev); #ifdef CONFIG_NET_CLS_ACT void net_inc_ingress_queue(void); void net_dec_ingress_queue(void); -#else -static inline void net_inc_ingress_queue(void) -{ -} - -static inline void net_dec_ingress_queue(void) -{ -} #endif extern void rtnetlink_init(void); -- cgit v1.2.3 From 87d5c18ce1f41a2410d4fb26d5d68c55867450f5 Mon Sep 17 00:00:00 2001 From: Pablo Neira Date: Wed, 13 May 2015 18:19:34 +0200 Subject: netfilter: cleanup struct nf_hook_ops indentation Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- include/linux/netfilter.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 63560d0a8dfe..83be4a3cec98 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -79,16 +79,16 @@ typedef unsigned int nf_hookfn(const struct nf_hook_ops *ops, const struct nf_hook_state *state); struct nf_hook_ops { - struct list_head list; + struct list_head list; /* User fills in from here down. */ - nf_hookfn *hook; - struct module *owner; - void *priv; - u_int8_t pf; - unsigned int hooknum; + nf_hookfn *hook; + struct module *owner; + void *priv; + u_int8_t pf; + unsigned int hooknum; /* Hooks are ordered in ascending priority. */ - int priority; + int priority; }; struct nf_sockopt_ops { -- cgit v1.2.3 From f7191483461ce2ae579b6f7227fa7ce49e006656 Mon Sep 17 00:00:00 2001 From: Pablo Neira Date: Wed, 13 May 2015 18:19:35 +0200 Subject: netfilter: add hook list to nf_hook_state Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- include/linux/netfilter.h | 7 +++++-- net/netfilter/core.c | 6 ++---- 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 83be4a3cec98..388ed1952242 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -54,10 +54,12 @@ struct nf_hook_state { struct net_device *in; struct net_device *out; struct sock *sk; + struct list_head *hook_list; int (*okfn)(struct sock *, struct sk_buff *); }; static inline void nf_hook_state_init(struct nf_hook_state *p, + struct list_head *hook_list, unsigned int hook, int thresh, u_int8_t pf, struct net_device *indev, @@ -71,6 +73,7 @@ static inline void nf_hook_state_init(struct nf_hook_state *p, p->in = indev; p->out = outdev; p->sk = sk; + p->hook_list = hook_list; p->okfn = okfn; } @@ -166,8 +169,8 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, if (nf_hooks_active(pf, hook)) { struct nf_hook_state state; - nf_hook_state_init(&state, hook, thresh, pf, - indev, outdev, sk, okfn); + nf_hook_state_init(&state, &nf_hooks[pf][hook], hook, thresh, + pf, indev, outdev, sk, okfn); return nf_hook_slow(skb, &state); } return 1; diff --git a/net/netfilter/core.c b/net/netfilter/core.c index e6163017c42d..e418cfd603c0 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -166,11 +166,9 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state) /* We may already have this, but read-locks nest anyway */ rcu_read_lock(); - elem = list_entry_rcu(&nf_hooks[state->pf][state->hook], - struct nf_hook_ops, list); + elem = list_entry_rcu(state->hook_list, struct nf_hook_ops, list); next_hook: - verdict = nf_iterate(&nf_hooks[state->pf][state->hook], skb, state, - &elem); + verdict = nf_iterate(state->hook_list, skb, state, &elem); if (verdict == NF_ACCEPT || verdict == NF_STOP) { ret = 1; } else if ((verdict & NF_VERDICT_MASK) == NF_DROP) { -- cgit v1.2.3 From b8d0aad0c77f488d1d51a02d871a5cbc2d8032b9 Mon Sep 17 00:00:00 2001 From: Pablo Neira Date: Wed, 13 May 2015 18:19:36 +0200 Subject: netfilter: add nf_hook_list_active() In preparation to have netfilter ingress per-device hook list. Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- include/linux/netfilter.h | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 388ed1952242..49d00638d1fa 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -134,26 +134,33 @@ extern struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; #ifdef HAVE_JUMP_LABEL extern struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; -static inline bool nf_hooks_active(u_int8_t pf, unsigned int hook) +static inline bool nf_hook_list_active(struct list_head *nf_hook_list, + u_int8_t pf, unsigned int hook) { if (__builtin_constant_p(pf) && __builtin_constant_p(hook)) return static_key_false(&nf_hooks_needed[pf][hook]); - return !list_empty(&nf_hooks[pf][hook]); + return !list_empty(nf_hook_list); } #else -static inline bool nf_hooks_active(u_int8_t pf, unsigned int hook) +static inline bool nf_hook_list_active(struct list_head *nf_hook_list, + u_int8_t pf, unsigned int hook) { - return !list_empty(&nf_hooks[pf][hook]); + return !list_empty(nf_hook_list); } #endif +static inline bool nf_hooks_active(u_int8_t pf, unsigned int hook) +{ + return nf_hook_list_active(&nf_hooks[pf][hook], pf, hook); +} + int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state); /** * nf_hook_thresh - call a netfilter hook - * + * * Returns 1 if the hook has allowed the packet to pass. The function * okfn must be invoked by the caller in this case. Any other return * value indicates the packet has been consumed by the hook. -- cgit v1.2.3 From 1cf51900f8545b358b5deaacfda348d990f671db Mon Sep 17 00:00:00 2001 From: Pablo Neira Date: Wed, 13 May 2015 18:19:37 +0200 Subject: net: add CONFIG_NET_INGRESS to enable ingress filtering This new config switch enables the ingress filtering infrastructure that is controlled through the ingress_needed static key. This prepares the introduction of the Netfilter ingress hook that resides under this unique static key. Note that CONFIG_SCH_INGRESS automatically selects this, that should be no problem since this also depends on CONFIG_NET_CLS_ACT. Signed-off-by: Pablo Neira Ayuso Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 2 +- net/Kconfig | 3 +++ net/core/dev.c | 7 ++++--- net/sched/Kconfig | 1 + 4 files changed, 9 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index bd29ab4b0941..a2324fb45cf4 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -79,7 +79,7 @@ static inline struct netdev_queue *dev_ingress_queue(struct net_device *dev) struct netdev_queue *dev_ingress_queue_create(struct net_device *dev); -#ifdef CONFIG_NET_CLS_ACT +#ifdef CONFIG_NET_INGRESS void net_inc_ingress_queue(void); void net_dec_ingress_queue(void); #endif diff --git a/net/Kconfig b/net/Kconfig index 44dd5786ee91..57a7c5af3175 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -45,6 +45,9 @@ config COMPAT_NETLINK_MESSAGES Newly written code should NEVER need this option but do compat-independent messages instead! +config NET_INGRESS + bool + menu "Networking options" source "net/packet/Kconfig" diff --git a/net/core/dev.c b/net/core/dev.c index af549062ae8e..a5ef90016ce7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1630,7 +1630,7 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev) } EXPORT_SYMBOL(call_netdevice_notifiers); -#ifdef CONFIG_NET_CLS_ACT +#ifdef CONFIG_NET_INGRESS static struct static_key ingress_needed __read_mostly; void net_inc_ingress_queue(void) @@ -3798,13 +3798,14 @@ another_round: } skip_taps: -#ifdef CONFIG_NET_CLS_ACT +#ifdef CONFIG_NET_INGRESS if (static_key_false(&ingress_needed)) { skb = handle_ing(skb, &pt_prev, &ret, orig_dev); if (!skb) goto unlock; } - +#endif +#ifdef CONFIG_NET_CLS_ACT skb->tc_verd = 0; ncls: #endif diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 5fd1c2f487d2..daa33432b716 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -312,6 +312,7 @@ config NET_SCH_PIE config NET_SCH_INGRESS tristate "Ingress Qdisc" depends on NET_CLS_ACT + select NET_INGRESS ---help--- Say Y here if you want to use classifiers for incoming packets. If unsure, say Y. -- cgit v1.2.3 From e687ad60af09010936bbd0b2a3b5d90a8ee8353c Mon Sep 17 00:00:00 2001 From: Pablo Neira Date: Wed, 13 May 2015 18:19:38 +0200 Subject: netfilter: add netfilter ingress hook after handle_ing() under unique static key This patch adds the Netfilter ingress hook just after the existing tc ingress hook, that seems to be the consensus solution for this. Note that the Netfilter hook resides under the global static key that enables ingress filtering. Nonetheless, Netfilter still also has its own static key for minimal impact on the existing handle_ing(). * Without this patch: Result: OK: 6216490(c6216338+d152) usec, 100000000 (60byte,0frags) 16086246pps 7721Mb/sec (7721398080bps) errors: 100000000 42.46% kpktgend_0 [kernel.kallsyms] [k] __netif_receive_skb_core 25.92% kpktgend_0 [kernel.kallsyms] [k] kfree_skb 7.81% kpktgend_0 [pktgen] [k] pktgen_thread_worker 5.62% kpktgend_0 [kernel.kallsyms] [k] ip_rcv 2.70% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_internal 2.34% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_sk 1.44% kpktgend_0 [kernel.kallsyms] [k] __build_skb * With this patch: Result: OK: 6214833(c6214731+d101) usec, 100000000 (60byte,0frags) 16090536pps 7723Mb/sec (7723457280bps) errors: 100000000 41.23% kpktgend_0 [kernel.kallsyms] [k] __netif_receive_skb_core 26.57% kpktgend_0 [kernel.kallsyms] [k] kfree_skb 7.72% kpktgend_0 [pktgen] [k] pktgen_thread_worker 5.55% kpktgend_0 [kernel.kallsyms] [k] ip_rcv 2.78% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_internal 2.06% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_sk 1.43% kpktgend_0 [kernel.kallsyms] [k] __build_skb * Without this patch + tc ingress: tc filter add dev eth4 parent ffff: protocol ip prio 1 \ u32 match ip dst 4.3.2.1/32 Result: OK: 9269001(c9268821+d179) usec, 100000000 (60byte,0frags) 10788648pps 5178Mb/sec (5178551040bps) errors: 100000000 40.99% kpktgend_0 [kernel.kallsyms] [k] __netif_receive_skb_core 17.50% kpktgend_0 [kernel.kallsyms] [k] kfree_skb 11.77% kpktgend_0 [cls_u32] [k] u32_classify 5.62% kpktgend_0 [kernel.kallsyms] [k] tc_classify_compat 5.18% kpktgend_0 [pktgen] [k] pktgen_thread_worker 3.23% kpktgend_0 [kernel.kallsyms] [k] tc_classify 2.97% kpktgend_0 [kernel.kallsyms] [k] ip_rcv 1.83% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_internal 1.50% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_sk 0.99% kpktgend_0 [kernel.kallsyms] [k] __build_skb * With this patch + tc ingress: tc filter add dev eth4 parent ffff: protocol ip prio 1 \ u32 match ip dst 4.3.2.1/32 Result: OK: 9308218(c9308091+d126) usec, 100000000 (60byte,0frags) 10743194pps 5156Mb/sec (5156733120bps) errors: 100000000 42.01% kpktgend_0 [kernel.kallsyms] [k] __netif_receive_skb_core 17.78% kpktgend_0 [kernel.kallsyms] [k] kfree_skb 11.70% kpktgend_0 [cls_u32] [k] u32_classify 5.46% kpktgend_0 [kernel.kallsyms] [k] tc_classify_compat 5.16% kpktgend_0 [pktgen] [k] pktgen_thread_worker 2.98% kpktgend_0 [kernel.kallsyms] [k] ip_rcv 2.84% kpktgend_0 [kernel.kallsyms] [k] tc_classify 1.96% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_internal 1.57% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_sk Note that the results are very similar before and after. I can see gcc gets the code under the ingress static key out of the hot path. Then, on that cold branch, it generates the code to accomodate the netfilter ingress static key. My explanation for this is that this reduces the pressure on the instruction cache for non-users as the new code is out of the hot path, and it comes with minimal impact for tc ingress users. Using gcc version 4.8.4 on: Architecture: x86_64 CPU op-mode(s): 32-bit, 64-bit Byte Order: Little Endian CPU(s): 8 [...] L1d cache: 16K L1i cache: 64K L2 cache: 2048K L3 cache: 8192K Signed-off-by: Pablo Neira Ayuso Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +++ include/linux/netfilter.h | 1 + include/linux/netfilter_ingress.h | 41 +++++++++++++++++++++++++++++++++++++++ include/uapi/linux/netfilter.h | 6 ++++++ net/core/dev.c | 36 ++++++++++++++++++++++++++++++++++ net/netfilter/Kconfig | 7 +++++++ net/netfilter/core.c | 31 ++++++++++++++++++++++++++++- 7 files changed, 124 insertions(+), 1 deletion(-) create mode 100644 include/linux/netfilter_ingress.h (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d3ed01c18247..51f8d2f5dc3f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1656,6 +1656,9 @@ struct net_device { struct tcf_proto __rcu *ingress_cl_list; #endif struct netdev_queue __rcu *ingress_queue; +#ifdef CONFIG_NETFILTER_INGRESS + struct list_head nf_hooks_ingress; +#endif unsigned char broadcast[MAX_ADDR_LEN]; #ifdef CONFIG_RFS_ACCEL diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 49d00638d1fa..f5ff5d156da8 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -86,6 +86,7 @@ struct nf_hook_ops { /* User fills in from here down. */ nf_hookfn *hook; + struct net_device *dev; struct module *owner; void *priv; u_int8_t pf; diff --git a/include/linux/netfilter_ingress.h b/include/linux/netfilter_ingress.h new file mode 100644 index 000000000000..cb0727fe2b3d --- /dev/null +++ b/include/linux/netfilter_ingress.h @@ -0,0 +1,41 @@ +#ifndef _NETFILTER_INGRESS_H_ +#define _NETFILTER_INGRESS_H_ + +#include +#include + +#ifdef CONFIG_NETFILTER_INGRESS +static inline int nf_hook_ingress_active(struct sk_buff *skb) +{ + return nf_hook_list_active(&skb->dev->nf_hooks_ingress, + NFPROTO_NETDEV, NF_NETDEV_INGRESS); +} + +static inline int nf_hook_ingress(struct sk_buff *skb) +{ + struct nf_hook_state state; + + nf_hook_state_init(&state, &skb->dev->nf_hooks_ingress, + NF_NETDEV_INGRESS, INT_MIN, NFPROTO_NETDEV, NULL, + skb->dev, NULL, NULL); + return nf_hook_slow(skb, &state); +} + +static inline void nf_hook_ingress_init(struct net_device *dev) +{ + INIT_LIST_HEAD(&dev->nf_hooks_ingress); +} +#else /* CONFIG_NETFILTER_INGRESS */ +static inline int nf_hook_ingress_active(struct sk_buff *skb) +{ + return 0; +} + +static inline int nf_hook_ingress(struct sk_buff *skb) +{ + return 0; +} + +static inline void nf_hook_ingress_init(struct net_device *dev) {} +#endif /* CONFIG_NETFILTER_INGRESS */ +#endif /* _NETFILTER_INGRESS_H_ */ diff --git a/include/uapi/linux/netfilter.h b/include/uapi/linux/netfilter.h index ef1b1f88ca18..177027cce6b3 100644 --- a/include/uapi/linux/netfilter.h +++ b/include/uapi/linux/netfilter.h @@ -51,11 +51,17 @@ enum nf_inet_hooks { NF_INET_NUMHOOKS }; +enum nf_dev_hooks { + NF_NETDEV_INGRESS, + NF_NETDEV_NUMHOOKS +}; + enum { NFPROTO_UNSPEC = 0, NFPROTO_INET = 1, NFPROTO_IPV4 = 2, NFPROTO_ARP = 3, + NFPROTO_NETDEV = 5, NFPROTO_BRIDGE = 7, NFPROTO_IPV6 = 10, NFPROTO_DECNET = 12, diff --git a/net/core/dev.c b/net/core/dev.c index a5ef90016ce7..29f0d6e6542c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -135,6 +135,7 @@ #include #include #include +#include #include "net-sysfs.h" @@ -3666,6 +3667,13 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb, return skb; } +#else +static inline struct sk_buff *handle_ing(struct sk_buff *skb, + struct packet_type **pt_prev, + int *ret, struct net_device *orig_dev) +{ + return skb; +} #endif /** @@ -3739,6 +3747,28 @@ static bool skb_pfmemalloc_protocol(struct sk_buff *skb) } } +#ifdef CONFIG_NETFILTER_INGRESS +static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev, + int *ret, struct net_device *orig_dev) +{ + if (nf_hook_ingress_active(skb)) { + if (*pt_prev) { + *ret = deliver_skb(skb, *pt_prev, orig_dev); + *pt_prev = NULL; + } + + return nf_hook_ingress(skb); + } + return 0; +} +#else +static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev, + int *ret, struct net_device *orig_dev) +{ + return 0; +} +#endif + static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc) { struct packet_type *ptype, *pt_prev; @@ -3803,6 +3833,9 @@ skip_taps: skb = handle_ing(skb, &pt_prev, &ret, orig_dev); if (!skb) goto unlock; + + if (nf_ingress(skb, &pt_prev, &ret, orig_dev) < 0) + goto unlock; } #endif #ifdef CONFIG_NET_CLS_ACT @@ -6968,6 +7001,9 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, dev->group = INIT_NETDEV_GROUP; if (!dev->ethtool_ops) dev->ethtool_ops = &default_ethtool_ops; + + nf_hook_ingress_init(dev); + return dev; free_all: diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index f70e34a68f70..db1c674397ad 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -1,6 +1,13 @@ menu "Core Netfilter Configuration" depends on NET && INET && NETFILTER +config NETFILTER_INGRESS + bool "Netfilter ingress support" + select NET_INGRESS + help + This allows you to classify packets from ingress using the Netfilter + infrastructure. + config NETFILTER_NETLINK tristate diff --git a/net/netfilter/core.c b/net/netfilter/core.c index e418cfd603c0..653e32eac08c 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -64,10 +64,27 @@ static DEFINE_MUTEX(nf_hook_mutex); int nf_register_hook(struct nf_hook_ops *reg) { + struct list_head *nf_hook_list; struct nf_hook_ops *elem; mutex_lock(&nf_hook_mutex); - list_for_each_entry(elem, &nf_hooks[reg->pf][reg->hooknum], list) { + switch (reg->pf) { + case NFPROTO_NETDEV: +#ifdef CONFIG_NETFILTER_INGRESS + if (reg->hooknum == NF_NETDEV_INGRESS) { + BUG_ON(reg->dev == NULL); + nf_hook_list = ®->dev->nf_hooks_ingress; + net_inc_ingress_queue(); + break; + } +#endif + /* Fall through. */ + default: + nf_hook_list = &nf_hooks[reg->pf][reg->hooknum]; + break; + } + + list_for_each_entry(elem, nf_hook_list, list) { if (reg->priority < elem->priority) break; } @@ -85,6 +102,18 @@ void nf_unregister_hook(struct nf_hook_ops *reg) mutex_lock(&nf_hook_mutex); list_del_rcu(®->list); mutex_unlock(&nf_hook_mutex); + switch (reg->pf) { + case NFPROTO_NETDEV: +#ifdef CONFIG_NETFILTER_INGRESS + if (reg->hooknum == NF_NETDEV_INGRESS) { + net_dec_ingress_queue(); + break; + } + break; +#endif + default: + break; + } #ifdef HAVE_JUMP_LABEL static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]); #endif -- cgit v1.2.3 From 7fb48c5bc3100f7674a8e26f42c1518196500728 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 3 May 2015 22:05:28 +0200 Subject: netfilter: bridge: neigh_head and physoutdev can't be used at same time The neigh_header is only needed when we detect DNAT after prerouting and neigh cache didn't have a mac address for us. The output port has not been chosen yet so we can re-use the storage area, bringing struct size down to 32 bytes on x86_64. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/skbuff.h | 8 +++++--- net/bridge/br_netfilter.c | 2 ++ 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c0b574a414e7..3d932e64125a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -170,12 +170,14 @@ struct nf_bridge_info { BRNF_PROTO_UNCHANGED, BRNF_PROTO_8021Q, BRNF_PROTO_PPPOE - } orig_proto; + } orig_proto:8; bool pkt_otherhost; unsigned int mask; struct net_device *physindev; - struct net_device *physoutdev; - char neigh_header[8]; + union { + struct net_device *physoutdev; + char neigh_header[8]; + }; }; #endif diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index ab55e2472beb..13973da29b2a 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -973,6 +973,8 @@ static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb) nf_bridge->neigh_header, ETH_HLEN - ETH_ALEN); skb->dev = nf_bridge->physindev; + + nf_bridge->physoutdev = NULL; br_handle_frame_finish(NULL, skb); } -- cgit v1.2.3 From a3b1c1eb50f9b3e0c73c37157d0c61b2e90ae580 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Wed, 6 May 2015 16:28:57 +0200 Subject: netfilter: ipset: deinline ip_set_put_extensions() On x86 allyesconfig build: The function compiles to 489 bytes of machine code. It has 25 callsites. text data bss dec hex filename 82441375 22255384 20627456 125324215 7784bb7 vmlinux.before 82434909 22255384 20627456 125317749 7783275 vmlinux Signed-off-by: Denys Vlasenko CC: Jozsef Kadlecsik CC: Eric W. Biederman CC: David S. Miller CC: Jan Engelhardt CC: Jiri Pirko CC: linux-kernel@vger.kernel.org CC: netdev@vger.kernel.org CC: netfilter-devel@vger.kernel.org Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/ipset/ip_set.h | 24 ++---------------------- net/netfilter/ipset/ip_set_core.c | 25 +++++++++++++++++++++++++ 2 files changed, 27 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index f88be7258e5f..ffdfdc24952a 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -533,29 +533,9 @@ bitmap_bytes(u32 a, u32 b) #include #include -static inline int +int ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set, - const void *e, bool active) -{ - if (SET_WITH_TIMEOUT(set)) { - unsigned long *timeout = ext_timeout(e, set); - - if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT, - htonl(active ? ip_set_timeout_get(timeout) - : *timeout))) - return -EMSGSIZE; - } - if (SET_WITH_COUNTER(set) && - ip_set_put_counter(skb, ext_counter(e, set))) - return -EMSGSIZE; - if (SET_WITH_COMMENT(set) && - ip_set_put_comment(skb, ext_comment(e, set))) - return -EMSGSIZE; - if (SET_WITH_SKBINFO(set) && - ip_set_put_skbinfo(skb, ext_skbinfo(e, set))) - return -EMSGSIZE; - return 0; -} + const void *e, bool active); #define IP_SET_INIT_KEXT(skb, opt, set) \ { .bytes = (skb)->len, .packets = 1, \ diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 7f9c0565e715..475e4960a164 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -432,6 +432,31 @@ ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[], } EXPORT_SYMBOL_GPL(ip_set_get_extensions); +int +ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set, + const void *e, bool active) +{ + if (SET_WITH_TIMEOUT(set)) { + unsigned long *timeout = ext_timeout(e, set); + + if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT, + htonl(active ? ip_set_timeout_get(timeout) + : *timeout))) + return -EMSGSIZE; + } + if (SET_WITH_COUNTER(set) && + ip_set_put_counter(skb, ext_counter(e, set))) + return -EMSGSIZE; + if (SET_WITH_COMMENT(set) && + ip_set_put_comment(skb, ext_comment(e, set))) + return -EMSGSIZE; + if (SET_WITH_SKBINFO(set) && + ip_set_put_skbinfo(skb, ext_skbinfo(e, set))) + return -EMSGSIZE; + return 0; +} +EXPORT_SYMBOL_GPL(ip_set_put_extensions); + /* * Creating/destroying/renaming/swapping affect the existence and * the properties of a set. All of these can be executed from userspace -- cgit v1.2.3 From 922f2dd1b65a888e34c472979460dc23211750a2 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 12 May 2015 10:33:24 -0700 Subject: net: phy: Add phy_ignore_ta_mask to account for broken turn-around Some PHY devices/switches will not release the turn-around line as they should do at the end of a MDIO transaction. To help with such situations, allow MDIO bus drivers to be made aware of such restrictions. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/phy.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 685809835b5c..701c7a3946e0 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -181,6 +181,9 @@ struct mii_bus { /* PHY addresses to be ignored when probing */ u32 phy_mask; + /* PHY addresses to ignore the TA/read failure */ + u32 phy_ignore_ta_mask; + /* * Pointer to an array of interrupts, each PHY's * interrupt at the index matching its address -- cgit v1.2.3 From a4afd37b26f4b9f640310a89b7f8d176ae3460b1 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 13 May 2015 13:12:43 +0200 Subject: test_bpf: add tests related to BPF_MAXINSNS Couple of torture test cases related to the bug fixed in 0b59d8806a31 ("ARM: net: delegate filter to kernel interpreter when imm_offset() return value can't fit into 12bits."). I've added a helper to allocate and fill the insn space. Output on x86_64 from my laptop: test_bpf: #233 BPF_MAXINSNS: Maximum possible literals jited:0 7 PASS test_bpf: #234 BPF_MAXINSNS: Single literal jited:0 8 PASS test_bpf: #235 BPF_MAXINSNS: Run/add until end jited:0 11553 PASS test_bpf: #236 BPF_MAXINSNS: Too many instructions PASS test_bpf: #237 BPF_MAXINSNS: Very long jump jited:0 9 PASS test_bpf: #238 BPF_MAXINSNS: Ctx heavy transformations jited:0 20329 20398 PASS test_bpf: #239 BPF_MAXINSNS: Call heavy transformations jited:0 32178 32475 PASS test_bpf: #240 BPF_MAXINSNS: Jump heavy test jited:0 10518 PASS test_bpf: #233 BPF_MAXINSNS: Maximum possible literals jited:1 4 PASS test_bpf: #234 BPF_MAXINSNS: Single literal jited:1 4 PASS test_bpf: #235 BPF_MAXINSNS: Run/add until end jited:1 1625 PASS test_bpf: #236 BPF_MAXINSNS: Too many instructions PASS test_bpf: #237 BPF_MAXINSNS: Very long jump jited:1 8 PASS test_bpf: #238 BPF_MAXINSNS: Ctx heavy transformations jited:1 3301 3174 PASS test_bpf: #239 BPF_MAXINSNS: Call heavy transformations jited:1 24107 23491 PASS test_bpf: #240 BPF_MAXINSNS: Jump heavy test jited:1 8651 PASS Signed-off-by: Daniel Borkmann Cc: Alexei Starovoitov Cc: Nicolas Schichan Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 8 ++ lib/test_bpf.c | 317 +++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 316 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index ce1d72d34382..200be4a74a33 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -277,6 +277,14 @@ struct bpf_prog_aux; .off = 0, \ .imm = 0 }) +/* Internal classic blocks for direct assignment */ + +#define __BPF_STMT(CODE, K) \ + ((struct sock_filter) BPF_STMT(CODE, K)) + +#define __BPF_JUMP(CODE, K, JT, JF) \ + ((struct sock_filter) BPF_JUMP(CODE, K, JT, JF)) + #define bytes_to_bpf_size(bytes) \ ({ \ int bpf_size = -EINVAL; \ diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 8bca780e3613..66358b21acce 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -21,6 +21,7 @@ #include #include #include +#include /* General test specific settings */ #define MAX_SUBTESTS 3 @@ -67,6 +68,10 @@ struct bpf_test { union { struct sock_filter insns[MAX_INSNS]; struct bpf_insn insns_int[MAX_INSNS]; + struct { + void *insns; + unsigned int len; + } ptr; } u; __u8 aux; __u8 data[MAX_DATA]; @@ -74,8 +79,190 @@ struct bpf_test { int data_size; __u32 result; } test[MAX_SUBTESTS]; + int (*fill_helper)(struct bpf_test *self); }; +/* Large test cases need separate allocation and fill handler. */ + +static int bpf_fill_maxinsns1(struct bpf_test *self) +{ + unsigned int len = BPF_MAXINSNS; + struct sock_filter *insn; + __u32 k = ~0; + int i; + + insn = kmalloc_array(len, sizeof(*insn), GFP_KERNEL); + if (!insn) + return -ENOMEM; + + for (i = 0; i < len; i++, k--) + insn[i] = __BPF_STMT(BPF_RET | BPF_K, k); + + self->u.ptr.insns = insn; + self->u.ptr.len = len; + + return 0; +} + +static int bpf_fill_maxinsns2(struct bpf_test *self) +{ + unsigned int len = BPF_MAXINSNS; + struct sock_filter *insn; + int i; + + insn = kmalloc_array(len, sizeof(*insn), GFP_KERNEL); + if (!insn) + return -ENOMEM; + + for (i = 0; i < len; i++) + insn[i] = __BPF_STMT(BPF_RET | BPF_K, 0xfefefefe); + + self->u.ptr.insns = insn; + self->u.ptr.len = len; + + return 0; +} + +static int bpf_fill_maxinsns3(struct bpf_test *self) +{ + unsigned int len = BPF_MAXINSNS; + struct sock_filter *insn; + struct rnd_state rnd; + int i; + + insn = kmalloc_array(len, sizeof(*insn), GFP_KERNEL); + if (!insn) + return -ENOMEM; + + prandom_seed_state(&rnd, 3141592653589793238ULL); + + for (i = 0; i < len - 1; i++) { + __u32 k = prandom_u32_state(&rnd); + + insn[i] = __BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, k); + } + + insn[len - 1] = __BPF_STMT(BPF_RET | BPF_A, 0); + + self->u.ptr.insns = insn; + self->u.ptr.len = len; + + return 0; +} + +static int bpf_fill_maxinsns4(struct bpf_test *self) +{ + unsigned int len = BPF_MAXINSNS + 1; + struct sock_filter *insn; + int i; + + insn = kmalloc_array(len, sizeof(*insn), GFP_KERNEL); + if (!insn) + return -ENOMEM; + + for (i = 0; i < len; i++) + insn[i] = __BPF_STMT(BPF_RET | BPF_K, 0xfefefefe); + + self->u.ptr.insns = insn; + self->u.ptr.len = len; + + return 0; +} + +static int bpf_fill_maxinsns5(struct bpf_test *self) +{ + unsigned int len = BPF_MAXINSNS; + struct sock_filter *insn; + int i; + + insn = kmalloc_array(len, sizeof(*insn), GFP_KERNEL); + if (!insn) + return -ENOMEM; + + insn[0] = __BPF_JUMP(BPF_JMP | BPF_JA, len - 2, 0, 0); + + for (i = 1; i < len - 1; i++) + insn[i] = __BPF_STMT(BPF_RET | BPF_K, 0xfefefefe); + + insn[len - 1] = __BPF_STMT(BPF_RET | BPF_K, 0xabababab); + + self->u.ptr.insns = insn; + self->u.ptr.len = len; + + return 0; +} + +static int bpf_fill_maxinsns6(struct bpf_test *self) +{ + unsigned int len = BPF_MAXINSNS; + struct sock_filter *insn; + int i; + + insn = kmalloc_array(len, sizeof(*insn), GFP_KERNEL); + if (!insn) + return -ENOMEM; + + for (i = 0; i < len - 1; i++) + insn[i] = __BPF_STMT(BPF_LD | BPF_W | BPF_ABS, SKF_AD_OFF + + SKF_AD_VLAN_TAG_PRESENT); + + insn[len - 1] = __BPF_STMT(BPF_RET | BPF_A, 0); + + self->u.ptr.insns = insn; + self->u.ptr.len = len; + + return 0; +} + +static int bpf_fill_maxinsns7(struct bpf_test *self) +{ + unsigned int len = BPF_MAXINSNS; + struct sock_filter *insn; + int i; + + insn = kmalloc_array(len, sizeof(*insn), GFP_KERNEL); + if (!insn) + return -ENOMEM; + + for (i = 0; i < len - 4; i++) + insn[i] = __BPF_STMT(BPF_LD | BPF_W | BPF_ABS, SKF_AD_OFF + + SKF_AD_CPU); + + insn[len - 4] = __BPF_STMT(BPF_MISC | BPF_TAX, 0); + insn[len - 3] = __BPF_STMT(BPF_LD | BPF_W | BPF_ABS, SKF_AD_OFF + + SKF_AD_CPU); + insn[len - 2] = __BPF_STMT(BPF_ALU | BPF_SUB | BPF_X, 0); + insn[len - 1] = __BPF_STMT(BPF_RET | BPF_A, 0); + + self->u.ptr.insns = insn; + self->u.ptr.len = len; + + return 0; +} + +static int bpf_fill_maxinsns8(struct bpf_test *self) +{ + unsigned int len = BPF_MAXINSNS; + struct sock_filter *insn; + int i, jmp_off = len - 3; + + insn = kmalloc_array(len, sizeof(*insn), GFP_KERNEL); + if (!insn) + return -ENOMEM; + + insn[0] = __BPF_STMT(BPF_LD | BPF_IMM, 0xffffffff); + + for (i = 1; i < len - 1; i++) + insn[i] = __BPF_JUMP(BPF_JMP | BPF_JGT, 0xffffffff, jmp_off--, 0); + + insn[len - 1] = __BPF_STMT(BPF_RET | BPF_A, 0); + + self->u.ptr.insns = insn; + self->u.ptr.len = len; + + return 0; +} + static struct bpf_test tests[] = { { "TAX", @@ -3998,6 +4185,73 @@ static struct bpf_test tests[] = { { }, { { 0, 1 } }, }, + { /* Mainly checking JIT here. */ + "BPF_MAXINSNS: Maximum possible literals", + { }, + CLASSIC | FLAG_NO_DATA, + { }, + { { 0, 0xffffffff } }, + .fill_helper = bpf_fill_maxinsns1, + }, + { /* Mainly checking JIT here. */ + "BPF_MAXINSNS: Single literal", + { }, + CLASSIC | FLAG_NO_DATA, + { }, + { { 0, 0xfefefefe } }, + .fill_helper = bpf_fill_maxinsns2, + }, + { /* Mainly checking JIT here. */ + "BPF_MAXINSNS: Run/add until end", + { }, + CLASSIC | FLAG_NO_DATA, + { }, + { { 0, 0x947bf368 } }, + .fill_helper = bpf_fill_maxinsns3, + }, + { + "BPF_MAXINSNS: Too many instructions", + { }, + CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL, + { }, + { }, + .fill_helper = bpf_fill_maxinsns4, + }, + { /* Mainly checking JIT here. */ + "BPF_MAXINSNS: Very long jump", + { }, + CLASSIC | FLAG_NO_DATA, + { }, + { { 0, 0xabababab } }, + .fill_helper = bpf_fill_maxinsns5, + }, + { /* Mainly checking JIT here. */ + "BPF_MAXINSNS: Ctx heavy transformations", + { }, + CLASSIC, + { }, + { + { 1, !!(SKB_VLAN_TCI & VLAN_TAG_PRESENT) }, + { 10, !!(SKB_VLAN_TCI & VLAN_TAG_PRESENT) } + }, + .fill_helper = bpf_fill_maxinsns6, + }, + { /* Mainly checking JIT here. */ + "BPF_MAXINSNS: Call heavy transformations", + { }, + CLASSIC | FLAG_NO_DATA, + { }, + { { 1, 0 }, { 10, 0 } }, + .fill_helper = bpf_fill_maxinsns7, + }, + { /* Mainly checking JIT here. */ + "BPF_MAXINSNS: Jump heavy test", + { }, + CLASSIC | FLAG_NO_DATA, + { }, + { { 0, 0xffffffff } }, + .fill_helper = bpf_fill_maxinsns8, + }, }; static struct net_device dev; @@ -4051,10 +4305,15 @@ static void release_test_data(const struct bpf_test *test, void *data) kfree_skb(data); } -static int probe_filter_length(struct sock_filter *fp) +static int filter_length(int which) { - int len = 0; + struct sock_filter *fp; + int len; + if (tests[which].fill_helper) + return tests[which].u.ptr.len; + + fp = tests[which].u.insns; for (len = MAX_INSNS - 1; len > 0; --len) if (fp[len].code != 0 || fp[len].k != 0) break; @@ -4062,16 +4321,25 @@ static int probe_filter_length(struct sock_filter *fp) return len + 1; } +static void *filter_pointer(int which) +{ + if (tests[which].fill_helper) + return tests[which].u.ptr.insns; + else + return tests[which].u.insns; +} + static struct bpf_prog *generate_filter(int which, int *err) { - struct bpf_prog *fp; - struct sock_fprog_kern fprog; - unsigned int flen = probe_filter_length(tests[which].u.insns); __u8 test_type = tests[which].aux & TEST_TYPE_MASK; + unsigned int flen = filter_length(which); + void *fptr = filter_pointer(which); + struct sock_fprog_kern fprog; + struct bpf_prog *fp; switch (test_type) { case CLASSIC: - fprog.filter = tests[which].u.insns; + fprog.filter = fptr; fprog.len = flen; *err = bpf_prog_create(&fp, &fprog); @@ -4107,8 +4375,7 @@ static struct bpf_prog *generate_filter(int which, int *err) } fp->len = flen; - memcpy(fp->insnsi, tests[which].u.insns_int, - fp->len * sizeof(struct bpf_insn)); + memcpy(fp->insnsi, fptr, fp->len * sizeof(struct bpf_insn)); bpf_prog_select_runtime(fp); break; @@ -4180,6 +4447,29 @@ static int run_one(const struct bpf_prog *fp, struct bpf_test *test) return err_cnt; } +static __init int prepare_bpf_tests(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(tests); i++) { + if (tests[i].fill_helper && + tests[i].fill_helper(&tests[i]) < 0) + return -ENOMEM; + } + + return 0; +} + +static __init void destroy_bpf_tests(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(tests); i++) { + if (tests[i].fill_helper) + kfree(tests[i].u.ptr.insns); + } +} + static __init int test_bpf(void) { int i, err_cnt = 0, pass_cnt = 0; @@ -4227,7 +4517,16 @@ static __init int test_bpf(void) static int __init test_bpf_init(void) { - return test_bpf(); + int ret; + + ret = prepare_bpf_tests(); + if (ret < 0) + return ret; + + ret = test_bpf(); + + destroy_bpf_tests(); + return ret; } static void __exit test_bpf_exit(void) -- cgit v1.2.3 From ef7f3a5c7149ad2dbd1d8a71d0aa88a02d1dbcb8 Mon Sep 17 00:00:00 2001 From: Bert Vermeulen Date: Wed, 13 May 2015 13:35:39 +0200 Subject: mdio-gpio: Propagate mii_bus.phy_ignore_ta_mask This also changes mii_bus.phy_mask to u32 for consistency. Signed-off-by: Bert Vermeulen Signed-off-by: David S. Miller --- drivers/net/phy/mdio-gpio.c | 1 + include/linux/mdio-gpio.h | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/net/phy/mdio-gpio.c b/drivers/net/phy/mdio-gpio.c index 53d18150f4e2..7dc21e56a7aa 100644 --- a/drivers/net/phy/mdio-gpio.c +++ b/drivers/net/phy/mdio-gpio.c @@ -158,6 +158,7 @@ static struct mii_bus *mdio_gpio_bus_init(struct device *dev, new_bus->name = "GPIO Bitbanged MDIO", new_bus->phy_mask = pdata->phy_mask; + new_bus->phy_ignore_ta_mask = pdata->phy_ignore_ta_mask; new_bus->irq = pdata->irqs; new_bus->parent = dev; diff --git a/include/linux/mdio-gpio.h b/include/linux/mdio-gpio.h index 66c30a763b10..11f00cdabe3d 100644 --- a/include/linux/mdio-gpio.h +++ b/include/linux/mdio-gpio.h @@ -23,7 +23,8 @@ struct mdio_gpio_platform_data { bool mdio_active_low; bool mdo_active_low; - unsigned int phy_mask; + u32 phy_mask; + u32 phy_ignore_ta_mask; int irqs[PHY_MAX_ADDR]; /* reset callback */ int (*reset)(struct mii_bus *bus); -- cgit v1.2.3 From 55917a21d0cc012bb6073bb05bb768fd51d8e237 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 14 May 2015 14:57:23 +0200 Subject: netfilter: x_tables: add context to know if extension runs from nft_compat Currently, we have four xtables extensions that cannot be used from the xt over nft compat layer. The problem is that they need real access to the full blown xt_entry to validate that the rule comes with the right dependencies. This check was introduced to overcome the lack of sufficient userspace dependency validation in iptables. To resolve this problem, this patch introduces a new field to the xt_tgchk_param structure that tell us if the extension is run from nft_compat context. The three affected extensions are: 1) CLUSTERIP, this target has been superseded by xt_cluster. So just bail out by returning -EINVAL. 2) TCPMSS. Relax the checking when used from nft_compat. If used with the wrong configuration, it will corrupt !syn packets by adding TCP MSS option. 3) ebt_stp. Relax the check to make sure it uses the reserved destination MAC address for STP. Signed-off-by: Pablo Neira Ayuso Tested-by: Arturo Borrero Gonzalez --- include/linux/netfilter/x_tables.h | 2 ++ net/bridge/netfilter/ebt_stp.c | 6 ++++-- net/ipv4/netfilter/ipt_CLUSTERIP.c | 5 +++++ net/netfilter/nft_compat.c | 2 ++ net/netfilter/xt_TCPMSS.c | 6 ++++++ 5 files changed, 19 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index a3e215bb0241..09f38206c18f 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -62,6 +62,7 @@ struct xt_mtchk_param { void *matchinfo; unsigned int hook_mask; u_int8_t family; + bool nft_compat; }; /** @@ -92,6 +93,7 @@ struct xt_tgchk_param { void *targinfo; unsigned int hook_mask; u_int8_t family; + bool nft_compat; }; /* Target destructor parameters */ diff --git a/net/bridge/netfilter/ebt_stp.c b/net/bridge/netfilter/ebt_stp.c index 071d87214dde..0c40570069ba 100644 --- a/net/bridge/netfilter/ebt_stp.c +++ b/net/bridge/netfilter/ebt_stp.c @@ -164,8 +164,10 @@ static int ebt_stp_mt_check(const struct xt_mtchk_param *par) !(info->bitmask & EBT_STP_MASK)) return -EINVAL; /* Make sure the match only receives stp frames */ - if (!ether_addr_equal(e->destmac, bridge_ula) || - !ether_addr_equal(e->destmsk, msk) || !(e->bitmask & EBT_DESTMAC)) + if (!par->nft_compat && + (!ether_addr_equal(e->destmac, bridge_ula) || + !ether_addr_equal(e->destmsk, msk) || + !(e->bitmask & EBT_DESTMAC))) return -EINVAL; return 0; diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 771ab3d01ad3..45cb16a6a4a3 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -367,6 +367,11 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par) struct clusterip_config *config; int ret; + if (par->nft_compat) { + pr_err("cannot use CLUSTERIP target from nftables compat\n"); + return -EOPNOTSUPP; + } + if (cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP && cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT && cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT_DPT) { diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 7f29cfc76349..66def315eb56 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -161,6 +161,7 @@ nft_target_set_tgchk_param(struct xt_tgchk_param *par, par->hook_mask = 0; } par->family = ctx->afi->family; + par->nft_compat = true; } static void target_compat_from_user(struct xt_target *t, void *in, void *out) @@ -377,6 +378,7 @@ nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx, par->hook_mask = 0; } par->family = ctx->afi->family; + par->nft_compat = true; } static void match_compat_from_user(struct xt_match *m, void *in, void *out) diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index e762de5ee89b..8c3190e2fc6a 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -277,6 +277,9 @@ static int tcpmss_tg4_check(const struct xt_tgchk_param *par) "FORWARD, OUTPUT and POSTROUTING hooks\n"); return -EINVAL; } + if (par->nft_compat) + return 0; + xt_ematch_foreach(ematch, e) if (find_syn_match(ematch)) return 0; @@ -299,6 +302,9 @@ static int tcpmss_tg6_check(const struct xt_tgchk_param *par) "FORWARD, OUTPUT and POSTROUTING hooks\n"); return -EINVAL; } + if (par->nft_compat) + return 0; + xt_ematch_foreach(ematch, e) if (find_syn_match(ematch)) return 0; -- cgit v1.2.3 From c91d46065209bfe6124396fc409765ced0601b59 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 15 May 2015 05:48:07 -0700 Subject: net: fix two sparse errors First one in __skb_checksum_validate_complete() fixes the following (and other callers) make C=2 CF=-D__CHECK_ENDIAN__ net/ipv4/tcp_ipv4.o CHECK net/ipv4/tcp_ipv4.c include/linux/skbuff.h:3052:24: warning: incorrect type in return expression (different base types) include/linux/skbuff.h:3052:24: expected restricted __sum16 include/linux/skbuff.h:3052:24: got int Second is fixing gso_make_checksum() : CHECK net/ipv4/gre_offload.c include/linux/skbuff.h:3360:14: warning: incorrect type in assignment (different base types) include/linux/skbuff.h:3360:14: expected unsigned short [unsigned] [usertype] csum include/linux/skbuff.h:3360:14: got restricted __sum16 include/linux/skbuff.h:3365:16: warning: incorrect type in return expression (different base types) include/linux/skbuff.h:3365:16: expected restricted __sum16 include/linux/skbuff.h:3365:16: got unsigned short [unsigned] [usertype] csum Fixes: 5a21232983aa7 ("net: Support for csum_bad in skbuff") Fixes: 7e2b10c1e52ca ("net: Support for multiple checksums with gso") Signed-off-by: Eric Dumazet CC: Tom Herbert Acked-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/skbuff.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f83aa6568cbf..b57eebfb67e0 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3051,7 +3051,7 @@ static inline __sum16 __skb_checksum_validate_complete(struct sk_buff *skb, } } else if (skb->csum_bad) { /* ip_summed == CHECKSUM_NONE in this case */ - return 1; + return (__force __sum16)1; } skb->csum = psum; @@ -3353,15 +3353,14 @@ static inline int gso_pskb_expand_head(struct sk_buff *skb, int extra) static inline __sum16 gso_make_checksum(struct sk_buff *skb, __wsum res) { int plen = SKB_GSO_CB(skb)->csum_start - skb_headroom(skb) - - skb_transport_offset(skb); - __u16 csum; + skb_transport_offset(skb); + __wsum partial; - csum = csum_fold(csum_partial(skb_transport_header(skb), - plen, skb->csum)); + partial = csum_partial(skb_transport_header(skb), plen, skb->csum); skb->csum = res; SKB_GSO_CB(skb)->csum_start -= plen; - return csum; + return csum_fold(partial); } static inline bool skb_is_gso(const struct sk_buff *skb) -- cgit v1.2.3 From 04fd61ab36ec065e194ab5e74ae34a5240d992bb Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 19 May 2015 16:59:03 -0700 Subject: bpf: allow bpf programs to tail-call other bpf programs introduce bpf_tail_call(ctx, &jmp_table, index) helper function which can be used from BPF programs like: int bpf_prog(struct pt_regs *ctx) { ... bpf_tail_call(ctx, &jmp_table, index); ... } that is roughly equivalent to: int bpf_prog(struct pt_regs *ctx) { ... if (jmp_table[index]) return (*jmp_table[index])(ctx); ... } The important detail that it's not a normal call, but a tail call. The kernel stack is precious, so this helper reuses the current stack frame and jumps into another BPF program without adding extra call frame. It's trivially done in interpreter and a bit trickier in JITs. In case of x64 JIT the bigger part of generated assembler prologue is common for all programs, so it is simply skipped while jumping. Other JITs can do similar prologue-skipping optimization or do stack unwind before jumping into the next program. bpf_tail_call() arguments: ctx - context pointer jmp_table - one of BPF_MAP_TYPE_PROG_ARRAY maps used as the jump table index - index in the jump table Since all BPF programs are idenitified by file descriptor, user space need to populate the jmp_table with FDs of other BPF programs. If jmp_table[index] is empty the bpf_tail_call() doesn't jump anywhere and program execution continues as normal. New BPF_MAP_TYPE_PROG_ARRAY map type is introduced so that user space can populate this jmp_table array with FDs of other bpf programs. Programs can share the same jmp_table array or use multiple jmp_tables. The chain of tail calls can form unpredictable dynamic loops therefore tail_call_cnt is used to limit the number of calls and currently is set to 32. Use cases: Acked-by: Daniel Borkmann ========== - simplify complex programs by splitting them into a sequence of small programs - dispatch routine For tracing and future seccomp the program may be triggered on all system calls, but processing of syscall arguments will be different. It's more efficient to implement them as: int syscall_entry(struct seccomp_data *ctx) { bpf_tail_call(ctx, &syscall_jmp_table, ctx->nr /* syscall number */); ... default: process unknown syscall ... } int sys_write_event(struct seccomp_data *ctx) {...} int sys_read_event(struct seccomp_data *ctx) {...} syscall_jmp_table[__NR_write] = sys_write_event; syscall_jmp_table[__NR_read] = sys_read_event; For networking the program may call into different parsers depending on packet format, like: int packet_parser(struct __sk_buff *skb) { ... parse L2, L3 here ... __u8 ipproto = load_byte(skb, ... offsetof(struct iphdr, protocol)); bpf_tail_call(skb, &ipproto_jmp_table, ipproto); ... default: process unknown protocol ... } int parse_tcp(struct __sk_buff *skb) {...} int parse_udp(struct __sk_buff *skb) {...} ipproto_jmp_table[IPPROTO_TCP] = parse_tcp; ipproto_jmp_table[IPPROTO_UDP] = parse_udp; - for TC use case, bpf_tail_call() allows to implement reclassify-like logic - bpf_map_update_elem/delete calls into BPF_MAP_TYPE_PROG_ARRAY jump table are atomic, so user space can build chains of BPF programs on the fly Implementation details: ======================= - high performance of bpf_tail_call() is the goal. It could have been implemented without JIT changes as a wrapper on top of BPF_PROG_RUN() macro, but with two downsides: . all programs would have to pay performance penalty for this feature and tail call itself would be slower, since mandatory stack unwind, return, stack allocate would be done for every tailcall. . tailcall would be limited to programs running preempt_disabled, since generic 'void *ctx' doesn't have room for 'tail_call_cnt' and it would need to be either global per_cpu variable accessed by helper and by wrapper or global variable protected by locks. In this implementation x64 JIT bypasses stack unwind and jumps into the callee program after prologue. - bpf_prog_array_compatible() ensures that prog_type of callee and caller are the same and JITed/non-JITed flag is the same, since calling JITed program from non-JITed is invalid, since stack frames are different. Similarly calling kprobe type program from socket type program is invalid. - jump table is implemented as BPF_MAP_TYPE_PROG_ARRAY to reuse 'map' abstraction, its user space API and all of verifier logic. It's in the existing arraymap.c file, since several functions are shared with regular array map. Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 22 +++++++++ include/linux/filter.h | 2 +- include/uapi/linux/bpf.h | 10 +++++ kernel/bpf/arraymap.c | 113 ++++++++++++++++++++++++++++++++++++++++++++--- kernel/bpf/core.c | 73 +++++++++++++++++++++++++++++- kernel/bpf/syscall.c | 23 +++++++++- kernel/bpf/verifier.c | 17 +++++++ kernel/trace/bpf_trace.c | 2 + net/core/filter.c | 2 + 9 files changed, 255 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index d5cda067115a..8821b9a8689e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -126,6 +126,27 @@ struct bpf_prog_aux { struct work_struct work; }; +struct bpf_array { + struct bpf_map map; + u32 elem_size; + /* 'ownership' of prog_array is claimed by the first program that + * is going to use this map or by the first program which FD is stored + * in the map to make sure that all callers and callees have the same + * prog_type and JITed flag + */ + enum bpf_prog_type owner_prog_type; + bool owner_jited; + union { + char value[0] __aligned(8); + struct bpf_prog *prog[0] __aligned(8); + }; +}; +#define MAX_TAIL_CALL_CNT 32 + +u64 bpf_tail_call(u64 ctx, u64 r2, u64 index, u64 r4, u64 r5); +void bpf_prog_array_map_clear(struct bpf_map *map); +bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp); + #ifdef CONFIG_BPF_SYSCALL void bpf_register_prog_type(struct bpf_prog_type_list *tl); void bpf_register_map_type(struct bpf_map_type_list *tl); @@ -160,5 +181,6 @@ extern const struct bpf_func_proto bpf_map_delete_elem_proto; extern const struct bpf_func_proto bpf_get_prandom_u32_proto; extern const struct bpf_func_proto bpf_get_smp_processor_id_proto; +extern const struct bpf_func_proto bpf_tail_call_proto; #endif /* _LINUX_BPF_H */ diff --git a/include/linux/filter.h b/include/linux/filter.h index 200be4a74a33..17724f6ea983 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -378,7 +378,7 @@ static inline void bpf_prog_unlock_ro(struct bpf_prog *fp) int sk_filter(struct sock *sk, struct sk_buff *skb); -void bpf_prog_select_runtime(struct bpf_prog *fp); +int bpf_prog_select_runtime(struct bpf_prog *fp); void bpf_prog_free(struct bpf_prog *fp); struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index a9ebdf5701e8..f0a9af8b4dae 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -113,6 +113,7 @@ enum bpf_map_type { BPF_MAP_TYPE_UNSPEC, BPF_MAP_TYPE_HASH, BPF_MAP_TYPE_ARRAY, + BPF_MAP_TYPE_PROG_ARRAY, }; enum bpf_prog_type { @@ -210,6 +211,15 @@ enum bpf_func_id { * Return: 0 on success */ BPF_FUNC_l4_csum_replace, + + /** + * bpf_tail_call(ctx, prog_array_map, index) - jump into another BPF program + * @ctx: context pointer passed to next program + * @prog_array_map: pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY + * @index: index inside array that selects specific program to run + * Return: 0 on success + */ + BPF_FUNC_tail_call, __BPF_FUNC_MAX_ID, }; diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 8a6616583f38..614bcd4c1d74 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -14,12 +14,7 @@ #include #include #include - -struct bpf_array { - struct bpf_map map; - u32 elem_size; - char value[0] __aligned(8); -}; +#include /* Called from syscall */ static struct bpf_map *array_map_alloc(union bpf_attr *attr) @@ -154,3 +149,109 @@ static int __init register_array_map(void) return 0; } late_initcall(register_array_map); + +static struct bpf_map *prog_array_map_alloc(union bpf_attr *attr) +{ + /* only bpf_prog file descriptors can be stored in prog_array map */ + if (attr->value_size != sizeof(u32)) + return ERR_PTR(-EINVAL); + return array_map_alloc(attr); +} + +static void prog_array_map_free(struct bpf_map *map) +{ + struct bpf_array *array = container_of(map, struct bpf_array, map); + int i; + + synchronize_rcu(); + + /* make sure it's empty */ + for (i = 0; i < array->map.max_entries; i++) + BUG_ON(array->prog[i] != NULL); + kvfree(array); +} + +static void *prog_array_map_lookup_elem(struct bpf_map *map, void *key) +{ + return NULL; +} + +/* only called from syscall */ +static int prog_array_map_update_elem(struct bpf_map *map, void *key, + void *value, u64 map_flags) +{ + struct bpf_array *array = container_of(map, struct bpf_array, map); + struct bpf_prog *prog, *old_prog; + u32 index = *(u32 *)key, ufd; + + if (map_flags != BPF_ANY) + return -EINVAL; + + if (index >= array->map.max_entries) + return -E2BIG; + + ufd = *(u32 *)value; + prog = bpf_prog_get(ufd); + if (IS_ERR(prog)) + return PTR_ERR(prog); + + if (!bpf_prog_array_compatible(array, prog)) { + bpf_prog_put(prog); + return -EINVAL; + } + + old_prog = xchg(array->prog + index, prog); + if (old_prog) + bpf_prog_put(old_prog); + + return 0; +} + +static int prog_array_map_delete_elem(struct bpf_map *map, void *key) +{ + struct bpf_array *array = container_of(map, struct bpf_array, map); + struct bpf_prog *old_prog; + u32 index = *(u32 *)key; + + if (index >= array->map.max_entries) + return -E2BIG; + + old_prog = xchg(array->prog + index, NULL); + if (old_prog) { + bpf_prog_put(old_prog); + return 0; + } else { + return -ENOENT; + } +} + +/* decrement refcnt of all bpf_progs that are stored in this map */ +void bpf_prog_array_map_clear(struct bpf_map *map) +{ + struct bpf_array *array = container_of(map, struct bpf_array, map); + int i; + + for (i = 0; i < array->map.max_entries; i++) + prog_array_map_delete_elem(map, &i); +} + +static const struct bpf_map_ops prog_array_ops = { + .map_alloc = prog_array_map_alloc, + .map_free = prog_array_map_free, + .map_get_next_key = array_map_get_next_key, + .map_lookup_elem = prog_array_map_lookup_elem, + .map_update_elem = prog_array_map_update_elem, + .map_delete_elem = prog_array_map_delete_elem, +}; + +static struct bpf_map_type_list prog_array_type __read_mostly = { + .ops = &prog_array_ops, + .type = BPF_MAP_TYPE_PROG_ARRAY, +}; + +static int __init register_prog_array_map(void) +{ + bpf_register_map_type(&prog_array_type); + return 0; +} +late_initcall(register_prog_array_map); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 54f0e7fcd0e2..d44b25cbe460 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -176,6 +176,15 @@ noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) return 0; } +const struct bpf_func_proto bpf_tail_call_proto = { + .func = NULL, + .gpl_only = false, + .ret_type = RET_VOID, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_CONST_MAP_PTR, + .arg3_type = ARG_ANYTHING, +}; + /** * __bpf_prog_run - run eBPF program on a given context * @ctx: is the data we are operating on @@ -244,6 +253,7 @@ static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn) [BPF_ALU64 | BPF_NEG] = &&ALU64_NEG, /* Call instruction */ [BPF_JMP | BPF_CALL] = &&JMP_CALL, + [BPF_JMP | BPF_CALL | BPF_X] = &&JMP_TAIL_CALL, /* Jumps */ [BPF_JMP | BPF_JA] = &&JMP_JA, [BPF_JMP | BPF_JEQ | BPF_X] = &&JMP_JEQ_X, @@ -286,6 +296,7 @@ static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn) [BPF_LD | BPF_IND | BPF_B] = &&LD_IND_B, [BPF_LD | BPF_IMM | BPF_DW] = &&LD_IMM_DW, }; + u32 tail_call_cnt = 0; void *ptr; int off; @@ -431,6 +442,30 @@ select_insn: BPF_R4, BPF_R5); CONT; + JMP_TAIL_CALL: { + struct bpf_map *map = (struct bpf_map *) (unsigned long) BPF_R2; + struct bpf_array *array = container_of(map, struct bpf_array, map); + struct bpf_prog *prog; + u64 index = BPF_R3; + + if (unlikely(index >= array->map.max_entries)) + goto out; + + if (unlikely(tail_call_cnt > MAX_TAIL_CALL_CNT)) + goto out; + + tail_call_cnt++; + + prog = READ_ONCE(array->prog[index]); + if (unlikely(!prog)) + goto out; + + ARG1 = BPF_R1; + insn = prog->insnsi; + goto select_insn; +out: + CONT; + } /* JMP */ JMP_JA: insn += insn->off; @@ -619,6 +654,40 @@ void __weak bpf_int_jit_compile(struct bpf_prog *prog) { } +bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp) +{ + if (array->owner_prog_type) { + if (array->owner_prog_type != fp->type) + return false; + if (array->owner_jited != fp->jited) + return false; + } else { + array->owner_prog_type = fp->type; + array->owner_jited = fp->jited; + } + return true; +} + +static int check_tail_call(const struct bpf_prog *fp) +{ + struct bpf_prog_aux *aux = fp->aux; + int i; + + for (i = 0; i < aux->used_map_cnt; i++) { + struct bpf_array *array; + struct bpf_map *map; + + map = aux->used_maps[i]; + if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY) + continue; + array = container_of(map, struct bpf_array, map); + if (!bpf_prog_array_compatible(array, fp)) + return -EINVAL; + } + + return 0; +} + /** * bpf_prog_select_runtime - select execution runtime for BPF program * @fp: bpf_prog populated with internal BPF program @@ -626,7 +695,7 @@ void __weak bpf_int_jit_compile(struct bpf_prog *prog) * try to JIT internal BPF program, if JIT is not available select interpreter * BPF program will be executed via BPF_PROG_RUN() macro */ -void bpf_prog_select_runtime(struct bpf_prog *fp) +int bpf_prog_select_runtime(struct bpf_prog *fp) { fp->bpf_func = (void *) __bpf_prog_run; @@ -634,6 +703,8 @@ void bpf_prog_select_runtime(struct bpf_prog *fp) bpf_int_jit_compile(fp); /* Lock whole bpf_prog as read-only */ bpf_prog_lock_ro(fp); + + return check_tail_call(fp); } EXPORT_SYMBOL_GPL(bpf_prog_select_runtime); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 3bae6c591914..98a69bd83069 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -68,6 +68,12 @@ static int bpf_map_release(struct inode *inode, struct file *filp) { struct bpf_map *map = filp->private_data; + if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) + /* prog_array stores refcnt-ed bpf_prog pointers + * release them all when user space closes prog_array_fd + */ + bpf_prog_array_map_clear(map); + bpf_map_put(map); return 0; } @@ -392,6 +398,19 @@ static void fixup_bpf_calls(struct bpf_prog *prog) */ BUG_ON(!prog->aux->ops->get_func_proto); + if (insn->imm == BPF_FUNC_tail_call) { + /* mark bpf_tail_call as different opcode + * to avoid conditional branch in + * interpeter for every normal call + * and to prevent accidental JITing by + * JIT compiler that doesn't support + * bpf_tail_call yet + */ + insn->imm = 0; + insn->code |= BPF_X; + continue; + } + fn = prog->aux->ops->get_func_proto(insn->imm); /* all functions that have prototype and verifier allowed * programs to call them, must be real in-kernel functions @@ -532,7 +551,9 @@ static int bpf_prog_load(union bpf_attr *attr) fixup_bpf_calls(prog); /* eBPF program is ready to be JITed */ - bpf_prog_select_runtime(prog); + err = bpf_prog_select_runtime(prog); + if (err < 0) + goto free_used_maps; err = anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog, O_RDWR | O_CLOEXEC); if (err < 0) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 47dcd3aa6e23..cfd9a40b9a5a 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -907,6 +907,23 @@ static int check_call(struct verifier_env *env, int func_id) fn->ret_type, func_id); return -EINVAL; } + + if (map && map->map_type == BPF_MAP_TYPE_PROG_ARRAY && + func_id != BPF_FUNC_tail_call) + /* prog_array map type needs extra care: + * only allow to pass it into bpf_tail_call() for now. + * bpf_map_delete_elem() can be allowed in the future, + * while bpf_map_update_elem() must only be done via syscall + */ + return -EINVAL; + + if (func_id == BPF_FUNC_tail_call && + map->map_type != BPF_MAP_TYPE_PROG_ARRAY) + /* don't allow any other map type to be passed into + * bpf_tail_call() + */ + return -EINVAL; + return 0; } diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 2d56ce501632..646445e41bd4 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -172,6 +172,8 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func return &bpf_probe_read_proto; case BPF_FUNC_ktime_get_ns: return &bpf_ktime_get_ns_proto; + case BPF_FUNC_tail_call: + return &bpf_tail_call_proto; case BPF_FUNC_trace_printk: /* diff --git a/net/core/filter.c b/net/core/filter.c index 6805717be614..3adcca6f17a4 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1421,6 +1421,8 @@ sk_filter_func_proto(enum bpf_func_id func_id) return &bpf_get_prandom_u32_proto; case BPF_FUNC_get_smp_processor_id: return &bpf_get_smp_processor_id_proto; + case BPF_FUNC_tail_call: + return &bpf_tail_call_proto; default: return NULL; } -- cgit v1.2.3 From 2efd055c53c06b7e89c167c98069bab9afce7e59 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Wed, 20 May 2015 16:35:41 -0700 Subject: tcp: add tcpi_segs_in and tcpi_segs_out to tcp_info This patch tracks the total number of inbound and outbound segments on a TCP socket. One may use this number to have an idea on connection quality when compared against the retransmissions. RFC4898 named these : tcpEStatsPerfSegsIn and tcpEStatsPerfSegsOut These are a 32bit field each and can be fetched both from TCP_INFO getsockopt() if one has a handle on a TCP socket, or from inet_diag netlink facility (iproute2/ss patch will follow) Note that tp->segs_out was placed near tp->snd_nxt for good data locality and minimal performance impact, while tp->segs_in was placed near tp->bytes_received for the same reason. Join work with Eric Dumazet. Note that received SYN are accounted on the listener, but sent SYNACK are not accounted. Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 7 ++++++- include/uapi/linux/tcp.h | 4 +++- net/ipv4/tcp.c | 2 ++ net/ipv4/tcp_ipv4.c | 1 + net/ipv4/tcp_minisocks.c | 1 + net/ipv4/tcp_output.c | 1 + net/ipv6/tcp_ipv6.c | 1 + 7 files changed, 15 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index e6fb5df22db1..f0212026c77f 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -149,11 +149,16 @@ struct tcp_sock { * sum(delta(rcv_nxt)), or how many bytes * were acked. */ + u32 segs_in; /* RFC4898 tcpEStatsPerfSegsIn + * total number of segments in. + */ u32 rcv_nxt; /* What we want to receive next */ u32 copied_seq; /* Head of yet unread data */ u32 rcv_wup; /* rcv_nxt on last window update sent */ u32 snd_nxt; /* Next sequence we send */ - + u32 segs_out; /* RFC4898 tcpEStatsPerfSegsOut + * The total number of segments sent. + */ u64 bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked * sum(delta(snd_una)), or how many bytes * were acked. diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index 51ebedba577f..65a77b071e22 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -192,8 +192,10 @@ struct tcp_info { __u64 tcpi_pacing_rate; __u64 tcpi_max_pacing_rate; - __u64 tcpi_bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked */ + __u64 tcpi_bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked */ __u64 tcpi_bytes_received; /* RFC4898 tcpEStatsAppHCThruOctetsReceived */ + __u32 tcpi_segs_out; /* RFC4898 tcpEStatsPerfSegsOut */ + __u32 tcpi_segs_in; /* RFC4898 tcpEStatsPerfSegsIn */ }; /* for TCP_MD5SIG socket option */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 0a3f9a00565b..7f3e721b9e69 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2695,6 +2695,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) spin_lock_bh(&sk->sk_lock.slock); info->tcpi_bytes_acked = tp->bytes_acked; info->tcpi_bytes_received = tp->bytes_received; + info->tcpi_segs_out = tp->segs_out; + info->tcpi_segs_in = tp->segs_in; spin_unlock_bh(&sk->sk_lock.slock); } EXPORT_SYMBOL_GPL(tcp_get_info); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 0cc4b5a630cd..feb875769b8d 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1626,6 +1626,7 @@ process: skb->dev = NULL; bh_lock_sock_nested(sk); + tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs); ret = 0; if (!sock_owned_by_user(sk)) { if (!tcp_prequeue(sk, skb)) diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index ebe2ab2596ed..b62d15c86946 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -448,6 +448,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->rcv_wup = newtp->copied_seq = newtp->rcv_nxt = treq->rcv_isn + 1; + newtp->segs_in = 0; newtp->snd_sml = newtp->snd_una = newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 3aebe0157dfa..534e5fdb04c1 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1027,6 +1027,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS, tcp_skb_pcount(skb)); + tp->segs_out += tcp_skb_pcount(skb); /* OK, its time to fill skb_shinfo(skb)->gso_segs */ skb_shinfo(skb)->gso_segs = tcp_skb_pcount(skb); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index b6575d665568..beac6bf840b9 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1421,6 +1421,7 @@ process: skb->dev = NULL; bh_lock_sock_nested(sk); + tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs); ret = 0; if (!sock_owned_by_user(sk)) { if (!tcp_prequeue(sk, skb)) -- cgit v1.2.3 From be12a1fe298e8be04d5215364f94654dff81b0bc Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Thu, 21 May 2015 16:59:58 +0200 Subject: net: skbuff: add skb_append_pagefrags and use it Signed-off-by: Hannes Frederic Sowa Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 3 +++ net/core/skbuff.c | 18 ++++++++++++++++++ net/ipv4/ip_output.c | 15 ++++----------- 3 files changed, 25 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index b617095adb88..f708936cdd23 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -861,6 +861,9 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, int len, int odd, struct sk_buff *skb), void *from, int length); +int skb_append_pagefrags(struct sk_buff *skb, struct page *page, + int offset, size_t size); + struct skb_seq_state { __u32 lower_offset; __u32 upper_offset; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index f3fe9bd9e672..4f2babeaf18d 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2915,6 +2915,24 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, } EXPORT_SYMBOL(skb_append_datato_frags); +int skb_append_pagefrags(struct sk_buff *skb, struct page *page, + int offset, size_t size) +{ + int i = skb_shinfo(skb)->nr_frags; + + if (skb_can_coalesce(skb, i, page, offset)) { + skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], size); + } else if (i < MAX_SKB_FRAGS) { + get_page(page); + skb_fill_page_desc(skb, i, page, offset, size); + } else { + return -EMSGSIZE; + } + + return 0; +} +EXPORT_SYMBOL_GPL(skb_append_pagefrags); + /** * skb_pull_rcsum - pull skb and update receive checksum * @skb: buffer to update diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 8d91b922fcfe..451b009dae75 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1233,11 +1233,9 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page, } while (size > 0) { - int i; - - if (skb_is_gso(skb)) + if (skb_is_gso(skb)) { len = size; - else { + } else { /* Check if the remaining data fits into current packet. */ len = mtu - skb->len; @@ -1289,15 +1287,10 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page, continue; } - i = skb_shinfo(skb)->nr_frags; if (len > size) len = size; - if (skb_can_coalesce(skb, i, page, offset)) { - skb_frag_size_add(&skb_shinfo(skb)->frags[i-1], len); - } else if (i < MAX_SKB_FRAGS) { - get_page(page); - skb_fill_page_desc(skb, i, page, offset, len); - } else { + + if (skb_append_pagefrags(skb, page, offset, len)) { err = -EMSGSIZE; goto error; } -- cgit v1.2.3 From a60e3cc7c92973a31fad0fd04dc5cf4355d3d1ef Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Thu, 21 May 2015 17:00:00 +0200 Subject: net: make skb_splice_bits more configureable Prepare skb_splice_bits to be able to deal with AF_UNIX sockets. AF_UNIX sockets don't use lock_sock/release_sock and thus we have to use a callback to make the locking and unlocking configureable. Signed-off-by: Hannes Frederic Sowa Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 11 +++++++++-- net/core/skbuff.c | 45 ++++++++++++++++++++++++++++----------------- net/ipv4/tcp.c | 5 +++-- 3 files changed, 40 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f708936cdd23..6b41c15efa27 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -35,6 +35,7 @@ #include #include #include +#include /* A. Checksumming of received packets by device. * @@ -2699,9 +2700,15 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len); int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len); __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, int len, __wsum csum); -int skb_splice_bits(struct sk_buff *skb, unsigned int offset, +ssize_t skb_socket_splice(struct sock *sk, + struct pipe_inode_info *pipe, + struct splice_pipe_desc *spd); +int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset, struct pipe_inode_info *pipe, unsigned int len, - unsigned int flags); + unsigned int flags, + ssize_t (*splice_cb)(struct sock *, + struct pipe_inode_info *, + struct splice_pipe_desc *)); void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to); unsigned int skb_zerocopy_headlen(const struct sk_buff *from); int skb_zerocopy(struct sk_buff *to, struct sk_buff *from, diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 4f2babeaf18d..02769fa4f5c8 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1870,15 +1870,39 @@ static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe, return false; } +ssize_t skb_socket_splice(struct sock *sk, + struct pipe_inode_info *pipe, + struct splice_pipe_desc *spd) +{ + int ret; + + /* Drop the socket lock, otherwise we have reverse + * locking dependencies between sk_lock and i_mutex + * here as compared to sendfile(). We enter here + * with the socket lock held, and splice_to_pipe() will + * grab the pipe inode lock. For sendfile() emulation, + * we call into ->sendpage() with the i_mutex lock held + * and networking will grab the socket lock. + */ + release_sock(sk); + ret = splice_to_pipe(pipe, spd); + lock_sock(sk); + + return ret; +} + /* * Map data from the skb to a pipe. Should handle both the linear part, * the fragments, and the frag list. It does NOT handle frag lists within * the frag list, if such a thing exists. We'd probably need to recurse to * handle that cleanly. */ -int skb_splice_bits(struct sk_buff *skb, unsigned int offset, +int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset, struct pipe_inode_info *pipe, unsigned int tlen, - unsigned int flags) + unsigned int flags, + ssize_t (*splice_cb)(struct sock *, + struct pipe_inode_info *, + struct splice_pipe_desc *)) { struct partial_page partial[MAX_SKB_FRAGS]; struct page *pages[MAX_SKB_FRAGS]; @@ -1891,7 +1915,6 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset, .spd_release = sock_spd_release, }; struct sk_buff *frag_iter; - struct sock *sk = skb->sk; int ret = 0; /* @@ -1914,20 +1937,8 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset, } done: - if (spd.nr_pages) { - /* - * Drop the socket lock, otherwise we have reverse - * locking dependencies between sk_lock and i_mutex - * here as compared to sendfile(). We enter here - * with the socket lock held, and splice_to_pipe() will - * grab the pipe inode lock. For sendfile() emulation, - * we call into ->sendpage() with the i_mutex lock held - * and networking will grab the socket lock. - */ - release_sock(sk); - ret = splice_to_pipe(pipe, &spd); - lock_sock(sk); - } + if (spd.nr_pages) + ret = splice_cb(sk, pipe, &spd); return ret; } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 90afcec3f427..65f791f74845 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -695,8 +695,9 @@ static int tcp_splice_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, struct tcp_splice_state *tss = rd_desc->arg.data; int ret; - ret = skb_splice_bits(skb, offset, tss->pipe, min(rd_desc->count, len), - tss->flags); + ret = skb_splice_bits(skb, skb->sk, offset, tss->pipe, + min(rd_desc->count, len), tss->flags, + skb_socket_splice); if (ret > 0) rd_desc->count -= ret; return ret; -- cgit v1.2.3 From cacce073bfd2b4091fbc58e906e7a9a21f538ff6 Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Thu, 14 May 2015 23:05:49 +0200 Subject: bcma: add module_bcma_driver() This makes it possible to save some lines of code in drivers with an simple bcma driver registration. Signed-off-by: Hauke Mehrtens Signed-off-by: Kalle Valo --- include/linux/bcma/bcma.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h index e34f906647d3..2ff4a9961e1d 100644 --- a/include/linux/bcma/bcma.h +++ b/include/linux/bcma/bcma.h @@ -305,6 +305,15 @@ int __bcma_driver_register(struct bcma_driver *drv, struct module *owner); extern void bcma_driver_unregister(struct bcma_driver *drv); +/* module_bcma_driver() - Helper macro for drivers that don't do + * anything special in module init/exit. This eliminates a lot of + * boilerplate. Each module may only use this macro once, and + * calling it replaces module_init() and module_exit() + */ +#define module_bcma_driver(__bcma_driver) \ + module_driver(__bcma_driver, bcma_driver_register, \ + bcma_driver_unregister) + /* Set a fallback SPROM. * See kdoc at the function definition for complete documentation. */ extern int bcma_arch_register_fallback_sprom( -- cgit v1.2.3 From e463d88c36d42211aa72ed76d32fb8bf37820ef1 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 26 May 2015 12:19:58 -0700 Subject: net: phy: Add phy_interface_is_rgmii helper RGMII interfaces come in 4 different flavors that the PHY library needs to care about: regular RGMII (no delays), RGMII with either RX or TX delay, and both. In order to avoid errors of checking only for one type of RGMII interface and miss the 3 others, introduce a convenience function which tests for all values. Suggested-by: David S. Miller Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/phy.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 701c7a3946e0..a26c3f84b8dd 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -677,6 +677,17 @@ static inline bool phy_is_internal(struct phy_device *phydev) return phydev->is_internal; } +/** + * phy_interface_is_rgmii - Convenience function for testing if a PHY interface + * is RGMII (all variants) + * @phydev: the phy_device struct + */ +static inline bool phy_interface_is_rgmii(struct phy_device *phydev) +{ + return phydev->interface >= PHY_INTERFACE_MODE_RGMII && + phydev->interface <= PHY_INTERFACE_MODE_RGMII_TXID; +} + /** * phy_write_mmd - Convenience function for writing a register * on an MMD on a given PHY. -- cgit v1.2.3 From e5c4708b2b6fec0300db60ee3cf4b4ae96430a12 Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Tue, 26 May 2015 19:20:14 -0700 Subject: pci: Add Cavium PCI vendor id This vendor id will be used by network (vNIC), USB (xHCI), SATA (AHCI), GPIO, I2C, MMC and maybe other drivers for ThunderX SoC. Acked-by: Bjorn Helgaas Signed-off-by: Sunil Goutham Signed-off-by: Aleksey Makarov Signed-off-by: David S. Miller --- include/linux/pci_ids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 2f7b9a40f627..2972c7f3aa1d 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2329,6 +2329,8 @@ #define PCI_DEVICE_ID_ALTIMA_AC9100 0x03ea #define PCI_DEVICE_ID_ALTIMA_AC1003 0x03eb +#define PCI_VENDOR_ID_CAVIUM 0x177d + #define PCI_VENDOR_ID_BELKIN 0x1799 #define PCI_DEVICE_ID_BELKIN_F5D7010V7 0x701f -- cgit v1.2.3 From 5790cf3c00c2f92aacba348e13f8a9a8f5dd96bd Mon Sep 17 00:00:00 2001 From: Mathieu Olivari Date: Wed, 27 May 2015 11:02:47 -0700 Subject: stmmac: add phy-handle support to the platform layer On stmmac driver, PHY specification in device-tree was done using the non-standard property "snps,phy-addr". Specifying a PHY on a different MDIO bus that the one within the stmmac controller doesn't seem to be possible when device-tree is used. This change adds support for the phy-handle property, as specified in Documentation/devicetree/bindings/net/ethernet.txt. Signed-off-by: Mathieu Olivari Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 28 ++++++++++++++-------- .../net/ethernet/stmicro/stmmac/stmmac_platform.c | 6 ++++- include/linux/stmmac.h | 1 + 3 files changed, 24 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index e4f273976071..31c64169f2ec 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -52,6 +52,7 @@ #include "stmmac_ptp.h" #include "stmmac.h" #include +#include #define STMMAC_ALIGN(x) L1_CACHE_ALIGN(x) @@ -816,18 +817,25 @@ static int stmmac_init_phy(struct net_device *dev) priv->speed = 0; priv->oldduplex = -1; - if (priv->plat->phy_bus_name) - snprintf(bus_id, MII_BUS_ID_SIZE, "%s-%x", - priv->plat->phy_bus_name, priv->plat->bus_id); - else - snprintf(bus_id, MII_BUS_ID_SIZE, "stmmac-%x", - priv->plat->bus_id); + if (priv->plat->phy_node) { + phydev = of_phy_connect(dev, priv->plat->phy_node, + &stmmac_adjust_link, 0, interface); + } else { + if (priv->plat->phy_bus_name) + snprintf(bus_id, MII_BUS_ID_SIZE, "%s-%x", + priv->plat->phy_bus_name, priv->plat->bus_id); + else + snprintf(bus_id, MII_BUS_ID_SIZE, "stmmac-%x", + priv->plat->bus_id); - snprintf(phy_id_fmt, MII_BUS_ID_SIZE + 3, PHY_ID_FMT, bus_id, - priv->plat->phy_addr); - pr_debug("stmmac_init_phy: trying to attach to %s\n", phy_id_fmt); + snprintf(phy_id_fmt, MII_BUS_ID_SIZE + 3, PHY_ID_FMT, bus_id, + priv->plat->phy_addr); + pr_debug("stmmac_init_phy: trying to attach to %s\n", + phy_id_fmt); - phydev = phy_connect(dev, phy_id_fmt, &stmmac_adjust_link, interface); + phydev = phy_connect(dev, phy_id_fmt, &stmmac_adjust_link, + interface); + } if (IS_ERR(phydev)) { pr_err("%s: Could not attach to PHY\n", dev->name); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 1664c0186f5b..8d23155a1a7e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -28,6 +28,7 @@ #include #include #include +#include #include "stmmac.h" #include "stmmac_platform.h" @@ -144,13 +145,16 @@ static int stmmac_probe_config_dt(struct platform_device *pdev, /* Default to phy auto-detection */ plat->phy_addr = -1; + /* If we find a phy-handle property, use it as the PHY */ + plat->phy_node = of_parse_phandle(np, "phy-handle", 0); + /* "snps,phy-addr" is not a standard property. Mark it as deprecated * and warn of its use. Remove this when phy node support is added. */ if (of_property_read_u32(np, "snps,phy-addr", &plat->phy_addr) == 0) dev_warn(&pdev->dev, "snps,phy-addr property is deprecated\n"); - if (plat->phy_bus_name) + if (plat->phy_node || plat->phy_bus_name) plat->mdio_bus_data = NULL; else plat->mdio_bus_data = diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 7f484a239f53..c735f5c91eea 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -99,6 +99,7 @@ struct plat_stmmacenet_data { int phy_addr; int interface; struct stmmac_mdio_bus_data *mdio_bus_data; + struct device_node *phy_node; struct stmmac_dma_cfg *dma_cfg; int clk_csr; int has_gmac; -- cgit v1.2.3 From f4fb874cf076f9eafdd15c0a88cd0f0397b95e43 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Wed, 27 May 2015 21:07:26 -0400 Subject: if_vlan: fix vlaue -> value typo Fixes "vlaue" for "value" in include/linux/if_vlan.h. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index b9ab677c0c0a..a40d29846ac2 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -416,7 +416,7 @@ static inline void __vlan_hwaccel_put_tag(struct sk_buff *skb, /** * __vlan_get_tag - get the VLAN ID that is part of the payload * @skb: skbuff to query - * @vlan_tci: buffer to store vlaue + * @vlan_tci: buffer to store value * * Returns error if the skb is not of VLAN type */ @@ -435,7 +435,7 @@ static inline int __vlan_get_tag(const struct sk_buff *skb, u16 *vlan_tci) /** * __vlan_hwaccel_get_tag - get the VLAN ID that is in @skb->cb[] * @skb: skbuff to query - * @vlan_tci: buffer to store vlaue + * @vlan_tci: buffer to store value * * Returns error if @skb->vlan_tci is not set correctly */ @@ -456,7 +456,7 @@ static inline int __vlan_hwaccel_get_tag(const struct sk_buff *skb, /** * vlan_get_tag - get the VLAN ID from the skb * @skb: skbuff to query - * @vlan_tci: buffer to store vlaue + * @vlan_tci: buffer to store value * * Returns error if the skb is not VLAN tagged */ -- cgit v1.2.3 From 64ffaa2159b752e6c263dc57eaaaed7367d37493 Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Thu, 28 May 2015 22:28:38 +0300 Subject: net/mlx5_core,mlx5_ib: Do not use vmap() on coherent memory As David Daney pointed in mlx4_core driver [1], mlx5_core is also misusing the DMA-API. This patch is removing the code that vmap() memory allocated by dma_alloc_coherent(). After this patch, users of this drivers might fail allocating resources on memory fragmeneted systems. This will be fixed later on. [1] - https://patchwork.ozlabs.org/patch/458531/ CC: David Daney Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- drivers/infiniband/hw/mlx5/cq.c | 3 +- drivers/infiniband/hw/mlx5/qp.c | 2 +- drivers/infiniband/hw/mlx5/srq.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/alloc.c | 96 +++++-------------------- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 3 +- include/linux/mlx5/driver.h | 9 +-- 6 files changed, 22 insertions(+), 93 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 2ee6b1051975..4e88b18cf62e 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -590,8 +590,7 @@ static int alloc_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf, { int err; - err = mlx5_buf_alloc(dev->mdev, nent * cqe_size, - PAGE_SIZE * 2, &buf->buf); + err = mlx5_buf_alloc(dev->mdev, nent * cqe_size, &buf->buf); if (err) return err; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index d35f62d4f4c5..426eb88dfa49 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -768,7 +768,7 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift; qp->buf_size = err + (qp->rq.wqe_cnt << qp->rq.wqe_shift); - err = mlx5_buf_alloc(dev->mdev, qp->buf_size, PAGE_SIZE * 2, &qp->buf); + err = mlx5_buf_alloc(dev->mdev, qp->buf_size, &qp->buf); if (err) { mlx5_ib_dbg(dev, "err %d\n", err); goto err_uuar; diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 02d77a29764d..4242e1ded868 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -165,7 +165,7 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq, return err; } - if (mlx5_buf_alloc(dev->mdev, buf_size, PAGE_SIZE * 2, &srq->buf)) { + if (mlx5_buf_alloc(dev->mdev, buf_size, &srq->buf)) { mlx5_ib_dbg(dev, "buf alloc failed\n"); err = -ENOMEM; goto err_db; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c index ac0f7bf4be95..0715b497511f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c @@ -42,95 +42,36 @@ #include "mlx5_core.h" /* Handling for queue buffers -- we allocate a bunch of memory and - * register it in a memory region at HCA virtual address 0. If the - * requested size is > max_direct, we split the allocation into - * multiple pages, so we don't require too much contiguous memory. + * register it in a memory region at HCA virtual address 0. */ -int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, int max_direct, - struct mlx5_buf *buf) +int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, struct mlx5_buf *buf) { dma_addr_t t; buf->size = size; - if (size <= max_direct) { - buf->nbufs = 1; - buf->npages = 1; - buf->page_shift = (u8)get_order(size) + PAGE_SHIFT; - buf->direct.buf = dma_zalloc_coherent(&dev->pdev->dev, - size, &t, GFP_KERNEL); - if (!buf->direct.buf) - return -ENOMEM; - - buf->direct.map = t; - - while (t & ((1 << buf->page_shift) - 1)) { - --buf->page_shift; - buf->npages *= 2; - } - } else { - int i; - - buf->direct.buf = NULL; - buf->nbufs = (size + PAGE_SIZE - 1) / PAGE_SIZE; - buf->npages = buf->nbufs; - buf->page_shift = PAGE_SHIFT; - buf->page_list = kcalloc(buf->nbufs, sizeof(*buf->page_list), - GFP_KERNEL); - if (!buf->page_list) - return -ENOMEM; - - for (i = 0; i < buf->nbufs; i++) { - buf->page_list[i].buf = - dma_zalloc_coherent(&dev->pdev->dev, PAGE_SIZE, - &t, GFP_KERNEL); - if (!buf->page_list[i].buf) - goto err_free; - - buf->page_list[i].map = t; - } - - if (BITS_PER_LONG == 64) { - struct page **pages; - pages = kmalloc(sizeof(*pages) * buf->nbufs, GFP_KERNEL); - if (!pages) - goto err_free; - for (i = 0; i < buf->nbufs; i++) - pages[i] = virt_to_page(buf->page_list[i].buf); - buf->direct.buf = vmap(pages, buf->nbufs, VM_MAP, PAGE_KERNEL); - kfree(pages); - if (!buf->direct.buf) - goto err_free; - } - } + buf->npages = 1; + buf->page_shift = (u8)get_order(size) + PAGE_SHIFT; + buf->direct.buf = dma_zalloc_coherent(&dev->pdev->dev, + size, &t, GFP_KERNEL); + if (!buf->direct.buf) + return -ENOMEM; - return 0; + buf->direct.map = t; -err_free: - mlx5_buf_free(dev, buf); + while (t & ((1 << buf->page_shift) - 1)) { + --buf->page_shift; + buf->npages *= 2; + } - return -ENOMEM; + return 0; } EXPORT_SYMBOL_GPL(mlx5_buf_alloc); void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_buf *buf) { - int i; - - if (buf->nbufs == 1) - dma_free_coherent(&dev->pdev->dev, buf->size, buf->direct.buf, - buf->direct.map); - else { - if (BITS_PER_LONG == 64) - vunmap(buf->direct.buf); - - for (i = 0; i < buf->nbufs; i++) - if (buf->page_list[i].buf) - dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, - buf->page_list[i].buf, - buf->page_list[i].map); - kfree(buf->page_list); - } + dma_free_coherent(&dev->pdev->dev, buf->size, buf->direct.buf, + buf->direct.map); } EXPORT_SYMBOL_GPL(mlx5_buf_free); @@ -230,10 +171,7 @@ void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas) int i; for (i = 0; i < buf->npages; i++) { - if (buf->nbufs == 1) - addr = buf->direct.map + (i << buf->page_shift); - else - addr = buf->page_list[i].map; + addr = buf->direct.map + (i << buf->page_shift); pas[i] = cpu_to_be64(addr); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 58800e4f3958..3f511bd84489 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -346,8 +346,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, int inlen; eq->nent = roundup_pow_of_two(nent + MLX5_NUM_SPARE_EQE); - err = mlx5_buf_alloc(dev, eq->nent * MLX5_EQE_SIZE, 2 * PAGE_SIZE, - &eq->buf); + err = mlx5_buf_alloc(dev, eq->nent * MLX5_EQE_SIZE, &eq->buf); if (err) return err; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 9a90e7523dc2..c4cf25ffcc16 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -334,8 +334,6 @@ struct mlx5_buf_list { struct mlx5_buf { struct mlx5_buf_list direct; - struct mlx5_buf_list *page_list; - int nbufs; int npages; int size; u8 page_shift; @@ -586,11 +584,7 @@ struct mlx5_pas { static inline void *mlx5_buf_offset(struct mlx5_buf *buf, int offset) { - if (likely(BITS_PER_LONG == 64 || buf->nbufs == 1)) return buf->direct.buf + offset; - else - return buf->page_list[offset >> PAGE_SHIFT].buf + - (offset & (PAGE_SIZE - 1)); } extern struct workqueue_struct *mlx5_core_wq; @@ -669,8 +663,7 @@ void mlx5_health_cleanup(void); void __init mlx5_health_init(void); void mlx5_start_health_poll(struct mlx5_core_dev *dev); void mlx5_stop_health_poll(struct mlx5_core_dev *dev); -int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, int max_direct, - struct mlx5_buf *buf); +int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, struct mlx5_buf *buf); void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_buf *buf); struct mlx5_cmd_mailbox *mlx5_alloc_cmd_mailbox_chain(struct mlx5_core_dev *dev, gfp_t flags, int npages); -- cgit v1.2.3 From db058a186f98b057c19c42f7b10d9a96fd3b5d59 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 28 May 2015 22:28:39 +0300 Subject: net/mlx5_core: Set irq affinity hints Preparation for upcoming ethernet driver. - Move msix array from eq_table struct to priv since its not related to eq_table - Intorduce irq_info struct to hold all irq information - Move name from mlx5_eq to irq_info struct since it is irq property. - Set IRQ affinity hints Signed-off-by: Achiad Shochat Signed-off-by: Rana Shahout Signed-off-by: Saeed Mahameed Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 16 ++-- drivers/net/ethernet/mellanox/mlx5/core/main.c | 111 ++++++++++++++++++++++--- include/linux/mlx5/driver.h | 11 ++- 3 files changed, 117 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 3f511bd84489..516efc25fc4f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -339,7 +339,7 @@ static void init_eq_buf(struct mlx5_eq *eq) int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, int nent, u64 mask, const char *name, struct mlx5_uar *uar) { - struct mlx5_eq_table *table = &dev->priv.eq_table; + struct mlx5_priv *priv = &dev->priv; struct mlx5_create_eq_mbox_in *in; struct mlx5_create_eq_mbox_out out; int err; @@ -377,14 +377,15 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, goto err_in; } - snprintf(eq->name, MLX5_MAX_EQ_NAME, "%s@pci:%s", + snprintf(priv->irq_info[vecidx].name, MLX5_MAX_IRQ_NAME, "%s@pci:%s", name, pci_name(dev->pdev)); + eq->eqn = out.eq_number; eq->irqn = vecidx; eq->dev = dev; eq->doorbell = uar->map + MLX5_EQ_DOORBEL_OFFSET; - err = request_irq(table->msix_arr[vecidx].vector, mlx5_msix_handler, 0, - eq->name, eq); + err = request_irq(priv->msix_arr[vecidx].vector, mlx5_msix_handler, 0, + priv->irq_info[vecidx].name, eq); if (err) goto err_eq; @@ -400,7 +401,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, return 0; err_irq: - free_irq(table->msix_arr[vecidx].vector, eq); + free_irq(priv->msix_arr[vecidx].vector, eq); err_eq: mlx5_cmd_destroy_eq(dev, eq->eqn); @@ -416,16 +417,15 @@ EXPORT_SYMBOL_GPL(mlx5_create_map_eq); int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) { - struct mlx5_eq_table *table = &dev->priv.eq_table; int err; mlx5_debug_eq_remove(dev, eq); - free_irq(table->msix_arr[eq->irqn].vector, eq); + free_irq(dev->priv.msix_arr[eq->irqn].vector, eq); err = mlx5_cmd_destroy_eq(dev, eq->eqn); if (err) mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n", eq->eqn); - synchronize_irq(table->msix_arr[eq->irqn].vector); + synchronize_irq(dev->priv.msix_arr[eq->irqn].vector); mlx5_buf_free(dev, &eq->buf); return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 28425e5ea91f..55085b01b4ce 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -208,7 +209,8 @@ static void release_bar(struct pci_dev *pdev) static int mlx5_enable_msix(struct mlx5_core_dev *dev) { - struct mlx5_eq_table *table = &dev->priv.eq_table; + struct mlx5_priv *priv = &dev->priv; + struct mlx5_eq_table *table = &priv->eq_table; int num_eqs = 1 << dev->caps.gen.log_max_eq; int nvec; int i; @@ -218,14 +220,16 @@ static int mlx5_enable_msix(struct mlx5_core_dev *dev) if (nvec <= MLX5_EQ_VEC_COMP_BASE) return -ENOMEM; - table->msix_arr = kzalloc(nvec * sizeof(*table->msix_arr), GFP_KERNEL); - if (!table->msix_arr) - return -ENOMEM; + priv->msix_arr = kcalloc(nvec, sizeof(*priv->msix_arr), GFP_KERNEL); + + priv->irq_info = kcalloc(nvec, sizeof(*priv->irq_info), GFP_KERNEL); + if (!priv->msix_arr || !priv->irq_info) + goto err_free_msix; for (i = 0; i < nvec; i++) - table->msix_arr[i].entry = i; + priv->msix_arr[i].entry = i; - nvec = pci_enable_msix_range(dev->pdev, table->msix_arr, + nvec = pci_enable_msix_range(dev->pdev, priv->msix_arr, MLX5_EQ_VEC_COMP_BASE + 1, nvec); if (nvec < 0) return nvec; @@ -233,14 +237,20 @@ static int mlx5_enable_msix(struct mlx5_core_dev *dev) table->num_comp_vectors = nvec - MLX5_EQ_VEC_COMP_BASE; return 0; + +err_free_msix: + kfree(priv->irq_info); + kfree(priv->msix_arr); + return -ENOMEM; } static void mlx5_disable_msix(struct mlx5_core_dev *dev) { - struct mlx5_eq_table *table = &dev->priv.eq_table; + struct mlx5_priv *priv = &dev->priv; pci_disable_msix(dev->pdev); - kfree(table->msix_arr); + kfree(priv->irq_info); + kfree(priv->msix_arr); } struct mlx5_reg_host_endianess { @@ -507,6 +517,77 @@ static int mlx5_core_disable_hca(struct mlx5_core_dev *dev) return 0; } +static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i) +{ + struct mlx5_priv *priv = &mdev->priv; + struct msix_entry *msix = priv->msix_arr; + int irq = msix[i + MLX5_EQ_VEC_COMP_BASE].vector; + int numa_node = dev_to_node(&mdev->pdev->dev); + int err; + + if (!zalloc_cpumask_var(&priv->irq_info[i].mask, GFP_KERNEL)) { + mlx5_core_warn(mdev, "zalloc_cpumask_var failed"); + return -ENOMEM; + } + + err = cpumask_set_cpu_local_first(i, numa_node, priv->irq_info[i].mask); + if (err) { + mlx5_core_warn(mdev, "cpumask_set_cpu_local_first failed"); + goto err_clear_mask; + } + + err = irq_set_affinity_hint(irq, priv->irq_info[i].mask); + if (err) { + mlx5_core_warn(mdev, "irq_set_affinity_hint failed,irq 0x%.4x", + irq); + goto err_clear_mask; + } + + return 0; + +err_clear_mask: + free_cpumask_var(priv->irq_info[i].mask); + return err; +} + +static void mlx5_irq_clear_affinity_hint(struct mlx5_core_dev *mdev, int i) +{ + struct mlx5_priv *priv = &mdev->priv; + struct msix_entry *msix = priv->msix_arr; + int irq = msix[i + MLX5_EQ_VEC_COMP_BASE].vector; + + irq_set_affinity_hint(irq, NULL); + free_cpumask_var(priv->irq_info[i].mask); +} + +static int mlx5_irq_set_affinity_hints(struct mlx5_core_dev *mdev) +{ + int err; + int i; + + for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++) { + err = mlx5_irq_set_affinity_hint(mdev, i); + if (err) + goto err_out; + } + + return 0; + +err_out: + for (i--; i >= 0; i--) + mlx5_irq_clear_affinity_hint(mdev, i); + + return err; +} + +static void mlx5_irq_clear_affinity_hints(struct mlx5_core_dev *mdev) +{ + int i; + + for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++) + mlx5_irq_clear_affinity_hint(mdev, i); +} + int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, int *irqn) { struct mlx5_eq_table *table = &dev->priv.eq_table; @@ -549,7 +630,7 @@ static void free_comp_eqs(struct mlx5_core_dev *dev) static int alloc_comp_eqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = &dev->priv.eq_table; - char name[MLX5_MAX_EQ_NAME]; + char name[MLX5_MAX_IRQ_NAME]; struct mlx5_eq *eq; int ncomp_vec; int nent; @@ -566,7 +647,7 @@ static int alloc_comp_eqs(struct mlx5_core_dev *dev) goto clean; } - snprintf(name, MLX5_MAX_EQ_NAME, "mlx5_comp%d", i); + snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i); err = mlx5_create_map_eq(dev, eq, i + MLX5_EQ_VEC_COMP_BASE, nent, 0, name, &dev->priv.uuari.uars[0]); @@ -730,6 +811,12 @@ static int mlx5_dev_init(struct mlx5_core_dev *dev, struct pci_dev *pdev) goto err_stop_eqs; } + err = mlx5_irq_set_affinity_hints(dev); + if (err) { + dev_err(&pdev->dev, "Failed to alloc affinity hint cpumask\n"); + goto err_free_comp_eqs; + } + MLX5_INIT_DOORBELL_LOCK(&priv->cq_uar_lock); mlx5_init_cq_table(dev); @@ -739,6 +826,9 @@ static int mlx5_dev_init(struct mlx5_core_dev *dev, struct pci_dev *pdev) return 0; +err_free_comp_eqs: + free_comp_eqs(dev); + err_stop_eqs: mlx5_stop_eqs(dev); @@ -793,6 +883,7 @@ static void mlx5_dev_cleanup(struct mlx5_core_dev *dev) mlx5_cleanup_srq_table(dev); mlx5_cleanup_qp_table(dev); mlx5_cleanup_cq_table(dev); + mlx5_irq_clear_affinity_hints(dev); free_comp_eqs(dev); mlx5_stop_eqs(dev); mlx5_free_uuars(dev, &priv->uuari); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index c4cf25ffcc16..9e8979502826 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -85,7 +85,7 @@ enum { }; enum { - MLX5_MAX_EQ_NAME = 32 + MLX5_MAX_IRQ_NAME = 32 }; enum { @@ -349,7 +349,6 @@ struct mlx5_eq { u8 eqn; int nent; u64 mask; - char name[MLX5_MAX_EQ_NAME]; struct list_head list; int index; struct mlx5_rsc_debug *dbg; @@ -412,7 +411,6 @@ struct mlx5_eq_table { struct mlx5_eq pages_eq; struct mlx5_eq async_eq; struct mlx5_eq cmd_eq; - struct msix_entry *msix_arr; int num_comp_vectors; /* protect EQs list */ @@ -465,9 +463,16 @@ struct mlx5_mr_table { struct radix_tree_root tree; }; +struct mlx5_irq_info { + cpumask_var_t mask; + char name[MLX5_MAX_IRQ_NAME]; +}; + struct mlx5_priv { char name[MLX5_MAX_NAME_LEN]; struct mlx5_eq_table eq_table; + struct msix_entry *msix_arr; + struct mlx5_irq_info *irq_info; struct mlx5_uuar_info uuari; MLX5_DECLARE_DOORBELL_LOCK(cq_uar_lock); -- cgit v1.2.3 From e281682bf29438848daac11627216bceb1507b71 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 28 May 2015 22:28:40 +0300 Subject: net/mlx5_core: HW data structs/types definitions cleanup mlx5_ifc.h was heavily modified here since it is now generated by a script from the device specification (PRM rev 0.25). This specification is backward compatible to existing hardware. Some structures/fields were added here in order to enable the Ethernet functionality of the driver. Signed-off-by: Saeed Mahameed Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 17 +- drivers/net/ethernet/mellanox/mlx5/core/fw.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/main.c | 7 +- drivers/net/ethernet/mellanox/mlx5/core/mcg.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/uar.c | 37 + include/linux/mlx5/device.h | 113 +- include/linux/mlx5/driver.h | 4 +- include/linux/mlx5/mlx5_ifc.h | 6608 +++++++++++++++++++++++- include/linux/mlx5/qp.h | 25 + 9 files changed, 6705 insertions(+), 110 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index e3273faf4568..2f22cd2de2b0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -390,8 +390,17 @@ const char *mlx5_command_str(int command) case MLX5_CMD_OP_ARM_RQ: return "ARM_RQ"; - case MLX5_CMD_OP_RESIZE_SRQ: - return "RESIZE_SRQ"; + case MLX5_CMD_OP_CREATE_XRC_SRQ: + return "CREATE_XRC_SRQ"; + + case MLX5_CMD_OP_DESTROY_XRC_SRQ: + return "DESTROY_XRC_SRQ"; + + case MLX5_CMD_OP_QUERY_XRC_SRQ: + return "QUERY_XRC_SRQ"; + + case MLX5_CMD_OP_ARM_XRC_SRQ: + return "ARM_XRC_SRQ"; case MLX5_CMD_OP_ALLOC_PD: return "ALLOC_PD"; @@ -408,8 +417,8 @@ const char *mlx5_command_str(int command) case MLX5_CMD_OP_ATTACH_TO_MCG: return "ATTACH_TO_MCG"; - case MLX5_CMD_OP_DETACH_FROM_MCG: - return "DETACH_FROM_MCG"; + case MLX5_CMD_OP_DETTACH_FROM_MCG: + return "DETTACH_FROM_MCG"; case MLX5_CMD_OP_ALLOC_XRCD: return "ALLOC_XRCD"; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index 4b4cda3bcc5f..ef9b7695decd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -95,7 +95,7 @@ int mlx5_query_odp_caps(struct mlx5_core_dev *dev, struct mlx5_odp_caps *caps) goto out; } - memcpy(caps, MLX5_ADDR_OF(query_hca_cap_out, out, capability_struct), + memcpy(caps, MLX5_ADDR_OF(query_hca_cap_out, out, capability), sizeof(*caps)); mlx5_core_dbg(dev, "on-demand paging capabilities:\nrc: %08x\nuc: %08x\nud: %08x\n", diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 55085b01b4ce..a652cb93ceaa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -319,8 +319,7 @@ static void fw2drv_caps(struct mlx5_caps *caps, void *out) gen->max_srq_wqes = 1 << MLX5_GET_PR(cmd_hca_cap, out, log_max_srq_sz); gen->max_wqes = 1 << MLX5_GET_PR(cmd_hca_cap, out, log_max_qp_sz); gen->log_max_qp = MLX5_GET_PR(cmd_hca_cap, out, log_max_qp); - gen->log_max_strq = MLX5_GET_PR(cmd_hca_cap, out, log_max_strq_sz); - gen->log_max_srq = MLX5_GET_PR(cmd_hca_cap, out, log_max_srqs); + gen->log_max_srq = MLX5_GET_PR(cmd_hca_cap, out, log_max_srq); gen->max_cqes = 1 << MLX5_GET_PR(cmd_hca_cap, out, log_max_cq_sz); gen->log_max_cq = MLX5_GET_PR(cmd_hca_cap, out, log_max_cq); gen->max_eqes = 1 << MLX5_GET_PR(cmd_hca_cap, out, log_max_eq_sz); @@ -391,7 +390,7 @@ int mlx5_core_get_caps(struct mlx5_core_dev *dev, struct mlx5_caps *caps, goto query_ex; } mlx5_core_dbg(dev, "%s\n", caps_opmod_str(opmod)); - fw2drv_caps(caps, MLX5_ADDR_OF(query_hca_cap_out, out, capability_struct)); + fw2drv_caps(caps, MLX5_ADDR_OF(query_hca_cap_out, out, capability)); query_ex: kfree(out); @@ -453,7 +452,7 @@ static int handle_hca_cap(struct mlx5_core_dev *dev) /* disable checksum */ cur_caps->gen.flags &= ~MLX5_DEV_CAP_FLAG_CMDIF_CSUM; - copy_rw_fields(MLX5_ADDR_OF(set_hca_cap_in, set_ctx, hca_capability_struct), + copy_rw_fields(MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability), cur_caps); err = set_caps(dev, set_ctx, set_sz); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mcg.c b/drivers/net/ethernet/mellanox/mlx5/core/mcg.c index d79fd85d1dd5..d5a0c2d61a18 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mcg.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mcg.c @@ -91,7 +91,7 @@ int mlx5_core_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn) memset(&in, 0, sizeof(in)); memset(&out, 0, sizeof(out)); - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DETACH_FROM_MCG); + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DETTACH_FROM_MCG); memcpy(in.gid, mgid, sizeof(*mgid)); in.qpn = cpu_to_be32(qpn); err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/uar.c b/drivers/net/ethernet/mellanox/mlx5/core/uar.c index 5a89bb1d678a..ba58f6d7c761 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/uar.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/uar.c @@ -223,3 +223,40 @@ int mlx5_free_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari) return 0; } + +int mlx5_alloc_map_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar) +{ + phys_addr_t pfn; + phys_addr_t uar_bar_start; + int err; + + err = mlx5_cmd_alloc_uar(mdev, &uar->index); + if (err) { + mlx5_core_warn(mdev, "mlx5_cmd_alloc_uar() failed, %d\n", err); + return err; + } + + uar_bar_start = pci_resource_start(mdev->pdev, 0); + pfn = (uar_bar_start >> PAGE_SHIFT) + uar->index; + uar->map = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE); + if (!uar->map) { + mlx5_core_warn(mdev, "ioremap() failed, %d\n", err); + err = -ENOMEM; + goto err_free_uar; + } + + return 0; + +err_free_uar: + mlx5_cmd_free_uar(mdev, uar->index); + + return err; +} +EXPORT_SYMBOL(mlx5_alloc_map_uar); + +void mlx5_unmap_free_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar) +{ + iounmap(uar->map); + mlx5_cmd_free_uar(mdev, uar->index); +} +EXPORT_SYMBOL(mlx5_unmap_free_uar); diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index abf65c790421..feebed7b392b 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -35,6 +35,7 @@ #include #include +#include #if defined(__LITTLE_ENDIAN) #define MLX5_SET_HOST_ENDIANNESS 0 @@ -70,6 +71,14 @@ << __mlx5_dw_bit_off(typ, fld))); \ } while (0) +#define MLX5_SET_TO_ONES(typ, p, fld) do { \ + BUILD_BUG_ON(__mlx5_st_sz_bits(typ) % 32); \ + *((__be32 *)(p) + __mlx5_dw_off(typ, fld)) = \ + cpu_to_be32((be32_to_cpu(*((__be32 *)(p) + __mlx5_dw_off(typ, fld))) & \ + (~__mlx5_dw_mask(typ, fld))) | ((__mlx5_mask(typ, fld)) \ + << __mlx5_dw_bit_off(typ, fld))); \ +} while (0) + #define MLX5_GET(typ, p, fld) ((be32_to_cpu(*((__be32 *)(p) +\ __mlx5_dw_off(typ, fld))) >> __mlx5_dw_bit_off(typ, fld)) & \ __mlx5_mask(typ, fld)) @@ -264,6 +273,7 @@ enum { MLX5_OPCODE_RDMA_WRITE_IMM = 0x09, MLX5_OPCODE_SEND = 0x0a, MLX5_OPCODE_SEND_IMM = 0x0b, + MLX5_OPCODE_LSO = 0x0e, MLX5_OPCODE_RDMA_READ = 0x10, MLX5_OPCODE_ATOMIC_CS = 0x11, MLX5_OPCODE_ATOMIC_FA = 0x12, @@ -541,6 +551,10 @@ struct mlx5_cmd_prot_block { u8 sig; }; +enum { + MLX5_CQE_SYND_FLUSHED_IN_ERROR = 5, +}; + struct mlx5_err_cqe { u8 rsvd0[32]; __be32 srqn; @@ -554,13 +568,22 @@ struct mlx5_err_cqe { }; struct mlx5_cqe64 { - u8 rsvd0[17]; + u8 rsvd0[4]; + u8 lro_tcppsh_abort_dupack; + u8 lro_min_ttl; + __be16 lro_tcp_win; + __be32 lro_ack_seq_num; + __be32 rss_hash_result; + u8 rss_hash_type; u8 ml_path; - u8 rsvd20[4]; + u8 rsvd20[2]; + __be16 check_sum; __be16 slid; __be32 flags_rqpn; - u8 rsvd28[4]; - __be32 srqn; + u8 hds_ip_ext; + u8 l4_hdr_type_etc; + __be16 vlan_info; + __be32 srqn; /* [31:24]: lro_num_seg, [23:0]: srqn */ __be32 imm_inval_pkey; u8 rsvd40[4]; __be32 byte_cnt; @@ -571,6 +594,40 @@ struct mlx5_cqe64 { u8 op_own; }; +static inline int get_cqe_lro_tcppsh(struct mlx5_cqe64 *cqe) +{ + return (cqe->lro_tcppsh_abort_dupack >> 6) & 1; +} + +static inline u8 get_cqe_l4_hdr_type(struct mlx5_cqe64 *cqe) +{ + return (cqe->l4_hdr_type_etc >> 4) & 0x7; +} + +static inline int cqe_has_vlan(struct mlx5_cqe64 *cqe) +{ + return !!(cqe->l4_hdr_type_etc & 0x1); +} + +enum { + CQE_L4_HDR_TYPE_NONE = 0x0, + CQE_L4_HDR_TYPE_TCP_NO_ACK = 0x1, + CQE_L4_HDR_TYPE_UDP = 0x2, + CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA = 0x3, + CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA = 0x4, +}; + +enum { + CQE_RSS_HTYPE_IP = 0x3 << 6, + CQE_RSS_HTYPE_L4 = 0x3 << 2, +}; + +enum { + CQE_L2_OK = 1 << 0, + CQE_L3_OK = 1 << 1, + CQE_L4_OK = 1 << 2, +}; + struct mlx5_sig_err_cqe { u8 rsvd0[16]; __be32 expected_trans_sig; @@ -996,4 +1053,52 @@ struct mlx5_destroy_psv_out { u8 rsvd[8]; }; +#define MLX5_CMD_OP_MAX 0x920 + +enum { + VPORT_STATE_DOWN = 0x0, + VPORT_STATE_UP = 0x1, +}; + +enum { + MLX5_L3_PROT_TYPE_IPV4 = 0, + MLX5_L3_PROT_TYPE_IPV6 = 1, +}; + +enum { + MLX5_L4_PROT_TYPE_TCP = 0, + MLX5_L4_PROT_TYPE_UDP = 1, +}; + +enum { + MLX5_HASH_FIELD_SEL_SRC_IP = 1 << 0, + MLX5_HASH_FIELD_SEL_DST_IP = 1 << 1, + MLX5_HASH_FIELD_SEL_L4_SPORT = 1 << 2, + MLX5_HASH_FIELD_SEL_L4_DPORT = 1 << 3, + MLX5_HASH_FIELD_SEL_IPSEC_SPI = 1 << 4, +}; + +enum { + MLX5_MATCH_OUTER_HEADERS = 1 << 0, + MLX5_MATCH_MISC_PARAMETERS = 1 << 1, + MLX5_MATCH_INNER_HEADERS = 1 << 2, + +}; + +enum { + MLX5_FLOW_TABLE_TYPE_NIC_RCV = 0, + MLX5_FLOW_TABLE_TYPE_ESWITCH = 4, +}; + +enum { + MLX5_FLOW_CONTEXT_DEST_TYPE_VPORT = 0, + MLX5_FLOW_CONTEXT_DEST_TYPE_FLOW_TABLE = 1, + MLX5_FLOW_CONTEXT_DEST_TYPE_TIR = 2, +}; + +enum { + MLX5_RQC_RQ_TYPE_MEMORY_RQ_INLINE = 0x0, + MLX5_RQC_RQ_TYPE_MEMORY_RQ_RPM = 0x1, +}; + #endif /* MLX5_DEVICE_H */ diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 9e8979502826..3fd4fdc1ba16 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -44,7 +44,6 @@ #include #include -#include enum { MLX5_BOARD_ID_LEN = 64, @@ -278,7 +277,6 @@ struct mlx5_general_caps { u8 log_max_mkey; u8 log_max_pd; u8 log_max_srq; - u8 log_max_strq; u8 log_max_mrw_sz; u8 log_max_bsf_list_size; u8 log_max_klm_list_size; @@ -664,6 +662,8 @@ int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn); int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn); int mlx5_alloc_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari); int mlx5_free_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari); +int mlx5_alloc_map_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar); +void mlx5_unmap_free_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar); void mlx5_health_cleanup(void); void __init mlx5_health_init(void); void mlx5_start_health_poll(struct mlx5_core_dev *dev); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index cb3ad17edd1f..b27e9f6e090a 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. + * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -28,11 +28,44 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - */ - +*/ #ifndef MLX5_IFC_H #define MLX5_IFC_H +enum { + MLX5_EVENT_TYPE_CODING_COMPLETION_EVENTS = 0x0, + MLX5_EVENT_TYPE_CODING_PATH_MIGRATED_SUCCEEDED = 0x1, + MLX5_EVENT_TYPE_CODING_COMMUNICATION_ESTABLISHED = 0x2, + MLX5_EVENT_TYPE_CODING_SEND_QUEUE_DRAINED = 0x3, + MLX5_EVENT_TYPE_CODING_LAST_WQE_REACHED = 0x13, + MLX5_EVENT_TYPE_CODING_SRQ_LIMIT = 0x14, + MLX5_EVENT_TYPE_CODING_DCT_ALL_CONNECTIONS_CLOSED = 0x1c, + MLX5_EVENT_TYPE_CODING_DCT_ACCESS_KEY_VIOLATION = 0x1d, + MLX5_EVENT_TYPE_CODING_CQ_ERROR = 0x4, + MLX5_EVENT_TYPE_CODING_LOCAL_WQ_CATASTROPHIC_ERROR = 0x5, + MLX5_EVENT_TYPE_CODING_PATH_MIGRATION_FAILED = 0x7, + MLX5_EVENT_TYPE_CODING_PAGE_FAULT_EVENT = 0xc, + MLX5_EVENT_TYPE_CODING_INVALID_REQUEST_LOCAL_WQ_ERROR = 0x10, + MLX5_EVENT_TYPE_CODING_LOCAL_ACCESS_VIOLATION_WQ_ERROR = 0x11, + MLX5_EVENT_TYPE_CODING_LOCAL_SRQ_CATASTROPHIC_ERROR = 0x12, + MLX5_EVENT_TYPE_CODING_INTERNAL_ERROR = 0x8, + MLX5_EVENT_TYPE_CODING_PORT_STATE_CHANGE = 0x9, + MLX5_EVENT_TYPE_CODING_GPIO_EVENT = 0x15, + MLX5_EVENT_TYPE_CODING_REMOTE_CONFIGURATION_PROTOCOL_EVENT = 0x19, + MLX5_EVENT_TYPE_CODING_DOORBELL_BLUEFLAME_CONGESTION_EVENT = 0x1a, + MLX5_EVENT_TYPE_CODING_STALL_VL_EVENT = 0x1b, + MLX5_EVENT_TYPE_CODING_DROPPED_PACKET_LOGGED_EVENT = 0x1f, + MLX5_EVENT_TYPE_CODING_COMMAND_INTERFACE_COMPLETION = 0xa, + MLX5_EVENT_TYPE_CODING_PAGE_REQUEST = 0xb +}; + +enum { + MLX5_MODIFY_TIR_BITMASK_LRO = 0x0, + MLX5_MODIFY_TIR_BITMASK_INDIRECT_TABLE = 0x1, + MLX5_MODIFY_TIR_BITMASK_HASH = 0x2, + MLX5_MODIFY_TIR_BITMASK_TUNNELED_OFFLOAD_EN = 0x3 +}; + enum { MLX5_CMD_OP_QUERY_HCA_CAP = 0x100, MLX5_CMD_OP_QUERY_ADAPTER = 0x101, @@ -43,6 +76,8 @@ enum { MLX5_CMD_OP_QUERY_PAGES = 0x107, MLX5_CMD_OP_MANAGE_PAGES = 0x108, MLX5_CMD_OP_SET_HCA_CAP = 0x109, + MLX5_CMD_OP_QUERY_ISSI = 0x10a, + MLX5_CMD_OP_SET_ISSI = 0x10b, MLX5_CMD_OP_CREATE_MKEY = 0x200, MLX5_CMD_OP_QUERY_MKEY = 0x201, MLX5_CMD_OP_DESTROY_MKEY = 0x202, @@ -66,6 +101,7 @@ enum { MLX5_CMD_OP_2ERR_QP = 0x507, MLX5_CMD_OP_2RST_QP = 0x50a, MLX5_CMD_OP_QUERY_QP = 0x50b, + MLX5_CMD_OP_SQD_RTS_QP = 0x50c, MLX5_CMD_OP_INIT2INIT_QP = 0x50e, MLX5_CMD_OP_CREATE_PSV = 0x600, MLX5_CMD_OP_DESTROY_PSV = 0x601, @@ -73,7 +109,10 @@ enum { MLX5_CMD_OP_DESTROY_SRQ = 0x701, MLX5_CMD_OP_QUERY_SRQ = 0x702, MLX5_CMD_OP_ARM_RQ = 0x703, - MLX5_CMD_OP_RESIZE_SRQ = 0x704, + MLX5_CMD_OP_CREATE_XRC_SRQ = 0x705, + MLX5_CMD_OP_DESTROY_XRC_SRQ = 0x706, + MLX5_CMD_OP_QUERY_XRC_SRQ = 0x707, + MLX5_CMD_OP_ARM_XRC_SRQ = 0x708, MLX5_CMD_OP_CREATE_DCT = 0x710, MLX5_CMD_OP_DESTROY_DCT = 0x711, MLX5_CMD_OP_DRAIN_DCT = 0x712, @@ -85,8 +124,12 @@ enum { MLX5_CMD_OP_MODIFY_ESW_VPORT_CONTEXT = 0x753, MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT = 0x754, MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT = 0x755, - MLX5_CMD_OP_QUERY_RCOE_ADDRESS = 0x760, + MLX5_CMD_OP_QUERY_ROCE_ADDRESS = 0x760, MLX5_CMD_OP_SET_ROCE_ADDRESS = 0x761, + MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT = 0x762, + MLX5_CMD_OP_MODIFY_HCA_VPORT_CONTEXT = 0x763, + MLX5_CMD_OP_QUERY_HCA_VPORT_GID = 0x764, + MLX5_CMD_OP_QUERY_HCA_VPORT_PKEY = 0x765, MLX5_CMD_OP_QUERY_VPORT_COUNTER = 0x770, MLX5_CMD_OP_ALLOC_Q_COUNTER = 0x771, MLX5_CMD_OP_DEALLOC_Q_COUNTER = 0x772, @@ -98,7 +141,7 @@ enum { MLX5_CMD_OP_CONFIG_INT_MODERATION = 0x804, MLX5_CMD_OP_ACCESS_REG = 0x805, MLX5_CMD_OP_ATTACH_TO_MCG = 0x806, - MLX5_CMD_OP_DETACH_FROM_MCG = 0x807, + MLX5_CMD_OP_DETTACH_FROM_MCG = 0x807, MLX5_CMD_OP_GET_DROPPED_PACKET_LOG = 0x80a, MLX5_CMD_OP_MAD_IFC = 0x50d, MLX5_CMD_OP_QUERY_MAD_DEMUX = 0x80b, @@ -106,23 +149,22 @@ enum { MLX5_CMD_OP_NOP = 0x80d, MLX5_CMD_OP_ALLOC_XRCD = 0x80e, MLX5_CMD_OP_DEALLOC_XRCD = 0x80f, - MLX5_CMD_OP_SET_BURST_SIZE = 0x812, - MLX5_CMD_OP_QUERY_BURST_SZIE = 0x813, - MLX5_CMD_OP_ACTIVATE_TRACER = 0x814, - MLX5_CMD_OP_DEACTIVATE_TRACER = 0x815, - MLX5_CMD_OP_CREATE_SNIFFER_RULE = 0x820, - MLX5_CMD_OP_DESTROY_SNIFFER_RULE = 0x821, - MLX5_CMD_OP_QUERY_CONG_PARAMS = 0x822, - MLX5_CMD_OP_MODIFY_CONG_PARAMS = 0x823, - MLX5_CMD_OP_QUERY_CONG_STATISTICS = 0x824, + MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN = 0x816, + MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN = 0x817, + MLX5_CMD_OP_QUERY_CONG_STATUS = 0x822, + MLX5_CMD_OP_MODIFY_CONG_STATUS = 0x823, + MLX5_CMD_OP_QUERY_CONG_PARAMS = 0x824, + MLX5_CMD_OP_MODIFY_CONG_PARAMS = 0x825, + MLX5_CMD_OP_QUERY_CONG_STATISTICS = 0x826, + MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT = 0x827, + MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT = 0x828, + MLX5_CMD_OP_SET_L2_TABLE_ENTRY = 0x829, + MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY = 0x82a, + MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY = 0x82b, MLX5_CMD_OP_CREATE_TIR = 0x900, MLX5_CMD_OP_MODIFY_TIR = 0x901, MLX5_CMD_OP_DESTROY_TIR = 0x902, MLX5_CMD_OP_QUERY_TIR = 0x903, - MLX5_CMD_OP_CREATE_TIS = 0x912, - MLX5_CMD_OP_MODIFY_TIS = 0x913, - MLX5_CMD_OP_DESTROY_TIS = 0x914, - MLX5_CMD_OP_QUERY_TIS = 0x915, MLX5_CMD_OP_CREATE_SQ = 0x904, MLX5_CMD_OP_MODIFY_SQ = 0x905, MLX5_CMD_OP_DESTROY_SQ = 0x906, @@ -135,9 +177,430 @@ enum { MLX5_CMD_OP_MODIFY_RMP = 0x90d, MLX5_CMD_OP_DESTROY_RMP = 0x90e, MLX5_CMD_OP_QUERY_RMP = 0x90f, - MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY = 0x910, - MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY = 0x911, - MLX5_CMD_OP_MAX = 0x911 + MLX5_CMD_OP_CREATE_TIS = 0x912, + MLX5_CMD_OP_MODIFY_TIS = 0x913, + MLX5_CMD_OP_DESTROY_TIS = 0x914, + MLX5_CMD_OP_QUERY_TIS = 0x915, + MLX5_CMD_OP_CREATE_RQT = 0x916, + MLX5_CMD_OP_MODIFY_RQT = 0x917, + MLX5_CMD_OP_DESTROY_RQT = 0x918, + MLX5_CMD_OP_QUERY_RQT = 0x919, + MLX5_CMD_OP_CREATE_FLOW_TABLE = 0x930, + MLX5_CMD_OP_DESTROY_FLOW_TABLE = 0x931, + MLX5_CMD_OP_QUERY_FLOW_TABLE = 0x932, + MLX5_CMD_OP_CREATE_FLOW_GROUP = 0x933, + MLX5_CMD_OP_DESTROY_FLOW_GROUP = 0x934, + MLX5_CMD_OP_QUERY_FLOW_GROUP = 0x935, + MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY = 0x936, + MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY = 0x937, + MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY = 0x938 +}; + +struct mlx5_ifc_flow_table_fields_supported_bits { + u8 outer_dmac[0x1]; + u8 outer_smac[0x1]; + u8 outer_ether_type[0x1]; + u8 reserved_0[0x1]; + u8 outer_first_prio[0x1]; + u8 outer_first_cfi[0x1]; + u8 outer_first_vid[0x1]; + u8 reserved_1[0x1]; + u8 outer_second_prio[0x1]; + u8 outer_second_cfi[0x1]; + u8 outer_second_vid[0x1]; + u8 reserved_2[0x1]; + u8 outer_sip[0x1]; + u8 outer_dip[0x1]; + u8 outer_frag[0x1]; + u8 outer_ip_protocol[0x1]; + u8 outer_ip_ecn[0x1]; + u8 outer_ip_dscp[0x1]; + u8 outer_udp_sport[0x1]; + u8 outer_udp_dport[0x1]; + u8 outer_tcp_sport[0x1]; + u8 outer_tcp_dport[0x1]; + u8 outer_tcp_flags[0x1]; + u8 outer_gre_protocol[0x1]; + u8 outer_gre_key[0x1]; + u8 outer_vxlan_vni[0x1]; + u8 reserved_3[0x5]; + u8 source_eswitch_port[0x1]; + + u8 inner_dmac[0x1]; + u8 inner_smac[0x1]; + u8 inner_ether_type[0x1]; + u8 reserved_4[0x1]; + u8 inner_first_prio[0x1]; + u8 inner_first_cfi[0x1]; + u8 inner_first_vid[0x1]; + u8 reserved_5[0x1]; + u8 inner_second_prio[0x1]; + u8 inner_second_cfi[0x1]; + u8 inner_second_vid[0x1]; + u8 reserved_6[0x1]; + u8 inner_sip[0x1]; + u8 inner_dip[0x1]; + u8 inner_frag[0x1]; + u8 inner_ip_protocol[0x1]; + u8 inner_ip_ecn[0x1]; + u8 inner_ip_dscp[0x1]; + u8 inner_udp_sport[0x1]; + u8 inner_udp_dport[0x1]; + u8 inner_tcp_sport[0x1]; + u8 inner_tcp_dport[0x1]; + u8 inner_tcp_flags[0x1]; + u8 reserved_7[0x9]; + + u8 reserved_8[0x40]; +}; + +struct mlx5_ifc_flow_table_prop_layout_bits { + u8 ft_support[0x1]; + u8 reserved_0[0x1f]; + + u8 reserved_1[0x2]; + u8 log_max_ft_size[0x6]; + u8 reserved_2[0x10]; + u8 max_ft_level[0x8]; + + u8 reserved_3[0x20]; + + u8 reserved_4[0x18]; + u8 log_max_ft_num[0x8]; + + u8 reserved_5[0x18]; + u8 log_max_destination[0x8]; + + u8 reserved_6[0x18]; + u8 log_max_flow[0x8]; + + u8 reserved_7[0x40]; + + struct mlx5_ifc_flow_table_fields_supported_bits ft_field_support; + + struct mlx5_ifc_flow_table_fields_supported_bits ft_field_bitmask_support; +}; + +struct mlx5_ifc_odp_per_transport_service_cap_bits { + u8 send[0x1]; + u8 receive[0x1]; + u8 write[0x1]; + u8 read[0x1]; + u8 reserved_0[0x1]; + u8 srq_receive[0x1]; + u8 reserved_1[0x1a]; +}; + +struct mlx5_ifc_fte_match_set_lyr_2_4_bits { + u8 smac_47_16[0x20]; + + u8 smac_15_0[0x10]; + u8 ethertype[0x10]; + + u8 dmac_47_16[0x20]; + + u8 dmac_15_0[0x10]; + u8 first_prio[0x3]; + u8 first_cfi[0x1]; + u8 first_vid[0xc]; + + u8 ip_protocol[0x8]; + u8 ip_dscp[0x6]; + u8 ip_ecn[0x2]; + u8 vlan_tag[0x1]; + u8 reserved_0[0x1]; + u8 frag[0x1]; + u8 reserved_1[0x4]; + u8 tcp_flags[0x9]; + + u8 tcp_sport[0x10]; + u8 tcp_dport[0x10]; + + u8 reserved_2[0x20]; + + u8 udp_sport[0x10]; + u8 udp_dport[0x10]; + + u8 src_ip[4][0x20]; + + u8 dst_ip[4][0x20]; +}; + +struct mlx5_ifc_fte_match_set_misc_bits { + u8 reserved_0[0x20]; + + u8 reserved_1[0x10]; + u8 source_port[0x10]; + + u8 outer_second_prio[0x3]; + u8 outer_second_cfi[0x1]; + u8 outer_second_vid[0xc]; + u8 inner_second_prio[0x3]; + u8 inner_second_cfi[0x1]; + u8 inner_second_vid[0xc]; + + u8 outer_second_vlan_tag[0x1]; + u8 inner_second_vlan_tag[0x1]; + u8 reserved_2[0xe]; + u8 gre_protocol[0x10]; + + u8 gre_key_h[0x18]; + u8 gre_key_l[0x8]; + + u8 vxlan_vni[0x18]; + u8 reserved_3[0x8]; + + u8 reserved_4[0x20]; + + u8 reserved_5[0xc]; + u8 outer_ipv6_flow_label[0x14]; + + u8 reserved_6[0xc]; + u8 inner_ipv6_flow_label[0x14]; + + u8 reserved_7[0xe0]; +}; + +struct mlx5_ifc_cmd_pas_bits { + u8 pa_h[0x20]; + + u8 pa_l[0x14]; + u8 reserved_0[0xc]; +}; + +struct mlx5_ifc_uint64_bits { + u8 hi[0x20]; + + u8 lo[0x20]; +}; + +enum { + MLX5_ADS_STAT_RATE_NO_LIMIT = 0x0, + MLX5_ADS_STAT_RATE_2_5GBPS = 0x7, + MLX5_ADS_STAT_RATE_10GBPS = 0x8, + MLX5_ADS_STAT_RATE_30GBPS = 0x9, + MLX5_ADS_STAT_RATE_5GBPS = 0xa, + MLX5_ADS_STAT_RATE_20GBPS = 0xb, + MLX5_ADS_STAT_RATE_40GBPS = 0xc, + MLX5_ADS_STAT_RATE_60GBPS = 0xd, + MLX5_ADS_STAT_RATE_80GBPS = 0xe, + MLX5_ADS_STAT_RATE_120GBPS = 0xf, +}; + +struct mlx5_ifc_ads_bits { + u8 fl[0x1]; + u8 free_ar[0x1]; + u8 reserved_0[0xe]; + u8 pkey_index[0x10]; + + u8 reserved_1[0x8]; + u8 grh[0x1]; + u8 mlid[0x7]; + u8 rlid[0x10]; + + u8 ack_timeout[0x5]; + u8 reserved_2[0x3]; + u8 src_addr_index[0x8]; + u8 reserved_3[0x4]; + u8 stat_rate[0x4]; + u8 hop_limit[0x8]; + + u8 reserved_4[0x4]; + u8 tclass[0x8]; + u8 flow_label[0x14]; + + u8 rgid_rip[16][0x8]; + + u8 reserved_5[0x4]; + u8 f_dscp[0x1]; + u8 f_ecn[0x1]; + u8 reserved_6[0x1]; + u8 f_eth_prio[0x1]; + u8 ecn[0x2]; + u8 dscp[0x6]; + u8 udp_sport[0x10]; + + u8 dei_cfi[0x1]; + u8 eth_prio[0x3]; + u8 sl[0x4]; + u8 port[0x8]; + u8 rmac_47_32[0x10]; + + u8 rmac_31_0[0x20]; +}; + +struct mlx5_ifc_flow_table_nic_cap_bits { + u8 reserved_0[0x200]; + + struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_receive; + + u8 reserved_1[0x200]; + + struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_receive_sniffer; + + struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_transmit; + + u8 reserved_2[0x200]; + + struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_transmit_sniffer; + + u8 reserved_3[0x7200]; +}; + +struct mlx5_ifc_per_protocol_networking_offload_caps_bits { + u8 csum_cap[0x1]; + u8 vlan_cap[0x1]; + u8 lro_cap[0x1]; + u8 lro_psh_flag[0x1]; + u8 lro_time_stamp[0x1]; + u8 reserved_0[0x6]; + u8 max_lso_cap[0x5]; + u8 reserved_1[0x4]; + u8 rss_ind_tbl_cap[0x4]; + u8 reserved_2[0x3]; + u8 tunnel_lso_const_out_ip_id[0x1]; + u8 reserved_3[0x2]; + u8 tunnel_statless_gre[0x1]; + u8 tunnel_stateless_vxlan[0x1]; + + u8 reserved_4[0x20]; + + u8 reserved_5[0x10]; + u8 lro_min_mss_size[0x10]; + + u8 reserved_6[0x120]; + + u8 lro_timer_supported_periods[4][0x20]; + + u8 reserved_7[0x600]; +}; + +struct mlx5_ifc_roce_cap_bits { + u8 roce_apm[0x1]; + u8 reserved_0[0x1f]; + + u8 reserved_1[0x60]; + + u8 reserved_2[0xc]; + u8 l3_type[0x4]; + u8 reserved_3[0x8]; + u8 roce_version[0x8]; + + u8 reserved_4[0x10]; + u8 r_roce_dest_udp_port[0x10]; + + u8 r_roce_max_src_udp_port[0x10]; + u8 r_roce_min_src_udp_port[0x10]; + + u8 reserved_5[0x10]; + u8 roce_address_table_size[0x10]; + + u8 reserved_6[0x700]; +}; + +enum { + MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_1_BYTE = 0x0, + MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_2_BYTES = 0x2, + MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_4_BYTES = 0x4, + MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_8_BYTES = 0x8, + MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_16_BYTES = 0x10, + MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_32_BYTES = 0x20, + MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_64_BYTES = 0x40, + MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_128_BYTES = 0x80, + MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_256_BYTES = 0x100, +}; + +enum { + MLX5_ATOMIC_CAPS_ATOMIC_SIZE_DC_1_BYTE = 0x1, + MLX5_ATOMIC_CAPS_ATOMIC_SIZE_DC_2_BYTES = 0x2, + MLX5_ATOMIC_CAPS_ATOMIC_SIZE_DC_4_BYTES = 0x4, + MLX5_ATOMIC_CAPS_ATOMIC_SIZE_DC_8_BYTES = 0x8, + MLX5_ATOMIC_CAPS_ATOMIC_SIZE_DC_16_BYTES = 0x10, + MLX5_ATOMIC_CAPS_ATOMIC_SIZE_DC_32_BYTES = 0x20, + MLX5_ATOMIC_CAPS_ATOMIC_SIZE_DC_64_BYTES = 0x40, + MLX5_ATOMIC_CAPS_ATOMIC_SIZE_DC_128_BYTES = 0x80, + MLX5_ATOMIC_CAPS_ATOMIC_SIZE_DC_256_BYTES = 0x100, +}; + +struct mlx5_ifc_atomic_caps_bits { + u8 reserved_0[0x40]; + + u8 atomic_req_endianness[0x1]; + u8 reserved_1[0x1f]; + + u8 reserved_2[0x20]; + + u8 reserved_3[0x10]; + u8 atomic_operations[0x10]; + + u8 reserved_4[0x10]; + u8 atomic_size_qp[0x10]; + + u8 reserved_5[0x10]; + u8 atomic_size_dc[0x10]; + + u8 reserved_6[0x720]; +}; + +struct mlx5_ifc_odp_cap_bits { + u8 reserved_0[0x40]; + + u8 sig[0x1]; + u8 reserved_1[0x1f]; + + u8 reserved_2[0x20]; + + struct mlx5_ifc_odp_per_transport_service_cap_bits rc_odp_caps; + + struct mlx5_ifc_odp_per_transport_service_cap_bits uc_odp_caps; + + struct mlx5_ifc_odp_per_transport_service_cap_bits ud_odp_caps; + + u8 reserved_3[0x720]; +}; + +enum { + MLX5_WQ_TYPE_LINKED_LIST = 0x0, + MLX5_WQ_TYPE_CYCLIC = 0x1, + MLX5_WQ_TYPE_STRQ = 0x2, +}; + +enum { + MLX5_WQ_END_PAD_MODE_NONE = 0x0, + MLX5_WQ_END_PAD_MODE_ALIGN = 0x1, +}; + +enum { + MLX5_CMD_HCA_CAP_GID_TABLE_SIZE_8_GID_ENTRIES = 0x0, + MLX5_CMD_HCA_CAP_GID_TABLE_SIZE_16_GID_ENTRIES = 0x1, + MLX5_CMD_HCA_CAP_GID_TABLE_SIZE_32_GID_ENTRIES = 0x2, + MLX5_CMD_HCA_CAP_GID_TABLE_SIZE_64_GID_ENTRIES = 0x3, + MLX5_CMD_HCA_CAP_GID_TABLE_SIZE_128_GID_ENTRIES = 0x4, +}; + +enum { + MLX5_CMD_HCA_CAP_PKEY_TABLE_SIZE_128_ENTRIES = 0x0, + MLX5_CMD_HCA_CAP_PKEY_TABLE_SIZE_256_ENTRIES = 0x1, + MLX5_CMD_HCA_CAP_PKEY_TABLE_SIZE_512_ENTRIES = 0x2, + MLX5_CMD_HCA_CAP_PKEY_TABLE_SIZE_1K_ENTRIES = 0x3, + MLX5_CMD_HCA_CAP_PKEY_TABLE_SIZE_2K_ENTRIES = 0x4, + MLX5_CMD_HCA_CAP_PKEY_TABLE_SIZE_4K_ENTRIES = 0x5, +}; + +enum { + MLX5_CMD_HCA_CAP_PORT_TYPE_IB = 0x0, + MLX5_CMD_HCA_CAP_PORT_TYPE_ETHERNET = 0x1, +}; + +enum { + MLX5_CMD_HCA_CAP_CMDIF_CHECKSUM_DISABLED = 0x0, + MLX5_CMD_HCA_CAP_CMDIF_CHECKSUM_INITIAL_STATE = 0x1, + MLX5_CMD_HCA_CAP_CMDIF_CHECKSUM_ENABLED = 0x3, +}; + +enum { + MLX5_CAP_PORT_TYPE_IB = 0x0, + MLX5_CAP_PORT_TYPE_ETH = 0x1, }; struct mlx5_ifc_cmd_hca_cap_bits { @@ -148,9 +611,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_1[0xb]; u8 log_max_qp[0x5]; - u8 log_max_strq_sz[0x8]; - u8 reserved_2[0x3]; - u8 log_max_srqs[0x5]; + u8 reserved_2[0xb]; + u8 log_max_srq[0x5]; u8 reserved_3[0x10]; u8 reserved_4[0x8]; @@ -185,165 +647,6123 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 pad_cap[0x1]; u8 cc_query_allowed[0x1]; u8 cc_modify_allowed[0x1]; - u8 reserved_15[0x1d]; + u8 reserved_15[0xd]; + u8 gid_table_size[0x10]; - u8 reserved_16[0x6]; + u8 out_of_seq_cnt[0x1]; + u8 vport_counters[0x1]; + u8 reserved_16[0x4]; u8 max_qp_cnt[0xa]; u8 pkey_table_size[0x10]; - u8 eswitch_owner[0x1]; - u8 reserved_17[0xa]; + u8 vport_group_manager[0x1]; + u8 vhca_group_manager[0x1]; + u8 ib_virt[0x1]; + u8 eth_virt[0x1]; + u8 reserved_17[0x1]; + u8 ets[0x1]; + u8 nic_flow_table[0x1]; + u8 reserved_18[0x4]; u8 local_ca_ack_delay[0x5]; - u8 reserved_18[0x8]; + u8 reserved_19[0x6]; + u8 port_type[0x2]; u8 num_ports[0x8]; - u8 reserved_19[0x3]; + u8 reserved_20[0x3]; u8 log_max_msg[0x5]; - u8 reserved_20[0x18]; + u8 reserved_21[0x18]; u8 stat_rate_support[0x10]; - u8 reserved_21[0x10]; + u8 reserved_22[0xc]; + u8 cqe_version[0x4]; - u8 reserved_22[0x10]; + u8 compact_address_vector[0x1]; + u8 reserved_23[0xe]; + u8 drain_sigerr[0x1]; u8 cmdif_checksum[0x2]; u8 sigerr_cqe[0x1]; - u8 reserved_23[0x1]; + u8 reserved_24[0x1]; u8 wq_signature[0x1]; u8 sctr_data_cqe[0x1]; - u8 reserved_24[0x1]; + u8 reserved_25[0x1]; u8 sho[0x1]; u8 tph[0x1]; u8 rf[0x1]; - u8 dc[0x1]; - u8 reserved_25[0x2]; + u8 dct[0x1]; + u8 reserved_26[0x1]; + u8 eth_net_offloads[0x1]; u8 roce[0x1]; u8 atomic[0x1]; - u8 rsz_srq[0x1]; + u8 reserved_27[0x1]; u8 cq_oi[0x1]; u8 cq_resize[0x1]; u8 cq_moderation[0x1]; - u8 sniffer_rule_flow[0x1]; - u8 sniffer_rule_vport[0x1]; - u8 sniffer_rule_phy[0x1]; - u8 reserved_26[0x1]; + u8 reserved_28[0x3]; + u8 cq_eq_remap[0x1]; u8 pg[0x1]; u8 block_lb_mc[0x1]; - u8 reserved_27[0x3]; + u8 reserved_29[0x1]; + u8 scqe_break_moderation[0x1]; + u8 reserved_30[0x1]; u8 cd[0x1]; - u8 reserved_28[0x1]; + u8 reserved_31[0x1]; u8 apm[0x1]; - u8 reserved_29[0x7]; + u8 reserved_32[0x7]; u8 qkv[0x1]; u8 pkv[0x1]; - u8 reserved_30[0x4]; + u8 reserved_33[0x4]; u8 xrc[0x1]; u8 ud[0x1]; u8 uc[0x1]; u8 rc[0x1]; - u8 reserved_31[0xa]; + u8 reserved_34[0xa]; u8 uar_sz[0x6]; - u8 reserved_32[0x8]; + u8 reserved_35[0x8]; u8 log_pg_sz[0x8]; u8 bf[0x1]; - u8 reserved_33[0xa]; + u8 reserved_36[0x1]; + u8 pad_tx_eth_packet[0x1]; + u8 reserved_37[0x8]; u8 log_bf_reg_size[0x5]; - u8 reserved_34[0x10]; + u8 reserved_38[0x10]; - u8 reserved_35[0x10]; + u8 reserved_39[0x10]; u8 max_wqe_sz_sq[0x10]; - u8 reserved_36[0x10]; + u8 reserved_40[0x10]; u8 max_wqe_sz_rq[0x10]; - u8 reserved_37[0x10]; + u8 reserved_41[0x10]; u8 max_wqe_sz_sq_dc[0x10]; - u8 reserved_38[0x7]; + u8 reserved_42[0x7]; u8 max_qp_mcg[0x19]; - u8 reserved_39[0x18]; + u8 reserved_43[0x18]; u8 log_max_mcg[0x8]; - u8 reserved_40[0xb]; + u8 reserved_44[0x3]; + u8 log_max_transport_domain[0x5]; + u8 reserved_45[0x3]; u8 log_max_pd[0x5]; - u8 reserved_41[0xb]; + u8 reserved_46[0xb]; u8 log_max_xrcd[0x5]; - u8 reserved_42[0x20]; + u8 reserved_47[0x20]; - u8 reserved_43[0x3]; + u8 reserved_48[0x3]; u8 log_max_rq[0x5]; - u8 reserved_44[0x3]; + u8 reserved_49[0x3]; u8 log_max_sq[0x5]; - u8 reserved_45[0x3]; + u8 reserved_50[0x3]; u8 log_max_tir[0x5]; - u8 reserved_46[0x3]; + u8 reserved_51[0x3]; u8 log_max_tis[0x5]; - u8 reserved_47[0x13]; - u8 log_max_rq_per_tir[0x5]; - u8 reserved_48[0x3]; + u8 basic_cyclic_rcv_wqe[0x1]; + u8 reserved_52[0x2]; + u8 log_max_rmp[0x5]; + u8 reserved_53[0x3]; + u8 log_max_rqt[0x5]; + u8 reserved_54[0x3]; + u8 log_max_rqt_size[0x5]; + u8 reserved_55[0x3]; u8 log_max_tis_per_sq[0x5]; - u8 reserved_49[0xe0]; + u8 reserved_56[0x3]; + u8 log_max_stride_sz_rq[0x5]; + u8 reserved_57[0x3]; + u8 log_min_stride_sz_rq[0x5]; + u8 reserved_58[0x3]; + u8 log_max_stride_sz_sq[0x5]; + u8 reserved_59[0x3]; + u8 log_min_stride_sz_sq[0x5]; + + u8 reserved_60[0x1b]; + u8 log_max_wq_sz[0x5]; + + u8 reserved_61[0xa0]; - u8 reserved_50[0x10]; + u8 reserved_62[0x3]; + u8 log_max_l2_table[0x5]; + u8 reserved_63[0x8]; u8 log_uar_page_sz[0x10]; - u8 reserved_51[0x100]; + u8 reserved_64[0x100]; - u8 reserved_52[0x1f]; + u8 reserved_65[0x1f]; u8 cqe_zip[0x1]; u8 cqe_zip_timeout[0x10]; u8 cqe_zip_max_num[0x10]; - u8 reserved_53[0x220]; + u8 reserved_66[0x220]; }; -struct mlx5_ifc_set_hca_cap_in_bits { - u8 opcode[0x10]; - u8 reserved_0[0x10]; +enum { + MLX5_DEST_FORMAT_STRUCT_DESTINATION_TYPE_FLOW_TABLE_ = 0x1, + MLX5_DEST_FORMAT_STRUCT_DESTINATION_TYPE_TIR = 0x2, +}; - u8 reserved_1[0x10]; - u8 op_mod[0x10]; +struct mlx5_ifc_dest_format_struct_bits { + u8 destination_type[0x8]; + u8 destination_id[0x18]; - u8 reserved_2[0x40]; + u8 reserved_0[0x20]; +}; + +struct mlx5_ifc_fte_match_param_bits { + struct mlx5_ifc_fte_match_set_lyr_2_4_bits outer_headers; + + struct mlx5_ifc_fte_match_set_misc_bits misc_parameters; + + struct mlx5_ifc_fte_match_set_lyr_2_4_bits inner_headers; - struct mlx5_ifc_cmd_hca_cap_bits hca_capability_struct; + u8 reserved_0[0xa00]; }; -struct mlx5_ifc_query_hca_cap_in_bits { - u8 opcode[0x10]; - u8 reserved_0[0x10]; +enum { + MLX5_RX_HASH_FIELD_SELECT_SELECTED_FIELDS_SRC_IP = 0x0, + MLX5_RX_HASH_FIELD_SELECT_SELECTED_FIELDS_DST_IP = 0x1, + MLX5_RX_HASH_FIELD_SELECT_SELECTED_FIELDS_L4_SPORT = 0x2, + MLX5_RX_HASH_FIELD_SELECT_SELECTED_FIELDS_L4_DPORT = 0x3, + MLX5_RX_HASH_FIELD_SELECT_SELECTED_FIELDS_IPSEC_SPI = 0x4, +}; - u8 reserved_1[0x10]; - u8 op_mod[0x10]; +struct mlx5_ifc_rx_hash_field_select_bits { + u8 l3_prot_type[0x1]; + u8 l4_prot_type[0x1]; + u8 selected_fields[0x1e]; +}; - u8 reserved_2[0x40]; +enum { + MLX5_WQ_WQ_TYPE_WQ_LINKED_LIST = 0x0, + MLX5_WQ_WQ_TYPE_WQ_CYCLIC = 0x1, }; -struct mlx5_ifc_query_hca_cap_out_bits { - u8 status[0x8]; +enum { + MLX5_WQ_END_PADDING_MODE_END_PAD_NONE = 0x0, + MLX5_WQ_END_PADDING_MODE_END_PAD_ALIGN = 0x1, +}; + +struct mlx5_ifc_wq_bits { + u8 wq_type[0x4]; + u8 wq_signature[0x1]; + u8 end_padding_mode[0x2]; + u8 cd_slave[0x1]; u8 reserved_0[0x18]; - u8 syndrome[0x20]; + u8 hds_skip_first_sge[0x1]; + u8 log2_hds_buf_size[0x3]; + u8 reserved_1[0x7]; + u8 page_offset[0x5]; + u8 lwm[0x10]; - u8 reserved_1[0x40]; + u8 reserved_2[0x8]; + u8 pd[0x18]; + + u8 reserved_3[0x8]; + u8 uar_page[0x18]; + + u8 dbr_addr[0x40]; + + u8 hw_counter[0x20]; + + u8 sw_counter[0x20]; + + u8 reserved_4[0xc]; + u8 log_wq_stride[0x4]; + u8 reserved_5[0x3]; + u8 log_wq_pg_sz[0x5]; + u8 reserved_6[0x3]; + u8 log_wq_sz[0x5]; + + u8 reserved_7[0x4e0]; - u8 capability_struct[256][0x8]; + struct mlx5_ifc_cmd_pas_bits pas[0]; }; -struct mlx5_ifc_set_hca_cap_out_bits { - u8 status[0x8]; - u8 reserved_0[0x18]; +struct mlx5_ifc_rq_num_bits { + u8 reserved_0[0x8]; + u8 rq_num[0x18]; +}; - u8 syndrome[0x20]; +struct mlx5_ifc_mac_address_layout_bits { + u8 reserved_0[0x10]; + u8 mac_addr_47_32[0x10]; - u8 reserved_1[0x40]; + u8 mac_addr_31_0[0x20]; +}; + +struct mlx5_ifc_cong_control_r_roce_ecn_np_bits { + u8 reserved_0[0xa0]; + + u8 min_time_between_cnps[0x20]; + + u8 reserved_1[0x12]; + u8 cnp_dscp[0x6]; + u8 reserved_2[0x5]; + u8 cnp_802p_prio[0x3]; + + u8 reserved_3[0x720]; +}; + +struct mlx5_ifc_cong_control_r_roce_ecn_rp_bits { + u8 reserved_0[0x60]; + + u8 reserved_1[0x4]; + u8 clamp_tgt_rate[0x1]; + u8 reserved_2[0x3]; + u8 clamp_tgt_rate_after_time_inc[0x1]; + u8 reserved_3[0x17]; + + u8 reserved_4[0x20]; + + u8 rpg_time_reset[0x20]; + + u8 rpg_byte_reset[0x20]; + + u8 rpg_threshold[0x20]; + + u8 rpg_max_rate[0x20]; + + u8 rpg_ai_rate[0x20]; + + u8 rpg_hai_rate[0x20]; + + u8 rpg_gd[0x20]; + + u8 rpg_min_dec_fac[0x20]; + + u8 rpg_min_rate[0x20]; + + u8 reserved_5[0xe0]; + + u8 rate_to_set_on_first_cnp[0x20]; + + u8 dce_tcp_g[0x20]; + + u8 dce_tcp_rtt[0x20]; + + u8 rate_reduce_monitor_period[0x20]; + + u8 reserved_6[0x20]; + + u8 initial_alpha_value[0x20]; + + u8 reserved_7[0x4a0]; +}; + +struct mlx5_ifc_cong_control_802_1qau_rp_bits { + u8 reserved_0[0x80]; + + u8 rppp_max_rps[0x20]; + + u8 rpg_time_reset[0x20]; + + u8 rpg_byte_reset[0x20]; + + u8 rpg_threshold[0x20]; + + u8 rpg_max_rate[0x20]; + + u8 rpg_ai_rate[0x20]; + + u8 rpg_hai_rate[0x20]; + + u8 rpg_gd[0x20]; + + u8 rpg_min_dec_fac[0x20]; + + u8 rpg_min_rate[0x20]; + + u8 reserved_1[0x640]; +}; + +enum { + MLX5_RESIZE_FIELD_SELECT_RESIZE_FIELD_SELECT_LOG_CQ_SIZE = 0x1, + MLX5_RESIZE_FIELD_SELECT_RESIZE_FIELD_SELECT_PAGE_OFFSET = 0x2, + MLX5_RESIZE_FIELD_SELECT_RESIZE_FIELD_SELECT_LOG_PAGE_SIZE = 0x4, +}; + +struct mlx5_ifc_resize_field_select_bits { + u8 resize_field_select[0x20]; +}; + +enum { + MLX5_MODIFY_FIELD_SELECT_MODIFY_FIELD_SELECT_CQ_PERIOD = 0x1, + MLX5_MODIFY_FIELD_SELECT_MODIFY_FIELD_SELECT_CQ_MAX_COUNT = 0x2, + MLX5_MODIFY_FIELD_SELECT_MODIFY_FIELD_SELECT_OI = 0x4, + MLX5_MODIFY_FIELD_SELECT_MODIFY_FIELD_SELECT_C_EQN = 0x8, +}; + +struct mlx5_ifc_modify_field_select_bits { + u8 modify_field_select[0x20]; +}; + +struct mlx5_ifc_field_select_r_roce_np_bits { + u8 field_select_r_roce_np[0x20]; +}; + +struct mlx5_ifc_field_select_r_roce_rp_bits { + u8 field_select_r_roce_rp[0x20]; +}; + +enum { + MLX5_FIELD_SELECT_802_1QAU_RP_FIELD_SELECT_8021QAURP_RPPP_MAX_RPS = 0x4, + MLX5_FIELD_SELECT_802_1QAU_RP_FIELD_SELECT_8021QAURP_RPG_TIME_RESET = 0x8, + MLX5_FIELD_SELECT_802_1QAU_RP_FIELD_SELECT_8021QAURP_RPG_BYTE_RESET = 0x10, + MLX5_FIELD_SELECT_802_1QAU_RP_FIELD_SELECT_8021QAURP_RPG_THRESHOLD = 0x20, + MLX5_FIELD_SELECT_802_1QAU_RP_FIELD_SELECT_8021QAURP_RPG_MAX_RATE = 0x40, + MLX5_FIELD_SELECT_802_1QAU_RP_FIELD_SELECT_8021QAURP_RPG_AI_RATE = 0x80, + MLX5_FIELD_SELECT_802_1QAU_RP_FIELD_SELECT_8021QAURP_RPG_HAI_RATE = 0x100, + MLX5_FIELD_SELECT_802_1QAU_RP_FIELD_SELECT_8021QAURP_RPG_GD = 0x200, + MLX5_FIELD_SELECT_802_1QAU_RP_FIELD_SELECT_8021QAURP_RPG_MIN_DEC_FAC = 0x400, + MLX5_FIELD_SELECT_802_1QAU_RP_FIELD_SELECT_8021QAURP_RPG_MIN_RATE = 0x800, +}; + +struct mlx5_ifc_field_select_802_1qau_rp_bits { + u8 field_select_8021qaurp[0x20]; +}; + +struct mlx5_ifc_phys_layer_cntrs_bits { + u8 time_since_last_clear_high[0x20]; + + u8 time_since_last_clear_low[0x20]; + + u8 symbol_errors_high[0x20]; + + u8 symbol_errors_low[0x20]; + + u8 sync_headers_errors_high[0x20]; + + u8 sync_headers_errors_low[0x20]; + + u8 edpl_bip_errors_lane0_high[0x20]; + + u8 edpl_bip_errors_lane0_low[0x20]; + + u8 edpl_bip_errors_lane1_high[0x20]; + + u8 edpl_bip_errors_lane1_low[0x20]; + + u8 edpl_bip_errors_lane2_high[0x20]; + + u8 edpl_bip_errors_lane2_low[0x20]; + + u8 edpl_bip_errors_lane3_high[0x20]; + + u8 edpl_bip_errors_lane3_low[0x20]; + + u8 fc_fec_corrected_blocks_lane0_high[0x20]; + + u8 fc_fec_corrected_blocks_lane0_low[0x20]; + + u8 fc_fec_corrected_blocks_lane1_high[0x20]; + + u8 fc_fec_corrected_blocks_lane1_low[0x20]; + + u8 fc_fec_corrected_blocks_lane2_high[0x20]; + + u8 fc_fec_corrected_blocks_lane2_low[0x20]; + + u8 fc_fec_corrected_blocks_lane3_high[0x20]; + + u8 fc_fec_corrected_blocks_lane3_low[0x20]; + + u8 fc_fec_uncorrectable_blocks_lane0_high[0x20]; + + u8 fc_fec_uncorrectable_blocks_lane0_low[0x20]; + + u8 fc_fec_uncorrectable_blocks_lane1_high[0x20]; + + u8 fc_fec_uncorrectable_blocks_lane1_low[0x20]; + + u8 fc_fec_uncorrectable_blocks_lane2_high[0x20]; + + u8 fc_fec_uncorrectable_blocks_lane2_low[0x20]; + + u8 fc_fec_uncorrectable_blocks_lane3_high[0x20]; + + u8 fc_fec_uncorrectable_blocks_lane3_low[0x20]; + + u8 rs_fec_corrected_blocks_high[0x20]; + + u8 rs_fec_corrected_blocks_low[0x20]; + + u8 rs_fec_uncorrectable_blocks_high[0x20]; + + u8 rs_fec_uncorrectable_blocks_low[0x20]; + + u8 rs_fec_no_errors_blocks_high[0x20]; + + u8 rs_fec_no_errors_blocks_low[0x20]; + + u8 rs_fec_single_error_blocks_high[0x20]; + + u8 rs_fec_single_error_blocks_low[0x20]; + + u8 rs_fec_corrected_symbols_total_high[0x20]; + + u8 rs_fec_corrected_symbols_total_low[0x20]; + + u8 rs_fec_corrected_symbols_lane0_high[0x20]; + + u8 rs_fec_corrected_symbols_lane0_low[0x20]; + + u8 rs_fec_corrected_symbols_lane1_high[0x20]; + + u8 rs_fec_corrected_symbols_lane1_low[0x20]; + + u8 rs_fec_corrected_symbols_lane2_high[0x20]; + + u8 rs_fec_corrected_symbols_lane2_low[0x20]; + + u8 rs_fec_corrected_symbols_lane3_high[0x20]; + + u8 rs_fec_corrected_symbols_lane3_low[0x20]; + + u8 link_down_events[0x20]; + + u8 successful_recovery_events[0x20]; + + u8 reserved_0[0x180]; +}; + +struct mlx5_ifc_eth_per_traffic_grp_data_layout_bits { + u8 transmit_queue_high[0x20]; + + u8 transmit_queue_low[0x20]; + + u8 reserved_0[0x780]; +}; + +struct mlx5_ifc_eth_per_prio_grp_data_layout_bits { + u8 rx_octets_high[0x20]; + + u8 rx_octets_low[0x20]; + + u8 reserved_0[0xc0]; + + u8 rx_frames_high[0x20]; + + u8 rx_frames_low[0x20]; + + u8 tx_octets_high[0x20]; + + u8 tx_octets_low[0x20]; + + u8 reserved_1[0xc0]; + + u8 tx_frames_high[0x20]; + + u8 tx_frames_low[0x20]; + + u8 rx_pause_high[0x20]; + + u8 rx_pause_low[0x20]; + + u8 rx_pause_duration_high[0x20]; + + u8 rx_pause_duration_low[0x20]; + + u8 tx_pause_high[0x20]; + + u8 tx_pause_low[0x20]; + + u8 tx_pause_duration_high[0x20]; + + u8 tx_pause_duration_low[0x20]; + + u8 rx_pause_transition_high[0x20]; + + u8 rx_pause_transition_low[0x20]; + + u8 reserved_2[0x400]; +}; + +struct mlx5_ifc_eth_extended_cntrs_grp_data_layout_bits { + u8 port_transmit_wait_high[0x20]; + + u8 port_transmit_wait_low[0x20]; + + u8 reserved_0[0x780]; +}; + +struct mlx5_ifc_eth_3635_cntrs_grp_data_layout_bits { + u8 dot3stats_alignment_errors_high[0x20]; + + u8 dot3stats_alignment_errors_low[0x20]; + + u8 dot3stats_fcs_errors_high[0x20]; + + u8 dot3stats_fcs_errors_low[0x20]; + + u8 dot3stats_single_collision_frames_high[0x20]; + + u8 dot3stats_single_collision_frames_low[0x20]; + + u8 dot3stats_multiple_collision_frames_high[0x20]; + + u8 dot3stats_multiple_collision_frames_low[0x20]; + + u8 dot3stats_sqe_test_errors_high[0x20]; + + u8 dot3stats_sqe_test_errors_low[0x20]; + + u8 dot3stats_deferred_transmissions_high[0x20]; + + u8 dot3stats_deferred_transmissions_low[0x20]; + + u8 dot3stats_late_collisions_high[0x20]; + + u8 dot3stats_late_collisions_low[0x20]; + + u8 dot3stats_excessive_collisions_high[0x20]; + + u8 dot3stats_excessive_collisions_low[0x20]; + + u8 dot3stats_internal_mac_transmit_errors_high[0x20]; + + u8 dot3stats_internal_mac_transmit_errors_low[0x20]; + + u8 dot3stats_carrier_sense_errors_high[0x20]; + + u8 dot3stats_carrier_sense_errors_low[0x20]; + + u8 dot3stats_frame_too_longs_high[0x20]; + + u8 dot3stats_frame_too_longs_low[0x20]; + + u8 dot3stats_internal_mac_receive_errors_high[0x20]; + + u8 dot3stats_internal_mac_receive_errors_low[0x20]; + + u8 dot3stats_symbol_errors_high[0x20]; + + u8 dot3stats_symbol_errors_low[0x20]; + + u8 dot3control_in_unknown_opcodes_high[0x20]; + + u8 dot3control_in_unknown_opcodes_low[0x20]; + + u8 dot3in_pause_frames_high[0x20]; + + u8 dot3in_pause_frames_low[0x20]; + + u8 dot3out_pause_frames_high[0x20]; + + u8 dot3out_pause_frames_low[0x20]; + + u8 reserved_0[0x3c0]; +}; + +struct mlx5_ifc_eth_2819_cntrs_grp_data_layout_bits { + u8 ether_stats_drop_events_high[0x20]; + + u8 ether_stats_drop_events_low[0x20]; + + u8 ether_stats_octets_high[0x20]; + + u8 ether_stats_octets_low[0x20]; + + u8 ether_stats_pkts_high[0x20]; + + u8 ether_stats_pkts_low[0x20]; + + u8 ether_stats_broadcast_pkts_high[0x20]; + + u8 ether_stats_broadcast_pkts_low[0x20]; + + u8 ether_stats_multicast_pkts_high[0x20]; + + u8 ether_stats_multicast_pkts_low[0x20]; + + u8 ether_stats_crc_align_errors_high[0x20]; + + u8 ether_stats_crc_align_errors_low[0x20]; + + u8 ether_stats_undersize_pkts_high[0x20]; + + u8 ether_stats_undersize_pkts_low[0x20]; + + u8 ether_stats_oversize_pkts_high[0x20]; + + u8 ether_stats_oversize_pkts_low[0x20]; + + u8 ether_stats_fragments_high[0x20]; + + u8 ether_stats_fragments_low[0x20]; + + u8 ether_stats_jabbers_high[0x20]; + + u8 ether_stats_jabbers_low[0x20]; + + u8 ether_stats_collisions_high[0x20]; + + u8 ether_stats_collisions_low[0x20]; + + u8 ether_stats_pkts64octets_high[0x20]; + + u8 ether_stats_pkts64octets_low[0x20]; + + u8 ether_stats_pkts65to127octets_high[0x20]; + + u8 ether_stats_pkts65to127octets_low[0x20]; + + u8 ether_stats_pkts128to255octets_high[0x20]; + + u8 ether_stats_pkts128to255octets_low[0x20]; + + u8 ether_stats_pkts256to511octets_high[0x20]; + + u8 ether_stats_pkts256to511octets_low[0x20]; + + u8 ether_stats_pkts512to1023octets_high[0x20]; + + u8 ether_stats_pkts512to1023octets_low[0x20]; + + u8 ether_stats_pkts1024to1518octets_high[0x20]; + + u8 ether_stats_pkts1024to1518octets_low[0x20]; + + u8 ether_stats_pkts1519to2047octets_high[0x20]; + + u8 ether_stats_pkts1519to2047octets_low[0x20]; + + u8 ether_stats_pkts2048to4095octets_high[0x20]; + + u8 ether_stats_pkts2048to4095octets_low[0x20]; + + u8 ether_stats_pkts4096to8191octets_high[0x20]; + + u8 ether_stats_pkts4096to8191octets_low[0x20]; + + u8 ether_stats_pkts8192to10239octets_high[0x20]; + + u8 ether_stats_pkts8192to10239octets_low[0x20]; + + u8 reserved_0[0x280]; +}; + +struct mlx5_ifc_eth_2863_cntrs_grp_data_layout_bits { + u8 if_in_octets_high[0x20]; + + u8 if_in_octets_low[0x20]; + + u8 if_in_ucast_pkts_high[0x20]; + + u8 if_in_ucast_pkts_low[0x20]; + + u8 if_in_discards_high[0x20]; + + u8 if_in_discards_low[0x20]; + + u8 if_in_errors_high[0x20]; + + u8 if_in_errors_low[0x20]; + + u8 if_in_unknown_protos_high[0x20]; + + u8 if_in_unknown_protos_low[0x20]; + + u8 if_out_octets_high[0x20]; + + u8 if_out_octets_low[0x20]; + + u8 if_out_ucast_pkts_high[0x20]; + + u8 if_out_ucast_pkts_low[0x20]; + + u8 if_out_discards_high[0x20]; + + u8 if_out_discards_low[0x20]; + + u8 if_out_errors_high[0x20]; + + u8 if_out_errors_low[0x20]; + + u8 if_in_multicast_pkts_high[0x20]; + + u8 if_in_multicast_pkts_low[0x20]; + + u8 if_in_broadcast_pkts_high[0x20]; + + u8 if_in_broadcast_pkts_low[0x20]; + + u8 if_out_multicast_pkts_high[0x20]; + + u8 if_out_multicast_pkts_low[0x20]; + + u8 if_out_broadcast_pkts_high[0x20]; + + u8 if_out_broadcast_pkts_low[0x20]; + + u8 reserved_0[0x480]; +}; + +struct mlx5_ifc_eth_802_3_cntrs_grp_data_layout_bits { + u8 a_frames_transmitted_ok_high[0x20]; + + u8 a_frames_transmitted_ok_low[0x20]; + + u8 a_frames_received_ok_high[0x20]; + + u8 a_frames_received_ok_low[0x20]; + + u8 a_frame_check_sequence_errors_high[0x20]; + + u8 a_frame_check_sequence_errors_low[0x20]; + + u8 a_alignment_errors_high[0x20]; + + u8 a_alignment_errors_low[0x20]; + + u8 a_octets_transmitted_ok_high[0x20]; + + u8 a_octets_transmitted_ok_low[0x20]; + + u8 a_octets_received_ok_high[0x20]; + + u8 a_octets_received_ok_low[0x20]; + + u8 a_multicast_frames_xmitted_ok_high[0x20]; + + u8 a_multicast_frames_xmitted_ok_low[0x20]; + + u8 a_broadcast_frames_xmitted_ok_high[0x20]; + + u8 a_broadcast_frames_xmitted_ok_low[0x20]; + + u8 a_multicast_frames_received_ok_high[0x20]; + + u8 a_multicast_frames_received_ok_low[0x20]; + + u8 a_broadcast_frames_received_ok_high[0x20]; + + u8 a_broadcast_frames_received_ok_low[0x20]; + + u8 a_in_range_length_errors_high[0x20]; + + u8 a_in_range_length_errors_low[0x20]; + + u8 a_out_of_range_length_field_high[0x20]; + + u8 a_out_of_range_length_field_low[0x20]; + + u8 a_frame_too_long_errors_high[0x20]; + + u8 a_frame_too_long_errors_low[0x20]; + + u8 a_symbol_error_during_carrier_high[0x20]; + + u8 a_symbol_error_during_carrier_low[0x20]; + + u8 a_mac_control_frames_transmitted_high[0x20]; + + u8 a_mac_control_frames_transmitted_low[0x20]; + + u8 a_mac_control_frames_received_high[0x20]; + + u8 a_mac_control_frames_received_low[0x20]; + + u8 a_unsupported_opcodes_received_high[0x20]; + + u8 a_unsupported_opcodes_received_low[0x20]; + + u8 a_pause_mac_ctrl_frames_received_high[0x20]; + + u8 a_pause_mac_ctrl_frames_received_low[0x20]; + + u8 a_pause_mac_ctrl_frames_transmitted_high[0x20]; + + u8 a_pause_mac_ctrl_frames_transmitted_low[0x20]; + + u8 reserved_0[0x300]; +}; + +struct mlx5_ifc_cmd_inter_comp_event_bits { + u8 command_completion_vector[0x20]; + + u8 reserved_0[0xc0]; +}; + +struct mlx5_ifc_stall_vl_event_bits { + u8 reserved_0[0x18]; + u8 port_num[0x1]; + u8 reserved_1[0x3]; + u8 vl[0x4]; + + u8 reserved_2[0xa0]; +}; + +struct mlx5_ifc_db_bf_congestion_event_bits { + u8 event_subtype[0x8]; + u8 reserved_0[0x8]; + u8 congestion_level[0x8]; + u8 reserved_1[0x8]; + + u8 reserved_2[0xa0]; +}; + +struct mlx5_ifc_gpio_event_bits { + u8 reserved_0[0x60]; + + u8 gpio_event_hi[0x20]; + + u8 gpio_event_lo[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_port_state_change_event_bits { + u8 reserved_0[0x40]; + + u8 port_num[0x4]; + u8 reserved_1[0x1c]; + + u8 reserved_2[0x80]; +}; + +struct mlx5_ifc_dropped_packet_logged_bits { + u8 reserved_0[0xe0]; +}; + +enum { + MLX5_CQ_ERROR_SYNDROME_CQ_OVERRUN = 0x1, + MLX5_CQ_ERROR_SYNDROME_CQ_ACCESS_VIOLATION_ERROR = 0x2, +}; + +struct mlx5_ifc_cq_error_bits { + u8 reserved_0[0x8]; + u8 cqn[0x18]; + + u8 reserved_1[0x20]; + + u8 reserved_2[0x18]; + u8 syndrome[0x8]; + + u8 reserved_3[0x80]; +}; + +struct mlx5_ifc_rdma_page_fault_event_bits { + u8 bytes_committed[0x20]; + + u8 r_key[0x20]; + + u8 reserved_0[0x10]; + u8 packet_len[0x10]; + + u8 rdma_op_len[0x20]; + + u8 rdma_va[0x40]; + + u8 reserved_1[0x5]; + u8 rdma[0x1]; + u8 write[0x1]; + u8 requestor[0x1]; + u8 qp_number[0x18]; +}; + +struct mlx5_ifc_wqe_associated_page_fault_event_bits { + u8 bytes_committed[0x20]; + + u8 reserved_0[0x10]; + u8 wqe_index[0x10]; + + u8 reserved_1[0x10]; + u8 len[0x10]; + + u8 reserved_2[0x60]; + + u8 reserved_3[0x5]; + u8 rdma[0x1]; + u8 write_read[0x1]; + u8 requestor[0x1]; + u8 qpn[0x18]; +}; + +struct mlx5_ifc_qp_events_bits { + u8 reserved_0[0xa0]; + + u8 type[0x8]; + u8 reserved_1[0x18]; + + u8 reserved_2[0x8]; + u8 qpn_rqn_sqn[0x18]; +}; + +struct mlx5_ifc_dct_events_bits { + u8 reserved_0[0xc0]; + + u8 reserved_1[0x8]; + u8 dct_number[0x18]; +}; + +struct mlx5_ifc_comp_event_bits { + u8 reserved_0[0xc0]; + + u8 reserved_1[0x8]; + u8 cq_number[0x18]; +}; + +enum { + MLX5_QPC_STATE_RST = 0x0, + MLX5_QPC_STATE_INIT = 0x1, + MLX5_QPC_STATE_RTR = 0x2, + MLX5_QPC_STATE_RTS = 0x3, + MLX5_QPC_STATE_SQER = 0x4, + MLX5_QPC_STATE_ERR = 0x6, + MLX5_QPC_STATE_SQD = 0x7, + MLX5_QPC_STATE_SUSPENDED = 0x9, +}; + +enum { + MLX5_QPC_ST_RC = 0x0, + MLX5_QPC_ST_UC = 0x1, + MLX5_QPC_ST_UD = 0x2, + MLX5_QPC_ST_XRC = 0x3, + MLX5_QPC_ST_DCI = 0x5, + MLX5_QPC_ST_QP0 = 0x7, + MLX5_QPC_ST_QP1 = 0x8, + MLX5_QPC_ST_RAW_DATAGRAM = 0x9, + MLX5_QPC_ST_REG_UMR = 0xc, +}; + +enum { + MLX5_QPC_PM_STATE_ARMED = 0x0, + MLX5_QPC_PM_STATE_REARM = 0x1, + MLX5_QPC_PM_STATE_RESERVED = 0x2, + MLX5_QPC_PM_STATE_MIGRATED = 0x3, +}; + +enum { + MLX5_QPC_END_PADDING_MODE_SCATTER_AS_IS = 0x0, + MLX5_QPC_END_PADDING_MODE_PAD_TO_CACHE_LINE_ALIGNMENT = 0x1, +}; + +enum { + MLX5_QPC_MTU_256_BYTES = 0x1, + MLX5_QPC_MTU_512_BYTES = 0x2, + MLX5_QPC_MTU_1K_BYTES = 0x3, + MLX5_QPC_MTU_2K_BYTES = 0x4, + MLX5_QPC_MTU_4K_BYTES = 0x5, + MLX5_QPC_MTU_RAW_ETHERNET_QP = 0x7, +}; + +enum { + MLX5_QPC_ATOMIC_MODE_IB_SPEC = 0x1, + MLX5_QPC_ATOMIC_MODE_ONLY_8B = 0x2, + MLX5_QPC_ATOMIC_MODE_UP_TO_8B = 0x3, + MLX5_QPC_ATOMIC_MODE_UP_TO_16B = 0x4, + MLX5_QPC_ATOMIC_MODE_UP_TO_32B = 0x5, + MLX5_QPC_ATOMIC_MODE_UP_TO_64B = 0x6, + MLX5_QPC_ATOMIC_MODE_UP_TO_128B = 0x7, + MLX5_QPC_ATOMIC_MODE_UP_TO_256B = 0x8, +}; + +enum { + MLX5_QPC_CS_REQ_DISABLE = 0x0, + MLX5_QPC_CS_REQ_UP_TO_32B = 0x11, + MLX5_QPC_CS_REQ_UP_TO_64B = 0x22, +}; + +enum { + MLX5_QPC_CS_RES_DISABLE = 0x0, + MLX5_QPC_CS_RES_UP_TO_32B = 0x1, + MLX5_QPC_CS_RES_UP_TO_64B = 0x2, +}; + +struct mlx5_ifc_qpc_bits { + u8 state[0x4]; + u8 reserved_0[0x4]; + u8 st[0x8]; + u8 reserved_1[0x3]; + u8 pm_state[0x2]; + u8 reserved_2[0x7]; + u8 end_padding_mode[0x2]; + u8 reserved_3[0x2]; + + u8 wq_signature[0x1]; + u8 block_lb_mc[0x1]; + u8 atomic_like_write_en[0x1]; + u8 latency_sensitive[0x1]; + u8 reserved_4[0x1]; + u8 drain_sigerr[0x1]; + u8 reserved_5[0x2]; + u8 pd[0x18]; + + u8 mtu[0x3]; + u8 log_msg_max[0x5]; + u8 reserved_6[0x1]; + u8 log_rq_size[0x4]; + u8 log_rq_stride[0x3]; + u8 no_sq[0x1]; + u8 log_sq_size[0x4]; + u8 reserved_7[0x6]; + u8 rlky[0x1]; + u8 reserved_8[0x4]; + + u8 counter_set_id[0x8]; + u8 uar_page[0x18]; + + u8 reserved_9[0x8]; + u8 user_index[0x18]; + + u8 reserved_10[0x3]; + u8 log_page_size[0x5]; + u8 remote_qpn[0x18]; + + struct mlx5_ifc_ads_bits primary_address_path; + + struct mlx5_ifc_ads_bits secondary_address_path; + + u8 log_ack_req_freq[0x4]; + u8 reserved_11[0x4]; + u8 log_sra_max[0x3]; + u8 reserved_12[0x2]; + u8 retry_count[0x3]; + u8 rnr_retry[0x3]; + u8 reserved_13[0x1]; + u8 fre[0x1]; + u8 cur_rnr_retry[0x3]; + u8 cur_retry_count[0x3]; + u8 reserved_14[0x5]; + + u8 reserved_15[0x20]; + + u8 reserved_16[0x8]; + u8 next_send_psn[0x18]; + + u8 reserved_17[0x8]; + u8 cqn_snd[0x18]; + + u8 reserved_18[0x40]; + + u8 reserved_19[0x8]; + u8 last_acked_psn[0x18]; + + u8 reserved_20[0x8]; + u8 ssn[0x18]; + + u8 reserved_21[0x8]; + u8 log_rra_max[0x3]; + u8 reserved_22[0x1]; + u8 atomic_mode[0x4]; + u8 rre[0x1]; + u8 rwe[0x1]; + u8 rae[0x1]; + u8 reserved_23[0x1]; + u8 page_offset[0x6]; + u8 reserved_24[0x3]; + u8 cd_slave_receive[0x1]; + u8 cd_slave_send[0x1]; + u8 cd_master[0x1]; + + u8 reserved_25[0x3]; + u8 min_rnr_nak[0x5]; + u8 next_rcv_psn[0x18]; + + u8 reserved_26[0x8]; + u8 xrcd[0x18]; + + u8 reserved_27[0x8]; + u8 cqn_rcv[0x18]; + + u8 dbr_addr[0x40]; + + u8 q_key[0x20]; + + u8 reserved_28[0x5]; + u8 rq_type[0x3]; + u8 srqn_rmpn[0x18]; + + u8 reserved_29[0x8]; + u8 rmsn[0x18]; + + u8 hw_sq_wqebb_counter[0x10]; + u8 sw_sq_wqebb_counter[0x10]; + + u8 hw_rq_counter[0x20]; + + u8 sw_rq_counter[0x20]; + + u8 reserved_30[0x20]; + + u8 reserved_31[0xf]; + u8 cgs[0x1]; + u8 cs_req[0x8]; + u8 cs_res[0x8]; + + u8 dc_access_key[0x40]; + + u8 reserved_32[0xc0]; +}; + +struct mlx5_ifc_roce_addr_layout_bits { + u8 source_l3_address[16][0x8]; + + u8 reserved_0[0x3]; + u8 vlan_valid[0x1]; + u8 vlan_id[0xc]; + u8 source_mac_47_32[0x10]; + + u8 source_mac_31_0[0x20]; + + u8 reserved_1[0x14]; + u8 roce_l3_type[0x4]; + u8 roce_version[0x8]; + + u8 reserved_2[0x20]; +}; + +union mlx5_ifc_hca_cap_union_bits { + struct mlx5_ifc_cmd_hca_cap_bits cmd_hca_cap; + struct mlx5_ifc_odp_cap_bits odp_cap; + struct mlx5_ifc_atomic_caps_bits atomic_caps; + struct mlx5_ifc_roce_cap_bits roce_cap; + struct mlx5_ifc_per_protocol_networking_offload_caps_bits per_protocol_networking_offload_caps; + struct mlx5_ifc_flow_table_nic_cap_bits flow_table_nic_cap; + u8 reserved_0[0x8000]; +}; + +enum { + MLX5_FLOW_CONTEXT_ACTION_ALLOW = 0x1, + MLX5_FLOW_CONTEXT_ACTION_DROP = 0x2, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST = 0x4, +}; + +struct mlx5_ifc_flow_context_bits { + u8 reserved_0[0x20]; + + u8 group_id[0x20]; + + u8 reserved_1[0x8]; + u8 flow_tag[0x18]; + + u8 reserved_2[0x10]; + u8 action[0x10]; + + u8 reserved_3[0x8]; + u8 destination_list_size[0x18]; + + u8 reserved_4[0x160]; + + struct mlx5_ifc_fte_match_param_bits match_value; + + u8 reserved_5[0x600]; + + struct mlx5_ifc_dest_format_struct_bits destination[0]; +}; + +enum { + MLX5_XRC_SRQC_STATE_GOOD = 0x0, + MLX5_XRC_SRQC_STATE_ERROR = 0x1, +}; + +struct mlx5_ifc_xrc_srqc_bits { + u8 state[0x4]; + u8 log_xrc_srq_size[0x4]; + u8 reserved_0[0x18]; + + u8 wq_signature[0x1]; + u8 cont_srq[0x1]; + u8 reserved_1[0x1]; + u8 rlky[0x1]; + u8 basic_cyclic_rcv_wqe[0x1]; + u8 log_rq_stride[0x3]; + u8 xrcd[0x18]; + + u8 page_offset[0x6]; + u8 reserved_2[0x2]; + u8 cqn[0x18]; + + u8 reserved_3[0x20]; + + u8 user_index_equal_xrc_srqn[0x1]; + u8 reserved_4[0x1]; + u8 log_page_size[0x6]; + u8 user_index[0x18]; + + u8 reserved_5[0x20]; + + u8 reserved_6[0x8]; + u8 pd[0x18]; + + u8 lwm[0x10]; + u8 wqe_cnt[0x10]; + + u8 reserved_7[0x40]; + + u8 db_record_addr_h[0x20]; + + u8 db_record_addr_l[0x1e]; + u8 reserved_8[0x2]; + + u8 reserved_9[0x80]; +}; + +struct mlx5_ifc_traffic_counter_bits { + u8 packets[0x40]; + + u8 octets[0x40]; +}; + +struct mlx5_ifc_tisc_bits { + u8 reserved_0[0xc]; + u8 prio[0x4]; + u8 reserved_1[0x10]; + + u8 reserved_2[0x100]; + + u8 reserved_3[0x8]; + u8 transport_domain[0x18]; + + u8 reserved_4[0x3c0]; +}; + +enum { + MLX5_TIRC_DISP_TYPE_DIRECT = 0x0, + MLX5_TIRC_DISP_TYPE_INDIRECT = 0x1, +}; + +enum { + MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO = 0x1, + MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO = 0x2, +}; + +enum { + MLX5_TIRC_RX_HASH_FN_HASH_NONE = 0x0, + MLX5_TIRC_RX_HASH_FN_HASH_INVERTED_XOR8 = 0x1, + MLX5_TIRC_RX_HASH_FN_HASH_TOEPLITZ = 0x2, +}; + +enum { + MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST_ = 0x1, + MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST_ = 0x2, +}; + +struct mlx5_ifc_tirc_bits { + u8 reserved_0[0x20]; + + u8 disp_type[0x4]; + u8 reserved_1[0x1c]; + + u8 reserved_2[0x40]; + + u8 reserved_3[0x4]; + u8 lro_timeout_period_usecs[0x10]; + u8 lro_enable_mask[0x4]; + u8 lro_max_ip_payload_size[0x8]; + + u8 reserved_4[0x40]; + + u8 reserved_5[0x8]; + u8 inline_rqn[0x18]; + + u8 rx_hash_symmetric[0x1]; + u8 reserved_6[0x1]; + u8 tunneled_offload_en[0x1]; + u8 reserved_7[0x5]; + u8 indirect_table[0x18]; + + u8 rx_hash_fn[0x4]; + u8 reserved_8[0x2]; + u8 self_lb_block[0x2]; + u8 transport_domain[0x18]; + + u8 rx_hash_toeplitz_key[10][0x20]; + + struct mlx5_ifc_rx_hash_field_select_bits rx_hash_field_selector_outer; + + struct mlx5_ifc_rx_hash_field_select_bits rx_hash_field_selector_inner; + + u8 reserved_9[0x4c0]; +}; + +enum { + MLX5_SRQC_STATE_GOOD = 0x0, + MLX5_SRQC_STATE_ERROR = 0x1, +}; + +struct mlx5_ifc_srqc_bits { + u8 state[0x4]; + u8 log_srq_size[0x4]; + u8 reserved_0[0x18]; + + u8 wq_signature[0x1]; + u8 cont_srq[0x1]; + u8 reserved_1[0x1]; + u8 rlky[0x1]; + u8 reserved_2[0x1]; + u8 log_rq_stride[0x3]; + u8 xrcd[0x18]; + + u8 page_offset[0x6]; + u8 reserved_3[0x2]; + u8 cqn[0x18]; + + u8 reserved_4[0x20]; + + u8 reserved_5[0x2]; + u8 log_page_size[0x6]; + u8 reserved_6[0x18]; + + u8 reserved_7[0x20]; + + u8 reserved_8[0x8]; + u8 pd[0x18]; + + u8 lwm[0x10]; + u8 wqe_cnt[0x10]; + + u8 reserved_9[0x40]; + + u8 db_record_addr_h[0x20]; + + u8 db_record_addr_l[0x1e]; + u8 reserved_10[0x2]; + + u8 reserved_11[0x80]; +}; + +enum { + MLX5_SQC_STATE_RST = 0x0, + MLX5_SQC_STATE_RDY = 0x1, + MLX5_SQC_STATE_ERR = 0x3, +}; + +struct mlx5_ifc_sqc_bits { + u8 rlky[0x1]; + u8 cd_master[0x1]; + u8 fre[0x1]; + u8 flush_in_error_en[0x1]; + u8 reserved_0[0x4]; + u8 state[0x4]; + u8 reserved_1[0x14]; + + u8 reserved_2[0x8]; + u8 user_index[0x18]; + + u8 reserved_3[0x8]; + u8 cqn[0x18]; + + u8 reserved_4[0xa0]; + + u8 tis_lst_sz[0x10]; + u8 reserved_5[0x10]; + + u8 reserved_6[0x40]; + + u8 reserved_7[0x8]; + u8 tis_num_0[0x18]; + + struct mlx5_ifc_wq_bits wq; +}; + +struct mlx5_ifc_rqtc_bits { + u8 reserved_0[0xa0]; + + u8 reserved_1[0x10]; + u8 rqt_max_size[0x10]; + + u8 reserved_2[0x10]; + u8 rqt_actual_size[0x10]; + + u8 reserved_3[0x6a0]; + + struct mlx5_ifc_rq_num_bits rq_num[0]; +}; + +enum { + MLX5_RQC_MEM_RQ_TYPE_MEMORY_RQ_INLINE = 0x0, + MLX5_RQC_MEM_RQ_TYPE_MEMORY_RQ_RMP = 0x1, +}; + +enum { + MLX5_RQC_STATE_RST = 0x0, + MLX5_RQC_STATE_RDY = 0x1, + MLX5_RQC_STATE_ERR = 0x3, +}; + +struct mlx5_ifc_rqc_bits { + u8 rlky[0x1]; + u8 reserved_0[0x2]; + u8 vsd[0x1]; + u8 mem_rq_type[0x4]; + u8 state[0x4]; + u8 reserved_1[0x1]; + u8 flush_in_error_en[0x1]; + u8 reserved_2[0x12]; + + u8 reserved_3[0x8]; + u8 user_index[0x18]; + + u8 reserved_4[0x8]; + u8 cqn[0x18]; + + u8 counter_set_id[0x8]; + u8 reserved_5[0x18]; + + u8 reserved_6[0x8]; + u8 rmpn[0x18]; + + u8 reserved_7[0xe0]; + + struct mlx5_ifc_wq_bits wq; +}; + +enum { + MLX5_RMPC_STATE_RDY = 0x1, + MLX5_RMPC_STATE_ERR = 0x3, +}; + +struct mlx5_ifc_rmpc_bits { + u8 reserved_0[0x8]; + u8 state[0x4]; + u8 reserved_1[0x14]; + + u8 basic_cyclic_rcv_wqe[0x1]; + u8 reserved_2[0x1f]; + + u8 reserved_3[0x140]; + + struct mlx5_ifc_wq_bits wq; +}; + +enum { + MLX5_NIC_VPORT_CONTEXT_ALLOWED_LIST_TYPE_CURRENT_UC_MAC_ADDRESS = 0x0, +}; + +struct mlx5_ifc_nic_vport_context_bits { + u8 reserved_0[0x1f]; + u8 roce_en[0x1]; + + u8 reserved_1[0x760]; + + u8 reserved_2[0x5]; + u8 allowed_list_type[0x3]; + u8 reserved_3[0xc]; + u8 allowed_list_size[0xc]; + + struct mlx5_ifc_mac_address_layout_bits permanent_address; + + u8 reserved_4[0x20]; + + u8 current_uc_mac_address[0][0x40]; +}; + +enum { + MLX5_MKC_ACCESS_MODE_PA = 0x0, + MLX5_MKC_ACCESS_MODE_MTT = 0x1, + MLX5_MKC_ACCESS_MODE_KLMS = 0x2, +}; + +struct mlx5_ifc_mkc_bits { + u8 reserved_0[0x1]; + u8 free[0x1]; + u8 reserved_1[0xd]; + u8 small_fence_on_rdma_read_response[0x1]; + u8 umr_en[0x1]; + u8 a[0x1]; + u8 rw[0x1]; + u8 rr[0x1]; + u8 lw[0x1]; + u8 lr[0x1]; + u8 access_mode[0x2]; + u8 reserved_2[0x8]; + + u8 qpn[0x18]; + u8 mkey_7_0[0x8]; + + u8 reserved_3[0x20]; + + u8 length64[0x1]; + u8 bsf_en[0x1]; + u8 sync_umr[0x1]; + u8 reserved_4[0x2]; + u8 expected_sigerr_count[0x1]; + u8 reserved_5[0x1]; + u8 en_rinval[0x1]; + u8 pd[0x18]; + + u8 start_addr[0x40]; + + u8 len[0x40]; + + u8 bsf_octword_size[0x20]; + + u8 reserved_6[0x80]; + + u8 translations_octword_size[0x20]; + + u8 reserved_7[0x1b]; + u8 log_page_size[0x5]; + + u8 reserved_8[0x20]; +}; + +struct mlx5_ifc_pkey_bits { + u8 reserved_0[0x10]; + u8 pkey[0x10]; +}; + +struct mlx5_ifc_array128_auto_bits { + u8 array128_auto[16][0x8]; +}; + +struct mlx5_ifc_hca_vport_context_bits { + u8 field_select[0x20]; + + u8 reserved_0[0xe0]; + + u8 sm_virt_aware[0x1]; + u8 has_smi[0x1]; + u8 has_raw[0x1]; + u8 grh_required[0x1]; + u8 reserved_1[0x10]; + u8 port_state_policy[0x4]; + u8 phy_port_state[0x4]; + u8 vport_state[0x4]; + + u8 reserved_2[0x60]; + + u8 port_guid[0x40]; + + u8 node_guid[0x40]; + + u8 cap_mask1[0x20]; + + u8 cap_mask1_field_select[0x20]; + + u8 cap_mask2[0x20]; + + u8 cap_mask2_field_select[0x20]; + + u8 reserved_3[0x80]; + + u8 lid[0x10]; + u8 reserved_4[0x4]; + u8 init_type_reply[0x4]; + u8 lmc[0x3]; + u8 subnet_timeout[0x5]; + + u8 sm_lid[0x10]; + u8 sm_sl[0x4]; + u8 reserved_5[0xc]; + + u8 qkey_violation_counter[0x10]; + u8 pkey_violation_counter[0x10]; + + u8 reserved_6[0xca0]; +}; + +enum { + MLX5_EQC_STATUS_OK = 0x0, + MLX5_EQC_STATUS_EQ_WRITE_FAILURE = 0xa, +}; + +enum { + MLX5_EQC_ST_ARMED = 0x9, + MLX5_EQC_ST_FIRED = 0xa, +}; + +struct mlx5_ifc_eqc_bits { + u8 status[0x4]; + u8 reserved_0[0x9]; + u8 ec[0x1]; + u8 oi[0x1]; + u8 reserved_1[0x5]; + u8 st[0x4]; + u8 reserved_2[0x8]; + + u8 reserved_3[0x20]; + + u8 reserved_4[0x14]; + u8 page_offset[0x6]; + u8 reserved_5[0x6]; + + u8 reserved_6[0x3]; + u8 log_eq_size[0x5]; + u8 uar_page[0x18]; + + u8 reserved_7[0x20]; + + u8 reserved_8[0x18]; + u8 intr[0x8]; + + u8 reserved_9[0x3]; + u8 log_page_size[0x5]; + u8 reserved_10[0x18]; + + u8 reserved_11[0x60]; + + u8 reserved_12[0x8]; + u8 consumer_counter[0x18]; + + u8 reserved_13[0x8]; + u8 producer_counter[0x18]; + + u8 reserved_14[0x80]; +}; + +enum { + MLX5_DCTC_STATE_ACTIVE = 0x0, + MLX5_DCTC_STATE_DRAINING = 0x1, + MLX5_DCTC_STATE_DRAINED = 0x2, +}; + +enum { + MLX5_DCTC_CS_RES_DISABLE = 0x0, + MLX5_DCTC_CS_RES_NA = 0x1, + MLX5_DCTC_CS_RES_UP_TO_64B = 0x2, +}; + +enum { + MLX5_DCTC_MTU_256_BYTES = 0x1, + MLX5_DCTC_MTU_512_BYTES = 0x2, + MLX5_DCTC_MTU_1K_BYTES = 0x3, + MLX5_DCTC_MTU_2K_BYTES = 0x4, + MLX5_DCTC_MTU_4K_BYTES = 0x5, +}; + +struct mlx5_ifc_dctc_bits { + u8 reserved_0[0x4]; + u8 state[0x4]; + u8 reserved_1[0x18]; + + u8 reserved_2[0x8]; + u8 user_index[0x18]; + + u8 reserved_3[0x8]; + u8 cqn[0x18]; + + u8 counter_set_id[0x8]; + u8 atomic_mode[0x4]; + u8 rre[0x1]; + u8 rwe[0x1]; + u8 rae[0x1]; + u8 atomic_like_write_en[0x1]; + u8 latency_sensitive[0x1]; + u8 rlky[0x1]; + u8 free_ar[0x1]; + u8 reserved_4[0xd]; + + u8 reserved_5[0x8]; + u8 cs_res[0x8]; + u8 reserved_6[0x3]; + u8 min_rnr_nak[0x5]; + u8 reserved_7[0x8]; + + u8 reserved_8[0x8]; + u8 srqn[0x18]; + + u8 reserved_9[0x8]; + u8 pd[0x18]; + + u8 tclass[0x8]; + u8 reserved_10[0x4]; + u8 flow_label[0x14]; + + u8 dc_access_key[0x40]; + + u8 reserved_11[0x5]; + u8 mtu[0x3]; + u8 port[0x8]; + u8 pkey_index[0x10]; + + u8 reserved_12[0x8]; + u8 my_addr_index[0x8]; + u8 reserved_13[0x8]; + u8 hop_limit[0x8]; + + u8 dc_access_key_violation_count[0x20]; + + u8 reserved_14[0x14]; + u8 dei_cfi[0x1]; + u8 eth_prio[0x3]; + u8 ecn[0x2]; + u8 dscp[0x6]; + + u8 reserved_15[0x40]; +}; + +enum { + MLX5_CQC_STATUS_OK = 0x0, + MLX5_CQC_STATUS_CQ_OVERFLOW = 0x9, + MLX5_CQC_STATUS_CQ_WRITE_FAIL = 0xa, +}; + +enum { + MLX5_CQC_CQE_SZ_64_BYTES = 0x0, + MLX5_CQC_CQE_SZ_128_BYTES = 0x1, +}; + +enum { + MLX5_CQC_ST_SOLICITED_NOTIFICATION_REQUEST_ARMED = 0x6, + MLX5_CQC_ST_NOTIFICATION_REQUEST_ARMED = 0x9, + MLX5_CQC_ST_FIRED = 0xa, +}; + +struct mlx5_ifc_cqc_bits { + u8 status[0x4]; + u8 reserved_0[0x4]; + u8 cqe_sz[0x3]; + u8 cc[0x1]; + u8 reserved_1[0x1]; + u8 scqe_break_moderation_en[0x1]; + u8 oi[0x1]; + u8 reserved_2[0x2]; + u8 cqe_zip_en[0x1]; + u8 mini_cqe_res_format[0x2]; + u8 st[0x4]; + u8 reserved_3[0x8]; + + u8 reserved_4[0x20]; + + u8 reserved_5[0x14]; + u8 page_offset[0x6]; + u8 reserved_6[0x6]; + + u8 reserved_7[0x3]; + u8 log_cq_size[0x5]; + u8 uar_page[0x18]; + + u8 reserved_8[0x4]; + u8 cq_period[0xc]; + u8 cq_max_count[0x10]; + + u8 reserved_9[0x18]; + u8 c_eqn[0x8]; + + u8 reserved_10[0x3]; + u8 log_page_size[0x5]; + u8 reserved_11[0x18]; + + u8 reserved_12[0x20]; + + u8 reserved_13[0x8]; + u8 last_notified_index[0x18]; + + u8 reserved_14[0x8]; + u8 last_solicit_index[0x18]; + + u8 reserved_15[0x8]; + u8 consumer_counter[0x18]; + + u8 reserved_16[0x8]; + u8 producer_counter[0x18]; + + u8 reserved_17[0x40]; + + u8 dbr_addr[0x40]; +}; + +union mlx5_ifc_cong_control_roce_ecn_auto_bits { + struct mlx5_ifc_cong_control_802_1qau_rp_bits cong_control_802_1qau_rp; + struct mlx5_ifc_cong_control_r_roce_ecn_rp_bits cong_control_r_roce_ecn_rp; + struct mlx5_ifc_cong_control_r_roce_ecn_np_bits cong_control_r_roce_ecn_np; + u8 reserved_0[0x800]; +}; + +struct mlx5_ifc_query_adapter_param_block_bits { + u8 reserved_0[0xe0]; + + u8 reserved_1[0x10]; + u8 vsd_vendor_id[0x10]; + + u8 vsd[208][0x8]; + + u8 vsd_contd_psid[16][0x8]; +}; + +union mlx5_ifc_modify_field_select_resize_field_select_auto_bits { + struct mlx5_ifc_modify_field_select_bits modify_field_select; + struct mlx5_ifc_resize_field_select_bits resize_field_select; + u8 reserved_0[0x20]; +}; + +union mlx5_ifc_field_select_802_1_r_roce_auto_bits { + struct mlx5_ifc_field_select_802_1qau_rp_bits field_select_802_1qau_rp; + struct mlx5_ifc_field_select_r_roce_rp_bits field_select_r_roce_rp; + struct mlx5_ifc_field_select_r_roce_np_bits field_select_r_roce_np; + u8 reserved_0[0x20]; +}; + +union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits { + struct mlx5_ifc_eth_802_3_cntrs_grp_data_layout_bits eth_802_3_cntrs_grp_data_layout; + struct mlx5_ifc_eth_2863_cntrs_grp_data_layout_bits eth_2863_cntrs_grp_data_layout; + struct mlx5_ifc_eth_2819_cntrs_grp_data_layout_bits eth_2819_cntrs_grp_data_layout; + struct mlx5_ifc_eth_3635_cntrs_grp_data_layout_bits eth_3635_cntrs_grp_data_layout; + struct mlx5_ifc_eth_extended_cntrs_grp_data_layout_bits eth_extended_cntrs_grp_data_layout; + struct mlx5_ifc_eth_per_prio_grp_data_layout_bits eth_per_prio_grp_data_layout; + struct mlx5_ifc_eth_per_traffic_grp_data_layout_bits eth_per_traffic_grp_data_layout; + struct mlx5_ifc_phys_layer_cntrs_bits phys_layer_cntrs; + u8 reserved_0[0x7c0]; +}; + +union mlx5_ifc_event_auto_bits { + struct mlx5_ifc_comp_event_bits comp_event; + struct mlx5_ifc_dct_events_bits dct_events; + struct mlx5_ifc_qp_events_bits qp_events; + struct mlx5_ifc_wqe_associated_page_fault_event_bits wqe_associated_page_fault_event; + struct mlx5_ifc_rdma_page_fault_event_bits rdma_page_fault_event; + struct mlx5_ifc_cq_error_bits cq_error; + struct mlx5_ifc_dropped_packet_logged_bits dropped_packet_logged; + struct mlx5_ifc_port_state_change_event_bits port_state_change_event; + struct mlx5_ifc_gpio_event_bits gpio_event; + struct mlx5_ifc_db_bf_congestion_event_bits db_bf_congestion_event; + struct mlx5_ifc_stall_vl_event_bits stall_vl_event; + struct mlx5_ifc_cmd_inter_comp_event_bits cmd_inter_comp_event; + u8 reserved_0[0xe0]; +}; + +struct mlx5_ifc_health_buffer_bits { + u8 reserved_0[0x100]; + + u8 assert_existptr[0x20]; + + u8 assert_callra[0x20]; + + u8 reserved_1[0x40]; + + u8 fw_version[0x20]; + + u8 hw_id[0x20]; + + u8 reserved_2[0x20]; + + u8 irisc_index[0x8]; + u8 synd[0x8]; + u8 ext_synd[0x10]; +}; + +struct mlx5_ifc_register_loopback_control_bits { + u8 no_lb[0x1]; + u8 reserved_0[0x7]; + u8 port[0x8]; + u8 reserved_1[0x10]; + + u8 reserved_2[0x60]; +}; + +struct mlx5_ifc_teardown_hca_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +enum { + MLX5_TEARDOWN_HCA_IN_PROFILE_GRACEFUL_CLOSE = 0x0, + MLX5_TEARDOWN_HCA_IN_PROFILE_PANIC_CLOSE = 0x1, +}; + +struct mlx5_ifc_teardown_hca_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x10]; + u8 profile[0x10]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_sqerr2rts_qp_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_sqerr2rts_qp_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 qpn[0x18]; + + u8 reserved_3[0x20]; + + u8 opt_param_mask[0x20]; + + u8 reserved_4[0x20]; + + struct mlx5_ifc_qpc_bits qpc; + + u8 reserved_5[0x80]; +}; + +struct mlx5_ifc_sqd2rts_qp_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_sqd2rts_qp_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 qpn[0x18]; + + u8 reserved_3[0x20]; + + u8 opt_param_mask[0x20]; + + u8 reserved_4[0x20]; + + struct mlx5_ifc_qpc_bits qpc; + + u8 reserved_5[0x80]; +}; + +struct mlx5_ifc_set_roce_address_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_set_roce_address_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 roce_address_index[0x10]; + u8 reserved_2[0x10]; + + u8 reserved_3[0x20]; + + struct mlx5_ifc_roce_addr_layout_bits roce_address; +}; + +struct mlx5_ifc_set_mad_demux_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +enum { + MLX5_SET_MAD_DEMUX_IN_DEMUX_MODE_PASS_ALL = 0x0, + MLX5_SET_MAD_DEMUX_IN_DEMUX_MODE_SELECTIVE = 0x2, +}; + +struct mlx5_ifc_set_mad_demux_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x20]; + + u8 reserved_3[0x6]; + u8 demux_mode[0x2]; + u8 reserved_4[0x18]; +}; + +struct mlx5_ifc_set_l2_table_entry_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_set_l2_table_entry_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x60]; + + u8 reserved_3[0x8]; + u8 table_index[0x18]; + + u8 reserved_4[0x20]; + + u8 reserved_5[0x13]; + u8 vlan_valid[0x1]; + u8 vlan[0xc]; + + struct mlx5_ifc_mac_address_layout_bits mac_address; + + u8 reserved_6[0xc0]; +}; + +struct mlx5_ifc_set_issi_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_set_issi_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x10]; + u8 current_issi[0x10]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_set_hca_cap_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_set_hca_cap_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; + + union mlx5_ifc_hca_cap_union_bits capability; +}; + +struct mlx5_ifc_set_fte_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_set_fte_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; + + u8 table_type[0x8]; + u8 reserved_3[0x18]; + + u8 reserved_4[0x8]; + u8 table_id[0x18]; + + u8 reserved_5[0x40]; + + u8 flow_index[0x20]; + + u8 reserved_6[0xe0]; + + struct mlx5_ifc_flow_context_bits flow_context; +}; + +struct mlx5_ifc_rts2rts_qp_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_rts2rts_qp_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 qpn[0x18]; + + u8 reserved_3[0x20]; + + u8 opt_param_mask[0x20]; + + u8 reserved_4[0x20]; + + struct mlx5_ifc_qpc_bits qpc; + + u8 reserved_5[0x80]; +}; + +struct mlx5_ifc_rtr2rts_qp_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_rtr2rts_qp_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 qpn[0x18]; + + u8 reserved_3[0x20]; + + u8 opt_param_mask[0x20]; + + u8 reserved_4[0x20]; + + struct mlx5_ifc_qpc_bits qpc; + + u8 reserved_5[0x80]; +}; + +struct mlx5_ifc_rst2init_qp_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_rst2init_qp_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 qpn[0x18]; + + u8 reserved_3[0x20]; + + u8 opt_param_mask[0x20]; + + u8 reserved_4[0x20]; + + struct mlx5_ifc_qpc_bits qpc; + + u8 reserved_5[0x80]; +}; + +struct mlx5_ifc_query_xrc_srq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + struct mlx5_ifc_xrc_srqc_bits xrc_srq_context_entry; + + u8 reserved_2[0x600]; + + u8 pas[0][0x40]; +}; + +struct mlx5_ifc_query_xrc_srq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 xrc_srqn[0x18]; + + u8 reserved_3[0x20]; +}; + +enum { + MLX5_QUERY_VPORT_STATE_OUT_STATE_DOWN = 0x0, + MLX5_QUERY_VPORT_STATE_OUT_STATE_UP = 0x1, +}; + +struct mlx5_ifc_query_vport_state_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x20]; + + u8 reserved_2[0x18]; + u8 admin_state[0x4]; + u8 state[0x4]; +}; + +enum { + MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT = 0x0, +}; + +struct mlx5_ifc_query_vport_state_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 other_vport[0x1]; + u8 reserved_2[0xf]; + u8 vport_number[0x10]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_query_vport_counter_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + struct mlx5_ifc_traffic_counter_bits received_errors; + + struct mlx5_ifc_traffic_counter_bits transmit_errors; + + struct mlx5_ifc_traffic_counter_bits received_ib_unicast; + + struct mlx5_ifc_traffic_counter_bits transmitted_ib_unicast; + + struct mlx5_ifc_traffic_counter_bits received_ib_multicast; + + struct mlx5_ifc_traffic_counter_bits transmitted_ib_multicast; + + struct mlx5_ifc_traffic_counter_bits received_eth_broadcast; + + struct mlx5_ifc_traffic_counter_bits transmitted_eth_broadcast; + + struct mlx5_ifc_traffic_counter_bits received_eth_unicast; + + struct mlx5_ifc_traffic_counter_bits transmitted_eth_unicast; + + struct mlx5_ifc_traffic_counter_bits received_eth_multicast; + + struct mlx5_ifc_traffic_counter_bits transmitted_eth_multicast; + + u8 reserved_2[0xa00]; +}; + +enum { + MLX5_QUERY_VPORT_COUNTER_IN_OP_MOD_VPORT_COUNTERS = 0x0, +}; + +struct mlx5_ifc_query_vport_counter_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 other_vport[0x1]; + u8 reserved_2[0xf]; + u8 vport_number[0x10]; + + u8 reserved_3[0x60]; + + u8 clear[0x1]; + u8 reserved_4[0x1f]; + + u8 reserved_5[0x20]; +}; + +struct mlx5_ifc_query_tis_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + struct mlx5_ifc_tisc_bits tis_context; +}; + +struct mlx5_ifc_query_tis_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 tisn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_query_tir_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0xc0]; + + struct mlx5_ifc_tirc_bits tir_context; +}; + +struct mlx5_ifc_query_tir_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 tirn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_query_srq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + struct mlx5_ifc_srqc_bits srq_context_entry; + + u8 reserved_2[0x600]; + + u8 pas[0][0x40]; +}; + +struct mlx5_ifc_query_srq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 srqn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_query_sq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0xc0]; + + struct mlx5_ifc_sqc_bits sq_context; +}; + +struct mlx5_ifc_query_sq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 sqn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_query_special_contexts_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x20]; + + u8 resd_lkey[0x20]; +}; + +struct mlx5_ifc_query_special_contexts_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; +}; + +struct mlx5_ifc_query_rqt_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0xc0]; + + struct mlx5_ifc_rqtc_bits rqt_context; +}; + +struct mlx5_ifc_query_rqt_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 rqtn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_query_rq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0xc0]; + + struct mlx5_ifc_rqc_bits rq_context; +}; + +struct mlx5_ifc_query_rq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 rqn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_query_roce_address_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + struct mlx5_ifc_roce_addr_layout_bits roce_address; +}; + +struct mlx5_ifc_query_roce_address_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 roce_address_index[0x10]; + u8 reserved_2[0x10]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_query_rmp_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0xc0]; + + struct mlx5_ifc_rmpc_bits rmp_context; +}; + +struct mlx5_ifc_query_rmp_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 rmpn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_query_qp_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + u8 opt_param_mask[0x20]; + + u8 reserved_2[0x20]; + + struct mlx5_ifc_qpc_bits qpc; + + u8 reserved_3[0x80]; + + u8 pas[0][0x40]; +}; + +struct mlx5_ifc_query_qp_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 qpn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_query_q_counter_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + u8 rx_write_requests[0x20]; + + u8 reserved_2[0x20]; + + u8 rx_read_requests[0x20]; + + u8 reserved_3[0x20]; + + u8 rx_atomic_requests[0x20]; + + u8 reserved_4[0x20]; + + u8 rx_dct_connect[0x20]; + + u8 reserved_5[0x20]; + + u8 out_of_buffer[0x20]; + + u8 reserved_6[0x20]; + + u8 out_of_sequence[0x20]; + + u8 reserved_7[0x620]; +}; + +struct mlx5_ifc_query_q_counter_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x80]; + + u8 clear[0x1]; + u8 reserved_3[0x1f]; + + u8 reserved_4[0x18]; + u8 counter_set_id[0x8]; +}; + +struct mlx5_ifc_query_pages_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x10]; + u8 function_id[0x10]; + + u8 num_pages[0x20]; +}; + +enum { + MLX5_QUERY_PAGES_IN_OP_MOD_BOOT_PAGES = 0x1, + MLX5_QUERY_PAGES_IN_OP_MOD_INIT_PAGES = 0x2, + MLX5_QUERY_PAGES_IN_OP_MOD_REGULAR_PAGES = 0x3, +}; + +struct mlx5_ifc_query_pages_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x10]; + u8 function_id[0x10]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_query_nic_vport_context_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + struct mlx5_ifc_nic_vport_context_bits nic_vport_context; +}; + +struct mlx5_ifc_query_nic_vport_context_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 other_vport[0x1]; + u8 reserved_2[0xf]; + u8 vport_number[0x10]; + + u8 reserved_3[0x5]; + u8 allowed_list_type[0x3]; + u8 reserved_4[0x18]; +}; + +struct mlx5_ifc_query_mkey_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + struct mlx5_ifc_mkc_bits memory_key_mkey_entry; + + u8 reserved_2[0x600]; + + u8 bsf0_klm0_pas_mtt0_1[16][0x8]; + + u8 bsf1_klm1_pas_mtt2_3[16][0x8]; +}; + +struct mlx5_ifc_query_mkey_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 mkey_index[0x18]; + + u8 pg_access[0x1]; + u8 reserved_3[0x1f]; +}; + +struct mlx5_ifc_query_mad_demux_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + u8 mad_dumux_parameters_block[0x20]; +}; + +struct mlx5_ifc_query_mad_demux_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; +}; + +struct mlx5_ifc_query_l2_table_entry_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0xa0]; + + u8 reserved_2[0x13]; + u8 vlan_valid[0x1]; + u8 vlan[0xc]; + + struct mlx5_ifc_mac_address_layout_bits mac_address; + + u8 reserved_3[0xc0]; +}; + +struct mlx5_ifc_query_l2_table_entry_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x60]; + + u8 reserved_3[0x8]; + u8 table_index[0x18]; + + u8 reserved_4[0x140]; +}; + +struct mlx5_ifc_query_issi_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x10]; + u8 current_issi[0x10]; + + u8 reserved_2[0xa0]; + + u8 supported_issi_reserved[76][0x8]; + u8 supported_issi_dw0[0x20]; +}; + +struct mlx5_ifc_query_issi_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; +}; + +struct mlx5_ifc_query_hca_vport_pkey_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + struct mlx5_ifc_pkey_bits pkey[0]; +}; + +struct mlx5_ifc_query_hca_vport_pkey_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 other_vport[0x1]; + u8 reserved_2[0xf]; + u8 vport_number[0x10]; + + u8 reserved_3[0x10]; + u8 pkey_index[0x10]; +}; + +struct mlx5_ifc_query_hca_vport_gid_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x20]; + + u8 gids_num[0x10]; + u8 reserved_2[0x10]; + + struct mlx5_ifc_array128_auto_bits gid[0]; +}; + +struct mlx5_ifc_query_hca_vport_gid_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 other_vport[0x1]; + u8 reserved_2[0xf]; + u8 vport_number[0x10]; + + u8 reserved_3[0x10]; + u8 gid_index[0x10]; +}; + +struct mlx5_ifc_query_hca_vport_context_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + struct mlx5_ifc_hca_vport_context_bits hca_vport_context; +}; + +struct mlx5_ifc_query_hca_vport_context_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 other_vport[0x1]; + u8 reserved_2[0xf]; + u8 vport_number[0x10]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_query_hca_cap_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + union mlx5_ifc_hca_cap_union_bits capability; +}; + +struct mlx5_ifc_query_hca_cap_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; +}; + +struct mlx5_ifc_query_flow_table_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x80]; + + u8 reserved_2[0x8]; + u8 level[0x8]; + u8 reserved_3[0x8]; + u8 log_size[0x8]; + + u8 reserved_4[0x120]; +}; + +struct mlx5_ifc_query_flow_table_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; + + u8 table_type[0x8]; + u8 reserved_3[0x18]; + + u8 reserved_4[0x8]; + u8 table_id[0x18]; + + u8 reserved_5[0x140]; +}; + +struct mlx5_ifc_query_fte_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x1c0]; + + struct mlx5_ifc_flow_context_bits flow_context; +}; + +struct mlx5_ifc_query_fte_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; + + u8 table_type[0x8]; + u8 reserved_3[0x18]; + + u8 reserved_4[0x8]; + u8 table_id[0x18]; + + u8 reserved_5[0x40]; + + u8 flow_index[0x20]; + + u8 reserved_6[0xe0]; +}; + +enum { + MLX5_QUERY_FLOW_GROUP_OUT_MATCH_CRITERIA_ENABLE_OUTER_HEADERS = 0x0, + MLX5_QUERY_FLOW_GROUP_OUT_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS = 0x1, + MLX5_QUERY_FLOW_GROUP_OUT_MATCH_CRITERIA_ENABLE_INNER_HEADERS = 0x2, +}; + +struct mlx5_ifc_query_flow_group_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0xa0]; + + u8 start_flow_index[0x20]; + + u8 reserved_2[0x20]; + + u8 end_flow_index[0x20]; + + u8 reserved_3[0xa0]; + + u8 reserved_4[0x18]; + u8 match_criteria_enable[0x8]; + + struct mlx5_ifc_fte_match_param_bits match_criteria; + + u8 reserved_5[0xe00]; +}; + +struct mlx5_ifc_query_flow_group_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; + + u8 table_type[0x8]; + u8 reserved_3[0x18]; + + u8 reserved_4[0x8]; + u8 table_id[0x18]; + + u8 group_id[0x20]; + + u8 reserved_5[0x120]; +}; + +struct mlx5_ifc_query_eq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + struct mlx5_ifc_eqc_bits eq_context_entry; + + u8 reserved_2[0x40]; + + u8 event_bitmask[0x40]; + + u8 reserved_3[0x580]; + + u8 pas[0][0x40]; +}; + +struct mlx5_ifc_query_eq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x18]; + u8 eq_number[0x8]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_query_dct_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + struct mlx5_ifc_dctc_bits dct_context_entry; + + u8 reserved_2[0x180]; +}; + +struct mlx5_ifc_query_dct_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 dctn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_query_cq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + struct mlx5_ifc_cqc_bits cq_context; + + u8 reserved_2[0x600]; + + u8 pas[0][0x40]; +}; + +struct mlx5_ifc_query_cq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 cqn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_query_cong_status_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x20]; + + u8 enable[0x1]; + u8 tag_enable[0x1]; + u8 reserved_2[0x1e]; +}; + +struct mlx5_ifc_query_cong_status_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x18]; + u8 priority[0x4]; + u8 cong_protocol[0x4]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_query_cong_statistics_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + u8 cur_flows[0x20]; + + u8 sum_flows[0x20]; + + u8 cnp_ignored_high[0x20]; + + u8 cnp_ignored_low[0x20]; + + u8 cnp_handled_high[0x20]; + + u8 cnp_handled_low[0x20]; + + u8 reserved_2[0x100]; + + u8 time_stamp_high[0x20]; + + u8 time_stamp_low[0x20]; + + u8 accumulators_period[0x20]; + + u8 ecn_marked_roce_packets_high[0x20]; + + u8 ecn_marked_roce_packets_low[0x20]; + + u8 cnps_sent_high[0x20]; + + u8 cnps_sent_low[0x20]; + + u8 reserved_3[0x560]; +}; + +struct mlx5_ifc_query_cong_statistics_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 clear[0x1]; + u8 reserved_2[0x1f]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_query_cong_params_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + union mlx5_ifc_cong_control_roce_ecn_auto_bits congestion_parameters; +}; + +struct mlx5_ifc_query_cong_params_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x1c]; + u8 cong_protocol[0x4]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_query_adapter_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + struct mlx5_ifc_query_adapter_param_block_bits query_adapter_struct; +}; + +struct mlx5_ifc_query_adapter_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; +}; + +struct mlx5_ifc_qp_2rst_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_qp_2rst_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 qpn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_qp_2err_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_qp_2err_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 qpn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_page_fault_resume_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_page_fault_resume_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 error[0x1]; + u8 reserved_2[0x4]; + u8 rdma[0x1]; + u8 read_write[0x1]; + u8 req_res[0x1]; + u8 qpn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_nop_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_nop_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; +}; + +struct mlx5_ifc_modify_vport_state_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_modify_vport_state_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 other_vport[0x1]; + u8 reserved_2[0xf]; + u8 vport_number[0x10]; + + u8 reserved_3[0x18]; + u8 admin_state[0x4]; + u8 reserved_4[0x4]; +}; + +struct mlx5_ifc_modify_tis_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_modify_tis_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 tisn[0x18]; + + u8 reserved_3[0x20]; + + u8 modify_bitmask[0x40]; + + u8 reserved_4[0x40]; + + struct mlx5_ifc_tisc_bits ctx; +}; + +struct mlx5_ifc_modify_tir_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_modify_tir_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 tirn[0x18]; + + u8 reserved_3[0x20]; + + u8 modify_bitmask[0x40]; + + u8 reserved_4[0x40]; + + struct mlx5_ifc_tirc_bits ctx; +}; + +struct mlx5_ifc_modify_sq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_modify_sq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 sq_state[0x4]; + u8 reserved_2[0x4]; + u8 sqn[0x18]; + + u8 reserved_3[0x20]; + + u8 modify_bitmask[0x40]; + + u8 reserved_4[0x40]; + + struct mlx5_ifc_sqc_bits ctx; +}; + +struct mlx5_ifc_modify_rqt_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_modify_rqt_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 rqtn[0x18]; + + u8 reserved_3[0x20]; + + u8 modify_bitmask[0x40]; + + u8 reserved_4[0x40]; + + struct mlx5_ifc_rqtc_bits ctx; +}; + +struct mlx5_ifc_modify_rq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_modify_rq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 rq_state[0x4]; + u8 reserved_2[0x4]; + u8 rqn[0x18]; + + u8 reserved_3[0x20]; + + u8 modify_bitmask[0x40]; + + u8 reserved_4[0x40]; + + struct mlx5_ifc_rqc_bits ctx; +}; + +struct mlx5_ifc_modify_rmp_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_modify_rmp_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 rmp_state[0x4]; + u8 reserved_2[0x4]; + u8 rmpn[0x18]; + + u8 reserved_3[0x20]; + + u8 modify_bitmask[0x40]; + + u8 reserved_4[0x40]; + + struct mlx5_ifc_rmpc_bits ctx; +}; + +struct mlx5_ifc_modify_nic_vport_context_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_modify_nic_vport_field_select_bits { + u8 reserved_0[0x1c]; + u8 permanent_address[0x1]; + u8 addresses_list[0x1]; + u8 roce_en[0x1]; + u8 reserved_1[0x1]; +}; + +struct mlx5_ifc_modify_nic_vport_context_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 other_vport[0x1]; + u8 reserved_2[0xf]; + u8 vport_number[0x10]; + + struct mlx5_ifc_modify_nic_vport_field_select_bits field_select; + + u8 reserved_3[0x780]; + + struct mlx5_ifc_nic_vport_context_bits nic_vport_context; +}; + +struct mlx5_ifc_modify_hca_vport_context_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_modify_hca_vport_context_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 other_vport[0x1]; + u8 reserved_2[0xf]; + u8 vport_number[0x10]; + + u8 reserved_3[0x20]; + + struct mlx5_ifc_hca_vport_context_bits hca_vport_context; +}; + +struct mlx5_ifc_modify_cq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +enum { + MLX5_MODIFY_CQ_IN_OP_MOD_MODIFY_CQ = 0x0, + MLX5_MODIFY_CQ_IN_OP_MOD_RESIZE_CQ = 0x1, +}; + +struct mlx5_ifc_modify_cq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 cqn[0x18]; + + union mlx5_ifc_modify_field_select_resize_field_select_auto_bits modify_field_select_resize_field_select; + + struct mlx5_ifc_cqc_bits cq_context; + + u8 reserved_3[0x600]; + + u8 pas[0][0x40]; +}; + +struct mlx5_ifc_modify_cong_status_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_modify_cong_status_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x18]; + u8 priority[0x4]; + u8 cong_protocol[0x4]; + + u8 enable[0x1]; + u8 tag_enable[0x1]; + u8 reserved_3[0x1e]; +}; + +struct mlx5_ifc_modify_cong_params_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_modify_cong_params_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x1c]; + u8 cong_protocol[0x4]; + + union mlx5_ifc_field_select_802_1_r_roce_auto_bits field_select; + + u8 reserved_3[0x80]; + + union mlx5_ifc_cong_control_roce_ecn_auto_bits congestion_parameters; +}; + +struct mlx5_ifc_manage_pages_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 output_num_entries[0x20]; + + u8 reserved_1[0x20]; + + u8 pas[0][0x40]; +}; + +enum { + MLX5_MANAGE_PAGES_IN_OP_MOD_ALLOCATION_FAIL = 0x0, + MLX5_MANAGE_PAGES_IN_OP_MOD_ALLOCATION_SUCCESS = 0x1, + MLX5_MANAGE_PAGES_IN_OP_MOD_HCA_RETURN_PAGES = 0x2, +}; + +struct mlx5_ifc_manage_pages_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x10]; + u8 function_id[0x10]; + + u8 input_num_entries[0x20]; + + u8 pas[0][0x40]; +}; + +struct mlx5_ifc_mad_ifc_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + u8 response_mad_packet[256][0x8]; +}; + +struct mlx5_ifc_mad_ifc_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 remote_lid[0x10]; + u8 reserved_2[0x8]; + u8 port[0x8]; + + u8 reserved_3[0x20]; + + u8 mad[256][0x8]; +}; + +struct mlx5_ifc_init_hca_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_init_hca_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; +}; + +struct mlx5_ifc_init2rtr_qp_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_init2rtr_qp_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 qpn[0x18]; + + u8 reserved_3[0x20]; + + u8 opt_param_mask[0x20]; + + u8 reserved_4[0x20]; + + struct mlx5_ifc_qpc_bits qpc; + + u8 reserved_5[0x80]; +}; + +struct mlx5_ifc_init2init_qp_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_init2init_qp_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 qpn[0x18]; + + u8 reserved_3[0x20]; + + u8 opt_param_mask[0x20]; + + u8 reserved_4[0x20]; + + struct mlx5_ifc_qpc_bits qpc; + + u8 reserved_5[0x80]; +}; + +struct mlx5_ifc_get_dropped_packet_log_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + u8 packet_headers_log[128][0x8]; + + u8 packet_syndrome[64][0x8]; +}; + +struct mlx5_ifc_get_dropped_packet_log_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; +}; + +struct mlx5_ifc_gen_eqe_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x18]; + u8 eq_number[0x8]; + + u8 reserved_3[0x20]; + + u8 eqe[64][0x8]; +}; + +struct mlx5_ifc_gen_eq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_enable_hca_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x20]; +}; + +struct mlx5_ifc_enable_hca_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x10]; + u8 function_id[0x10]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_drain_dct_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_drain_dct_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 dctn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_disable_hca_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x20]; +}; + +struct mlx5_ifc_disable_hca_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x10]; + u8 function_id[0x10]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_detach_from_mcg_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_detach_from_mcg_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 qpn[0x18]; + + u8 reserved_3[0x20]; + + u8 multicast_gid[16][0x8]; +}; + +struct mlx5_ifc_destroy_xrc_srq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_destroy_xrc_srq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 xrc_srqn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_destroy_tis_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_destroy_tis_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 tisn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_destroy_tir_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_destroy_tir_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 tirn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_destroy_srq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_destroy_srq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 srqn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_destroy_sq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_destroy_sq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 sqn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_destroy_rqt_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_destroy_rqt_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 rqtn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_destroy_rq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_destroy_rq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 rqn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_destroy_rmp_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_destroy_rmp_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 rmpn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_destroy_qp_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_destroy_qp_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 qpn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_destroy_psv_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_destroy_psv_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 psvn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_destroy_mkey_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_destroy_mkey_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 mkey_index[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_destroy_flow_table_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_destroy_flow_table_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; + + u8 table_type[0x8]; + u8 reserved_3[0x18]; + + u8 reserved_4[0x8]; + u8 table_id[0x18]; + + u8 reserved_5[0x140]; +}; + +struct mlx5_ifc_destroy_flow_group_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_destroy_flow_group_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; + + u8 table_type[0x8]; + u8 reserved_3[0x18]; + + u8 reserved_4[0x8]; + u8 table_id[0x18]; + + u8 group_id[0x20]; + + u8 reserved_5[0x120]; +}; + +struct mlx5_ifc_destroy_eq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_destroy_eq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x18]; + u8 eq_number[0x8]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_destroy_dct_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_destroy_dct_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 dctn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_destroy_cq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_destroy_cq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 cqn[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_delete_vxlan_udp_dport_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_delete_vxlan_udp_dport_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x20]; + + u8 reserved_3[0x10]; + u8 vxlan_udp_port[0x10]; +}; + +struct mlx5_ifc_delete_l2_table_entry_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_delete_l2_table_entry_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x60]; + + u8 reserved_3[0x8]; + u8 table_index[0x18]; + + u8 reserved_4[0x140]; +}; + +struct mlx5_ifc_delete_fte_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_delete_fte_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; + + u8 table_type[0x8]; + u8 reserved_3[0x18]; + + u8 reserved_4[0x8]; + u8 table_id[0x18]; + + u8 reserved_5[0x40]; + + u8 flow_index[0x20]; + + u8 reserved_6[0xe0]; +}; + +struct mlx5_ifc_dealloc_xrcd_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_dealloc_xrcd_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 xrcd[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_dealloc_uar_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_dealloc_uar_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 uar[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_dealloc_transport_domain_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_dealloc_transport_domain_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 transport_domain[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_dealloc_q_counter_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_dealloc_q_counter_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x18]; + u8 counter_set_id[0x8]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_dealloc_pd_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_dealloc_pd_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 pd[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_create_xrc_srq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x8]; + u8 xrc_srqn[0x18]; + + u8 reserved_2[0x20]; +}; + +struct mlx5_ifc_create_xrc_srq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; + + struct mlx5_ifc_xrc_srqc_bits xrc_srq_context_entry; + + u8 reserved_3[0x600]; + + u8 pas[0][0x40]; +}; + +struct mlx5_ifc_create_tis_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x8]; + u8 tisn[0x18]; + + u8 reserved_2[0x20]; +}; + +struct mlx5_ifc_create_tis_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0xc0]; + + struct mlx5_ifc_tisc_bits ctx; +}; + +struct mlx5_ifc_create_tir_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x8]; + u8 tirn[0x18]; + + u8 reserved_2[0x20]; +}; + +struct mlx5_ifc_create_tir_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0xc0]; + + struct mlx5_ifc_tirc_bits ctx; +}; + +struct mlx5_ifc_create_srq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x8]; + u8 srqn[0x18]; + + u8 reserved_2[0x20]; +}; + +struct mlx5_ifc_create_srq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; + + struct mlx5_ifc_srqc_bits srq_context_entry; + + u8 reserved_3[0x600]; + + u8 pas[0][0x40]; +}; + +struct mlx5_ifc_create_sq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x8]; + u8 sqn[0x18]; + + u8 reserved_2[0x20]; +}; + +struct mlx5_ifc_create_sq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0xc0]; + + struct mlx5_ifc_sqc_bits ctx; +}; + +struct mlx5_ifc_create_rqt_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x8]; + u8 rqtn[0x18]; + + u8 reserved_2[0x20]; +}; + +struct mlx5_ifc_create_rqt_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0xc0]; + + struct mlx5_ifc_rqtc_bits rqt_context; +}; + +struct mlx5_ifc_create_rq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x8]; + u8 rqn[0x18]; + + u8 reserved_2[0x20]; +}; + +struct mlx5_ifc_create_rq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0xc0]; + + struct mlx5_ifc_rqc_bits ctx; +}; + +struct mlx5_ifc_create_rmp_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x8]; + u8 rmpn[0x18]; + + u8 reserved_2[0x20]; +}; + +struct mlx5_ifc_create_rmp_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0xc0]; + + struct mlx5_ifc_rmpc_bits ctx; +}; + +struct mlx5_ifc_create_qp_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x8]; + u8 qpn[0x18]; + + u8 reserved_2[0x20]; +}; + +struct mlx5_ifc_create_qp_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; + + u8 opt_param_mask[0x20]; + + u8 reserved_3[0x20]; + + struct mlx5_ifc_qpc_bits qpc; + + u8 reserved_4[0x80]; + + u8 pas[0][0x40]; +}; + +struct mlx5_ifc_create_psv_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + u8 reserved_2[0x8]; + u8 psv0_index[0x18]; + + u8 reserved_3[0x8]; + u8 psv1_index[0x18]; + + u8 reserved_4[0x8]; + u8 psv2_index[0x18]; + + u8 reserved_5[0x8]; + u8 psv3_index[0x18]; +}; + +struct mlx5_ifc_create_psv_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 num_psv[0x4]; + u8 reserved_2[0x4]; + u8 pd[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_create_mkey_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x8]; + u8 mkey_index[0x18]; + + u8 reserved_2[0x20]; +}; + +struct mlx5_ifc_create_mkey_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x20]; + + u8 pg_access[0x1]; + u8 reserved_3[0x1f]; + + struct mlx5_ifc_mkc_bits memory_key_mkey_entry; + + u8 reserved_4[0x80]; + + u8 translations_octword_actual_size[0x20]; + + u8 reserved_5[0x560]; + + u8 klm_pas_mtt[0][0x20]; +}; + +struct mlx5_ifc_create_flow_table_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x8]; + u8 table_id[0x18]; + + u8 reserved_2[0x20]; +}; + +struct mlx5_ifc_create_flow_table_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; + + u8 table_type[0x8]; + u8 reserved_3[0x18]; + + u8 reserved_4[0x20]; + + u8 reserved_5[0x8]; + u8 level[0x8]; + u8 reserved_6[0x8]; + u8 log_size[0x8]; + + u8 reserved_7[0x120]; +}; + +struct mlx5_ifc_create_flow_group_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x8]; + u8 group_id[0x18]; + + u8 reserved_2[0x20]; +}; + +enum { + MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS = 0x0, + MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS = 0x1, + MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_INNER_HEADERS = 0x2, +}; + +struct mlx5_ifc_create_flow_group_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; + + u8 table_type[0x8]; + u8 reserved_3[0x18]; + + u8 reserved_4[0x8]; + u8 table_id[0x18]; + + u8 reserved_5[0x20]; + + u8 start_flow_index[0x20]; + + u8 reserved_6[0x20]; + + u8 end_flow_index[0x20]; + + u8 reserved_7[0xa0]; + + u8 reserved_8[0x18]; + u8 match_criteria_enable[0x8]; + + struct mlx5_ifc_fte_match_param_bits match_criteria; + + u8 reserved_9[0xe00]; +}; + +struct mlx5_ifc_create_eq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x18]; + u8 eq_number[0x8]; + + u8 reserved_2[0x20]; +}; + +struct mlx5_ifc_create_eq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; + + struct mlx5_ifc_eqc_bits eq_context_entry; + + u8 reserved_3[0x40]; + + u8 event_bitmask[0x40]; + + u8 reserved_4[0x580]; + + u8 pas[0][0x40]; +}; + +struct mlx5_ifc_create_dct_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x8]; + u8 dctn[0x18]; + + u8 reserved_2[0x20]; +}; + +struct mlx5_ifc_create_dct_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; + + struct mlx5_ifc_dctc_bits dct_context_entry; + + u8 reserved_3[0x180]; +}; + +struct mlx5_ifc_create_cq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x8]; + u8 cqn[0x18]; + + u8 reserved_2[0x20]; +}; + +struct mlx5_ifc_create_cq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; + + struct mlx5_ifc_cqc_bits cq_context; + + u8 reserved_3[0x600]; + + u8 pas[0][0x40]; +}; + +struct mlx5_ifc_config_int_moderation_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x4]; + u8 min_delay[0xc]; + u8 int_vector[0x10]; + + u8 reserved_2[0x20]; +}; + +enum { + MLX5_CONFIG_INT_MODERATION_IN_OP_MOD_WRITE = 0x0, + MLX5_CONFIG_INT_MODERATION_IN_OP_MOD_READ = 0x1, +}; + +struct mlx5_ifc_config_int_moderation_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x4]; + u8 min_delay[0xc]; + u8 int_vector[0x10]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_attach_to_mcg_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_attach_to_mcg_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 qpn[0x18]; + + u8 reserved_3[0x20]; + + u8 multicast_gid[16][0x8]; +}; + +struct mlx5_ifc_arm_xrc_srq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +enum { + MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ = 0x1, +}; + +struct mlx5_ifc_arm_xrc_srq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 xrc_srqn[0x18]; + + u8 reserved_3[0x10]; + u8 lwm[0x10]; +}; + +struct mlx5_ifc_arm_rq_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +enum { + MLX5_ARM_RQ_IN_OP_MOD_SRQ_ = 0x1, +}; + +struct mlx5_ifc_arm_rq_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 srq_number[0x18]; + + u8 reserved_3[0x10]; + u8 lwm[0x10]; +}; + +struct mlx5_ifc_arm_dct_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_arm_dct_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x8]; + u8 dct_number[0x18]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_alloc_xrcd_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x8]; + u8 xrcd[0x18]; + + u8 reserved_2[0x20]; +}; + +struct mlx5_ifc_alloc_xrcd_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; +}; + +struct mlx5_ifc_alloc_uar_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x8]; + u8 uar[0x18]; + + u8 reserved_2[0x20]; +}; + +struct mlx5_ifc_alloc_uar_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; +}; + +struct mlx5_ifc_alloc_transport_domain_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x8]; + u8 transport_domain[0x18]; + + u8 reserved_2[0x20]; +}; + +struct mlx5_ifc_alloc_transport_domain_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; +}; + +struct mlx5_ifc_alloc_q_counter_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x18]; + u8 counter_set_id[0x8]; + + u8 reserved_2[0x20]; +}; + +struct mlx5_ifc_alloc_q_counter_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; +}; + +struct mlx5_ifc_alloc_pd_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x8]; + u8 pd[0x18]; + + u8 reserved_2[0x20]; +}; + +struct mlx5_ifc_alloc_pd_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; +}; + +struct mlx5_ifc_add_vxlan_udp_dport_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_add_vxlan_udp_dport_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x20]; + + u8 reserved_3[0x10]; + u8 vxlan_udp_port[0x10]; +}; + +struct mlx5_ifc_access_register_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + u8 register_data[0][0x20]; +}; + +enum { + MLX5_ACCESS_REGISTER_IN_OP_MOD_WRITE = 0x0, + MLX5_ACCESS_REGISTER_IN_OP_MOD_READ = 0x1, +}; + +struct mlx5_ifc_access_register_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x10]; + u8 register_id[0x10]; + + u8 argument[0x20]; + + u8 register_data[0][0x20]; +}; + +struct mlx5_ifc_sltp_reg_bits { + u8 status[0x4]; + u8 version[0x4]; + u8 local_port[0x8]; + u8 pnat[0x2]; + u8 reserved_0[0x2]; + u8 lane[0x4]; + u8 reserved_1[0x8]; + + u8 reserved_2[0x20]; + + u8 reserved_3[0x7]; + u8 polarity[0x1]; + u8 ob_tap0[0x8]; + u8 ob_tap1[0x8]; + u8 ob_tap2[0x8]; + + u8 reserved_4[0xc]; + u8 ob_preemp_mode[0x4]; + u8 ob_reg[0x8]; + u8 ob_bias[0x8]; + + u8 reserved_5[0x20]; +}; + +struct mlx5_ifc_slrg_reg_bits { + u8 status[0x4]; + u8 version[0x4]; + u8 local_port[0x8]; + u8 pnat[0x2]; + u8 reserved_0[0x2]; + u8 lane[0x4]; + u8 reserved_1[0x8]; + + u8 time_to_link_up[0x10]; + u8 reserved_2[0xc]; + u8 grade_lane_speed[0x4]; + + u8 grade_version[0x8]; + u8 grade[0x18]; + + u8 reserved_3[0x4]; + u8 height_grade_type[0x4]; + u8 height_grade[0x18]; + + u8 height_dz[0x10]; + u8 height_dv[0x10]; + + u8 reserved_4[0x10]; + u8 height_sigma[0x10]; + + u8 reserved_5[0x20]; + + u8 reserved_6[0x4]; + u8 phase_grade_type[0x4]; + u8 phase_grade[0x18]; + + u8 reserved_7[0x8]; + u8 phase_eo_pos[0x8]; + u8 reserved_8[0x8]; + u8 phase_eo_neg[0x8]; + + u8 ffe_set_tested[0x10]; + u8 test_errors_per_lane[0x10]; +}; + +struct mlx5_ifc_pvlc_reg_bits { + u8 reserved_0[0x8]; + u8 local_port[0x8]; + u8 reserved_1[0x10]; + + u8 reserved_2[0x1c]; + u8 vl_hw_cap[0x4]; + + u8 reserved_3[0x1c]; + u8 vl_admin[0x4]; + + u8 reserved_4[0x1c]; + u8 vl_operational[0x4]; +}; + +struct mlx5_ifc_pude_reg_bits { + u8 swid[0x8]; + u8 local_port[0x8]; + u8 reserved_0[0x4]; + u8 admin_status[0x4]; + u8 reserved_1[0x4]; + u8 oper_status[0x4]; + + u8 reserved_2[0x60]; +}; + +struct mlx5_ifc_ptys_reg_bits { + u8 reserved_0[0x8]; + u8 local_port[0x8]; + u8 reserved_1[0xd]; + u8 proto_mask[0x3]; + + u8 reserved_2[0x40]; + + u8 eth_proto_capability[0x20]; + + u8 ib_link_width_capability[0x10]; + u8 ib_proto_capability[0x10]; + + u8 reserved_3[0x20]; + + u8 eth_proto_admin[0x20]; + + u8 ib_link_width_admin[0x10]; + u8 ib_proto_admin[0x10]; + + u8 reserved_4[0x20]; + + u8 eth_proto_oper[0x20]; + + u8 ib_link_width_oper[0x10]; + u8 ib_proto_oper[0x10]; + + u8 reserved_5[0x20]; + + u8 eth_proto_lp_advertise[0x20]; + + u8 reserved_6[0x60]; +}; + +struct mlx5_ifc_ptas_reg_bits { + u8 reserved_0[0x20]; + + u8 algorithm_options[0x10]; + u8 reserved_1[0x4]; + u8 repetitions_mode[0x4]; + u8 num_of_repetitions[0x8]; + + u8 grade_version[0x8]; + u8 height_grade_type[0x4]; + u8 phase_grade_type[0x4]; + u8 height_grade_weight[0x8]; + u8 phase_grade_weight[0x8]; + + u8 gisim_measure_bits[0x10]; + u8 adaptive_tap_measure_bits[0x10]; + + u8 ber_bath_high_error_threshold[0x10]; + u8 ber_bath_mid_error_threshold[0x10]; + + u8 ber_bath_low_error_threshold[0x10]; + u8 one_ratio_high_threshold[0x10]; + + u8 one_ratio_high_mid_threshold[0x10]; + u8 one_ratio_low_mid_threshold[0x10]; + + u8 one_ratio_low_threshold[0x10]; + u8 ndeo_error_threshold[0x10]; + + u8 mixer_offset_step_size[0x10]; + u8 reserved_2[0x8]; + u8 mix90_phase_for_voltage_bath[0x8]; + + u8 mixer_offset_start[0x10]; + u8 mixer_offset_end[0x10]; + + u8 reserved_3[0x15]; + u8 ber_test_time[0xb]; +}; + +struct mlx5_ifc_pspa_reg_bits { + u8 swid[0x8]; + u8 local_port[0x8]; + u8 sub_port[0x8]; + u8 reserved_0[0x8]; + + u8 reserved_1[0x20]; +}; + +struct mlx5_ifc_pqdr_reg_bits { + u8 reserved_0[0x8]; + u8 local_port[0x8]; + u8 reserved_1[0x5]; + u8 prio[0x3]; + u8 reserved_2[0x6]; + u8 mode[0x2]; + + u8 reserved_3[0x20]; + + u8 reserved_4[0x10]; + u8 min_threshold[0x10]; + + u8 reserved_5[0x10]; + u8 max_threshold[0x10]; + + u8 reserved_6[0x10]; + u8 mark_probability_denominator[0x10]; + + u8 reserved_7[0x60]; +}; + +struct mlx5_ifc_ppsc_reg_bits { + u8 reserved_0[0x8]; + u8 local_port[0x8]; + u8 reserved_1[0x10]; + + u8 reserved_2[0x60]; + + u8 reserved_3[0x1c]; + u8 wrps_admin[0x4]; + + u8 reserved_4[0x1c]; + u8 wrps_status[0x4]; + + u8 reserved_5[0x8]; + u8 up_threshold[0x8]; + u8 reserved_6[0x8]; + u8 down_threshold[0x8]; + + u8 reserved_7[0x20]; + + u8 reserved_8[0x1c]; + u8 srps_admin[0x4]; + + u8 reserved_9[0x1c]; + u8 srps_status[0x4]; + + u8 reserved_10[0x40]; +}; + +struct mlx5_ifc_pplr_reg_bits { + u8 reserved_0[0x8]; + u8 local_port[0x8]; + u8 reserved_1[0x10]; + + u8 reserved_2[0x8]; + u8 lb_cap[0x8]; + u8 reserved_3[0x8]; + u8 lb_en[0x8]; +}; + +struct mlx5_ifc_pplm_reg_bits { + u8 reserved_0[0x8]; + u8 local_port[0x8]; + u8 reserved_1[0x10]; + + u8 reserved_2[0x20]; + + u8 port_profile_mode[0x8]; + u8 static_port_profile[0x8]; + u8 active_port_profile[0x8]; + u8 reserved_3[0x8]; + + u8 retransmission_active[0x8]; + u8 fec_mode_active[0x18]; + + u8 reserved_4[0x20]; +}; + +struct mlx5_ifc_ppcnt_reg_bits { + u8 swid[0x8]; + u8 local_port[0x8]; + u8 pnat[0x2]; + u8 reserved_0[0x8]; + u8 grp[0x6]; + + u8 clr[0x1]; + u8 reserved_1[0x1c]; + u8 prio_tc[0x3]; + + union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits counter_set; +}; + +struct mlx5_ifc_ppad_reg_bits { + u8 reserved_0[0x3]; + u8 single_mac[0x1]; + u8 reserved_1[0x4]; + u8 local_port[0x8]; + u8 mac_47_32[0x10]; + + u8 mac_31_0[0x20]; + + u8 reserved_2[0x40]; +}; + +struct mlx5_ifc_pmtu_reg_bits { + u8 reserved_0[0x8]; + u8 local_port[0x8]; + u8 reserved_1[0x10]; + + u8 max_mtu[0x10]; + u8 reserved_2[0x10]; + + u8 admin_mtu[0x10]; + u8 reserved_3[0x10]; + + u8 oper_mtu[0x10]; + u8 reserved_4[0x10]; +}; + +struct mlx5_ifc_pmpr_reg_bits { + u8 reserved_0[0x8]; + u8 module[0x8]; + u8 reserved_1[0x10]; + + u8 reserved_2[0x18]; + u8 attenuation_5g[0x8]; + + u8 reserved_3[0x18]; + u8 attenuation_7g[0x8]; + + u8 reserved_4[0x18]; + u8 attenuation_12g[0x8]; +}; + +struct mlx5_ifc_pmpe_reg_bits { + u8 reserved_0[0x8]; + u8 module[0x8]; + u8 reserved_1[0xc]; + u8 module_status[0x4]; + + u8 reserved_2[0x60]; +}; + +struct mlx5_ifc_pmpc_reg_bits { + u8 module_state_updated[32][0x8]; +}; + +struct mlx5_ifc_pmlpn_reg_bits { + u8 reserved_0[0x4]; + u8 mlpn_status[0x4]; + u8 local_port[0x8]; + u8 reserved_1[0x10]; + + u8 e[0x1]; + u8 reserved_2[0x1f]; +}; + +struct mlx5_ifc_pmlp_reg_bits { + u8 rxtx[0x1]; + u8 reserved_0[0x7]; + u8 local_port[0x8]; + u8 reserved_1[0x8]; + u8 width[0x8]; + + u8 lane0_module_mapping[0x20]; + + u8 lane1_module_mapping[0x20]; + + u8 lane2_module_mapping[0x20]; + + u8 lane3_module_mapping[0x20]; + + u8 reserved_2[0x160]; +}; + +struct mlx5_ifc_pmaos_reg_bits { + u8 reserved_0[0x8]; + u8 module[0x8]; + u8 reserved_1[0x4]; + u8 admin_status[0x4]; + u8 reserved_2[0x4]; + u8 oper_status[0x4]; + + u8 ase[0x1]; + u8 ee[0x1]; + u8 reserved_3[0x1c]; + u8 e[0x2]; + + u8 reserved_4[0x40]; +}; + +struct mlx5_ifc_plpc_reg_bits { + u8 reserved_0[0x4]; + u8 profile_id[0xc]; + u8 reserved_1[0x4]; + u8 proto_mask[0x4]; + u8 reserved_2[0x8]; + + u8 reserved_3[0x10]; + u8 lane_speed[0x10]; + + u8 reserved_4[0x17]; + u8 lpbf[0x1]; + u8 fec_mode_policy[0x8]; + + u8 retransmission_capability[0x8]; + u8 fec_mode_capability[0x18]; + + u8 retransmission_support_admin[0x8]; + u8 fec_mode_support_admin[0x18]; + + u8 retransmission_request_admin[0x8]; + u8 fec_mode_request_admin[0x18]; + + u8 reserved_5[0x80]; +}; + +struct mlx5_ifc_plib_reg_bits { + u8 reserved_0[0x8]; + u8 local_port[0x8]; + u8 reserved_1[0x8]; + u8 ib_port[0x8]; + + u8 reserved_2[0x60]; +}; + +struct mlx5_ifc_plbf_reg_bits { + u8 reserved_0[0x8]; + u8 local_port[0x8]; + u8 reserved_1[0xd]; + u8 lbf_mode[0x3]; + + u8 reserved_2[0x20]; +}; + +struct mlx5_ifc_pipg_reg_bits { + u8 reserved_0[0x8]; + u8 local_port[0x8]; + u8 reserved_1[0x10]; + + u8 dic[0x1]; + u8 reserved_2[0x19]; + u8 ipg[0x4]; + u8 reserved_3[0x2]; +}; + +struct mlx5_ifc_pifr_reg_bits { + u8 reserved_0[0x8]; + u8 local_port[0x8]; + u8 reserved_1[0x10]; + + u8 reserved_2[0xe0]; + + u8 port_filter[8][0x20]; + + u8 port_filter_update_en[8][0x20]; +}; + +struct mlx5_ifc_pfcc_reg_bits { + u8 reserved_0[0x8]; + u8 local_port[0x8]; + u8 reserved_1[0x10]; + + u8 ppan[0x4]; + u8 reserved_2[0x4]; + u8 prio_mask_tx[0x8]; + u8 reserved_3[0x8]; + u8 prio_mask_rx[0x8]; + + u8 pptx[0x1]; + u8 aptx[0x1]; + u8 reserved_4[0x6]; + u8 pfctx[0x8]; + u8 reserved_5[0x10]; + + u8 pprx[0x1]; + u8 aprx[0x1]; + u8 reserved_6[0x6]; + u8 pfcrx[0x8]; + u8 reserved_7[0x10]; + + u8 reserved_8[0x80]; +}; + +struct mlx5_ifc_pelc_reg_bits { + u8 op[0x4]; + u8 reserved_0[0x4]; + u8 local_port[0x8]; + u8 reserved_1[0x10]; + + u8 op_admin[0x8]; + u8 op_capability[0x8]; + u8 op_request[0x8]; + u8 op_active[0x8]; + + u8 admin[0x40]; + + u8 capability[0x40]; + + u8 request[0x40]; + + u8 active[0x40]; + + u8 reserved_2[0x80]; +}; + +struct mlx5_ifc_peir_reg_bits { + u8 reserved_0[0x8]; + u8 local_port[0x8]; + u8 reserved_1[0x10]; + + u8 reserved_2[0xc]; + u8 error_count[0x4]; + u8 reserved_3[0x10]; + + u8 reserved_4[0xc]; + u8 lane[0x4]; + u8 reserved_5[0x8]; + u8 error_type[0x8]; +}; + +struct mlx5_ifc_pcap_reg_bits { + u8 reserved_0[0x8]; + u8 local_port[0x8]; + u8 reserved_1[0x10]; + + u8 port_capability_mask[4][0x20]; +}; + +struct mlx5_ifc_paos_reg_bits { + u8 swid[0x8]; + u8 local_port[0x8]; + u8 reserved_0[0x4]; + u8 admin_status[0x4]; + u8 reserved_1[0x4]; + u8 oper_status[0x4]; + + u8 ase[0x1]; + u8 ee[0x1]; + u8 reserved_2[0x1c]; + u8 e[0x2]; + + u8 reserved_3[0x40]; +}; + +struct mlx5_ifc_pamp_reg_bits { + u8 reserved_0[0x8]; + u8 opamp_group[0x8]; + u8 reserved_1[0xc]; + u8 opamp_group_type[0x4]; + + u8 start_index[0x10]; + u8 reserved_2[0x4]; + u8 num_of_indices[0xc]; + + u8 index_data[18][0x10]; +}; + +struct mlx5_ifc_lane_2_module_mapping_bits { + u8 reserved_0[0x6]; + u8 rx_lane[0x2]; + u8 reserved_1[0x6]; + u8 tx_lane[0x2]; + u8 reserved_2[0x8]; + u8 module[0x8]; +}; + +struct mlx5_ifc_bufferx_reg_bits { + u8 reserved_0[0x6]; + u8 lossy[0x1]; + u8 epsb[0x1]; + u8 reserved_1[0xc]; + u8 size[0xc]; + + u8 xoff_threshold[0x10]; + u8 xon_threshold[0x10]; +}; + +struct mlx5_ifc_set_node_in_bits { + u8 node_description[64][0x8]; +}; + +struct mlx5_ifc_register_power_settings_bits { + u8 reserved_0[0x18]; + u8 power_settings_level[0x8]; + + u8 reserved_1[0x60]; +}; + +struct mlx5_ifc_register_host_endianness_bits { + u8 he[0x1]; + u8 reserved_0[0x1f]; + + u8 reserved_1[0x60]; +}; + +struct mlx5_ifc_umr_pointer_desc_argument_bits { + u8 reserved_0[0x20]; + + u8 mkey[0x20]; + + u8 addressh_63_32[0x20]; + + u8 addressl_31_0[0x20]; +}; + +struct mlx5_ifc_ud_adrs_vector_bits { + u8 dc_key[0x40]; + + u8 ext[0x1]; + u8 reserved_0[0x7]; + u8 destination_qp_dct[0x18]; + + u8 static_rate[0x4]; + u8 sl_eth_prio[0x4]; + u8 fl[0x1]; + u8 mlid[0x7]; + u8 rlid_udp_sport[0x10]; + + u8 reserved_1[0x20]; + + u8 rmac_47_16[0x20]; + + u8 rmac_15_0[0x10]; + u8 tclass[0x8]; + u8 hop_limit[0x8]; + + u8 reserved_2[0x1]; + u8 grh[0x1]; + u8 reserved_3[0x2]; + u8 src_addr_index[0x8]; + u8 flow_label[0x14]; + + u8 rgid_rip[16][0x8]; +}; + +struct mlx5_ifc_pages_req_event_bits { + u8 reserved_0[0x10]; + u8 function_id[0x10]; + + u8 num_pages[0x20]; + + u8 reserved_1[0xa0]; +}; + +struct mlx5_ifc_eqe_bits { + u8 reserved_0[0x8]; + u8 event_type[0x8]; + u8 reserved_1[0x8]; + u8 event_sub_type[0x8]; + + u8 reserved_2[0xe0]; + + union mlx5_ifc_event_auto_bits event_data; + + u8 reserved_3[0x10]; + u8 signature[0x8]; + u8 reserved_4[0x7]; + u8 owner[0x1]; +}; + +enum { + MLX5_CMD_QUEUE_ENTRY_TYPE_PCIE_CMD_IF_TRANSPORT = 0x7, +}; + +struct mlx5_ifc_cmd_queue_entry_bits { + u8 type[0x8]; + u8 reserved_0[0x18]; + + u8 input_length[0x20]; + + u8 input_mailbox_pointer_63_32[0x20]; + + u8 input_mailbox_pointer_31_9[0x17]; + u8 reserved_1[0x9]; + + u8 command_input_inline_data[16][0x8]; + + u8 command_output_inline_data[16][0x8]; + + u8 output_mailbox_pointer_63_32[0x20]; + + u8 output_mailbox_pointer_31_9[0x17]; + u8 reserved_2[0x9]; + + u8 output_length[0x20]; + + u8 token[0x8]; + u8 signature[0x8]; + u8 reserved_3[0x8]; + u8 status[0x7]; + u8 ownership[0x1]; +}; + +struct mlx5_ifc_cmd_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 command_output[0x20]; +}; + +struct mlx5_ifc_cmd_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 command[0][0x20]; +}; + +struct mlx5_ifc_cmd_if_box_bits { + u8 mailbox_data[512][0x8]; + + u8 reserved_0[0x180]; + + u8 next_pointer_63_32[0x20]; + + u8 next_pointer_31_10[0x16]; + u8 reserved_1[0xa]; + + u8 block_number[0x20]; + + u8 reserved_2[0x8]; + u8 token[0x8]; + u8 ctrl_signature[0x8]; + u8 signature[0x8]; +}; + +struct mlx5_ifc_mtt_bits { + u8 ptag_63_32[0x20]; + + u8 ptag_31_8[0x18]; + u8 reserved_0[0x6]; + u8 wr_en[0x1]; + u8 rd_en[0x1]; +}; + +enum { + MLX5_INITIAL_SEG_NIC_INTERFACE_FULL_DRIVER = 0x0, + MLX5_INITIAL_SEG_NIC_INTERFACE_DISABLED = 0x1, + MLX5_INITIAL_SEG_NIC_INTERFACE_NO_DRAM_NIC = 0x2, +}; + +enum { + MLX5_INITIAL_SEG_NIC_INTERFACE_SUPPORTED_FULL_DRIVER = 0x0, + MLX5_INITIAL_SEG_NIC_INTERFACE_SUPPORTED_DISABLED = 0x1, + MLX5_INITIAL_SEG_NIC_INTERFACE_SUPPORTED_NO_DRAM_NIC = 0x2, +}; + +enum { + MLX5_INITIAL_SEG_HEALTH_SYNDROME_FW_INTERNAL_ERR = 0x1, + MLX5_INITIAL_SEG_HEALTH_SYNDROME_DEAD_IRISC = 0x7, + MLX5_INITIAL_SEG_HEALTH_SYNDROME_HW_FATAL_ERR = 0x8, + MLX5_INITIAL_SEG_HEALTH_SYNDROME_FW_CRC_ERR = 0x9, + MLX5_INITIAL_SEG_HEALTH_SYNDROME_ICM_FETCH_PCI_ERR = 0xa, + MLX5_INITIAL_SEG_HEALTH_SYNDROME_ICM_PAGE_ERR = 0xb, + MLX5_INITIAL_SEG_HEALTH_SYNDROME_ASYNCHRONOUS_EQ_BUF_OVERRUN = 0xc, + MLX5_INITIAL_SEG_HEALTH_SYNDROME_EQ_IN_ERR = 0xd, + MLX5_INITIAL_SEG_HEALTH_SYNDROME_EQ_INV = 0xe, + MLX5_INITIAL_SEG_HEALTH_SYNDROME_FFSER_ERR = 0xf, + MLX5_INITIAL_SEG_HEALTH_SYNDROME_HIGH_TEMP_ERR = 0x10, +}; + +struct mlx5_ifc_initial_seg_bits { + u8 fw_rev_minor[0x10]; + u8 fw_rev_major[0x10]; + + u8 cmd_interface_rev[0x10]; + u8 fw_rev_subminor[0x10]; + + u8 reserved_0[0x40]; + + u8 cmdq_phy_addr_63_32[0x20]; + + u8 cmdq_phy_addr_31_12[0x14]; + u8 reserved_1[0x2]; + u8 nic_interface[0x2]; + u8 log_cmdq_size[0x4]; + u8 log_cmdq_stride[0x4]; + + u8 command_doorbell_vector[0x20]; + + u8 reserved_2[0xf00]; + + u8 initializing[0x1]; + u8 reserved_3[0x4]; + u8 nic_interface_supported[0x3]; + u8 reserved_4[0x18]; + + struct mlx5_ifc_health_buffer_bits health_buffer; + + u8 no_dram_nic_offset[0x20]; + + u8 reserved_5[0x6e40]; + + u8 reserved_6[0x1f]; + u8 clear_int[0x1]; + + u8 health_syndrome[0x8]; + u8 health_counter[0x18]; + + u8 reserved_7[0x17fc0]; +}; + +union mlx5_ifc_ports_control_registers_document_bits { + struct mlx5_ifc_bufferx_reg_bits bufferx_reg; + struct mlx5_ifc_eth_2819_cntrs_grp_data_layout_bits eth_2819_cntrs_grp_data_layout; + struct mlx5_ifc_eth_2863_cntrs_grp_data_layout_bits eth_2863_cntrs_grp_data_layout; + struct mlx5_ifc_eth_3635_cntrs_grp_data_layout_bits eth_3635_cntrs_grp_data_layout; + struct mlx5_ifc_eth_802_3_cntrs_grp_data_layout_bits eth_802_3_cntrs_grp_data_layout; + struct mlx5_ifc_eth_extended_cntrs_grp_data_layout_bits eth_extended_cntrs_grp_data_layout; + struct mlx5_ifc_eth_per_prio_grp_data_layout_bits eth_per_prio_grp_data_layout; + struct mlx5_ifc_eth_per_traffic_grp_data_layout_bits eth_per_traffic_grp_data_layout; + struct mlx5_ifc_lane_2_module_mapping_bits lane_2_module_mapping; + struct mlx5_ifc_pamp_reg_bits pamp_reg; + struct mlx5_ifc_paos_reg_bits paos_reg; + struct mlx5_ifc_pcap_reg_bits pcap_reg; + struct mlx5_ifc_peir_reg_bits peir_reg; + struct mlx5_ifc_pelc_reg_bits pelc_reg; + struct mlx5_ifc_pfcc_reg_bits pfcc_reg; + struct mlx5_ifc_phys_layer_cntrs_bits phys_layer_cntrs; + struct mlx5_ifc_pifr_reg_bits pifr_reg; + struct mlx5_ifc_pipg_reg_bits pipg_reg; + struct mlx5_ifc_plbf_reg_bits plbf_reg; + struct mlx5_ifc_plib_reg_bits plib_reg; + struct mlx5_ifc_plpc_reg_bits plpc_reg; + struct mlx5_ifc_pmaos_reg_bits pmaos_reg; + struct mlx5_ifc_pmlp_reg_bits pmlp_reg; + struct mlx5_ifc_pmlpn_reg_bits pmlpn_reg; + struct mlx5_ifc_pmpc_reg_bits pmpc_reg; + struct mlx5_ifc_pmpe_reg_bits pmpe_reg; + struct mlx5_ifc_pmpr_reg_bits pmpr_reg; + struct mlx5_ifc_pmtu_reg_bits pmtu_reg; + struct mlx5_ifc_ppad_reg_bits ppad_reg; + struct mlx5_ifc_ppcnt_reg_bits ppcnt_reg; + struct mlx5_ifc_pplm_reg_bits pplm_reg; + struct mlx5_ifc_pplr_reg_bits pplr_reg; + struct mlx5_ifc_ppsc_reg_bits ppsc_reg; + struct mlx5_ifc_pqdr_reg_bits pqdr_reg; + struct mlx5_ifc_pspa_reg_bits pspa_reg; + struct mlx5_ifc_ptas_reg_bits ptas_reg; + struct mlx5_ifc_ptys_reg_bits ptys_reg; + struct mlx5_ifc_pude_reg_bits pude_reg; + struct mlx5_ifc_pvlc_reg_bits pvlc_reg; + struct mlx5_ifc_slrg_reg_bits slrg_reg; + struct mlx5_ifc_sltp_reg_bits sltp_reg; + u8 reserved_0[0x60e0]; +}; + +union mlx5_ifc_debug_enhancements_document_bits { + struct mlx5_ifc_health_buffer_bits health_buffer; + u8 reserved_0[0x200]; +}; + +union mlx5_ifc_uplink_pci_interface_document_bits { + struct mlx5_ifc_initial_seg_bits initial_seg; + u8 reserved_0[0x20060]; }; #endif /* MLX5_IFC_H */ diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 310b5f7fd6ae..f079fb1a31f7 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -134,13 +134,21 @@ enum { enum { MLX5_WQE_CTRL_CQ_UPDATE = 2 << 2, + MLX5_WQE_CTRL_CQ_UPDATE_AND_EQE = 3 << 2, MLX5_WQE_CTRL_SOLICITED = 1 << 1, }; enum { + MLX5_SEND_WQE_DS = 16, MLX5_SEND_WQE_BB = 64, }; +#define MLX5_SEND_WQEBB_NUM_DS (MLX5_SEND_WQE_BB / MLX5_SEND_WQE_DS) + +enum { + MLX5_SEND_WQE_MAX_WQEBBS = 16, +}; + enum { MLX5_WQE_FMR_PERM_LOCAL_READ = 1 << 27, MLX5_WQE_FMR_PERM_LOCAL_WRITE = 1 << 28, @@ -200,6 +208,23 @@ struct mlx5_wqe_ctrl_seg { #define MLX5_WQE_CTRL_WQE_INDEX_MASK 0x00ffff00 #define MLX5_WQE_CTRL_WQE_INDEX_SHIFT 8 +enum { + MLX5_ETH_WQE_L3_INNER_CSUM = 1 << 4, + MLX5_ETH_WQE_L4_INNER_CSUM = 1 << 5, + MLX5_ETH_WQE_L3_CSUM = 1 << 6, + MLX5_ETH_WQE_L4_CSUM = 1 << 7, +}; + +struct mlx5_wqe_eth_seg { + u8 rsvd0[4]; + u8 cs_flags; + u8 rsvd1; + __be16 mss; + __be32 rsvd2; + __be16 inline_hdr_sz; + u8 inline_hdr_start[2]; +}; + struct mlx5_wqe_xrc_seg { __be32 xrc_srqn; u8 rsvd[12]; -- cgit v1.2.3 From 938fe83c8dcbbf294d167e6163200a8540ae43c4 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 28 May 2015 22:28:41 +0300 Subject: net/mlx5_core: New device capabilities handling - Query all supported types of dev caps on driver load. - Store the Cap data outbox per cap type into driver private data. - Introduce new Macros to access/dump stored caps (using the auto generated data types). - Obsolete SW representation of dev caps (no need for SW copy for each cap). - Modify IB driver to use new macros for checking caps. Signed-off-by: Saeed Mahameed Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- drivers/infiniband/hw/mlx5/cq.c | 8 +- drivers/infiniband/hw/mlx5/mad.c | 2 +- drivers/infiniband/hw/mlx5/main.c | 113 ++++++++------- drivers/infiniband/hw/mlx5/mlx5_ib.h | 6 +- drivers/infiniband/hw/mlx5/mr.c | 3 +- drivers/infiniband/hw/mlx5/odp.c | 47 +++---- drivers/infiniband/hw/mlx5/qp.c | 84 +++++------ drivers/infiniband/hw/mlx5/srq.c | 7 +- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 4 +- drivers/net/ethernet/mellanox/mlx5/core/fw.c | 90 +++++++----- drivers/net/ethernet/mellanox/mlx5/core/main.c | 154 +++++++-------------- .../net/ethernet/mellanox/mlx5/core/mlx5_core.h | 10 +- drivers/net/ethernet/mellanox/mlx5/core/uar.c | 7 +- include/linux/mlx5/device.h | 66 ++++++++- include/linux/mlx5/driver.h | 58 +------- 15 files changed, 310 insertions(+), 349 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 4e88b18cf62e..e2bea9ab93b3 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -753,7 +753,7 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int entries, return ERR_PTR(-EINVAL); entries = roundup_pow_of_two(entries + 1); - if (entries > dev->mdev->caps.gen.max_cqes) + if (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz))) return ERR_PTR(-EINVAL); cq = kzalloc(sizeof(*cq), GFP_KERNEL); @@ -920,7 +920,7 @@ int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) int err; u32 fsel; - if (!(dev->mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_CQ_MODER)) + if (!MLX5_CAP_GEN(dev->mdev, cq_moderation)) return -ENOSYS; in = kzalloc(sizeof(*in), GFP_KERNEL); @@ -1075,7 +1075,7 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) int uninitialized_var(cqe_size); unsigned long flags; - if (!(dev->mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_RESIZE_CQ)) { + if (!MLX5_CAP_GEN(dev->mdev, cq_resize)) { pr_info("Firmware does not support resize CQ\n"); return -ENOSYS; } @@ -1084,7 +1084,7 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) return -EINVAL; entries = roundup_pow_of_two(entries + 1); - if (entries > dev->mdev->caps.gen.max_cqes + 1) + if (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz)) + 1) return -EINVAL; if (entries == ibcq->cqe + 1) diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index 9cf9a37bb5ff..f2d9e70818d7 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -129,7 +129,7 @@ int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port) packet_error = be16_to_cpu(out_mad->status); - dev->mdev->caps.gen.ext_port_cap[port - 1] = (!err && !packet_error) ? + dev->mdev->port_caps[port - 1].ext_port_cap = (!err && !packet_error) ? MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO : 0; out: diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 57c9809e8b87..9075649f30fc 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -66,15 +66,13 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, struct ib_device_attr *props) { struct mlx5_ib_dev *dev = to_mdev(ibdev); + struct mlx5_core_dev *mdev = dev->mdev; struct ib_smp *in_mad = NULL; struct ib_smp *out_mad = NULL; - struct mlx5_general_caps *gen; int err = -ENOMEM; int max_rq_sg; int max_sq_sg; - u64 flags; - gen = &dev->mdev->caps.gen; in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL); out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL); if (!in_mad || !out_mad) @@ -96,18 +94,18 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN; - flags = gen->flags; - if (flags & MLX5_DEV_CAP_FLAG_BAD_PKEY_CNTR) + + if (MLX5_CAP_GEN(mdev, pkv)) props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR; - if (flags & MLX5_DEV_CAP_FLAG_BAD_QKEY_CNTR) + if (MLX5_CAP_GEN(mdev, qkv)) props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR; - if (flags & MLX5_DEV_CAP_FLAG_APM) + if (MLX5_CAP_GEN(mdev, apm)) props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG; props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY; - if (flags & MLX5_DEV_CAP_FLAG_XRC) + if (MLX5_CAP_GEN(mdev, xrc)) props->device_cap_flags |= IB_DEVICE_XRC; props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; - if (flags & MLX5_DEV_CAP_FLAG_SIG_HAND_OVER) { + if (MLX5_CAP_GEN(mdev, sho)) { props->device_cap_flags |= IB_DEVICE_SIGNATURE_HANDOVER; /* At this stage no support for signature handover */ props->sig_prot_cap = IB_PROT_T10DIF_TYPE_1 | @@ -116,7 +114,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, props->sig_guard_cap = IB_GUARD_T10DIF_CRC | IB_GUARD_T10DIF_CSUM; } - if (flags & MLX5_DEV_CAP_FLAG_BLOCK_MCAST) + if (MLX5_CAP_GEN(mdev, block_lb_mc)) props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK; props->vendor_id = be32_to_cpup((__be32 *)(out_mad->data + 36)) & @@ -126,37 +124,38 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, memcpy(&props->sys_image_guid, out_mad->data + 4, 8); props->max_mr_size = ~0ull; - props->page_size_cap = gen->min_page_sz; - props->max_qp = 1 << gen->log_max_qp; - props->max_qp_wr = gen->max_wqes; - max_rq_sg = gen->max_rq_desc_sz / sizeof(struct mlx5_wqe_data_seg); - max_sq_sg = (gen->max_sq_desc_sz - sizeof(struct mlx5_wqe_ctrl_seg)) / - sizeof(struct mlx5_wqe_data_seg); + props->page_size_cap = 1ull << MLX5_CAP_GEN(mdev, log_pg_sz); + props->max_qp = 1 << MLX5_CAP_GEN(mdev, log_max_qp); + props->max_qp_wr = 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz); + max_rq_sg = MLX5_CAP_GEN(mdev, max_wqe_sz_rq) / + sizeof(struct mlx5_wqe_data_seg); + max_sq_sg = (MLX5_CAP_GEN(mdev, max_wqe_sz_sq) - + sizeof(struct mlx5_wqe_ctrl_seg)) / + sizeof(struct mlx5_wqe_data_seg); props->max_sge = min(max_rq_sg, max_sq_sg); - props->max_cq = 1 << gen->log_max_cq; - props->max_cqe = gen->max_cqes - 1; - props->max_mr = 1 << gen->log_max_mkey; - props->max_pd = 1 << gen->log_max_pd; - props->max_qp_rd_atom = 1 << gen->log_max_ra_req_qp; - props->max_qp_init_rd_atom = 1 << gen->log_max_ra_res_qp; - props->max_srq = 1 << gen->log_max_srq; - props->max_srq_wr = gen->max_srq_wqes - 1; - props->local_ca_ack_delay = gen->local_ca_ack_delay; + props->max_cq = 1 << MLX5_CAP_GEN(mdev, log_max_cq); + props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_eq_sz)) - 1; + props->max_mr = 1 << MLX5_CAP_GEN(mdev, log_max_mkey); + props->max_pd = 1 << MLX5_CAP_GEN(mdev, log_max_pd); + props->max_qp_rd_atom = 1 << MLX5_CAP_GEN(mdev, log_max_ra_req_qp); + props->max_qp_init_rd_atom = 1 << MLX5_CAP_GEN(mdev, log_max_ra_res_qp); + props->max_srq = 1 << MLX5_CAP_GEN(mdev, log_max_srq); + props->max_srq_wr = (1 << MLX5_CAP_GEN(mdev, log_max_srq_sz)) - 1; + props->local_ca_ack_delay = MLX5_CAP_GEN(mdev, local_ca_ack_delay); props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp; props->max_srq_sge = max_rq_sg - 1; props->max_fast_reg_page_list_len = (unsigned int)-1; - props->local_ca_ack_delay = gen->local_ca_ack_delay; props->atomic_cap = IB_ATOMIC_NONE; props->masked_atomic_cap = IB_ATOMIC_NONE; props->max_pkeys = be16_to_cpup((__be16 *)(out_mad->data + 28)); - props->max_mcast_grp = 1 << gen->log_max_mcg; - props->max_mcast_qp_attach = gen->max_qp_mcg; + props->max_mcast_grp = 1 << MLX5_CAP_GEN(mdev, log_max_mcg); + props->max_mcast_qp_attach = MLX5_CAP_GEN(mdev, max_qp_mcg); props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * props->max_mcast_grp; props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */ #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - if (dev->mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_ON_DMND_PG) + if (MLX5_CAP_GEN(mdev, pg)) props->device_cap_flags |= IB_DEVICE_ON_DEMAND_PAGING; props->odp_caps = dev->odp_caps; #endif @@ -172,14 +171,13 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props) { struct mlx5_ib_dev *dev = to_mdev(ibdev); + struct mlx5_core_dev *mdev = dev->mdev; struct ib_smp *in_mad = NULL; struct ib_smp *out_mad = NULL; - struct mlx5_general_caps *gen; int ext_active_speed; int err = -ENOMEM; - gen = &dev->mdev->caps.gen; - if (port < 1 || port > gen->num_ports) { + if (port < 1 || port > MLX5_CAP_GEN(mdev, num_ports)) { mlx5_ib_warn(dev, "invalid port number %d\n", port); return -EINVAL; } @@ -210,8 +208,8 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port, props->phys_state = out_mad->data[33] >> 4; props->port_cap_flags = be32_to_cpup((__be32 *)(out_mad->data + 20)); props->gid_tbl_len = out_mad->data[50]; - props->max_msg_sz = 1 << gen->log_max_msg; - props->pkey_tbl_len = gen->port[port - 1].pkey_table_len; + props->max_msg_sz = 1 << MLX5_CAP_GEN(mdev, log_max_msg); + props->pkey_tbl_len = mdev->port_caps[port - 1].pkey_table_len; props->bad_pkey_cntr = be16_to_cpup((__be16 *)(out_mad->data + 46)); props->qkey_viol_cntr = be16_to_cpup((__be16 *)(out_mad->data + 48)); props->active_width = out_mad->data[31] & 0xf; @@ -238,7 +236,7 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port, /* If reported active speed is QDR, check if is FDR-10 */ if (props->active_speed == 4) { - if (gen->ext_port_cap[port - 1] & + if (mdev->port_caps[port - 1].ext_port_cap & MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO) { init_query_mad(in_mad); in_mad->attr_id = MLX5_ATTR_EXTENDED_PORT_INFO; @@ -392,7 +390,6 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, struct mlx5_ib_alloc_ucontext_req_v2 req; struct mlx5_ib_alloc_ucontext_resp resp; struct mlx5_ib_ucontext *context; - struct mlx5_general_caps *gen; struct mlx5_uuar_info *uuari; struct mlx5_uar *uars; int gross_uuars; @@ -403,7 +400,6 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, int i; size_t reqlen; - gen = &dev->mdev->caps.gen; if (!dev->ib_active) return ERR_PTR(-EAGAIN); @@ -436,14 +432,14 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, num_uars = req.total_num_uuars / MLX5_NON_FP_BF_REGS_PER_PAGE; gross_uuars = num_uars * MLX5_BF_REGS_PER_PAGE; - resp.qp_tab_size = 1 << gen->log_max_qp; - resp.bf_reg_size = gen->bf_reg_size; - resp.cache_line_size = L1_CACHE_BYTES; - resp.max_sq_desc_sz = gen->max_sq_desc_sz; - resp.max_rq_desc_sz = gen->max_rq_desc_sz; - resp.max_send_wqebb = gen->max_wqes; - resp.max_recv_wr = gen->max_wqes; - resp.max_srq_recv_wr = gen->max_srq_wqes; + resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp); + resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size); + resp.cache_line_size = L1_CACHE_BYTES; + resp.max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq); + resp.max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq); + resp.max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz); + resp.max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz); + resp.max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz); context = kzalloc(sizeof(*context), GFP_KERNEL); if (!context) @@ -493,7 +489,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, mutex_init(&context->db_page_mutex); resp.tot_uuars = req.total_num_uuars; - resp.num_ports = gen->num_ports; + resp.num_ports = MLX5_CAP_GEN(dev->mdev, num_ports); err = ib_copy_to_udata(udata, &resp, sizeof(resp) - sizeof(resp.reserved)); if (err) @@ -895,11 +891,9 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context, static void get_ext_port_caps(struct mlx5_ib_dev *dev) { - struct mlx5_general_caps *gen; int port; - gen = &dev->mdev->caps.gen; - for (port = 1; port <= gen->num_ports; port++) + for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++) mlx5_query_ext_port_caps(dev, port); } @@ -907,11 +901,9 @@ static int get_port_caps(struct mlx5_ib_dev *dev) { struct ib_device_attr *dprops = NULL; struct ib_port_attr *pprops = NULL; - struct mlx5_general_caps *gen; int err = -ENOMEM; int port; - gen = &dev->mdev->caps.gen; pprops = kmalloc(sizeof(*pprops), GFP_KERNEL); if (!pprops) goto out; @@ -926,14 +918,17 @@ static int get_port_caps(struct mlx5_ib_dev *dev) goto out; } - for (port = 1; port <= gen->num_ports; port++) { + for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++) { err = mlx5_ib_query_port(&dev->ib_dev, port, pprops); if (err) { - mlx5_ib_warn(dev, "query_port %d failed %d\n", port, err); + mlx5_ib_warn(dev, "query_port %d failed %d\n", + port, err); break; } - gen->port[port - 1].pkey_table_len = dprops->max_pkeys; - gen->port[port - 1].gid_table_len = pprops->gid_tbl_len; + dev->mdev->port_caps[port - 1].pkey_table_len = + dprops->max_pkeys; + dev->mdev->port_caps[port - 1].gid_table_len = + pprops->gid_tbl_len; mlx5_ib_dbg(dev, "pkey_table_len %d, gid_table_len %d\n", dprops->max_pkeys, pprops->gid_tbl_len); } @@ -1207,8 +1202,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) strlcpy(dev->ib_dev.name, "mlx5_%d", IB_DEVICE_NAME_MAX); dev->ib_dev.owner = THIS_MODULE; dev->ib_dev.node_type = RDMA_NODE_IB_CA; - dev->ib_dev.local_dma_lkey = mdev->caps.gen.reserved_lkey; - dev->num_ports = mdev->caps.gen.num_ports; + dev->ib_dev.local_dma_lkey = 0 /* not supported for now */; + dev->num_ports = MLX5_CAP_GEN(mdev, num_ports); dev->ib_dev.phys_port_cnt = dev->num_ports; dev->ib_dev.num_comp_vectors = dev->mdev->priv.eq_table.num_comp_vectors; @@ -1286,9 +1281,9 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) dev->ib_dev.free_fast_reg_page_list = mlx5_ib_free_fast_reg_page_list; dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status; - mlx5_ib_internal_query_odp_caps(dev); + mlx5_ib_internal_fill_odp_caps(dev); - if (mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_XRC) { + if (MLX5_CAP_GEN(mdev, xrc)) { dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd; dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd; dev->ib_dev.uverbs_cmd_mask |= diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index dff1cfcdf476..0c441add0464 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -617,7 +617,7 @@ int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING extern struct workqueue_struct *mlx5_ib_page_fault_wq; -int mlx5_ib_internal_query_odp_caps(struct mlx5_ib_dev *dev); +void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev); void mlx5_ib_mr_pfault_handler(struct mlx5_ib_qp *qp, struct mlx5_ib_pfault *pfault); void mlx5_ib_odp_create_qp(struct mlx5_ib_qp *qp); @@ -631,9 +631,9 @@ void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start, unsigned long end); #else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ -static inline int mlx5_ib_internal_query_odp_caps(struct mlx5_ib_dev *dev) +static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) { - return 0; + return; } static inline void mlx5_ib_odp_create_qp(struct mlx5_ib_qp *qp) {} diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 71c593583864..bc9a0de897cb 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -975,8 +975,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr, struct mlx5_ib_mr *mr; int inlen; int err; - bool pg_cap = !!(dev->mdev->caps.gen.flags & - MLX5_DEV_CAP_FLAG_ON_DMND_PG); + bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg)); mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 5099db08afd2..aa8391e75385 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -109,40 +109,33 @@ void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start, ib_umem_odp_unmap_dma_pages(umem, start, end); } -#define COPY_ODP_BIT_MLX_TO_IB(reg, ib_caps, field_name, bit_name) do { \ - if (be32_to_cpu(reg.field_name) & MLX5_ODP_SUPPORT_##bit_name) \ - ib_caps->field_name |= IB_ODP_SUPPORT_##bit_name; \ -} while (0) - -int mlx5_ib_internal_query_odp_caps(struct mlx5_ib_dev *dev) +void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) { - int err; - struct mlx5_odp_caps hw_caps; struct ib_odp_caps *caps = &dev->odp_caps; memset(caps, 0, sizeof(*caps)); - if (!(dev->mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_ON_DMND_PG)) - return 0; - - err = mlx5_query_odp_caps(dev->mdev, &hw_caps); - if (err) - goto out; + if (!MLX5_CAP_GEN(dev->mdev, pg)) + return; caps->general_caps = IB_ODP_SUPPORT; - COPY_ODP_BIT_MLX_TO_IB(hw_caps, caps, per_transport_caps.ud_odp_caps, - SEND); - COPY_ODP_BIT_MLX_TO_IB(hw_caps, caps, per_transport_caps.rc_odp_caps, - SEND); - COPY_ODP_BIT_MLX_TO_IB(hw_caps, caps, per_transport_caps.rc_odp_caps, - RECV); - COPY_ODP_BIT_MLX_TO_IB(hw_caps, caps, per_transport_caps.rc_odp_caps, - WRITE); - COPY_ODP_BIT_MLX_TO_IB(hw_caps, caps, per_transport_caps.rc_odp_caps, - READ); - -out: - return err; + + if (MLX5_CAP_ODP(dev->mdev, ud_odp_caps.send)) + caps->per_transport_caps.ud_odp_caps |= IB_ODP_SUPPORT_SEND; + + if (MLX5_CAP_ODP(dev->mdev, rc_odp_caps.send)) + caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_SEND; + + if (MLX5_CAP_ODP(dev->mdev, rc_odp_caps.receive)) + caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_RECV; + + if (MLX5_CAP_ODP(dev->mdev, rc_odp_caps.write)) + caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_WRITE; + + if (MLX5_CAP_ODP(dev->mdev, rc_odp_caps.read)) + caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_READ; + + return; } static struct mlx5_ib_mr *mlx5_ib_odp_find_mr_lkey(struct mlx5_ib_dev *dev, diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 426eb88dfa49..15fd485d1ad9 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -220,13 +220,11 @@ static void mlx5_ib_qp_event(struct mlx5_core_qp *qp, int type) static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap, int has_rq, struct mlx5_ib_qp *qp, struct mlx5_ib_create_qp *ucmd) { - struct mlx5_general_caps *gen; int wqe_size; int wq_size; - gen = &dev->mdev->caps.gen; /* Sanity check RQ size before proceeding */ - if (cap->max_recv_wr > gen->max_wqes) + if (cap->max_recv_wr > (1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz))) return -EINVAL; if (!has_rq) { @@ -246,10 +244,11 @@ static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap, wq_size = roundup_pow_of_two(cap->max_recv_wr) * wqe_size; wq_size = max_t(int, wq_size, MLX5_SEND_WQE_BB); qp->rq.wqe_cnt = wq_size / wqe_size; - if (wqe_size > gen->max_rq_desc_sz) { + if (wqe_size > MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq)) { mlx5_ib_dbg(dev, "wqe_size %d, max %d\n", wqe_size, - gen->max_rq_desc_sz); + MLX5_CAP_GEN(dev->mdev, + max_wqe_sz_rq)); return -EINVAL; } qp->rq.wqe_shift = ilog2(wqe_size); @@ -330,11 +329,9 @@ static int calc_send_wqe(struct ib_qp_init_attr *attr) static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, struct mlx5_ib_qp *qp) { - struct mlx5_general_caps *gen; int wqe_size; int wq_size; - gen = &dev->mdev->caps.gen; if (!attr->cap.max_send_wr) return 0; @@ -343,9 +340,9 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, if (wqe_size < 0) return wqe_size; - if (wqe_size > gen->max_sq_desc_sz) { + if (wqe_size > MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq)) { mlx5_ib_dbg(dev, "wqe_size(%d) > max_sq_desc_sz(%d)\n", - wqe_size, gen->max_sq_desc_sz); + wqe_size, MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq)); return -EINVAL; } @@ -358,9 +355,10 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, wq_size = roundup_pow_of_two(attr->cap.max_send_wr * wqe_size); qp->sq.wqe_cnt = wq_size / MLX5_SEND_WQE_BB; - if (qp->sq.wqe_cnt > gen->max_wqes) { + if (qp->sq.wqe_cnt > (1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz))) { mlx5_ib_dbg(dev, "wqe count(%d) exceeds limits(%d)\n", - qp->sq.wqe_cnt, gen->max_wqes); + qp->sq.wqe_cnt, + 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz)); return -ENOMEM; } qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB); @@ -375,13 +373,11 @@ static int set_user_buf_size(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, struct mlx5_ib_create_qp *ucmd) { - struct mlx5_general_caps *gen; int desc_sz = 1 << qp->sq.wqe_shift; - gen = &dev->mdev->caps.gen; - if (desc_sz > gen->max_sq_desc_sz) { + if (desc_sz > MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq)) { mlx5_ib_warn(dev, "desc_sz %d, max_sq_desc_sz %d\n", - desc_sz, gen->max_sq_desc_sz); + desc_sz, MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq)); return -EINVAL; } @@ -393,9 +389,10 @@ static int set_user_buf_size(struct mlx5_ib_dev *dev, qp->sq.wqe_cnt = ucmd->sq_wqe_count; - if (qp->sq.wqe_cnt > gen->max_wqes) { + if (qp->sq.wqe_cnt > (1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz))) { mlx5_ib_warn(dev, "wqe_cnt %d, max_wqes %d\n", - qp->sq.wqe_cnt, gen->max_wqes); + qp->sq.wqe_cnt, + 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz)); return -EINVAL; } @@ -866,22 +863,21 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, struct ib_udata *udata, struct mlx5_ib_qp *qp) { struct mlx5_ib_resources *devr = &dev->devr; + struct mlx5_core_dev *mdev = dev->mdev; struct mlx5_ib_create_qp_resp resp; struct mlx5_create_qp_mbox_in *in; - struct mlx5_general_caps *gen; struct mlx5_ib_create_qp ucmd; int inlen = sizeof(*in); int err; mlx5_ib_odp_create_qp(qp); - gen = &dev->mdev->caps.gen; mutex_init(&qp->mutex); spin_lock_init(&qp->sq.lock); spin_lock_init(&qp->rq.lock); if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) { - if (!(gen->flags & MLX5_DEV_CAP_FLAG_BLOCK_MCAST)) { + if (!MLX5_CAP_GEN(mdev, block_lb_mc)) { mlx5_ib_dbg(dev, "block multicast loopback isn't supported\n"); return -EINVAL; } else { @@ -914,15 +910,17 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, if (pd) { if (pd->uobject) { + __u32 max_wqes = + 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz); mlx5_ib_dbg(dev, "requested sq_wqe_count (%d)\n", ucmd.sq_wqe_count); if (ucmd.rq_wqe_shift != qp->rq.wqe_shift || ucmd.rq_wqe_count != qp->rq.wqe_cnt) { mlx5_ib_dbg(dev, "invalid rq params\n"); return -EINVAL; } - if (ucmd.sq_wqe_count > gen->max_wqes) { + if (ucmd.sq_wqe_count > max_wqes) { mlx5_ib_dbg(dev, "requested sq_wqe_count (%d) > max allowed (%d)\n", - ucmd.sq_wqe_count, gen->max_wqes); + ucmd.sq_wqe_count, max_wqes); return -EINVAL; } err = create_user_qp(dev, pd, qp, udata, &in, &resp, &inlen); @@ -1226,7 +1224,6 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata) { - struct mlx5_general_caps *gen; struct mlx5_ib_dev *dev; struct mlx5_ib_qp *qp; u16 xrcdn = 0; @@ -1244,12 +1241,11 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, } dev = to_mdev(to_mxrcd(init_attr->xrcd)->ibxrcd.device); } - gen = &dev->mdev->caps.gen; switch (init_attr->qp_type) { case IB_QPT_XRC_TGT: case IB_QPT_XRC_INI: - if (!(gen->flags & MLX5_DEV_CAP_FLAG_XRC)) { + if (!MLX5_CAP_GEN(dev->mdev, xrc)) { mlx5_ib_dbg(dev, "XRC not supported\n"); return ERR_PTR(-ENOSYS); } @@ -1356,9 +1352,6 @@ enum { static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate) { - struct mlx5_general_caps *gen; - - gen = &dev->mdev->caps.gen; if (rate == IB_RATE_PORT_CURRENT) { return 0; } else if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_300_GBPS) { @@ -1366,7 +1359,7 @@ static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate) } else { while (rate != IB_RATE_2_5_GBPS && !(1 << (rate + MLX5_STAT_RATE_OFFSET) & - gen->stat_rate_support)) + MLX5_CAP_GEN(dev->mdev, stat_rate_support))) --rate; } @@ -1377,10 +1370,8 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah, struct mlx5_qp_path *path, u8 port, int attr_mask, u32 path_flags, const struct ib_qp_attr *attr) { - struct mlx5_general_caps *gen; int err; - gen = &dev->mdev->caps.gen; path->fl = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0; path->free_ar = (path_flags & MLX5_PATH_FLAG_FREE_AR) ? 0x80 : 0; @@ -1391,9 +1382,11 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah, path->rlid = cpu_to_be16(ah->dlid); if (ah->ah_flags & IB_AH_GRH) { - if (ah->grh.sgid_index >= gen->port[port - 1].gid_table_len) { + if (ah->grh.sgid_index >= + dev->mdev->port_caps[port - 1].gid_table_len) { pr_err("sgid_index (%u) too large. max is %d\n", - ah->grh.sgid_index, gen->port[port - 1].gid_table_len); + ah->grh.sgid_index, + dev->mdev->port_caps[port - 1].gid_table_len); return -EINVAL; } path->grh_mlid |= 1 << 7; @@ -1570,7 +1563,6 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, struct mlx5_ib_qp *qp = to_mqp(ibqp); struct mlx5_ib_cq *send_cq, *recv_cq; struct mlx5_qp_context *context; - struct mlx5_general_caps *gen; struct mlx5_modify_qp_mbox_in *in; struct mlx5_ib_pd *pd; enum mlx5_qp_state mlx5_cur, mlx5_new; @@ -1579,7 +1571,6 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, int mlx5_st; int err; - gen = &dev->mdev->caps.gen; in = kzalloc(sizeof(*in), GFP_KERNEL); if (!in) return -ENOMEM; @@ -1619,7 +1610,8 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, err = -EINVAL; goto out; } - context->mtu_msgmax = (attr->path_mtu << 5) | gen->log_max_msg; + context->mtu_msgmax = (attr->path_mtu << 5) | + (u8)MLX5_CAP_GEN(dev->mdev, log_max_msg); } if (attr_mask & IB_QP_DEST_QPN) @@ -1777,11 +1769,9 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, struct mlx5_ib_dev *dev = to_mdev(ibqp->device); struct mlx5_ib_qp *qp = to_mqp(ibqp); enum ib_qp_state cur_state, new_state; - struct mlx5_general_caps *gen; int err = -EINVAL; int port; - gen = &dev->mdev->caps.gen; mutex_lock(&qp->mutex); cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state; @@ -1793,21 +1783,25 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, goto out; if ((attr_mask & IB_QP_PORT) && - (attr->port_num == 0 || attr->port_num > gen->num_ports)) + (attr->port_num == 0 || + attr->port_num > MLX5_CAP_GEN(dev->mdev, num_ports))) goto out; if (attr_mask & IB_QP_PKEY_INDEX) { port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port; - if (attr->pkey_index >= gen->port[port - 1].pkey_table_len) + if (attr->pkey_index >= + dev->mdev->port_caps[port - 1].pkey_table_len) goto out; } if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && - attr->max_rd_atomic > (1 << gen->log_max_ra_res_qp)) + attr->max_rd_atomic > + (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_res_qp))) goto out; if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && - attr->max_dest_rd_atomic > (1 << gen->log_max_ra_req_qp)) + attr->max_dest_rd_atomic > + (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_req_qp))) goto out; if (cur_state == new_state && cur_state == IB_QPS_RESET) { @@ -3009,7 +3003,7 @@ static void to_ib_ah_attr(struct mlx5_ib_dev *ibdev, struct ib_ah_attr *ib_ah_at ib_ah_attr->port_num = path->port; if (ib_ah_attr->port_num == 0 || - ib_ah_attr->port_num > dev->caps.gen.num_ports) + ib_ah_attr->port_num > MLX5_CAP_GEN(dev, num_ports)) return; ib_ah_attr->sl = path->sl & 0xf; @@ -3135,12 +3129,10 @@ struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(ibdev); - struct mlx5_general_caps *gen; struct mlx5_ib_xrcd *xrcd; int err; - gen = &dev->mdev->caps.gen; - if (!(gen->flags & MLX5_DEV_CAP_FLAG_XRC)) + if (!MLX5_CAP_GEN(dev->mdev, xrc)) return ERR_PTR(-ENOSYS); xrcd = kmalloc(sizeof(*xrcd), GFP_KERNEL); diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 4242e1ded868..e8e8e942fa4a 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -236,7 +236,6 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(pd->device); - struct mlx5_general_caps *gen; struct mlx5_ib_srq *srq; int desc_size; int buf_size; @@ -245,13 +244,13 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, int uninitialized_var(inlen); int is_xrc; u32 flgs, xrcdn; + __u32 max_srq_wqes = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz); - gen = &dev->mdev->caps.gen; /* Sanity check SRQ size before proceeding */ - if (init_attr->attr.max_wr >= gen->max_srq_wqes) { + if (init_attr->attr.max_wr >= max_srq_wqes) { mlx5_ib_dbg(dev, "max_wr %d, cap %d\n", init_attr->attr.max_wr, - gen->max_srq_wqes); + max_srq_wqes); return ERR_PTR(-EINVAL); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 516efc25fc4f..a40b96d4c662 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -455,7 +455,7 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev) u32 async_event_mask = MLX5_ASYNC_EVENT_MASK; int err; - if (dev->caps.gen.flags & MLX5_DEV_CAP_FLAG_ON_DMND_PG) + if (MLX5_CAP_GEN(dev, pg)) async_event_mask |= (1ull << MLX5_EVENT_TYPE_PAGE_FAULT); err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD, @@ -478,7 +478,7 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev) err = mlx5_create_map_eq(dev, &table->pages_eq, MLX5_EQ_VEC_PAGES, - dev->caps.gen.max_vf + 1, + /* TODO: sriov max_vf + */ 1, 1 << MLX5_EVENT_TYPE_PAGE_REQUEST, "mlx5_pages_eq", &dev->priv.uuari.uars[0]); if (err) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index ef9b7695decd..801ccadd709a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -64,50 +64,74 @@ out_out: return err; } -int mlx5_cmd_query_hca_cap(struct mlx5_core_dev *dev, struct mlx5_caps *caps) +int mlx5_query_hca_caps(struct mlx5_core_dev *dev) { - return mlx5_core_get_caps(dev, caps, HCA_CAP_OPMOD_GET_CUR); -} - -int mlx5_query_odp_caps(struct mlx5_core_dev *dev, struct mlx5_odp_caps *caps) -{ - u8 in[MLX5_ST_SZ_BYTES(query_hca_cap_in)]; - int out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); - void *out; int err; - if (!(dev->caps.gen.flags & MLX5_DEV_CAP_FLAG_ON_DMND_PG)) - return -ENOTSUPP; + err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL, HCA_CAP_OPMOD_GET_CUR); + if (err) + return err; - memset(in, 0, sizeof(in)); - out = kzalloc(out_sz, GFP_KERNEL); - if (!out) - return -ENOMEM; - MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP); - MLX5_SET(query_hca_cap_in, in, op_mod, HCA_CAP_OPMOD_GET_ODP_CUR); - err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz); + err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL, HCA_CAP_OPMOD_GET_MAX); if (err) - goto out; + return err; - err = mlx5_cmd_status_to_err_v2(out); - if (err) { - mlx5_core_warn(dev, "query cur hca ODP caps failed, %d\n", err); - goto out; + if (MLX5_CAP_GEN(dev, eth_net_offloads)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_ETHERNET_OFFLOADS, + HCA_CAP_OPMOD_GET_CUR); + if (err) + return err; + err = mlx5_core_get_caps(dev, MLX5_CAP_ETHERNET_OFFLOADS, + HCA_CAP_OPMOD_GET_MAX); + if (err) + return err; } - memcpy(caps, MLX5_ADDR_OF(query_hca_cap_out, out, capability), - sizeof(*caps)); + if (MLX5_CAP_GEN(dev, pg)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_ODP, + HCA_CAP_OPMOD_GET_CUR); + if (err) + return err; + err = mlx5_core_get_caps(dev, MLX5_CAP_ODP, + HCA_CAP_OPMOD_GET_MAX); + if (err) + return err; + } - mlx5_core_dbg(dev, "on-demand paging capabilities:\nrc: %08x\nuc: %08x\nud: %08x\n", - be32_to_cpu(caps->per_transport_caps.rc_odp_caps), - be32_to_cpu(caps->per_transport_caps.uc_odp_caps), - be32_to_cpu(caps->per_transport_caps.ud_odp_caps)); + if (MLX5_CAP_GEN(dev, atomic)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC, + HCA_CAP_OPMOD_GET_CUR); + if (err) + return err; + err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC, + HCA_CAP_OPMOD_GET_MAX); + if (err) + return err; + } -out: - kfree(out); - return err; + if (MLX5_CAP_GEN(dev, roce)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_ROCE, + HCA_CAP_OPMOD_GET_CUR); + if (err) + return err; + err = mlx5_core_get_caps(dev, MLX5_CAP_ROCE, + HCA_CAP_OPMOD_GET_MAX); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, nic_flow_table)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_FLOW_TABLE, + HCA_CAP_OPMOD_GET_CUR); + if (err) + return err; + err = mlx5_core_get_caps(dev, MLX5_CAP_FLOW_TABLE, + HCA_CAP_OPMOD_GET_MAX); + if (err) + return err; + } + return 0; } -EXPORT_SYMBOL(mlx5_query_odp_caps); int mlx5_cmd_init_hca(struct mlx5_core_dev *dev) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index a652cb93ceaa..e7b7b123a128 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -211,11 +211,12 @@ static int mlx5_enable_msix(struct mlx5_core_dev *dev) { struct mlx5_priv *priv = &dev->priv; struct mlx5_eq_table *table = &priv->eq_table; - int num_eqs = 1 << dev->caps.gen.log_max_eq; + int num_eqs = 1 << MLX5_CAP_GEN(dev, log_max_eq); int nvec; int i; - nvec = dev->caps.gen.num_ports * num_online_cpus() + MLX5_EQ_VEC_COMP_BASE; + nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + + MLX5_EQ_VEC_COMP_BASE; nvec = min_t(int, nvec, num_eqs); if (nvec <= MLX5_EQ_VEC_COMP_BASE) return -ENOMEM; @@ -287,97 +288,28 @@ static u16 to_fw_pkey_sz(u32 size) } } -/* selectively copy writable fields clearing any reserved area - */ -static void copy_rw_fields(void *to, struct mlx5_caps *from) -{ - __be64 *flags_off = (__be64 *)MLX5_ADDR_OF(cmd_hca_cap, to, reserved_22); - u64 v64; - - MLX5_SET(cmd_hca_cap, to, log_max_qp, from->gen.log_max_qp); - MLX5_SET(cmd_hca_cap, to, log_max_ra_req_qp, from->gen.log_max_ra_req_qp); - MLX5_SET(cmd_hca_cap, to, log_max_ra_res_qp, from->gen.log_max_ra_res_qp); - MLX5_SET(cmd_hca_cap, to, pkey_table_size, from->gen.pkey_table_size); - MLX5_SET(cmd_hca_cap, to, pkey_table_size, to_fw_pkey_sz(from->gen.pkey_table_size)); - MLX5_SET(cmd_hca_cap, to, log_uar_page_sz, PAGE_SHIFT - 12); - v64 = from->gen.flags & MLX5_CAP_BITS_RW_MASK; - *flags_off = cpu_to_be64(v64); -} - -static u16 get_pkey_table_size(int pkey) +static u16 to_sw_pkey_sz(int pkey_sz) { - if (pkey > MLX5_MAX_LOG_PKEY_TABLE) + if (pkey_sz > MLX5_MAX_LOG_PKEY_TABLE) return 0; - return MLX5_MIN_PKEY_TABLE_SIZE << pkey; -} - -static void fw2drv_caps(struct mlx5_caps *caps, void *out) -{ - struct mlx5_general_caps *gen = &caps->gen; - - gen->max_srq_wqes = 1 << MLX5_GET_PR(cmd_hca_cap, out, log_max_srq_sz); - gen->max_wqes = 1 << MLX5_GET_PR(cmd_hca_cap, out, log_max_qp_sz); - gen->log_max_qp = MLX5_GET_PR(cmd_hca_cap, out, log_max_qp); - gen->log_max_srq = MLX5_GET_PR(cmd_hca_cap, out, log_max_srq); - gen->max_cqes = 1 << MLX5_GET_PR(cmd_hca_cap, out, log_max_cq_sz); - gen->log_max_cq = MLX5_GET_PR(cmd_hca_cap, out, log_max_cq); - gen->max_eqes = 1 << MLX5_GET_PR(cmd_hca_cap, out, log_max_eq_sz); - gen->log_max_mkey = MLX5_GET_PR(cmd_hca_cap, out, log_max_mkey); - gen->log_max_eq = MLX5_GET_PR(cmd_hca_cap, out, log_max_eq); - gen->max_indirection = MLX5_GET_PR(cmd_hca_cap, out, max_indirection); - gen->log_max_mrw_sz = MLX5_GET_PR(cmd_hca_cap, out, log_max_mrw_sz); - gen->log_max_bsf_list_size = MLX5_GET_PR(cmd_hca_cap, out, log_max_bsf_list_size); - gen->log_max_klm_list_size = MLX5_GET_PR(cmd_hca_cap, out, log_max_klm_list_size); - gen->log_max_ra_req_dc = MLX5_GET_PR(cmd_hca_cap, out, log_max_ra_req_dc); - gen->log_max_ra_res_dc = MLX5_GET_PR(cmd_hca_cap, out, log_max_ra_res_dc); - gen->log_max_ra_req_qp = MLX5_GET_PR(cmd_hca_cap, out, log_max_ra_req_qp); - gen->log_max_ra_res_qp = MLX5_GET_PR(cmd_hca_cap, out, log_max_ra_res_qp); - gen->max_qp_counters = MLX5_GET_PR(cmd_hca_cap, out, max_qp_cnt); - gen->pkey_table_size = get_pkey_table_size(MLX5_GET_PR(cmd_hca_cap, out, pkey_table_size)); - gen->local_ca_ack_delay = MLX5_GET_PR(cmd_hca_cap, out, local_ca_ack_delay); - gen->num_ports = MLX5_GET_PR(cmd_hca_cap, out, num_ports); - gen->log_max_msg = MLX5_GET_PR(cmd_hca_cap, out, log_max_msg); - gen->stat_rate_support = MLX5_GET_PR(cmd_hca_cap, out, stat_rate_support); - gen->flags = be64_to_cpu(*(__be64 *)MLX5_ADDR_OF(cmd_hca_cap, out, reserved_22)); - pr_debug("flags = 0x%llx\n", gen->flags); - gen->uar_sz = MLX5_GET_PR(cmd_hca_cap, out, uar_sz); - gen->min_log_pg_sz = MLX5_GET_PR(cmd_hca_cap, out, log_pg_sz); - gen->bf_reg_size = MLX5_GET_PR(cmd_hca_cap, out, bf); - gen->bf_reg_size = 1 << MLX5_GET_PR(cmd_hca_cap, out, log_bf_reg_size); - gen->max_sq_desc_sz = MLX5_GET_PR(cmd_hca_cap, out, max_wqe_sz_sq); - gen->max_rq_desc_sz = MLX5_GET_PR(cmd_hca_cap, out, max_wqe_sz_rq); - gen->max_dc_sq_desc_sz = MLX5_GET_PR(cmd_hca_cap, out, max_wqe_sz_sq_dc); - gen->max_qp_mcg = MLX5_GET_PR(cmd_hca_cap, out, max_qp_mcg); - gen->log_max_pd = MLX5_GET_PR(cmd_hca_cap, out, log_max_pd); - gen->log_max_xrcd = MLX5_GET_PR(cmd_hca_cap, out, log_max_xrcd); - gen->log_uar_page_sz = MLX5_GET_PR(cmd_hca_cap, out, log_uar_page_sz); -} - -static const char *caps_opmod_str(u16 opmod) -{ - switch (opmod) { - case HCA_CAP_OPMOD_GET_MAX: - return "GET_MAX"; - case HCA_CAP_OPMOD_GET_CUR: - return "GET_CUR"; - default: - return "Invalid"; - } + return MLX5_MIN_PKEY_TABLE_SIZE << pkey_sz; } -int mlx5_core_get_caps(struct mlx5_core_dev *dev, struct mlx5_caps *caps, - u16 opmod) +int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type, + enum mlx5_cap_mode cap_mode) { u8 in[MLX5_ST_SZ_BYTES(query_hca_cap_in)]; int out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); - void *out; + void *out, *hca_caps; + u16 opmod = (cap_type << 1) | (cap_mode & 0x01); int err; memset(in, 0, sizeof(in)); out = kzalloc(out_sz, GFP_KERNEL); if (!out) return -ENOMEM; + MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP); MLX5_SET(query_hca_cap_in, in, op_mod, opmod); err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz); @@ -386,12 +318,30 @@ int mlx5_core_get_caps(struct mlx5_core_dev *dev, struct mlx5_caps *caps, err = mlx5_cmd_status_to_err_v2(out); if (err) { - mlx5_core_warn(dev, "query max hca cap failed, %d\n", err); + mlx5_core_warn(dev, + "QUERY_HCA_CAP : type(%x) opmode(%x) Failed(%d)\n", + cap_type, cap_mode, err); goto query_ex; } - mlx5_core_dbg(dev, "%s\n", caps_opmod_str(opmod)); - fw2drv_caps(caps, MLX5_ADDR_OF(query_hca_cap_out, out, capability)); + hca_caps = MLX5_ADDR_OF(query_hca_cap_out, out, capability); + + switch (cap_mode) { + case HCA_CAP_OPMOD_GET_MAX: + memcpy(dev->hca_caps_max[cap_type], hca_caps, + MLX5_UN_SZ_BYTES(hca_cap_union)); + break; + case HCA_CAP_OPMOD_GET_CUR: + memcpy(dev->hca_caps_cur[cap_type], hca_caps, + MLX5_UN_SZ_BYTES(hca_cap_union)); + break; + default: + mlx5_core_warn(dev, + "Tried to query dev cap type(%x) with wrong opmode(%x)\n", + cap_type, cap_mode); + err = -EINVAL; + break; + } query_ex: kfree(out); return err; @@ -418,49 +368,45 @@ static int handle_hca_cap(struct mlx5_core_dev *dev) { void *set_ctx = NULL; struct mlx5_profile *prof = dev->profile; - struct mlx5_caps *cur_caps = NULL; - struct mlx5_caps *max_caps = NULL; int err = -ENOMEM; int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in); + void *set_hca_cap; set_ctx = kzalloc(set_sz, GFP_KERNEL); if (!set_ctx) goto query_ex; - max_caps = kzalloc(sizeof(*max_caps), GFP_KERNEL); - if (!max_caps) - goto query_ex; - - cur_caps = kzalloc(sizeof(*cur_caps), GFP_KERNEL); - if (!cur_caps) - goto query_ex; - - err = mlx5_core_get_caps(dev, max_caps, HCA_CAP_OPMOD_GET_MAX); + err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL, HCA_CAP_OPMOD_GET_MAX); if (err) goto query_ex; - err = mlx5_core_get_caps(dev, cur_caps, HCA_CAP_OPMOD_GET_CUR); + err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL, HCA_CAP_OPMOD_GET_CUR); if (err) goto query_ex; + set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, + capability); + memcpy(set_hca_cap, dev->hca_caps_cur[MLX5_CAP_GENERAL], + MLX5_ST_SZ_BYTES(cmd_hca_cap)); + + mlx5_core_dbg(dev, "Current Pkey table size %d Setting new size %d\n", + to_sw_pkey_sz(MLX5_CAP_GEN(dev, pkey_table_size)), + 128); /* we limit the size of the pkey table to 128 entries for now */ - cur_caps->gen.pkey_table_size = 128; + MLX5_SET(cmd_hca_cap, set_hca_cap, pkey_table_size, + to_fw_pkey_sz(128)); if (prof->mask & MLX5_PROF_MASK_QP_SIZE) - cur_caps->gen.log_max_qp = prof->log_max_qp; + MLX5_SET(cmd_hca_cap, set_hca_cap, log_max_qp, + prof->log_max_qp); - /* disable checksum */ - cur_caps->gen.flags &= ~MLX5_DEV_CAP_FLAG_CMDIF_CSUM; + /* disable cmdif checksum */ + MLX5_SET(cmd_hca_cap, set_hca_cap, cmdif_checksum, 0); - copy_rw_fields(MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability), - cur_caps); err = set_caps(dev, set_ctx, set_sz); query_ex: - kfree(cur_caps); - kfree(max_caps); kfree(set_ctx); - return err; } @@ -768,7 +714,7 @@ static int mlx5_dev_init(struct mlx5_core_dev *dev, struct pci_dev *pdev) mlx5_start_health_poll(dev); - err = mlx5_cmd_query_hca_cap(dev, &dev->caps); + err = mlx5_query_hca_caps(dev); if (err) { dev_err(&pdev->dev, "query hca failed\n"); goto err_stop_poll; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index a051b906afdf..b986f1c258bc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -65,9 +65,15 @@ enum { MLX5_CMD_TIME, /* print command execution time */ }; +static inline int mlx5_cmd_exec_check_status(struct mlx5_core_dev *dev, u32 *in, + int in_size, u32 *out, + int out_size) +{ + mlx5_cmd_exec(dev, in, in_size, out, out_size); + return mlx5_cmd_status_to_err((struct mlx5_outbox_hdr *)out); +} -int mlx5_cmd_query_hca_cap(struct mlx5_core_dev *dev, - struct mlx5_caps *caps); +int mlx5_query_hca_caps(struct mlx5_core_dev *dev); int mlx5_cmd_query_adapter(struct mlx5_core_dev *dev); int mlx5_cmd_init_hca(struct mlx5_core_dev *dev); int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/uar.c b/drivers/net/ethernet/mellanox/mlx5/core/uar.c index ba58f6d7c761..9ef85873ceea 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/uar.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/uar.c @@ -175,12 +175,13 @@ int mlx5_alloc_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari) for (i = 0; i < tot_uuars; i++) { bf = &uuari->bfs[i]; - bf->buf_size = dev->caps.gen.bf_reg_size / 2; + bf->buf_size = (1 << MLX5_CAP_GEN(dev, log_bf_reg_size)) / 2; bf->uar = &uuari->uars[i / MLX5_BF_REGS_PER_PAGE]; bf->regreg = uuari->uars[i / MLX5_BF_REGS_PER_PAGE].map; bf->reg = NULL; /* Add WC support */ - bf->offset = (i % MLX5_BF_REGS_PER_PAGE) * dev->caps.gen.bf_reg_size + - MLX5_BF_OFFSET; + bf->offset = (i % MLX5_BF_REGS_PER_PAGE) * + (1 << MLX5_CAP_GEN(dev, log_bf_reg_size)) + + MLX5_BF_OFFSET; bf->need_lock = need_uuar_lock(i); spin_lock_init(&bf->lock); spin_lock_init(&bf->lock32); diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index feebed7b392b..4ee52bf1f959 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -59,6 +59,8 @@ #define MLX5_FLD_SZ_BYTES(typ, fld) (__mlx5_bit_sz(typ, fld) / 8) #define MLX5_ST_SZ_BYTES(typ) (sizeof(struct mlx5_ifc_##typ##_bits) / 8) #define MLX5_ST_SZ_DW(typ) (sizeof(struct mlx5_ifc_##typ##_bits) / 32) +#define MLX5_UN_SZ_BYTES(typ) (sizeof(union mlx5_ifc_##typ##_bits) / 8) +#define MLX5_UN_SZ_DW(typ) (sizeof(union mlx5_ifc_##typ##_bits) / 32) #define MLX5_BYTE_OFF(typ, fld) (__mlx5_bit_off(typ, fld) / 8) #define MLX5_ADDR_OF(typ, p, fld) ((char *)(p) + MLX5_BYTE_OFF(typ, fld)) @@ -322,13 +324,6 @@ enum { MLX5_CAP_OFF_CMDIF_CSUM = 46, }; -enum { - HCA_CAP_OPMOD_GET_MAX = 0, - HCA_CAP_OPMOD_GET_CUR = 1, - HCA_CAP_OPMOD_GET_ODP_MAX = 4, - HCA_CAP_OPMOD_GET_ODP_CUR = 5 -}; - struct mlx5_inbox_hdr { __be16 opcode; u8 rsvd[4]; @@ -1101,4 +1096,61 @@ enum { MLX5_RQC_RQ_TYPE_MEMORY_RQ_RPM = 0x1, }; +/* MLX5 DEV CAPs */ + +/* TODO: EAT.ME */ +enum mlx5_cap_mode { + HCA_CAP_OPMOD_GET_MAX = 0, + HCA_CAP_OPMOD_GET_CUR = 1, +}; + +enum mlx5_cap_type { + MLX5_CAP_GENERAL = 0, + MLX5_CAP_ETHERNET_OFFLOADS, + MLX5_CAP_ODP, + MLX5_CAP_ATOMIC, + MLX5_CAP_ROCE, + MLX5_CAP_IPOIB_OFFLOADS, + MLX5_CAP_EOIB_OFFLOADS, + MLX5_CAP_FLOW_TABLE, + /* NUM OF CAP Types */ + MLX5_CAP_NUM +}; + +/* GET Dev Caps macros */ +#define MLX5_CAP_GEN(mdev, cap) \ + MLX5_GET(cmd_hca_cap, mdev->hca_caps_cur[MLX5_CAP_GENERAL], cap) + +#define MLX5_CAP_GEN_MAX(mdev, cap) \ + MLX5_GET(cmd_hca_cap, mdev->hca_caps_max[MLX5_CAP_GENERAL], cap) + +#define MLX5_CAP_ETH(mdev, cap) \ + MLX5_GET(per_protocol_networking_offload_caps,\ + mdev->hca_caps_cur[MLX5_CAP_ETHERNET_OFFLOADS], cap) + +#define MLX5_CAP_ETH_MAX(mdev, cap) \ + MLX5_GET(per_protocol_networking_offload_caps,\ + mdev->hca_caps_max[MLX5_CAP_ETHERNET_OFFLOADS], cap) + +#define MLX5_CAP_ROCE(mdev, cap) \ + MLX5_GET(roce_cap, mdev->hca_caps_cur[MLX5_CAP_ROCE], cap) + +#define MLX5_CAP_ROCE_MAX(mdev, cap) \ + MLX5_GET(roce_cap, mdev->hca_caps_max[MLX5_CAP_ROCE], cap) + +#define MLX5_CAP_ATOMIC(mdev, cap) \ + MLX5_GET(atomic_caps, mdev->hca_caps_cur[MLX5_CAP_ATOMIC], cap) + +#define MLX5_CAP_ATOMIC_MAX(mdev, cap) \ + MLX5_GET(atomic_caps, mdev->hca_caps_max[MLX5_CAP_ATOMIC], cap) + +#define MLX5_CAP_FLOWTABLE(mdev, cap) \ + MLX5_GET(flow_table_nic_cap, mdev->hca_caps_cur[MLX5_CAP_FLOW_TABLE], cap) + +#define MLX5_CAP_FLOWTABLE_MAX(mdev, cap) \ + MLX5_GET(flow_table_nic_cap, mdev->hca_caps_max[MLX5_CAP_FLOW_TABLE], cap) + +#define MLX5_CAP_ODP(mdev, cap)\ + MLX5_GET(odp_cap, mdev->hca_caps_cur[MLX5_CAP_ODP], cap) + #endif /* MLX5_DEVICE_H */ diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 3fd4fdc1ba16..6b9199163633 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -268,55 +268,7 @@ struct mlx5_cmd { struct mlx5_port_caps { int gid_table_len; int pkey_table_len; -}; - -struct mlx5_general_caps { - u8 log_max_eq; - u8 log_max_cq; - u8 log_max_qp; - u8 log_max_mkey; - u8 log_max_pd; - u8 log_max_srq; - u8 log_max_mrw_sz; - u8 log_max_bsf_list_size; - u8 log_max_klm_list_size; - u32 max_cqes; - int max_wqes; - u32 max_eqes; - u32 max_indirection; - int max_sq_desc_sz; - int max_rq_desc_sz; - int max_dc_sq_desc_sz; - u64 flags; - u16 stat_rate_support; - int log_max_msg; - int num_ports; - u8 log_max_ra_res_qp; - u8 log_max_ra_req_qp; - int max_srq_wqes; - int bf_reg_size; - int bf_regs_per_page; - struct mlx5_port_caps port[MLX5_MAX_PORTS]; - u8 ext_port_cap[MLX5_MAX_PORTS]; - int max_vf; - u32 reserved_lkey; - u8 local_ca_ack_delay; - u8 log_max_mcg; - u32 max_qp_mcg; - int min_page_sz; - int pd_cap; - u32 max_qp_counters; - u32 pkey_table_size; - u8 log_max_ra_req_dc; - u8 log_max_ra_res_dc; - u32 uar_sz; - u8 min_log_pg_sz; - u8 log_max_xrcd; - u16 log_uar_page_sz; -}; - -struct mlx5_caps { - struct mlx5_general_caps gen; + u8 ext_port_cap; }; struct mlx5_cmd_mailbox { @@ -521,7 +473,9 @@ struct mlx5_core_dev { u8 rev_id; char board_id[MLX5_BOARD_ID_LEN]; struct mlx5_cmd cmd; - struct mlx5_caps caps; + struct mlx5_port_caps port_caps[MLX5_MAX_PORTS]; + u32 hca_caps_cur[MLX5_CAP_NUM][MLX5_UN_SZ_DW(hca_cap_union)]; + u32 hca_caps_max[MLX5_CAP_NUM][MLX5_UN_SZ_DW(hca_cap_union)]; phys_addr_t iseg_base; struct mlx5_init_seg __iomem *iseg; void (*event) (struct mlx5_core_dev *dev, @@ -651,8 +605,8 @@ void mlx5_cmd_use_events(struct mlx5_core_dev *dev); void mlx5_cmd_use_polling(struct mlx5_core_dev *dev); int mlx5_cmd_status_to_err(struct mlx5_outbox_hdr *hdr); int mlx5_cmd_status_to_err_v2(void *ptr); -int mlx5_core_get_caps(struct mlx5_core_dev *dev, struct mlx5_caps *caps, - u16 opmod); +int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type, + enum mlx5_cap_mode cap_mode); int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size); int mlx5_cmd_exec_cb(struct mlx5_core_dev *dev, void *in, int in_size, -- cgit v1.2.3 From adb0c9545bce6f1b1d563e988e6ee5531861d449 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 28 May 2015 22:28:42 +0300 Subject: net/mlx5_core: Implement access functions of ptys register fields Those registers will be used by the ethtool to set/get settings. Signed-off-by: Rana Shahout Signed-off-by: Saeed Mahameed Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/port.c | 77 ++++++++++++++++++++++++++ include/linux/mlx5/driver.h | 14 +++++ 2 files changed, 91 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index 49e90f2612d8..6e2d99cc3b61 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -102,3 +102,80 @@ int mlx5_set_port_caps(struct mlx5_core_dev *dev, u8 port_num, u32 caps) return err; } EXPORT_SYMBOL_GPL(mlx5_set_port_caps); + +int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys, + int ptys_size, int proto_mask) +{ + u32 in[MLX5_ST_SZ_DW(ptys_reg)]; + int err; + + memset(in, 0, sizeof(in)); + MLX5_SET(ptys_reg, in, local_port, 1); + MLX5_SET(ptys_reg, in, proto_mask, proto_mask); + + err = mlx5_core_access_reg(dev, in, sizeof(in), ptys, + ptys_size, MLX5_REG_PTYS, 0, 0); + + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_port_ptys); + +int mlx5_query_port_proto_cap(struct mlx5_core_dev *dev, + u32 *proto_cap, int proto_mask) +{ + u32 out[MLX5_ST_SZ_DW(ptys_reg)]; + int err; + + err = mlx5_query_port_ptys(dev, out, sizeof(out), proto_mask); + if (err) + return err; + + if (proto_mask == MLX5_PTYS_EN) + *proto_cap = MLX5_GET(ptys_reg, out, eth_proto_capability); + else + *proto_cap = MLX5_GET(ptys_reg, out, ib_proto_capability); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_query_port_proto_cap); + +int mlx5_query_port_proto_admin(struct mlx5_core_dev *dev, + u32 *proto_admin, int proto_mask) +{ + u32 out[MLX5_ST_SZ_DW(ptys_reg)]; + int err; + + err = mlx5_query_port_ptys(dev, out, sizeof(out), proto_mask); + if (err) + return err; + + if (proto_mask == MLX5_PTYS_EN) + *proto_admin = MLX5_GET(ptys_reg, out, eth_proto_admin); + else + *proto_admin = MLX5_GET(ptys_reg, out, ib_proto_admin); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_query_port_proto_admin); + +int mlx5_set_port_proto(struct mlx5_core_dev *dev, u32 proto_admin, + int proto_mask) +{ + u32 in[MLX5_ST_SZ_DW(ptys_reg)]; + u32 out[MLX5_ST_SZ_DW(ptys_reg)]; + int err; + + memset(in, 0, sizeof(in)); + + MLX5_SET(ptys_reg, in, local_port, 1); + MLX5_SET(ptys_reg, in, proto_mask, proto_mask); + if (proto_mask == MLX5_PTYS_EN) + MLX5_SET(ptys_reg, in, eth_proto_admin, proto_admin); + else + MLX5_SET(ptys_reg, in, ib_proto_admin, proto_admin); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_PTYS, 0, 1); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_set_port_proto); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 6b9199163633..266d5498a270 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -504,6 +504,11 @@ enum { MLX5_COMP_EQ_SIZE = 1024, }; +enum { + MLX5_PTYS_IB = 1 << 0, + MLX5_PTYS_EN = 1 << 2, +}; + struct mlx5_db_pgdir { struct list_head list; DECLARE_BITMAP(bitmap, MLX5_DB_PER_PAGE); @@ -686,7 +691,16 @@ void mlx5_qp_debugfs_cleanup(struct mlx5_core_dev *dev); int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in, int size_in, void *data_out, int size_out, u16 reg_num, int arg, int write); + int mlx5_set_port_caps(struct mlx5_core_dev *dev, u8 port_num, u32 caps); +int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys, + int ptys_size, int proto_mask); +int mlx5_query_port_proto_cap(struct mlx5_core_dev *dev, + u32 *proto_cap, int proto_mask); +int mlx5_query_port_proto_admin(struct mlx5_core_dev *dev, + u32 *proto_admin, int proto_mask); +int mlx5_set_port_proto(struct mlx5_core_dev *dev, u32 proto_admin, + int proto_mask); int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq); void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq); -- cgit v1.2.3 From 4c916a798058c1acf5a980438416020932c24aca Mon Sep 17 00:00:00 2001 From: Rana Shahout Date: Thu, 28 May 2015 22:28:43 +0300 Subject: net/mlx5_core: Implement get/set port status Implemet get/set port status low level functions to be exposed by the netdev. Signed-off-by: Rana Shahout Signed-off-by: Saeed Mahameed Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/port.c | 32 ++++++++++++++++++++++++++ include/linux/mlx5/driver.h | 8 +++++++ 2 files changed, 40 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index 6e2d99cc3b61..742a6fb8debe 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -179,3 +179,35 @@ int mlx5_set_port_proto(struct mlx5_core_dev *dev, u32 proto_admin, return err; } EXPORT_SYMBOL_GPL(mlx5_set_port_proto); + +int mlx5_set_port_status(struct mlx5_core_dev *dev, + enum mlx5_port_status status) +{ + u32 in[MLX5_ST_SZ_DW(paos_reg)]; + u32 out[MLX5_ST_SZ_DW(paos_reg)]; + + memset(in, 0, sizeof(in)); + + MLX5_SET(paos_reg, in, admin_status, status); + MLX5_SET(paos_reg, in, ase, 1); + + return mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_PAOS, 0, 1); +} + +int mlx5_query_port_status(struct mlx5_core_dev *dev, u8 *status) +{ + u32 in[MLX5_ST_SZ_DW(paos_reg)]; + u32 out[MLX5_ST_SZ_DW(paos_reg)]; + int err; + + memset(in, 0, sizeof(in)); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_PAOS, 0, 0); + if (err) + return err; + + *status = MLX5_GET(paos_reg, out, oper_status); + return err; +} diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 266d5498a270..6438444ab361 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -149,6 +149,11 @@ enum mlx5_dev_event { MLX5_DEV_EVENT_CLIENT_REREG, }; +enum mlx5_port_status { + MLX5_PORT_UP = 1 << 1, + MLX5_PORT_DOWN = 1 << 2, +}; + struct mlx5_uuar_info { struct mlx5_uar *uars; int num_uars; @@ -701,6 +706,9 @@ int mlx5_query_port_proto_admin(struct mlx5_core_dev *dev, u32 *proto_admin, int proto_mask); int mlx5_set_port_proto(struct mlx5_core_dev *dev, u32 proto_admin, int proto_mask); +int mlx5_set_port_status(struct mlx5_core_dev *dev, + enum mlx5_port_status status); +int mlx5_query_port_status(struct mlx5_core_dev *dev, u8 *status); int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq); void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq); -- cgit v1.2.3 From 90b3e38d048f09b22fb50bcd460cea65fd00b2d7 Mon Sep 17 00:00:00 2001 From: Rana Shahout Date: Thu, 28 May 2015 22:28:44 +0300 Subject: net/mlx5_core: Modify CQ moderation parameters Introduce mlx5_core_modify_cq_moderation() to be used by the netdev, to set hardware coalescing. Signed-off-by: Rana Shahout Signed-off-by: Saeed Mahameed Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/cq.c | 18 ++++++++++++++++++ include/linux/mlx5/cq.h | 3 +++ 2 files changed, 21 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c index eb0cf81f5f45..04ab7e445eae 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c @@ -219,6 +219,24 @@ int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, } EXPORT_SYMBOL(mlx5_core_modify_cq); +int mlx5_core_modify_cq_moderation(struct mlx5_core_dev *dev, + struct mlx5_core_cq *cq, + u16 cq_period, + u16 cq_max_count) +{ + struct mlx5_modify_cq_mbox_in in; + + memset(&in, 0, sizeof(in)); + + in.cqn = cpu_to_be32(cq->cqn); + in.ctx.cq_period = cpu_to_be16(cq_period); + in.ctx.cq_max_count = cpu_to_be16(cq_max_count); + in.field_select = cpu_to_be32(MLX5_CQ_MODIFY_PERIOD | + MLX5_CQ_MODIFY_COUNT); + + return mlx5_core_modify_cq(dev, cq, &in, sizeof(in)); +} + int mlx5_init_cq_table(struct mlx5_core_dev *dev) { struct mlx5_cq_table *table = &dev->priv.cq_table; diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h index 2695ced222df..abc4767695e4 100644 --- a/include/linux/mlx5/cq.h +++ b/include/linux/mlx5/cq.h @@ -169,6 +169,9 @@ int mlx5_core_query_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, struct mlx5_query_cq_mbox_out *out); int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, struct mlx5_modify_cq_mbox_in *in, int in_sz); +int mlx5_core_modify_cq_moderation(struct mlx5_core_dev *dev, + struct mlx5_core_cq *cq, u16 cq_period, + u16 cq_max_count); int mlx5_debug_cq_add(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq); void mlx5_debug_cq_remove(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq); -- cgit v1.2.3 From e725440e75da8c4d617a31c4e38216acc55c24e3 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 28 May 2015 22:28:45 +0300 Subject: net/mlx5_core: Set/Query port MTU commands Introduce set/Query low level functions to access MTU in hardware. To be used by the netdev. Signed-off-by: Saeed Mahameed Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/port.c | 53 ++++++++++++++++++++++++++ include/linux/mlx5/driver.h | 4 ++ 2 files changed, 57 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index 742a6fb8debe..7d3d0f9f328d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -211,3 +211,56 @@ int mlx5_query_port_status(struct mlx5_core_dev *dev, u8 *status) *status = MLX5_GET(paos_reg, out, oper_status); return err; } + +static int mlx5_query_port_mtu(struct mlx5_core_dev *dev, + int *admin_mtu, int *max_mtu, int *oper_mtu) +{ + u32 in[MLX5_ST_SZ_DW(pmtu_reg)]; + u32 out[MLX5_ST_SZ_DW(pmtu_reg)]; + int err; + + memset(in, 0, sizeof(in)); + + MLX5_SET(pmtu_reg, in, local_port, 1); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_PMTU, 0, 0); + if (err) + return err; + + if (max_mtu) + *max_mtu = MLX5_GET(pmtu_reg, out, max_mtu); + if (oper_mtu) + *oper_mtu = MLX5_GET(pmtu_reg, out, oper_mtu); + if (admin_mtu) + *admin_mtu = MLX5_GET(pmtu_reg, out, admin_mtu); + + return 0; +} + +int mlx5_set_port_mtu(struct mlx5_core_dev *dev, int mtu) +{ + u32 in[MLX5_ST_SZ_DW(pmtu_reg)]; + u32 out[MLX5_ST_SZ_DW(pmtu_reg)]; + + memset(in, 0, sizeof(in)); + + MLX5_SET(pmtu_reg, in, admin_mtu, mtu); + MLX5_SET(pmtu_reg, in, local_port, 1); + + return mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), + MLX5_REG_PMTU, 0, 1); +} +EXPORT_SYMBOL_GPL(mlx5_set_port_mtu); + +int mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, int *max_mtu) +{ + return mlx5_query_port_mtu(dev, NULL, max_mtu, NULL); +} +EXPORT_SYMBOL_GPL(mlx5_query_port_max_mtu); + +int mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, int *oper_mtu) +{ + return mlx5_query_port_mtu(dev, NULL, NULL, oper_mtu); +} +EXPORT_SYMBOL_GPL(mlx5_query_port_oper_mtu); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 6438444ab361..51738472657e 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -710,6 +710,10 @@ int mlx5_set_port_status(struct mlx5_core_dev *dev, enum mlx5_port_status status); int mlx5_query_port_status(struct mlx5_core_dev *dev, u8 *status); +int mlx5_set_port_mtu(struct mlx5_core_dev *dev, int mtu); +int mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, int *max_mtu); +int mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, int *oper_mtu); + int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq); void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq); int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq, -- cgit v1.2.3 From afb736e9330ad6b2b6935d2f53ded784eb73f12d Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Thu, 28 May 2015 22:28:47 +0300 Subject: net/mlx5: Ethernet resource handling files This patch contains the resource handling files: - flow_table.c: This file contains the code to handle the low level API to configure hardware flow table. It is separated from the flow_table_en.c, because it will be used in the future by Raw Ethernet QP in mlx5_ib too. - en_flow_table.[ch]: Ethernet flow steering handling. The flow table object contain a mapping between flow specs and TIRs. This mechanism will be used also to configure e-switch in the future, when SR-IOV support will be added. - transobj.[ch] - Low level functions to create/modify/destroy the transport objects: RQ/SQ/TIR/TIS - vport.[ch] - Handle attributes of a virtual port (vPort) in the embedded switch. Currently this switch is a passthrough, until SR-IOV support will be added. Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlx5/core/en_flow_table.c | 858 +++++++++++++++++++++ .../net/ethernet/mellanox/mlx5/core/flow_table.c | 422 ++++++++++ drivers/net/ethernet/mellanox/mlx5/core/transobj.c | 169 ++++ drivers/net/ethernet/mellanox/mlx5/core/transobj.h | 47 ++ drivers/net/ethernet/mellanox/mlx5/core/vport.c | 84 ++ drivers/net/ethernet/mellanox/mlx5/core/vport.h | 41 + include/linux/mlx5/flow_table.h | 54 ++ 7 files changed, 1675 insertions(+) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_flow_table.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/flow_table.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/transobj.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/transobj.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/vport.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/vport.h create mode 100644 include/linux/mlx5/flow_table.h (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_flow_table.c b/drivers/net/ethernet/mellanox/mlx5/core/en_flow_table.c new file mode 100644 index 000000000000..6feebda4b3e4 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_flow_table.c @@ -0,0 +1,858 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include "en.h" + +enum { + MLX5E_FULLMATCH = 0, + MLX5E_ALLMULTI = 1, + MLX5E_PROMISC = 2, +}; + +enum { + MLX5E_UC = 0, + MLX5E_MC_IPV4 = 1, + MLX5E_MC_IPV6 = 2, + MLX5E_MC_OTHER = 3, +}; + +enum { + MLX5E_ACTION_NONE = 0, + MLX5E_ACTION_ADD = 1, + MLX5E_ACTION_DEL = 2, +}; + +struct mlx5e_eth_addr_hash_node { + struct hlist_node hlist; + u8 action; + struct mlx5e_eth_addr_info ai; +}; + +static inline int mlx5e_hash_eth_addr(u8 *addr) +{ + return addr[5]; +} + +static void mlx5e_add_eth_addr_to_hash(struct hlist_head *hash, u8 *addr) +{ + struct mlx5e_eth_addr_hash_node *hn; + int ix = mlx5e_hash_eth_addr(addr); + int found = 0; + + hlist_for_each_entry(hn, &hash[ix], hlist) + if (ether_addr_equal_64bits(hn->ai.addr, addr)) { + found = 1; + break; + } + + if (found) { + hn->action = MLX5E_ACTION_NONE; + return; + } + + hn = kzalloc(sizeof(*hn), GFP_ATOMIC); + if (!hn) + return; + + ether_addr_copy(hn->ai.addr, addr); + hn->action = MLX5E_ACTION_ADD; + + hlist_add_head(&hn->hlist, &hash[ix]); +} + +static void mlx5e_del_eth_addr_from_hash(struct mlx5e_eth_addr_hash_node *hn) +{ + hlist_del(&hn->hlist); + kfree(hn); +} + +static void mlx5e_del_eth_addr_from_flow_table(struct mlx5e_priv *priv, + struct mlx5e_eth_addr_info *ai) +{ + void *ft = priv->ft.main; + + if (ai->tt_vec & (1 << MLX5E_TT_IPV6_TCP)) + mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV6_TCP]); + + if (ai->tt_vec & (1 << MLX5E_TT_IPV4_TCP)) + mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV4_TCP]); + + if (ai->tt_vec & (1 << MLX5E_TT_IPV6_UDP)) + mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV6_UDP]); + + if (ai->tt_vec & (1 << MLX5E_TT_IPV4_UDP)) + mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV4_UDP]); + + if (ai->tt_vec & (1 << MLX5E_TT_IPV6)) + mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV6]); + + if (ai->tt_vec & (1 << MLX5E_TT_IPV4)) + mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV4]); + + if (ai->tt_vec & (1 << MLX5E_TT_ANY)) + mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_ANY]); +} + +static int mlx5e_get_eth_addr_type(u8 *addr) +{ + if (is_unicast_ether_addr(addr)) + return MLX5E_UC; + + if ((addr[0] == 0x01) && + (addr[1] == 0x00) && + (addr[2] == 0x5e) && + !(addr[3] & 0x80)) + return MLX5E_MC_IPV4; + + if ((addr[0] == 0x33) && + (addr[1] == 0x33)) + return MLX5E_MC_IPV6; + + return MLX5E_MC_OTHER; +} + +static u32 mlx5e_get_tt_vec(struct mlx5e_eth_addr_info *ai, int type) +{ + int eth_addr_type; + u32 ret; + + switch (type) { + case MLX5E_FULLMATCH: + eth_addr_type = mlx5e_get_eth_addr_type(ai->addr); + switch (eth_addr_type) { + case MLX5E_UC: + ret = + (1 << MLX5E_TT_IPV4_TCP) | + (1 << MLX5E_TT_IPV6_TCP) | + (1 << MLX5E_TT_IPV4_UDP) | + (1 << MLX5E_TT_IPV6_UDP) | + (1 << MLX5E_TT_IPV4) | + (1 << MLX5E_TT_IPV6) | + (1 << MLX5E_TT_ANY) | + 0; + break; + + case MLX5E_MC_IPV4: + ret = + (1 << MLX5E_TT_IPV4_UDP) | + (1 << MLX5E_TT_IPV4) | + 0; + break; + + case MLX5E_MC_IPV6: + ret = + (1 << MLX5E_TT_IPV6_UDP) | + (1 << MLX5E_TT_IPV6) | + 0; + break; + + case MLX5E_MC_OTHER: + ret = + (1 << MLX5E_TT_ANY) | + 0; + break; + } + + break; + + case MLX5E_ALLMULTI: + ret = + (1 << MLX5E_TT_IPV4_UDP) | + (1 << MLX5E_TT_IPV6_UDP) | + (1 << MLX5E_TT_IPV4) | + (1 << MLX5E_TT_IPV6) | + (1 << MLX5E_TT_ANY) | + 0; + break; + + default: /* MLX5E_PROMISC */ + ret = + (1 << MLX5E_TT_IPV4_TCP) | + (1 << MLX5E_TT_IPV6_TCP) | + (1 << MLX5E_TT_IPV4_UDP) | + (1 << MLX5E_TT_IPV6_UDP) | + (1 << MLX5E_TT_IPV4) | + (1 << MLX5E_TT_IPV6) | + (1 << MLX5E_TT_ANY) | + 0; + break; + } + + return ret; +} + +static int __mlx5e_add_eth_addr_rule(struct mlx5e_priv *priv, + struct mlx5e_eth_addr_info *ai, int type, + void *flow_context, void *match_criteria) +{ + u8 match_criteria_enable = 0; + void *match_value; + void *dest; + u8 *dmac; + u8 *match_criteria_dmac; + void *ft = priv->ft.main; + u32 *tirn = priv->tirn; + u32 tt_vec; + int err; + + match_value = MLX5_ADDR_OF(flow_context, flow_context, match_value); + dmac = MLX5_ADDR_OF(fte_match_param, match_value, + outer_headers.dmac_47_16); + match_criteria_dmac = MLX5_ADDR_OF(fte_match_param, match_criteria, + outer_headers.dmac_47_16); + dest = MLX5_ADDR_OF(flow_context, flow_context, destination); + + MLX5_SET(flow_context, flow_context, action, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST); + MLX5_SET(flow_context, flow_context, destination_list_size, 1); + MLX5_SET(dest_format_struct, dest, destination_type, + MLX5_FLOW_CONTEXT_DEST_TYPE_TIR); + + switch (type) { + case MLX5E_FULLMATCH: + match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + memset(match_criteria_dmac, 0xff, ETH_ALEN); + ether_addr_copy(dmac, ai->addr); + break; + + case MLX5E_ALLMULTI: + match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + match_criteria_dmac[0] = 0x01; + dmac[0] = 0x01; + break; + + case MLX5E_PROMISC: + break; + } + + tt_vec = mlx5e_get_tt_vec(ai, type); + + if (tt_vec & (1 << MLX5E_TT_ANY)) { + MLX5_SET(dest_format_struct, dest, destination_id, + tirn[MLX5E_TT_ANY]); + err = mlx5_add_flow_table_entry(ft, match_criteria_enable, + match_criteria, flow_context, + &ai->ft_ix[MLX5E_TT_ANY]); + if (err) { + mlx5e_del_eth_addr_from_flow_table(priv, ai); + return err; + } + ai->tt_vec |= (1 << MLX5E_TT_ANY); + } + + match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + outer_headers.ethertype); + + if (tt_vec & (1 << MLX5E_TT_IPV4)) { + MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, + ETH_P_IP); + MLX5_SET(dest_format_struct, dest, destination_id, + tirn[MLX5E_TT_IPV4]); + err = mlx5_add_flow_table_entry(ft, match_criteria_enable, + match_criteria, flow_context, + &ai->ft_ix[MLX5E_TT_IPV4]); + if (err) { + mlx5e_del_eth_addr_from_flow_table(priv, ai); + return err; + } + ai->tt_vec |= (1 << MLX5E_TT_IPV4); + } + + if (tt_vec & (1 << MLX5E_TT_IPV6)) { + MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, + ETH_P_IPV6); + MLX5_SET(dest_format_struct, dest, destination_id, + tirn[MLX5E_TT_IPV6]); + err = mlx5_add_flow_table_entry(ft, match_criteria_enable, + match_criteria, flow_context, + &ai->ft_ix[MLX5E_TT_IPV6]); + if (err) { + mlx5e_del_eth_addr_from_flow_table(priv, ai); + return err; + } + ai->tt_vec |= (1 << MLX5E_TT_IPV6); + } + + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + outer_headers.ip_protocol); + MLX5_SET(fte_match_param, match_value, outer_headers.ip_protocol, + IPPROTO_UDP); + + if (tt_vec & (1 << MLX5E_TT_IPV4_UDP)) { + MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, + ETH_P_IP); + MLX5_SET(dest_format_struct, dest, destination_id, + tirn[MLX5E_TT_IPV4_UDP]); + err = mlx5_add_flow_table_entry(ft, match_criteria_enable, + match_criteria, flow_context, + &ai->ft_ix[MLX5E_TT_IPV4_UDP]); + if (err) { + mlx5e_del_eth_addr_from_flow_table(priv, ai); + return err; + } + ai->tt_vec |= (1 << MLX5E_TT_IPV4_UDP); + } + + if (tt_vec & (1 << MLX5E_TT_IPV6_UDP)) { + MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, + ETH_P_IPV6); + MLX5_SET(dest_format_struct, dest, destination_id, + tirn[MLX5E_TT_IPV6_UDP]); + err = mlx5_add_flow_table_entry(ft, match_criteria_enable, + match_criteria, flow_context, + &ai->ft_ix[MLX5E_TT_IPV6_UDP]); + if (err) { + mlx5e_del_eth_addr_from_flow_table(priv, ai); + return err; + } + ai->tt_vec |= (1 << MLX5E_TT_IPV6_UDP); + } + + MLX5_SET(fte_match_param, match_value, outer_headers.ip_protocol, + IPPROTO_TCP); + + if (tt_vec & (1 << MLX5E_TT_IPV4_TCP)) { + MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, + ETH_P_IP); + MLX5_SET(dest_format_struct, dest, destination_id, + tirn[MLX5E_TT_IPV4_TCP]); + err = mlx5_add_flow_table_entry(ft, match_criteria_enable, + match_criteria, flow_context, + &ai->ft_ix[MLX5E_TT_IPV4_TCP]); + if (err) { + mlx5e_del_eth_addr_from_flow_table(priv, ai); + return err; + } + ai->tt_vec |= (1 << MLX5E_TT_IPV4_TCP); + } + + if (tt_vec & (1 << MLX5E_TT_IPV6_TCP)) { + MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, + ETH_P_IPV6); + MLX5_SET(dest_format_struct, dest, destination_id, + tirn[MLX5E_TT_IPV6_TCP]); + err = mlx5_add_flow_table_entry(ft, match_criteria_enable, + match_criteria, flow_context, + &ai->ft_ix[MLX5E_TT_IPV6_TCP]); + if (err) { + mlx5e_del_eth_addr_from_flow_table(priv, ai); + return err; + } + ai->tt_vec |= (1 << MLX5E_TT_IPV6_TCP); + } + + return 0; +} + +static int mlx5e_add_eth_addr_rule(struct mlx5e_priv *priv, + struct mlx5e_eth_addr_info *ai, int type) +{ + u32 *flow_context; + u32 *match_criteria; + int err; + + flow_context = mlx5_vzalloc(MLX5_ST_SZ_BYTES(flow_context) + + MLX5_ST_SZ_BYTES(dest_format_struct)); + match_criteria = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); + if (!flow_context || !match_criteria) { + netdev_err(priv->netdev, "%s: alloc failed\n", __func__); + err = -ENOMEM; + goto add_eth_addr_rule_out; + } + + err = __mlx5e_add_eth_addr_rule(priv, ai, type, flow_context, + match_criteria); + if (err) + netdev_err(priv->netdev, "%s: failed\n", __func__); + +add_eth_addr_rule_out: + kvfree(match_criteria); + kvfree(flow_context); + return err; +} + +enum mlx5e_vlan_rule_type { + MLX5E_VLAN_RULE_TYPE_UNTAGGED, + MLX5E_VLAN_RULE_TYPE_ANY_VID, + MLX5E_VLAN_RULE_TYPE_MATCH_VID, +}; + +static int mlx5e_add_vlan_rule(struct mlx5e_priv *priv, + enum mlx5e_vlan_rule_type rule_type, u16 vid) +{ + u8 match_criteria_enable = 0; + u32 *flow_context; + void *match_value; + void *dest; + u32 *match_criteria; + u32 *ft_ix; + int err; + + flow_context = mlx5_vzalloc(MLX5_ST_SZ_BYTES(flow_context) + + MLX5_ST_SZ_BYTES(dest_format_struct)); + match_criteria = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); + if (!flow_context || !match_criteria) { + netdev_err(priv->netdev, "%s: alloc failed\n", __func__); + err = -ENOMEM; + goto add_vlan_rule_out; + } + match_value = MLX5_ADDR_OF(flow_context, flow_context, match_value); + dest = MLX5_ADDR_OF(flow_context, flow_context, destination); + + MLX5_SET(flow_context, flow_context, action, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST); + MLX5_SET(flow_context, flow_context, destination_list_size, 1); + MLX5_SET(dest_format_struct, dest, destination_type, + MLX5_FLOW_CONTEXT_DEST_TYPE_FLOW_TABLE); + MLX5_SET(dest_format_struct, dest, destination_id, + mlx5_get_flow_table_id(priv->ft.main)); + + match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + outer_headers.vlan_tag); + + switch (rule_type) { + case MLX5E_VLAN_RULE_TYPE_UNTAGGED: + ft_ix = &priv->vlan.untagged_rule_ft_ix; + break; + case MLX5E_VLAN_RULE_TYPE_ANY_VID: + ft_ix = &priv->vlan.any_vlan_rule_ft_ix; + MLX5_SET(fte_match_param, match_value, outer_headers.vlan_tag, + 1); + break; + default: /* MLX5E_VLAN_RULE_TYPE_MATCH_VID */ + ft_ix = &priv->vlan.active_vlans_ft_ix[vid]; + MLX5_SET(fte_match_param, match_value, outer_headers.vlan_tag, + 1); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + outer_headers.first_vid); + MLX5_SET(fte_match_param, match_value, outer_headers.first_vid, + vid); + break; + } + + err = mlx5_add_flow_table_entry(priv->ft.vlan, match_criteria_enable, + match_criteria, flow_context, ft_ix); + if (err) + netdev_err(priv->netdev, "%s: failed\n", __func__); + +add_vlan_rule_out: + kvfree(match_criteria); + kvfree(flow_context); + return err; +} + +static void mlx5e_del_vlan_rule(struct mlx5e_priv *priv, + enum mlx5e_vlan_rule_type rule_type, u16 vid) +{ + switch (rule_type) { + case MLX5E_VLAN_RULE_TYPE_UNTAGGED: + mlx5_del_flow_table_entry(priv->ft.vlan, + priv->vlan.untagged_rule_ft_ix); + break; + case MLX5E_VLAN_RULE_TYPE_ANY_VID: + mlx5_del_flow_table_entry(priv->ft.vlan, + priv->vlan.any_vlan_rule_ft_ix); + break; + case MLX5E_VLAN_RULE_TYPE_MATCH_VID: + mlx5_del_flow_table_entry(priv->ft.vlan, + priv->vlan.active_vlans_ft_ix[vid]); + break; + } +} + +void mlx5e_enable_vlan_filter(struct mlx5e_priv *priv) +{ + WARN_ON(!mutex_is_locked(&priv->state_lock)); + + if (priv->vlan.filter_disabled) { + priv->vlan.filter_disabled = false; + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, + 0); + } +} + +void mlx5e_disable_vlan_filter(struct mlx5e_priv *priv) +{ + WARN_ON(!mutex_is_locked(&priv->state_lock)); + + if (!priv->vlan.filter_disabled) { + priv->vlan.filter_disabled = true; + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) + mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, + 0); + } +} + +int mlx5e_vlan_rx_add_vid(struct net_device *dev, __always_unused __be16 proto, + u16 vid) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + int err = 0; + + mutex_lock(&priv->state_lock); + + set_bit(vid, priv->vlan.active_vlans); + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) + err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, + vid); + + mutex_unlock(&priv->state_lock); + + return err; +} + +int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto, + u16 vid) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + mutex_lock(&priv->state_lock); + + clear_bit(vid, priv->vlan.active_vlans); + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, vid); + + mutex_unlock(&priv->state_lock); + + return 0; +} + +int mlx5e_add_all_vlan_rules(struct mlx5e_priv *priv) +{ + u16 vid; + int err; + + for_each_set_bit(vid, priv->vlan.active_vlans, VLAN_N_VID) { + err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, + vid); + if (err) + return err; + } + + err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0); + if (err) + return err; + + if (priv->vlan.filter_disabled) { + err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, + 0); + if (err) + return err; + } + + return 0; +} + +void mlx5e_del_all_vlan_rules(struct mlx5e_priv *priv) +{ + u16 vid; + + if (priv->vlan.filter_disabled) + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, 0); + + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0); + + for_each_set_bit(vid, priv->vlan.active_vlans, VLAN_N_VID) + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, vid); +} + +#define mlx5e_for_each_hash_node(hn, tmp, hash, i) \ + for (i = 0; i < MLX5E_ETH_ADDR_HASH_SIZE; i++) \ + hlist_for_each_entry_safe(hn, tmp, &hash[i], hlist) + +static void mlx5e_execute_action(struct mlx5e_priv *priv, + struct mlx5e_eth_addr_hash_node *hn) +{ + switch (hn->action) { + case MLX5E_ACTION_ADD: + mlx5e_add_eth_addr_rule(priv, &hn->ai, MLX5E_FULLMATCH); + hn->action = MLX5E_ACTION_NONE; + break; + + case MLX5E_ACTION_DEL: + mlx5e_del_eth_addr_from_flow_table(priv, &hn->ai); + mlx5e_del_eth_addr_from_hash(hn); + break; + } +} + +static void mlx5e_sync_netdev_addr(struct mlx5e_priv *priv) +{ + struct net_device *netdev = priv->netdev; + struct netdev_hw_addr *ha; + + netif_addr_lock_bh(netdev); + + mlx5e_add_eth_addr_to_hash(priv->eth_addr.netdev_uc, + priv->netdev->dev_addr); + + netdev_for_each_uc_addr(ha, netdev) + mlx5e_add_eth_addr_to_hash(priv->eth_addr.netdev_uc, ha->addr); + + netdev_for_each_mc_addr(ha, netdev) + mlx5e_add_eth_addr_to_hash(priv->eth_addr.netdev_mc, ha->addr); + + netif_addr_unlock_bh(netdev); +} + +static void mlx5e_apply_netdev_addr(struct mlx5e_priv *priv) +{ + struct mlx5e_eth_addr_hash_node *hn; + struct hlist_node *tmp; + int i; + + mlx5e_for_each_hash_node(hn, tmp, priv->eth_addr.netdev_uc, i) + mlx5e_execute_action(priv, hn); + + mlx5e_for_each_hash_node(hn, tmp, priv->eth_addr.netdev_mc, i) + mlx5e_execute_action(priv, hn); +} + +static void mlx5e_handle_netdev_addr(struct mlx5e_priv *priv) +{ + struct mlx5e_eth_addr_hash_node *hn; + struct hlist_node *tmp; + int i; + + mlx5e_for_each_hash_node(hn, tmp, priv->eth_addr.netdev_uc, i) + hn->action = MLX5E_ACTION_DEL; + mlx5e_for_each_hash_node(hn, tmp, priv->eth_addr.netdev_mc, i) + hn->action = MLX5E_ACTION_DEL; + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) + mlx5e_sync_netdev_addr(priv); + + mlx5e_apply_netdev_addr(priv); +} + +void mlx5e_set_rx_mode_core(struct mlx5e_priv *priv) +{ + struct mlx5e_eth_addr_db *ea = &priv->eth_addr; + struct net_device *ndev = priv->netdev; + + bool rx_mode_enable = test_bit(MLX5E_STATE_OPENED, &priv->state); + bool promisc_enabled = rx_mode_enable && (ndev->flags & IFF_PROMISC); + bool allmulti_enabled = rx_mode_enable && (ndev->flags & IFF_ALLMULTI); + bool broadcast_enabled = rx_mode_enable; + + bool enable_promisc = !ea->promisc_enabled && promisc_enabled; + bool disable_promisc = ea->promisc_enabled && !promisc_enabled; + bool enable_allmulti = !ea->allmulti_enabled && allmulti_enabled; + bool disable_allmulti = ea->allmulti_enabled && !allmulti_enabled; + bool enable_broadcast = !ea->broadcast_enabled && broadcast_enabled; + bool disable_broadcast = ea->broadcast_enabled && !broadcast_enabled; + + if (enable_promisc) + mlx5e_add_eth_addr_rule(priv, &ea->promisc, MLX5E_PROMISC); + if (enable_allmulti) + mlx5e_add_eth_addr_rule(priv, &ea->allmulti, MLX5E_ALLMULTI); + if (enable_broadcast) + mlx5e_add_eth_addr_rule(priv, &ea->broadcast, MLX5E_FULLMATCH); + + mlx5e_handle_netdev_addr(priv); + + if (disable_broadcast) + mlx5e_del_eth_addr_from_flow_table(priv, &ea->broadcast); + if (disable_allmulti) + mlx5e_del_eth_addr_from_flow_table(priv, &ea->allmulti); + if (disable_promisc) + mlx5e_del_eth_addr_from_flow_table(priv, &ea->promisc); + + ea->promisc_enabled = promisc_enabled; + ea->allmulti_enabled = allmulti_enabled; + ea->broadcast_enabled = broadcast_enabled; +} + +void mlx5e_set_rx_mode_work(struct work_struct *work) +{ + struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, + set_rx_mode_work); + + mutex_lock(&priv->state_lock); + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) + mlx5e_set_rx_mode_core(priv); + mutex_unlock(&priv->state_lock); +} + +void mlx5e_init_eth_addr(struct mlx5e_priv *priv) +{ + ether_addr_copy(priv->eth_addr.broadcast.addr, priv->netdev->broadcast); +} + +static int mlx5e_create_main_flow_table(struct mlx5e_priv *priv) +{ + struct mlx5_flow_table_group *g; + u8 *dmac; + + g = kcalloc(9, sizeof(*g), GFP_KERNEL); + + g[0].log_sz = 2; + g[0].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, g[0].match_criteria, + outer_headers.ethertype); + MLX5_SET_TO_ONES(fte_match_param, g[0].match_criteria, + outer_headers.ip_protocol); + + g[1].log_sz = 1; + g[1].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, g[1].match_criteria, + outer_headers.ethertype); + + g[2].log_sz = 0; + + g[3].log_sz = 14; + g[3].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + dmac = MLX5_ADDR_OF(fte_match_param, g[3].match_criteria, + outer_headers.dmac_47_16); + memset(dmac, 0xff, ETH_ALEN); + MLX5_SET_TO_ONES(fte_match_param, g[3].match_criteria, + outer_headers.ethertype); + MLX5_SET_TO_ONES(fte_match_param, g[3].match_criteria, + outer_headers.ip_protocol); + + g[4].log_sz = 13; + g[4].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + dmac = MLX5_ADDR_OF(fte_match_param, g[4].match_criteria, + outer_headers.dmac_47_16); + memset(dmac, 0xff, ETH_ALEN); + MLX5_SET_TO_ONES(fte_match_param, g[4].match_criteria, + outer_headers.ethertype); + + g[5].log_sz = 11; + g[5].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + dmac = MLX5_ADDR_OF(fte_match_param, g[5].match_criteria, + outer_headers.dmac_47_16); + memset(dmac, 0xff, ETH_ALEN); + + g[6].log_sz = 2; + g[6].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + dmac = MLX5_ADDR_OF(fte_match_param, g[6].match_criteria, + outer_headers.dmac_47_16); + dmac[0] = 0x01; + MLX5_SET_TO_ONES(fte_match_param, g[6].match_criteria, + outer_headers.ethertype); + MLX5_SET_TO_ONES(fte_match_param, g[6].match_criteria, + outer_headers.ip_protocol); + + g[7].log_sz = 1; + g[7].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + dmac = MLX5_ADDR_OF(fte_match_param, g[7].match_criteria, + outer_headers.dmac_47_16); + dmac[0] = 0x01; + MLX5_SET_TO_ONES(fte_match_param, g[7].match_criteria, + outer_headers.ethertype); + + g[8].log_sz = 0; + g[8].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + dmac = MLX5_ADDR_OF(fte_match_param, g[8].match_criteria, + outer_headers.dmac_47_16); + dmac[0] = 0x01; + priv->ft.main = mlx5_create_flow_table(priv->mdev, 1, + MLX5_FLOW_TABLE_TYPE_NIC_RCV, + 9, g); + kfree(g); + + return priv->ft.main ? 0 : -ENOMEM; +} + +static void mlx5e_destroy_main_flow_table(struct mlx5e_priv *priv) +{ + mlx5_destroy_flow_table(priv->ft.main); +} + +static int mlx5e_create_vlan_flow_table(struct mlx5e_priv *priv) +{ + struct mlx5_flow_table_group *g; + + g = kcalloc(2, sizeof(*g), GFP_KERNEL); + if (!g) + return -ENOMEM; + + g[0].log_sz = 12; + g[0].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, g[0].match_criteria, + outer_headers.vlan_tag); + MLX5_SET_TO_ONES(fte_match_param, g[0].match_criteria, + outer_headers.first_vid); + + /* untagged + any vlan id */ + g[1].log_sz = 1; + g[1].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, g[1].match_criteria, + outer_headers.vlan_tag); + + priv->ft.vlan = mlx5_create_flow_table(priv->mdev, 0, + MLX5_FLOW_TABLE_TYPE_NIC_RCV, + 2, g); + + kfree(g); + return priv->ft.vlan ? 0 : -ENOMEM; +} + +static void mlx5e_destroy_vlan_flow_table(struct mlx5e_priv *priv) +{ + mlx5_destroy_flow_table(priv->ft.vlan); +} + +int mlx5e_open_flow_table(struct mlx5e_priv *priv) +{ + int err; + + err = mlx5e_create_main_flow_table(priv); + if (err) + return err; + + err = mlx5e_create_vlan_flow_table(priv); + if (err) + goto err_destroy_main_flow_table; + + return 0; + +err_destroy_main_flow_table: + mlx5e_destroy_main_flow_table(priv); + + return err; +} + +void mlx5e_close_flow_table(struct mlx5e_priv *priv) +{ + mlx5e_destroy_vlan_flow_table(priv); + mlx5e_destroy_main_flow_table(priv); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/flow_table.c b/drivers/net/ethernet/mellanox/mlx5/core/flow_table.c new file mode 100644 index 000000000000..ca90b9bc3b95 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/flow_table.c @@ -0,0 +1,422 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include "mlx5_core.h" + +struct mlx5_ftg { + struct mlx5_flow_table_group g; + u32 id; + u32 start_ix; +}; + +struct mlx5_flow_table { + struct mlx5_core_dev *dev; + u8 level; + u8 type; + u32 id; + struct mutex mutex; /* sync bitmap alloc */ + u16 num_groups; + struct mlx5_ftg *group; + unsigned long *bitmap; + u32 size; +}; + +static int mlx5_set_flow_entry_cmd(struct mlx5_flow_table *ft, u32 group_ix, + u32 flow_index, void *flow_context) +{ + u32 out[MLX5_ST_SZ_DW(set_fte_out)]; + u32 *in; + void *in_flow_context; + int fcdls = + MLX5_GET(flow_context, flow_context, destination_list_size) * + MLX5_ST_SZ_BYTES(dest_format_struct); + int inlen = MLX5_ST_SZ_BYTES(set_fte_in) + fcdls; + int err; + + in = mlx5_vzalloc(inlen); + if (!in) { + mlx5_core_warn(ft->dev, "failed to allocate inbox\n"); + return -ENOMEM; + } + + MLX5_SET(set_fte_in, in, table_type, ft->type); + MLX5_SET(set_fte_in, in, table_id, ft->id); + MLX5_SET(set_fte_in, in, flow_index, flow_index); + MLX5_SET(set_fte_in, in, opcode, MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY); + + in_flow_context = MLX5_ADDR_OF(set_fte_in, in, flow_context); + memcpy(in_flow_context, flow_context, + MLX5_ST_SZ_BYTES(flow_context) + fcdls); + + MLX5_SET(flow_context, in_flow_context, group_id, + ft->group[group_ix].id); + + memset(out, 0, sizeof(out)); + err = mlx5_cmd_exec_check_status(ft->dev, in, inlen, out, + sizeof(out)); + kvfree(in); + + return err; +} + +static void mlx5_del_flow_entry_cmd(struct mlx5_flow_table *ft, u32 flow_index) +{ + u32 in[MLX5_ST_SZ_DW(delete_fte_in)]; + u32 out[MLX5_ST_SZ_DW(delete_fte_out)]; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + +#define MLX5_SET_DFTEI(p, x, v) MLX5_SET(delete_fte_in, p, x, v) + MLX5_SET_DFTEI(in, table_type, ft->type); + MLX5_SET_DFTEI(in, table_id, ft->id); + MLX5_SET_DFTEI(in, flow_index, flow_index); + MLX5_SET_DFTEI(in, opcode, MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY); + + mlx5_cmd_exec_check_status(ft->dev, in, sizeof(in), out, sizeof(out)); +} + +static void mlx5_destroy_flow_group_cmd(struct mlx5_flow_table *ft, int i) +{ + u32 in[MLX5_ST_SZ_DW(destroy_flow_group_in)]; + u32 out[MLX5_ST_SZ_DW(destroy_flow_group_out)]; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + +#define MLX5_SET_DFGI(p, x, v) MLX5_SET(destroy_flow_group_in, p, x, v) + MLX5_SET_DFGI(in, table_type, ft->type); + MLX5_SET_DFGI(in, table_id, ft->id); + MLX5_SET_DFGI(in, opcode, MLX5_CMD_OP_DESTROY_FLOW_GROUP); + MLX5_SET_DFGI(in, group_id, ft->group[i].id); + mlx5_cmd_exec_check_status(ft->dev, in, sizeof(in), out, sizeof(out)); +} + +static int mlx5_create_flow_group_cmd(struct mlx5_flow_table *ft, int i) +{ + u32 out[MLX5_ST_SZ_DW(create_flow_group_out)]; + u32 *in; + void *in_match_criteria; + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_table_group *g = &ft->group[i].g; + u32 start_ix = ft->group[i].start_ix; + u32 end_ix = start_ix + (1 << g->log_sz) - 1; + int err; + + in = mlx5_vzalloc(inlen); + if (!in) { + mlx5_core_warn(ft->dev, "failed to allocate inbox\n"); + return -ENOMEM; + } + in_match_criteria = MLX5_ADDR_OF(create_flow_group_in, in, + match_criteria); + + memset(out, 0, sizeof(out)); + +#define MLX5_SET_CFGI(p, x, v) MLX5_SET(create_flow_group_in, p, x, v) + MLX5_SET_CFGI(in, table_type, ft->type); + MLX5_SET_CFGI(in, table_id, ft->id); + MLX5_SET_CFGI(in, opcode, MLX5_CMD_OP_CREATE_FLOW_GROUP); + MLX5_SET_CFGI(in, start_flow_index, start_ix); + MLX5_SET_CFGI(in, end_flow_index, end_ix); + MLX5_SET_CFGI(in, match_criteria_enable, g->match_criteria_enable); + + memcpy(in_match_criteria, g->match_criteria, + MLX5_ST_SZ_BYTES(fte_match_param)); + + err = mlx5_cmd_exec_check_status(ft->dev, in, inlen, out, + sizeof(out)); + if (!err) + ft->group[i].id = MLX5_GET(create_flow_group_out, out, + group_id); + + kvfree(in); + + return err; +} + +static void mlx5_destroy_flow_table_groups(struct mlx5_flow_table *ft) +{ + int i; + + for (i = 0; i < ft->num_groups; i++) + mlx5_destroy_flow_group_cmd(ft, i); +} + +static int mlx5_create_flow_table_groups(struct mlx5_flow_table *ft) +{ + int err; + int i; + + for (i = 0; i < ft->num_groups; i++) { + err = mlx5_create_flow_group_cmd(ft, i); + if (err) + goto err_destroy_flow_table_groups; + } + + return 0; + +err_destroy_flow_table_groups: + for (i--; i >= 0; i--) + mlx5_destroy_flow_group_cmd(ft, i); + + return err; +} + +static int mlx5_create_flow_table_cmd(struct mlx5_flow_table *ft) +{ + u32 in[MLX5_ST_SZ_DW(create_flow_table_in)]; + u32 out[MLX5_ST_SZ_DW(create_flow_table_out)]; + int err; + + memset(in, 0, sizeof(in)); + + MLX5_SET(create_flow_table_in, in, table_type, ft->type); + MLX5_SET(create_flow_table_in, in, level, ft->level); + MLX5_SET(create_flow_table_in, in, log_size, order_base_2(ft->size)); + + MLX5_SET(create_flow_table_in, in, opcode, + MLX5_CMD_OP_CREATE_FLOW_TABLE); + + memset(out, 0, sizeof(out)); + err = mlx5_cmd_exec_check_status(ft->dev, in, sizeof(in), out, + sizeof(out)); + if (err) + return err; + + ft->id = MLX5_GET(create_flow_table_out, out, table_id); + + return 0; +} + +static void mlx5_destroy_flow_table_cmd(struct mlx5_flow_table *ft) +{ + u32 in[MLX5_ST_SZ_DW(destroy_flow_table_in)]; + u32 out[MLX5_ST_SZ_DW(destroy_flow_table_out)]; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + +#define MLX5_SET_DFTI(p, x, v) MLX5_SET(destroy_flow_table_in, p, x, v) + MLX5_SET_DFTI(in, table_type, ft->type); + MLX5_SET_DFTI(in, table_id, ft->id); + MLX5_SET_DFTI(in, opcode, MLX5_CMD_OP_DESTROY_FLOW_TABLE); + + mlx5_cmd_exec_check_status(ft->dev, in, sizeof(in), out, sizeof(out)); +} + +static int mlx5_find_group(struct mlx5_flow_table *ft, u8 match_criteria_enable, + u32 *match_criteria, int *group_ix) +{ + void *mc_outer = MLX5_ADDR_OF(fte_match_param, match_criteria, + outer_headers); + void *mc_misc = MLX5_ADDR_OF(fte_match_param, match_criteria, + misc_parameters); + void *mc_inner = MLX5_ADDR_OF(fte_match_param, match_criteria, + inner_headers); + int mc_outer_sz = MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4); + int mc_misc_sz = MLX5_ST_SZ_BYTES(fte_match_set_misc); + int mc_inner_sz = MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4); + int i; + + for (i = 0; i < ft->num_groups; i++) { + struct mlx5_flow_table_group *g = &ft->group[i].g; + void *gmc_outer = MLX5_ADDR_OF(fte_match_param, + g->match_criteria, + outer_headers); + void *gmc_misc = MLX5_ADDR_OF(fte_match_param, + g->match_criteria, + misc_parameters); + void *gmc_inner = MLX5_ADDR_OF(fte_match_param, + g->match_criteria, + inner_headers); + + if (g->match_criteria_enable != match_criteria_enable) + continue; + + if (match_criteria_enable & MLX5_MATCH_OUTER_HEADERS) + if (memcmp(mc_outer, gmc_outer, mc_outer_sz)) + continue; + + if (match_criteria_enable & MLX5_MATCH_MISC_PARAMETERS) + if (memcmp(mc_misc, gmc_misc, mc_misc_sz)) + continue; + + if (match_criteria_enable & MLX5_MATCH_INNER_HEADERS) + if (memcmp(mc_inner, gmc_inner, mc_inner_sz)) + continue; + + *group_ix = i; + return 0; + } + + return -EINVAL; +} + +static int alloc_flow_index(struct mlx5_flow_table *ft, int group_ix, u32 *ix) +{ + struct mlx5_ftg *g = &ft->group[group_ix]; + int err = 0; + + mutex_lock(&ft->mutex); + + *ix = find_next_zero_bit(ft->bitmap, ft->size, g->start_ix); + if (*ix >= (g->start_ix + (1 << g->g.log_sz))) + err = -ENOSPC; + else + __set_bit(*ix, ft->bitmap); + + mutex_unlock(&ft->mutex); + + return err; +} + +static void mlx5_free_flow_index(struct mlx5_flow_table *ft, u32 ix) +{ + __clear_bit(ix, ft->bitmap); +} + +int mlx5_add_flow_table_entry(void *flow_table, u8 match_criteria_enable, + void *match_criteria, void *flow_context, + u32 *flow_index) +{ + struct mlx5_flow_table *ft = flow_table; + int group_ix; + int err; + + err = mlx5_find_group(ft, match_criteria_enable, match_criteria, + &group_ix); + if (err) { + mlx5_core_warn(ft->dev, "mlx5_find_group failed\n"); + return err; + } + + err = alloc_flow_index(ft, group_ix, flow_index); + if (err) { + mlx5_core_warn(ft->dev, "alloc_flow_index failed\n"); + return err; + } + + return mlx5_set_flow_entry_cmd(ft, group_ix, *flow_index, flow_context); +} +EXPORT_SYMBOL(mlx5_add_flow_table_entry); + +void mlx5_del_flow_table_entry(void *flow_table, u32 flow_index) +{ + struct mlx5_flow_table *ft = flow_table; + + mlx5_del_flow_entry_cmd(ft, flow_index); + mlx5_free_flow_index(ft, flow_index); +} +EXPORT_SYMBOL(mlx5_del_flow_table_entry); + +void *mlx5_create_flow_table(struct mlx5_core_dev *dev, u8 level, u8 table_type, + u16 num_groups, + struct mlx5_flow_table_group *group) +{ + struct mlx5_flow_table *ft; + u32 start_ix = 0; + u32 ft_size = 0; + void *gr; + void *bm; + int err; + int i; + + for (i = 0; i < num_groups; i++) + ft_size += (1 << group[i].log_sz); + + ft = kzalloc(sizeof(*ft), GFP_KERNEL); + gr = kcalloc(num_groups, sizeof(struct mlx5_ftg), GFP_KERNEL); + bm = kcalloc(BITS_TO_LONGS(ft_size), sizeof(uintptr_t), GFP_KERNEL); + if (!ft || !gr || !bm) + goto err_free_ft; + + ft->group = gr; + ft->bitmap = bm; + ft->num_groups = num_groups; + ft->level = level; + ft->type = table_type; + ft->size = ft_size; + ft->dev = dev; + mutex_init(&ft->mutex); + + for (i = 0; i < ft->num_groups; i++) { + memcpy(&ft->group[i].g, &group[i], sizeof(*group)); + ft->group[i].start_ix = start_ix; + start_ix += 1 << group[i].log_sz; + } + + err = mlx5_create_flow_table_cmd(ft); + if (err) + goto err_free_ft; + + err = mlx5_create_flow_table_groups(ft); + if (err) + goto err_destroy_flow_table_cmd; + + return ft; + +err_destroy_flow_table_cmd: + mlx5_destroy_flow_table_cmd(ft); + +err_free_ft: + mlx5_core_warn(dev, "failed to alloc flow table\n"); + kfree(bm); + kfree(gr); + kfree(ft); + + return NULL; +} +EXPORT_SYMBOL(mlx5_create_flow_table); + +void mlx5_destroy_flow_table(void *flow_table) +{ + struct mlx5_flow_table *ft = flow_table; + + mlx5_destroy_flow_table_groups(ft); + mlx5_destroy_flow_table_cmd(ft); + kfree(ft->bitmap); + kfree(ft->group); + kfree(ft); +} +EXPORT_SYMBOL(mlx5_destroy_flow_table); + +u32 mlx5_get_flow_table_id(void *flow_table) +{ + struct mlx5_flow_table *ft = flow_table; + + return ft->id; +} +EXPORT_SYMBOL(mlx5_get_flow_table_id); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c new file mode 100644 index 000000000000..3c555d708af1 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c @@ -0,0 +1,169 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include "mlx5_core.h" +#include "transobj.h" + +int mlx5_create_rq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *rqn) +{ + u32 out[MLX5_ST_SZ_DW(create_rq_out)]; + int err; + + MLX5_SET(create_rq_in, in, opcode, MLX5_CMD_OP_CREATE_RQ); + + memset(out, 0, sizeof(out)); + err = mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out)); + if (!err) + *rqn = MLX5_GET(create_rq_out, out, rqn); + + return err; +} + +int mlx5_modify_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *in, int inlen) +{ + u32 out[MLX5_ST_SZ_DW(modify_rq_out)]; + + MLX5_SET(modify_rq_in, in, rqn, rqn); + MLX5_SET(modify_rq_in, in, opcode, MLX5_CMD_OP_MODIFY_RQ); + + memset(out, 0, sizeof(out)); + return mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out)); +} + +void mlx5_destroy_rq(struct mlx5_core_dev *dev, u32 rqn) +{ + u32 in[MLX5_ST_SZ_DW(destroy_rq_in)]; + u32 out[MLX5_ST_SZ_DW(destroy_rq_out)]; + + memset(in, 0, sizeof(in)); + + MLX5_SET(destroy_rq_in, in, opcode, MLX5_CMD_OP_DESTROY_RQ); + MLX5_SET(destroy_rq_in, in, rqn, rqn); + + mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5_create_sq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *sqn) +{ + u32 out[MLX5_ST_SZ_DW(create_sq_out)]; + int err; + + MLX5_SET(create_sq_in, in, opcode, MLX5_CMD_OP_CREATE_SQ); + + memset(out, 0, sizeof(out)); + err = mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out)); + if (!err) + *sqn = MLX5_GET(create_sq_out, out, sqn); + + return err; +} + +int mlx5_modify_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *in, int inlen) +{ + u32 out[MLX5_ST_SZ_DW(modify_sq_out)]; + + MLX5_SET(modify_sq_in, in, sqn, sqn); + MLX5_SET(modify_sq_in, in, opcode, MLX5_CMD_OP_MODIFY_SQ); + + memset(out, 0, sizeof(out)); + return mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out)); +} + +void mlx5_destroy_sq(struct mlx5_core_dev *dev, u32 sqn) +{ + u32 in[MLX5_ST_SZ_DW(destroy_sq_in)]; + u32 out[MLX5_ST_SZ_DW(destroy_sq_out)]; + + memset(in, 0, sizeof(in)); + + MLX5_SET(destroy_sq_in, in, opcode, MLX5_CMD_OP_DESTROY_SQ); + MLX5_SET(destroy_sq_in, in, sqn, sqn); + + mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *tirn) +{ + u32 out[MLX5_ST_SZ_DW(create_tir_out)]; + int err; + + MLX5_SET(create_tir_in, in, opcode, MLX5_CMD_OP_CREATE_TIR); + + memset(out, 0, sizeof(out)); + err = mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out)); + if (!err) + *tirn = MLX5_GET(create_tir_out, out, tirn); + + return err; +} + +void mlx5_destroy_tir(struct mlx5_core_dev *dev, u32 tirn) +{ + u32 in[MLX5_ST_SZ_DW(destroy_tir_out)]; + u32 out[MLX5_ST_SZ_DW(destroy_tir_out)]; + + memset(in, 0, sizeof(in)); + + MLX5_SET(destroy_tir_in, in, opcode, MLX5_CMD_OP_DESTROY_TIR); + MLX5_SET(destroy_tir_in, in, tirn, tirn); + + mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5_create_tis(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *tisn) +{ + u32 out[MLX5_ST_SZ_DW(create_tis_out)]; + int err; + + MLX5_SET(create_tis_in, in, opcode, MLX5_CMD_OP_CREATE_TIS); + + memset(out, 0, sizeof(out)); + err = mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out)); + if (!err) + *tisn = MLX5_GET(create_tis_out, out, tisn); + + return err; +} + +void mlx5_destroy_tis(struct mlx5_core_dev *dev, u32 tisn) +{ + u32 in[MLX5_ST_SZ_DW(destroy_tis_out)]; + u32 out[MLX5_ST_SZ_DW(destroy_tis_out)]; + + memset(in, 0, sizeof(in)); + + MLX5_SET(destroy_tis_in, in, opcode, MLX5_CMD_OP_DESTROY_TIS); + MLX5_SET(destroy_tis_in, in, tisn, tisn); + + mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.h b/drivers/net/ethernet/mellanox/mlx5/core/transobj.h new file mode 100644 index 000000000000..1bc898cc4933 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __TRANSOBJ_H__ +#define __TRANSOBJ_H__ + +int mlx5_create_rq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *rqn); +int mlx5_modify_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *in, int inlen); +void mlx5_destroy_rq(struct mlx5_core_dev *dev, u32 rqn); +int mlx5_create_sq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *sqn); +int mlx5_modify_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *in, int inlen); +void mlx5_destroy_sq(struct mlx5_core_dev *dev, u32 sqn); +int mlx5_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *tirn); +void mlx5_destroy_tir(struct mlx5_core_dev *dev, u32 tirn); +int mlx5_create_tis(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *tisn); +void mlx5_destroy_tis(struct mlx5_core_dev *dev, u32 tisn); + +#endif /* __TRANSOBJ_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c new file mode 100644 index 000000000000..ba374b9a6c87 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include "vport.h" +#include "mlx5_core.h" + +u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod) +{ + u32 in[MLX5_ST_SZ_DW(query_vport_state_in)]; + u32 out[MLX5_ST_SZ_DW(query_vport_state_out)]; + int err; + + memset(in, 0, sizeof(in)); + + MLX5_SET(query_vport_state_in, in, opcode, + MLX5_CMD_OP_QUERY_VPORT_STATE); + MLX5_SET(query_vport_state_in, in, op_mod, opmod); + + err = mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, + sizeof(out)); + if (err) + mlx5_core_warn(mdev, "MLX5_CMD_OP_QUERY_VPORT_STATE failed\n"); + + return MLX5_GET(query_vport_state_out, out, state); +} + +void mlx5_query_vport_mac_address(struct mlx5_core_dev *mdev, u8 *addr) +{ + u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)]; + u32 *out; + int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out); + u8 *out_addr; + + out = mlx5_vzalloc(outlen); + if (!out) + return; + + out_addr = MLX5_ADDR_OF(query_nic_vport_context_out, out, + nic_vport_context.permanent_address); + + memset(in, 0, sizeof(in)); + + MLX5_SET(query_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT); + + memset(out, 0, outlen); + mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, outlen); + + ether_addr_copy(addr, &out_addr[2]); + + kvfree(out); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.h b/drivers/net/ethernet/mellanox/mlx5/core/vport.h new file mode 100644 index 000000000000..c05ca2c3419d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __MLX5_VPORT_H__ +#define __MLX5_VPORT_H__ + +#include + +u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod); +void mlx5_query_vport_mac_address(struct mlx5_core_dev *mdev, u8 *addr); + +#endif /* __MLX5_VPORT_H__ */ diff --git a/include/linux/mlx5/flow_table.h b/include/linux/mlx5/flow_table.h new file mode 100644 index 000000000000..5f922c6d4fc2 --- /dev/null +++ b/include/linux/mlx5/flow_table.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef MLX5_FLOW_TABLE_H +#define MLX5_FLOW_TABLE_H + +#include + +struct mlx5_flow_table_group { + u8 log_sz; + u8 match_criteria_enable; + u32 match_criteria[MLX5_ST_SZ_DW(fte_match_param)]; +}; + +void *mlx5_create_flow_table(struct mlx5_core_dev *dev, u8 level, u8 table_type, + u16 num_groups, + struct mlx5_flow_table_group *group); +void mlx5_destroy_flow_table(void *flow_table); +int mlx5_add_flow_table_entry(void *flow_table, u8 match_criteria_enable, + void *match_criteria, void *flow_context, + u32 *flow_index); +void mlx5_del_flow_table_entry(void *flow_table, u32 flow_index); +u32 mlx5_get_flow_table_id(void *flow_table); + +#endif /* MLX5_FLOW_TABLE_H */ -- cgit v1.2.3 From f62b8bb8f2d30582f30f51e85a8c0e1260125d7e Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Thu, 28 May 2015 22:28:48 +0300 Subject: net/mlx5: Extend mlx5_core to support ConnectX-4 Ethernet functionality This is the Ethernet part of the driver for the Mellanox ConnectX(R)-4 Single/Dual-Port Adapter supporting 100Gb/s with VPI. The driver extends the existing mlx5 driver with Ethernet functionality. This patch contains the driver entry points but does not include transmit and receive (see the previous patch in the series) routines. It also adds the option MLX5_CORE_EN to Kconfig to enable/disable the Ethernet functionality. Currently, Kconfig is programmed to make Ethernet and Infiniband functionality mutally exclusive. Also changed MLX5_INFINIBAND to be depandant on MLX5_CORE instead of selecting it, since MLX5_CORE could be selected without MLX5_INFINIBAND being selected. Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- drivers/infiniband/hw/mlx5/Kconfig | 4 +- drivers/net/ethernet/mellanox/mlx5/core/Kconfig | 14 +- drivers/net/ethernet/mellanox/mlx5/core/Makefile | 3 + drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 19 - drivers/net/ethernet/mellanox/mlx5/core/en.h | 520 ++++++ .../net/ethernet/mellanox/mlx5/core/en_ethtool.c | 679 +++++++ drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 1899 ++++++++++++++++++++ drivers/net/ethernet/mellanox/mlx5/core/main.c | 74 +- .../net/ethernet/mellanox/mlx5/core/mlx5_core.h | 9 +- include/linux/mlx5/device.h | 19 + include/linux/mlx5/driver.h | 1 + 11 files changed, 3213 insertions(+), 28 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_main.c (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/Kconfig b/drivers/infiniband/hw/mlx5/Kconfig index 10df386c6344..bce263b92821 100644 --- a/drivers/infiniband/hw/mlx5/Kconfig +++ b/drivers/infiniband/hw/mlx5/Kconfig @@ -1,8 +1,6 @@ config MLX5_INFINIBAND tristate "Mellanox Connect-IB HCA support" - depends on NETDEVICES && ETHERNET && PCI - select NET_VENDOR_MELLANOX - select MLX5_CORE + depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE ---help--- This driver provides low-level InfiniBand support for Mellanox Connect-IB PCI Express host channel adapters (HCAs). diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index 8ff57e8e3e91..0d7aef040fb0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -3,6 +3,18 @@ # config MLX5_CORE - tristate + tristate "Mellanox Technologies ConnectX-4 and Connect-IB core driver" depends on PCI default n + ---help--- + Core driver for low level functionality of the ConnectX-4 and + Connect-IB cards by Mellanox Technologies. + +config MLX5_CORE_EN + bool "Mellanox Technologies ConnectX-4 Ethernet support" + depends on MLX5_INFINIBAND=n && NETDEVICES && ETHERNET && PCI && MLX5_CORE + default n + ---help--- + Ethernet support in Mellanox Technologies ConnectX-4 NIC. + Ethernet and Infiniband support in ConnectX-4 are currently mutually + exclusive. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 105780bb980b..87e9e606596a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -3,3 +3,6 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \ mad.o +mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o flow_table.o vport.o transobj.o \ + en_main.o en_flow_table.o en_ethtool.o en_tx.o en_rx.o \ + en_txrx.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 2f22cd2de2b0..75ff58dc1ff5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -75,25 +75,6 @@ enum { MLX5_CMD_DELIVERY_STAT_CMD_DESCR_ERR = 0x10, }; -enum { - MLX5_CMD_STAT_OK = 0x0, - MLX5_CMD_STAT_INT_ERR = 0x1, - MLX5_CMD_STAT_BAD_OP_ERR = 0x2, - MLX5_CMD_STAT_BAD_PARAM_ERR = 0x3, - MLX5_CMD_STAT_BAD_SYS_STATE_ERR = 0x4, - MLX5_CMD_STAT_BAD_RES_ERR = 0x5, - MLX5_CMD_STAT_RES_BUSY = 0x6, - MLX5_CMD_STAT_LIM_ERR = 0x8, - MLX5_CMD_STAT_BAD_RES_STATE_ERR = 0x9, - MLX5_CMD_STAT_IX_ERR = 0xa, - MLX5_CMD_STAT_NO_RES_ERR = 0xf, - MLX5_CMD_STAT_BAD_INP_LEN_ERR = 0x50, - MLX5_CMD_STAT_BAD_OUTP_LEN_ERR = 0x51, - MLX5_CMD_STAT_BAD_QP_STATE_ERR = 0x10, - MLX5_CMD_STAT_BAD_PKT_ERR = 0x30, - MLX5_CMD_STAT_BAD_SIZE_OUTS_CQES_ERR = 0x40, -}; - static struct mlx5_cmd_work_ent *alloc_cmd(struct mlx5_cmd *cmd, struct mlx5_cmd_msg *in, struct mlx5_cmd_msg *out, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h new file mode 100644 index 000000000000..cbb3c7cb53f7 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -0,0 +1,520 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include "vport.h" +#include "wq.h" +#include "transobj.h" +#include "mlx5_core.h" + +#define MLX5E_MAX_NUM_TC 8 + +#define MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE 0x7 +#define MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE 0xa +#define MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE 0xd + +#define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE 0x7 +#define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE 0xa +#define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE 0xd + +#define MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ (16 * 1024) +#define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC 0x10 +#define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS 0x20 +#define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC 0x10 +#define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS 0x20 +#define MLX5E_PARAMS_DEFAULT_MIN_RX_WQES 0x80 +#define MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ 0x7 +#define MLX5E_PARAMS_MIN_MTU 46 + +#define MLX5E_TX_CQ_POLL_BUDGET 128 +#define MLX5E_UPDATE_STATS_INTERVAL 200 /* msecs */ + +static const char vport_strings[][ETH_GSTRING_LEN] = { + /* vport statistics */ + "rx_packets", + "rx_bytes", + "tx_packets", + "tx_bytes", + "rx_error_packets", + "rx_error_bytes", + "tx_error_packets", + "tx_error_bytes", + "rx_unicast_packets", + "rx_unicast_bytes", + "tx_unicast_packets", + "tx_unicast_bytes", + "rx_multicast_packets", + "rx_multicast_bytes", + "tx_multicast_packets", + "tx_multicast_bytes", + "rx_broadcast_packets", + "rx_broadcast_bytes", + "tx_broadcast_packets", + "tx_broadcast_bytes", + + /* SW counters */ + "tso_packets", + "tso_bytes", + "lro_packets", + "lro_bytes", + "rx_csum_good", + "rx_csum_none", + "tx_csum_offload", + "tx_queue_stopped", + "tx_queue_wake", + "tx_queue_dropped", + "rx_wqe_err", +}; + +struct mlx5e_vport_stats { + /* HW counters */ + u64 rx_packets; + u64 rx_bytes; + u64 tx_packets; + u64 tx_bytes; + u64 rx_error_packets; + u64 rx_error_bytes; + u64 tx_error_packets; + u64 tx_error_bytes; + u64 rx_unicast_packets; + u64 rx_unicast_bytes; + u64 tx_unicast_packets; + u64 tx_unicast_bytes; + u64 rx_multicast_packets; + u64 rx_multicast_bytes; + u64 tx_multicast_packets; + u64 tx_multicast_bytes; + u64 rx_broadcast_packets; + u64 rx_broadcast_bytes; + u64 tx_broadcast_packets; + u64 tx_broadcast_bytes; + + /* SW counters */ + u64 tso_packets; + u64 tso_bytes; + u64 lro_packets; + u64 lro_bytes; + u64 rx_csum_good; + u64 rx_csum_none; + u64 tx_csum_offload; + u64 tx_queue_stopped; + u64 tx_queue_wake; + u64 tx_queue_dropped; + u64 rx_wqe_err; + +#define NUM_VPORT_COUNTERS 31 +}; + +static const char rq_stats_strings[][ETH_GSTRING_LEN] = { + "packets", + "csum_none", + "lro_packets", + "lro_bytes", + "wqe_err" +}; + +struct mlx5e_rq_stats { + u64 packets; + u64 csum_none; + u64 lro_packets; + u64 lro_bytes; + u64 wqe_err; +#define NUM_RQ_STATS 5 +}; + +static const char sq_stats_strings[][ETH_GSTRING_LEN] = { + "packets", + "tso_packets", + "tso_bytes", + "csum_offload_none", + "stopped", + "wake", + "dropped", + "nop" +}; + +struct mlx5e_sq_stats { + u64 packets; + u64 tso_packets; + u64 tso_bytes; + u64 csum_offload_none; + u64 stopped; + u64 wake; + u64 dropped; + u64 nop; +#define NUM_SQ_STATS 8 +}; + +struct mlx5e_stats { + struct mlx5e_vport_stats vport; +}; + +struct mlx5e_params { + u8 log_sq_size; + u8 log_rq_size; + u16 num_channels; + u8 default_vlan_prio; + u8 num_tc; + u16 rx_cq_moderation_usec; + u16 rx_cq_moderation_pkts; + u16 tx_cq_moderation_usec; + u16 tx_cq_moderation_pkts; + u16 min_rx_wqes; + u16 rx_hash_log_tbl_sz; + bool lro_en; + u32 lro_wqe_sz; +}; + +enum { + MLX5E_RQ_STATE_POST_WQES_ENABLE, +}; + +enum cq_flags { + MLX5E_CQ_HAS_CQES = 1, +}; + +struct mlx5e_cq { + /* data path - accessed per cqe */ + struct mlx5_cqwq wq; + void *sqrq; + unsigned long flags; + + /* data path - accessed per napi poll */ + struct napi_struct *napi; + struct mlx5_core_cq mcq; + struct mlx5e_channel *channel; + + /* control */ + struct mlx5_wq_ctrl wq_ctrl; +} ____cacheline_aligned_in_smp; + +struct mlx5e_rq { + /* data path */ + struct mlx5_wq_ll wq; + u32 wqe_sz; + struct sk_buff **skb; + + struct device *pdev; + struct net_device *netdev; + struct mlx5e_rq_stats stats; + struct mlx5e_cq cq; + + unsigned long state; + int ix; + + /* control */ + struct mlx5_wq_ctrl wq_ctrl; + u32 rqn; + struct mlx5e_channel *channel; +} ____cacheline_aligned_in_smp; + +struct mlx5e_tx_skb_cb { + u32 num_bytes; + u8 num_wqebbs; + u8 num_dma; +}; + +#define MLX5E_TX_SKB_CB(__skb) ((struct mlx5e_tx_skb_cb *)__skb->cb) + +struct mlx5e_sq_dma { + dma_addr_t addr; + u32 size; +}; + +enum { + MLX5E_SQ_STATE_WAKE_TXQ_ENABLE, +}; + +struct mlx5e_sq { + /* data path */ + + /* dirtied @completion */ + u16 cc; + u32 dma_fifo_cc; + + /* dirtied @xmit */ + u16 pc ____cacheline_aligned_in_smp; + u32 dma_fifo_pc; + u32 bf_offset; + struct mlx5e_sq_stats stats; + + struct mlx5e_cq cq; + + /* pointers to per packet info: write@xmit, read@completion */ + struct sk_buff **skb; + struct mlx5e_sq_dma *dma_fifo; + + /* read only */ + struct mlx5_wq_cyc wq; + u32 dma_fifo_mask; + void __iomem *uar_map; + struct netdev_queue *txq; + u32 sqn; + u32 bf_buf_size; + struct device *pdev; + __be32 mkey_be; + unsigned long state; + + /* control path */ + struct mlx5_wq_ctrl wq_ctrl; + struct mlx5_uar uar; + struct mlx5e_channel *channel; + int tc; +} ____cacheline_aligned_in_smp; + +static inline bool mlx5e_sq_has_room_for(struct mlx5e_sq *sq, u16 n) +{ + return (((sq->wq.sz_m1 & (sq->cc - sq->pc)) >= n) || + (sq->cc == sq->pc)); +} + +enum channel_flags { + MLX5E_CHANNEL_NAPI_SCHED = 1, +}; + +struct mlx5e_channel { + /* data path */ + struct mlx5e_rq rq; + struct mlx5e_sq sq[MLX5E_MAX_NUM_TC]; + struct napi_struct napi; + struct device *pdev; + struct net_device *netdev; + __be32 mkey_be; + u8 num_tc; + unsigned long flags; + + /* control */ + struct mlx5e_priv *priv; + int ix; + int cpu; +}; + +enum mlx5e_traffic_types { + MLX5E_TT_IPV4_TCP = 0, + MLX5E_TT_IPV6_TCP = 1, + MLX5E_TT_IPV4_UDP = 2, + MLX5E_TT_IPV6_UDP = 3, + MLX5E_TT_IPV4 = 4, + MLX5E_TT_IPV6 = 5, + MLX5E_TT_ANY = 6, + MLX5E_NUM_TT = 7, +}; + +enum { + MLX5E_RQT_SPREADING = 0, + MLX5E_RQT_DEFAULT_RQ = 1, + MLX5E_NUM_RQT = 2, +}; + +struct mlx5e_eth_addr_info { + u8 addr[ETH_ALEN + 2]; + u32 tt_vec; + u32 ft_ix[MLX5E_NUM_TT]; /* flow table index per traffic type */ +}; + +#define MLX5E_ETH_ADDR_HASH_SIZE (1 << BITS_PER_BYTE) + +struct mlx5e_eth_addr_db { + struct hlist_head netdev_uc[MLX5E_ETH_ADDR_HASH_SIZE]; + struct hlist_head netdev_mc[MLX5E_ETH_ADDR_HASH_SIZE]; + struct mlx5e_eth_addr_info broadcast; + struct mlx5e_eth_addr_info allmulti; + struct mlx5e_eth_addr_info promisc; + bool broadcast_enabled; + bool allmulti_enabled; + bool promisc_enabled; +}; + +enum { + MLX5E_STATE_ASYNC_EVENTS_ENABLE, + MLX5E_STATE_OPENED, +}; + +struct mlx5e_vlan_db { + unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; + u32 active_vlans_ft_ix[VLAN_N_VID]; + u32 untagged_rule_ft_ix; + u32 any_vlan_rule_ft_ix; + bool filter_disabled; +}; + +struct mlx5e_flow_table { + void *vlan; + void *main; +}; + +struct mlx5e_priv { + /* priv data path fields - start */ + int order_base_2_num_channels; + int queue_mapping_channel_mask; + int num_tc; + int default_vlan_prio; + /* priv data path fields - end */ + + unsigned long state; + struct mutex state_lock; /* Protects Interface state */ + struct mlx5_uar cq_uar; + u32 pdn; + struct mlx5_core_mr mr; + + struct mlx5e_channel **channel; + u32 tisn[MLX5E_MAX_NUM_TC]; + u32 rqtn; + u32 tirn[MLX5E_NUM_TT]; + + struct mlx5e_flow_table ft; + struct mlx5e_eth_addr_db eth_addr; + struct mlx5e_vlan_db vlan; + + struct mlx5e_params params; + spinlock_t async_events_spinlock; /* sync hw events */ + struct work_struct update_carrier_work; + struct work_struct set_rx_mode_work; + struct delayed_work update_stats_work; + + struct mlx5_core_dev *mdev; + struct net_device *netdev; + struct mlx5e_stats stats; +}; + +#define MLX5E_NET_IP_ALIGN 2 + +struct mlx5e_tx_wqe { + struct mlx5_wqe_ctrl_seg ctrl; + struct mlx5_wqe_eth_seg eth; +}; + +struct mlx5e_rx_wqe { + struct mlx5_wqe_srq_next_seg next; + struct mlx5_wqe_data_seg data; +}; + +enum mlx5e_link_mode { + MLX5E_1000BASE_CX_SGMII = 0, + MLX5E_1000BASE_KX = 1, + MLX5E_10GBASE_CX4 = 2, + MLX5E_10GBASE_KX4 = 3, + MLX5E_10GBASE_KR = 4, + MLX5E_20GBASE_KR2 = 5, + MLX5E_40GBASE_CR4 = 6, + MLX5E_40GBASE_KR4 = 7, + MLX5E_56GBASE_R4 = 8, + MLX5E_10GBASE_CR = 12, + MLX5E_10GBASE_SR = 13, + MLX5E_10GBASE_ER = 14, + MLX5E_40GBASE_SR4 = 15, + MLX5E_40GBASE_LR4 = 16, + MLX5E_100GBASE_CR4 = 20, + MLX5E_100GBASE_SR4 = 21, + MLX5E_100GBASE_KR4 = 22, + MLX5E_100GBASE_LR4 = 23, + MLX5E_100BASE_TX = 24, + MLX5E_100BASE_T = 25, + MLX5E_10GBASE_T = 26, + MLX5E_25GBASE_CR = 27, + MLX5E_25GBASE_KR = 28, + MLX5E_25GBASE_SR = 29, + MLX5E_50GBASE_CR2 = 30, + MLX5E_50GBASE_KR2 = 31, + MLX5E_LINK_MODES_NUMBER, +}; + +#define MLX5E_PROT_MASK(link_mode) (1 << link_mode) + +u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, + void *accel_priv, select_queue_fallback_t fallback); +netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev); +netdev_tx_t mlx5e_xmit_multi_tc(struct sk_buff *skb, struct net_device *dev); + +void mlx5e_completion_event(struct mlx5_core_cq *mcq); +void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event); +int mlx5e_napi_poll(struct napi_struct *napi, int budget); +bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq); +bool mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget); +bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq); +struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq); + +void mlx5e_update_stats(struct mlx5e_priv *priv); + +int mlx5e_open_flow_table(struct mlx5e_priv *priv); +void mlx5e_close_flow_table(struct mlx5e_priv *priv); +void mlx5e_init_eth_addr(struct mlx5e_priv *priv); +void mlx5e_set_rx_mode_core(struct mlx5e_priv *priv); +void mlx5e_set_rx_mode_work(struct work_struct *work); + +int mlx5e_vlan_rx_add_vid(struct net_device *dev, __always_unused __be16 proto, + u16 vid); +int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto, + u16 vid); +void mlx5e_enable_vlan_filter(struct mlx5e_priv *priv); +void mlx5e_disable_vlan_filter(struct mlx5e_priv *priv); +int mlx5e_add_all_vlan_rules(struct mlx5e_priv *priv); +void mlx5e_del_all_vlan_rules(struct mlx5e_priv *priv); + +int mlx5e_open_locked(struct net_device *netdev); +int mlx5e_close_locked(struct net_device *netdev); +int mlx5e_update_priv_params(struct mlx5e_priv *priv, + struct mlx5e_params *new_params); + +static inline void mlx5e_tx_notify_hw(struct mlx5e_sq *sq, + struct mlx5e_tx_wqe *wqe) +{ + /* ensure wqe is visible to device before updating doorbell record */ + dma_wmb(); + + *sq->wq.db = cpu_to_be32(sq->pc); + + /* ensure doorbell record is visible to device before ringing the + * doorbell + */ + wmb(); + + mlx5_write64((__be32 *)&wqe->ctrl, + sq->uar_map + MLX5_BF_OFFSET + sq->bf_offset, + NULL); + + sq->bf_offset ^= sq->bf_buf_size; +} + +static inline void mlx5e_cq_arm(struct mlx5e_cq *cq) +{ + struct mlx5_core_cq *mcq; + + mcq = &cq->mcq; + mlx5_cq_arm(mcq, MLX5_CQ_DB_REQ_NOT, mcq->uar->map, NULL, cq->wq.cc); +} + +extern const struct ethtool_ops mlx5e_ethtool_ops; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c new file mode 100644 index 000000000000..de7aec8abca1 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -0,0 +1,679 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "en.h" + +static void mlx5e_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *drvinfo) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + + strlcpy(drvinfo->driver, DRIVER_NAME, sizeof(drvinfo->driver)); + strlcpy(drvinfo->version, DRIVER_VERSION " (" DRIVER_RELDATE ")", + sizeof(drvinfo->version)); + snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), + "%d.%d.%d", + fw_rev_maj(mdev), fw_rev_min(mdev), fw_rev_sub(mdev)); + strlcpy(drvinfo->bus_info, pci_name(mdev->pdev), + sizeof(drvinfo->bus_info)); +} + +static const struct { + u32 supported; + u32 advertised; + u32 speed; +} ptys2ethtool_table[MLX5E_LINK_MODES_NUMBER] = { + [MLX5E_1000BASE_CX_SGMII] = { + .supported = SUPPORTED_1000baseKX_Full, + .advertised = ADVERTISED_1000baseKX_Full, + .speed = 1000, + }, + [MLX5E_1000BASE_KX] = { + .supported = SUPPORTED_1000baseKX_Full, + .advertised = ADVERTISED_1000baseKX_Full, + .speed = 1000, + }, + [MLX5E_10GBASE_CX4] = { + .supported = SUPPORTED_10000baseKX4_Full, + .advertised = ADVERTISED_10000baseKX4_Full, + .speed = 10000, + }, + [MLX5E_10GBASE_KX4] = { + .supported = SUPPORTED_10000baseKX4_Full, + .advertised = ADVERTISED_10000baseKX4_Full, + .speed = 10000, + }, + [MLX5E_10GBASE_KR] = { + .supported = SUPPORTED_10000baseKR_Full, + .advertised = ADVERTISED_10000baseKR_Full, + .speed = 10000, + }, + [MLX5E_20GBASE_KR2] = { + .supported = SUPPORTED_20000baseKR2_Full, + .advertised = ADVERTISED_20000baseKR2_Full, + .speed = 20000, + }, + [MLX5E_40GBASE_CR4] = { + .supported = SUPPORTED_40000baseCR4_Full, + .advertised = ADVERTISED_40000baseCR4_Full, + .speed = 40000, + }, + [MLX5E_40GBASE_KR4] = { + .supported = SUPPORTED_40000baseKR4_Full, + .advertised = ADVERTISED_40000baseKR4_Full, + .speed = 40000, + }, + [MLX5E_56GBASE_R4] = { + .supported = SUPPORTED_56000baseKR4_Full, + .advertised = ADVERTISED_56000baseKR4_Full, + .speed = 56000, + }, + [MLX5E_10GBASE_CR] = { + .supported = SUPPORTED_10000baseKR_Full, + .advertised = ADVERTISED_10000baseKR_Full, + .speed = 10000, + }, + [MLX5E_10GBASE_SR] = { + .supported = SUPPORTED_10000baseKR_Full, + .advertised = ADVERTISED_10000baseKR_Full, + .speed = 10000, + }, + [MLX5E_10GBASE_ER] = { + .supported = SUPPORTED_10000baseKR_Full, + .advertised = ADVERTISED_10000baseKR_Full, + .speed = 10000, + }, + [MLX5E_40GBASE_SR4] = { + .supported = SUPPORTED_40000baseSR4_Full, + .advertised = ADVERTISED_40000baseSR4_Full, + .speed = 40000, + }, + [MLX5E_40GBASE_LR4] = { + .supported = SUPPORTED_40000baseLR4_Full, + .advertised = ADVERTISED_40000baseLR4_Full, + .speed = 40000, + }, + [MLX5E_100GBASE_CR4] = { + .speed = 100000, + }, + [MLX5E_100GBASE_SR4] = { + .speed = 100000, + }, + [MLX5E_100GBASE_KR4] = { + .speed = 100000, + }, + [MLX5E_100GBASE_LR4] = { + .speed = 100000, + }, + [MLX5E_100BASE_TX] = { + .speed = 100, + }, + [MLX5E_100BASE_T] = { + .supported = SUPPORTED_100baseT_Full, + .advertised = ADVERTISED_100baseT_Full, + .speed = 100, + }, + [MLX5E_10GBASE_T] = { + .supported = SUPPORTED_10000baseT_Full, + .advertised = ADVERTISED_10000baseT_Full, + .speed = 1000, + }, + [MLX5E_25GBASE_CR] = { + .speed = 25000, + }, + [MLX5E_25GBASE_KR] = { + .speed = 25000, + }, + [MLX5E_25GBASE_SR] = { + .speed = 25000, + }, + [MLX5E_50GBASE_CR2] = { + .speed = 50000, + }, + [MLX5E_50GBASE_KR2] = { + .speed = 50000, + }, +}; + +static int mlx5e_get_sset_count(struct net_device *dev, int sset) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + switch (sset) { + case ETH_SS_STATS: + return NUM_VPORT_COUNTERS + + priv->params.num_channels * NUM_RQ_STATS + + priv->params.num_channels * priv->num_tc * + NUM_SQ_STATS; + /* fallthrough */ + default: + return -EOPNOTSUPP; + } +} + +static void mlx5e_get_strings(struct net_device *dev, + uint32_t stringset, uint8_t *data) +{ + int i, j, tc, idx = 0; + struct mlx5e_priv *priv = netdev_priv(dev); + + switch (stringset) { + case ETH_SS_PRIV_FLAGS: + break; + + case ETH_SS_TEST: + break; + + case ETH_SS_STATS: + /* VPORT counters */ + for (i = 0; i < NUM_VPORT_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + vport_strings[i]); + + /* per channel counters */ + for (i = 0; i < priv->params.num_channels; i++) + for (j = 0; j < NUM_RQ_STATS; j++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + "rx%d_%s", i, rq_stats_strings[j]); + + for (i = 0; i < priv->params.num_channels; i++) + for (tc = 0; tc < priv->num_tc; tc++) + for (j = 0; j < NUM_SQ_STATS; j++) + sprintf(data + + (idx++) * ETH_GSTRING_LEN, + "tx%d_%d_%s", i, tc, + sq_stats_strings[j]); + break; + } +} + +static void mlx5e_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, u64 *data) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + int i, j, tc, idx = 0; + + if (!data) + return; + + mutex_lock(&priv->state_lock); + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) + mlx5e_update_stats(priv); + mutex_unlock(&priv->state_lock); + + for (i = 0; i < NUM_VPORT_COUNTERS; i++) + data[idx++] = ((u64 *)&priv->stats.vport)[i]; + + /* per channel counters */ + for (i = 0; i < priv->params.num_channels; i++) + for (j = 0; j < NUM_RQ_STATS; j++) + data[idx++] = !test_bit(MLX5E_STATE_OPENED, + &priv->state) ? 0 : + ((u64 *)&priv->channel[i]->rq.stats)[j]; + + for (i = 0; i < priv->params.num_channels; i++) + for (tc = 0; tc < priv->num_tc; tc++) + for (j = 0; j < NUM_SQ_STATS; j++) + data[idx++] = !test_bit(MLX5E_STATE_OPENED, + &priv->state) ? 0 : + ((u64 *)&priv->channel[i]->sq[tc].stats)[j]; +} + +static void mlx5e_get_ringparam(struct net_device *dev, + struct ethtool_ringparam *param) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + param->rx_max_pending = 1 << MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE; + param->tx_max_pending = 1 << MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE; + param->rx_pending = 1 << priv->params.log_rq_size; + param->tx_pending = 1 << priv->params.log_sq_size; +} + +static int mlx5e_set_ringparam(struct net_device *dev, + struct ethtool_ringparam *param) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_params new_params; + u16 min_rx_wqes; + u8 log_rq_size; + u8 log_sq_size; + int err = 0; + + if (param->rx_jumbo_pending) { + netdev_info(dev, "%s: rx_jumbo_pending not supported\n", + __func__); + return -EINVAL; + } + if (param->rx_mini_pending) { + netdev_info(dev, "%s: rx_mini_pending not supported\n", + __func__); + return -EINVAL; + } + if (param->rx_pending < (1 << MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE)) { + netdev_info(dev, "%s: rx_pending (%d) < min (%d)\n", + __func__, param->rx_pending, + 1 << MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE); + return -EINVAL; + } + if (param->rx_pending > (1 << MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE)) { + netdev_info(dev, "%s: rx_pending (%d) > max (%d)\n", + __func__, param->rx_pending, + 1 << MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE); + return -EINVAL; + } + if (param->tx_pending < (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE)) { + netdev_info(dev, "%s: tx_pending (%d) < min (%d)\n", + __func__, param->tx_pending, + 1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE); + return -EINVAL; + } + if (param->tx_pending > (1 << MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE)) { + netdev_info(dev, "%s: tx_pending (%d) > max (%d)\n", + __func__, param->tx_pending, + 1 << MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE); + return -EINVAL; + } + + log_rq_size = order_base_2(param->rx_pending); + log_sq_size = order_base_2(param->tx_pending); + min_rx_wqes = min_t(u16, param->rx_pending - 1, + MLX5E_PARAMS_DEFAULT_MIN_RX_WQES); + + if (log_rq_size == priv->params.log_rq_size && + log_sq_size == priv->params.log_sq_size && + min_rx_wqes == priv->params.min_rx_wqes) + return 0; + + mutex_lock(&priv->state_lock); + new_params = priv->params; + new_params.log_rq_size = log_rq_size; + new_params.log_sq_size = log_sq_size; + new_params.min_rx_wqes = min_rx_wqes; + err = mlx5e_update_priv_params(priv, &new_params); + mutex_unlock(&priv->state_lock); + + return err; +} + +static void mlx5e_get_channels(struct net_device *dev, + struct ethtool_channels *ch) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + int ncv = priv->mdev->priv.eq_table.num_comp_vectors; + + ch->max_combined = ncv; + ch->combined_count = priv->params.num_channels; +} + +static int mlx5e_set_channels(struct net_device *dev, + struct ethtool_channels *ch) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + int ncv = priv->mdev->priv.eq_table.num_comp_vectors; + unsigned int count = ch->combined_count; + struct mlx5e_params new_params; + int err = 0; + + if (!count) { + netdev_info(dev, "%s: combined_count=0 not supported\n", + __func__); + return -EINVAL; + } + if (ch->rx_count || ch->tx_count) { + netdev_info(dev, "%s: separate rx/tx count not supported\n", + __func__); + return -EINVAL; + } + if (count > ncv) { + netdev_info(dev, "%s: count (%d) > max (%d)\n", + __func__, count, ncv); + return -EINVAL; + } + + if (priv->params.num_channels == count) + return 0; + + mutex_lock(&priv->state_lock); + new_params = priv->params; + new_params.num_channels = count; + err = mlx5e_update_priv_params(priv, &new_params); + mutex_unlock(&priv->state_lock); + + return err; +} + +static int mlx5e_get_coalesce(struct net_device *netdev, + struct ethtool_coalesce *coal) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + coal->rx_coalesce_usecs = priv->params.rx_cq_moderation_usec; + coal->rx_max_coalesced_frames = priv->params.rx_cq_moderation_pkts; + coal->tx_coalesce_usecs = priv->params.tx_cq_moderation_usec; + coal->tx_max_coalesced_frames = priv->params.tx_cq_moderation_pkts; + + return 0; +} + +static int mlx5e_set_coalesce(struct net_device *netdev, + struct ethtool_coalesce *coal) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_channel *c; + int tc; + int i; + + priv->params.tx_cq_moderation_usec = coal->tx_coalesce_usecs; + priv->params.tx_cq_moderation_pkts = coal->tx_max_coalesced_frames; + priv->params.rx_cq_moderation_usec = coal->rx_coalesce_usecs; + priv->params.rx_cq_moderation_pkts = coal->rx_max_coalesced_frames; + + for (i = 0; i < priv->params.num_channels; ++i) { + c = priv->channel[i]; + + for (tc = 0; tc < c->num_tc; tc++) { + mlx5_core_modify_cq_moderation(mdev, + &c->sq[tc].cq.mcq, + coal->tx_coalesce_usecs, + coal->tx_max_coalesced_frames); + } + + mlx5_core_modify_cq_moderation(mdev, &c->rq.cq.mcq, + coal->rx_coalesce_usecs, + coal->rx_max_coalesced_frames); + } + + return 0; +} + +static u32 ptys2ethtool_supported_link(u32 eth_proto_cap) +{ + int i; + u32 supported_modes = 0; + + for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) { + if (eth_proto_cap & MLX5E_PROT_MASK(i)) + supported_modes |= ptys2ethtool_table[i].supported; + } + return supported_modes; +} + +static u32 ptys2ethtool_adver_link(u32 eth_proto_cap) +{ + int i; + u32 advertising_modes = 0; + + for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) { + if (eth_proto_cap & MLX5E_PROT_MASK(i)) + advertising_modes |= ptys2ethtool_table[i].advertised; + } + return advertising_modes; +} + +static u32 ptys2ethtool_supported_port(u32 eth_proto_cap) +{ + if (eth_proto_cap & (MLX5E_PROT_MASK(MLX5E_10GBASE_CR) + | MLX5E_PROT_MASK(MLX5E_10GBASE_SR) + | MLX5E_PROT_MASK(MLX5E_40GBASE_CR4) + | MLX5E_PROT_MASK(MLX5E_40GBASE_SR4) + | MLX5E_PROT_MASK(MLX5E_100GBASE_SR4) + | MLX5E_PROT_MASK(MLX5E_1000BASE_CX_SGMII))) { + return SUPPORTED_FIBRE; + } + + if (eth_proto_cap & (MLX5E_PROT_MASK(MLX5E_100GBASE_KR4) + | MLX5E_PROT_MASK(MLX5E_40GBASE_KR4) + | MLX5E_PROT_MASK(MLX5E_10GBASE_KR) + | MLX5E_PROT_MASK(MLX5E_10GBASE_KX4) + | MLX5E_PROT_MASK(MLX5E_1000BASE_KX))) { + return SUPPORTED_Backplane; + } + return 0; +} + +static void get_speed_duplex(struct net_device *netdev, + u32 eth_proto_oper, + struct ethtool_cmd *cmd) +{ + int i; + u32 speed = SPEED_UNKNOWN; + u8 duplex = DUPLEX_UNKNOWN; + + if (!netif_carrier_ok(netdev)) + goto out; + + for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) { + if (eth_proto_oper & MLX5E_PROT_MASK(i)) { + speed = ptys2ethtool_table[i].speed; + duplex = DUPLEX_FULL; + break; + } + } +out: + ethtool_cmd_speed_set(cmd, speed); + cmd->duplex = duplex; +} + +static void get_supported(u32 eth_proto_cap, u32 *supported) +{ + *supported |= ptys2ethtool_supported_port(eth_proto_cap); + *supported |= ptys2ethtool_supported_link(eth_proto_cap); + *supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause; +} + +static void get_advertising(u32 eth_proto_cap, u8 tx_pause, + u8 rx_pause, u32 *advertising) +{ + *advertising |= ptys2ethtool_adver_link(eth_proto_cap); + *advertising |= tx_pause ? ADVERTISED_Pause : 0; + *advertising |= (tx_pause ^ rx_pause) ? ADVERTISED_Asym_Pause : 0; +} + +static u8 get_connector_port(u32 eth_proto) +{ + if (eth_proto & (MLX5E_PROT_MASK(MLX5E_10GBASE_SR) + | MLX5E_PROT_MASK(MLX5E_40GBASE_SR4) + | MLX5E_PROT_MASK(MLX5E_100GBASE_SR4) + | MLX5E_PROT_MASK(MLX5E_1000BASE_CX_SGMII))) { + return PORT_FIBRE; + } + + if (eth_proto & (MLX5E_PROT_MASK(MLX5E_40GBASE_CR4) + | MLX5E_PROT_MASK(MLX5E_10GBASE_CR) + | MLX5E_PROT_MASK(MLX5E_100GBASE_CR4))) { + return PORT_DA; + } + + if (eth_proto & (MLX5E_PROT_MASK(MLX5E_10GBASE_KX4) + | MLX5E_PROT_MASK(MLX5E_10GBASE_KR) + | MLX5E_PROT_MASK(MLX5E_40GBASE_KR4) + | MLX5E_PROT_MASK(MLX5E_100GBASE_KR4))) { + return PORT_NONE; + } + + return PORT_OTHER; +} + +static void get_lp_advertising(u32 eth_proto_lp, u32 *lp_advertising) +{ + *lp_advertising = ptys2ethtool_adver_link(eth_proto_lp); +} + +static int mlx5e_get_settings(struct net_device *netdev, + struct ethtool_cmd *cmd) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + u32 out[MLX5_ST_SZ_DW(ptys_reg)]; + u32 eth_proto_cap; + u32 eth_proto_admin; + u32 eth_proto_lp; + u32 eth_proto_oper; + int err; + + err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN); + + if (err) { + netdev_err(netdev, "%s: query port ptys failed: %d\n", + __func__, err); + goto err_query_ptys; + } + + eth_proto_cap = MLX5_GET(ptys_reg, out, eth_proto_capability); + eth_proto_admin = MLX5_GET(ptys_reg, out, eth_proto_admin); + eth_proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper); + eth_proto_lp = MLX5_GET(ptys_reg, out, eth_proto_lp_advertise); + + cmd->supported = 0; + cmd->advertising = 0; + + get_supported(eth_proto_cap, &cmd->supported); + get_advertising(eth_proto_admin, 0, 0, &cmd->advertising); + get_speed_duplex(netdev, eth_proto_oper, cmd); + + eth_proto_oper = eth_proto_oper ? eth_proto_oper : eth_proto_cap; + + cmd->port = get_connector_port(eth_proto_oper); + get_lp_advertising(eth_proto_lp, &cmd->lp_advertising); + + cmd->transceiver = XCVR_INTERNAL; + +err_query_ptys: + return err; +} + +static u32 mlx5e_ethtool2ptys_adver_link(u32 link_modes) +{ + u32 i, ptys_modes = 0; + + for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) { + if (ptys2ethtool_table[i].advertised & link_modes) + ptys_modes |= MLX5E_PROT_MASK(i); + } + + return ptys_modes; +} + +static u32 mlx5e_ethtool2ptys_speed_link(u32 speed) +{ + u32 i, speed_links = 0; + + for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) { + if (ptys2ethtool_table[i].speed == speed) + speed_links |= MLX5E_PROT_MASK(i); + } + + return speed_links; +} + +static int mlx5e_set_settings(struct net_device *netdev, + struct ethtool_cmd *cmd) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + u32 link_modes; + u32 speed; + u32 eth_proto_cap, eth_proto_admin; + u8 port_status; + int err; + + speed = ethtool_cmd_speed(cmd); + + link_modes = cmd->autoneg == AUTONEG_ENABLE ? + mlx5e_ethtool2ptys_adver_link(cmd->advertising) : + mlx5e_ethtool2ptys_speed_link(speed); + + err = mlx5_query_port_proto_cap(mdev, ð_proto_cap, MLX5_PTYS_EN); + if (err) { + netdev_err(netdev, "%s: query port eth proto cap failed: %d\n", + __func__, err); + goto out; + } + + link_modes = link_modes & eth_proto_cap; + if (!link_modes) { + netdev_err(netdev, "%s: Not supported link mode(s) requested", + __func__); + err = -EINVAL; + goto out; + } + + err = mlx5_query_port_proto_admin(mdev, ð_proto_admin, MLX5_PTYS_EN); + if (err) { + netdev_err(netdev, "%s: query port eth proto admin failed: %d\n", + __func__, err); + goto out; + } + + if (link_modes == eth_proto_admin) + goto out; + + err = mlx5_set_port_proto(mdev, link_modes, MLX5_PTYS_EN); + if (err) { + netdev_err(netdev, "%s: set port eth proto admin failed: %d\n", + __func__, err); + goto out; + } + + err = mlx5_query_port_status(mdev, &port_status); + if (err) + goto out; + + if (port_status == MLX5_PORT_DOWN) + return 0; + + err = mlx5_set_port_status(mdev, MLX5_PORT_DOWN); + if (err) + goto out; + err = mlx5_set_port_status(mdev, MLX5_PORT_UP); +out: + return err; +} + +const struct ethtool_ops mlx5e_ethtool_ops = { + .get_drvinfo = mlx5e_get_drvinfo, + .get_link = ethtool_op_get_link, + .get_strings = mlx5e_get_strings, + .get_sset_count = mlx5e_get_sset_count, + .get_ethtool_stats = mlx5e_get_ethtool_stats, + .get_ringparam = mlx5e_get_ringparam, + .set_ringparam = mlx5e_set_ringparam, + .get_channels = mlx5e_get_channels, + .set_channels = mlx5e_set_channels, + .get_coalesce = mlx5e_get_coalesce, + .set_coalesce = mlx5e_set_coalesce, + .get_settings = mlx5e_get_settings, + .set_settings = mlx5e_set_settings, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c new file mode 100644 index 000000000000..eee829d119f9 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -0,0 +1,1899 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include "en.h" + +struct mlx5e_rq_param { + u32 rqc[MLX5_ST_SZ_DW(rqc)]; + struct mlx5_wq_param wq; +}; + +struct mlx5e_sq_param { + u32 sqc[MLX5_ST_SZ_DW(sqc)]; + struct mlx5_wq_param wq; +}; + +struct mlx5e_cq_param { + u32 cqc[MLX5_ST_SZ_DW(cqc)]; + struct mlx5_wq_param wq; + u16 eq_ix; +}; + +struct mlx5e_channel_param { + struct mlx5e_rq_param rq; + struct mlx5e_sq_param sq; + struct mlx5e_cq_param rx_cq; + struct mlx5e_cq_param tx_cq; +}; + +static void mlx5e_update_carrier(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + u8 port_state; + + port_state = mlx5_query_vport_state(mdev, + MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT); + + if (port_state == VPORT_STATE_UP) + netif_carrier_on(priv->netdev); + else + netif_carrier_off(priv->netdev); +} + +static void mlx5e_update_carrier_work(struct work_struct *work) +{ + struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, + update_carrier_work); + + mutex_lock(&priv->state_lock); + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) + mlx5e_update_carrier(priv); + mutex_unlock(&priv->state_lock); +} + +void mlx5e_update_stats(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_vport_stats *s = &priv->stats.vport; + struct mlx5e_rq_stats *rq_stats; + struct mlx5e_sq_stats *sq_stats; + u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)]; + u32 *out; + int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out); + u64 tx_offload_none; + int i, j; + + out = mlx5_vzalloc(outlen); + if (!out) + return; + + /* Collect firts the SW counters and then HW for consistency */ + s->tso_packets = 0; + s->tso_bytes = 0; + s->tx_queue_stopped = 0; + s->tx_queue_wake = 0; + s->tx_queue_dropped = 0; + tx_offload_none = 0; + s->lro_packets = 0; + s->lro_bytes = 0; + s->rx_csum_none = 0; + s->rx_wqe_err = 0; + for (i = 0; i < priv->params.num_channels; i++) { + rq_stats = &priv->channel[i]->rq.stats; + + s->lro_packets += rq_stats->lro_packets; + s->lro_bytes += rq_stats->lro_bytes; + s->rx_csum_none += rq_stats->csum_none; + s->rx_wqe_err += rq_stats->wqe_err; + + for (j = 0; j < priv->num_tc; j++) { + sq_stats = &priv->channel[i]->sq[j].stats; + + s->tso_packets += sq_stats->tso_packets; + s->tso_bytes += sq_stats->tso_bytes; + s->tx_queue_stopped += sq_stats->stopped; + s->tx_queue_wake += sq_stats->wake; + s->tx_queue_dropped += sq_stats->dropped; + tx_offload_none += sq_stats->csum_offload_none; + } + } + + /* HW counters */ + memset(in, 0, sizeof(in)); + + MLX5_SET(query_vport_counter_in, in, opcode, + MLX5_CMD_OP_QUERY_VPORT_COUNTER); + MLX5_SET(query_vport_counter_in, in, op_mod, 0); + MLX5_SET(query_vport_counter_in, in, other_vport, 0); + + memset(out, 0, outlen); + + if (mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen)) + goto free_out; + +#define MLX5_GET_CTR(p, x) \ + MLX5_GET64(query_vport_counter_out, p, x) + + s->rx_error_packets = + MLX5_GET_CTR(out, received_errors.packets); + s->rx_error_bytes = + MLX5_GET_CTR(out, received_errors.octets); + s->tx_error_packets = + MLX5_GET_CTR(out, transmit_errors.packets); + s->tx_error_bytes = + MLX5_GET_CTR(out, transmit_errors.octets); + + s->rx_unicast_packets = + MLX5_GET_CTR(out, received_eth_unicast.packets); + s->rx_unicast_bytes = + MLX5_GET_CTR(out, received_eth_unicast.octets); + s->tx_unicast_packets = + MLX5_GET_CTR(out, transmitted_eth_unicast.packets); + s->tx_unicast_bytes = + MLX5_GET_CTR(out, transmitted_eth_unicast.octets); + + s->rx_multicast_packets = + MLX5_GET_CTR(out, received_eth_multicast.packets); + s->rx_multicast_bytes = + MLX5_GET_CTR(out, received_eth_multicast.octets); + s->tx_multicast_packets = + MLX5_GET_CTR(out, transmitted_eth_multicast.packets); + s->tx_multicast_bytes = + MLX5_GET_CTR(out, transmitted_eth_multicast.octets); + + s->rx_broadcast_packets = + MLX5_GET_CTR(out, received_eth_broadcast.packets); + s->rx_broadcast_bytes = + MLX5_GET_CTR(out, received_eth_broadcast.octets); + s->tx_broadcast_packets = + MLX5_GET_CTR(out, transmitted_eth_broadcast.packets); + s->tx_broadcast_bytes = + MLX5_GET_CTR(out, transmitted_eth_broadcast.octets); + + s->rx_packets = + s->rx_unicast_packets + + s->rx_multicast_packets + + s->rx_broadcast_packets; + s->rx_bytes = + s->rx_unicast_bytes + + s->rx_multicast_bytes + + s->rx_broadcast_bytes; + s->tx_packets = + s->tx_unicast_packets + + s->tx_multicast_packets + + s->tx_broadcast_packets; + s->tx_bytes = + s->tx_unicast_bytes + + s->tx_multicast_bytes + + s->tx_broadcast_bytes; + + /* Update calculated offload counters */ + s->tx_csum_offload = s->tx_packets - tx_offload_none; + s->rx_csum_good = s->rx_packets - s->rx_csum_none; + +free_out: + kvfree(out); +} + +static void mlx5e_update_stats_work(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + struct mlx5e_priv *priv = container_of(dwork, struct mlx5e_priv, + update_stats_work); + mutex_lock(&priv->state_lock); + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { + mlx5e_update_stats(priv); + schedule_delayed_work(dwork, + msecs_to_jiffies( + MLX5E_UPDATE_STATS_INTERVAL)); + } + mutex_unlock(&priv->state_lock); +} + +static void __mlx5e_async_event(struct mlx5e_priv *priv, + enum mlx5_dev_event event) +{ + switch (event) { + case MLX5_DEV_EVENT_PORT_UP: + case MLX5_DEV_EVENT_PORT_DOWN: + schedule_work(&priv->update_carrier_work); + break; + + default: + break; + } +} + +static void mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv, + enum mlx5_dev_event event, unsigned long param) +{ + struct mlx5e_priv *priv = vpriv; + + spin_lock(&priv->async_events_spinlock); + if (test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state)) + __mlx5e_async_event(priv, event); + spin_unlock(&priv->async_events_spinlock); +} + +static void mlx5e_enable_async_events(struct mlx5e_priv *priv) +{ + set_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state); +} + +static void mlx5e_disable_async_events(struct mlx5e_priv *priv) +{ + spin_lock_irq(&priv->async_events_spinlock); + clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state); + spin_unlock_irq(&priv->async_events_spinlock); +} + +static void mlx5e_send_nop(struct mlx5e_sq *sq) +{ + struct mlx5_wq_cyc *wq = &sq->wq; + + u16 pi = sq->pc & wq->sz_m1; + struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); + + struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; + + memset(cseg, 0, sizeof(*cseg)); + + cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_NOP); + cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | 0x01); + cseg->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; + + sq->skb[pi] = NULL; + sq->pc++; + mlx5e_tx_notify_hw(sq, wqe); +} + +static int mlx5e_create_rq(struct mlx5e_channel *c, + struct mlx5e_rq_param *param, + struct mlx5e_rq *rq) +{ + struct mlx5e_priv *priv = c->priv; + struct mlx5_core_dev *mdev = priv->mdev; + void *rqc = param->rqc; + void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq); + int wq_sz; + int err; + int i; + + err = mlx5_wq_ll_create(mdev, ¶m->wq, rqc_wq, &rq->wq, + &rq->wq_ctrl); + if (err) + return err; + + rq->wq.db = &rq->wq.db[MLX5_RCV_DBR]; + + wq_sz = mlx5_wq_ll_get_size(&rq->wq); + rq->skb = kzalloc_node(wq_sz * sizeof(*rq->skb), GFP_KERNEL, + cpu_to_node(c->cpu)); + if (!rq->skb) { + err = -ENOMEM; + goto err_rq_wq_destroy; + } + + rq->wqe_sz = (priv->params.lro_en) ? priv->params.lro_wqe_sz : + priv->netdev->mtu + ETH_HLEN + VLAN_HLEN; + + for (i = 0; i < wq_sz; i++) { + struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i); + + wqe->data.lkey = c->mkey_be; + wqe->data.byte_count = cpu_to_be32(rq->wqe_sz); + } + + rq->pdev = c->pdev; + rq->netdev = c->netdev; + rq->channel = c; + rq->ix = c->ix; + + return 0; + +err_rq_wq_destroy: + mlx5_wq_destroy(&rq->wq_ctrl); + + return err; +} + +static void mlx5e_destroy_rq(struct mlx5e_rq *rq) +{ + kfree(rq->skb); + mlx5_wq_destroy(&rq->wq_ctrl); +} + +static int mlx5e_enable_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param) +{ + struct mlx5e_channel *c = rq->channel; + struct mlx5e_priv *priv = c->priv; + struct mlx5_core_dev *mdev = priv->mdev; + + void *in; + void *rqc; + void *wq; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(create_rq_in) + + sizeof(u64) * rq->wq_ctrl.buf.npages; + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + rqc = MLX5_ADDR_OF(create_rq_in, in, ctx); + wq = MLX5_ADDR_OF(rqc, rqc, wq); + + memcpy(rqc, param->rqc, sizeof(param->rqc)); + + MLX5_SET(rqc, rqc, cqn, c->rq.cq.mcq.cqn); + MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST); + MLX5_SET(rqc, rqc, flush_in_error_en, 1); + MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST); + MLX5_SET(wq, wq, log_wq_pg_sz, rq->wq_ctrl.buf.page_shift - + PAGE_SHIFT); + MLX5_SET64(wq, wq, dbr_addr, rq->wq_ctrl.db.dma); + + mlx5_fill_page_array(&rq->wq_ctrl.buf, + (__be64 *)MLX5_ADDR_OF(wq, wq, pas)); + + err = mlx5_create_rq(mdev, in, inlen, &rq->rqn); + + kvfree(in); + + return err; +} + +static int mlx5e_modify_rq(struct mlx5e_rq *rq, int curr_state, int next_state) +{ + struct mlx5e_channel *c = rq->channel; + struct mlx5e_priv *priv = c->priv; + struct mlx5_core_dev *mdev = priv->mdev; + + void *in; + void *rqc; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(modify_rq_in); + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx); + + MLX5_SET(modify_rq_in, in, rq_state, curr_state); + MLX5_SET(rqc, rqc, state, next_state); + + err = mlx5_modify_rq(mdev, rq->rqn, in, inlen); + + kvfree(in); + + return err; +} + +static void mlx5e_disable_rq(struct mlx5e_rq *rq) +{ + struct mlx5e_channel *c = rq->channel; + struct mlx5e_priv *priv = c->priv; + struct mlx5_core_dev *mdev = priv->mdev; + + mlx5_destroy_rq(mdev, rq->rqn); +} + +static int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq) +{ + struct mlx5e_channel *c = rq->channel; + struct mlx5e_priv *priv = c->priv; + struct mlx5_wq_ll *wq = &rq->wq; + int i; + + for (i = 0; i < 1000; i++) { + if (wq->cur_sz >= priv->params.min_rx_wqes) + return 0; + + msleep(20); + } + + return -ETIMEDOUT; +} + +static int mlx5e_open_rq(struct mlx5e_channel *c, + struct mlx5e_rq_param *param, + struct mlx5e_rq *rq) +{ + int err; + + err = mlx5e_create_rq(c, param, rq); + if (err) + return err; + + err = mlx5e_enable_rq(rq, param); + if (err) + goto err_destroy_rq; + + err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY); + if (err) + goto err_disable_rq; + + set_bit(MLX5E_RQ_STATE_POST_WQES_ENABLE, &rq->state); + mlx5e_send_nop(&c->sq[0]); /* trigger mlx5e_post_rx_wqes() */ + + return 0; + +err_disable_rq: + mlx5e_disable_rq(rq); +err_destroy_rq: + mlx5e_destroy_rq(rq); + + return err; +} + +static void mlx5e_close_rq(struct mlx5e_rq *rq) +{ + clear_bit(MLX5E_RQ_STATE_POST_WQES_ENABLE, &rq->state); + napi_synchronize(&rq->channel->napi); /* prevent mlx5e_post_rx_wqes */ + + mlx5e_modify_rq(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR); + while (!mlx5_wq_ll_is_empty(&rq->wq)) + msleep(20); + + /* avoid destroying rq before mlx5e_poll_rx_cq() is done with it */ + napi_synchronize(&rq->channel->napi); + + mlx5e_disable_rq(rq); + mlx5e_destroy_rq(rq); +} + +static void mlx5e_free_sq_db(struct mlx5e_sq *sq) +{ + kfree(sq->dma_fifo); + kfree(sq->skb); +} + +static int mlx5e_alloc_sq_db(struct mlx5e_sq *sq, int numa) +{ + int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); + int df_sz = wq_sz * MLX5_SEND_WQEBB_NUM_DS; + + sq->skb = kzalloc_node(wq_sz * sizeof(*sq->skb), GFP_KERNEL, numa); + sq->dma_fifo = kzalloc_node(df_sz * sizeof(*sq->dma_fifo), GFP_KERNEL, + numa); + + if (!sq->skb || !sq->dma_fifo) { + mlx5e_free_sq_db(sq); + return -ENOMEM; + } + + sq->dma_fifo_mask = df_sz - 1; + + return 0; +} + +static int mlx5e_create_sq(struct mlx5e_channel *c, + int tc, + struct mlx5e_sq_param *param, + struct mlx5e_sq *sq) +{ + struct mlx5e_priv *priv = c->priv; + struct mlx5_core_dev *mdev = priv->mdev; + + void *sqc = param->sqc; + void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq); + int err; + + err = mlx5_alloc_map_uar(mdev, &sq->uar); + if (err) + return err; + + err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, &sq->wq, + &sq->wq_ctrl); + if (err) + goto err_unmap_free_uar; + + sq->wq.db = &sq->wq.db[MLX5_SND_DBR]; + sq->uar_map = sq->uar.map; + sq->bf_buf_size = (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2; + + if (mlx5e_alloc_sq_db(sq, cpu_to_node(c->cpu))) + goto err_sq_wq_destroy; + + sq->txq = netdev_get_tx_queue(priv->netdev, + c->ix + tc * priv->params.num_channels); + + sq->pdev = c->pdev; + sq->mkey_be = c->mkey_be; + sq->channel = c; + sq->tc = tc; + + return 0; + +err_sq_wq_destroy: + mlx5_wq_destroy(&sq->wq_ctrl); + +err_unmap_free_uar: + mlx5_unmap_free_uar(mdev, &sq->uar); + + return err; +} + +static void mlx5e_destroy_sq(struct mlx5e_sq *sq) +{ + struct mlx5e_channel *c = sq->channel; + struct mlx5e_priv *priv = c->priv; + + mlx5e_free_sq_db(sq); + mlx5_wq_destroy(&sq->wq_ctrl); + mlx5_unmap_free_uar(priv->mdev, &sq->uar); +} + +static int mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param) +{ + struct mlx5e_channel *c = sq->channel; + struct mlx5e_priv *priv = c->priv; + struct mlx5_core_dev *mdev = priv->mdev; + + void *in; + void *sqc; + void *wq; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(create_sq_in) + + sizeof(u64) * sq->wq_ctrl.buf.npages; + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + sqc = MLX5_ADDR_OF(create_sq_in, in, ctx); + wq = MLX5_ADDR_OF(sqc, sqc, wq); + + memcpy(sqc, param->sqc, sizeof(param->sqc)); + + MLX5_SET(sqc, sqc, user_index, sq->tc); + MLX5_SET(sqc, sqc, tis_num_0, priv->tisn[sq->tc]); + MLX5_SET(sqc, sqc, cqn, c->sq[sq->tc].cq.mcq.cqn); + MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST); + MLX5_SET(sqc, sqc, tis_lst_sz, 1); + MLX5_SET(sqc, sqc, flush_in_error_en, 1); + + MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); + MLX5_SET(wq, wq, uar_page, sq->uar.index); + MLX5_SET(wq, wq, log_wq_pg_sz, sq->wq_ctrl.buf.page_shift - + PAGE_SHIFT); + MLX5_SET64(wq, wq, dbr_addr, sq->wq_ctrl.db.dma); + + mlx5_fill_page_array(&sq->wq_ctrl.buf, + (__be64 *)MLX5_ADDR_OF(wq, wq, pas)); + + err = mlx5_create_sq(mdev, in, inlen, &sq->sqn); + + kvfree(in); + + return err; +} + +static int mlx5e_modify_sq(struct mlx5e_sq *sq, int curr_state, int next_state) +{ + struct mlx5e_channel *c = sq->channel; + struct mlx5e_priv *priv = c->priv; + struct mlx5_core_dev *mdev = priv->mdev; + + void *in; + void *sqc; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(modify_sq_in); + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx); + + MLX5_SET(modify_sq_in, in, sq_state, curr_state); + MLX5_SET(sqc, sqc, state, next_state); + + err = mlx5_modify_sq(mdev, sq->sqn, in, inlen); + + kvfree(in); + + return err; +} + +static void mlx5e_disable_sq(struct mlx5e_sq *sq) +{ + struct mlx5e_channel *c = sq->channel; + struct mlx5e_priv *priv = c->priv; + struct mlx5_core_dev *mdev = priv->mdev; + + mlx5_destroy_sq(mdev, sq->sqn); +} + +static int mlx5e_open_sq(struct mlx5e_channel *c, + int tc, + struct mlx5e_sq_param *param, + struct mlx5e_sq *sq) +{ + int err; + + err = mlx5e_create_sq(c, tc, param, sq); + if (err) + return err; + + err = mlx5e_enable_sq(sq, param); + if (err) + goto err_destroy_sq; + + err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY); + if (err) + goto err_disable_sq; + + set_bit(MLX5E_SQ_STATE_WAKE_TXQ_ENABLE, &sq->state); + netdev_tx_reset_queue(sq->txq); + netif_tx_start_queue(sq->txq); + + return 0; + +err_disable_sq: + mlx5e_disable_sq(sq); +err_destroy_sq: + mlx5e_destroy_sq(sq); + + return err; +} + +static inline void netif_tx_disable_queue(struct netdev_queue *txq) +{ + __netif_tx_lock_bh(txq); + netif_tx_stop_queue(txq); + __netif_tx_unlock_bh(txq); +} + +static void mlx5e_close_sq(struct mlx5e_sq *sq) +{ + clear_bit(MLX5E_SQ_STATE_WAKE_TXQ_ENABLE, &sq->state); + napi_synchronize(&sq->channel->napi); /* prevent netif_tx_wake_queue */ + netif_tx_disable_queue(sq->txq); + + /* ensure hw is notified of all pending wqes */ + if (mlx5e_sq_has_room_for(sq, 1)) + mlx5e_send_nop(sq); + + mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR); + while (sq->cc != sq->pc) /* wait till sq is empty */ + msleep(20); + + /* avoid destroying sq before mlx5e_poll_tx_cq() is done with it */ + napi_synchronize(&sq->channel->napi); + + mlx5e_disable_sq(sq); + mlx5e_destroy_sq(sq); +} + +static int mlx5e_create_cq(struct mlx5e_channel *c, + struct mlx5e_cq_param *param, + struct mlx5e_cq *cq) +{ + struct mlx5e_priv *priv = c->priv; + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_core_cq *mcq = &cq->mcq; + int eqn_not_used; + int irqn; + int err; + u32 i; + + param->wq.numa = cpu_to_node(c->cpu); + param->eq_ix = c->ix; + + err = mlx5_cqwq_create(mdev, ¶m->wq, param->cqc, &cq->wq, + &cq->wq_ctrl); + if (err) + return err; + + mlx5_vector2eqn(mdev, param->eq_ix, &eqn_not_used, &irqn); + + cq->napi = &c->napi; + + mcq->cqe_sz = 64; + mcq->set_ci_db = cq->wq_ctrl.db.db; + mcq->arm_db = cq->wq_ctrl.db.db + 1; + *mcq->set_ci_db = 0; + *mcq->arm_db = 0; + mcq->vector = param->eq_ix; + mcq->comp = mlx5e_completion_event; + mcq->event = mlx5e_cq_error_event; + mcq->irqn = irqn; + mcq->uar = &priv->cq_uar; + + for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) { + struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i); + + cqe->op_own = 0xf1; + } + + cq->channel = c; + + return 0; +} + +static void mlx5e_destroy_cq(struct mlx5e_cq *cq) +{ + mlx5_wq_destroy(&cq->wq_ctrl); +} + +static int mlx5e_enable_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param) +{ + struct mlx5e_channel *c = cq->channel; + struct mlx5e_priv *priv = c->priv; + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_core_cq *mcq = &cq->mcq; + + void *in; + void *cqc; + int inlen; + int irqn_not_used; + int eqn; + int err; + + inlen = MLX5_ST_SZ_BYTES(create_cq_in) + + sizeof(u64) * cq->wq_ctrl.buf.npages; + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context); + + memcpy(cqc, param->cqc, sizeof(param->cqc)); + + mlx5_fill_page_array(&cq->wq_ctrl.buf, + (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas)); + + mlx5_vector2eqn(mdev, param->eq_ix, &eqn, &irqn_not_used); + + MLX5_SET(cqc, cqc, c_eqn, eqn); + MLX5_SET(cqc, cqc, uar_page, mcq->uar->index); + MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift - + PAGE_SHIFT); + MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma); + + err = mlx5_core_create_cq(mdev, mcq, in, inlen); + + kvfree(in); + + if (err) + return err; + + mlx5e_cq_arm(cq); + + return 0; +} + +static void mlx5e_disable_cq(struct mlx5e_cq *cq) +{ + struct mlx5e_channel *c = cq->channel; + struct mlx5e_priv *priv = c->priv; + struct mlx5_core_dev *mdev = priv->mdev; + + mlx5_core_destroy_cq(mdev, &cq->mcq); +} + +static int mlx5e_open_cq(struct mlx5e_channel *c, + struct mlx5e_cq_param *param, + struct mlx5e_cq *cq, + u16 moderation_usecs, + u16 moderation_frames) +{ + int err; + struct mlx5e_priv *priv = c->priv; + struct mlx5_core_dev *mdev = priv->mdev; + + err = mlx5e_create_cq(c, param, cq); + if (err) + return err; + + err = mlx5e_enable_cq(cq, param); + if (err) + goto err_destroy_cq; + + err = mlx5_core_modify_cq_moderation(mdev, &cq->mcq, + moderation_usecs, + moderation_frames); + if (err) + goto err_destroy_cq; + + return 0; + +err_destroy_cq: + mlx5e_destroy_cq(cq); + + return err; +} + +static void mlx5e_close_cq(struct mlx5e_cq *cq) +{ + mlx5e_disable_cq(cq); + mlx5e_destroy_cq(cq); +} + +static int mlx5e_get_cpu(struct mlx5e_priv *priv, int ix) +{ + return cpumask_first(priv->mdev->priv.irq_info[ix].mask); +} + +static int mlx5e_open_tx_cqs(struct mlx5e_channel *c, + struct mlx5e_channel_param *cparam) +{ + struct mlx5e_priv *priv = c->priv; + int err; + int tc; + + for (tc = 0; tc < c->num_tc; tc++) { + err = mlx5e_open_cq(c, &cparam->tx_cq, &c->sq[tc].cq, + priv->params.tx_cq_moderation_usec, + priv->params.tx_cq_moderation_pkts); + if (err) + goto err_close_tx_cqs; + + c->sq[tc].cq.sqrq = &c->sq[tc]; + } + + return 0; + +err_close_tx_cqs: + for (tc--; tc >= 0; tc--) + mlx5e_close_cq(&c->sq[tc].cq); + + return err; +} + +static void mlx5e_close_tx_cqs(struct mlx5e_channel *c) +{ + int tc; + + for (tc = 0; tc < c->num_tc; tc++) + mlx5e_close_cq(&c->sq[tc].cq); +} + +static int mlx5e_open_sqs(struct mlx5e_channel *c, + struct mlx5e_channel_param *cparam) +{ + int err; + int tc; + + for (tc = 0; tc < c->num_tc; tc++) { + err = mlx5e_open_sq(c, tc, &cparam->sq, &c->sq[tc]); + if (err) + goto err_close_sqs; + } + + return 0; + +err_close_sqs: + for (tc--; tc >= 0; tc--) + mlx5e_close_sq(&c->sq[tc]); + + return err; +} + +static void mlx5e_close_sqs(struct mlx5e_channel *c) +{ + int tc; + + for (tc = 0; tc < c->num_tc; tc++) + mlx5e_close_sq(&c->sq[tc]); +} + +static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, + struct mlx5e_channel_param *cparam, + struct mlx5e_channel **cp) +{ + struct net_device *netdev = priv->netdev; + int cpu = mlx5e_get_cpu(priv, ix); + struct mlx5e_channel *c; + int err; + + c = kzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu)); + if (!c) + return -ENOMEM; + + c->priv = priv; + c->ix = ix; + c->cpu = cpu; + c->pdev = &priv->mdev->pdev->dev; + c->netdev = priv->netdev; + c->mkey_be = cpu_to_be32(priv->mr.key); + c->num_tc = priv->num_tc; + + netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64); + + err = mlx5e_open_tx_cqs(c, cparam); + if (err) + goto err_napi_del; + + err = mlx5e_open_cq(c, &cparam->rx_cq, &c->rq.cq, + priv->params.rx_cq_moderation_usec, + priv->params.rx_cq_moderation_pkts); + if (err) + goto err_close_tx_cqs; + c->rq.cq.sqrq = &c->rq; + + napi_enable(&c->napi); + + err = mlx5e_open_sqs(c, cparam); + if (err) + goto err_disable_napi; + + err = mlx5e_open_rq(c, &cparam->rq, &c->rq); + if (err) + goto err_close_sqs; + + netif_set_xps_queue(netdev, get_cpu_mask(c->cpu), ix); + *cp = c; + + return 0; + +err_close_sqs: + mlx5e_close_sqs(c); + +err_disable_napi: + napi_disable(&c->napi); + mlx5e_close_cq(&c->rq.cq); + +err_close_tx_cqs: + mlx5e_close_tx_cqs(c); + +err_napi_del: + netif_napi_del(&c->napi); + kfree(c); + + return err; +} + +static void mlx5e_close_channel(struct mlx5e_channel *c) +{ + mlx5e_close_rq(&c->rq); + mlx5e_close_sqs(c); + napi_disable(&c->napi); + mlx5e_close_cq(&c->rq.cq); + mlx5e_close_tx_cqs(c); + netif_napi_del(&c->napi); + kfree(c); +} + +static void mlx5e_build_rq_param(struct mlx5e_priv *priv, + struct mlx5e_rq_param *param) +{ + void *rqc = param->rqc; + void *wq = MLX5_ADDR_OF(rqc, rqc, wq); + + MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST); + MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN); + MLX5_SET(wq, wq, log_wq_stride, ilog2(sizeof(struct mlx5e_rx_wqe))); + MLX5_SET(wq, wq, log_wq_sz, priv->params.log_rq_size); + MLX5_SET(wq, wq, pd, priv->pdn); + + param->wq.numa = dev_to_node(&priv->mdev->pdev->dev); + param->wq.linear = 1; +} + +static void mlx5e_build_sq_param(struct mlx5e_priv *priv, + struct mlx5e_sq_param *param) +{ + void *sqc = param->sqc; + void *wq = MLX5_ADDR_OF(sqc, sqc, wq); + + MLX5_SET(wq, wq, log_wq_sz, priv->params.log_sq_size); + MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB)); + MLX5_SET(wq, wq, pd, priv->pdn); + + param->wq.numa = dev_to_node(&priv->mdev->pdev->dev); +} + +static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv, + struct mlx5e_cq_param *param) +{ + void *cqc = param->cqc; + + MLX5_SET(cqc, cqc, uar_page, priv->cq_uar.index); +} + +static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, + struct mlx5e_cq_param *param) +{ + void *cqc = param->cqc; + + MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_rq_size); + + mlx5e_build_common_cq_param(priv, param); +} + +static void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv, + struct mlx5e_cq_param *param) +{ + void *cqc = param->cqc; + + MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_sq_size); + + mlx5e_build_common_cq_param(priv, param); +} + +static void mlx5e_build_channel_param(struct mlx5e_priv *priv, + struct mlx5e_channel_param *cparam) +{ + memset(cparam, 0, sizeof(*cparam)); + + mlx5e_build_rq_param(priv, &cparam->rq); + mlx5e_build_sq_param(priv, &cparam->sq); + mlx5e_build_rx_cq_param(priv, &cparam->rx_cq); + mlx5e_build_tx_cq_param(priv, &cparam->tx_cq); +} + +static int mlx5e_open_channels(struct mlx5e_priv *priv) +{ + struct mlx5e_channel_param cparam; + int err; + int i; + int j; + + priv->channel = kcalloc(priv->params.num_channels, + sizeof(struct mlx5e_channel *), GFP_KERNEL); + if (!priv->channel) + return -ENOMEM; + + mlx5e_build_channel_param(priv, &cparam); + for (i = 0; i < priv->params.num_channels; i++) { + err = mlx5e_open_channel(priv, i, &cparam, &priv->channel[i]); + if (err) + goto err_close_channels; + } + + for (j = 0; j < priv->params.num_channels; j++) { + err = mlx5e_wait_for_min_rx_wqes(&priv->channel[j]->rq); + if (err) + goto err_close_channels; + } + + return 0; + +err_close_channels: + for (i--; i >= 0; i--) + mlx5e_close_channel(priv->channel[i]); + + kfree(priv->channel); + + return err; +} + +static void mlx5e_close_channels(struct mlx5e_priv *priv) +{ + int i; + + for (i = 0; i < priv->params.num_channels; i++) + mlx5e_close_channel(priv->channel[i]); + + kfree(priv->channel); +} + +static int mlx5e_open_tis(struct mlx5e_priv *priv, int tc) +{ + struct mlx5_core_dev *mdev = priv->mdev; + u32 in[MLX5_ST_SZ_DW(create_tis_in)]; + void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); + + memset(in, 0, sizeof(in)); + + MLX5_SET(tisc, tisc, prio, tc); + + return mlx5_create_tis(mdev, in, sizeof(in), &priv->tisn[tc]); +} + +static void mlx5e_close_tis(struct mlx5e_priv *priv, int tc) +{ + mlx5_destroy_tis(priv->mdev, priv->tisn[tc]); +} + +static int mlx5e_open_tises(struct mlx5e_priv *priv) +{ + int num_tc = priv->num_tc; + int err; + int tc; + + for (tc = 0; tc < num_tc; tc++) { + err = mlx5e_open_tis(priv, tc); + if (err) + goto err_close_tises; + } + + return 0; + +err_close_tises: + for (tc--; tc >= 0; tc--) + mlx5e_close_tis(priv, tc); + + return err; +} + +static void mlx5e_close_tises(struct mlx5e_priv *priv) +{ + int num_tc = priv->num_tc; + int tc; + + for (tc = 0; tc < num_tc; tc++) + mlx5e_close_tis(priv, tc); +} + +static int mlx5e_open_rqt(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + u32 *in; + u32 out[MLX5_ST_SZ_DW(create_rqt_out)]; + void *rqtc; + int inlen; + int err; + int sz; + int i; + + sz = 1 << priv->params.rx_hash_log_tbl_sz; + + inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz; + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context); + + MLX5_SET(rqtc, rqtc, rqt_actual_size, sz); + MLX5_SET(rqtc, rqtc, rqt_max_size, sz); + + for (i = 0; i < sz; i++) { + int ix = i % priv->params.num_channels; + + MLX5_SET(rqtc, rqtc, rq_num[i], priv->channel[ix]->rq.rqn); + } + + MLX5_SET(create_rqt_in, in, opcode, MLX5_CMD_OP_CREATE_RQT); + + memset(out, 0, sizeof(out)); + err = mlx5_cmd_exec_check_status(mdev, in, inlen, out, sizeof(out)); + if (!err) + priv->rqtn = MLX5_GET(create_rqt_out, out, rqtn); + + kvfree(in); + + return err; +} + +static void mlx5e_close_rqt(struct mlx5e_priv *priv) +{ + u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)]; + u32 out[MLX5_ST_SZ_DW(destroy_rqt_out)]; + + memset(in, 0, sizeof(in)); + + MLX5_SET(destroy_rqt_in, in, opcode, MLX5_CMD_OP_DESTROY_RQT); + MLX5_SET(destroy_rqt_in, in, rqtn, priv->rqtn); + + mlx5_cmd_exec_check_status(priv->mdev, in, sizeof(in), out, + sizeof(out)); +} + +static void mlx5e_build_tir_ctx(struct mlx5e_priv *priv, u32 *tirc, int tt) +{ + void *hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); + +#define ROUGH_MAX_L2_L3_HDR_SZ 256 + +#define MLX5_HASH_IP (MLX5_HASH_FIELD_SEL_SRC_IP |\ + MLX5_HASH_FIELD_SEL_DST_IP) + +#define MLX5_HASH_ALL (MLX5_HASH_FIELD_SEL_SRC_IP |\ + MLX5_HASH_FIELD_SEL_DST_IP |\ + MLX5_HASH_FIELD_SEL_L4_SPORT |\ + MLX5_HASH_FIELD_SEL_L4_DPORT) + + if (priv->params.lro_en) { + MLX5_SET(tirc, tirc, lro_enable_mask, + MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO | + MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO); + MLX5_SET(tirc, tirc, lro_max_ip_payload_size, + (priv->params.lro_wqe_sz - + ROUGH_MAX_L2_L3_HDR_SZ) >> 8); + MLX5_SET(tirc, tirc, lro_timeout_period_usecs, + MLX5_CAP_ETH(priv->mdev, + lro_timer_supported_periods[3])); + } + + switch (tt) { + case MLX5E_TT_ANY: + MLX5_SET(tirc, tirc, disp_type, + MLX5_TIRC_DISP_TYPE_DIRECT); + MLX5_SET(tirc, tirc, inline_rqn, + priv->channel[0]->rq.rqn); + break; + default: + MLX5_SET(tirc, tirc, disp_type, + MLX5_TIRC_DISP_TYPE_INDIRECT); + MLX5_SET(tirc, tirc, indirect_table, + priv->rqtn); + MLX5_SET(tirc, tirc, rx_hash_fn, + MLX5_TIRC_RX_HASH_FN_HASH_TOEPLITZ); + MLX5_SET(tirc, tirc, rx_hash_symmetric, 1); + netdev_rss_key_fill(MLX5_ADDR_OF(tirc, tirc, + rx_hash_toeplitz_key), + MLX5_FLD_SZ_BYTES(tirc, + rx_hash_toeplitz_key)); + break; + } + + switch (tt) { + case MLX5E_TT_IPV4_TCP: + MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, + MLX5_L3_PROT_TYPE_IPV4); + MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, + MLX5_L4_PROT_TYPE_TCP); + MLX5_SET(rx_hash_field_select, hfso, selected_fields, + MLX5_HASH_ALL); + break; + + case MLX5E_TT_IPV6_TCP: + MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, + MLX5_L3_PROT_TYPE_IPV6); + MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, + MLX5_L4_PROT_TYPE_TCP); + MLX5_SET(rx_hash_field_select, hfso, selected_fields, + MLX5_HASH_ALL); + break; + + case MLX5E_TT_IPV4_UDP: + MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, + MLX5_L3_PROT_TYPE_IPV4); + MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, + MLX5_L4_PROT_TYPE_UDP); + MLX5_SET(rx_hash_field_select, hfso, selected_fields, + MLX5_HASH_ALL); + break; + + case MLX5E_TT_IPV6_UDP: + MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, + MLX5_L3_PROT_TYPE_IPV6); + MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, + MLX5_L4_PROT_TYPE_UDP); + MLX5_SET(rx_hash_field_select, hfso, selected_fields, + MLX5_HASH_ALL); + break; + + case MLX5E_TT_IPV4: + MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, + MLX5_L3_PROT_TYPE_IPV4); + MLX5_SET(rx_hash_field_select, hfso, selected_fields, + MLX5_HASH_IP); + break; + + case MLX5E_TT_IPV6: + MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, + MLX5_L3_PROT_TYPE_IPV6); + MLX5_SET(rx_hash_field_select, hfso, selected_fields, + MLX5_HASH_IP); + break; + } +} + +static int mlx5e_open_tir(struct mlx5e_priv *priv, int tt) +{ + struct mlx5_core_dev *mdev = priv->mdev; + u32 *in; + void *tirc; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(create_tir_in); + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); + + mlx5e_build_tir_ctx(priv, tirc, tt); + + err = mlx5_create_tir(mdev, in, inlen, &priv->tirn[tt]); + + kvfree(in); + + return err; +} + +static void mlx5e_close_tir(struct mlx5e_priv *priv, int tt) +{ + mlx5_destroy_tir(priv->mdev, priv->tirn[tt]); +} + +static int mlx5e_open_tirs(struct mlx5e_priv *priv) +{ + int err; + int i; + + for (i = 0; i < MLX5E_NUM_TT; i++) { + err = mlx5e_open_tir(priv, i); + if (err) + goto err_close_tirs; + } + + return 0; + +err_close_tirs: + for (i--; i >= 0; i--) + mlx5e_close_tir(priv, i); + + return err; +} + +static void mlx5e_close_tirs(struct mlx5e_priv *priv) +{ + int i; + + for (i = 0; i < MLX5E_NUM_TT; i++) + mlx5e_close_tir(priv, i); +} + +int mlx5e_open_locked(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + int actual_mtu; + int num_txqs; + int err; + + num_txqs = roundup_pow_of_two(priv->params.num_channels) * + priv->params.num_tc; + netif_set_real_num_tx_queues(netdev, num_txqs); + netif_set_real_num_rx_queues(netdev, priv->params.num_channels); + + err = mlx5_set_port_mtu(mdev, netdev->mtu); + if (err) { + netdev_err(netdev, "%s: mlx5_set_port_mtu failed %d\n", + __func__, err); + return err; + } + + err = mlx5_query_port_oper_mtu(mdev, &actual_mtu); + if (err) { + netdev_err(netdev, "%s: mlx5_query_port_oper_mtu failed %d\n", + __func__, err); + return err; + } + + if (actual_mtu != netdev->mtu) + netdev_warn(netdev, "%s: Failed to set MTU to %d\n", + __func__, netdev->mtu); + + netdev->mtu = actual_mtu; + + err = mlx5e_open_tises(priv); + if (err) { + netdev_err(netdev, "%s: mlx5e_open_tises failed, %d\n", + __func__, err); + return err; + } + + err = mlx5e_open_channels(priv); + if (err) { + netdev_err(netdev, "%s: mlx5e_open_channels failed, %d\n", + __func__, err); + goto err_close_tises; + } + + err = mlx5e_open_rqt(priv); + if (err) { + netdev_err(netdev, "%s: mlx5e_open_rqt failed, %d\n", + __func__, err); + goto err_close_channels; + } + + err = mlx5e_open_tirs(priv); + if (err) { + netdev_err(netdev, "%s: mlx5e_open_tir failed, %d\n", + __func__, err); + goto err_close_rqls; + } + + err = mlx5e_open_flow_table(priv); + if (err) { + netdev_err(netdev, "%s: mlx5e_open_flow_table failed, %d\n", + __func__, err); + goto err_close_tirs; + } + + err = mlx5e_add_all_vlan_rules(priv); + if (err) { + netdev_err(netdev, "%s: mlx5e_add_all_vlan_rules failed, %d\n", + __func__, err); + goto err_close_flow_table; + } + + mlx5e_init_eth_addr(priv); + + set_bit(MLX5E_STATE_OPENED, &priv->state); + + mlx5e_update_carrier(priv); + mlx5e_set_rx_mode_core(priv); + + schedule_delayed_work(&priv->update_stats_work, 0); + return 0; + +err_close_flow_table: + mlx5e_close_flow_table(priv); + +err_close_tirs: + mlx5e_close_tirs(priv); + +err_close_rqls: + mlx5e_close_rqt(priv); + +err_close_channels: + mlx5e_close_channels(priv); + +err_close_tises: + mlx5e_close_tises(priv); + + return err; +} + +static int mlx5e_open(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + int err; + + mutex_lock(&priv->state_lock); + err = mlx5e_open_locked(netdev); + mutex_unlock(&priv->state_lock); + + return err; +} + +int mlx5e_close_locked(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + clear_bit(MLX5E_STATE_OPENED, &priv->state); + + mlx5e_set_rx_mode_core(priv); + mlx5e_del_all_vlan_rules(priv); + netif_carrier_off(priv->netdev); + mlx5e_close_flow_table(priv); + mlx5e_close_tirs(priv); + mlx5e_close_rqt(priv); + mlx5e_close_channels(priv); + mlx5e_close_tises(priv); + + return 0; +} + +static int mlx5e_close(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + int err; + + mutex_lock(&priv->state_lock); + err = mlx5e_close_locked(netdev); + mutex_unlock(&priv->state_lock); + + return err; +} + +int mlx5e_update_priv_params(struct mlx5e_priv *priv, + struct mlx5e_params *new_params) +{ + int err = 0; + int was_opened; + + WARN_ON(!mutex_is_locked(&priv->state_lock)); + + was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); + if (was_opened) + mlx5e_close_locked(priv->netdev); + + priv->params = *new_params; + + if (was_opened) + err = mlx5e_open_locked(priv->netdev); + + return err; +} + +static struct rtnl_link_stats64 * +mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_vport_stats *vstats = &priv->stats.vport; + + stats->rx_packets = vstats->rx_packets; + stats->rx_bytes = vstats->rx_bytes; + stats->tx_packets = vstats->tx_packets; + stats->tx_bytes = vstats->tx_bytes; + stats->multicast = vstats->rx_multicast_packets + + vstats->tx_multicast_packets; + stats->tx_errors = vstats->tx_error_packets; + stats->rx_errors = vstats->rx_error_packets; + stats->tx_dropped = vstats->tx_queue_dropped; + stats->rx_crc_errors = 0; + stats->rx_length_errors = 0; + + return stats; +} + +static void mlx5e_set_rx_mode(struct net_device *dev) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + schedule_work(&priv->set_rx_mode_work); +} + +static int mlx5e_set_mac(struct net_device *netdev, void *addr) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct sockaddr *saddr = addr; + + if (!is_valid_ether_addr(saddr->sa_data)) + return -EADDRNOTAVAIL; + + netif_addr_lock_bh(netdev); + ether_addr_copy(netdev->dev_addr, saddr->sa_data); + netif_addr_unlock_bh(netdev); + + schedule_work(&priv->set_rx_mode_work); + + return 0; +} + +static int mlx5e_set_features(struct net_device *netdev, + netdev_features_t features) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + netdev_features_t changes = features ^ netdev->features; + struct mlx5e_params new_params; + bool update_params = false; + + mutex_lock(&priv->state_lock); + new_params = priv->params; + + if (changes & NETIF_F_LRO) { + new_params.lro_en = !!(features & NETIF_F_LRO); + update_params = true; + } + + if (update_params) + mlx5e_update_priv_params(priv, &new_params); + + if (changes & NETIF_F_HW_VLAN_CTAG_FILTER) { + if (features & NETIF_F_HW_VLAN_CTAG_FILTER) + mlx5e_enable_vlan_filter(priv); + else + mlx5e_disable_vlan_filter(priv); + } + + mutex_unlock(&priv->state_lock); + + return 0; +} + +static int mlx5e_change_mtu(struct net_device *netdev, int new_mtu) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + int max_mtu; + int err = 0; + + err = mlx5_query_port_max_mtu(mdev, &max_mtu); + if (err) + return err; + + if (new_mtu > max_mtu || new_mtu < MLX5E_PARAMS_MIN_MTU) { + netdev_err(netdev, "%s: Bad MTU size, mtu must be [%d-%d]\n", + __func__, MLX5E_PARAMS_MIN_MTU, max_mtu); + return -EINVAL; + } + + mutex_lock(&priv->state_lock); + netdev->mtu = new_mtu; + err = mlx5e_update_priv_params(priv, &priv->params); + mutex_unlock(&priv->state_lock); + + return err; +} + +static struct net_device_ops mlx5e_netdev_ops = { + .ndo_open = mlx5e_open, + .ndo_stop = mlx5e_close, + .ndo_start_xmit = mlx5e_xmit, + .ndo_get_stats64 = mlx5e_get_stats, + .ndo_set_rx_mode = mlx5e_set_rx_mode, + .ndo_set_mac_address = mlx5e_set_mac, + .ndo_vlan_rx_add_vid = mlx5e_vlan_rx_add_vid, + .ndo_vlan_rx_kill_vid = mlx5e_vlan_rx_kill_vid, + .ndo_set_features = mlx5e_set_features, + .ndo_change_mtu = mlx5e_change_mtu, +}; + +static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev) +{ + if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return -ENOTSUPP; + if (!MLX5_CAP_GEN(mdev, eth_net_offloads) || + !MLX5_CAP_GEN(mdev, nic_flow_table) || + !MLX5_CAP_ETH(mdev, csum_cap) || + !MLX5_CAP_ETH(mdev, max_lso_cap) || + !MLX5_CAP_ETH(mdev, vlan_cap) || + !MLX5_CAP_ETH(mdev, rss_ind_tbl_cap)) { + mlx5_core_warn(mdev, + "Not creating net device, some required device capabilities are missing\n"); + return -ENOTSUPP; + } + return 0; +} + +static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev, + struct net_device *netdev, + int num_comp_vectors) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + priv->params.log_sq_size = + MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; + priv->params.log_rq_size = + MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE; + priv->params.rx_cq_moderation_usec = + MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC; + priv->params.rx_cq_moderation_pkts = + MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS; + priv->params.tx_cq_moderation_usec = + MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC; + priv->params.tx_cq_moderation_pkts = + MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS; + priv->params.min_rx_wqes = + MLX5E_PARAMS_DEFAULT_MIN_RX_WQES; + priv->params.rx_hash_log_tbl_sz = + (order_base_2(num_comp_vectors) > + MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ) ? + order_base_2(num_comp_vectors) : + MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ; + priv->params.num_tc = 1; + priv->params.default_vlan_prio = 0; + + priv->params.lro_en = false && !!MLX5_CAP_ETH(priv->mdev, lro_cap); + priv->params.lro_wqe_sz = + MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ; + + priv->mdev = mdev; + priv->netdev = netdev; + priv->params.num_channels = num_comp_vectors; + priv->order_base_2_num_channels = order_base_2(num_comp_vectors); + priv->queue_mapping_channel_mask = + roundup_pow_of_two(num_comp_vectors) - 1; + priv->num_tc = priv->params.num_tc; + priv->default_vlan_prio = priv->params.default_vlan_prio; + + spin_lock_init(&priv->async_events_spinlock); + mutex_init(&priv->state_lock); + + INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work); + INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work); + INIT_DELAYED_WORK(&priv->update_stats_work, mlx5e_update_stats_work); +} + +static void mlx5e_set_netdev_dev_addr(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + mlx5_query_vport_mac_address(priv->mdev, netdev->dev_addr); +} + +static void mlx5e_build_netdev(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + + SET_NETDEV_DEV(netdev, &mdev->pdev->dev); + + if (priv->num_tc > 1) { + mlx5e_netdev_ops.ndo_select_queue = mlx5e_select_queue; + mlx5e_netdev_ops.ndo_start_xmit = mlx5e_xmit_multi_tc; + } + + netdev->netdev_ops = &mlx5e_netdev_ops; + netdev->watchdog_timeo = 15 * HZ; + + netdev->ethtool_ops = &mlx5e_ethtool_ops; + + netdev->vlan_features |= NETIF_F_IP_CSUM; + netdev->vlan_features |= NETIF_F_IPV6_CSUM; + netdev->vlan_features |= NETIF_F_GRO; + netdev->vlan_features |= NETIF_F_TSO; + netdev->vlan_features |= NETIF_F_TSO6; + netdev->vlan_features |= NETIF_F_RXCSUM; + netdev->vlan_features |= NETIF_F_RXHASH; + + if (!!MLX5_CAP_ETH(mdev, lro_cap)) + netdev->vlan_features |= NETIF_F_LRO; + + netdev->hw_features = netdev->vlan_features; + netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX; + netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX; + netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER; + + netdev->features = netdev->hw_features; + if (!priv->params.lro_en) + netdev->features &= ~NETIF_F_LRO; + + netdev->features |= NETIF_F_HIGHDMA; + + netdev->priv_flags |= IFF_UNICAST_FLT; + + mlx5e_set_netdev_dev_addr(netdev); +} + +static int mlx5e_create_mkey(struct mlx5e_priv *priv, u32 pdn, + struct mlx5_core_mr *mr) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_create_mkey_mbox_in *in; + int err; + + in = mlx5_vzalloc(sizeof(*in)); + if (!in) + return -ENOMEM; + + in->seg.flags = MLX5_PERM_LOCAL_WRITE | + MLX5_PERM_LOCAL_READ | + MLX5_ACCESS_MODE_PA; + in->seg.flags_pd = cpu_to_be32(pdn | MLX5_MKEY_LEN64); + in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); + + err = mlx5_core_create_mkey(mdev, mr, in, sizeof(*in), NULL, NULL, + NULL); + + kvfree(in); + + return err; +} + +static void *mlx5e_create_netdev(struct mlx5_core_dev *mdev) +{ + struct net_device *netdev; + struct mlx5e_priv *priv; + int ncv = mdev->priv.eq_table.num_comp_vectors; + int err; + + if (mlx5e_check_required_hca_cap(mdev)) + return NULL; + + netdev = alloc_etherdev_mqs(sizeof(struct mlx5e_priv), + roundup_pow_of_two(ncv) * MLX5E_MAX_NUM_TC, + ncv); + if (!netdev) { + mlx5_core_err(mdev, "alloc_etherdev_mqs() failed\n"); + return NULL; + } + + mlx5e_build_netdev_priv(mdev, netdev, ncv); + mlx5e_build_netdev(netdev); + + netif_carrier_off(netdev); + + priv = netdev_priv(netdev); + + err = mlx5_alloc_map_uar(mdev, &priv->cq_uar); + if (err) { + netdev_err(netdev, "%s: mlx5_alloc_map_uar failed, %d\n", + __func__, err); + goto err_free_netdev; + } + + err = mlx5_core_alloc_pd(mdev, &priv->pdn); + if (err) { + netdev_err(netdev, "%s: mlx5_core_alloc_pd failed, %d\n", + __func__, err); + goto err_unmap_free_uar; + } + + err = mlx5e_create_mkey(priv, priv->pdn, &priv->mr); + if (err) { + netdev_err(netdev, "%s: mlx5e_create_mkey failed, %d\n", + __func__, err); + goto err_dealloc_pd; + } + + err = register_netdev(netdev); + if (err) { + netdev_err(netdev, "%s: register_netdev failed, %d\n", + __func__, err); + goto err_destroy_mkey; + } + + mlx5e_enable_async_events(priv); + + return priv; + +err_destroy_mkey: + mlx5_core_destroy_mkey(mdev, &priv->mr); + +err_dealloc_pd: + mlx5_core_dealloc_pd(mdev, priv->pdn); + +err_unmap_free_uar: + mlx5_unmap_free_uar(mdev, &priv->cq_uar); + +err_free_netdev: + free_netdev(netdev); + + return NULL; +} + +static void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, void *vpriv) +{ + struct mlx5e_priv *priv = vpriv; + struct net_device *netdev = priv->netdev; + + unregister_netdev(netdev); + mlx5_core_destroy_mkey(priv->mdev, &priv->mr); + mlx5_core_dealloc_pd(priv->mdev, priv->pdn); + mlx5_unmap_free_uar(priv->mdev, &priv->cq_uar); + mlx5e_disable_async_events(priv); + flush_scheduled_work(); + free_netdev(netdev); +} + +static void *mlx5e_get_netdev(void *vpriv) +{ + struct mlx5e_priv *priv = vpriv; + + return priv->netdev; +} + +static struct mlx5_interface mlx5e_interface = { + .add = mlx5e_create_netdev, + .remove = mlx5e_destroy_netdev, + .event = mlx5e_async_event, + .protocol = MLX5_INTERFACE_PROTOCOL_ETH, + .get_dev = mlx5e_get_netdev, +}; + +void mlx5e_init(void) +{ + mlx5_register_interface(&mlx5e_interface); +} + +void mlx5e_cleanup(void) +{ + mlx5_unregister_interface(&mlx5e_interface); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index e7b7b123a128..1c37f587426d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -48,10 +48,6 @@ #include #include "mlx5_core.h" -#define DRIVER_NAME "mlx5_core" -#define DRIVER_VERSION "3.0" -#define DRIVER_RELDATE "January 2015" - MODULE_AUTHOR("Eli Cohen "); MODULE_DESCRIPTION("Mellanox Connect-IB, ConnectX-4 core driver"); MODULE_LICENSE("Dual BSD/GPL"); @@ -614,6 +610,61 @@ clean: return err; } +#ifdef CONFIG_MLX5_CORE_EN +static int mlx5_core_set_issi(struct mlx5_core_dev *dev) +{ + u32 query_in[MLX5_ST_SZ_DW(query_issi_in)]; + u32 query_out[MLX5_ST_SZ_DW(query_issi_out)]; + u32 set_in[MLX5_ST_SZ_DW(set_issi_in)]; + u32 set_out[MLX5_ST_SZ_DW(set_issi_out)]; + int err; + u32 sup_issi; + + memset(query_in, 0, sizeof(query_in)); + memset(query_out, 0, sizeof(query_out)); + + MLX5_SET(query_issi_in, query_in, opcode, MLX5_CMD_OP_QUERY_ISSI); + + err = mlx5_cmd_exec_check_status(dev, query_in, sizeof(query_in), + query_out, sizeof(query_out)); + if (err) { + if (((struct mlx5_outbox_hdr *)query_out)->status == + MLX5_CMD_STAT_BAD_OP_ERR) { + pr_debug("Only ISSI 0 is supported\n"); + return 0; + } + + pr_err("failed to query ISSI\n"); + return err; + } + + sup_issi = MLX5_GET(query_issi_out, query_out, supported_issi_dw0); + + if (sup_issi & (1 << 1)) { + memset(set_in, 0, sizeof(set_in)); + memset(set_out, 0, sizeof(set_out)); + + MLX5_SET(set_issi_in, set_in, opcode, MLX5_CMD_OP_SET_ISSI); + MLX5_SET(set_issi_in, set_in, current_issi, 1); + + err = mlx5_cmd_exec_check_status(dev, set_in, sizeof(set_in), + set_out, sizeof(set_out)); + if (err) { + pr_err("failed to set ISSI=1\n"); + return err; + } + + dev->issi = 1; + + return 0; + } else if (sup_issi & (1 << 0)) { + return 0; + } + + return -ENOTSUPP; +} +#endif + static int mlx5_dev_init(struct mlx5_core_dev *dev, struct pci_dev *pdev) { struct mlx5_priv *priv = &dev->priv; @@ -676,6 +727,14 @@ static int mlx5_dev_init(struct mlx5_core_dev *dev, struct pci_dev *pdev) goto err_pagealloc_cleanup; } +#ifdef CONFIG_MLX5_CORE_EN + err = mlx5_core_set_issi(dev); + if (err) { + dev_err(&pdev->dev, "failed to set issi\n"); + goto err_disable_hca; + } +#endif + err = mlx5_satisfy_startup_pages(dev, 1); if (err) { dev_err(&pdev->dev, "failed to allocate boot pages\n"); @@ -1084,6 +1143,10 @@ static int __init init(void) if (err) goto err_health; +#ifdef CONFIG_MLX5_CORE_EN + mlx5e_init(); +#endif + return 0; err_health: @@ -1096,6 +1159,9 @@ err_debug: static void __exit cleanup(void) { +#ifdef CONFIG_MLX5_CORE_EN + mlx5e_cleanup(); +#endif pci_unregister_driver(&mlx5_core_driver); mlx5_health_cleanup(); destroy_workqueue(mlx5_core_wq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index b986f1c258bc..6983c1047255 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. + * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -37,6 +37,10 @@ #include #include +#define DRIVER_NAME "mlx5_core" +#define DRIVER_VERSION "3.0-1" +#define DRIVER_RELDATE "January 2015" + extern int mlx5_core_debug_mask; #define mlx5_core_dbg(dev, format, ...) \ @@ -78,4 +82,7 @@ int mlx5_cmd_query_adapter(struct mlx5_core_dev *dev); int mlx5_cmd_init_hca(struct mlx5_core_dev *dev); int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev); +void mlx5e_init(void); +void mlx5e_cleanup(void); + #endif /* __MLX5_CORE_H__ */ diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 4ee52bf1f959..b288c538347a 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1153,4 +1153,23 @@ enum mlx5_cap_type { #define MLX5_CAP_ODP(mdev, cap)\ MLX5_GET(odp_cap, mdev->hca_caps_cur[MLX5_CAP_ODP], cap) +enum { + MLX5_CMD_STAT_OK = 0x0, + MLX5_CMD_STAT_INT_ERR = 0x1, + MLX5_CMD_STAT_BAD_OP_ERR = 0x2, + MLX5_CMD_STAT_BAD_PARAM_ERR = 0x3, + MLX5_CMD_STAT_BAD_SYS_STATE_ERR = 0x4, + MLX5_CMD_STAT_BAD_RES_ERR = 0x5, + MLX5_CMD_STAT_RES_BUSY = 0x6, + MLX5_CMD_STAT_LIM_ERR = 0x8, + MLX5_CMD_STAT_BAD_RES_STATE_ERR = 0x9, + MLX5_CMD_STAT_IX_ERR = 0xa, + MLX5_CMD_STAT_NO_RES_ERR = 0xf, + MLX5_CMD_STAT_BAD_INP_LEN_ERR = 0x50, + MLX5_CMD_STAT_BAD_OUTP_LEN_ERR = 0x51, + MLX5_CMD_STAT_BAD_QP_STATE_ERR = 0x10, + MLX5_CMD_STAT_BAD_PKT_ERR = 0x30, + MLX5_CMD_STAT_BAD_SIZE_OUTS_CQES_ERR = 0x40, +}; + #endif /* MLX5_DEVICE_H */ diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 51738472657e..7fa26f03acc1 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -489,6 +489,7 @@ struct mlx5_core_dev { struct mlx5_priv priv; struct mlx5_profile *profile; atomic_t num_qps; + u32 issi; }; struct mlx5_db { -- cgit v1.2.3 From c66fa19c405a36673d4aab13658c8246413d5c0f Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Sun, 31 May 2015 09:30:16 +0300 Subject: net/mlx4: Add EQ pool Previously, mlx4_en allocated EQs and used them exclusively. This affected RoCE performance, as applications which are events sensitive were limited to use only the legacy EQs. Change that by introducing an EQ pool. This pool is managed by mlx4_core. EQs are assigned to ports (when there are limited number of EQs, multiple ports could be assigned to the same EQs). An exception to this rule is the ASYNC EQ which handles various events. Legacy EQs are completely removed as all EQs could be shared. When a consumer (mlx4_ib/mlx4_en) requests an EQ, it asks for EQ serving on a specific port. The core driver calculates which EQ should be assigned to that request. Because IRQs are shared between IB and Ethernet modules, their names only include the PCI device BDF address. Signed-off-by: Matan Barak Signed-off-by: Ido Shamay Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/infiniband/hw/mlx4/main.c | 71 ++--- drivers/infiniband/hw/mlx4/mlx4_ib.h | 1 - drivers/net/ethernet/mellanox/mlx4/cq.c | 10 +- drivers/net/ethernet/mellanox/mlx4/en_cq.c | 48 ++-- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 7 +- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 13 +- drivers/net/ethernet/mellanox/mlx4/eq.c | 353 +++++++++++++++---------- drivers/net/ethernet/mellanox/mlx4/main.c | 74 ++++-- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 11 +- drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 2 +- include/linux/mlx4/device.h | 11 +- 11 files changed, 342 insertions(+), 259 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 8c96c71e7bab..024b0f745035 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2041,77 +2041,52 @@ static void init_pkeys(struct mlx4_ib_dev *ibdev) static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev) { - char name[80]; - int eq_per_port = 0; - int added_eqs = 0; - int total_eqs = 0; - int i, j, eq; - - /* Legacy mode or comp_pool is not large enough */ - if (dev->caps.comp_pool == 0 || - dev->caps.num_ports > dev->caps.comp_pool) - return; - - eq_per_port = dev->caps.comp_pool / dev->caps.num_ports; - - /* Init eq table */ - added_eqs = 0; - mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) - added_eqs += eq_per_port; - - total_eqs = dev->caps.num_comp_vectors + added_eqs; + int i, j, eq = 0, total_eqs = 0; - ibdev->eq_table = kzalloc(total_eqs * sizeof(int), GFP_KERNEL); + ibdev->eq_table = kcalloc(dev->caps.num_comp_vectors, + sizeof(ibdev->eq_table[0]), GFP_KERNEL); if (!ibdev->eq_table) return; - ibdev->eq_added = added_eqs; - - eq = 0; - mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) { - for (j = 0; j < eq_per_port; j++) { - snprintf(name, sizeof(name), "mlx4-ib-%d-%d@%s", - i, j, dev->persist->pdev->bus->name); - /* Set IRQ for specific name (per ring) */ - if (mlx4_assign_eq(dev, name, NULL, - &ibdev->eq_table[eq])) { - /* Use legacy (same as mlx4_en driver) */ - pr_warn("Can't allocate EQ %d; reverting to legacy\n", eq); - ibdev->eq_table[eq] = - (eq % dev->caps.num_comp_vectors); - } - eq++; + for (i = 1; i <= dev->caps.num_ports; i++) { + for (j = 0; j < mlx4_get_eqs_per_port(dev, i); + j++, total_eqs++) { + if (i > 1 && mlx4_is_eq_shared(dev, total_eqs)) + continue; + ibdev->eq_table[eq] = total_eqs; + if (!mlx4_assign_eq(dev, i, + &ibdev->eq_table[eq])) + eq++; + else + ibdev->eq_table[eq] = -1; } } - /* Fill the reset of the vector with legacy EQ */ - for (i = 0, eq = added_eqs; i < dev->caps.num_comp_vectors; i++) - ibdev->eq_table[eq++] = i; + for (i = eq; i < dev->caps.num_comp_vectors; + ibdev->eq_table[i++] = -1) + ; /* Advertise the new number of EQs to clients */ - ibdev->ib_dev.num_comp_vectors = total_eqs; + ibdev->ib_dev.num_comp_vectors = eq; } static void mlx4_ib_free_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev) { int i; + int total_eqs = ibdev->ib_dev.num_comp_vectors; - /* no additional eqs were added */ + /* no eqs were allocated */ if (!ibdev->eq_table) return; /* Reset the advertised EQ number */ - ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors; + ibdev->ib_dev.num_comp_vectors = 0; - /* Free only the added eqs */ - for (i = 0; i < ibdev->eq_added; i++) { - /* Don't free legacy eqs if used */ - if (ibdev->eq_table[i] <= dev->caps.num_comp_vectors) - continue; + for (i = 0; i < total_eqs; i++) mlx4_release_eq(dev, ibdev->eq_table[i]); - } kfree(ibdev->eq_table); + ibdev->eq_table = NULL; } static void *mlx4_ib_add(struct mlx4_dev *dev) diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index fce3934372a1..ef80e6c99a68 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -523,7 +523,6 @@ struct mlx4_ib_dev { struct mlx4_ib_iboe iboe; int counters[MLX4_MAX_PORTS]; int *eq_table; - int eq_added; struct kobject *iov_parent; struct kobject *ports_parent; struct kobject *dev_ports_parent[MLX4_MFUNC_MAX]; diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c index e71f31387ac6..7431cd4d7390 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/cq.c @@ -292,7 +292,7 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, u64 mtt_addr; int err; - if (vector > dev->caps.num_comp_vectors + dev->caps.comp_pool) + if (vector >= dev->caps.num_comp_vectors) return -EINVAL; cq->vector = vector; @@ -319,7 +319,7 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, cq_context->flags |= cpu_to_be32(1 << 19); cq_context->logsize_usrpage = cpu_to_be32((ilog2(nent) << 24) | uar->index); - cq_context->comp_eqn = priv->eq_table.eq[vector].eqn; + cq_context->comp_eqn = priv->eq_table.eq[MLX4_CQ_TO_EQ_VECTOR(vector)].eqn; cq_context->log_page_size = mtt->page_shift - MLX4_ICM_PAGE_SHIFT; mtt_addr = mlx4_mtt_addr(dev, mtt); @@ -339,11 +339,11 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, init_completion(&cq->free); cq->comp = mlx4_add_cq_to_tasklet; cq->tasklet_ctx.priv = - &priv->eq_table.eq[cq->vector].tasklet_ctx; + &priv->eq_table.eq[MLX4_CQ_TO_EQ_VECTOR(vector)].tasklet_ctx; INIT_LIST_HEAD(&cq->tasklet_ctx.list); - cq->irq = priv->eq_table.eq[cq->vector].irq; + cq->irq = priv->eq_table.eq[MLX4_CQ_TO_EQ_VECTOR(vector)].irq; return 0; err_radix: @@ -368,7 +368,7 @@ void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq) if (err) mlx4_warn(dev, "HW2SW_CQ failed (%d) for CQN %06x\n", err, cq->cqn); - synchronize_irq(priv->eq_table.eq[cq->vector].irq); + synchronize_irq(priv->eq_table.eq[MLX4_CQ_TO_EQ_VECTOR(cq->vector)].irq); spin_lock_irq(&cq_table->lock); radix_tree_delete(&cq_table->tree, cq->cqn); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c index 22da4d0d0f05..d71c567eb076 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c @@ -66,6 +66,7 @@ int mlx4_en_create_cq(struct mlx4_en_priv *priv, cq->ring = ring; cq->is_tx = mode; + cq->vector = mdev->dev->caps.num_comp_vectors; /* Allocate HW buffers on provided NUMA node. * dev->numa_node is used in mtt range allocation flow. @@ -101,12 +102,7 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, int err = 0; char name[25]; int timestamp_en = 0; - struct cpu_rmap *rmap = -#ifdef CONFIG_RFS_ACCEL - priv->dev->rx_cpu_rmap; -#else - NULL; -#endif + bool assigned_eq = false; cq->dev = mdev->pndev[priv->port]; cq->mcq.set_ci_db = cq->wqres.db.db; @@ -116,23 +112,19 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, memset(cq->buf, 0, cq->buf_size); if (cq->is_tx == RX) { - if (mdev->dev->caps.comp_pool) { - if (!cq->vector) { - sprintf(name, "%s-%d", priv->dev->name, - cq->ring); - /* Set IRQ for specific name (per ring) */ - if (mlx4_assign_eq(mdev->dev, name, rmap, - &cq->vector)) { - cq->vector = (cq->ring + 1 + priv->port) - % mdev->dev->caps.num_comp_vectors; - mlx4_warn(mdev, "Failed assigning an EQ to %s, falling back to legacy EQ's\n", - name); - } - + if (!mlx4_is_eq_vector_valid(mdev->dev, priv->port, + cq->vector)) { + cq->vector = cq_idx; + + err = mlx4_assign_eq(mdev->dev, priv->port, + &cq->vector); + if (err) { + mlx4_err(mdev, "Failed assigning an EQ to %s\n", + name); + goto free_eq; } - } else { - cq->vector = (cq->ring + 1 + priv->port) % - mdev->dev->caps.num_comp_vectors; + + assigned_eq = true; } cq->irq_desc = @@ -159,7 +151,7 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, &mdev->priv_uar, cq->wqres.db.dma, &cq->mcq, cq->vector, 0, timestamp_en); if (err) - return err; + goto free_eq; cq->mcq.comp = cq->is_tx ? mlx4_en_tx_irq : mlx4_en_rx_irq; cq->mcq.event = mlx4_en_cq_event; @@ -182,6 +174,12 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, napi_enable(&cq->napi); return 0; + +free_eq: + if (assigned_eq) + mlx4_release_eq(mdev->dev, cq->vector); + cq->vector = mdev->dev->caps.num_comp_vectors; + return err; } void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq) @@ -191,9 +189,9 @@ void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq) mlx4_en_unmap_buffer(&cq->wqres.buf); mlx4_free_hwq_res(mdev->dev, &cq->wqres, cq->buf_size); - if (priv->mdev->dev->caps.comp_pool && cq->vector) { + if (mlx4_is_eq_vector_valid(mdev->dev, priv->port, cq->vector) && + cq->is_tx == RX) mlx4_release_eq(priv->mdev->dev, cq->vector); - } cq->vector = 0; cq->buf_size = 0; cq->buf = NULL; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 32f5ec737472..455cecae5aa4 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1958,7 +1958,6 @@ void mlx4_en_free_resources(struct mlx4_en_priv *priv) int i; #ifdef CONFIG_RFS_ACCEL - free_irq_cpu_rmap(priv->dev->rx_cpu_rmap); priv->dev->rx_cpu_rmap = NULL; #endif @@ -2016,11 +2015,7 @@ int mlx4_en_alloc_resources(struct mlx4_en_priv *priv) } #ifdef CONFIG_RFS_ACCEL - if (priv->mdev->dev->caps.comp_pool) { - priv->dev->rx_cpu_rmap = alloc_irq_cpu_rmap(priv->mdev->dev->caps.comp_pool); - if (!priv->dev->rx_cpu_rmap) - goto err; - } + priv->dev->rx_cpu_rmap = mlx4_get_cpu_rmap(priv->mdev->dev, priv->port); #endif return 0; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 2a77a6b19121..35f726c17e48 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -337,15 +337,10 @@ void mlx4_en_set_num_rx_rings(struct mlx4_en_dev *mdev) struct mlx4_dev *dev = mdev->dev; mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH) { - if (!dev->caps.comp_pool) - num_of_eqs = max_t(int, MIN_RX_RINGS, - min_t(int, - dev->caps.num_comp_vectors, - DEF_RX_RINGS)); - else - num_of_eqs = min_t(int, MAX_MSIX_P_PORT, - dev->caps.comp_pool/ - dev->caps.num_ports) - 1; + num_of_eqs = max_t(int, MIN_RX_RINGS, + min_t(int, + mlx4_get_eqs_per_port(mdev->dev, i), + DEF_RX_RINGS)); num_rx_rings = mlx4_low_memory_profile() ? MIN_RX_RINGS : min_t(int, num_of_eqs, diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index 80bcd648c5e0..2e6fc6a860a7 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -895,8 +895,8 @@ static int mlx4_num_eq_uar(struct mlx4_dev *dev) * we need to map, take the difference of highest index and * the lowest index we'll use and add 1. */ - return (dev->caps.num_comp_vectors + 1 + dev->caps.reserved_eqs + - dev->caps.comp_pool)/4 - dev->caps.reserved_eqs/4 + 1; + return (dev->caps.num_comp_vectors + 1 + dev->caps.reserved_eqs) / 4 - + dev->caps.reserved_eqs / 4 + 1; } static void __iomem *mlx4_get_eq_uar(struct mlx4_dev *dev, struct mlx4_eq *eq) @@ -1085,8 +1085,7 @@ static void mlx4_free_eq(struct mlx4_dev *dev, static void mlx4_free_irqs(struct mlx4_dev *dev) { struct mlx4_eq_table *eq_table = &mlx4_priv(dev)->eq_table; - struct mlx4_priv *priv = mlx4_priv(dev); - int i, vec; + int i; if (eq_table->have_irq) free_irq(dev->persist->pdev->irq, dev); @@ -1097,20 +1096,6 @@ static void mlx4_free_irqs(struct mlx4_dev *dev) eq_table->eq[i].have_irq = 0; } - for (i = 0; i < dev->caps.comp_pool; i++) { - /* - * Freeing the assigned irq's - * all bits should be 0, but we need to validate - */ - if (priv->msix_ctl.pool_bm & 1ULL << i) { - /* NO need protecting*/ - vec = dev->caps.num_comp_vectors + 1 + i; - free_irq(priv->eq_table.eq[vec].irq, - &priv->eq_table.eq[vec]); - } - } - - kfree(eq_table->irq_names); } @@ -1191,76 +1176,73 @@ int mlx4_init_eq_table(struct mlx4_dev *dev) } priv->eq_table.irq_names = - kmalloc(MLX4_IRQNAME_SIZE * (dev->caps.num_comp_vectors + 1 + - dev->caps.comp_pool), + kmalloc(MLX4_IRQNAME_SIZE * (dev->caps.num_comp_vectors + 1), GFP_KERNEL); if (!priv->eq_table.irq_names) { err = -ENOMEM; - goto err_out_bitmap; + goto err_out_clr_int; } - for (i = 0; i < dev->caps.num_comp_vectors; ++i) { - err = mlx4_create_eq(dev, dev->caps.num_cqs - - dev->caps.reserved_cqs + - MLX4_NUM_SPARE_EQE, - (dev->flags & MLX4_FLAG_MSI_X) ? i : 0, - &priv->eq_table.eq[i]); - if (err) { - --i; - goto err_out_unmap; - } - } - - err = mlx4_create_eq(dev, MLX4_NUM_ASYNC_EQE + MLX4_NUM_SPARE_EQE, - (dev->flags & MLX4_FLAG_MSI_X) ? dev->caps.num_comp_vectors : 0, - &priv->eq_table.eq[dev->caps.num_comp_vectors]); - if (err) - goto err_out_comp; - - /*if additional completion vectors poolsize is 0 this loop will not run*/ - for (i = dev->caps.num_comp_vectors + 1; - i < dev->caps.num_comp_vectors + dev->caps.comp_pool + 1; ++i) { + for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i) { + if (i == MLX4_EQ_ASYNC) { + err = mlx4_create_eq(dev, + MLX4_NUM_ASYNC_EQE + MLX4_NUM_SPARE_EQE, + 0, &priv->eq_table.eq[MLX4_EQ_ASYNC]); + } else { +#ifdef CONFIG_RFS_ACCEL + struct mlx4_eq *eq = &priv->eq_table.eq[i]; + int port = find_first_bit(eq->actv_ports.ports, + dev->caps.num_ports) + 1; + + if (port <= dev->caps.num_ports) { + struct mlx4_port_info *info = + &mlx4_priv(dev)->port[port]; + + if (!info->rmap) { + info->rmap = alloc_irq_cpu_rmap( + mlx4_get_eqs_per_port(dev, port)); + if (!info->rmap) { + mlx4_warn(dev, "Failed to allocate cpu rmap\n"); + err = -ENOMEM; + goto err_out_unmap; + } + } - err = mlx4_create_eq(dev, dev->caps.num_cqs - - dev->caps.reserved_cqs + - MLX4_NUM_SPARE_EQE, - (dev->flags & MLX4_FLAG_MSI_X) ? i : 0, - &priv->eq_table.eq[i]); - if (err) { - --i; - goto err_out_unmap; + err = irq_cpu_rmap_add( + info->rmap, eq->irq); + if (err) + mlx4_warn(dev, "Failed adding irq rmap\n"); + } +#endif + err = mlx4_create_eq(dev, dev->caps.num_cqs - + dev->caps.reserved_cqs + + MLX4_NUM_SPARE_EQE, + (dev->flags & MLX4_FLAG_MSI_X) ? + i + 1 - !!(i > MLX4_EQ_ASYNC) : 0, + eq); } + if (err) + goto err_out_unmap; } - if (dev->flags & MLX4_FLAG_MSI_X) { const char *eq_name; - for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i) { - if (i < dev->caps.num_comp_vectors) { - snprintf(priv->eq_table.irq_names + - i * MLX4_IRQNAME_SIZE, - MLX4_IRQNAME_SIZE, - "mlx4-comp-%d@pci:%s", i, - pci_name(dev->persist->pdev)); - } else { - snprintf(priv->eq_table.irq_names + - i * MLX4_IRQNAME_SIZE, - MLX4_IRQNAME_SIZE, - "mlx4-async@pci:%s", - pci_name(dev->persist->pdev)); - } + snprintf(priv->eq_table.irq_names + + MLX4_EQ_ASYNC * MLX4_IRQNAME_SIZE, + MLX4_IRQNAME_SIZE, + "mlx4-async@pci:%s", + pci_name(dev->persist->pdev)); + eq_name = priv->eq_table.irq_names + + MLX4_EQ_ASYNC * MLX4_IRQNAME_SIZE; - eq_name = priv->eq_table.irq_names + - i * MLX4_IRQNAME_SIZE; - err = request_irq(priv->eq_table.eq[i].irq, - mlx4_msi_x_interrupt, 0, eq_name, - priv->eq_table.eq + i); - if (err) - goto err_out_async; + err = request_irq(priv->eq_table.eq[MLX4_EQ_ASYNC].irq, + mlx4_msi_x_interrupt, 0, eq_name, + priv->eq_table.eq + MLX4_EQ_ASYNC); + if (err) + goto err_out_unmap; - priv->eq_table.eq[i].have_irq = 1; - } + priv->eq_table.eq[MLX4_EQ_ASYNC].have_irq = 1; } else { snprintf(priv->eq_table.irq_names, MLX4_IRQNAME_SIZE, @@ -1269,36 +1251,38 @@ int mlx4_init_eq_table(struct mlx4_dev *dev) err = request_irq(dev->persist->pdev->irq, mlx4_interrupt, IRQF_SHARED, priv->eq_table.irq_names, dev); if (err) - goto err_out_async; + goto err_out_unmap; priv->eq_table.have_irq = 1; } err = mlx4_MAP_EQ(dev, get_async_ev_mask(dev), 0, - priv->eq_table.eq[dev->caps.num_comp_vectors].eqn); + priv->eq_table.eq[MLX4_EQ_ASYNC].eqn); if (err) mlx4_warn(dev, "MAP_EQ for async EQ %d failed (%d)\n", - priv->eq_table.eq[dev->caps.num_comp_vectors].eqn, err); + priv->eq_table.eq[MLX4_EQ_ASYNC].eqn, err); - for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i) - eq_set_ci(&priv->eq_table.eq[i], 1); + /* arm ASYNC eq */ + eq_set_ci(&priv->eq_table.eq[MLX4_EQ_ASYNC], 1); return 0; -err_out_async: - mlx4_free_eq(dev, &priv->eq_table.eq[dev->caps.num_comp_vectors]); - -err_out_comp: - i = dev->caps.num_comp_vectors - 1; - err_out_unmap: - while (i >= 0) { - mlx4_free_eq(dev, &priv->eq_table.eq[i]); - --i; + while (i >= 0) + mlx4_free_eq(dev, &priv->eq_table.eq[i--]); +#ifdef CONFIG_RFS_ACCEL + for (i = 1; i <= dev->caps.num_ports; i++) { + if (mlx4_priv(dev)->port[i].rmap) { + free_irq_cpu_rmap(mlx4_priv(dev)->port[i].rmap); + mlx4_priv(dev)->port[i].rmap = NULL; + } } +#endif + mlx4_free_irqs(dev); + +err_out_clr_int: if (!mlx4_is_slave(dev)) mlx4_unmap_clr_int(dev); - mlx4_free_irqs(dev); err_out_bitmap: mlx4_unmap_uar(dev); @@ -1316,11 +1300,19 @@ void mlx4_cleanup_eq_table(struct mlx4_dev *dev) int i; mlx4_MAP_EQ(dev, get_async_ev_mask(dev), 1, - priv->eq_table.eq[dev->caps.num_comp_vectors].eqn); + priv->eq_table.eq[MLX4_EQ_ASYNC].eqn); +#ifdef CONFIG_RFS_ACCEL + for (i = 1; i <= dev->caps.num_ports; i++) { + if (mlx4_priv(dev)->port[i].rmap) { + free_irq_cpu_rmap(mlx4_priv(dev)->port[i].rmap); + mlx4_priv(dev)->port[i].rmap = NULL; + } + } +#endif mlx4_free_irqs(dev); - for (i = 0; i < dev->caps.num_comp_vectors + dev->caps.comp_pool + 1; ++i) + for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i) mlx4_free_eq(dev, &priv->eq_table.eq[i]); if (!mlx4_is_slave(dev)) @@ -1371,87 +1363,166 @@ int mlx4_test_interrupts(struct mlx4_dev *dev) /* Return to default */ mlx4_MAP_EQ(dev, get_async_ev_mask(dev), 0, - priv->eq_table.eq[dev->caps.num_comp_vectors].eqn); + priv->eq_table.eq[MLX4_EQ_ASYNC].eqn); return err; } EXPORT_SYMBOL(mlx4_test_interrupts); -int mlx4_assign_eq(struct mlx4_dev *dev, char *name, struct cpu_rmap *rmap, - int *vector) +bool mlx4_is_eq_vector_valid(struct mlx4_dev *dev, u8 port, int vector) { + struct mlx4_priv *priv = mlx4_priv(dev); + vector = MLX4_CQ_TO_EQ_VECTOR(vector); + if (vector < 0 || (vector >= dev->caps.num_comp_vectors + 1) || + (vector == MLX4_EQ_ASYNC)) + return false; + + return test_bit(port - 1, priv->eq_table.eq[vector].actv_ports.ports); +} +EXPORT_SYMBOL(mlx4_is_eq_vector_valid); + +u32 mlx4_get_eqs_per_port(struct mlx4_dev *dev, u8 port) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + unsigned int i; + unsigned int sum = 0; + + for (i = 0; i < dev->caps.num_comp_vectors + 1; i++) + sum += !!test_bit(port - 1, + priv->eq_table.eq[i].actv_ports.ports); + + return sum; +} +EXPORT_SYMBOL(mlx4_get_eqs_per_port); + +int mlx4_is_eq_shared(struct mlx4_dev *dev, int vector) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + + vector = MLX4_CQ_TO_EQ_VECTOR(vector); + if (vector <= 0 || (vector >= dev->caps.num_comp_vectors + 1)) + return -EINVAL; + + return !!(bitmap_weight(priv->eq_table.eq[vector].actv_ports.ports, + dev->caps.num_ports) > 1); +} +EXPORT_SYMBOL(mlx4_is_eq_shared); + +struct cpu_rmap *mlx4_get_cpu_rmap(struct mlx4_dev *dev, int port) +{ + return mlx4_priv(dev)->port[port].rmap; +} +EXPORT_SYMBOL(mlx4_get_cpu_rmap); + +int mlx4_assign_eq(struct mlx4_dev *dev, u8 port, int *vector) +{ struct mlx4_priv *priv = mlx4_priv(dev); - int vec = 0, err = 0, i; + int err = 0, i = 0; + u32 min_ref_count_val = (u32)-1; + int requested_vector = MLX4_CQ_TO_EQ_VECTOR(*vector); + int *prequested_vector = NULL; + mutex_lock(&priv->msix_ctl.pool_lock); - for (i = 0; !vec && i < dev->caps.comp_pool; i++) { - if (~priv->msix_ctl.pool_bm & 1ULL << i) { - priv->msix_ctl.pool_bm |= 1ULL << i; - vec = dev->caps.num_comp_vectors + 1 + i; - snprintf(priv->eq_table.irq_names + - vec * MLX4_IRQNAME_SIZE, - MLX4_IRQNAME_SIZE, "%s", name); -#ifdef CONFIG_RFS_ACCEL - if (rmap) { - err = irq_cpu_rmap_add(rmap, - priv->eq_table.eq[vec].irq); - if (err) - mlx4_warn(dev, "Failed adding irq rmap\n"); + if (requested_vector < (dev->caps.num_comp_vectors + 1) && + (requested_vector >= 0) && + (requested_vector != MLX4_EQ_ASYNC)) { + if (test_bit(port - 1, + priv->eq_table.eq[requested_vector].actv_ports.ports)) { + prequested_vector = &requested_vector; + } else { + struct mlx4_eq *eq; + + for (i = 1; i < port; + requested_vector += mlx4_get_eqs_per_port(dev, i++)) + ; + + eq = &priv->eq_table.eq[requested_vector]; + if (requested_vector < dev->caps.num_comp_vectors + 1 && + test_bit(port - 1, eq->actv_ports.ports)) { + prequested_vector = &requested_vector; } -#endif - err = request_irq(priv->eq_table.eq[vec].irq, - mlx4_msi_x_interrupt, 0, - &priv->eq_table.irq_names[vec<<5], - priv->eq_table.eq + vec); - if (err) { - /*zero out bit by fliping it*/ - priv->msix_ctl.pool_bm ^= 1 << i; - vec = 0; - continue; - /*we dont want to break here*/ + } + } + + if (!prequested_vector) { + requested_vector = -1; + for (i = 0; min_ref_count_val && i < dev->caps.num_comp_vectors + 1; + i++) { + struct mlx4_eq *eq = &priv->eq_table.eq[i]; + + if (min_ref_count_val > eq->ref_count && + test_bit(port - 1, eq->actv_ports.ports)) { + min_ref_count_val = eq->ref_count; + requested_vector = i; } + } - eq_set_ci(&priv->eq_table.eq[vec], 1); + if (requested_vector < 0) { + err = -ENOSPC; + goto err_unlock; } + + prequested_vector = &requested_vector; } + + if (!test_bit(*prequested_vector, priv->msix_ctl.pool_bm) && + dev->flags & MLX4_FLAG_MSI_X) { + set_bit(*prequested_vector, priv->msix_ctl.pool_bm); + snprintf(priv->eq_table.irq_names + + *prequested_vector * MLX4_IRQNAME_SIZE, + MLX4_IRQNAME_SIZE, "mlx4-%d@%s", + *prequested_vector, dev_name(&dev->persist->pdev->dev)); + + err = request_irq(priv->eq_table.eq[*prequested_vector].irq, + mlx4_msi_x_interrupt, 0, + &priv->eq_table.irq_names[*prequested_vector << 5], + priv->eq_table.eq + *prequested_vector); + + if (err) { + clear_bit(*prequested_vector, priv->msix_ctl.pool_bm); + *prequested_vector = -1; + } else { + eq_set_ci(&priv->eq_table.eq[*prequested_vector], 1); + priv->eq_table.eq[*prequested_vector].have_irq = 1; + } + } + + if (!err && *prequested_vector >= 0) + priv->eq_table.eq[*prequested_vector].ref_count++; + +err_unlock: mutex_unlock(&priv->msix_ctl.pool_lock); - if (vec) { - *vector = vec; - } else { + if (!err && *prequested_vector >= 0) + *vector = MLX4_EQ_TO_CQ_VECTOR(*prequested_vector); + else *vector = 0; - err = (i == dev->caps.comp_pool) ? -ENOSPC : err; - } + return err; } EXPORT_SYMBOL(mlx4_assign_eq); -int mlx4_eq_get_irq(struct mlx4_dev *dev, int vec) +int mlx4_eq_get_irq(struct mlx4_dev *dev, int cq_vec) { struct mlx4_priv *priv = mlx4_priv(dev); - return priv->eq_table.eq[vec].irq; + return priv->eq_table.eq[MLX4_CQ_TO_EQ_VECTOR(cq_vec)].irq; } EXPORT_SYMBOL(mlx4_eq_get_irq); void mlx4_release_eq(struct mlx4_dev *dev, int vec) { struct mlx4_priv *priv = mlx4_priv(dev); - /*bm index*/ - int i = vec - dev->caps.num_comp_vectors - 1; - - if (likely(i >= 0)) { - /*sanity check , making sure were not trying to free irq's - Belonging to a legacy EQ*/ - mutex_lock(&priv->msix_ctl.pool_lock); - if (priv->msix_ctl.pool_bm & 1ULL << i) { - free_irq(priv->eq_table.eq[vec].irq, - &priv->eq_table.eq[vec]); - priv->msix_ctl.pool_bm &= ~(1ULL << i); - } - mutex_unlock(&priv->msix_ctl.pool_lock); - } + int eq_vec = MLX4_CQ_TO_EQ_VECTOR(vec); + + mutex_lock(&priv->msix_ctl.pool_lock); + priv->eq_table.eq[eq_vec].ref_count--; + /* once we allocated EQ, we don't release it because it might be binded + * to cpu_rmap. + */ + mutex_unlock(&priv->msix_ctl.pool_lock); } EXPORT_SYMBOL(mlx4_release_eq); diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 70d33f6e2a41..3ec5113c5a33 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -2364,11 +2364,11 @@ static int mlx4_setup_hca(struct mlx4_dev *dev) if (err) { if (dev->flags & MLX4_FLAG_MSI_X) { mlx4_warn(dev, "NOP command failed to generate MSI-X interrupt IRQ %d)\n", - priv->eq_table.eq[dev->caps.num_comp_vectors].irq); + priv->eq_table.eq[MLX4_EQ_ASYNC].irq); mlx4_warn(dev, "Trying again without MSI-X\n"); } else { mlx4_err(dev, "NOP command failed to generate interrupt (IRQ %d), aborting\n", - priv->eq_table.eq[dev->caps.num_comp_vectors].irq); + priv->eq_table.eq[MLX4_EQ_ASYNC].irq); mlx4_err(dev, "BIOS or ACPI interrupt routing problem?\n"); } @@ -2486,9 +2486,10 @@ static void mlx4_enable_msi_x(struct mlx4_dev *dev) struct mlx4_priv *priv = mlx4_priv(dev); struct msix_entry *entries; int i; + int port = 0; if (msi_x) { - int nreq = dev->caps.num_ports * num_online_cpus() + MSIX_LEGACY_SZ; + int nreq = dev->caps.num_ports * num_online_cpus() + 1; nreq = min_t(int, dev->caps.num_eqs - dev->caps.reserved_eqs, nreq); @@ -2503,20 +2504,49 @@ static void mlx4_enable_msi_x(struct mlx4_dev *dev) nreq = pci_enable_msix_range(dev->persist->pdev, entries, 2, nreq); - if (nreq < 0) { + if (nreq < 0 || nreq < MLX4_EQ_ASYNC) { kfree(entries); goto no_msi; - } else if (nreq < MSIX_LEGACY_SZ + - dev->caps.num_ports * MIN_MSIX_P_PORT) { - /*Working in legacy mode , all EQ's shared*/ - dev->caps.comp_pool = 0; - dev->caps.num_comp_vectors = nreq - 1; - } else { - dev->caps.comp_pool = nreq - MSIX_LEGACY_SZ; - dev->caps.num_comp_vectors = MSIX_LEGACY_SZ - 1; } - for (i = 0; i < nreq; ++i) - priv->eq_table.eq[i].irq = entries[i].vector; + /* 1 is reserved for events (asyncrounous EQ) */ + dev->caps.num_comp_vectors = nreq - 1; + + priv->eq_table.eq[MLX4_EQ_ASYNC].irq = entries[0].vector; + bitmap_zero(priv->eq_table.eq[MLX4_EQ_ASYNC].actv_ports.ports, + dev->caps.num_ports); + + for (i = 0; i < dev->caps.num_comp_vectors + 1; i++) { + if (i == MLX4_EQ_ASYNC) + continue; + + priv->eq_table.eq[i].irq = + entries[i + 1 - !!(i > MLX4_EQ_ASYNC)].vector; + + if (MLX4_IS_LEGACY_EQ_MODE(dev->caps)) { + bitmap_fill(priv->eq_table.eq[i].actv_ports.ports, + dev->caps.num_ports); + } else { + set_bit(port, + priv->eq_table.eq[i].actv_ports.ports); + } + /* We divide the Eqs evenly between the two ports. + * (dev->caps.num_comp_vectors / dev->caps.num_ports) + * refers to the number of Eqs per port + * (i.e eqs_per_port). Theoretically, we would like to + * write something like (i + 1) % eqs_per_port == 0. + * However, since there's an asynchronous Eq, we have + * to skip over it by comparing this condition to + * !!((i + 1) > MLX4_EQ_ASYNC). + */ + if ((dev->caps.num_comp_vectors > dev->caps.num_ports) && + ((i + 1) % + (dev->caps.num_comp_vectors / dev->caps.num_ports)) == + !!((i + 1) > MLX4_EQ_ASYNC)) + /* If dev->caps.num_comp_vectors < dev->caps.num_ports, + * everything is shared anyway. + */ + port++; + } dev->flags |= MLX4_FLAG_MSI_X; @@ -2526,10 +2556,15 @@ static void mlx4_enable_msi_x(struct mlx4_dev *dev) no_msi: dev->caps.num_comp_vectors = 1; - dev->caps.comp_pool = 0; - for (i = 0; i < 2; ++i) + BUG_ON(MLX4_EQ_ASYNC >= 2); + for (i = 0; i < 2; ++i) { priv->eq_table.eq[i].irq = dev->persist->pdev->irq; + if (i != MLX4_EQ_ASYNC) { + bitmap_fill(priv->eq_table.eq[i].actv_ports.ports, + dev->caps.num_ports); + } + } } static int mlx4_init_port_info(struct mlx4_dev *dev, int port) @@ -2594,6 +2629,10 @@ static void mlx4_cleanup_port_info(struct mlx4_port_info *info) device_remove_file(&info->dev->persist->pdev->dev, &info->port_attr); device_remove_file(&info->dev->persist->pdev->dev, &info->port_mtu_attr); +#ifdef CONFIG_RFS_ACCEL + free_irq_cpu_rmap(info->rmap); + info->rmap = NULL; +#endif } static int mlx4_init_steering(struct mlx4_dev *dev) @@ -3024,7 +3063,7 @@ slave_start: if (err) goto err_master_mfunc; - priv->msix_ctl.pool_bm = 0; + bitmap_zero(priv->msix_ctl.pool_bm, MAX_MSIX); mutex_init(&priv->msix_ctl.pool_lock); mlx4_enable_msi_x(dev); @@ -3046,7 +3085,6 @@ slave_start: !mlx4_is_mfunc(dev)) { dev->flags &= ~MLX4_FLAG_MSI_X; dev->caps.num_comp_vectors = 1; - dev->caps.comp_pool = 0; pci_disable_msix(pdev); err = mlx4_setup_hca(dev); } diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 502d3dd2c888..ff40098eaf4c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -287,6 +287,12 @@ struct mlx4_icm_table { #define MLX4_CQE_SIZE_MASK_STRIDE 0x3 #define MLX4_EQE_SIZE_MASK_STRIDE 0x30 +#define MLX4_EQ_ASYNC 0 +#define MLX4_EQ_TO_CQ_VECTOR(vector) ((vector) - \ + !!((int)(vector) >= MLX4_EQ_ASYNC)) +#define MLX4_CQ_TO_EQ_VECTOR(vector) ((vector) + \ + !!((int)(vector) >= MLX4_EQ_ASYNC)) + /* * Must be packed because mtt_seg is 64 bits but only aligned to 32 bits. */ @@ -391,6 +397,8 @@ struct mlx4_eq { struct mlx4_buf_list *page_list; struct mlx4_mtt mtt; struct mlx4_eq_tasklet tasklet_ctx; + struct mlx4_active_ports actv_ports; + u32 ref_count; }; struct mlx4_slave_eqe { @@ -808,6 +816,7 @@ struct mlx4_port_info { struct mlx4_vlan_table vlan_table; struct mlx4_roce_gid_table gid_table; int base_qpn; + struct cpu_rmap *rmap; }; struct mlx4_sense { @@ -818,7 +827,7 @@ struct mlx4_sense { }; struct mlx4_msix_ctl { - u64 pool_bm; + DECLARE_BITMAP(pool_bm, MAX_MSIX); struct mutex pool_lock; }; diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index d021f079f181..edd8fd69ec9a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -338,7 +338,7 @@ struct mlx4_en_cq { struct napi_struct napi; int size; int buf_size; - unsigned vector; + int vector; enum cq_type is_tx; u16 moder_time; u16 moder_cnt; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 83e80ab94500..ad31e476873f 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -46,8 +46,9 @@ #define MAX_MSIX_P_PORT 17 #define MAX_MSIX 64 -#define MSIX_LEGACY_SZ 4 #define MIN_MSIX_P_PORT 5 +#define MLX4_IS_LEGACY_EQ_MODE(dev_cap) ((dev_cap).num_comp_vectors < \ + (dev_cap).num_ports * MIN_MSIX_P_PORT) #define MLX4_MAX_100M_UNITS_VAL 255 /* * work around: can't set values @@ -528,7 +529,6 @@ struct mlx4_caps { int num_eqs; int reserved_eqs; int num_comp_vectors; - int comp_pool; int num_mpts; int max_fmr_maps; int num_mtts; @@ -1332,10 +1332,13 @@ void mlx4_fmr_unmap(struct mlx4_dev *dev, struct mlx4_fmr *fmr, int mlx4_fmr_free(struct mlx4_dev *dev, struct mlx4_fmr *fmr); int mlx4_SYNC_TPT(struct mlx4_dev *dev); int mlx4_test_interrupts(struct mlx4_dev *dev); -int mlx4_assign_eq(struct mlx4_dev *dev, char *name, struct cpu_rmap *rmap, - int *vector); +u32 mlx4_get_eqs_per_port(struct mlx4_dev *dev, u8 port); +bool mlx4_is_eq_vector_valid(struct mlx4_dev *dev, u8 port, int vector); +struct cpu_rmap *mlx4_get_cpu_rmap(struct mlx4_dev *dev, int port); +int mlx4_assign_eq(struct mlx4_dev *dev, u8 port, int *vector); void mlx4_release_eq(struct mlx4_dev *dev, int vec); +int mlx4_is_eq_shared(struct mlx4_dev *dev, int vector); int mlx4_eq_get_irq(struct mlx4_dev *dev, int vec); int mlx4_get_phys_port_id(struct mlx4_dev *dev); -- cgit v1.2.3 From abf2e7d6e2e315b32ee00067a69aaad2cf4e1b3f Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 28 May 2015 19:26:02 -0700 Subject: bpf: add missing rcu protection when releasing programs from prog_array Normally the program attachment place (like sockets, qdiscs) takes care of rcu protection and calls bpf_prog_put() after a grace period. The programs stored inside prog_array may not be attached anywhere, so prog_array needs to take care of preserving rcu protection. Otherwise bpf_tail_call() will race with bpf_prog_put(). To solve that introduce bpf_prog_put_rcu() helper function and use it in 3 places where unattached program can decrement refcnt: closing program fd, deleting/replacing program in prog_array. Fixes: 04fd61ab36ec ("bpf: allow bpf programs to tail-call other bpf programs") Reported-by: Martin Schwidefsky Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/bpf.h | 6 +++++- kernel/bpf/arraymap.c | 4 ++-- kernel/bpf/syscall.c | 19 ++++++++++++++++++- 3 files changed, 25 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 8821b9a8689e..5f520f5f087e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -123,7 +123,10 @@ struct bpf_prog_aux { const struct bpf_verifier_ops *ops; struct bpf_map **used_maps; struct bpf_prog *prog; - struct work_struct work; + union { + struct work_struct work; + struct rcu_head rcu; + }; }; struct bpf_array { @@ -153,6 +156,7 @@ void bpf_register_map_type(struct bpf_map_type_list *tl); struct bpf_prog *bpf_prog_get(u32 ufd); void bpf_prog_put(struct bpf_prog *prog); +void bpf_prog_put_rcu(struct bpf_prog *prog); struct bpf_map *bpf_map_get(struct fd f); void bpf_map_put(struct bpf_map *map); diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 614bcd4c1d74..cb31229a6fa4 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -202,7 +202,7 @@ static int prog_array_map_update_elem(struct bpf_map *map, void *key, old_prog = xchg(array->prog + index, prog); if (old_prog) - bpf_prog_put(old_prog); + bpf_prog_put_rcu(old_prog); return 0; } @@ -218,7 +218,7 @@ static int prog_array_map_delete_elem(struct bpf_map *map, void *key) old_prog = xchg(array->prog + index, NULL); if (old_prog) { - bpf_prog_put(old_prog); + bpf_prog_put_rcu(old_prog); return 0; } else { return -ENOENT; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 98a69bd83069..a1b14d197a4f 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -432,6 +432,23 @@ static void free_used_maps(struct bpf_prog_aux *aux) kfree(aux->used_maps); } +static void __prog_put_rcu(struct rcu_head *rcu) +{ + struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu); + + free_used_maps(aux); + bpf_prog_free(aux->prog); +} + +/* version of bpf_prog_put() that is called after a grace period */ +void bpf_prog_put_rcu(struct bpf_prog *prog) +{ + if (atomic_dec_and_test(&prog->aux->refcnt)) { + prog->aux->prog = prog; + call_rcu(&prog->aux->rcu, __prog_put_rcu); + } +} + void bpf_prog_put(struct bpf_prog *prog) { if (atomic_dec_and_test(&prog->aux->refcnt)) { @@ -445,7 +462,7 @@ static int bpf_prog_release(struct inode *inode, struct file *filp) { struct bpf_prog *prog = filp->private_data; - bpf_prog_put(prog); + bpf_prog_put_rcu(prog); return 0; } -- cgit v1.2.3 From 3b369bd212d5cabb46cff0e863298971b382bbd6 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Thu, 28 May 2015 15:38:32 +0300 Subject: ieee802154: Fix generation of random EUI-64 addresses. Currently, ieee802154_random_extended_addr() has a 50% chance of generating a group (multicast) address, while this function is used for generating station addresses (which can't be group addresses) for interfaces that don't have a hardware-provided address. Also, in case get_random_bytes() generates the EUI-64 address 00:00:00:00:00:00:00:00 (extremely unlikely), which is an invalid address, ieee802154_random_extended_addr() reacts by changing it to 01:00:00:00:00:00:00:00, which is an invalid station address as well, as it is a group address. This patch changes the address generation procedure to grab eight random bytes, treat that as an EUI-64, and then clear the Group address bit and set the Locally Administered bit, which is in line with how eth_random_addr() generates random EUI-48s. Signed-off-by: Lennert Buytenhek Acked-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/linux/ieee802154.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee802154.h b/include/linux/ieee802154.h index 8872ca103d06..552210d0a46f 100644 --- a/include/linux/ieee802154.h +++ b/include/linux/ieee802154.h @@ -244,9 +244,9 @@ static inline void ieee802154_random_extended_addr(__le64 *addr) { get_random_bytes(addr, IEEE802154_EXTENDED_ADDR_LEN); - /* toggle some bit if we hit an invalid extended addr */ - if (!ieee802154_is_valid_extended_addr(*addr)) - ((u8 *)addr)[IEEE802154_EXTENDED_ADDR_LEN - 1] ^= 0x01; + /* clear the group bit, and set the locally administered bit */ + ((u8 *)addr)[IEEE802154_EXTENDED_ADDR_LEN - 1] &= ~0x01; + ((u8 *)addr)[IEEE802154_EXTENDED_ADDR_LEN - 1] |= 0x02; } #endif /* LINUX_IEEE802154_H */ -- cgit v1.2.3 From daf4e2c89254ed6eb8cf7ef60f614edebfdb9f3a Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Thu, 28 May 2015 15:38:43 +0300 Subject: ieee802154: Fix EUI-64 station address validation. Refuse to allow setting an EUI-64 group address as an interface address, as those are not valid station addresses. Signed-off-by: Lennert Buytenhek Acked-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/linux/ieee802154.h | 10 ++++------ net/mac802154/iface.c | 4 ++-- 2 files changed, 6 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee802154.h b/include/linux/ieee802154.h index 552210d0a46f..1dc1f4ed4001 100644 --- a/include/linux/ieee802154.h +++ b/include/linux/ieee802154.h @@ -225,15 +225,13 @@ static inline bool ieee802154_is_valid_psdu_len(const u8 len) * ieee802154_is_valid_psdu_len - check if extended addr is valid * @addr: extended addr to check */ -static inline bool ieee802154_is_valid_extended_addr(const __le64 addr) +static inline bool ieee802154_is_valid_extended_unicast_addr(const __le64 addr) { - /* These EUI-64 addresses are reserved by IEEE. 0xffffffffffffffff - * is used internally as extended to short address broadcast mapping. - * This is currently a workaround because neighbor discovery can't - * deal with short addresses types right now. + /* Bail out if the address is all zero, or if the group + * address bit is set. */ return ((addr != cpu_to_le64(0x0000000000000000ULL)) && - (addr != cpu_to_le64(0xffffffffffffffffULL))); + !(addr & cpu_to_le64(0x0100000000000000ULL))); } /** diff --git a/net/mac802154/iface.c b/net/mac802154/iface.c index b544b5dc4bfb..6ac023932ce0 100644 --- a/net/mac802154/iface.c +++ b/net/mac802154/iface.c @@ -126,7 +126,7 @@ static int mac802154_wpan_mac_addr(struct net_device *dev, void *p) return -EBUSY; ieee802154_be64_to_le64(&extended_addr, addr->sa_data); - if (!ieee802154_is_valid_extended_addr(extended_addr)) + if (!ieee802154_is_valid_extended_unicast_addr(extended_addr)) return -EINVAL; memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); @@ -539,7 +539,7 @@ ieee802154_if_add(struct ieee802154_local *local, const char *name, switch (type) { case NL802154_IFTYPE_NODE: ndev->type = ARPHRD_IEEE802154; - if (ieee802154_is_valid_extended_addr(extended_addr)) + if (ieee802154_is_valid_extended_unicast_addr(extended_addr)) ieee802154_le64_to_be64(ndev->dev_addr, &extended_addr); else memcpy(ndev->dev_addr, ndev->perm_addr, -- cgit v1.2.3 From 1a1bc59c5f7657387d1a4b45d63248fed55ab88c Mon Sep 17 00:00:00 2001 From: Varka Bhadram Date: Fri, 29 May 2015 10:56:55 +0530 Subject: cc2520: fix CC2591 handling This patch changes tha way of handling of cc2591-cc2520 combination by moving amplified variable from platform data to private data. This will be useful in other sections like tx power support. Signed-off-by: Varka Bhadram Cc: Brad Campbell Signed-off-by: Marcel Holtmann --- drivers/net/ieee802154/cc2520.c | 10 ++++++++-- include/linux/spi/cc2520.h | 1 - 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ieee802154/cc2520.c b/drivers/net/ieee802154/cc2520.c index ea280d437ca1..0d9353756598 100644 --- a/drivers/net/ieee802154/cc2520.c +++ b/drivers/net/ieee802154/cc2520.c @@ -196,6 +196,7 @@ struct cc2520_private { u8 *buf; /* SPI TX/Rx data buffer */ struct mutex buffer_mutex; /* SPI buffer mutex */ bool is_tx; /* Flag for sync b/w Tx and Rx */ + bool amplified; /* Flag for CC2591 */ int fifo_pin; /* FIFO GPIO pin number */ struct work_struct fifop_irqwork;/* Workqueue for FIFOP */ spinlock_t lock; /* Lock for is_tx*/ @@ -738,7 +739,9 @@ static int cc2520_get_platform_data(struct spi_device *spi, pdata->vreg = of_get_named_gpio(np, "vreg-gpio", 0); pdata->reset = of_get_named_gpio(np, "reset-gpio", 0); - pdata->amplified = of_property_read_bool(np, "amplified"); + /* CC2591 front end for CC2520 */ + if (of_property_read_bool(np, "amplified")) + priv->amplified = true; return 0; } @@ -781,7 +784,7 @@ static int cc2520_hw_init(struct cc2520_private *priv) * amplifier. See section 8 page 17 of TI application note AN065. * http://www.ti.com/lit/an/swra229a/swra229a.pdf */ - if (pdata.amplified) { + if (priv->amplified) { ret = cc2520_write_register(priv, CC2520_AGCCTRL1, 0x16); if (ret) goto err_ret; @@ -896,6 +899,9 @@ static int cc2520_probe(struct spi_device *spi) spin_lock_init(&priv->lock); init_completion(&priv->tx_complete); + /* Assumption that CC2591 is not connected */ + priv->amplified = false; + /* Request all the gpio's */ if (!gpio_is_valid(pdata.fifo)) { dev_err(&spi->dev, "fifo gpio is not valid\n"); diff --git a/include/linux/spi/cc2520.h b/include/linux/spi/cc2520.h index e741e8baad92..85b8ee67e937 100644 --- a/include/linux/spi/cc2520.h +++ b/include/linux/spi/cc2520.h @@ -21,7 +21,6 @@ struct cc2520_platform_data { int sfd; int reset; int vreg; - bool amplified; }; #endif -- cgit v1.2.3 From 17ca8cbf49be3aa94bb1c2b7ee6545fd70094eb4 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 29 May 2015 23:23:06 +0200 Subject: ebpf: allow bpf_ktime_get_ns_proto also for networking As this is already exported from tracing side via commit d9847d310ab4 ("tracing: Allow BPF programs to call bpf_ktime_get_ns()"), we might as well want to move it to the core, so also networking users can make use of it, e.g. to measure diffs for certain flows from ingress/egress. Signed-off-by: Daniel Borkmann Cc: Alexei Starovoitov Cc: Ingo Molnar Signed-off-by: David S. Miller --- include/linux/bpf.h | 1 + kernel/bpf/core.c | 1 + kernel/bpf/helpers.c | 13 +++++++++++++ kernel/trace/bpf_trace.c | 12 ------------ net/core/filter.c | 2 ++ 5 files changed, 17 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5f520f5f087e..ca854e5bb2f7 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -186,5 +186,6 @@ extern const struct bpf_func_proto bpf_map_delete_elem_proto; extern const struct bpf_func_proto bpf_get_prandom_u32_proto; extern const struct bpf_func_proto bpf_get_smp_processor_id_proto; extern const struct bpf_func_proto bpf_tail_call_proto; +extern const struct bpf_func_proto bpf_ktime_get_ns_proto; #endif /* _LINUX_BPF_H */ diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index d44b25cbe460..4548422d5f6c 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -734,6 +734,7 @@ const struct bpf_func_proto bpf_map_delete_elem_proto __weak; const struct bpf_func_proto bpf_get_prandom_u32_proto __weak; const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak; +const struct bpf_func_proto bpf_ktime_get_ns_proto __weak; /* To execute LD_ABS/LD_IND instructions __bpf_prog_run() may call * skb_copy_bits(), so provide a weak definition of it for NET-less config. diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index bd7f5988ed9c..b3aaabdf9a50 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -13,6 +13,7 @@ #include #include #include +#include /* If kernel subsystem is allowing eBPF programs to call this function, * inside its own verifier_ops->get_func_proto() callback it should return @@ -111,3 +112,15 @@ const struct bpf_func_proto bpf_get_smp_processor_id_proto = { .gpl_only = false, .ret_type = RET_INTEGER, }; + +static u64 bpf_ktime_get_ns(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + /* NMI safe access to clock monotonic */ + return ktime_get_mono_fast_ns(); +} + +const struct bpf_func_proto bpf_ktime_get_ns_proto = { + .func = bpf_ktime_get_ns, + .gpl_only = true, + .ret_type = RET_INTEGER, +}; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 646445e41bd4..50c4015a8ad3 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -79,18 +79,6 @@ static const struct bpf_func_proto bpf_probe_read_proto = { .arg3_type = ARG_ANYTHING, }; -static u64 bpf_ktime_get_ns(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) -{ - /* NMI safe access to clock monotonic */ - return ktime_get_mono_fast_ns(); -} - -static const struct bpf_func_proto bpf_ktime_get_ns_proto = { - .func = bpf_ktime_get_ns, - .gpl_only = true, - .ret_type = RET_INTEGER, -}; - /* * limited trace_printk() * only %d %u %x %ld %lu %lx %lld %llu %llx %p conversion specifiers allowed diff --git a/net/core/filter.c b/net/core/filter.c index 2c30d6632d66..b78a010a957f 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1423,6 +1423,8 @@ sk_filter_func_proto(enum bpf_func_id func_id) return &bpf_get_smp_processor_id_proto; case BPF_FUNC_tail_call: return &bpf_tail_call_proto; + case BPF_FUNC_ktime_get_ns: + return &bpf_ktime_get_ns_proto; default: return NULL; } -- cgit v1.2.3 From bdef7de4b8d9be4cf7bf5aea977f827310ab3ff0 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 1 Jun 2015 14:56:09 -0700 Subject: net: Add priority to packet_offload objects. When we scan a packet for GRO processing, we want to see the most common packet types in the front of the offload_base list. So add a priority field so we can handle this properly. IPv4/IPv6 get the highest priority with the implicit zero priority field. Next comes ethernet with a priority of 10, and then we have the MPLS types with a priority of 15. Suggested-by: Eric Dumazet Suggested-by: Toshiaki Makita Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + net/core/dev.c | 8 ++++++-- net/ethernet/eth.c | 1 + net/mpls/mpls_gso.c | 2 ++ 4 files changed, 10 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 51f8d2f5dc3f..6f5f71ff5169 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1997,6 +1997,7 @@ struct offload_callbacks { struct packet_offload { __be16 type; /* This is really htons(ether_type). */ + u16 priority; struct offload_callbacks callbacks; struct list_head list; }; diff --git a/net/core/dev.c b/net/core/dev.c index 594163d0c6eb..0602e917a305 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -469,10 +469,14 @@ EXPORT_SYMBOL(dev_remove_pack); */ void dev_add_offload(struct packet_offload *po) { - struct list_head *head = &offload_base; + struct packet_offload *elem; spin_lock(&offload_lock); - list_add_rcu(&po->list, head); + list_for_each_entry(elem, &offload_base, list) { + if (po->priority < elem->priority) + break; + } + list_add_rcu(&po->list, elem->list.prev); spin_unlock(&offload_lock); } EXPORT_SYMBOL(dev_add_offload); diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index c3325bd2f3fb..7d0e239a6755 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -470,6 +470,7 @@ EXPORT_SYMBOL(eth_gro_complete); static struct packet_offload eth_packet_offload __read_mostly = { .type = cpu_to_be16(ETH_P_TEB), + .priority = 10, .callbacks = { .gro_receive = eth_gro_receive, .gro_complete = eth_gro_complete, diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c index 809df534a720..0183b32da942 100644 --- a/net/mpls/mpls_gso.c +++ b/net/mpls/mpls_gso.c @@ -62,6 +62,7 @@ out: static struct packet_offload mpls_mc_offload __read_mostly = { .type = cpu_to_be16(ETH_P_MPLS_MC), + .priority = 15, .callbacks = { .gso_segment = mpls_gso_segment, }, @@ -69,6 +70,7 @@ static struct packet_offload mpls_mc_offload __read_mostly = { static struct packet_offload mpls_uc_offload __read_mostly = { .type = cpu_to_be16(ETH_P_MPLS_UC), + .priority = 15, .callbacks = { .gso_segment = mpls_gso_segment, }, -- cgit v1.2.3 From 66e5133f19e901a044fa5eaeeb6ecff4545839e5 Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Mon, 1 Jun 2015 21:55:06 +0900 Subject: vlan: Add GRO support for non hardware accelerated vlan Currently packets with non-hardware-accelerated vlan cannot be handled by GRO. This causes low performance for 802.1ad and stacked vlan, as their vlan tags are currently not stripped by hardware. This patch adds GRO support for non-hardware-accelerated vlan and improves receive performance of them. Test Environment: vlan device (.1Q) on vlan device (.1ad) on ixgbe (82599) Result: - Before $ netperf -t TCP_STREAM -H 192.168.20.2 -l 60 Recv Send Send Socket Socket Message Elapsed Size Size Size Time Throughput bytes bytes bytes secs. 10^6bits/sec 87380 16384 16384 60.00 5233.17 Rx side CPU usage: %usr %sys %irq %soft %idle 0.27 58.03 0.00 41.70 0.00 - After $ netperf -t TCP_STREAM -H 192.168.20.2 -l 60 Recv Send Send Socket Socket Message Elapsed Size Size Size Time Throughput bytes bytes bytes secs. 10^6bits/sec 87380 16384 16384 60.00 7586.85 Rx side CPU usage: %usr %sys %irq %soft %idle 0.50 25.83 0.00 59.53 14.14 [ Register VLAN offloads with priority 10 -DaveM ] Signed-off-by: Toshiaki Makita Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 20 +++++++++++ net/8021q/vlan.c | 96 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 116 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index a40d29846ac2..67ce5bd3b56a 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -628,4 +628,24 @@ static inline netdev_features_t vlan_features_check(const struct sk_buff *skb, return features; } +/** + * compare_vlan_header - Compare two vlan headers + * @h1: Pointer to vlan header + * @h2: Pointer to vlan header + * + * Compare two vlan headers, returns 0 if equal. + * + * Please note that alignment of h1 & h2 are only guaranteed to be 16 bits. + */ +static inline unsigned long compare_vlan_header(const struct vlan_hdr *h1, + const struct vlan_hdr *h2) +{ +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) + return *(u32 *)h1 ^ *(u32 *)h2; +#else + return ((__force u32)h1->h_vlan_TCI ^ (__force u32)h2->h_vlan_TCI) | + ((__force u32)h1->h_vlan_encapsulated_proto ^ + (__force u32)h2->h_vlan_encapsulated_proto); +#endif +} #endif /* !(_LINUX_IF_VLAN_H_) */ diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 59555f0f8fc8..d2cd9de4b724 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -618,6 +618,92 @@ out: return err; } +static struct sk_buff **vlan_gro_receive(struct sk_buff **head, + struct sk_buff *skb) +{ + struct sk_buff *p, **pp = NULL; + struct vlan_hdr *vhdr; + unsigned int hlen, off_vlan; + const struct packet_offload *ptype; + __be16 type; + int flush = 1; + + off_vlan = skb_gro_offset(skb); + hlen = off_vlan + sizeof(*vhdr); + vhdr = skb_gro_header_fast(skb, off_vlan); + if (skb_gro_header_hard(skb, hlen)) { + vhdr = skb_gro_header_slow(skb, hlen, off_vlan); + if (unlikely(!vhdr)) + goto out; + } + + type = vhdr->h_vlan_encapsulated_proto; + + rcu_read_lock(); + ptype = gro_find_receive_by_type(type); + if (!ptype) + goto out_unlock; + + flush = 0; + + for (p = *head; p; p = p->next) { + struct vlan_hdr *vhdr2; + + if (!NAPI_GRO_CB(p)->same_flow) + continue; + + vhdr2 = (struct vlan_hdr *)(p->data + off_vlan); + if (compare_vlan_header(vhdr, vhdr2)) + NAPI_GRO_CB(p)->same_flow = 0; + } + + skb_gro_pull(skb, sizeof(*vhdr)); + skb_gro_postpull_rcsum(skb, vhdr, sizeof(*vhdr)); + pp = ptype->callbacks.gro_receive(head, skb); + +out_unlock: + rcu_read_unlock(); +out: + NAPI_GRO_CB(skb)->flush |= flush; + + return pp; +} + +static int vlan_gro_complete(struct sk_buff *skb, int nhoff) +{ + struct vlan_hdr *vhdr = (struct vlan_hdr *)(skb->data + nhoff); + __be16 type = vhdr->h_vlan_encapsulated_proto; + struct packet_offload *ptype; + int err = -ENOENT; + + rcu_read_lock(); + ptype = gro_find_complete_by_type(type); + if (ptype) + err = ptype->callbacks.gro_complete(skb, nhoff + sizeof(*vhdr)); + + rcu_read_unlock(); + return err; +} + +static struct packet_offload vlan_packet_offloads[] __read_mostly = { + { + .type = cpu_to_be16(ETH_P_8021Q), + .priority = 10, + .callbacks = { + .gro_receive = vlan_gro_receive, + .gro_complete = vlan_gro_complete, + }, + }, + { + .type = cpu_to_be16(ETH_P_8021AD), + .priority = 10, + .callbacks = { + .gro_receive = vlan_gro_receive, + .gro_complete = vlan_gro_complete, + }, + }, +}; + static int __net_init vlan_init_net(struct net *net) { struct vlan_net *vn = net_generic(net, vlan_net_id); @@ -645,6 +731,7 @@ static struct pernet_operations vlan_net_ops = { static int __init vlan_proto_init(void) { int err; + unsigned int i; pr_info("%s v%s\n", vlan_fullname, vlan_version); @@ -668,6 +755,9 @@ static int __init vlan_proto_init(void) if (err < 0) goto err5; + for (i = 0; i < ARRAY_SIZE(vlan_packet_offloads); i++) + dev_add_offload(&vlan_packet_offloads[i]); + vlan_ioctl_set(vlan_ioctl_handler); return 0; @@ -685,7 +775,13 @@ err0: static void __exit vlan_cleanup_module(void) { + unsigned int i; + vlan_ioctl_set(NULL); + + for (i = 0; i < ARRAY_SIZE(vlan_packet_offloads); i++) + dev_remove_offload(&vlan_packet_offloads[i]); + vlan_netlink_fini(); unregister_netdevice_notifier(&vlan_notifier_block); -- cgit v1.2.3 From 42aecaa9bb2bd57eb8d61b4565cee5d3640863fb Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Thu, 4 Jun 2015 09:16:39 -0700 Subject: net: Get skb hash over flow_keys structure This patch changes flow hashing to use jhash2 over the flow_keys structure instead just doing jhash_3words over src, dst, and ports. This method will allow us take more input into the hashing function so that we can include full IPv6 addresses, VLAN, flow labels etc. without needing to resort to xor'ing which makes for a poor hash. Acked-by: Jiri Pirko Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 +- include/net/flow_dissector.h | 21 ++++++++++++++--- include/net/ip.h | 2 ++ include/net/ipv6.h | 2 ++ net/core/flow_dissector.c | 54 +++++++++++++++++++++++++++++++++----------- net/sched/cls_flower.c | 2 ++ 6 files changed, 66 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 6b41c15efa27..cc612fc0a894 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1943,7 +1943,7 @@ static inline void skb_probe_transport_header(struct sk_buff *skb, if (skb_transport_header_was_set(skb)) return; else if (skb_flow_dissect_flow_keys(skb, &keys)) - skb_set_transport_header(skb, keys.basic.thoff); + skb_set_transport_header(skb, keys.control.thoff); else skb_set_transport_header(skb, offset_hint); } diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index bac9c1421f58..cba6a10b214a 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -6,6 +6,15 @@ #include #include +/** + * struct flow_dissector_key_control: + * @thoff: Transport header offset + */ +struct flow_dissector_key_control { + u16 thoff; + u16 padding; +}; + /** * struct flow_dissector_key_basic: * @thoff: Transport header offset @@ -13,9 +22,9 @@ * @ip_proto: Transport header protocol (eg. TCP/UDP) */ struct flow_dissector_key_basic { - u16 thoff; __be16 n_proto; u8 ip_proto; + u8 padding; }; /** @@ -70,6 +79,7 @@ struct flow_dissector_key_eth_addrs { }; enum flow_dissector_key_id { + FLOW_DISSECTOR_KEY_CONTROL, /* struct flow_dissector_key_control */ FLOW_DISSECTOR_KEY_BASIC, /* struct flow_dissector_key_basic */ FLOW_DISSECTOR_KEY_IPV4_ADDRS, /* struct flow_dissector_key_addrs */ FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS, /* struct flow_dissector_key_addrs */ @@ -109,11 +119,16 @@ static inline bool skb_flow_dissect(const struct sk_buff *skb, } struct flow_keys { - struct flow_dissector_key_addrs addrs; - struct flow_dissector_key_ports ports; + struct flow_dissector_key_control control; +#define FLOW_KEYS_HASH_START_FIELD basic struct flow_dissector_key_basic basic; + struct flow_dissector_key_ports ports; + struct flow_dissector_key_addrs addrs; }; +#define FLOW_KEYS_HASH_OFFSET \ + offsetof(struct flow_keys, FLOW_KEYS_HASH_START_FIELD) + extern struct flow_dissector flow_keys_dissector; extern struct flow_dissector flow_keys_buf_dissector; diff --git a/include/net/ip.h b/include/net/ip.h index 9b976cf99122..16cfc87fed6c 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -360,6 +360,8 @@ static inline void inet_set_txhash(struct sock *sk) struct inet_sock *inet = inet_sk(sk); struct flow_keys keys; + memset(&keys, 0, sizeof(keys)); + keys.addrs.src = inet->inet_saddr; keys.addrs.dst = inet->inet_daddr; keys.ports.src = inet->inet_sport; diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 35d485c78080..474ca466a091 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -699,6 +699,8 @@ static inline void ip6_set_txhash(struct sock *sk) struct ipv6_pinfo *np = inet6_sk(sk); struct flow_keys keys; + memset(&keys, 0, sizeof(keys)); + keys.addrs.src = (__force __be32)ipv6_addr_hash(&np->saddr); keys.addrs.dst = (__force __be32)ipv6_addr_hash(&sk->sk_v6_daddr); keys.ports.src = inet->inet_sport; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 0763795bea8f..55b5f2962afa 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -57,9 +57,11 @@ void skb_flow_dissector_init(struct flow_dissector *flow_dissector, flow_dissector->offset[key->key_id] = key->offset; } - /* Ensure that the dissector always includes basic key. That way - * we are able to avoid handling lack of it in fast path. + /* Ensure that the dissector always includes control and basic key. + * That way we are able to avoid handling lack of these in fast path. */ + BUG_ON(!skb_flow_dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_CONTROL)); BUG_ON(!skb_flow_dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_BASIC)); } @@ -120,6 +122,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb, void *target_container, void *data, __be16 proto, int nhoff, int hlen) { + struct flow_dissector_key_control *key_control; struct flow_dissector_key_basic *key_basic; struct flow_dissector_key_addrs *key_addrs; struct flow_dissector_key_ports *key_ports; @@ -132,6 +135,13 @@ bool __skb_flow_dissect(const struct sk_buff *skb, hlen = skb_headlen(skb); } + /* It is ensured by skb_flow_dissector_init() that control key will + * be always present. + */ + key_control = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_CONTROL, + target_container); + /* It is ensured by skb_flow_dissector_init() that basic key will * be always present. */ @@ -219,7 +229,7 @@ flow_label: key_basic->n_proto = proto; key_basic->ip_proto = ip_proto; - key_basic->thoff = (u16)nhoff; + key_control->thoff = (u16)nhoff; if (skb_flow_dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS)) { @@ -275,7 +285,7 @@ flow_label: if (!hdr) return false; key_basic->n_proto = proto; - key_basic->thoff = (u16)nhoff; + key_control->thoff = (u16)nhoff; if (skb_flow_dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS)) { @@ -288,7 +298,7 @@ flow_label: return true; } case htons(ETH_P_FCOE): - key_basic->thoff = (u16)(nhoff + FCOE_HEADER_LEN); + key_control->thoff = (u16)(nhoff + FCOE_HEADER_LEN); /* fall through */ default: return false; @@ -345,7 +355,7 @@ flow_label: key_basic->n_proto = proto; key_basic->ip_proto = ip_proto; - key_basic->thoff = (u16) nhoff; + key_control->thoff = (u16)nhoff; if (skb_flow_dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS)) { @@ -366,9 +376,21 @@ static __always_inline void __flow_hash_secret_init(void) net_get_random_once(&hashrnd, sizeof(hashrnd)); } -static __always_inline u32 __flow_hash_3words(u32 a, u32 b, u32 c, u32 keyval) +static __always_inline u32 __flow_hash_words(u32 *words, u32 length, u32 keyval) +{ + return jhash2(words, length, keyval); +} + +static inline void *flow_keys_hash_start(struct flow_keys *flow) { - return jhash_3words(a, b, c, keyval); + BUILD_BUG_ON(FLOW_KEYS_HASH_OFFSET % sizeof(u32)); + return (void *)flow + FLOW_KEYS_HASH_OFFSET; +} + +static inline size_t flow_keys_hash_length(struct flow_keys *flow) +{ + BUILD_BUG_ON((sizeof(*flow) - FLOW_KEYS_HASH_OFFSET) % sizeof(u32)); + return (sizeof(*flow) - FLOW_KEYS_HASH_OFFSET) / sizeof(u32); } static inline u32 __flow_hash_from_keys(struct flow_keys *keys, u32 keyval) @@ -383,10 +405,8 @@ static inline u32 __flow_hash_from_keys(struct flow_keys *keys, u32 keyval) swap(keys->ports.src, keys->ports.dst); } - hash = __flow_hash_3words((__force u32)keys->addrs.dst, - (__force u32)keys->addrs.src, - (__force u32)keys->ports.ports, - keyval); + hash = __flow_hash_words((u32 *)flow_keys_hash_start(keys), + flow_keys_hash_length(keys), keyval); if (!hash) hash = 1; @@ -473,7 +493,7 @@ EXPORT_SYMBOL(skb_get_hash_perturb); u32 __skb_get_poff(const struct sk_buff *skb, void *data, const struct flow_keys *keys, int hlen) { - u32 poff = keys->basic.thoff; + u32 poff = keys->control.thoff; switch (keys->basic.ip_proto) { case IPPROTO_TCP: { @@ -536,6 +556,10 @@ u32 skb_get_poff(const struct sk_buff *skb) } static const struct flow_dissector_key flow_keys_dissector_keys[] = { + { + .key_id = FLOW_DISSECTOR_KEY_CONTROL, + .offset = offsetof(struct flow_keys, control), + }, { .key_id = FLOW_DISSECTOR_KEY_BASIC, .offset = offsetof(struct flow_keys, basic), @@ -555,6 +579,10 @@ static const struct flow_dissector_key flow_keys_dissector_keys[] = { }; static const struct flow_dissector_key flow_keys_buf_dissector_keys[] = { + { + .key_id = FLOW_DISSECTOR_KEY_CONTROL, + .offset = offsetof(struct flow_keys, control), + }, { .key_id = FLOW_DISSECTOR_KEY_BASIC, .offset = offsetof(struct flow_keys, basic), diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 8c8f34ef6980..5a7d66c59684 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -25,6 +25,7 @@ struct fl_flow_key { int indev_ifindex; + struct flow_dissector_key_control control; struct flow_dissector_key_basic basic; struct flow_dissector_key_eth_addrs eth; union { @@ -347,6 +348,7 @@ static void fl_init_dissector(struct cls_fl_head *head, struct flow_dissector_key keys[FLOW_DISSECTOR_KEY_MAX]; size_t cnt = 0; + FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_CONTROL, control); FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_BASIC, basic); FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt, FLOW_DISSECTOR_KEY_ETH_ADDRS, eth); -- cgit v1.2.3 From 01949d0109ee5fae33752f0db99a36f1619e1873 Mon Sep 17 00:00:00 2001 From: Haggai Abramonvsky Date: Thu, 4 Jun 2015 19:30:38 +0300 Subject: net/mlx5_core: Enable XRCs and SRQs when using ISSI > 0 When working in ISSI > 0 mode, the model exposed by the device for XRCs and SRQs is different. XRCs use XRC SRQs and plain SRQs are based on RPM (Receive Memory Pool). Add helper functions to create, modify, query, and arm XRC SRQs and RMPs. Signed-off-by: Haggai Abramovsky Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/infiniband/hw/mlx5/srq.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/Makefile | 4 +- drivers/net/ethernet/mellanox/mlx5/core/srq.c | 444 ++++++++++++++++++--- drivers/net/ethernet/mellanox/mlx5/core/transobj.c | 154 +++++++ drivers/net/ethernet/mellanox/mlx5/core/transobj.h | 11 + include/linux/mlx5/driver.h | 6 +- include/linux/mlx5/mlx5_ifc.h | 16 +- 7 files changed, 564 insertions(+), 73 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index e8e8e942fa4a..e008505e96e9 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -302,7 +302,7 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, in->ctx.pd = cpu_to_be32(to_mpd(pd)->pdn); in->ctx.db_record = cpu_to_be64(srq->db.dma); - err = mlx5_core_create_srq(dev->mdev, &srq->msrq, in, inlen); + err = mlx5_core_create_srq(dev->mdev, &srq->msrq, in, inlen, is_xrc); kvfree(in); if (err) { mlx5_ib_dbg(dev, "create SRQ failed, err %d\n", err); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 87e9e606596a..07540f732df1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -2,7 +2,7 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \ - mad.o -mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o flow_table.o vport.o transobj.o \ + mad.o transobj.o +mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o flow_table.o vport.o \ en_main.o en_flow_table.o en_ethtool.o en_tx.o en_rx.o \ en_txrx.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/srq.c b/drivers/net/ethernet/mellanox/mlx5/core/srq.c index f9d25dcd03c1..c48f504ccbeb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/srq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/srq.c @@ -37,6 +37,7 @@ #include #include #include "mlx5_core.h" +#include "transobj.h" void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type) { @@ -62,6 +63,74 @@ void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type) complete(&srq->free); } +static int get_pas_size(void *srqc) +{ + u32 log_page_size = MLX5_GET(srqc, srqc, log_page_size) + 12; + u32 log_srq_size = MLX5_GET(srqc, srqc, log_srq_size); + u32 log_rq_stride = MLX5_GET(srqc, srqc, log_rq_stride); + u32 page_offset = MLX5_GET(srqc, srqc, page_offset); + u32 po_quanta = 1 << (log_page_size - 6); + u32 rq_sz = 1 << (log_srq_size + 4 + log_rq_stride); + u32 page_size = 1 << log_page_size; + u32 rq_sz_po = rq_sz + (page_offset * po_quanta); + u32 rq_num_pas = (rq_sz_po + page_size - 1) / page_size; + + return rq_num_pas * sizeof(u64); +} + +static void rmpc_srqc_reformat(void *srqc, void *rmpc, bool srqc_to_rmpc) +{ + void *wq = MLX5_ADDR_OF(rmpc, rmpc, wq); + + if (srqc_to_rmpc) { + switch (MLX5_GET(srqc, srqc, state)) { + case MLX5_SRQC_STATE_GOOD: + MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY); + break; + case MLX5_SRQC_STATE_ERROR: + MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_ERR); + break; + default: + pr_warn("%s: %d: Unknown srq state = 0x%x\n", __func__, + __LINE__, MLX5_GET(srqc, srqc, state)); + MLX5_SET(rmpc, rmpc, state, MLX5_GET(srqc, srqc, state)); + } + + MLX5_SET(wq, wq, wq_signature, MLX5_GET(srqc, srqc, wq_signature)); + MLX5_SET(wq, wq, log_wq_pg_sz, MLX5_GET(srqc, srqc, log_page_size)); + MLX5_SET(wq, wq, log_wq_stride, MLX5_GET(srqc, srqc, log_rq_stride) + 4); + MLX5_SET(wq, wq, log_wq_sz, MLX5_GET(srqc, srqc, log_srq_size)); + MLX5_SET(wq, wq, page_offset, MLX5_GET(srqc, srqc, page_offset)); + MLX5_SET(wq, wq, lwm, MLX5_GET(srqc, srqc, lwm)); + MLX5_SET(wq, wq, pd, MLX5_GET(srqc, srqc, pd)); + MLX5_SET64(wq, wq, dbr_addr, MLX5_GET64(srqc, srqc, dbr_addr)); + } else { + switch (MLX5_GET(rmpc, rmpc, state)) { + case MLX5_RMPC_STATE_RDY: + MLX5_SET(srqc, srqc, state, MLX5_SRQC_STATE_GOOD); + break; + case MLX5_RMPC_STATE_ERR: + MLX5_SET(srqc, srqc, state, MLX5_SRQC_STATE_ERROR); + break; + default: + pr_warn("%s: %d: Unknown rmp state = 0x%x\n", + __func__, __LINE__, + MLX5_GET(rmpc, rmpc, state)); + MLX5_SET(srqc, srqc, state, + MLX5_GET(rmpc, rmpc, state)); + } + + MLX5_SET(srqc, srqc, wq_signature, MLX5_GET(wq, wq, wq_signature)); + MLX5_SET(srqc, srqc, log_page_size, MLX5_GET(wq, wq, log_wq_pg_sz)); + MLX5_SET(srqc, srqc, log_rq_stride, MLX5_GET(wq, wq, log_wq_stride) - 4); + MLX5_SET(srqc, srqc, log_srq_size, MLX5_GET(wq, wq, log_wq_sz)); + MLX5_SET(srqc, srqc, page_offset, MLX5_GET(wq, wq, page_offset)); + MLX5_SET(srqc, srqc, lwm, MLX5_GET(wq, wq, lwm)); + MLX5_SET(srqc, srqc, pd, MLX5_GET(wq, wq, pd)); + MLX5_SET64(srqc, srqc, dbr_addr, MLX5_GET64(wq, wq, dbr_addr)); + } +} + struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn) { struct mlx5_srq_table *table = &dev->priv.srq_table; @@ -79,26 +148,311 @@ struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn) } EXPORT_SYMBOL(mlx5_core_get_srq); -int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_create_srq_mbox_in *in, int inlen) +static int create_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_create_srq_mbox_in *in, int inlen) { struct mlx5_create_srq_mbox_out out; - struct mlx5_srq_table *table = &dev->priv.srq_table; - struct mlx5_destroy_srq_mbox_in din; - struct mlx5_destroy_srq_mbox_out dout; int err; memset(&out, 0, sizeof(out)); + in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_SRQ); - err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out)); - if (err) - return err; - if (out.hdr.status) - return mlx5_cmd_status_to_err(&out.hdr); + err = mlx5_cmd_exec_check_status(dev, (u32 *)in, inlen, (u32 *)(&out), + sizeof(out)); srq->srqn = be32_to_cpu(out.srqn) & 0xffffff; + return err; +} + +static int destroy_srq_cmd(struct mlx5_core_dev *dev, + struct mlx5_core_srq *srq) +{ + struct mlx5_destroy_srq_mbox_in in; + struct mlx5_destroy_srq_mbox_out out; + + memset(&in, 0, sizeof(in)); + memset(&out, 0, sizeof(out)); + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_SRQ); + in.srqn = cpu_to_be32(srq->srqn); + + return mlx5_cmd_exec_check_status(dev, (u32 *)(&in), sizeof(in), + (u32 *)(&out), sizeof(out)); +} + +static int arm_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + u16 lwm, int is_srq) +{ + struct mlx5_arm_srq_mbox_in in; + struct mlx5_arm_srq_mbox_out out; + + memset(&in, 0, sizeof(in)); + memset(&out, 0, sizeof(out)); + + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ARM_RQ); + in.hdr.opmod = cpu_to_be16(!!is_srq); + in.srqn = cpu_to_be32(srq->srqn); + in.lwm = cpu_to_be16(lwm); + + return mlx5_cmd_exec_check_status(dev, (u32 *)(&in), + sizeof(in), (u32 *)(&out), + sizeof(out)); +} + +static int query_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_query_srq_mbox_out *out) +{ + struct mlx5_query_srq_mbox_in in; + + memset(&in, 0, sizeof(in)); + + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_SRQ); + in.srqn = cpu_to_be32(srq->srqn); + + return mlx5_cmd_exec_check_status(dev, (u32 *)(&in), sizeof(in), + (u32 *)out, sizeof(*out)); +} + +static int create_xrc_srq_cmd(struct mlx5_core_dev *dev, + struct mlx5_core_srq *srq, + struct mlx5_create_srq_mbox_in *in, + int srq_inlen) +{ + u32 create_out[MLX5_ST_SZ_DW(create_xrc_srq_out)]; + void *create_in; + void *srqc; + void *xrc_srqc; + void *pas; + int pas_size; + int inlen; + int err; + + srqc = MLX5_ADDR_OF(create_srq_in, in, srq_context_entry); + pas_size = get_pas_size(srqc); + inlen = MLX5_ST_SZ_BYTES(create_xrc_srq_in) + pas_size; + create_in = mlx5_vzalloc(inlen); + if (!create_in) + return -ENOMEM; + + xrc_srqc = MLX5_ADDR_OF(create_xrc_srq_in, create_in, + xrc_srq_context_entry); + pas = MLX5_ADDR_OF(create_xrc_srq_in, create_in, pas); + + memcpy(xrc_srqc, srqc, MLX5_ST_SZ_BYTES(srqc)); + memcpy(pas, in->pas, pas_size); + /* 0xffffff means we ask to work with cqe version 0 */ + MLX5_SET(xrc_srqc, xrc_srqc, user_index, 0xffffff); + MLX5_SET(create_xrc_srq_in, create_in, opcode, + MLX5_CMD_OP_CREATE_XRC_SRQ); + + memset(create_out, 0, sizeof(create_out)); + err = mlx5_cmd_exec_check_status(dev, create_in, inlen, create_out, + sizeof(create_out)); + if (err) + goto out; + + srq->srqn = MLX5_GET(create_xrc_srq_out, create_out, xrc_srqn); +out: + kvfree(create_in); + return err; +} + +static int destroy_xrc_srq_cmd(struct mlx5_core_dev *dev, + struct mlx5_core_srq *srq) +{ + u32 xrcsrq_in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)]; + u32 xrcsrq_out[MLX5_ST_SZ_DW(destroy_xrc_srq_out)]; + + memset(xrcsrq_in, 0, sizeof(xrcsrq_in)); + memset(xrcsrq_out, 0, sizeof(xrcsrq_out)); + + MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, opcode, + MLX5_CMD_OP_DESTROY_XRC_SRQ); + MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn); + + return mlx5_cmd_exec_check_status(dev, xrcsrq_in, sizeof(xrcsrq_in), + xrcsrq_out, sizeof(xrcsrq_out)); +} + +static int arm_xrc_srq_cmd(struct mlx5_core_dev *dev, + struct mlx5_core_srq *srq, u16 lwm) +{ + u32 xrcsrq_in[MLX5_ST_SZ_DW(arm_xrc_srq_in)]; + u32 xrcsrq_out[MLX5_ST_SZ_DW(arm_xrc_srq_out)]; + + memset(xrcsrq_in, 0, sizeof(xrcsrq_in)); + memset(xrcsrq_out, 0, sizeof(xrcsrq_out)); + + MLX5_SET(arm_xrc_srq_in, xrcsrq_in, opcode, MLX5_CMD_OP_ARM_XRC_SRQ); + MLX5_SET(arm_xrc_srq_in, xrcsrq_in, op_mod, MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ); + MLX5_SET(arm_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn); + MLX5_SET(arm_xrc_srq_in, xrcsrq_in, lwm, lwm); + + return mlx5_cmd_exec_check_status(dev, xrcsrq_in, sizeof(xrcsrq_in), + xrcsrq_out, sizeof(xrcsrq_out)); +} + +static int query_xrc_srq_cmd(struct mlx5_core_dev *dev, + struct mlx5_core_srq *srq, + struct mlx5_query_srq_mbox_out *out) +{ + u32 xrcsrq_in[MLX5_ST_SZ_DW(query_xrc_srq_in)]; + u32 *xrcsrq_out; + void *srqc; + void *xrc_srqc; + int err; + + xrcsrq_out = mlx5_vzalloc(MLX5_ST_SZ_BYTES(query_xrc_srq_out)); + if (!xrcsrq_out) + return -ENOMEM; + memset(xrcsrq_in, 0, sizeof(xrcsrq_in)); + + MLX5_SET(query_xrc_srq_in, xrcsrq_in, opcode, + MLX5_CMD_OP_QUERY_XRC_SRQ); + MLX5_SET(query_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn); + err = mlx5_cmd_exec_check_status(dev, xrcsrq_in, sizeof(xrcsrq_in), + xrcsrq_out, + MLX5_ST_SZ_BYTES(query_xrc_srq_out)); + if (err) + goto out; + + xrc_srqc = MLX5_ADDR_OF(query_xrc_srq_out, xrcsrq_out, + xrc_srq_context_entry); + srqc = MLX5_ADDR_OF(query_srq_out, out, srq_context_entry); + memcpy(srqc, xrc_srqc, MLX5_ST_SZ_BYTES(srqc)); + +out: + kvfree(xrcsrq_out); + return err; +} + +static int create_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_create_srq_mbox_in *in, int srq_inlen) +{ + void *create_in; + void *rmpc; + void *srqc; + int pas_size; + int inlen; + int err; + + srqc = MLX5_ADDR_OF(create_srq_in, in, srq_context_entry); + pas_size = get_pas_size(srqc); + inlen = MLX5_ST_SZ_BYTES(create_rmp_in) + pas_size; + create_in = mlx5_vzalloc(inlen); + if (!create_in) + return -ENOMEM; + + rmpc = MLX5_ADDR_OF(create_rmp_in, create_in, ctx); + + memcpy(MLX5_ADDR_OF(rmpc, rmpc, wq.pas), in->pas, pas_size); + rmpc_srqc_reformat(srqc, rmpc, true); + + err = mlx5_core_create_rmp(dev, create_in, inlen, &srq->srqn); + + kvfree(create_in); + return err; +} + +static int destroy_rmp_cmd(struct mlx5_core_dev *dev, + struct mlx5_core_srq *srq) +{ + return mlx5_core_destroy_rmp(dev, srq->srqn); +} + +static int arm_rmp_cmd(struct mlx5_core_dev *dev, + struct mlx5_core_srq *srq, + u16 lwm) +{ + void *in; + void *rmpc; + void *wq; + void *bitmask; + int err; + + in = mlx5_vzalloc(MLX5_ST_SZ_BYTES(modify_rmp_in)); + if (!in) + return -ENOMEM; + + rmpc = MLX5_ADDR_OF(modify_rmp_in, in, ctx); + bitmask = MLX5_ADDR_OF(modify_rmp_in, in, bitmask); + wq = MLX5_ADDR_OF(rmpc, rmpc, wq); + + MLX5_SET(modify_rmp_in, in, rmp_state, MLX5_RMPC_STATE_RDY); + MLX5_SET(modify_rmp_in, in, rmpn, srq->srqn); + MLX5_SET(wq, wq, lwm, lwm); + MLX5_SET(rmp_bitmask, bitmask, lwm, 1); + MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY); + + err = mlx5_core_modify_rmp(dev, in, MLX5_ST_SZ_BYTES(modify_rmp_in)); + + kvfree(in); + return err; +} + +static int query_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_query_srq_mbox_out *out) +{ + u32 *rmp_out; + void *rmpc; + void *srqc; + int err; + + rmp_out = mlx5_vzalloc(MLX5_ST_SZ_BYTES(query_rmp_out)); + if (!rmp_out) + return -ENOMEM; + + err = mlx5_core_query_rmp(dev, srq->srqn, rmp_out); + if (err) + goto out; + + srqc = MLX5_ADDR_OF(query_srq_out, out, srq_context_entry); + rmpc = MLX5_ADDR_OF(query_rmp_out, rmp_out, rmp_context); + rmpc_srqc_reformat(srqc, rmpc, false); + +out: + kvfree(rmp_out); + return err; +} + +static int create_srq_split(struct mlx5_core_dev *dev, + struct mlx5_core_srq *srq, + struct mlx5_create_srq_mbox_in *in, + int inlen, int is_xrc) +{ + if (!dev->issi) + return create_srq_cmd(dev, srq, in, inlen); + else if (srq->common.res == MLX5_RES_XSRQ) + return create_xrc_srq_cmd(dev, srq, in, inlen); + else + return create_rmp_cmd(dev, srq, in, inlen); +} + +static int destroy_srq_split(struct mlx5_core_dev *dev, + struct mlx5_core_srq *srq) +{ + if (!dev->issi) + return destroy_srq_cmd(dev, srq); + else if (srq->common.res == MLX5_RES_XSRQ) + return destroy_xrc_srq_cmd(dev, srq); + else + return destroy_rmp_cmd(dev, srq); +} + +int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_create_srq_mbox_in *in, int inlen, + int is_xrc) +{ + int err; + struct mlx5_srq_table *table = &dev->priv.srq_table; + + srq->common.res = is_xrc ? MLX5_RES_XSRQ : MLX5_RES_SRQ; + + err = create_srq_split(dev, srq, in, inlen, is_xrc); + if (err) + return err; + atomic_set(&srq->refcount, 1); init_completion(&srq->free); @@ -107,25 +461,20 @@ int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, spin_unlock_irq(&table->lock); if (err) { mlx5_core_warn(dev, "err %d, srqn 0x%x\n", err, srq->srqn); - goto err_cmd; + goto err_destroy_srq_split; } return 0; -err_cmd: - memset(&din, 0, sizeof(din)); - memset(&dout, 0, sizeof(dout)); - din.srqn = cpu_to_be32(srq->srqn); - din.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_SRQ); - mlx5_cmd_exec(dev, &din, sizeof(din), &dout, sizeof(dout)); +err_destroy_srq_split: + destroy_srq_split(dev, srq); + return err; } EXPORT_SYMBOL(mlx5_core_create_srq); int mlx5_core_destroy_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq) { - struct mlx5_destroy_srq_mbox_in in; - struct mlx5_destroy_srq_mbox_out out; struct mlx5_srq_table *table = &dev->priv.srq_table; struct mlx5_core_srq *tmp; int err; @@ -142,17 +491,10 @@ int mlx5_core_destroy_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq) return -EINVAL; } - memset(&in, 0, sizeof(in)); - memset(&out, 0, sizeof(out)); - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_SRQ); - in.srqn = cpu_to_be32(srq->srqn); - err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); + err = destroy_srq_split(dev, srq); if (err) return err; - if (out.hdr.status) - return mlx5_cmd_status_to_err(&out.hdr); - if (atomic_dec_and_test(&srq->refcount)) complete(&srq->free); wait_for_completion(&srq->free); @@ -164,48 +506,24 @@ EXPORT_SYMBOL(mlx5_core_destroy_srq); int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, struct mlx5_query_srq_mbox_out *out) { - struct mlx5_query_srq_mbox_in in; - int err; - - memset(&in, 0, sizeof(in)); - memset(out, 0, sizeof(*out)); - - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_SRQ); - in.srqn = cpu_to_be32(srq->srqn); - err = mlx5_cmd_exec(dev, &in, sizeof(in), out, sizeof(*out)); - if (err) - return err; - - if (out->hdr.status) - return mlx5_cmd_status_to_err(&out->hdr); - - return err; + if (!dev->issi) + return query_srq_cmd(dev, srq, out); + else if (srq->common.res == MLX5_RES_XSRQ) + return query_xrc_srq_cmd(dev, srq, out); + else + return query_rmp_cmd(dev, srq, out); } EXPORT_SYMBOL(mlx5_core_query_srq); int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, u16 lwm, int is_srq) { - struct mlx5_arm_srq_mbox_in in; - struct mlx5_arm_srq_mbox_out out; - int err; - - memset(&in, 0, sizeof(in)); - memset(&out, 0, sizeof(out)); - - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ARM_RQ); - in.hdr.opmod = cpu_to_be16(!!is_srq); - in.srqn = cpu_to_be32(srq->srqn); - in.lwm = cpu_to_be16(lwm); - - err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); - if (err) - return err; - - if (out.hdr.status) - return mlx5_cmd_status_to_err(&out.hdr); - - return err; + if (!dev->issi) + return arm_srq_cmd(dev, srq, lwm, is_srq); + else if (srq->common.res == MLX5_RES_XSRQ) + return arm_xrc_srq_cmd(dev, srq, lwm); + else + return arm_rmp_cmd(dev, srq, lwm); } EXPORT_SYMBOL(mlx5_core_arm_srq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c index d918816ac4d8..7a120283de2c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c @@ -169,3 +169,157 @@ void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn) mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); } + +int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen, + u32 *rmpn) +{ + u32 out[MLX5_ST_SZ_DW(create_rmp_out)]; + int err; + + MLX5_SET(create_rmp_in, in, opcode, MLX5_CMD_OP_CREATE_RMP); + + memset(out, 0, sizeof(out)); + err = mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out)); + if (!err) + *rmpn = MLX5_GET(create_rmp_out, out, rmpn); + + return err; +} + +int mlx5_core_modify_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen) +{ + u32 out[MLX5_ST_SZ_DW(modify_rmp_out)]; + + MLX5_SET(modify_rmp_in, in, opcode, MLX5_CMD_OP_MODIFY_RMP); + + memset(out, 0, sizeof(out)); + return mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out)); +} + +int mlx5_core_destroy_rmp(struct mlx5_core_dev *dev, u32 rmpn) +{ + u32 in[MLX5_ST_SZ_DW(destroy_rmp_in)]; + u32 out[MLX5_ST_SZ_DW(destroy_rmp_out)]; + + memset(in, 0, sizeof(in)); + + MLX5_SET(destroy_rmp_in, in, opcode, MLX5_CMD_OP_DESTROY_RMP); + MLX5_SET(destroy_rmp_in, in, rmpn, rmpn); + + return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, + sizeof(out)); +} + +int mlx5_core_query_rmp(struct mlx5_core_dev *dev, u32 rmpn, u32 *out) +{ + u32 in[MLX5_ST_SZ_DW(query_rmp_in)]; + int outlen = MLX5_ST_SZ_BYTES(query_rmp_out); + + memset(in, 0, sizeof(in)); + MLX5_SET(query_rmp_in, in, opcode, MLX5_CMD_OP_QUERY_RMP); + MLX5_SET(query_rmp_in, in, rmpn, rmpn); + + return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, outlen); +} + +int mlx5_core_arm_rmp(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm) +{ + void *in; + void *rmpc; + void *wq; + void *bitmask; + int err; + + in = mlx5_vzalloc(MLX5_ST_SZ_BYTES(modify_rmp_in)); + if (!in) + return -ENOMEM; + + rmpc = MLX5_ADDR_OF(modify_rmp_in, in, ctx); + bitmask = MLX5_ADDR_OF(modify_rmp_in, in, bitmask); + wq = MLX5_ADDR_OF(rmpc, rmpc, wq); + + MLX5_SET(modify_rmp_in, in, rmp_state, MLX5_RMPC_STATE_RDY); + MLX5_SET(modify_rmp_in, in, rmpn, rmpn); + MLX5_SET(wq, wq, lwm, lwm); + MLX5_SET(rmp_bitmask, bitmask, lwm, 1); + MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY); + + err = mlx5_core_modify_rmp(dev, in, MLX5_ST_SZ_BYTES(modify_rmp_in)); + + kvfree(in); + + return err; +} + +int mlx5_core_create_xsrq(struct mlx5_core_dev *dev, u32 *in, int inlen, + u32 *xsrqn) +{ + u32 out[MLX5_ST_SZ_DW(create_xrc_srq_out)]; + int err; + + MLX5_SET(create_xrc_srq_in, in, opcode, MLX5_CMD_OP_CREATE_XRC_SRQ); + + memset(out, 0, sizeof(out)); + err = mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out)); + if (!err) + *xsrqn = MLX5_GET(create_xrc_srq_out, out, xrc_srqn); + + return err; +} + +int mlx5_core_destroy_xsrq(struct mlx5_core_dev *dev, u32 xsrqn) +{ + u32 in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)]; + u32 out[MLX5_ST_SZ_DW(destroy_xrc_srq_out)]; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + + MLX5_SET(destroy_xrc_srq_in, in, opcode, MLX5_CMD_OP_DESTROY_XRC_SRQ); + MLX5_SET(destroy_xrc_srq_in, in, xrc_srqn, xsrqn); + + return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, + sizeof(out)); +} + +int mlx5_core_query_xsrq(struct mlx5_core_dev *dev, u32 xsrqn, u32 *out) +{ + u32 in[MLX5_ST_SZ_DW(query_xrc_srq_in)]; + void *srqc; + void *xrc_srqc; + int err; + + memset(in, 0, sizeof(in)); + MLX5_SET(query_xrc_srq_in, in, opcode, MLX5_CMD_OP_QUERY_XRC_SRQ); + MLX5_SET(query_xrc_srq_in, in, xrc_srqn, xsrqn); + + err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), + out, + MLX5_ST_SZ_BYTES(query_xrc_srq_out)); + if (!err) { + xrc_srqc = MLX5_ADDR_OF(query_xrc_srq_out, out, + xrc_srq_context_entry); + srqc = MLX5_ADDR_OF(query_srq_out, out, srq_context_entry); + memcpy(srqc, xrc_srqc, MLX5_ST_SZ_BYTES(srqc)); + } + + return err; +} + +int mlx5_core_arm_xsrq(struct mlx5_core_dev *dev, u32 xsrqn, u16 lwm) +{ + u32 in[MLX5_ST_SZ_DW(arm_xrc_srq_in)]; + u32 out[MLX5_ST_SZ_DW(arm_xrc_srq_out)]; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + + MLX5_SET(arm_xrc_srq_in, in, opcode, MLX5_CMD_OP_ARM_XRC_SRQ); + MLX5_SET(arm_xrc_srq_in, in, xrc_srqn, xsrqn); + MLX5_SET(arm_xrc_srq_in, in, lwm, lwm); + MLX5_SET(arm_xrc_srq_in, in, op_mod, + MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ); + + return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, + sizeof(out)); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.h b/drivers/net/ethernet/mellanox/mlx5/core/transobj.h index b71d77f61a1d..90322c11361f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/transobj.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.h @@ -47,5 +47,16 @@ void mlx5_core_destroy_tir(struct mlx5_core_dev *dev, u32 tirn); int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *tisn); void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn); +int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen, + u32 *rmpn); +int mlx5_core_modify_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen); +int mlx5_core_destroy_rmp(struct mlx5_core_dev *dev, u32 rmpn); +int mlx5_core_query_rmp(struct mlx5_core_dev *dev, u32 rmpn, u32 *out); +int mlx5_core_arm_rmp(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm); +int mlx5_core_create_xsrq(struct mlx5_core_dev *dev, u32 *in, int inlen, + u32 *rmpn); +int mlx5_core_destroy_xsrq(struct mlx5_core_dev *dev, u32 rmpn); +int mlx5_core_query_xsrq(struct mlx5_core_dev *dev, u32 rmpn, u32 *out); +int mlx5_core_arm_xsrq(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm); #endif /* __TRANSOBJ_H__ */ diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 7fa26f03acc1..ba9f212c94bb 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -339,6 +339,8 @@ struct mlx5_core_mr { enum mlx5_res_type { MLX5_RES_QP, + MLX5_RES_SRQ, + MLX5_RES_XSRQ, }; struct mlx5_core_rsc_common { @@ -348,6 +350,7 @@ struct mlx5_core_rsc_common { }; struct mlx5_core_srq { + struct mlx5_core_rsc_common common; /* must be first */ u32 srqn; int max; int max_gs; @@ -640,7 +643,8 @@ struct mlx5_cmd_mailbox *mlx5_alloc_cmd_mailbox_chain(struct mlx5_core_dev *dev, void mlx5_free_cmd_mailbox_chain(struct mlx5_core_dev *dev, struct mlx5_cmd_mailbox *head); int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_create_srq_mbox_in *in, int inlen); + struct mlx5_create_srq_mbox_in *in, int inlen, + int is_xrc); int mlx5_core_destroy_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq); int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, struct mlx5_query_srq_mbox_out *out); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index b27e9f6e090a..dbe2b32c0539 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -2022,12 +2022,9 @@ struct mlx5_ifc_srqc_bits { u8 reserved_9[0x40]; - u8 db_record_addr_h[0x20]; - - u8 db_record_addr_l[0x1e]; - u8 reserved_10[0x2]; + u8 dbr_addr[0x40]; - u8 reserved_11[0x80]; + u8 reserved_10[0x80]; }; enum { @@ -4167,6 +4164,13 @@ struct mlx5_ifc_modify_rmp_out_bits { u8 reserved_1[0x40]; }; +struct mlx5_ifc_rmp_bitmask_bits { + u8 reserved[0x20]; + + u8 reserved1[0x1f]; + u8 lwm[0x1]; +}; + struct mlx5_ifc_modify_rmp_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; @@ -4180,7 +4184,7 @@ struct mlx5_ifc_modify_rmp_in_bits { u8 reserved_3[0x20]; - u8 modify_bitmask[0x40]; + struct mlx5_ifc_rmp_bitmask_bits bitmask; u8 reserved_4[0x40]; -- cgit v1.2.3 From d18a9470f89727f870db944a36223bf1bb15bdc1 Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Thu, 4 Jun 2015 19:30:40 +0300 Subject: net/mlx5_core: Make the vport helpers available for the IB driver too Move the vport header file to be under include/linux/mlx5, such that the mlx5 IB can use it as well. Also add nic_ prefix to the vport NIC commands to differeniate between HCA vport commands and NIC vport commands. Signed-off-by: Majd Dibbiny Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/vport.c | 6 ++-- drivers/net/ethernet/mellanox/mlx5/core/vport.h | 41 ----------------------- include/linux/mlx5/vport.h | 41 +++++++++++++++++++++++ 5 files changed, 47 insertions(+), 45 deletions(-) delete mode 100644 drivers/net/ethernet/mellanox/mlx5/core/vport.h create mode 100644 include/linux/mlx5/vport.h (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index cbb3c7cb53f7..e9edb7210de1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -35,7 +35,7 @@ #include #include #include -#include "vport.h" +#include #include "wq.h" #include "transobj.h" #include "mlx5_core.h" diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 1b592913d4d7..edba09cca600 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1715,7 +1715,7 @@ static void mlx5e_set_netdev_dev_addr(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); - mlx5_query_vport_mac_address(priv->mdev, netdev->dev_addr); + mlx5_query_nic_vport_mac_address(priv->mdev, netdev->dev_addr); } static void mlx5e_build_netdev(struct net_device *netdev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index ba374b9a6c87..32c4e03f6d3c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -33,7 +33,7 @@ #include #include #include -#include "vport.h" +#include #include "mlx5_core.h" u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod) @@ -55,8 +55,9 @@ u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod) return MLX5_GET(query_vport_state_out, out, state); } +EXPORT_SYMBOL(mlx5_query_vport_state); -void mlx5_query_vport_mac_address(struct mlx5_core_dev *mdev, u8 *addr) +void mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, u8 *addr) { u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)]; u32 *out; @@ -82,3 +83,4 @@ void mlx5_query_vport_mac_address(struct mlx5_core_dev *mdev, u8 *addr) kvfree(out); } +EXPORT_SYMBOL(mlx5_query_nic_vport_mac_address); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.h b/drivers/net/ethernet/mellanox/mlx5/core/vport.h deleted file mode 100644 index c05ca2c3419d..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __MLX5_VPORT_H__ -#define __MLX5_VPORT_H__ - -#include - -u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod); -void mlx5_query_vport_mac_address(struct mlx5_core_dev *mdev, u8 *addr); - -#endif /* __MLX5_VPORT_H__ */ diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h new file mode 100644 index 000000000000..99d0e9f85432 --- /dev/null +++ b/include/linux/mlx5/vport.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __MLX5_VPORT_H__ +#define __MLX5_VPORT_H__ + +#include + +u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod); +void mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, u8 *addr); + +#endif /* __MLX5_VPORT_H__ */ -- cgit v1.2.3 From 707c4602cda6624940761b66a4119f1909492385 Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Thu, 4 Jun 2015 19:30:41 +0300 Subject: net/mlx5_core: Add new query HCA vport commands Added the implementation for the following commands: 1. QUERY_HCA_VPORT_GID 2. QUERY_HCA_VPORT_PKEY 3. QUERY_HCA_VPORT_CONTEXT They will be needed when we move to work with ISSI > 0 in the IB driver too. Signed-off-by: Majd Dibbiny Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/Makefile | 4 +- drivers/net/ethernet/mellanox/mlx5/core/main.c | 10 +- drivers/net/ethernet/mellanox/mlx5/core/vport.c | 259 +++++++++++++++++++++++ include/linux/mlx5/device.h | 13 ++ include/linux/mlx5/driver.h | 45 ++++ include/linux/mlx5/mlx5_ifc.h | 23 +- include/linux/mlx5/vport.h | 14 ++ 7 files changed, 349 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 07540f732df1..26a68b8af2c5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -2,7 +2,7 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \ - mad.o transobj.o -mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o flow_table.o vport.o \ + mad.o transobj.o vport.o +mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o flow_table.o \ en_main.o en_flow_table.o en_ethtool.o en_tx.o en_rx.o \ en_txrx.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 11c7216a2517..58354122dfe4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -284,14 +284,6 @@ static u16 to_fw_pkey_sz(u32 size) } } -static u16 to_sw_pkey_sz(int pkey_sz) -{ - if (pkey_sz > MLX5_MAX_LOG_PKEY_TABLE) - return 0; - - return MLX5_MIN_PKEY_TABLE_SIZE << pkey_sz; -} - int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type, enum mlx5_cap_mode cap_mode) { @@ -386,7 +378,7 @@ static int handle_hca_cap(struct mlx5_core_dev *dev) MLX5_ST_SZ_BYTES(cmd_hca_cap)); mlx5_core_dbg(dev, "Current Pkey table size %d Setting new size %d\n", - to_sw_pkey_sz(MLX5_CAP_GEN(dev, pkey_table_size)), + mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(dev, pkey_table_size)), 128); /* we limit the size of the pkey table to 128 entries for now */ MLX5_SET(cmd_hca_cap, set_hca_cap, pkey_table_size, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index 32c4e03f6d3c..20150ffa8b16 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -84,3 +84,262 @@ void mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, u8 *addr) kvfree(out); } EXPORT_SYMBOL(mlx5_query_nic_vport_mac_address); + +int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport, + u8 port_num, u16 vf_num, u16 gid_index, + union ib_gid *gid) +{ + int in_sz = MLX5_ST_SZ_BYTES(query_hca_vport_gid_in); + int out_sz = MLX5_ST_SZ_BYTES(query_hca_vport_gid_out); + int is_group_manager; + void *out = NULL; + void *in = NULL; + union ib_gid *tmp; + int tbsz; + int nout; + int err; + + is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager); + tbsz = mlx5_get_gid_table_len(MLX5_CAP_GEN(dev, gid_table_size)); + mlx5_core_dbg(dev, "vf_num %d, index %d, gid_table_size %d\n", + vf_num, gid_index, tbsz); + + if (gid_index > tbsz && gid_index != 0xffff) + return -EINVAL; + + if (gid_index == 0xffff) + nout = tbsz; + else + nout = 1; + + out_sz += nout * sizeof(*gid); + + in = kzalloc(in_sz, GFP_KERNEL); + out = kzalloc(out_sz, GFP_KERNEL); + if (!in || !out) { + err = -ENOMEM; + goto out; + } + + MLX5_SET(query_hca_vport_gid_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_VPORT_GID); + if (other_vport) { + if (is_group_manager) { + MLX5_SET(query_hca_vport_gid_in, in, vport_number, vf_num); + MLX5_SET(query_hca_vport_gid_in, in, other_vport, 1); + } else { + err = -EPERM; + goto out; + } + } + MLX5_SET(query_hca_vport_gid_in, in, gid_index, gid_index); + + if (MLX5_CAP_GEN(dev, num_ports) == 2) + MLX5_SET(query_hca_vport_gid_in, in, port_num, port_num); + + err = mlx5_cmd_exec(dev, in, in_sz, out, out_sz); + if (err) + goto out; + + err = mlx5_cmd_status_to_err_v2(out); + if (err) + goto out; + + tmp = out + MLX5_ST_SZ_BYTES(query_hca_vport_gid_out); + gid->global.subnet_prefix = tmp->global.subnet_prefix; + gid->global.interface_id = tmp->global.interface_id; + +out: + kfree(in); + kfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_gid); + +int mlx5_query_hca_vport_pkey(struct mlx5_core_dev *dev, u8 other_vport, + u8 port_num, u16 vf_num, u16 pkey_index, + u16 *pkey) +{ + int in_sz = MLX5_ST_SZ_BYTES(query_hca_vport_pkey_in); + int out_sz = MLX5_ST_SZ_BYTES(query_hca_vport_pkey_out); + int is_group_manager; + void *out = NULL; + void *in = NULL; + void *pkarr; + int nout; + int tbsz; + int err; + int i; + + is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager); + + tbsz = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(dev, pkey_table_size)); + if (pkey_index > tbsz && pkey_index != 0xffff) + return -EINVAL; + + if (pkey_index == 0xffff) + nout = tbsz; + else + nout = 1; + + out_sz += nout * MLX5_ST_SZ_BYTES(pkey); + + in = kzalloc(in_sz, GFP_KERNEL); + out = kzalloc(out_sz, GFP_KERNEL); + if (!in || !out) { + err = -ENOMEM; + goto out; + } + + MLX5_SET(query_hca_vport_pkey_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_VPORT_PKEY); + if (other_vport) { + if (is_group_manager) { + MLX5_SET(query_hca_vport_pkey_in, in, vport_number, vf_num); + MLX5_SET(query_hca_vport_pkey_in, in, other_vport, 1); + } else { + err = -EPERM; + goto out; + } + } + MLX5_SET(query_hca_vport_pkey_in, in, pkey_index, pkey_index); + + if (MLX5_CAP_GEN(dev, num_ports) == 2) + MLX5_SET(query_hca_vport_pkey_in, in, port_num, port_num); + + err = mlx5_cmd_exec(dev, in, in_sz, out, out_sz); + if (err) + goto out; + + err = mlx5_cmd_status_to_err_v2(out); + if (err) + goto out; + + pkarr = MLX5_ADDR_OF(query_hca_vport_pkey_out, out, pkey); + for (i = 0; i < nout; i++, pkey++, pkarr += MLX5_ST_SZ_BYTES(pkey)) + *pkey = MLX5_GET_PR(pkey, pkarr, pkey); + +out: + kfree(in); + kfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_pkey); + +int mlx5_query_hca_vport_context(struct mlx5_core_dev *dev, + u8 other_vport, u8 port_num, + u16 vf_num, + struct mlx5_hca_vport_context *rep) +{ + int out_sz = MLX5_ST_SZ_BYTES(query_hca_vport_context_out); + int in[MLX5_ST_SZ_DW(query_hca_vport_context_in)]; + int is_group_manager; + void *out; + void *ctx; + int err; + + is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager); + + memset(in, 0, sizeof(in)); + out = kzalloc(out_sz, GFP_KERNEL); + if (!out) + return -ENOMEM; + + MLX5_SET(query_hca_vport_context_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT); + + if (other_vport) { + if (is_group_manager) { + MLX5_SET(query_hca_vport_context_in, in, other_vport, 1); + MLX5_SET(query_hca_vport_context_in, in, vport_number, vf_num); + } else { + err = -EPERM; + goto ex; + } + } + + if (MLX5_CAP_GEN(dev, num_ports) == 2) + MLX5_SET(query_hca_vport_context_in, in, port_num, port_num); + + err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz); + if (err) + goto ex; + err = mlx5_cmd_status_to_err_v2(out); + if (err) + goto ex; + + ctx = MLX5_ADDR_OF(query_hca_vport_context_out, out, hca_vport_context); + rep->field_select = MLX5_GET_PR(hca_vport_context, ctx, field_select); + rep->sm_virt_aware = MLX5_GET_PR(hca_vport_context, ctx, sm_virt_aware); + rep->has_smi = MLX5_GET_PR(hca_vport_context, ctx, has_smi); + rep->has_raw = MLX5_GET_PR(hca_vport_context, ctx, has_raw); + rep->policy = MLX5_GET_PR(hca_vport_context, ctx, vport_state_policy); + rep->phys_state = MLX5_GET_PR(hca_vport_context, ctx, + port_physical_state); + rep->vport_state = MLX5_GET_PR(hca_vport_context, ctx, vport_state); + rep->port_physical_state = MLX5_GET_PR(hca_vport_context, ctx, + port_physical_state); + rep->port_guid = MLX5_GET64_PR(hca_vport_context, ctx, port_guid); + rep->node_guid = MLX5_GET64_PR(hca_vport_context, ctx, node_guid); + rep->cap_mask1 = MLX5_GET_PR(hca_vport_context, ctx, cap_mask1); + rep->cap_mask1_perm = MLX5_GET_PR(hca_vport_context, ctx, + cap_mask1_field_select); + rep->cap_mask2 = MLX5_GET_PR(hca_vport_context, ctx, cap_mask2); + rep->cap_mask2_perm = MLX5_GET_PR(hca_vport_context, ctx, + cap_mask2_field_select); + rep->lid = MLX5_GET_PR(hca_vport_context, ctx, lid); + rep->init_type_reply = MLX5_GET_PR(hca_vport_context, ctx, + init_type_reply); + rep->lmc = MLX5_GET_PR(hca_vport_context, ctx, lmc); + rep->subnet_timeout = MLX5_GET_PR(hca_vport_context, ctx, + subnet_timeout); + rep->sm_lid = MLX5_GET_PR(hca_vport_context, ctx, sm_lid); + rep->sm_sl = MLX5_GET_PR(hca_vport_context, ctx, sm_sl); + rep->qkey_violation_counter = MLX5_GET_PR(hca_vport_context, ctx, + qkey_violation_counter); + rep->pkey_violation_counter = MLX5_GET_PR(hca_vport_context, ctx, + pkey_violation_counter); + rep->grh_required = MLX5_GET_PR(hca_vport_context, ctx, grh_required); + rep->sys_image_guid = MLX5_GET64_PR(hca_vport_context, ctx, + system_image_guid); + +ex: + kfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_context); + +int mlx5_query_hca_vport_system_image_guid(struct mlx5_core_dev *dev, + __be64 *sys_image_guid) +{ + struct mlx5_hca_vport_context *rep; + int err; + + rep = kzalloc(sizeof(*rep), GFP_KERNEL); + if (!rep) + return -ENOMEM; + + err = mlx5_query_hca_vport_context(dev, 0, 1, 0, rep); + if (!err) + *sys_image_guid = rep->sys_image_guid; + + kfree(rep); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_system_image_guid); + +int mlx5_query_hca_vport_node_guid(struct mlx5_core_dev *dev, + u64 *node_guid) +{ + struct mlx5_hca_vport_context *rep; + int err; + + rep = kzalloc(sizeof(*rep), GFP_KERNEL); + if (!rep) + return -ENOMEM; + + err = mlx5_query_hca_vport_context(dev, 0, 1, 0, rep); + if (!err) + *node_guid = rep->node_guid; + + kfree(rep); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_node_guid); diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index b288c538347a..b2c43508a737 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -99,6 +99,12 @@ __mlx5_mask(typ, fld)) #define MLX5_GET64(typ, p, fld) be64_to_cpu(*((__be64 *)(p) + __mlx5_64_off(typ, fld))) +#define MLX5_GET64_PR(typ, p, fld) ({ \ + u64 ___t = MLX5_GET64(typ, p, fld); \ + pr_debug(#fld " = 0x%llx\n", ___t); \ + ___t; \ +}) + enum { MLX5_MAX_COMMANDS = 32, MLX5_CMD_DATA_BLOCK_SIZE = 512, @@ -1172,4 +1178,11 @@ enum { MLX5_CMD_STAT_BAD_SIZE_OUTS_CQES_ERR = 0x40, }; +static inline u16 mlx5_to_sw_pkey_sz(int pkey_sz) +{ + if (pkey_sz > MLX5_MAX_LOG_PKEY_TABLE) + return 0; + return MLX5_MIN_PKEY_TABLE_SIZE << pkey_sz; +} + #endif /* MLX5_DEVICE_H */ diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index ba9f212c94bb..8ab8b8af5c32 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -553,6 +553,41 @@ struct mlx5_pas { u8 log_sz; }; +enum port_state_policy { + MLX5_AAA_000 +}; + +enum phy_port_state { + MLX5_AAA_111 +}; + +struct mlx5_hca_vport_context { + u32 field_select; + bool sm_virt_aware; + bool has_smi; + bool has_raw; + enum port_state_policy policy; + enum phy_port_state phys_state; + enum ib_port_state vport_state; + u8 port_physical_state; + u64 sys_image_guid; + u64 port_guid; + u64 node_guid; + u32 cap_mask1; + u32 cap_mask1_perm; + u32 cap_mask2; + u32 cap_mask2_perm; + u16 lid; + u8 init_type_reply; /* bitmask: see ib spec 14.2.5.6 InitTypeReply */ + u8 lmc; + u8 subnet_timeout; + u16 sm_lid; + u8 sm_sl; + u16 qkey_violation_counter; + u16 pkey_violation_counter; + bool grh_required; +}; + static inline void *mlx5_buf_offset(struct mlx5_buf *buf, int offset) { return buf->direct.buf + offset; @@ -792,4 +827,14 @@ struct mlx5_profile { } mr_cache[MAX_MR_CACHE_ENTRIES]; }; +static inline int mlx5_get_gid_table_len(u16 param) +{ + if (param > 4) { + pr_warn("gid table length is zero\n"); + return 0; + } + + return 8 * (1 << param); +} + #endif /* MLX5_DRIVER_H */ diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index dbe2b32c0539..f06d054ad021 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -2221,12 +2221,15 @@ struct mlx5_ifc_hca_vport_context_bits { u8 has_smi[0x1]; u8 has_raw[0x1]; u8 grh_required[0x1]; - u8 reserved_1[0x10]; - u8 port_state_policy[0x4]; - u8 phy_port_state[0x4]; + u8 reserved_1[0xc]; + u8 port_physical_state[0x4]; + u8 vport_state_policy[0x4]; + u8 port_state[0x4]; u8 vport_state[0x4]; - u8 reserved_2[0x60]; + u8 reserved_2[0x20]; + + u8 system_image_guid[0x40]; u8 port_guid[0x40]; @@ -3490,7 +3493,8 @@ struct mlx5_ifc_query_hca_vport_pkey_in_bits { u8 op_mod[0x10]; u8 other_vport[0x1]; - u8 reserved_2[0xf]; + u8 reserved_2[0xb]; + u8 port_num[0x4]; u8 vport_number[0x10]; u8 reserved_3[0x10]; @@ -3519,7 +3523,8 @@ struct mlx5_ifc_query_hca_vport_gid_in_bits { u8 op_mod[0x10]; u8 other_vport[0x1]; - u8 reserved_2[0xf]; + u8 reserved_2[0xb]; + u8 port_num[0x4]; u8 vport_number[0x10]; u8 reserved_3[0x10]; @@ -3545,7 +3550,8 @@ struct mlx5_ifc_query_hca_vport_context_in_bits { u8 op_mod[0x10]; u8 other_vport[0x1]; - u8 reserved_2[0xf]; + u8 reserved_2[0xb]; + u8 port_num[0x4]; u8 vport_number[0x10]; u8 reserved_3[0x20]; @@ -4243,7 +4249,8 @@ struct mlx5_ifc_modify_hca_vport_context_in_bits { u8 op_mod[0x10]; u8 other_vport[0x1]; - u8 reserved_2[0xf]; + u8 reserved_2[0xb]; + u8 port_num[0x4]; u8 vport_number[0x10]; u8 reserved_3[0x20]; diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index 99d0e9f85432..67882a834efb 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -37,5 +37,19 @@ u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod); void mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, u8 *addr); +int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport, + u8 port_num, u16 vf_num, u16 gid_index, + union ib_gid *gid); +int mlx5_query_hca_vport_pkey(struct mlx5_core_dev *dev, u8 other_vport, + u8 port_num, u16 vf_num, u16 pkey_index, + u16 *pkey); +int mlx5_query_hca_vport_context(struct mlx5_core_dev *dev, + u8 other_vport, u8 port_num, + u16 vf_num, + struct mlx5_hca_vport_context *rep); +int mlx5_query_hca_vport_system_image_guid(struct mlx5_core_dev *dev, + __be64 *sys_image_guid); +int mlx5_query_hca_vport_node_guid(struct mlx5_core_dev *dev, + u64 *node_guid); #endif /* __MLX5_VPORT_H__ */ -- cgit v1.2.3 From 211e6c80e5a68ef39a81484583e8efbf9774627d Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Thu, 4 Jun 2015 19:30:42 +0300 Subject: net/mlx5_core: Get vendor-id using the query adapter command Add two wrapper functions to the query adapter command: 1. mlx5_query_board_id -- replaces the old mlx5_cmd_query_adapter. 2. mlx5_core_query_vendor_id -- retrieves the vendor_id from the query_adapter command. Signed-off-by: Majd Dibbiny Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/fw.c | 59 ++++++++++++++++------ drivers/net/ethernet/mellanox/mlx5/core/main.c | 4 +- .../net/ethernet/mellanox/mlx5/core/mlx5_core.h | 2 +- include/linux/mlx5/driver.h | 1 + include/linux/mlx5/mlx5_ifc.h | 7 ++- 5 files changed, 53 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index 801ccadd709a..ba87442ef776 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -35,34 +35,63 @@ #include #include "mlx5_core.h" -int mlx5_cmd_query_adapter(struct mlx5_core_dev *dev) +int mlx5_cmd_query_adapter(struct mlx5_core_dev *dev, u32 *out, int outlen) { - struct mlx5_cmd_query_adapter_mbox_out *out; - struct mlx5_cmd_query_adapter_mbox_in in; + u32 in[MLX5_ST_SZ_DW(query_adapter_in)]; + + memset(in, 0, sizeof(in)); + + MLX5_SET(query_adapter_in, in, opcode, MLX5_CMD_OP_QUERY_ADAPTER); + + return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, outlen); +} + +int mlx5_query_board_id(struct mlx5_core_dev *dev) +{ + u32 *out; + int outlen = MLX5_ST_SZ_BYTES(query_adapter_out); int err; - out = kzalloc(sizeof(*out), GFP_KERNEL); + out = kzalloc(outlen, GFP_KERNEL); if (!out) return -ENOMEM; - memset(&in, 0, sizeof(in)); - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_ADAPTER); - err = mlx5_cmd_exec(dev, &in, sizeof(in), out, sizeof(*out)); + err = mlx5_cmd_query_adapter(dev, out, outlen); if (err) - goto out_out; + goto out; - if (out->hdr.status) { - err = mlx5_cmd_status_to_err(&out->hdr); - goto out_out; - } + memcpy(dev->board_id, + MLX5_ADDR_OF(query_adapter_out, out, + query_adapter_struct.vsd_contd_psid), + MLX5_FLD_SZ_BYTES(query_adapter_out, + query_adapter_struct.vsd_contd_psid)); - memcpy(dev->board_id, out->vsd_psid, sizeof(out->vsd_psid)); - -out_out: +out: kfree(out); + return err; +} + +int mlx5_core_query_vendor_id(struct mlx5_core_dev *mdev, u32 *vendor_id) +{ + u32 *out; + int outlen = MLX5_ST_SZ_BYTES(query_adapter_out); + int err; + + out = kzalloc(outlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + err = mlx5_cmd_query_adapter(mdev, out, outlen); + if (err) + goto out; + + *vendor_id = MLX5_GET(query_adapter_out, out, + query_adapter_struct.ieee_vendor_id); +out: + kfree(out); return err; } +EXPORT_SYMBOL(mlx5_core_query_vendor_id); int mlx5_query_hca_caps(struct mlx5_core_dev *dev) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 58354122dfe4..afad529838de 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -768,9 +768,9 @@ static int mlx5_dev_init(struct mlx5_core_dev *dev, struct pci_dev *pdev) goto err_stop_poll; } - err = mlx5_cmd_query_adapter(dev); + err = mlx5_query_board_id(dev); if (err) { - dev_err(&pdev->dev, "query adapter failed\n"); + dev_err(&pdev->dev, "query board id failed\n"); goto err_stop_poll; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 6983c1047255..fc88ecaecb4b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -78,7 +78,7 @@ static inline int mlx5_cmd_exec_check_status(struct mlx5_core_dev *dev, u32 *in, } int mlx5_query_hca_caps(struct mlx5_core_dev *dev); -int mlx5_cmd_query_adapter(struct mlx5_core_dev *dev); +int mlx5_query_board_id(struct mlx5_core_dev *dev); int mlx5_cmd_init_hca(struct mlx5_core_dev *dev); int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 8ab8b8af5c32..b90fb9336d21 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -817,6 +817,7 @@ struct mlx5_interface { void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol); int mlx5_register_interface(struct mlx5_interface *intf); void mlx5_unregister_interface(struct mlx5_interface *intf); +int mlx5_core_query_vendor_id(struct mlx5_core_dev *mdev, u32 *vendor_id); struct mlx5_profile { u64 mask; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index f06d054ad021..6d2f6fee041c 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -2470,9 +2470,12 @@ union mlx5_ifc_cong_control_roce_ecn_auto_bits { }; struct mlx5_ifc_query_adapter_param_block_bits { - u8 reserved_0[0xe0]; + u8 reserved_0[0xc0]; - u8 reserved_1[0x10]; + u8 reserved_1[0x8]; + u8 ieee_vendor_id[0x18]; + + u8 reserved_2[0x10]; u8 vsd_vendor_id[0x10]; u8 vsd[208][0x8]; -- cgit v1.2.3 From e760152d08da78aa160e68ac90bf8f3f10aff462 Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Thu, 4 Jun 2015 19:30:43 +0300 Subject: net/mlx5_core: Use port number in the query port mtu helpers Extend the function prototypes for max and operational mtu to take the local port number. In the Ethernet driver is this hard coded to one, since ConnectX4 Ethernet devices are always function-per-port. The IB driver also serves older devices (ConnectIB) which isn't such, and hence the part can vary. Signed-off-by: Majd Dibbiny Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/port.c | 15 +++++++++------ include/linux/mlx5/driver.h | 6 ++++-- 3 files changed, 15 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index edba09cca600..7348c5173aa9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1386,7 +1386,7 @@ int mlx5e_open_locked(struct net_device *netdev) return err; } - err = mlx5_query_port_oper_mtu(mdev, &actual_mtu); + err = mlx5_query_port_oper_mtu(mdev, &actual_mtu, 1); if (err) { netdev_err(netdev, "%s: mlx5_query_port_oper_mtu failed %d\n", __func__, err); @@ -1614,7 +1614,7 @@ static int mlx5e_change_mtu(struct net_device *netdev, int new_mtu) int max_mtu; int err = 0; - err = mlx5_query_port_max_mtu(mdev, &max_mtu); + err = mlx5_query_port_max_mtu(mdev, &max_mtu, 1); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index 7d3d0f9f328d..d9498aae5ab5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -213,7 +213,8 @@ int mlx5_query_port_status(struct mlx5_core_dev *dev, u8 *status) } static int mlx5_query_port_mtu(struct mlx5_core_dev *dev, - int *admin_mtu, int *max_mtu, int *oper_mtu) + int *admin_mtu, int *max_mtu, int *oper_mtu, + u8 local_port) { u32 in[MLX5_ST_SZ_DW(pmtu_reg)]; u32 out[MLX5_ST_SZ_DW(pmtu_reg)]; @@ -221,7 +222,7 @@ static int mlx5_query_port_mtu(struct mlx5_core_dev *dev, memset(in, 0, sizeof(in)); - MLX5_SET(pmtu_reg, in, local_port, 1); + MLX5_SET(pmtu_reg, in, local_port, local_port); err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_PMTU, 0, 0); @@ -253,14 +254,16 @@ int mlx5_set_port_mtu(struct mlx5_core_dev *dev, int mtu) } EXPORT_SYMBOL_GPL(mlx5_set_port_mtu); -int mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, int *max_mtu) +int mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, int *max_mtu, + u8 local_port) { - return mlx5_query_port_mtu(dev, NULL, max_mtu, NULL); + return mlx5_query_port_mtu(dev, NULL, max_mtu, NULL, local_port); } EXPORT_SYMBOL_GPL(mlx5_query_port_max_mtu); -int mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, int *oper_mtu) +int mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, int *oper_mtu, + u8 local_port) { - return mlx5_query_port_mtu(dev, NULL, NULL, oper_mtu); + return mlx5_query_port_mtu(dev, NULL, NULL, oper_mtu, local_port); } EXPORT_SYMBOL_GPL(mlx5_query_port_oper_mtu); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index b90fb9336d21..cd09784b6999 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -751,8 +751,10 @@ int mlx5_set_port_status(struct mlx5_core_dev *dev, int mlx5_query_port_status(struct mlx5_core_dev *dev, u8 *status); int mlx5_set_port_mtu(struct mlx5_core_dev *dev, int mtu); -int mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, int *max_mtu); -int mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, int *oper_mtu); +int mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, int *max_mtu, + u8 local_port); +int mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, int *oper_mtu, + u8 local_port); int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq); void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq); -- cgit v1.2.3 From a05bdefa4081d43f9c86c3bb693d0492a21590da Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Thu, 4 Jun 2015 19:30:44 +0300 Subject: net/mlx5_core: Use port number when querying port ptys Until now, mlx5_query_port_ptys always queried port number one. Added new argument in the function's prototype so we can also query the second port. This will be needed when thr helper will be invoked from the IB driver on non FPP (Function-Per-Port) devices. Signed-off-by: Majd Dibbiny Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/port.c | 8 ++++---- include/linux/mlx5/driver.h | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index de7aec8abca1..388938482ff9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -543,7 +543,7 @@ static int mlx5e_get_settings(struct net_device *netdev, u32 eth_proto_oper; int err; - err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN); + err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 1); if (err) { netdev_err(netdev, "%s: query port ptys failed: %d\n", diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index d9498aae5ab5..cbbce40d853c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -104,13 +104,13 @@ int mlx5_set_port_caps(struct mlx5_core_dev *dev, u8 port_num, u32 caps) EXPORT_SYMBOL_GPL(mlx5_set_port_caps); int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys, - int ptys_size, int proto_mask) + int ptys_size, int proto_mask, u8 local_port) { u32 in[MLX5_ST_SZ_DW(ptys_reg)]; int err; memset(in, 0, sizeof(in)); - MLX5_SET(ptys_reg, in, local_port, 1); + MLX5_SET(ptys_reg, in, local_port, local_port); MLX5_SET(ptys_reg, in, proto_mask, proto_mask); err = mlx5_core_access_reg(dev, in, sizeof(in), ptys, @@ -126,7 +126,7 @@ int mlx5_query_port_proto_cap(struct mlx5_core_dev *dev, u32 out[MLX5_ST_SZ_DW(ptys_reg)]; int err; - err = mlx5_query_port_ptys(dev, out, sizeof(out), proto_mask); + err = mlx5_query_port_ptys(dev, out, sizeof(out), proto_mask, 1); if (err) return err; @@ -145,7 +145,7 @@ int mlx5_query_port_proto_admin(struct mlx5_core_dev *dev, u32 out[MLX5_ST_SZ_DW(ptys_reg)]; int err; - err = mlx5_query_port_ptys(dev, out, sizeof(out), proto_mask); + err = mlx5_query_port_ptys(dev, out, sizeof(out), proto_mask, 1); if (err) return err; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index cd09784b6999..e4b814f64014 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -739,7 +739,7 @@ int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in, int mlx5_set_port_caps(struct mlx5_core_dev *dev, u8 port_num, u32 caps); int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys, - int ptys_size, int proto_mask); + int ptys_size, int proto_mask, u8 local_port); int mlx5_query_port_proto_cap(struct mlx5_core_dev *dev, u32 *proto_cap, int proto_mask); int mlx5_query_port_proto_admin(struct mlx5_core_dev *dev, -- cgit v1.2.3 From a124d13ef59e09941fc0924fd7c29ae6d7cd77a3 Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Thu, 4 Jun 2015 19:30:45 +0300 Subject: net/mlx5_core: Add more query port helpers Add the following helpers: 1. mlx5_query_port_proto_oper -- queries the port speed port mask 2. mlx5_query_port_link_width_oper - queries the port link with bitmask 3. mlx5_query_port_vl_hw_cap - queries the Virtual Lanes supported on this port These helpers will be used from the IB driver when working in ISSI > 0 mode. Signed-off-by: Majd Dibbiny Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/port.c | 67 ++++++++++++++++++++++++++ include/linux/mlx5/driver.h | 8 +++ 2 files changed, 75 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index cbbce40d853c..619d3baf19ea 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -158,6 +158,42 @@ int mlx5_query_port_proto_admin(struct mlx5_core_dev *dev, } EXPORT_SYMBOL_GPL(mlx5_query_port_proto_admin); +int mlx5_query_port_link_width_oper(struct mlx5_core_dev *dev, + u8 *link_width_oper, u8 local_port) +{ + u32 out[MLX5_ST_SZ_DW(ptys_reg)]; + int err; + + err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_IB, local_port); + if (err) + return err; + + *link_width_oper = MLX5_GET(ptys_reg, out, ib_link_width_oper); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_query_port_link_width_oper); + +int mlx5_query_port_proto_oper(struct mlx5_core_dev *dev, + u8 *proto_oper, int proto_mask, + u8 local_port) +{ + u32 out[MLX5_ST_SZ_DW(ptys_reg)]; + int err; + + err = mlx5_query_port_ptys(dev, out, sizeof(out), proto_mask, local_port); + if (err) + return err; + + if (proto_mask == MLX5_PTYS_EN) + *proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper); + else + *proto_oper = MLX5_GET(ptys_reg, out, ib_proto_oper); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_query_port_proto_oper); + int mlx5_set_port_proto(struct mlx5_core_dev *dev, u32 proto_admin, int proto_mask) { @@ -267,3 +303,34 @@ int mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, int *oper_mtu, return mlx5_query_port_mtu(dev, NULL, NULL, oper_mtu, local_port); } EXPORT_SYMBOL_GPL(mlx5_query_port_oper_mtu); + +static int mlx5_query_port_pvlc(struct mlx5_core_dev *dev, u32 *pvlc, + int pvlc_size, u8 local_port) +{ + u32 in[MLX5_ST_SZ_DW(pvlc_reg)]; + int err; + + memset(in, 0, sizeof(in)); + MLX5_SET(ptys_reg, in, local_port, local_port); + + err = mlx5_core_access_reg(dev, in, sizeof(in), pvlc, + pvlc_size, MLX5_REG_PVLC, 0, 0); + + return err; +} + +int mlx5_query_port_vl_hw_cap(struct mlx5_core_dev *dev, + u8 *vl_hw_cap, u8 local_port) +{ + u32 out[MLX5_ST_SZ_DW(pvlc_reg)]; + int err; + + err = mlx5_query_port_pvlc(dev, out, sizeof(out), local_port); + if (err) + return err; + + *vl_hw_cap = MLX5_GET(pvlc_reg, out, vl_hw_cap); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_query_port_vl_hw_cap); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index e4b814f64014..6093bde16b94 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -107,6 +107,7 @@ enum { MLX5_REG_PUDE = 0x5009, MLX5_REG_PMPE = 0x5010, MLX5_REG_PELC = 0x500e, + MLX5_REG_PVLC = 0x500f, MLX5_REG_PMLP = 0, /* TBD */ MLX5_REG_NODE_DESC = 0x6001, MLX5_REG_HOST_ENDIANNESS = 0x7004, @@ -744,6 +745,11 @@ int mlx5_query_port_proto_cap(struct mlx5_core_dev *dev, u32 *proto_cap, int proto_mask); int mlx5_query_port_proto_admin(struct mlx5_core_dev *dev, u32 *proto_admin, int proto_mask); +int mlx5_query_port_link_width_oper(struct mlx5_core_dev *dev, + u8 *link_width_oper, u8 local_port); +int mlx5_query_port_proto_oper(struct mlx5_core_dev *dev, + u8 *proto_oper, int proto_mask, + u8 local_port); int mlx5_set_port_proto(struct mlx5_core_dev *dev, u32 proto_admin, int proto_mask); int mlx5_set_port_status(struct mlx5_core_dev *dev, @@ -755,6 +761,8 @@ int mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, int *max_mtu, u8 local_port); int mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, int *oper_mtu, u8 local_port); +int mlx5_query_port_vl_hw_cap(struct mlx5_core_dev *dev, + u8 *vl_hw_cap, u8 local_port); int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq); void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq); -- cgit v1.2.3 From d691f9e8d4405c334aa10d556e73c8bf44cb0e01 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 4 Jun 2015 10:11:54 -0700 Subject: bpf: allow programs to write to certain skb fields allow programs read/write skb->mark, tc_index fields and ((struct qdisc_skb_cb *)cb)->data. mark and tc_index are generically useful in TC. cb[0]-cb[4] are primarily used to pass arguments from one program to another called via bpf_tail_call() which can be seen in sockex3_kern.c example. All fields of 'struct __sk_buff' are readable to socket and tc_cls_act progs. mark, tc_index are writeable from tc_cls_act only. cb[0]-cb[4] are writeable by both sockets and tc_cls_act. Add verifier tests and improve sample code. Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 3 +- include/uapi/linux/bpf.h | 2 + kernel/bpf/verifier.c | 37 +++++++++++++----- net/core/filter.c | 94 +++++++++++++++++++++++++++++++++++++++------ samples/bpf/sockex3_kern.c | 35 ++++++----------- samples/bpf/test_verifier.c | 84 +++++++++++++++++++++++++++++++++++++++- 6 files changed, 207 insertions(+), 48 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index ca854e5bb2f7..2235aee8096a 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -105,7 +105,8 @@ struct bpf_verifier_ops { */ bool (*is_valid_access)(int off, int size, enum bpf_access_type type); - u32 (*convert_ctx_access)(int dst_reg, int src_reg, int ctx_off, + u32 (*convert_ctx_access)(enum bpf_access_type type, int dst_reg, + int src_reg, int ctx_off, struct bpf_insn *insn); }; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 42aa19abab86..602f05b7a275 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -248,6 +248,8 @@ struct __sk_buff { __u32 priority; __u32 ingress_ifindex; __u32 ifindex; + __u32 tc_index; + __u32 cb[5]; }; #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index cfd9a40b9a5a..039d866fd36a 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1692,6 +1692,8 @@ static int do_check(struct verifier_env *env) } } else if (class == BPF_STX) { + enum bpf_reg_type dst_reg_type; + if (BPF_MODE(insn->code) == BPF_XADD) { err = check_xadd(env, insn); if (err) @@ -1700,11 +1702,6 @@ static int do_check(struct verifier_env *env) continue; } - if (BPF_MODE(insn->code) != BPF_MEM || - insn->imm != 0) { - verbose("BPF_STX uses reserved fields\n"); - return -EINVAL; - } /* check src1 operand */ err = check_reg_arg(regs, insn->src_reg, SRC_OP); if (err) @@ -1714,6 +1711,8 @@ static int do_check(struct verifier_env *env) if (err) return err; + dst_reg_type = regs[insn->dst_reg].type; + /* check that memory (dst_reg + off) is writeable */ err = check_mem_access(env, insn->dst_reg, insn->off, BPF_SIZE(insn->code), BPF_WRITE, @@ -1721,6 +1720,15 @@ static int do_check(struct verifier_env *env) if (err) return err; + if (insn->imm == 0) { + insn->imm = dst_reg_type; + } else if (dst_reg_type != insn->imm && + (dst_reg_type == PTR_TO_CTX || + insn->imm == PTR_TO_CTX)) { + verbose("same insn cannot be used with different pointers\n"); + return -EINVAL; + } + } else if (class == BPF_ST) { if (BPF_MODE(insn->code) != BPF_MEM || insn->src_reg != BPF_REG_0) { @@ -1839,12 +1847,18 @@ static int replace_map_fd_with_map_ptr(struct verifier_env *env) for (i = 0; i < insn_cnt; i++, insn++) { if (BPF_CLASS(insn->code) == BPF_LDX && - (BPF_MODE(insn->code) != BPF_MEM || - insn->imm != 0)) { + (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0)) { verbose("BPF_LDX uses reserved fields\n"); return -EINVAL; } + if (BPF_CLASS(insn->code) == BPF_STX && + ((BPF_MODE(insn->code) != BPF_MEM && + BPF_MODE(insn->code) != BPF_XADD) || insn->imm != 0)) { + verbose("BPF_STX uses reserved fields\n"); + return -EINVAL; + } + if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) { struct bpf_map *map; struct fd f; @@ -1967,12 +1981,17 @@ static int convert_ctx_accesses(struct verifier_env *env) struct bpf_prog *new_prog; u32 cnt; int i; + enum bpf_access_type type; if (!env->prog->aux->ops->convert_ctx_access) return 0; for (i = 0; i < insn_cnt; i++, insn++) { - if (insn->code != (BPF_LDX | BPF_MEM | BPF_W)) + if (insn->code == (BPF_LDX | BPF_MEM | BPF_W)) + type = BPF_READ; + else if (insn->code == (BPF_STX | BPF_MEM | BPF_W)) + type = BPF_WRITE; + else continue; if (insn->imm != PTR_TO_CTX) { @@ -1982,7 +2001,7 @@ static int convert_ctx_accesses(struct verifier_env *env) } cnt = env->prog->aux->ops-> - convert_ctx_access(insn->dst_reg, insn->src_reg, + convert_ctx_access(type, insn->dst_reg, insn->src_reg, insn->off, insn_buf); if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) { verbose("bpf verifier is misconfigured\n"); diff --git a/net/core/filter.c b/net/core/filter.c index 36a69e33d76b..d271c06bf01f 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -46,6 +46,7 @@ #include #include #include +#include /** * sk_filter - run a packet through a socket filter @@ -1463,13 +1464,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) } } -static bool sk_filter_is_valid_access(int off, int size, - enum bpf_access_type type) +static bool __is_valid_access(int off, int size, enum bpf_access_type type) { - /* only read is allowed */ - if (type != BPF_READ) - return false; - /* check bounds */ if (off < 0 || off >= sizeof(struct __sk_buff)) return false; @@ -1485,8 +1481,42 @@ static bool sk_filter_is_valid_access(int off, int size, return true; } -static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off, - struct bpf_insn *insn_buf) +static bool sk_filter_is_valid_access(int off, int size, + enum bpf_access_type type) +{ + if (type == BPF_WRITE) { + switch (off) { + case offsetof(struct __sk_buff, cb[0]) ... + offsetof(struct __sk_buff, cb[4]): + break; + default: + return false; + } + } + + return __is_valid_access(off, size, type); +} + +static bool tc_cls_act_is_valid_access(int off, int size, + enum bpf_access_type type) +{ + if (type == BPF_WRITE) { + switch (off) { + case offsetof(struct __sk_buff, mark): + case offsetof(struct __sk_buff, tc_index): + case offsetof(struct __sk_buff, cb[0]) ... + offsetof(struct __sk_buff, cb[4]): + break; + default: + return false; + } + } + return __is_valid_access(off, size, type); +} + +static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg, + int src_reg, int ctx_off, + struct bpf_insn *insn_buf) { struct bpf_insn *insn = insn_buf; @@ -1538,7 +1568,15 @@ static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off, break; case offsetof(struct __sk_buff, mark): - return convert_skb_access(SKF_AD_MARK, dst_reg, src_reg, insn); + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); + + if (type == BPF_WRITE) + *insn++ = BPF_STX_MEM(BPF_W, dst_reg, src_reg, + offsetof(struct sk_buff, mark)); + else + *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, + offsetof(struct sk_buff, mark)); + break; case offsetof(struct __sk_buff, pkt_type): return convert_skb_access(SKF_AD_PKTTYPE, dst_reg, src_reg, insn); @@ -1553,6 +1591,38 @@ static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off, case offsetof(struct __sk_buff, vlan_tci): return convert_skb_access(SKF_AD_VLAN_TAG, dst_reg, src_reg, insn); + + case offsetof(struct __sk_buff, cb[0]) ... + offsetof(struct __sk_buff, cb[4]): + BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, data) < 20); + + ctx_off -= offsetof(struct __sk_buff, cb[0]); + ctx_off += offsetof(struct sk_buff, cb); + ctx_off += offsetof(struct qdisc_skb_cb, data); + if (type == BPF_WRITE) + *insn++ = BPF_STX_MEM(BPF_W, dst_reg, src_reg, ctx_off); + else + *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, ctx_off); + break; + + case offsetof(struct __sk_buff, tc_index): +#ifdef CONFIG_NET_SCHED + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tc_index) != 2); + + if (type == BPF_WRITE) + *insn++ = BPF_STX_MEM(BPF_H, dst_reg, src_reg, + offsetof(struct sk_buff, tc_index)); + else + *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg, + offsetof(struct sk_buff, tc_index)); + break; +#else + if (type == BPF_WRITE) + *insn++ = BPF_MOV64_REG(dst_reg, dst_reg); + else + *insn++ = BPF_MOV64_IMM(dst_reg, 0); + break; +#endif } return insn - insn_buf; @@ -1561,13 +1631,13 @@ static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off, static const struct bpf_verifier_ops sk_filter_ops = { .get_func_proto = sk_filter_func_proto, .is_valid_access = sk_filter_is_valid_access, - .convert_ctx_access = sk_filter_convert_ctx_access, + .convert_ctx_access = bpf_net_convert_ctx_access, }; static const struct bpf_verifier_ops tc_cls_act_ops = { .get_func_proto = tc_cls_act_func_proto, - .is_valid_access = sk_filter_is_valid_access, - .convert_ctx_access = sk_filter_convert_ctx_access, + .is_valid_access = tc_cls_act_is_valid_access, + .convert_ctx_access = bpf_net_convert_ctx_access, }; static struct bpf_prog_type_list sk_filter_type __read_mostly = { diff --git a/samples/bpf/sockex3_kern.c b/samples/bpf/sockex3_kern.c index 2625b987944f..41ae2fd21b13 100644 --- a/samples/bpf/sockex3_kern.c +++ b/samples/bpf/sockex3_kern.c @@ -89,7 +89,6 @@ static inline __u32 ipv6_addr_hash(struct __sk_buff *ctx, __u64 off) struct globals { struct flow_keys flow; - __u32 nhoff; }; struct bpf_map_def SEC("maps") percpu_map = { @@ -139,7 +138,7 @@ static void update_stats(struct __sk_buff *skb, struct globals *g) static __always_inline void parse_ip_proto(struct __sk_buff *skb, struct globals *g, __u32 ip_proto) { - __u32 nhoff = g->nhoff; + __u32 nhoff = skb->cb[0]; int poff; switch (ip_proto) { @@ -165,7 +164,7 @@ static __always_inline void parse_ip_proto(struct __sk_buff *skb, if (gre_flags & GRE_SEQ) nhoff += 4; - g->nhoff = nhoff; + skb->cb[0] = nhoff; parse_eth_proto(skb, gre_proto); break; } @@ -195,7 +194,7 @@ PROG(PARSE_IP)(struct __sk_buff *skb) if (!g) return 0; - nhoff = g->nhoff; + nhoff = skb->cb[0]; if (unlikely(ip_is_fragment(skb, nhoff))) return 0; @@ -210,7 +209,7 @@ PROG(PARSE_IP)(struct __sk_buff *skb) verlen = load_byte(skb, nhoff + 0/*offsetof(struct iphdr, ihl)*/); nhoff += (verlen & 0xF) << 2; - g->nhoff = nhoff; + skb->cb[0] = nhoff; parse_ip_proto(skb, g, ip_proto); return 0; } @@ -223,7 +222,7 @@ PROG(PARSE_IPV6)(struct __sk_buff *skb) if (!g) return 0; - nhoff = g->nhoff; + nhoff = skb->cb[0]; ip_proto = load_byte(skb, nhoff + offsetof(struct ipv6hdr, nexthdr)); @@ -233,25 +232,21 @@ PROG(PARSE_IPV6)(struct __sk_buff *skb) nhoff + offsetof(struct ipv6hdr, daddr)); nhoff += sizeof(struct ipv6hdr); - g->nhoff = nhoff; + skb->cb[0] = nhoff; parse_ip_proto(skb, g, ip_proto); return 0; } PROG(PARSE_VLAN)(struct __sk_buff *skb) { - struct globals *g = this_cpu_globals(); __u32 nhoff, proto; - if (!g) - return 0; - - nhoff = g->nhoff; + nhoff = skb->cb[0]; proto = load_half(skb, nhoff + offsetof(struct vlan_hdr, h_vlan_encapsulated_proto)); nhoff += sizeof(struct vlan_hdr); - g->nhoff = nhoff; + skb->cb[0] = nhoff; parse_eth_proto(skb, proto); @@ -260,17 +255,13 @@ PROG(PARSE_VLAN)(struct __sk_buff *skb) PROG(PARSE_MPLS)(struct __sk_buff *skb) { - struct globals *g = this_cpu_globals(); __u32 nhoff, label; - if (!g) - return 0; - - nhoff = g->nhoff; + nhoff = skb->cb[0]; label = load_word(skb, nhoff); nhoff += sizeof(struct mpls_label); - g->nhoff = nhoff; + skb->cb[0] = nhoff; if (label & MPLS_LS_S_MASK) { __u8 verlen = load_byte(skb, nhoff); @@ -288,14 +279,10 @@ PROG(PARSE_MPLS)(struct __sk_buff *skb) SEC("socket/0") int main_prog(struct __sk_buff *skb) { - struct globals *g = this_cpu_globals(); __u32 nhoff = ETH_HLEN; __u32 proto = load_half(skb, 12); - if (!g) - return 0; - - g->nhoff = nhoff; + skb->cb[0] = nhoff; parse_eth_proto(skb, proto); return 0; } diff --git a/samples/bpf/test_verifier.c b/samples/bpf/test_verifier.c index 12f3780af73f..693605997abc 100644 --- a/samples/bpf/test_verifier.c +++ b/samples/bpf/test_verifier.c @@ -29,6 +29,7 @@ struct bpf_test { ACCEPT, REJECT } result; + enum bpf_prog_type prog_type; }; static struct bpf_test tests[] = { @@ -743,6 +744,84 @@ static struct bpf_test tests[] = { .errstr = "different pointers", .result = REJECT, }, + { + "check skb->mark is not writeable by sockets", + .insns = { + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + }, + { + "check skb->tc_index is not writeable by sockets", + .insns = { + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, + offsetof(struct __sk_buff, tc_index)), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + }, + { + "check non-u32 access to cb", + .insns = { + BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_1, + offsetof(struct __sk_buff, cb[0])), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + }, + { + "check out of range skb->cb access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[60])), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_ACT, + }, + { + "write skb fields from socket prog", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[4])), + BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, tc_index)), + BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, + offsetof(struct __sk_buff, cb[0])), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, + offsetof(struct __sk_buff, cb[2])), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + }, + { + "write skb fields from tc_cls_act prog", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[0])), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, mark)), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, tc_index)), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, tc_index)), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[3])), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, }; static int probe_filter_length(struct bpf_insn *fp) @@ -775,6 +854,7 @@ static int test(void) for (i = 0; i < ARRAY_SIZE(tests); i++) { struct bpf_insn *prog = tests[i].insns; + int prog_type = tests[i].prog_type; int prog_len = probe_filter_length(prog); int *fixup = tests[i].fixup; int map_fd = -1; @@ -789,8 +869,8 @@ static int test(void) } printf("#%d %s ", i, tests[i].descr); - prog_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, prog, - prog_len * sizeof(struct bpf_insn), + prog_fd = bpf_prog_load(prog_type ?: BPF_PROG_TYPE_SOCKET_FILTER, + prog, prog_len * sizeof(struct bpf_insn), "GPL", 0); if (tests[i].result == ACCEPT) { -- cgit v1.2.3 From 7cf7fa529d0b6b514949cc67b39e3ce406c37006 Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Sun, 7 Jun 2015 15:44:23 +0300 Subject: net/mlx5_core: Fix static checker warnings around system guid query flow Fix static checker warnings in the flow of system guid query. Fixes: 707c4602cda6 ('net/mlx5_core: Add new query HCA vport commands') Signed-off-by: Majd Dibbiny Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/fw.c | 3 ++- drivers/net/ethernet/mellanox/mlx5/core/vport.c | 2 +- include/linux/mlx5/vport.h | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index ba87442ef776..9335e5ae18cc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -35,7 +35,8 @@ #include #include "mlx5_core.h" -int mlx5_cmd_query_adapter(struct mlx5_core_dev *dev, u32 *out, int outlen) +static int mlx5_cmd_query_adapter(struct mlx5_core_dev *dev, u32 *out, + int outlen) { u32 in[MLX5_ST_SZ_DW(query_adapter_in)]; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index 20150ffa8b16..b94177ebcf3a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -307,7 +307,7 @@ ex: EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_context); int mlx5_query_hca_vport_system_image_guid(struct mlx5_core_dev *dev, - __be64 *sys_image_guid) + u64 *sys_image_guid) { struct mlx5_hca_vport_context *rep; int err; diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index 67882a834efb..967e0fd06e89 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -48,7 +48,7 @@ int mlx5_query_hca_vport_context(struct mlx5_core_dev *dev, u16 vf_num, struct mlx5_hca_vport_context *rep); int mlx5_query_hca_vport_system_image_guid(struct mlx5_core_dev *dev, - __be64 *sys_image_guid); + u64 *sys_image_guid); int mlx5_query_hca_vport_node_guid(struct mlx5_core_dev *dev, u64 *node_guid); -- cgit v1.2.3 From a8077d6573530a91d5674a28cdedbed39c391ff0 Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Sun, 7 Jun 2015 13:15:30 +0200 Subject: bcma: make calls to PCI hostmode functions config-safe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rafał Miłecki Signed-off-by: Kalle Valo --- include/linux/bcma/bcma_driver_pci.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bcma/bcma_driver_pci.h b/include/linux/bcma/bcma_driver_pci.h index 5ba6918ca20b..9657f11d48a7 100644 --- a/include/linux/bcma/bcma_driver_pci.h +++ b/include/linux/bcma/bcma_driver_pci.h @@ -246,7 +246,18 @@ static inline void bcma_core_pci_power_save(struct bcma_bus *bus, bool up) } #endif +#ifdef CONFIG_BCMA_DRIVER_PCI_HOSTMODE extern int bcma_core_pci_pcibios_map_irq(const struct pci_dev *dev); extern int bcma_core_pci_plat_dev_init(struct pci_dev *dev); +#else +static inline int bcma_core_pci_pcibios_map_irq(const struct pci_dev *dev) +{ + return -ENOTSUPP; +} +static inline int bcma_core_pci_plat_dev_init(struct pci_dev *dev) +{ + return -ENOTSUPP; +} +#endif #endif /* LINUX_BCMA_DRIVER_PCI_H_ */ -- cgit v1.2.3 From ed06aeefdac348cfb91a3db5fe1067e3202afd70 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Tue, 9 Jun 2015 22:26:05 +0200 Subject: nfc: st-nci: Rename st21nfcb to st-nci STMicroelectronics NFC NCI chips family is extending with the new ST21NFCC using the AMS AS39230 RF booster. The st21nfcb driver is relevant for this solution and might be with future products. Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- .../devicetree/bindings/net/nfc/st-nci.txt | 33 + .../devicetree/bindings/net/nfc/st21nfcb.txt | 33 - drivers/nfc/Kconfig | 2 +- drivers/nfc/Makefile | 2 +- drivers/nfc/st-nci/Kconfig | 23 + drivers/nfc/st-nci/Makefile | 9 + drivers/nfc/st-nci/core.c | 179 ++++++ drivers/nfc/st-nci/i2c.c | 385 +++++++++++ drivers/nfc/st-nci/ndlc.c | 313 +++++++++ drivers/nfc/st-nci/ndlc.h | 60 ++ drivers/nfc/st-nci/st-nci.h | 50 ++ drivers/nfc/st-nci/st-nci_se.c | 714 +++++++++++++++++++++ drivers/nfc/st-nci/st-nci_se.h | 61 ++ drivers/nfc/st21nfcb/Kconfig | 22 - drivers/nfc/st21nfcb/Makefile | 9 - drivers/nfc/st21nfcb/i2c.c | 384 ----------- drivers/nfc/st21nfcb/ndlc.c | 313 --------- drivers/nfc/st21nfcb/ndlc.h | 60 -- drivers/nfc/st21nfcb/st21nfcb.c | 179 ------ drivers/nfc/st21nfcb/st21nfcb.h | 50 -- drivers/nfc/st21nfcb/st21nfcb_se.c | 713 -------------------- drivers/nfc/st21nfcb/st21nfcb_se.h | 61 -- include/linux/platform_data/st-nci.h | 29 + include/linux/platform_data/st21nfcb.h | 29 - include/linux/platform_data/st_nci.h | 29 + 25 files changed, 1887 insertions(+), 1855 deletions(-) create mode 100644 Documentation/devicetree/bindings/net/nfc/st-nci.txt delete mode 100644 Documentation/devicetree/bindings/net/nfc/st21nfcb.txt create mode 100644 drivers/nfc/st-nci/Kconfig create mode 100644 drivers/nfc/st-nci/Makefile create mode 100644 drivers/nfc/st-nci/core.c create mode 100644 drivers/nfc/st-nci/i2c.c create mode 100644 drivers/nfc/st-nci/ndlc.c create mode 100644 drivers/nfc/st-nci/ndlc.h create mode 100644 drivers/nfc/st-nci/st-nci.h create mode 100644 drivers/nfc/st-nci/st-nci_se.c create mode 100644 drivers/nfc/st-nci/st-nci_se.h delete mode 100644 drivers/nfc/st21nfcb/Kconfig delete mode 100644 drivers/nfc/st21nfcb/Makefile delete mode 100644 drivers/nfc/st21nfcb/i2c.c delete mode 100644 drivers/nfc/st21nfcb/ndlc.c delete mode 100644 drivers/nfc/st21nfcb/ndlc.h delete mode 100644 drivers/nfc/st21nfcb/st21nfcb.c delete mode 100644 drivers/nfc/st21nfcb/st21nfcb.h delete mode 100644 drivers/nfc/st21nfcb/st21nfcb_se.c delete mode 100644 drivers/nfc/st21nfcb/st21nfcb_se.h create mode 100644 include/linux/platform_data/st-nci.h delete mode 100644 include/linux/platform_data/st21nfcb.h create mode 100644 include/linux/platform_data/st_nci.h (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/net/nfc/st-nci.txt b/Documentation/devicetree/bindings/net/nfc/st-nci.txt new file mode 100644 index 000000000000..d707588ed734 --- /dev/null +++ b/Documentation/devicetree/bindings/net/nfc/st-nci.txt @@ -0,0 +1,33 @@ +* STMicroelectronics SAS. ST NCI NFC Controller + +Required properties: +- compatible: Should be "st,st21nfcb-i2c" or "st,st21nfcc-i2c". +- clock-frequency: I²C work frequency. +- reg: address on the bus +- interrupt-parent: phandle for the interrupt gpio controller +- interrupts: GPIO interrupt to which the chip is connected +- reset-gpios: Output GPIO pin used to reset the ST21NFCB + +Optional SoC Specific Properties: +- pinctrl-names: Contains only one value - "default". +- pintctrl-0: Specifies the pin control groups used for this controller. + +Example (for ARM-based BeagleBoard xM with ST21NFCB on I2C2): + +&i2c2 { + + status = "okay"; + + st21nfcb: st21nfcb@8 { + + compatible = "st,st21nfcb-i2c"; + + reg = <0x08>; + clock-frequency = <400000>; + + interrupt-parent = <&gpio5>; + interrupts = <2 IRQ_TYPE_LEVEL_HIGH>; + + reset-gpios = <&gpio5 29 GPIO_ACTIVE_HIGH>; + }; +}; diff --git a/Documentation/devicetree/bindings/net/nfc/st21nfcb.txt b/Documentation/devicetree/bindings/net/nfc/st21nfcb.txt deleted file mode 100644 index bb237072dbe9..000000000000 --- a/Documentation/devicetree/bindings/net/nfc/st21nfcb.txt +++ /dev/null @@ -1,33 +0,0 @@ -* STMicroelectronics SAS. ST21NFCB NFC Controller - -Required properties: -- compatible: Should be "st,st21nfcb-i2c". -- clock-frequency: I²C work frequency. -- reg: address on the bus -- interrupt-parent: phandle for the interrupt gpio controller -- interrupts: GPIO interrupt to which the chip is connected -- reset-gpios: Output GPIO pin used to reset the ST21NFCB - -Optional SoC Specific Properties: -- pinctrl-names: Contains only one value - "default". -- pintctrl-0: Specifies the pin control groups used for this controller. - -Example (for ARM-based BeagleBoard xM with ST21NFCB on I2C2): - -&i2c2 { - - status = "okay"; - - st21nfcb: st21nfcb@8 { - - compatible = "st,st21nfcb-i2c"; - - reg = <0x08>; - clock-frequency = <400000>; - - interrupt-parent = <&gpio5>; - interrupts = <2 IRQ_TYPE_LEVEL_HIGH>; - - reset-gpios = <&gpio5 29 GPIO_ACTIVE_HIGH>; - }; -}; diff --git a/drivers/nfc/Kconfig b/drivers/nfc/Kconfig index 107714e4405f..722673cb785b 100644 --- a/drivers/nfc/Kconfig +++ b/drivers/nfc/Kconfig @@ -72,6 +72,6 @@ source "drivers/nfc/pn544/Kconfig" source "drivers/nfc/microread/Kconfig" source "drivers/nfc/nfcmrvl/Kconfig" source "drivers/nfc/st21nfca/Kconfig" -source "drivers/nfc/st21nfcb/Kconfig" +source "drivers/nfc/st-nci/Kconfig" source "drivers/nfc/nxp-nci/Kconfig" endmenu diff --git a/drivers/nfc/Makefile b/drivers/nfc/Makefile index 13b648baf175..368b6dfe71b3 100644 --- a/drivers/nfc/Makefile +++ b/drivers/nfc/Makefile @@ -12,5 +12,5 @@ obj-$(CONFIG_NFC_PORT100) += port100.o obj-$(CONFIG_NFC_MRVL) += nfcmrvl/ obj-$(CONFIG_NFC_TRF7970A) += trf7970a.o obj-$(CONFIG_NFC_ST21NFCA) += st21nfca/ -obj-$(CONFIG_NFC_ST21NFCB) += st21nfcb/ +obj-$(CONFIG_NFC_ST_NCI) += st-nci/ obj-$(CONFIG_NFC_NXP_NCI) += nxp-nci/ diff --git a/drivers/nfc/st-nci/Kconfig b/drivers/nfc/st-nci/Kconfig new file mode 100644 index 000000000000..fc3904c946ee --- /dev/null +++ b/drivers/nfc/st-nci/Kconfig @@ -0,0 +1,23 @@ +config NFC_ST_NCI + tristate "STMicroelectronics ST NCI NFC driver" + depends on NFC_NCI + default n + ---help--- + STMicroelectronics NFC NCI chips core driver. It implements the chipset + NCI logic and hooks into the NFC kernel APIs. Physical layers will + register against it. + + To compile this driver as a module, choose m here. The module will + be called st-nci. + Say N if unsure. + +config NFC_ST_NCI_I2C + tristate "NFC ST NCI i2c support" + depends on NFC_ST_NCI && I2C + ---help--- + This module adds support for an I2C interface to the + STMicroelectronics NFC NCI chips familly. + Select this if your platform is using the i2c bus. + + If you choose to build a module, it'll be called st-nci_i2c. + Say N if unsure. diff --git a/drivers/nfc/st-nci/Makefile b/drivers/nfc/st-nci/Makefile new file mode 100644 index 000000000000..0df157df3a94 --- /dev/null +++ b/drivers/nfc/st-nci/Makefile @@ -0,0 +1,9 @@ +# +# Makefile for ST21NFCB NCI based NFC driver +# + +st-nci-objs = ndlc.o core.o st-nci_se.o +obj-$(CONFIG_NFC_ST_NCI) += st-nci.o + +st-nci_i2c-objs = i2c.o +obj-$(CONFIG_NFC_ST_NCI_I2C) += st-nci_i2c.o diff --git a/drivers/nfc/st-nci/core.c b/drivers/nfc/st-nci/core.c new file mode 100644 index 000000000000..c419d3943973 --- /dev/null +++ b/drivers/nfc/st-nci/core.c @@ -0,0 +1,179 @@ +/* + * NCI based Driver for STMicroelectronics NFC Chip + * + * Copyright (C) 2014-2015 STMicroelectronics SAS. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#include +#include +#include +#include +#include +#include + +#include "st-nci.h" +#include "st-nci_se.h" + +#define DRIVER_DESC "NCI NFC driver for ST_NCI" + +#define ST_NCI1_X_PROPRIETARY_ISO15693 0x83 + +static int st_nci_init(struct nci_dev *ndev) +{ + struct nci_mode_set_cmd cmd; + + cmd.cmd_type = ST_NCI_SET_NFC_MODE; + cmd.mode = 1; + + return nci_prop_cmd(ndev, ST_NCI_CORE_PROP, + sizeof(struct nci_mode_set_cmd), (__u8 *)&cmd); +} + +static int st_nci_open(struct nci_dev *ndev) +{ + struct st_nci_info *info = nci_get_drvdata(ndev); + int r; + + if (test_and_set_bit(ST_NCI_RUNNING, &info->flags)) + return 0; + + r = ndlc_open(info->ndlc); + if (r) + clear_bit(ST_NCI_RUNNING, &info->flags); + + return r; +} + +static int st_nci_close(struct nci_dev *ndev) +{ + struct st_nci_info *info = nci_get_drvdata(ndev); + + if (!test_bit(ST_NCI_RUNNING, &info->flags)) + return 0; + + ndlc_close(info->ndlc); + + clear_bit(ST_NCI_RUNNING, &info->flags); + + return 0; +} + +static int st_nci_send(struct nci_dev *ndev, struct sk_buff *skb) +{ + struct st_nci_info *info = nci_get_drvdata(ndev); + + skb->dev = (void *)ndev; + + if (!test_bit(ST_NCI_RUNNING, &info->flags)) + return -EBUSY; + + return ndlc_send(info->ndlc, skb); +} + +static __u32 st_nci_get_rfprotocol(struct nci_dev *ndev, + __u8 rf_protocol) +{ + return rf_protocol == ST_NCI1_X_PROPRIETARY_ISO15693 ? + NFC_PROTO_ISO15693_MASK : 0; +} + +static int st_nci_prop_rsp_packet(struct nci_dev *ndev, + struct sk_buff *skb) +{ + __u8 status = skb->data[0]; + + nci_req_complete(ndev, status); + return 0; +} + +static struct nci_prop_ops st_nci_prop_ops[] = { + { + .opcode = nci_opcode_pack(NCI_GID_PROPRIETARY, + ST_NCI_CORE_PROP), + .rsp = st_nci_prop_rsp_packet, + }, +}; + +static struct nci_ops st_nci_ops = { + .init = st_nci_init, + .open = st_nci_open, + .close = st_nci_close, + .send = st_nci_send, + .get_rfprotocol = st_nci_get_rfprotocol, + .discover_se = st_nci_discover_se, + .enable_se = st_nci_enable_se, + .disable_se = st_nci_disable_se, + .se_io = st_nci_se_io, + .hci_load_session = st_nci_hci_load_session, + .hci_event_received = st_nci_hci_event_received, + .hci_cmd_received = st_nci_hci_cmd_received, + .prop_ops = st_nci_prop_ops, + .n_prop_ops = ARRAY_SIZE(st_nci_prop_ops), +}; + +int st_nci_probe(struct llt_ndlc *ndlc, int phy_headroom, + int phy_tailroom) +{ + struct st_nci_info *info; + int r; + u32 protocols; + + info = devm_kzalloc(ndlc->dev, + sizeof(struct st_nci_info), GFP_KERNEL); + if (!info) + return -ENOMEM; + + protocols = NFC_PROTO_JEWEL_MASK + | NFC_PROTO_MIFARE_MASK + | NFC_PROTO_FELICA_MASK + | NFC_PROTO_ISO14443_MASK + | NFC_PROTO_ISO14443_B_MASK + | NFC_PROTO_ISO15693_MASK + | NFC_PROTO_NFC_DEP_MASK; + + ndlc->ndev = nci_allocate_device(&st_nci_ops, protocols, + phy_headroom, phy_tailroom); + if (!ndlc->ndev) { + pr_err("Cannot allocate nfc ndev\n"); + return -ENOMEM; + } + info->ndlc = ndlc; + + nci_set_drvdata(ndlc->ndev, info); + + r = nci_register_device(ndlc->ndev); + if (r) { + pr_err("Cannot register nfc device to nci core\n"); + nci_free_device(ndlc->ndev); + return r; + } + + return st_nci_se_init(ndlc->ndev); +} +EXPORT_SYMBOL_GPL(st_nci_probe); + +void st_nci_remove(struct nci_dev *ndev) +{ + struct st_nci_info *info = nci_get_drvdata(ndev); + + ndlc_close(info->ndlc); + + nci_unregister_device(ndev); + nci_free_device(ndev); +} +EXPORT_SYMBOL_GPL(st_nci_remove); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION(DRIVER_DESC); diff --git a/drivers/nfc/st-nci/i2c.c b/drivers/nfc/st-nci/i2c.c new file mode 100644 index 000000000000..06175ce769bb --- /dev/null +++ b/drivers/nfc/st-nci/i2c.c @@ -0,0 +1,385 @@ +/* + * I2C Link Layer for ST NCI NFC controller familly based Driver + * Copyright (C) 2014-2015 STMicroelectronics SAS. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ndlc.h" + +#define DRIVER_DESC "NCI NFC driver for ST21NFCB" + +/* ndlc header */ +#define ST21NFCB_FRAME_HEADROOM 1 +#define ST21NFCB_FRAME_TAILROOM 0 + +#define ST_NCI_I2C_MIN_SIZE 4 /* PCB(1) + NCI Packet header(3) */ +#define ST_NCI_I2C_MAX_SIZE 250 /* req 4.2.1 */ + +#define ST_NCI_I2C_DRIVER_NAME "st_nci_i2c" + +static struct i2c_device_id st_nci_i2c_id_table[] = { + {ST_NCI_DRIVER_NAME, 0}, + {} +}; +MODULE_DEVICE_TABLE(i2c, st_nci_i2c_id_table); + +struct st_nci_i2c_phy { + struct i2c_client *i2c_dev; + struct llt_ndlc *ndlc; + + unsigned int gpio_reset; + unsigned int irq_polarity; +}; + +#define I2C_DUMP_SKB(info, skb) \ +do { \ + pr_debug("%s:\n", info); \ + print_hex_dump(KERN_DEBUG, "i2c: ", DUMP_PREFIX_OFFSET, \ + 16, 1, (skb)->data, (skb)->len, 0); \ +} while (0) + +static int st_nci_i2c_enable(void *phy_id) +{ + struct st_nci_i2c_phy *phy = phy_id; + + gpio_set_value(phy->gpio_reset, 0); + usleep_range(10000, 15000); + gpio_set_value(phy->gpio_reset, 1); + usleep_range(80000, 85000); + + if (phy->ndlc->powered == 0) + enable_irq(phy->i2c_dev->irq); + + return 0; +} + +static void st_nci_i2c_disable(void *phy_id) +{ + struct st_nci_i2c_phy *phy = phy_id; + + disable_irq_nosync(phy->i2c_dev->irq); +} + +/* + * Writing a frame must not return the number of written bytes. + * It must return either zero for success, or <0 for error. + * In addition, it must not alter the skb + */ +static int st_nci_i2c_write(void *phy_id, struct sk_buff *skb) +{ + int r = -1; + struct st_nci_i2c_phy *phy = phy_id; + struct i2c_client *client = phy->i2c_dev; + + I2C_DUMP_SKB("st_nci_i2c_write", skb); + + if (phy->ndlc->hard_fault != 0) + return phy->ndlc->hard_fault; + + r = i2c_master_send(client, skb->data, skb->len); + if (r < 0) { /* Retry, chip was in standby */ + usleep_range(1000, 4000); + r = i2c_master_send(client, skb->data, skb->len); + } + + if (r >= 0) { + if (r != skb->len) + r = -EREMOTEIO; + else + r = 0; + } + + return r; +} + +/* + * Reads an ndlc frame and returns it in a newly allocated sk_buff. + * returns: + * frame size : if received frame is complete (find ST21NFCB_SOF_EOF at + * end of read) + * -EAGAIN : if received frame is incomplete (not find ST21NFCB_SOF_EOF + * at end of read) + * -EREMOTEIO : i2c read error (fatal) + * -EBADMSG : frame was incorrect and discarded + * (value returned from st_nci_i2c_repack) + * -EIO : if no ST21NFCB_SOF_EOF is found after reaching + * the read length end sequence + */ +static int st_nci_i2c_read(struct st_nci_i2c_phy *phy, + struct sk_buff **skb) +{ + int r; + u8 len; + u8 buf[ST_NCI_I2C_MAX_SIZE]; + struct i2c_client *client = phy->i2c_dev; + + r = i2c_master_recv(client, buf, ST_NCI_I2C_MIN_SIZE); + if (r < 0) { /* Retry, chip was in standby */ + usleep_range(1000, 4000); + r = i2c_master_recv(client, buf, ST_NCI_I2C_MIN_SIZE); + } + + if (r != ST_NCI_I2C_MIN_SIZE) + return -EREMOTEIO; + + len = be16_to_cpu(*(__be16 *) (buf + 2)); + if (len > ST_NCI_I2C_MAX_SIZE) { + nfc_err(&client->dev, "invalid frame len\n"); + return -EBADMSG; + } + + *skb = alloc_skb(ST_NCI_I2C_MIN_SIZE + len, GFP_KERNEL); + if (*skb == NULL) + return -ENOMEM; + + skb_reserve(*skb, ST_NCI_I2C_MIN_SIZE); + skb_put(*skb, ST_NCI_I2C_MIN_SIZE); + memcpy((*skb)->data, buf, ST_NCI_I2C_MIN_SIZE); + + if (!len) + return 0; + + r = i2c_master_recv(client, buf, len); + if (r != len) { + kfree_skb(*skb); + return -EREMOTEIO; + } + + skb_put(*skb, len); + memcpy((*skb)->data + ST_NCI_I2C_MIN_SIZE, buf, len); + + I2C_DUMP_SKB("i2c frame read", *skb); + + return 0; +} + +/* + * Reads an ndlc frame from the chip. + * + * On ST21NFCB, IRQ goes in idle state when read starts. + */ +static irqreturn_t st_nci_irq_thread_fn(int irq, void *phy_id) +{ + struct st_nci_i2c_phy *phy = phy_id; + struct i2c_client *client; + struct sk_buff *skb = NULL; + int r; + + if (!phy || !phy->ndlc || irq != phy->i2c_dev->irq) { + WARN_ON_ONCE(1); + return IRQ_NONE; + } + + client = phy->i2c_dev; + dev_dbg(&client->dev, "IRQ\n"); + + if (phy->ndlc->hard_fault) + return IRQ_HANDLED; + + if (!phy->ndlc->powered) { + st_nci_i2c_disable(phy); + return IRQ_HANDLED; + } + + r = st_nci_i2c_read(phy, &skb); + if (r == -EREMOTEIO || r == -ENOMEM || r == -EBADMSG) + return IRQ_HANDLED; + + ndlc_recv(phy->ndlc, skb); + + return IRQ_HANDLED; +} + +static struct nfc_phy_ops i2c_phy_ops = { + .write = st_nci_i2c_write, + .enable = st_nci_i2c_enable, + .disable = st_nci_i2c_disable, +}; + +#ifdef CONFIG_OF +static int st_nci_i2c_of_request_resources(struct i2c_client *client) +{ + struct st_nci_i2c_phy *phy = i2c_get_clientdata(client); + struct device_node *pp; + int gpio; + int r; + + pp = client->dev.of_node; + if (!pp) + return -ENODEV; + + /* Get GPIO from device tree */ + gpio = of_get_named_gpio(pp, "reset-gpios", 0); + if (gpio < 0) { + nfc_err(&client->dev, + "Failed to retrieve reset-gpios from device tree\n"); + return gpio; + } + + /* GPIO request and configuration */ + r = devm_gpio_request_one(&client->dev, gpio, + GPIOF_OUT_INIT_HIGH, "clf_reset"); + if (r) { + nfc_err(&client->dev, "Failed to request reset pin\n"); + return r; + } + phy->gpio_reset = gpio; + + phy->irq_polarity = irq_get_trigger_type(client->irq); + + return 0; +} +#else +static int st_nci_i2c_of_request_resources(struct i2c_client *client) +{ + return -ENODEV; +} +#endif + +static int st_nci_i2c_request_resources(struct i2c_client *client) +{ + struct st_nci_nfc_platform_data *pdata; + struct st_nci_i2c_phy *phy = i2c_get_clientdata(client); + int r; + + pdata = client->dev.platform_data; + if (pdata == NULL) { + nfc_err(&client->dev, "No platform data\n"); + return -EINVAL; + } + + /* store for later use */ + phy->gpio_reset = pdata->gpio_reset; + phy->irq_polarity = pdata->irq_polarity; + + r = devm_gpio_request_one(&client->dev, + phy->gpio_reset, GPIOF_OUT_INIT_HIGH, "clf_reset"); + if (r) { + pr_err("%s : reset gpio_request failed\n", __FILE__); + return r; + } + + return 0; +} + +static int st_nci_i2c_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct st_nci_i2c_phy *phy; + struct st_nci_nfc_platform_data *pdata; + int r; + + dev_dbg(&client->dev, "%s\n", __func__); + dev_dbg(&client->dev, "IRQ: %d\n", client->irq); + + if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) { + nfc_err(&client->dev, "Need I2C_FUNC_I2C\n"); + return -ENODEV; + } + + phy = devm_kzalloc(&client->dev, sizeof(struct st_nci_i2c_phy), + GFP_KERNEL); + if (!phy) + return -ENOMEM; + + phy->i2c_dev = client; + + i2c_set_clientdata(client, phy); + + pdata = client->dev.platform_data; + if (!pdata && client->dev.of_node) { + r = st_nci_i2c_of_request_resources(client); + if (r) { + nfc_err(&client->dev, "No platform data\n"); + return r; + } + } else if (pdata) { + r = st_nci_i2c_request_resources(client); + if (r) { + nfc_err(&client->dev, + "Cannot get platform resources\n"); + return r; + } + } else { + nfc_err(&client->dev, + "st21nfcb platform resources not available\n"); + return -ENODEV; + } + + r = ndlc_probe(phy, &i2c_phy_ops, &client->dev, + ST21NFCB_FRAME_HEADROOM, ST21NFCB_FRAME_TAILROOM, + &phy->ndlc); + if (r < 0) { + nfc_err(&client->dev, "Unable to register ndlc layer\n"); + return r; + } + + r = devm_request_threaded_irq(&client->dev, client->irq, NULL, + st_nci_irq_thread_fn, + phy->irq_polarity | IRQF_ONESHOT, + ST_NCI_DRIVER_NAME, phy); + if (r < 0) + nfc_err(&client->dev, "Unable to register IRQ handler\n"); + + return r; +} + +static int st_nci_i2c_remove(struct i2c_client *client) +{ + struct st_nci_i2c_phy *phy = i2c_get_clientdata(client); + + dev_dbg(&client->dev, "%s\n", __func__); + + ndlc_remove(phy->ndlc); + + return 0; +} + +#ifdef CONFIG_OF +static const struct of_device_id of_st_nci_i2c_match[] = { + { .compatible = "st,st21nfcb-i2c", }, + { .compatible = "st,st21nfcb_i2c", }, + { .compatible = "st,st21nfcc-i2c", }, + {} +}; +MODULE_DEVICE_TABLE(of, of_st_nci_i2c_match); +#endif + +static struct i2c_driver st_nci_i2c_driver = { + .driver = { + .owner = THIS_MODULE, + .name = ST_NCI_I2C_DRIVER_NAME, + .of_match_table = of_match_ptr(of_st_nci_i2c_match), + }, + .probe = st_nci_i2c_probe, + .id_table = st_nci_i2c_id_table, + .remove = st_nci_i2c_remove, +}; + +module_i2c_driver(st_nci_i2c_driver); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION(DRIVER_DESC); diff --git a/drivers/nfc/st-nci/ndlc.c b/drivers/nfc/st-nci/ndlc.c new file mode 100644 index 000000000000..56c6a4cb4c96 --- /dev/null +++ b/drivers/nfc/st-nci/ndlc.c @@ -0,0 +1,313 @@ +/* + * Low Level Transport (NDLC) Driver for STMicroelectronics NFC Chip + * + * Copyright (C) 2014-2015 STMicroelectronics SAS. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#include +#include + +#include "ndlc.h" +#include "st-nci.h" + +#define NDLC_TIMER_T1 100 +#define NDLC_TIMER_T1_WAIT 400 +#define NDLC_TIMER_T2 1200 + +#define PCB_TYPE_DATAFRAME 0x80 +#define PCB_TYPE_SUPERVISOR 0xc0 +#define PCB_TYPE_MASK PCB_TYPE_SUPERVISOR + +#define PCB_SYNC_ACK 0x20 +#define PCB_SYNC_NACK 0x10 +#define PCB_SYNC_WAIT 0x30 +#define PCB_SYNC_NOINFO 0x00 +#define PCB_SYNC_MASK PCB_SYNC_WAIT + +#define PCB_DATAFRAME_RETRANSMIT_YES 0x00 +#define PCB_DATAFRAME_RETRANSMIT_NO 0x04 +#define PCB_DATAFRAME_RETRANSMIT_MASK PCB_DATAFRAME_RETRANSMIT_NO + +#define PCB_SUPERVISOR_RETRANSMIT_YES 0x00 +#define PCB_SUPERVISOR_RETRANSMIT_NO 0x02 +#define PCB_SUPERVISOR_RETRANSMIT_MASK PCB_SUPERVISOR_RETRANSMIT_NO + +#define PCB_FRAME_CRC_INFO_PRESENT 0x08 +#define PCB_FRAME_CRC_INFO_NOTPRESENT 0x00 +#define PCB_FRAME_CRC_INFO_MASK PCB_FRAME_CRC_INFO_PRESENT + +#define NDLC_DUMP_SKB(info, skb) \ +do { \ + pr_debug("%s:\n", info); \ + print_hex_dump(KERN_DEBUG, "ndlc: ", DUMP_PREFIX_OFFSET, \ + 16, 1, skb->data, skb->len, 0); \ +} while (0) + +int ndlc_open(struct llt_ndlc *ndlc) +{ + /* toggle reset pin */ + ndlc->ops->enable(ndlc->phy_id); + ndlc->powered = 1; + return 0; +} +EXPORT_SYMBOL(ndlc_open); + +void ndlc_close(struct llt_ndlc *ndlc) +{ + struct nci_mode_set_cmd cmd; + + cmd.cmd_type = ST_NCI_SET_NFC_MODE; + cmd.mode = 0; + + /* toggle reset pin */ + ndlc->ops->enable(ndlc->phy_id); + + nci_prop_cmd(ndlc->ndev, ST_NCI_CORE_PROP, + sizeof(struct nci_mode_set_cmd), (__u8 *)&cmd); + + ndlc->powered = 0; + ndlc->ops->disable(ndlc->phy_id); +} +EXPORT_SYMBOL(ndlc_close); + +int ndlc_send(struct llt_ndlc *ndlc, struct sk_buff *skb) +{ + /* add ndlc header */ + u8 pcb = PCB_TYPE_DATAFRAME | PCB_DATAFRAME_RETRANSMIT_NO | + PCB_FRAME_CRC_INFO_NOTPRESENT; + + *skb_push(skb, 1) = pcb; + skb_queue_tail(&ndlc->send_q, skb); + + schedule_work(&ndlc->sm_work); + + return 0; +} +EXPORT_SYMBOL(ndlc_send); + +static void llt_ndlc_send_queue(struct llt_ndlc *ndlc) +{ + struct sk_buff *skb; + int r; + unsigned long time_sent; + + if (ndlc->send_q.qlen) + pr_debug("sendQlen=%d unackQlen=%d\n", + ndlc->send_q.qlen, ndlc->ack_pending_q.qlen); + + while (ndlc->send_q.qlen) { + skb = skb_dequeue(&ndlc->send_q); + NDLC_DUMP_SKB("ndlc frame written", skb); + r = ndlc->ops->write(ndlc->phy_id, skb); + if (r < 0) { + ndlc->hard_fault = r; + break; + } + time_sent = jiffies; + *(unsigned long *)skb->cb = time_sent; + + skb_queue_tail(&ndlc->ack_pending_q, skb); + + /* start timer t1 for ndlc aknowledge */ + ndlc->t1_active = true; + mod_timer(&ndlc->t1_timer, time_sent + + msecs_to_jiffies(NDLC_TIMER_T1)); + /* start timer t2 for chip availability */ + ndlc->t2_active = true; + mod_timer(&ndlc->t2_timer, time_sent + + msecs_to_jiffies(NDLC_TIMER_T2)); + } +} + +static void llt_ndlc_requeue_data_pending(struct llt_ndlc *ndlc) +{ + struct sk_buff *skb; + u8 pcb; + + while ((skb = skb_dequeue_tail(&ndlc->ack_pending_q))) { + pcb = skb->data[0]; + switch (pcb & PCB_TYPE_MASK) { + case PCB_TYPE_SUPERVISOR: + skb->data[0] = (pcb & ~PCB_SUPERVISOR_RETRANSMIT_MASK) | + PCB_SUPERVISOR_RETRANSMIT_YES; + break; + case PCB_TYPE_DATAFRAME: + skb->data[0] = (pcb & ~PCB_DATAFRAME_RETRANSMIT_MASK) | + PCB_DATAFRAME_RETRANSMIT_YES; + break; + default: + pr_err("UNKNOWN Packet Control Byte=%d\n", pcb); + kfree_skb(skb); + continue; + } + skb_queue_head(&ndlc->send_q, skb); + } +} + +static void llt_ndlc_rcv_queue(struct llt_ndlc *ndlc) +{ + struct sk_buff *skb; + u8 pcb; + unsigned long time_sent; + + if (ndlc->rcv_q.qlen) + pr_debug("rcvQlen=%d\n", ndlc->rcv_q.qlen); + + while ((skb = skb_dequeue(&ndlc->rcv_q)) != NULL) { + pcb = skb->data[0]; + skb_pull(skb, 1); + if ((pcb & PCB_TYPE_MASK) == PCB_TYPE_SUPERVISOR) { + switch (pcb & PCB_SYNC_MASK) { + case PCB_SYNC_ACK: + del_timer_sync(&ndlc->t1_timer); + del_timer_sync(&ndlc->t2_timer); + ndlc->t2_active = false; + ndlc->t1_active = false; + break; + case PCB_SYNC_NACK: + llt_ndlc_requeue_data_pending(ndlc); + llt_ndlc_send_queue(ndlc); + /* start timer t1 for ndlc aknowledge */ + time_sent = jiffies; + ndlc->t1_active = true; + mod_timer(&ndlc->t1_timer, time_sent + + msecs_to_jiffies(NDLC_TIMER_T1)); + break; + case PCB_SYNC_WAIT: + time_sent = jiffies; + ndlc->t1_active = true; + mod_timer(&ndlc->t1_timer, time_sent + + msecs_to_jiffies(NDLC_TIMER_T1_WAIT)); + break; + default: + pr_err("UNKNOWN Packet Control Byte=%d\n", pcb); + kfree_skb(skb); + break; + } + } else { + nci_recv_frame(ndlc->ndev, skb); + } + } +} + +static void llt_ndlc_sm_work(struct work_struct *work) +{ + struct llt_ndlc *ndlc = container_of(work, struct llt_ndlc, sm_work); + + llt_ndlc_send_queue(ndlc); + llt_ndlc_rcv_queue(ndlc); + + if (ndlc->t1_active && timer_pending(&ndlc->t1_timer) == 0) { + pr_debug + ("Handle T1(recv SUPERVISOR) elapsed (T1 now inactive)\n"); + ndlc->t1_active = false; + + llt_ndlc_requeue_data_pending(ndlc); + llt_ndlc_send_queue(ndlc); + } + + if (ndlc->t2_active && timer_pending(&ndlc->t2_timer) == 0) { + pr_debug("Handle T2(recv DATA) elapsed (T2 now inactive)\n"); + ndlc->t2_active = false; + ndlc->t1_active = false; + del_timer_sync(&ndlc->t1_timer); + del_timer_sync(&ndlc->t2_timer); + ndlc_close(ndlc); + ndlc->hard_fault = -EREMOTEIO; + } +} + +void ndlc_recv(struct llt_ndlc *ndlc, struct sk_buff *skb) +{ + if (skb == NULL) { + pr_err("NULL Frame -> link is dead\n"); + ndlc->hard_fault = -EREMOTEIO; + ndlc_close(ndlc); + } else { + NDLC_DUMP_SKB("incoming frame", skb); + skb_queue_tail(&ndlc->rcv_q, skb); + } + + schedule_work(&ndlc->sm_work); +} +EXPORT_SYMBOL(ndlc_recv); + +static void ndlc_t1_timeout(unsigned long data) +{ + struct llt_ndlc *ndlc = (struct llt_ndlc *)data; + + pr_debug("\n"); + + schedule_work(&ndlc->sm_work); +} + +static void ndlc_t2_timeout(unsigned long data) +{ + struct llt_ndlc *ndlc = (struct llt_ndlc *)data; + + pr_debug("\n"); + + schedule_work(&ndlc->sm_work); +} + +int ndlc_probe(void *phy_id, struct nfc_phy_ops *phy_ops, struct device *dev, + int phy_headroom, int phy_tailroom, struct llt_ndlc **ndlc_id) +{ + struct llt_ndlc *ndlc; + + ndlc = devm_kzalloc(dev, sizeof(struct llt_ndlc), GFP_KERNEL); + if (!ndlc) + return -ENOMEM; + + ndlc->ops = phy_ops; + ndlc->phy_id = phy_id; + ndlc->dev = dev; + ndlc->powered = 0; + + *ndlc_id = ndlc; + + /* initialize timers */ + init_timer(&ndlc->t1_timer); + ndlc->t1_timer.data = (unsigned long)ndlc; + ndlc->t1_timer.function = ndlc_t1_timeout; + + init_timer(&ndlc->t2_timer); + ndlc->t2_timer.data = (unsigned long)ndlc; + ndlc->t2_timer.function = ndlc_t2_timeout; + + skb_queue_head_init(&ndlc->rcv_q); + skb_queue_head_init(&ndlc->send_q); + skb_queue_head_init(&ndlc->ack_pending_q); + + INIT_WORK(&ndlc->sm_work, llt_ndlc_sm_work); + + return st_nci_probe(ndlc, phy_headroom, phy_tailroom); +} +EXPORT_SYMBOL(ndlc_probe); + +void ndlc_remove(struct llt_ndlc *ndlc) +{ + st_nci_remove(ndlc->ndev); + + /* cancel timers */ + del_timer_sync(&ndlc->t1_timer); + del_timer_sync(&ndlc->t2_timer); + ndlc->t2_active = false; + ndlc->t1_active = false; + + skb_queue_purge(&ndlc->rcv_q); + skb_queue_purge(&ndlc->send_q); +} +EXPORT_SYMBOL(ndlc_remove); diff --git a/drivers/nfc/st-nci/ndlc.h b/drivers/nfc/st-nci/ndlc.h new file mode 100644 index 000000000000..6361005ef003 --- /dev/null +++ b/drivers/nfc/st-nci/ndlc.h @@ -0,0 +1,60 @@ +/* + * NCI based Driver for STMicroelectronics NFC Chip + * + * Copyright (C) 2014-2015 STMicroelectronics SAS. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef __LOCAL_NDLC_H_ +#define __LOCAL_NDLC_H_ + +#include +#include + +/* Low Level Transport description */ +struct llt_ndlc { + struct nci_dev *ndev; + struct nfc_phy_ops *ops; + void *phy_id; + + struct timer_list t1_timer; + bool t1_active; + + struct timer_list t2_timer; + bool t2_active; + + struct sk_buff_head rcv_q; + struct sk_buff_head send_q; + struct sk_buff_head ack_pending_q; + + struct work_struct sm_work; + + struct device *dev; + + /* + * < 0 if hardware error occurred + * and prevents normal operation. + */ + int hard_fault; + int powered; +}; + +int ndlc_open(struct llt_ndlc *ndlc); +void ndlc_close(struct llt_ndlc *ndlc); +int ndlc_send(struct llt_ndlc *ndlc, struct sk_buff *skb); +void ndlc_recv(struct llt_ndlc *ndlc, struct sk_buff *skb); +int ndlc_probe(void *phy_id, struct nfc_phy_ops *phy_ops, struct device *dev, + int phy_headroom, int phy_tailroom, struct llt_ndlc **ndlc_id); +void ndlc_remove(struct llt_ndlc *ndlc); +#endif /* __LOCAL_NDLC_H__ */ diff --git a/drivers/nfc/st-nci/st-nci.h b/drivers/nfc/st-nci/st-nci.h new file mode 100644 index 000000000000..850a2395deb7 --- /dev/null +++ b/drivers/nfc/st-nci/st-nci.h @@ -0,0 +1,50 @@ +/* + * NCI based Driver for STMicroelectronics NFC Chip + * + * Copyright (C) 2014 STMicroelectronics SAS. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef __LOCAL_ST_NCI_H_ +#define __LOCAL_ST_NCI_H_ + +#include "st-nci_se.h" +#include "ndlc.h" + +/* Define private flags: */ +#define ST_NCI_RUNNING 1 + +#define ST_NCI_CORE_PROP 0x01 +#define ST_NCI_SET_NFC_MODE 0x02 + +struct nci_mode_set_cmd { + u8 cmd_type; + u8 mode; +} __packed; + +struct nci_mode_set_rsp { + u8 status; +} __packed; + +struct st_nci_info { + struct llt_ndlc *ndlc; + unsigned long flags; + struct st_nci_se_info se_info; +}; + +void st_nci_remove(struct nci_dev *ndev); +int st_nci_probe(struct llt_ndlc *ndlc, int phy_headroom, + int phy_tailroom); + +#endif /* __LOCAL_ST_NCI_H_ */ diff --git a/drivers/nfc/st-nci/st-nci_se.c b/drivers/nfc/st-nci/st-nci_se.c new file mode 100644 index 000000000000..97addfa96c6f --- /dev/null +++ b/drivers/nfc/st-nci/st-nci_se.c @@ -0,0 +1,714 @@ +/* + * Secure Element driver for STMicroelectronics NFC NCI chip + * + * Copyright (C) 2014-2015 STMicroelectronics SAS. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#include +#include +#include +#include +#include + +#include "st-nci.h" +#include "st-nci_se.h" + +struct st_nci_pipe_info { + u8 pipe_state; + u8 src_host_id; + u8 src_gate_id; + u8 dst_host_id; + u8 dst_gate_id; +} __packed; + +/* Hosts */ +#define ST_NCI_HOST_CONTROLLER_ID 0x00 +#define ST_NCI_TERMINAL_HOST_ID 0x01 +#define ST_NCI_UICC_HOST_ID 0x02 +#define ST_NCI_ESE_HOST_ID 0xc0 + +/* Gates */ +#define ST_NCI_DEVICE_MGNT_GATE 0x01 +#define ST_NCI_APDU_READER_GATE 0xf0 +#define ST_NCI_CONNECTIVITY_GATE 0x41 + +/* Pipes */ +#define ST_NCI_DEVICE_MGNT_PIPE 0x02 + +/* Connectivity pipe only */ +#define ST_NCI_SE_COUNT_PIPE_UICC 0x01 +/* Connectivity + APDU Reader pipe */ +#define ST_NCI_SE_COUNT_PIPE_EMBEDDED 0x02 + +#define ST_NCI_SE_TO_HOT_PLUG 1000 /* msecs */ +#define ST_NCI_SE_TO_PIPES 2000 + +#define ST_NCI_EVT_HOT_PLUG_IS_INHIBITED(x) (x->data[0] & 0x80) + +#define NCI_HCI_APDU_PARAM_ATR 0x01 +#define NCI_HCI_ADMIN_PARAM_SESSION_IDENTITY 0x01 +#define NCI_HCI_ADMIN_PARAM_WHITELIST 0x03 +#define NCI_HCI_ADMIN_PARAM_HOST_LIST 0x04 + +#define ST_NCI_EVT_SE_HARD_RESET 0x20 +#define ST_NCI_EVT_TRANSMIT_DATA 0x10 +#define ST_NCI_EVT_WTX_REQUEST 0x11 +#define ST_NCI_EVT_SE_SOFT_RESET 0x11 +#define ST_NCI_EVT_SE_END_OF_APDU_TRANSFER 0x21 +#define ST_NCI_EVT_HOT_PLUG 0x03 + +#define ST_NCI_SE_MODE_OFF 0x00 +#define ST_NCI_SE_MODE_ON 0x01 + +#define ST_NCI_EVT_CONNECTIVITY 0x10 +#define ST_NCI_EVT_TRANSACTION 0x12 + +#define ST_NCI_DM_GETINFO 0x13 +#define ST_NCI_DM_GETINFO_PIPE_LIST 0x02 +#define ST_NCI_DM_GETINFO_PIPE_INFO 0x01 +#define ST_NCI_DM_PIPE_CREATED 0x02 +#define ST_NCI_DM_PIPE_OPEN 0x04 +#define ST_NCI_DM_RF_ACTIVE 0x80 +#define ST_NCI_DM_DISCONNECT 0x30 + +#define ST_NCI_DM_IS_PIPE_OPEN(p) \ + ((p & 0x0f) == (ST_NCI_DM_PIPE_CREATED | ST_NCI_DM_PIPE_OPEN)) + +#define ST_NCI_ATR_DEFAULT_BWI 0x04 + +/* + * WT = 2^BWI/10[s], convert into msecs and add a secure + * room by increasing by 2 this timeout + */ +#define ST_NCI_BWI_TO_TIMEOUT(x) ((1 << x) * 200) +#define ST_NCI_ATR_GET_Y_FROM_TD(x) (x >> 4) + +/* If TA is present bit 0 is set */ +#define ST_NCI_ATR_TA_PRESENT(x) (x & 0x01) +/* If TB is present bit 1 is set */ +#define ST_NCI_ATR_TB_PRESENT(x) (x & 0x02) + +#define ST_NCI_NUM_DEVICES 256 + +static DECLARE_BITMAP(dev_mask, ST_NCI_NUM_DEVICES); + +/* Here are the mandatory pipe for st_nci */ +static struct nci_hci_gate st_nci_gates[] = { + {NCI_HCI_ADMIN_GATE, NCI_HCI_ADMIN_PIPE, + ST_NCI_HOST_CONTROLLER_ID}, + {NCI_HCI_LINK_MGMT_GATE, NCI_HCI_LINK_MGMT_PIPE, + ST_NCI_HOST_CONTROLLER_ID}, + {ST_NCI_DEVICE_MGNT_GATE, ST_NCI_DEVICE_MGNT_PIPE, + ST_NCI_HOST_CONTROLLER_ID}, + + /* Secure element pipes are created by secure element host */ + {ST_NCI_CONNECTIVITY_GATE, NCI_HCI_DO_NOT_OPEN_PIPE, + ST_NCI_HOST_CONTROLLER_ID}, + {ST_NCI_APDU_READER_GATE, NCI_HCI_DO_NOT_OPEN_PIPE, + ST_NCI_HOST_CONTROLLER_ID}, +}; + +static u8 st_nci_se_get_bwi(struct nci_dev *ndev) +{ + int i; + u8 td; + struct st_nci_info *info = nci_get_drvdata(ndev); + + /* Bits 8 to 5 of the first TB for T=1 encode BWI from zero to nine */ + for (i = 1; i < ST_NCI_ESE_MAX_LENGTH; i++) { + td = ST_NCI_ATR_GET_Y_FROM_TD(info->se_info.atr[i]); + if (ST_NCI_ATR_TA_PRESENT(td)) + i++; + if (ST_NCI_ATR_TB_PRESENT(td)) { + i++; + return info->se_info.atr[i] >> 4; + } + } + return ST_NCI_ATR_DEFAULT_BWI; +} + +static void st_nci_se_get_atr(struct nci_dev *ndev) +{ + struct st_nci_info *info = nci_get_drvdata(ndev); + int r; + struct sk_buff *skb; + + r = nci_hci_get_param(ndev, ST_NCI_APDU_READER_GATE, + NCI_HCI_APDU_PARAM_ATR, &skb); + if (r < 0) + return; + + if (skb->len <= ST_NCI_ESE_MAX_LENGTH) { + memcpy(info->se_info.atr, skb->data, skb->len); + + info->se_info.wt_timeout = + ST_NCI_BWI_TO_TIMEOUT(st_nci_se_get_bwi(ndev)); + } + kfree_skb(skb); +} + +int st_nci_hci_load_session(struct nci_dev *ndev) +{ + int i, j, r; + struct sk_buff *skb_pipe_list, *skb_pipe_info; + struct st_nci_pipe_info *dm_pipe_info; + u8 pipe_list[] = { ST_NCI_DM_GETINFO_PIPE_LIST, + ST_NCI_TERMINAL_HOST_ID}; + u8 pipe_info[] = { ST_NCI_DM_GETINFO_PIPE_INFO, + ST_NCI_TERMINAL_HOST_ID, 0}; + + /* On ST_NCI device pipes number are dynamics + * If pipes are already created, hci_dev_up will fail. + * Doing a clear all pipe is a bad idea because: + * - It does useless EEPROM cycling + * - It might cause issue for secure elements support + * (such as removing connectivity or APDU reader pipe) + * A better approach on ST_NCI is to: + * - get a pipe list for each host. + * (eg: ST_NCI_HOST_CONTROLLER_ID for now). + * (TODO Later on UICC HOST and eSE HOST) + * - get pipe information + * - match retrieved pipe list in st_nci_gates + * ST_NCI_DEVICE_MGNT_GATE is a proprietary gate + * with ST_NCI_DEVICE_MGNT_PIPE. + * Pipe can be closed and need to be open. + */ + r = nci_hci_connect_gate(ndev, ST_NCI_HOST_CONTROLLER_ID, + ST_NCI_DEVICE_MGNT_GATE, + ST_NCI_DEVICE_MGNT_PIPE); + if (r < 0) + goto free_info; + + /* Get pipe list */ + r = nci_hci_send_cmd(ndev, ST_NCI_DEVICE_MGNT_GATE, + ST_NCI_DM_GETINFO, pipe_list, sizeof(pipe_list), + &skb_pipe_list); + if (r < 0) + goto free_info; + + /* Complete the existing gate_pipe table */ + for (i = 0; i < skb_pipe_list->len; i++) { + pipe_info[2] = skb_pipe_list->data[i]; + r = nci_hci_send_cmd(ndev, ST_NCI_DEVICE_MGNT_GATE, + ST_NCI_DM_GETINFO, pipe_info, + sizeof(pipe_info), &skb_pipe_info); + + if (r) + continue; + + /* + * Match pipe ID and gate ID + * Output format from ST21NFC_DM_GETINFO is: + * - pipe state (1byte) + * - source hid (1byte) + * - source gid (1byte) + * - destination hid (1byte) + * - destination gid (1byte) + */ + dm_pipe_info = (struct st_nci_pipe_info *)skb_pipe_info->data; + if (dm_pipe_info->dst_gate_id == ST_NCI_APDU_READER_GATE && + dm_pipe_info->src_host_id != ST_NCI_ESE_HOST_ID) { + pr_err("Unexpected apdu_reader pipe on host %x\n", + dm_pipe_info->src_host_id); + continue; + } + + for (j = 0; (j < ARRAY_SIZE(st_nci_gates)) && + (st_nci_gates[j].gate != dm_pipe_info->dst_gate_id); j++) + ; + + if (j < ARRAY_SIZE(st_nci_gates) && + st_nci_gates[j].gate == dm_pipe_info->dst_gate_id && + ST_NCI_DM_IS_PIPE_OPEN(dm_pipe_info->pipe_state)) { + st_nci_gates[j].pipe = pipe_info[2]; + + ndev->hci_dev->gate2pipe[st_nci_gates[j].gate] = + st_nci_gates[j].pipe; + ndev->hci_dev->pipes[st_nci_gates[j].pipe].gate = + st_nci_gates[j].gate; + ndev->hci_dev->pipes[st_nci_gates[j].pipe].host = + dm_pipe_info->src_host_id; + } + } + + memcpy(ndev->hci_dev->init_data.gates, st_nci_gates, + sizeof(st_nci_gates)); + +free_info: + kfree_skb(skb_pipe_info); + kfree_skb(skb_pipe_list); + return r; +} +EXPORT_SYMBOL_GPL(st_nci_hci_load_session); + +static void st_nci_hci_admin_event_received(struct nci_dev *ndev, + u8 event, struct sk_buff *skb) +{ + struct st_nci_info *info = nci_get_drvdata(ndev); + + switch (event) { + case ST_NCI_EVT_HOT_PLUG: + if (info->se_info.se_active) { + if (!ST_NCI_EVT_HOT_PLUG_IS_INHIBITED(skb)) { + del_timer_sync(&info->se_info.se_active_timer); + info->se_info.se_active = false; + complete(&info->se_info.req_completion); + } else { + mod_timer(&info->se_info.se_active_timer, + jiffies + + msecs_to_jiffies(ST_NCI_SE_TO_PIPES)); + } + } + break; + } +} + +static int st_nci_hci_apdu_reader_event_received(struct nci_dev *ndev, + u8 event, + struct sk_buff *skb) +{ + int r = 0; + struct st_nci_info *info = nci_get_drvdata(ndev); + + pr_debug("apdu reader gate event: %x\n", event); + + switch (event) { + case ST_NCI_EVT_TRANSMIT_DATA: + del_timer_sync(&info->se_info.bwi_timer); + info->se_info.bwi_active = false; + info->se_info.cb(info->se_info.cb_context, + skb->data, skb->len, 0); + break; + case ST_NCI_EVT_WTX_REQUEST: + mod_timer(&info->se_info.bwi_timer, jiffies + + msecs_to_jiffies(info->se_info.wt_timeout)); + break; + } + + kfree_skb(skb); + return r; +} + +/* + * Returns: + * <= 0: driver handled the event, skb consumed + * 1: driver does not handle the event, please do standard processing + */ +static int st_nci_hci_connectivity_event_received(struct nci_dev *ndev, + u8 host, u8 event, + struct sk_buff *skb) +{ + int r = 0; + struct device *dev = &ndev->nfc_dev->dev; + struct nfc_evt_transaction *transaction; + + pr_debug("connectivity gate event: %x\n", event); + + switch (event) { + case ST_NCI_EVT_CONNECTIVITY: + + break; + case ST_NCI_EVT_TRANSACTION: + /* According to specification etsi 102 622 + * 11.2.2.4 EVT_TRANSACTION Table 52 + * Description Tag Length + * AID 81 5 to 16 + * PARAMETERS 82 0 to 255 + */ + if (skb->len < NFC_MIN_AID_LENGTH + 2 && + skb->data[0] != NFC_EVT_TRANSACTION_AID_TAG) + return -EPROTO; + + transaction = (struct nfc_evt_transaction *)devm_kzalloc(dev, + skb->len - 2, GFP_KERNEL); + + transaction->aid_len = skb->data[1]; + memcpy(transaction->aid, &skb->data[2], transaction->aid_len); + + /* Check next byte is PARAMETERS tag (82) */ + if (skb->data[transaction->aid_len + 2] != + NFC_EVT_TRANSACTION_PARAMS_TAG) + return -EPROTO; + + transaction->params_len = skb->data[transaction->aid_len + 3]; + memcpy(transaction->params, skb->data + + transaction->aid_len + 4, transaction->params_len); + + r = nfc_se_transaction(ndev->nfc_dev, host, transaction); + break; + default: + return 1; + } + kfree_skb(skb); + return r; +} + +void st_nci_hci_event_received(struct nci_dev *ndev, u8 pipe, + u8 event, struct sk_buff *skb) +{ + u8 gate = ndev->hci_dev->pipes[pipe].gate; + u8 host = ndev->hci_dev->pipes[pipe].host; + + switch (gate) { + case NCI_HCI_ADMIN_GATE: + st_nci_hci_admin_event_received(ndev, event, skb); + break; + case ST_NCI_APDU_READER_GATE: + st_nci_hci_apdu_reader_event_received(ndev, event, skb); + break; + case ST_NCI_CONNECTIVITY_GATE: + st_nci_hci_connectivity_event_received(ndev, host, event, + skb); + break; + } +} +EXPORT_SYMBOL_GPL(st_nci_hci_event_received); + + +void st_nci_hci_cmd_received(struct nci_dev *ndev, u8 pipe, u8 cmd, + struct sk_buff *skb) +{ + struct st_nci_info *info = nci_get_drvdata(ndev); + u8 gate = ndev->hci_dev->pipes[pipe].gate; + + pr_debug("cmd: %x\n", cmd); + + switch (cmd) { + case NCI_HCI_ANY_OPEN_PIPE: + if (gate != ST_NCI_APDU_READER_GATE && + ndev->hci_dev->pipes[pipe].host != ST_NCI_UICC_HOST_ID) + ndev->hci_dev->count_pipes++; + + if (ndev->hci_dev->count_pipes == + ndev->hci_dev->expected_pipes) { + del_timer_sync(&info->se_info.se_active_timer); + info->se_info.se_active = false; + ndev->hci_dev->count_pipes = 0; + complete(&info->se_info.req_completion); + } + break; + } +} +EXPORT_SYMBOL_GPL(st_nci_hci_cmd_received); + +/* + * Remarks: On some early st_nci firmware, nci_nfcee_mode_set(0) + * is rejected + */ +static int st_nci_control_se(struct nci_dev *ndev, u8 se_idx, + u8 state) +{ + struct st_nci_info *info = nci_get_drvdata(ndev); + int r; + struct sk_buff *sk_host_list; + u8 host_id; + + switch (se_idx) { + case ST_NCI_UICC_HOST_ID: + ndev->hci_dev->count_pipes = 0; + ndev->hci_dev->expected_pipes = ST_NCI_SE_COUNT_PIPE_UICC; + break; + case ST_NCI_ESE_HOST_ID: + ndev->hci_dev->count_pipes = 0; + ndev->hci_dev->expected_pipes = ST_NCI_SE_COUNT_PIPE_EMBEDDED; + break; + default: + return -EINVAL; + } + + /* + * Wait for an EVT_HOT_PLUG in order to + * retrieve a relevant host list. + */ + reinit_completion(&info->se_info.req_completion); + r = nci_nfcee_mode_set(ndev, se_idx, NCI_NFCEE_ENABLE); + if (r != NCI_STATUS_OK) + return r; + + mod_timer(&info->se_info.se_active_timer, jiffies + + msecs_to_jiffies(ST_NCI_SE_TO_HOT_PLUG)); + info->se_info.se_active = true; + + /* Ignore return value and check in any case the host_list */ + wait_for_completion_interruptible(&info->se_info.req_completion); + + /* There might be some "collision" after receiving a HOT_PLUG event + * This may cause the CLF to not answer to the next hci command. + * There is no possible synchronization to prevent this. + * Adding a small delay is the only way to solve the issue. + */ + usleep_range(3000, 5000); + + r = nci_hci_get_param(ndev, NCI_HCI_ADMIN_GATE, + NCI_HCI_ADMIN_PARAM_HOST_LIST, &sk_host_list); + if (r != NCI_HCI_ANY_OK) + return r; + + host_id = sk_host_list->data[sk_host_list->len - 1]; + kfree_skb(sk_host_list); + if (state == ST_NCI_SE_MODE_ON && host_id == se_idx) + return se_idx; + else if (state == ST_NCI_SE_MODE_OFF && host_id != se_idx) + return se_idx; + + return -1; +} + +int st_nci_disable_se(struct nci_dev *ndev, u32 se_idx) +{ + int r; + + pr_debug("st_nci_disable_se\n"); + + if (se_idx == NFC_SE_EMBEDDED) { + r = nci_hci_send_event(ndev, ST_NCI_APDU_READER_GATE, + ST_NCI_EVT_SE_END_OF_APDU_TRANSFER, NULL, 0); + if (r < 0) + return r; + } + + return 0; +} +EXPORT_SYMBOL_GPL(st_nci_disable_se); + +int st_nci_enable_se(struct nci_dev *ndev, u32 se_idx) +{ + int r; + + pr_debug("st_nci_enable_se\n"); + + if (se_idx == ST_NCI_HCI_HOST_ID_ESE) { + r = nci_hci_send_event(ndev, ST_NCI_APDU_READER_GATE, + ST_NCI_EVT_SE_SOFT_RESET, NULL, 0); + if (r < 0) + return r; + } + + return 0; +} +EXPORT_SYMBOL_GPL(st_nci_enable_se); + +static int st_nci_hci_network_init(struct nci_dev *ndev) +{ + struct core_conn_create_dest_spec_params *dest_params; + struct dest_spec_params spec_params; + struct nci_conn_info *conn_info; + int r, dev_num; + + dest_params = + kzalloc(sizeof(struct core_conn_create_dest_spec_params) + + sizeof(struct dest_spec_params), GFP_KERNEL); + if (dest_params == NULL) { + r = -ENOMEM; + goto exit; + } + + dest_params->type = NCI_DESTINATION_SPECIFIC_PARAM_NFCEE_TYPE; + dest_params->length = sizeof(struct dest_spec_params); + spec_params.id = ndev->hci_dev->nfcee_id; + spec_params.protocol = NCI_NFCEE_INTERFACE_HCI_ACCESS; + memcpy(dest_params->value, &spec_params, + sizeof(struct dest_spec_params)); + r = nci_core_conn_create(ndev, NCI_DESTINATION_NFCEE, 1, + sizeof(struct core_conn_create_dest_spec_params) + + sizeof(struct dest_spec_params), + dest_params); + if (r != NCI_STATUS_OK) + goto free_dest_params; + + conn_info = ndev->hci_dev->conn_info; + if (!conn_info) + goto free_dest_params; + + memcpy(ndev->hci_dev->init_data.gates, st_nci_gates, + sizeof(st_nci_gates)); + + /* + * Session id must include the driver name + i2c bus addr + * persistent info to discriminate 2 identical chips + */ + dev_num = find_first_zero_bit(dev_mask, ST_NCI_NUM_DEVICES); + if (dev_num >= ST_NCI_NUM_DEVICES) { + r = -ENODEV; + goto free_dest_params; + } + + scnprintf(ndev->hci_dev->init_data.session_id, + sizeof(ndev->hci_dev->init_data.session_id), + "%s%2x", "ST21BH", dev_num); + + r = nci_hci_dev_session_init(ndev); + if (r != NCI_HCI_ANY_OK) + goto free_dest_params; + + r = nci_nfcee_mode_set(ndev, ndev->hci_dev->conn_info->id, + NCI_NFCEE_ENABLE); + if (r != NCI_STATUS_OK) + goto free_dest_params; + +free_dest_params: + kfree(dest_params); + +exit: + return r; +} + +int st_nci_discover_se(struct nci_dev *ndev) +{ + u8 param[2]; + int r; + int se_count = 0; + + pr_debug("st_nci_discover_se\n"); + + r = st_nci_hci_network_init(ndev); + if (r != 0) + return r; + + param[0] = ST_NCI_UICC_HOST_ID; + param[1] = ST_NCI_HCI_HOST_ID_ESE; + r = nci_hci_set_param(ndev, NCI_HCI_ADMIN_GATE, + NCI_HCI_ADMIN_PARAM_WHITELIST, + param, sizeof(param)); + if (r != NCI_HCI_ANY_OK) + return r; + + r = st_nci_control_se(ndev, ST_NCI_UICC_HOST_ID, + ST_NCI_SE_MODE_ON); + if (r == ST_NCI_UICC_HOST_ID) { + nfc_add_se(ndev->nfc_dev, ST_NCI_UICC_HOST_ID, NFC_SE_UICC); + se_count++; + } + + /* Try to enable eSE in order to check availability */ + r = st_nci_control_se(ndev, ST_NCI_HCI_HOST_ID_ESE, + ST_NCI_SE_MODE_ON); + if (r == ST_NCI_HCI_HOST_ID_ESE) { + nfc_add_se(ndev->nfc_dev, ST_NCI_HCI_HOST_ID_ESE, + NFC_SE_EMBEDDED); + se_count++; + st_nci_se_get_atr(ndev); + } + + return !se_count; +} +EXPORT_SYMBOL_GPL(st_nci_discover_se); + +int st_nci_se_io(struct nci_dev *ndev, u32 se_idx, + u8 *apdu, size_t apdu_length, + se_io_cb_t cb, void *cb_context) +{ + struct st_nci_info *info = nci_get_drvdata(ndev); + + pr_debug("\n"); + + switch (se_idx) { + case ST_NCI_HCI_HOST_ID_ESE: + info->se_info.cb = cb; + info->se_info.cb_context = cb_context; + mod_timer(&info->se_info.bwi_timer, jiffies + + msecs_to_jiffies(info->se_info.wt_timeout)); + info->se_info.bwi_active = true; + return nci_hci_send_event(ndev, ST_NCI_APDU_READER_GATE, + ST_NCI_EVT_TRANSMIT_DATA, apdu, + apdu_length); + default: + return -ENODEV; + } +} +EXPORT_SYMBOL(st_nci_se_io); + +static void st_nci_se_wt_timeout(unsigned long data) +{ + /* + * No answer from the secure element + * within the defined timeout. + * Let's send a reset request as recovery procedure. + * According to the situation, we first try to send a software reset + * to the secure element. If the next command is still not + * answering in time, we send to the CLF a secure element hardware + * reset request. + */ + /* hardware reset managed through VCC_UICC_OUT power supply */ + u8 param = 0x01; + struct st_nci_info *info = (struct st_nci_info *) data; + + pr_debug("\n"); + + info->se_info.bwi_active = false; + + if (!info->se_info.xch_error) { + info->se_info.xch_error = true; + nci_hci_send_event(info->ndlc->ndev, ST_NCI_APDU_READER_GATE, + ST_NCI_EVT_SE_SOFT_RESET, NULL, 0); + } else { + info->se_info.xch_error = false; + nci_hci_send_event(info->ndlc->ndev, ST_NCI_DEVICE_MGNT_GATE, + ST_NCI_EVT_SE_HARD_RESET, ¶m, 1); + } + info->se_info.cb(info->se_info.cb_context, NULL, 0, -ETIME); +} + +static void st_nci_se_activation_timeout(unsigned long data) +{ + struct st_nci_info *info = (struct st_nci_info *) data; + + pr_debug("\n"); + + info->se_info.se_active = false; + + complete(&info->se_info.req_completion); +} + +int st_nci_se_init(struct nci_dev *ndev) +{ + struct st_nci_info *info = nci_get_drvdata(ndev); + + init_completion(&info->se_info.req_completion); + /* initialize timers */ + init_timer(&info->se_info.bwi_timer); + info->se_info.bwi_timer.data = (unsigned long)info; + info->se_info.bwi_timer.function = st_nci_se_wt_timeout; + info->se_info.bwi_active = false; + + init_timer(&info->se_info.se_active_timer); + info->se_info.se_active_timer.data = (unsigned long)info; + info->se_info.se_active_timer.function = + st_nci_se_activation_timeout; + info->se_info.se_active = false; + + info->se_info.xch_error = false; + + info->se_info.wt_timeout = + ST_NCI_BWI_TO_TIMEOUT(ST_NCI_ATR_DEFAULT_BWI); + + return 0; +} +EXPORT_SYMBOL(st_nci_se_init); + +void st_nci_se_deinit(struct nci_dev *ndev) +{ + struct st_nci_info *info = nci_get_drvdata(ndev); + + if (info->se_info.bwi_active) + del_timer_sync(&info->se_info.bwi_timer); + if (info->se_info.se_active) + del_timer_sync(&info->se_info.se_active_timer); + + info->se_info.se_active = false; + info->se_info.bwi_active = false; +} +EXPORT_SYMBOL(st_nci_se_deinit); + diff --git a/drivers/nfc/st-nci/st-nci_se.h b/drivers/nfc/st-nci/st-nci_se.h new file mode 100644 index 000000000000..ea66e879d67f --- /dev/null +++ b/drivers/nfc/st-nci/st-nci_se.h @@ -0,0 +1,61 @@ +/* + * Secure Element Driver for STMicroelectronics NFC NCI Chip + * + * Copyright (C) 2014-2015 STMicroelectronics SAS. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ +#ifndef __LOCAL_ST_NCI_SE_H_ +#define __LOCAL_ST_NCI_SE_H_ + +/* + * ref ISO7816-3 chap 8.1. the initial character TS is followed by a + * sequence of at most 32 characters. + */ +#define ST_NCI_ESE_MAX_LENGTH 33 +#define ST_NCI_HCI_HOST_ID_ESE 0xc0 + +struct st_nci_se_info { + u8 atr[ST_NCI_ESE_MAX_LENGTH]; + struct completion req_completion; + + struct timer_list bwi_timer; + int wt_timeout; /* in msecs */ + bool bwi_active; + + struct timer_list se_active_timer; + bool se_active; + + bool xch_error; + + se_io_cb_t cb; + void *cb_context; +}; + +int st_nci_se_init(struct nci_dev *ndev); +void st_nci_se_deinit(struct nci_dev *ndev); + +int st_nci_discover_se(struct nci_dev *ndev); +int st_nci_enable_se(struct nci_dev *ndev, u32 se_idx); +int st_nci_disable_se(struct nci_dev *ndev, u32 se_idx); +int st_nci_se_io(struct nci_dev *ndev, u32 se_idx, + u8 *apdu, size_t apdu_length, + se_io_cb_t cb, void *cb_context); +int st_nci_hci_load_session(struct nci_dev *ndev); +void st_nci_hci_event_received(struct nci_dev *ndev, u8 pipe, + u8 event, struct sk_buff *skb); +void st_nci_hci_cmd_received(struct nci_dev *ndev, u8 pipe, u8 cmd, + struct sk_buff *skb); + + +#endif /* __LOCAL_ST_NCI_SE_H_ */ diff --git a/drivers/nfc/st21nfcb/Kconfig b/drivers/nfc/st21nfcb/Kconfig deleted file mode 100644 index e0322dd03a70..000000000000 --- a/drivers/nfc/st21nfcb/Kconfig +++ /dev/null @@ -1,22 +0,0 @@ -config NFC_ST21NFCB - tristate "STMicroelectronics ST21NFCB NFC driver" - depends on NFC_NCI - default n - ---help--- - STMicroelectronics ST21NFCB core driver. It implements the chipset - NCI logic and hooks into the NFC kernel APIs. Physical layers will - register against it. - - To compile this driver as a module, choose m here. The module will - be called st21nfcb. - Say N if unsure. - -config NFC_ST21NFCB_I2C - tristate "NFC ST21NFCB i2c support" - depends on NFC_ST21NFCB && I2C - ---help--- - This module adds support for the STMicroelectronics st21nfcb i2c interface. - Select this if your platform is using the i2c bus. - - If you choose to build a module, it'll be called st21nfcb_i2c. - Say N if unsure. diff --git a/drivers/nfc/st21nfcb/Makefile b/drivers/nfc/st21nfcb/Makefile deleted file mode 100644 index ce659a9e5a1a..000000000000 --- a/drivers/nfc/st21nfcb/Makefile +++ /dev/null @@ -1,9 +0,0 @@ -# -# Makefile for ST21NFCB NCI based NFC driver -# - -st21nfcb_nci-objs = ndlc.o st21nfcb.o st21nfcb_se.o -obj-$(CONFIG_NFC_ST21NFCB) += st21nfcb_nci.o - -st21nfcb_i2c-objs = i2c.o -obj-$(CONFIG_NFC_ST21NFCB_I2C) += st21nfcb_i2c.o diff --git a/drivers/nfc/st21nfcb/i2c.c b/drivers/nfc/st21nfcb/i2c.c deleted file mode 100644 index dbc0dfd8ae85..000000000000 --- a/drivers/nfc/st21nfcb/i2c.c +++ /dev/null @@ -1,384 +0,0 @@ -/* - * I2C Link Layer for ST21NFCB NCI based Driver - * Copyright (C) 2014 STMicroelectronics SAS. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "ndlc.h" - -#define DRIVER_DESC "NCI NFC driver for ST21NFCB" - -/* ndlc header */ -#define ST21NFCB_FRAME_HEADROOM 1 -#define ST21NFCB_FRAME_TAILROOM 0 - -#define ST21NFCB_NCI_I2C_MIN_SIZE 4 /* PCB(1) + NCI Packet header(3) */ -#define ST21NFCB_NCI_I2C_MAX_SIZE 250 /* req 4.2.1 */ - -#define ST21NFCB_NCI_I2C_DRIVER_NAME "st21nfcb_nci_i2c" - -static struct i2c_device_id st21nfcb_nci_i2c_id_table[] = { - {ST21NFCB_NCI_DRIVER_NAME, 0}, - {} -}; -MODULE_DEVICE_TABLE(i2c, st21nfcb_nci_i2c_id_table); - -struct st21nfcb_i2c_phy { - struct i2c_client *i2c_dev; - struct llt_ndlc *ndlc; - - unsigned int gpio_reset; - unsigned int irq_polarity; -}; - -#define I2C_DUMP_SKB(info, skb) \ -do { \ - pr_debug("%s:\n", info); \ - print_hex_dump(KERN_DEBUG, "i2c: ", DUMP_PREFIX_OFFSET, \ - 16, 1, (skb)->data, (skb)->len, 0); \ -} while (0) - -static int st21nfcb_nci_i2c_enable(void *phy_id) -{ - struct st21nfcb_i2c_phy *phy = phy_id; - - gpio_set_value(phy->gpio_reset, 0); - usleep_range(10000, 15000); - gpio_set_value(phy->gpio_reset, 1); - usleep_range(80000, 85000); - - if (phy->ndlc->powered == 0) - enable_irq(phy->i2c_dev->irq); - - return 0; -} - -static void st21nfcb_nci_i2c_disable(void *phy_id) -{ - struct st21nfcb_i2c_phy *phy = phy_id; - - disable_irq_nosync(phy->i2c_dev->irq); -} - -/* - * Writing a frame must not return the number of written bytes. - * It must return either zero for success, or <0 for error. - * In addition, it must not alter the skb - */ -static int st21nfcb_nci_i2c_write(void *phy_id, struct sk_buff *skb) -{ - int r = -1; - struct st21nfcb_i2c_phy *phy = phy_id; - struct i2c_client *client = phy->i2c_dev; - - I2C_DUMP_SKB("st21nfcb_nci_i2c_write", skb); - - if (phy->ndlc->hard_fault != 0) - return phy->ndlc->hard_fault; - - r = i2c_master_send(client, skb->data, skb->len); - if (r < 0) { /* Retry, chip was in standby */ - usleep_range(1000, 4000); - r = i2c_master_send(client, skb->data, skb->len); - } - - if (r >= 0) { - if (r != skb->len) - r = -EREMOTEIO; - else - r = 0; - } - - return r; -} - -/* - * Reads an ndlc frame and returns it in a newly allocated sk_buff. - * returns: - * frame size : if received frame is complete (find ST21NFCB_SOF_EOF at - * end of read) - * -EAGAIN : if received frame is incomplete (not find ST21NFCB_SOF_EOF - * at end of read) - * -EREMOTEIO : i2c read error (fatal) - * -EBADMSG : frame was incorrect and discarded - * (value returned from st21nfcb_nci_i2c_repack) - * -EIO : if no ST21NFCB_SOF_EOF is found after reaching - * the read length end sequence - */ -static int st21nfcb_nci_i2c_read(struct st21nfcb_i2c_phy *phy, - struct sk_buff **skb) -{ - int r; - u8 len; - u8 buf[ST21NFCB_NCI_I2C_MAX_SIZE]; - struct i2c_client *client = phy->i2c_dev; - - r = i2c_master_recv(client, buf, ST21NFCB_NCI_I2C_MIN_SIZE); - if (r < 0) { /* Retry, chip was in standby */ - usleep_range(1000, 4000); - r = i2c_master_recv(client, buf, ST21NFCB_NCI_I2C_MIN_SIZE); - } - - if (r != ST21NFCB_NCI_I2C_MIN_SIZE) - return -EREMOTEIO; - - len = be16_to_cpu(*(__be16 *) (buf + 2)); - if (len > ST21NFCB_NCI_I2C_MAX_SIZE) { - nfc_err(&client->dev, "invalid frame len\n"); - return -EBADMSG; - } - - *skb = alloc_skb(ST21NFCB_NCI_I2C_MIN_SIZE + len, GFP_KERNEL); - if (*skb == NULL) - return -ENOMEM; - - skb_reserve(*skb, ST21NFCB_NCI_I2C_MIN_SIZE); - skb_put(*skb, ST21NFCB_NCI_I2C_MIN_SIZE); - memcpy((*skb)->data, buf, ST21NFCB_NCI_I2C_MIN_SIZE); - - if (!len) - return 0; - - r = i2c_master_recv(client, buf, len); - if (r != len) { - kfree_skb(*skb); - return -EREMOTEIO; - } - - skb_put(*skb, len); - memcpy((*skb)->data + ST21NFCB_NCI_I2C_MIN_SIZE, buf, len); - - I2C_DUMP_SKB("i2c frame read", *skb); - - return 0; -} - -/* - * Reads an ndlc frame from the chip. - * - * On ST21NFCB, IRQ goes in idle state when read starts. - */ -static irqreturn_t st21nfcb_nci_irq_thread_fn(int irq, void *phy_id) -{ - struct st21nfcb_i2c_phy *phy = phy_id; - struct i2c_client *client; - struct sk_buff *skb = NULL; - int r; - - if (!phy || !phy->ndlc || irq != phy->i2c_dev->irq) { - WARN_ON_ONCE(1); - return IRQ_NONE; - } - - client = phy->i2c_dev; - dev_dbg(&client->dev, "IRQ\n"); - - if (phy->ndlc->hard_fault) - return IRQ_HANDLED; - - if (!phy->ndlc->powered) { - st21nfcb_nci_i2c_disable(phy); - return IRQ_HANDLED; - } - - r = st21nfcb_nci_i2c_read(phy, &skb); - if (r == -EREMOTEIO || r == -ENOMEM || r == -EBADMSG) - return IRQ_HANDLED; - - ndlc_recv(phy->ndlc, skb); - - return IRQ_HANDLED; -} - -static struct nfc_phy_ops i2c_phy_ops = { - .write = st21nfcb_nci_i2c_write, - .enable = st21nfcb_nci_i2c_enable, - .disable = st21nfcb_nci_i2c_disable, -}; - -#ifdef CONFIG_OF -static int st21nfcb_nci_i2c_of_request_resources(struct i2c_client *client) -{ - struct st21nfcb_i2c_phy *phy = i2c_get_clientdata(client); - struct device_node *pp; - int gpio; - int r; - - pp = client->dev.of_node; - if (!pp) - return -ENODEV; - - /* Get GPIO from device tree */ - gpio = of_get_named_gpio(pp, "reset-gpios", 0); - if (gpio < 0) { - nfc_err(&client->dev, - "Failed to retrieve reset-gpios from device tree\n"); - return gpio; - } - - /* GPIO request and configuration */ - r = devm_gpio_request_one(&client->dev, gpio, - GPIOF_OUT_INIT_HIGH, "clf_reset"); - if (r) { - nfc_err(&client->dev, "Failed to request reset pin\n"); - return r; - } - phy->gpio_reset = gpio; - - phy->irq_polarity = irq_get_trigger_type(client->irq); - - return 0; -} -#else -static int st21nfcb_nci_i2c_of_request_resources(struct i2c_client *client) -{ - return -ENODEV; -} -#endif - -static int st21nfcb_nci_i2c_request_resources(struct i2c_client *client) -{ - struct st21nfcb_nfc_platform_data *pdata; - struct st21nfcb_i2c_phy *phy = i2c_get_clientdata(client); - int r; - - pdata = client->dev.platform_data; - if (pdata == NULL) { - nfc_err(&client->dev, "No platform data\n"); - return -EINVAL; - } - - /* store for later use */ - phy->gpio_reset = pdata->gpio_reset; - phy->irq_polarity = pdata->irq_polarity; - - r = devm_gpio_request_one(&client->dev, - phy->gpio_reset, GPIOF_OUT_INIT_HIGH, "clf_reset"); - if (r) { - pr_err("%s : reset gpio_request failed\n", __FILE__); - return r; - } - - return 0; -} - -static int st21nfcb_nci_i2c_probe(struct i2c_client *client, - const struct i2c_device_id *id) -{ - struct st21nfcb_i2c_phy *phy; - struct st21nfcb_nfc_platform_data *pdata; - int r; - - dev_dbg(&client->dev, "%s\n", __func__); - dev_dbg(&client->dev, "IRQ: %d\n", client->irq); - - if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) { - nfc_err(&client->dev, "Need I2C_FUNC_I2C\n"); - return -ENODEV; - } - - phy = devm_kzalloc(&client->dev, sizeof(struct st21nfcb_i2c_phy), - GFP_KERNEL); - if (!phy) - return -ENOMEM; - - phy->i2c_dev = client; - - i2c_set_clientdata(client, phy); - - pdata = client->dev.platform_data; - if (!pdata && client->dev.of_node) { - r = st21nfcb_nci_i2c_of_request_resources(client); - if (r) { - nfc_err(&client->dev, "No platform data\n"); - return r; - } - } else if (pdata) { - r = st21nfcb_nci_i2c_request_resources(client); - if (r) { - nfc_err(&client->dev, - "Cannot get platform resources\n"); - return r; - } - } else { - nfc_err(&client->dev, - "st21nfcb platform resources not available\n"); - return -ENODEV; - } - - r = ndlc_probe(phy, &i2c_phy_ops, &client->dev, - ST21NFCB_FRAME_HEADROOM, ST21NFCB_FRAME_TAILROOM, - &phy->ndlc); - if (r < 0) { - nfc_err(&client->dev, "Unable to register ndlc layer\n"); - return r; - } - - r = devm_request_threaded_irq(&client->dev, client->irq, NULL, - st21nfcb_nci_irq_thread_fn, - phy->irq_polarity | IRQF_ONESHOT, - ST21NFCB_NCI_DRIVER_NAME, phy); - if (r < 0) - nfc_err(&client->dev, "Unable to register IRQ handler\n"); - - return r; -} - -static int st21nfcb_nci_i2c_remove(struct i2c_client *client) -{ - struct st21nfcb_i2c_phy *phy = i2c_get_clientdata(client); - - dev_dbg(&client->dev, "%s\n", __func__); - - ndlc_remove(phy->ndlc); - - return 0; -} - -#ifdef CONFIG_OF -static const struct of_device_id of_st21nfcb_i2c_match[] = { - { .compatible = "st,st21nfcb-i2c", }, - { .compatible = "st,st21nfcb_i2c", }, - {} -}; -MODULE_DEVICE_TABLE(of, of_st21nfcb_i2c_match); -#endif - -static struct i2c_driver st21nfcb_nci_i2c_driver = { - .driver = { - .owner = THIS_MODULE, - .name = ST21NFCB_NCI_I2C_DRIVER_NAME, - .of_match_table = of_match_ptr(of_st21nfcb_i2c_match), - }, - .probe = st21nfcb_nci_i2c_probe, - .id_table = st21nfcb_nci_i2c_id_table, - .remove = st21nfcb_nci_i2c_remove, -}; - -module_i2c_driver(st21nfcb_nci_i2c_driver); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION(DRIVER_DESC); diff --git a/drivers/nfc/st21nfcb/ndlc.c b/drivers/nfc/st21nfcb/ndlc.c deleted file mode 100644 index 91e81f37b3a6..000000000000 --- a/drivers/nfc/st21nfcb/ndlc.c +++ /dev/null @@ -1,313 +0,0 @@ -/* - * Low Level Transport (NDLC) Driver for STMicroelectronics NFC Chip - * - * Copyright (C) 2014 STMicroelectronics SAS. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - */ - -#include -#include - -#include "ndlc.h" -#include "st21nfcb.h" - -#define NDLC_TIMER_T1 100 -#define NDLC_TIMER_T1_WAIT 400 -#define NDLC_TIMER_T2 1200 - -#define PCB_TYPE_DATAFRAME 0x80 -#define PCB_TYPE_SUPERVISOR 0xc0 -#define PCB_TYPE_MASK PCB_TYPE_SUPERVISOR - -#define PCB_SYNC_ACK 0x20 -#define PCB_SYNC_NACK 0x10 -#define PCB_SYNC_WAIT 0x30 -#define PCB_SYNC_NOINFO 0x00 -#define PCB_SYNC_MASK PCB_SYNC_WAIT - -#define PCB_DATAFRAME_RETRANSMIT_YES 0x00 -#define PCB_DATAFRAME_RETRANSMIT_NO 0x04 -#define PCB_DATAFRAME_RETRANSMIT_MASK PCB_DATAFRAME_RETRANSMIT_NO - -#define PCB_SUPERVISOR_RETRANSMIT_YES 0x00 -#define PCB_SUPERVISOR_RETRANSMIT_NO 0x02 -#define PCB_SUPERVISOR_RETRANSMIT_MASK PCB_SUPERVISOR_RETRANSMIT_NO - -#define PCB_FRAME_CRC_INFO_PRESENT 0x08 -#define PCB_FRAME_CRC_INFO_NOTPRESENT 0x00 -#define PCB_FRAME_CRC_INFO_MASK PCB_FRAME_CRC_INFO_PRESENT - -#define NDLC_DUMP_SKB(info, skb) \ -do { \ - pr_debug("%s:\n", info); \ - print_hex_dump(KERN_DEBUG, "ndlc: ", DUMP_PREFIX_OFFSET, \ - 16, 1, skb->data, skb->len, 0); \ -} while (0) - -int ndlc_open(struct llt_ndlc *ndlc) -{ - /* toggle reset pin */ - ndlc->ops->enable(ndlc->phy_id); - ndlc->powered = 1; - return 0; -} -EXPORT_SYMBOL(ndlc_open); - -void ndlc_close(struct llt_ndlc *ndlc) -{ - struct nci_mode_set_cmd cmd; - - cmd.cmd_type = ST21NFCB_NCI_SET_NFC_MODE; - cmd.mode = 0; - - /* toggle reset pin */ - ndlc->ops->enable(ndlc->phy_id); - - nci_prop_cmd(ndlc->ndev, ST21NFCB_NCI_CORE_PROP, - sizeof(struct nci_mode_set_cmd), (__u8 *)&cmd); - - ndlc->powered = 0; - ndlc->ops->disable(ndlc->phy_id); -} -EXPORT_SYMBOL(ndlc_close); - -int ndlc_send(struct llt_ndlc *ndlc, struct sk_buff *skb) -{ - /* add ndlc header */ - u8 pcb = PCB_TYPE_DATAFRAME | PCB_DATAFRAME_RETRANSMIT_NO | - PCB_FRAME_CRC_INFO_NOTPRESENT; - - *skb_push(skb, 1) = pcb; - skb_queue_tail(&ndlc->send_q, skb); - - schedule_work(&ndlc->sm_work); - - return 0; -} -EXPORT_SYMBOL(ndlc_send); - -static void llt_ndlc_send_queue(struct llt_ndlc *ndlc) -{ - struct sk_buff *skb; - int r; - unsigned long time_sent; - - if (ndlc->send_q.qlen) - pr_debug("sendQlen=%d unackQlen=%d\n", - ndlc->send_q.qlen, ndlc->ack_pending_q.qlen); - - while (ndlc->send_q.qlen) { - skb = skb_dequeue(&ndlc->send_q); - NDLC_DUMP_SKB("ndlc frame written", skb); - r = ndlc->ops->write(ndlc->phy_id, skb); - if (r < 0) { - ndlc->hard_fault = r; - break; - } - time_sent = jiffies; - *(unsigned long *)skb->cb = time_sent; - - skb_queue_tail(&ndlc->ack_pending_q, skb); - - /* start timer t1 for ndlc aknowledge */ - ndlc->t1_active = true; - mod_timer(&ndlc->t1_timer, time_sent + - msecs_to_jiffies(NDLC_TIMER_T1)); - /* start timer t2 for chip availability */ - ndlc->t2_active = true; - mod_timer(&ndlc->t2_timer, time_sent + - msecs_to_jiffies(NDLC_TIMER_T2)); - } -} - -static void llt_ndlc_requeue_data_pending(struct llt_ndlc *ndlc) -{ - struct sk_buff *skb; - u8 pcb; - - while ((skb = skb_dequeue_tail(&ndlc->ack_pending_q))) { - pcb = skb->data[0]; - switch (pcb & PCB_TYPE_MASK) { - case PCB_TYPE_SUPERVISOR: - skb->data[0] = (pcb & ~PCB_SUPERVISOR_RETRANSMIT_MASK) | - PCB_SUPERVISOR_RETRANSMIT_YES; - break; - case PCB_TYPE_DATAFRAME: - skb->data[0] = (pcb & ~PCB_DATAFRAME_RETRANSMIT_MASK) | - PCB_DATAFRAME_RETRANSMIT_YES; - break; - default: - pr_err("UNKNOWN Packet Control Byte=%d\n", pcb); - kfree_skb(skb); - continue; - } - skb_queue_head(&ndlc->send_q, skb); - } -} - -static void llt_ndlc_rcv_queue(struct llt_ndlc *ndlc) -{ - struct sk_buff *skb; - u8 pcb; - unsigned long time_sent; - - if (ndlc->rcv_q.qlen) - pr_debug("rcvQlen=%d\n", ndlc->rcv_q.qlen); - - while ((skb = skb_dequeue(&ndlc->rcv_q)) != NULL) { - pcb = skb->data[0]; - skb_pull(skb, 1); - if ((pcb & PCB_TYPE_MASK) == PCB_TYPE_SUPERVISOR) { - switch (pcb & PCB_SYNC_MASK) { - case PCB_SYNC_ACK: - del_timer_sync(&ndlc->t1_timer); - del_timer_sync(&ndlc->t2_timer); - ndlc->t2_active = false; - ndlc->t1_active = false; - break; - case PCB_SYNC_NACK: - llt_ndlc_requeue_data_pending(ndlc); - llt_ndlc_send_queue(ndlc); - /* start timer t1 for ndlc aknowledge */ - time_sent = jiffies; - ndlc->t1_active = true; - mod_timer(&ndlc->t1_timer, time_sent + - msecs_to_jiffies(NDLC_TIMER_T1)); - break; - case PCB_SYNC_WAIT: - time_sent = jiffies; - ndlc->t1_active = true; - mod_timer(&ndlc->t1_timer, time_sent + - msecs_to_jiffies(NDLC_TIMER_T1_WAIT)); - break; - default: - pr_err("UNKNOWN Packet Control Byte=%d\n", pcb); - kfree_skb(skb); - break; - } - } else { - nci_recv_frame(ndlc->ndev, skb); - } - } -} - -static void llt_ndlc_sm_work(struct work_struct *work) -{ - struct llt_ndlc *ndlc = container_of(work, struct llt_ndlc, sm_work); - - llt_ndlc_send_queue(ndlc); - llt_ndlc_rcv_queue(ndlc); - - if (ndlc->t1_active && timer_pending(&ndlc->t1_timer) == 0) { - pr_debug - ("Handle T1(recv SUPERVISOR) elapsed (T1 now inactive)\n"); - ndlc->t1_active = false; - - llt_ndlc_requeue_data_pending(ndlc); - llt_ndlc_send_queue(ndlc); - } - - if (ndlc->t2_active && timer_pending(&ndlc->t2_timer) == 0) { - pr_debug("Handle T2(recv DATA) elapsed (T2 now inactive)\n"); - ndlc->t2_active = false; - ndlc->t1_active = false; - del_timer_sync(&ndlc->t1_timer); - del_timer_sync(&ndlc->t2_timer); - ndlc_close(ndlc); - ndlc->hard_fault = -EREMOTEIO; - } -} - -void ndlc_recv(struct llt_ndlc *ndlc, struct sk_buff *skb) -{ - if (skb == NULL) { - pr_err("NULL Frame -> link is dead\n"); - ndlc->hard_fault = -EREMOTEIO; - ndlc_close(ndlc); - } else { - NDLC_DUMP_SKB("incoming frame", skb); - skb_queue_tail(&ndlc->rcv_q, skb); - } - - schedule_work(&ndlc->sm_work); -} -EXPORT_SYMBOL(ndlc_recv); - -static void ndlc_t1_timeout(unsigned long data) -{ - struct llt_ndlc *ndlc = (struct llt_ndlc *)data; - - pr_debug("\n"); - - schedule_work(&ndlc->sm_work); -} - -static void ndlc_t2_timeout(unsigned long data) -{ - struct llt_ndlc *ndlc = (struct llt_ndlc *)data; - - pr_debug("\n"); - - schedule_work(&ndlc->sm_work); -} - -int ndlc_probe(void *phy_id, struct nfc_phy_ops *phy_ops, struct device *dev, - int phy_headroom, int phy_tailroom, struct llt_ndlc **ndlc_id) -{ - struct llt_ndlc *ndlc; - - ndlc = devm_kzalloc(dev, sizeof(struct llt_ndlc), GFP_KERNEL); - if (!ndlc) - return -ENOMEM; - - ndlc->ops = phy_ops; - ndlc->phy_id = phy_id; - ndlc->dev = dev; - ndlc->powered = 0; - - *ndlc_id = ndlc; - - /* initialize timers */ - init_timer(&ndlc->t1_timer); - ndlc->t1_timer.data = (unsigned long)ndlc; - ndlc->t1_timer.function = ndlc_t1_timeout; - - init_timer(&ndlc->t2_timer); - ndlc->t2_timer.data = (unsigned long)ndlc; - ndlc->t2_timer.function = ndlc_t2_timeout; - - skb_queue_head_init(&ndlc->rcv_q); - skb_queue_head_init(&ndlc->send_q); - skb_queue_head_init(&ndlc->ack_pending_q); - - INIT_WORK(&ndlc->sm_work, llt_ndlc_sm_work); - - return st21nfcb_nci_probe(ndlc, phy_headroom, phy_tailroom); -} -EXPORT_SYMBOL(ndlc_probe); - -void ndlc_remove(struct llt_ndlc *ndlc) -{ - st21nfcb_nci_remove(ndlc->ndev); - - /* cancel timers */ - del_timer_sync(&ndlc->t1_timer); - del_timer_sync(&ndlc->t2_timer); - ndlc->t2_active = false; - ndlc->t1_active = false; - - skb_queue_purge(&ndlc->rcv_q); - skb_queue_purge(&ndlc->send_q); -} -EXPORT_SYMBOL(ndlc_remove); diff --git a/drivers/nfc/st21nfcb/ndlc.h b/drivers/nfc/st21nfcb/ndlc.h deleted file mode 100644 index cf6a9d9f2983..000000000000 --- a/drivers/nfc/st21nfcb/ndlc.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * NCI based Driver for STMicroelectronics NFC Chip - * - * Copyright (C) 2014 STMicroelectronics SAS. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - */ - -#ifndef __LOCAL_NDLC_H_ -#define __LOCAL_NDLC_H_ - -#include -#include - -/* Low Level Transport description */ -struct llt_ndlc { - struct nci_dev *ndev; - struct nfc_phy_ops *ops; - void *phy_id; - - struct timer_list t1_timer; - bool t1_active; - - struct timer_list t2_timer; - bool t2_active; - - struct sk_buff_head rcv_q; - struct sk_buff_head send_q; - struct sk_buff_head ack_pending_q; - - struct work_struct sm_work; - - struct device *dev; - - /* - * < 0 if hardware error occured - * and prevents normal operation. - */ - int hard_fault; - int powered; -}; - -int ndlc_open(struct llt_ndlc *ndlc); -void ndlc_close(struct llt_ndlc *ndlc); -int ndlc_send(struct llt_ndlc *ndlc, struct sk_buff *skb); -void ndlc_recv(struct llt_ndlc *ndlc, struct sk_buff *skb); -int ndlc_probe(void *phy_id, struct nfc_phy_ops *phy_ops, struct device *dev, - int phy_headroom, int phy_tailroom, struct llt_ndlc **ndlc_id); -void ndlc_remove(struct llt_ndlc *ndlc); -#endif /* __LOCAL_NDLC_H__ */ diff --git a/drivers/nfc/st21nfcb/st21nfcb.c b/drivers/nfc/st21nfcb/st21nfcb.c deleted file mode 100644 index a16c3a3d3fff..000000000000 --- a/drivers/nfc/st21nfcb/st21nfcb.c +++ /dev/null @@ -1,179 +0,0 @@ -/* - * NCI based Driver for STMicroelectronics NFC Chip - * - * Copyright (C) 2014 STMicroelectronics SAS. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - */ - -#include -#include -#include -#include -#include -#include - -#include "st21nfcb.h" -#include "st21nfcb_se.h" - -#define DRIVER_DESC "NCI NFC driver for ST21NFCB" - -#define ST21NFCB_NCI1_X_PROPRIETARY_ISO15693 0x83 - -static int st21nfcb_nci_init(struct nci_dev *ndev) -{ - struct nci_mode_set_cmd cmd; - - cmd.cmd_type = ST21NFCB_NCI_SET_NFC_MODE; - cmd.mode = 1; - - return nci_prop_cmd(ndev, ST21NFCB_NCI_CORE_PROP, - sizeof(struct nci_mode_set_cmd), (__u8 *)&cmd); -} - -static int st21nfcb_nci_open(struct nci_dev *ndev) -{ - struct st21nfcb_nci_info *info = nci_get_drvdata(ndev); - int r; - - if (test_and_set_bit(ST21NFCB_NCI_RUNNING, &info->flags)) - return 0; - - r = ndlc_open(info->ndlc); - if (r) - clear_bit(ST21NFCB_NCI_RUNNING, &info->flags); - - return r; -} - -static int st21nfcb_nci_close(struct nci_dev *ndev) -{ - struct st21nfcb_nci_info *info = nci_get_drvdata(ndev); - - if (!test_bit(ST21NFCB_NCI_RUNNING, &info->flags)) - return 0; - - ndlc_close(info->ndlc); - - clear_bit(ST21NFCB_NCI_RUNNING, &info->flags); - - return 0; -} - -static int st21nfcb_nci_send(struct nci_dev *ndev, struct sk_buff *skb) -{ - struct st21nfcb_nci_info *info = nci_get_drvdata(ndev); - - skb->dev = (void *)ndev; - - if (!test_bit(ST21NFCB_NCI_RUNNING, &info->flags)) - return -EBUSY; - - return ndlc_send(info->ndlc, skb); -} - -static __u32 st21nfcb_nci_get_rfprotocol(struct nci_dev *ndev, - __u8 rf_protocol) -{ - return rf_protocol == ST21NFCB_NCI1_X_PROPRIETARY_ISO15693 ? - NFC_PROTO_ISO15693_MASK : 0; -} - -static int st21nfcb_nci_prop_rsp_packet(struct nci_dev *ndev, - struct sk_buff *skb) -{ - __u8 status = skb->data[0]; - - nci_req_complete(ndev, status); - return 0; -} - -static struct nci_prop_ops st21nfcb_nci_prop_ops[] = { - { - .opcode = nci_opcode_pack(NCI_GID_PROPRIETARY, - ST21NFCB_NCI_CORE_PROP), - .rsp = st21nfcb_nci_prop_rsp_packet, - }, -}; - -static struct nci_ops st21nfcb_nci_ops = { - .init = st21nfcb_nci_init, - .open = st21nfcb_nci_open, - .close = st21nfcb_nci_close, - .send = st21nfcb_nci_send, - .get_rfprotocol = st21nfcb_nci_get_rfprotocol, - .discover_se = st21nfcb_nci_discover_se, - .enable_se = st21nfcb_nci_enable_se, - .disable_se = st21nfcb_nci_disable_se, - .se_io = st21nfcb_nci_se_io, - .hci_load_session = st21nfcb_hci_load_session, - .hci_event_received = st21nfcb_hci_event_received, - .hci_cmd_received = st21nfcb_hci_cmd_received, - .prop_ops = st21nfcb_nci_prop_ops, - .n_prop_ops = ARRAY_SIZE(st21nfcb_nci_prop_ops), -}; - -int st21nfcb_nci_probe(struct llt_ndlc *ndlc, int phy_headroom, - int phy_tailroom) -{ - struct st21nfcb_nci_info *info; - int r; - u32 protocols; - - info = devm_kzalloc(ndlc->dev, - sizeof(struct st21nfcb_nci_info), GFP_KERNEL); - if (!info) - return -ENOMEM; - - protocols = NFC_PROTO_JEWEL_MASK - | NFC_PROTO_MIFARE_MASK - | NFC_PROTO_FELICA_MASK - | NFC_PROTO_ISO14443_MASK - | NFC_PROTO_ISO14443_B_MASK - | NFC_PROTO_ISO15693_MASK - | NFC_PROTO_NFC_DEP_MASK; - - ndlc->ndev = nci_allocate_device(&st21nfcb_nci_ops, protocols, - phy_headroom, phy_tailroom); - if (!ndlc->ndev) { - pr_err("Cannot allocate nfc ndev\n"); - return -ENOMEM; - } - info->ndlc = ndlc; - - nci_set_drvdata(ndlc->ndev, info); - - r = nci_register_device(ndlc->ndev); - if (r) { - pr_err("Cannot register nfc device to nci core\n"); - nci_free_device(ndlc->ndev); - return r; - } - - return st21nfcb_se_init(ndlc->ndev); -} -EXPORT_SYMBOL_GPL(st21nfcb_nci_probe); - -void st21nfcb_nci_remove(struct nci_dev *ndev) -{ - struct st21nfcb_nci_info *info = nci_get_drvdata(ndev); - - ndlc_close(info->ndlc); - - nci_unregister_device(ndev); - nci_free_device(ndev); -} -EXPORT_SYMBOL_GPL(st21nfcb_nci_remove); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION(DRIVER_DESC); diff --git a/drivers/nfc/st21nfcb/st21nfcb.h b/drivers/nfc/st21nfcb/st21nfcb.h deleted file mode 100644 index 710636325c1f..000000000000 --- a/drivers/nfc/st21nfcb/st21nfcb.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * NCI based Driver for STMicroelectronics NFC Chip - * - * Copyright (C) 2014 STMicroelectronics SAS. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - */ - -#ifndef __LOCAL_ST21NFCB_H_ -#define __LOCAL_ST21NFCB_H_ - -#include "st21nfcb_se.h" -#include "ndlc.h" - -/* Define private flags: */ -#define ST21NFCB_NCI_RUNNING 1 - -#define ST21NFCB_NCI_CORE_PROP 0x01 -#define ST21NFCB_NCI_SET_NFC_MODE 0x02 - -struct nci_mode_set_cmd { - u8 cmd_type; - u8 mode; -} __packed; - -struct nci_mode_set_rsp { - u8 status; -} __packed; - -struct st21nfcb_nci_info { - struct llt_ndlc *ndlc; - unsigned long flags; - struct st21nfcb_se_info se_info; -}; - -void st21nfcb_nci_remove(struct nci_dev *ndev); -int st21nfcb_nci_probe(struct llt_ndlc *ndlc, int phy_headroom, - int phy_tailroom); - -#endif /* __LOCAL_ST21NFCB_H_ */ diff --git a/drivers/nfc/st21nfcb/st21nfcb_se.c b/drivers/nfc/st21nfcb/st21nfcb_se.c deleted file mode 100644 index 24862a525fb5..000000000000 --- a/drivers/nfc/st21nfcb/st21nfcb_se.c +++ /dev/null @@ -1,713 +0,0 @@ -/* - * NCI based Driver for STMicroelectronics NFC Chip - * - * Copyright (C) 2014 STMicroelectronics SAS. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - */ - -#include -#include -#include -#include -#include - -#include "st21nfcb.h" -#include "st21nfcb_se.h" - -struct st21nfcb_pipe_info { - u8 pipe_state; - u8 src_host_id; - u8 src_gate_id; - u8 dst_host_id; - u8 dst_gate_id; -} __packed; - -/* Hosts */ -#define ST21NFCB_HOST_CONTROLLER_ID 0x00 -#define ST21NFCB_TERMINAL_HOST_ID 0x01 -#define ST21NFCB_UICC_HOST_ID 0x02 -#define ST21NFCB_ESE_HOST_ID 0xc0 - -/* Gates */ -#define ST21NFCB_DEVICE_MGNT_GATE 0x01 -#define ST21NFCB_APDU_READER_GATE 0xf0 -#define ST21NFCB_CONNECTIVITY_GATE 0x41 - -/* Pipes */ -#define ST21NFCB_DEVICE_MGNT_PIPE 0x02 - -/* Connectivity pipe only */ -#define ST21NFCB_SE_COUNT_PIPE_UICC 0x01 -/* Connectivity + APDU Reader pipe */ -#define ST21NFCB_SE_COUNT_PIPE_EMBEDDED 0x02 - -#define ST21NFCB_SE_TO_HOT_PLUG 1000 /* msecs */ -#define ST21NFCB_SE_TO_PIPES 2000 - -#define ST21NFCB_EVT_HOT_PLUG_IS_INHIBITED(x) (x->data[0] & 0x80) - -#define NCI_HCI_APDU_PARAM_ATR 0x01 -#define NCI_HCI_ADMIN_PARAM_SESSION_IDENTITY 0x01 -#define NCI_HCI_ADMIN_PARAM_WHITELIST 0x03 -#define NCI_HCI_ADMIN_PARAM_HOST_LIST 0x04 - -#define ST21NFCB_EVT_SE_HARD_RESET 0x20 -#define ST21NFCB_EVT_TRANSMIT_DATA 0x10 -#define ST21NFCB_EVT_WTX_REQUEST 0x11 -#define ST21NFCB_EVT_SE_SOFT_RESET 0x11 -#define ST21NFCB_EVT_SE_END_OF_APDU_TRANSFER 0x21 -#define ST21NFCB_EVT_HOT_PLUG 0x03 - -#define ST21NFCB_SE_MODE_OFF 0x00 -#define ST21NFCB_SE_MODE_ON 0x01 - -#define ST21NFCB_EVT_CONNECTIVITY 0x10 -#define ST21NFCB_EVT_TRANSACTION 0x12 - -#define ST21NFCB_DM_GETINFO 0x13 -#define ST21NFCB_DM_GETINFO_PIPE_LIST 0x02 -#define ST21NFCB_DM_GETINFO_PIPE_INFO 0x01 -#define ST21NFCB_DM_PIPE_CREATED 0x02 -#define ST21NFCB_DM_PIPE_OPEN 0x04 -#define ST21NFCB_DM_RF_ACTIVE 0x80 -#define ST21NFCB_DM_DISCONNECT 0x30 - -#define ST21NFCB_DM_IS_PIPE_OPEN(p) \ - ((p & 0x0f) == (ST21NFCB_DM_PIPE_CREATED | ST21NFCB_DM_PIPE_OPEN)) - -#define ST21NFCB_ATR_DEFAULT_BWI 0x04 - -/* - * WT = 2^BWI/10[s], convert into msecs and add a secure - * room by increasing by 2 this timeout - */ -#define ST21NFCB_BWI_TO_TIMEOUT(x) ((1 << x) * 200) -#define ST21NFCB_ATR_GET_Y_FROM_TD(x) (x >> 4) - -/* If TA is present bit 0 is set */ -#define ST21NFCB_ATR_TA_PRESENT(x) (x & 0x01) -/* If TB is present bit 1 is set */ -#define ST21NFCB_ATR_TB_PRESENT(x) (x & 0x02) - -#define ST21NFCB_NUM_DEVICES 256 - -static DECLARE_BITMAP(dev_mask, ST21NFCB_NUM_DEVICES); - -/* Here are the mandatory pipe for st21nfcb */ -static struct nci_hci_gate st21nfcb_gates[] = { - {NCI_HCI_ADMIN_GATE, NCI_HCI_ADMIN_PIPE, - ST21NFCB_HOST_CONTROLLER_ID}, - {NCI_HCI_LINK_MGMT_GATE, NCI_HCI_LINK_MGMT_PIPE, - ST21NFCB_HOST_CONTROLLER_ID}, - {ST21NFCB_DEVICE_MGNT_GATE, ST21NFCB_DEVICE_MGNT_PIPE, - ST21NFCB_HOST_CONTROLLER_ID}, - - /* Secure element pipes are created by secure element host */ - {ST21NFCB_CONNECTIVITY_GATE, NCI_HCI_DO_NOT_OPEN_PIPE, - ST21NFCB_HOST_CONTROLLER_ID}, - {ST21NFCB_APDU_READER_GATE, NCI_HCI_DO_NOT_OPEN_PIPE, - ST21NFCB_HOST_CONTROLLER_ID}, -}; - -static u8 st21nfcb_se_get_bwi(struct nci_dev *ndev) -{ - int i; - u8 td; - struct st21nfcb_nci_info *info = nci_get_drvdata(ndev); - - /* Bits 8 to 5 of the first TB for T=1 encode BWI from zero to nine */ - for (i = 1; i < ST21NFCB_ESE_MAX_LENGTH; i++) { - td = ST21NFCB_ATR_GET_Y_FROM_TD(info->se_info.atr[i]); - if (ST21NFCB_ATR_TA_PRESENT(td)) - i++; - if (ST21NFCB_ATR_TB_PRESENT(td)) { - i++; - return info->se_info.atr[i] >> 4; - } - } - return ST21NFCB_ATR_DEFAULT_BWI; -} - -static void st21nfcb_se_get_atr(struct nci_dev *ndev) -{ - struct st21nfcb_nci_info *info = nci_get_drvdata(ndev); - int r; - struct sk_buff *skb; - - r = nci_hci_get_param(ndev, ST21NFCB_APDU_READER_GATE, - NCI_HCI_APDU_PARAM_ATR, &skb); - if (r < 0) - return; - - if (skb->len <= ST21NFCB_ESE_MAX_LENGTH) { - memcpy(info->se_info.atr, skb->data, skb->len); - - info->se_info.wt_timeout = - ST21NFCB_BWI_TO_TIMEOUT(st21nfcb_se_get_bwi(ndev)); - } - kfree_skb(skb); -} - -int st21nfcb_hci_load_session(struct nci_dev *ndev) -{ - int i, j, r; - struct sk_buff *skb_pipe_list, *skb_pipe_info; - struct st21nfcb_pipe_info *dm_pipe_info; - u8 pipe_list[] = { ST21NFCB_DM_GETINFO_PIPE_LIST, - ST21NFCB_TERMINAL_HOST_ID}; - u8 pipe_info[] = { ST21NFCB_DM_GETINFO_PIPE_INFO, - ST21NFCB_TERMINAL_HOST_ID, 0}; - - /* On ST21NFCB device pipes number are dynamics - * If pipes are already created, hci_dev_up will fail. - * Doing a clear all pipe is a bad idea because: - * - It does useless EEPROM cycling - * - It might cause issue for secure elements support - * (such as removing connectivity or APDU reader pipe) - * A better approach on ST21NFCB is to: - * - get a pipe list for each host. - * (eg: ST21NFCB_HOST_CONTROLLER_ID for now). - * (TODO Later on UICC HOST and eSE HOST) - * - get pipe information - * - match retrieved pipe list in st21nfcb_gates - * ST21NFCB_DEVICE_MGNT_GATE is a proprietary gate - * with ST21NFCB_DEVICE_MGNT_PIPE. - * Pipe can be closed and need to be open. - */ - r = nci_hci_connect_gate(ndev, ST21NFCB_HOST_CONTROLLER_ID, - ST21NFCB_DEVICE_MGNT_GATE, - ST21NFCB_DEVICE_MGNT_PIPE); - if (r < 0) - goto free_info; - - /* Get pipe list */ - r = nci_hci_send_cmd(ndev, ST21NFCB_DEVICE_MGNT_GATE, - ST21NFCB_DM_GETINFO, pipe_list, sizeof(pipe_list), - &skb_pipe_list); - if (r < 0) - goto free_info; - - /* Complete the existing gate_pipe table */ - for (i = 0; i < skb_pipe_list->len; i++) { - pipe_info[2] = skb_pipe_list->data[i]; - r = nci_hci_send_cmd(ndev, ST21NFCB_DEVICE_MGNT_GATE, - ST21NFCB_DM_GETINFO, pipe_info, - sizeof(pipe_info), &skb_pipe_info); - - if (r) - continue; - - /* - * Match pipe ID and gate ID - * Output format from ST21NFC_DM_GETINFO is: - * - pipe state (1byte) - * - source hid (1byte) - * - source gid (1byte) - * - destination hid (1byte) - * - destination gid (1byte) - */ - dm_pipe_info = (struct st21nfcb_pipe_info *)skb_pipe_info->data; - if (dm_pipe_info->dst_gate_id == ST21NFCB_APDU_READER_GATE && - dm_pipe_info->src_host_id != ST21NFCB_ESE_HOST_ID) { - pr_err("Unexpected apdu_reader pipe on host %x\n", - dm_pipe_info->src_host_id); - continue; - } - - for (j = 0; (j < ARRAY_SIZE(st21nfcb_gates)) && - (st21nfcb_gates[j].gate != dm_pipe_info->dst_gate_id); j++) - ; - - if (j < ARRAY_SIZE(st21nfcb_gates) && - st21nfcb_gates[j].gate == dm_pipe_info->dst_gate_id && - ST21NFCB_DM_IS_PIPE_OPEN(dm_pipe_info->pipe_state)) { - st21nfcb_gates[j].pipe = pipe_info[2]; - - ndev->hci_dev->gate2pipe[st21nfcb_gates[j].gate] = - st21nfcb_gates[j].pipe; - ndev->hci_dev->pipes[st21nfcb_gates[j].pipe].gate = - st21nfcb_gates[j].gate; - ndev->hci_dev->pipes[st21nfcb_gates[j].pipe].host = - dm_pipe_info->src_host_id; - } - } - - memcpy(ndev->hci_dev->init_data.gates, st21nfcb_gates, - sizeof(st21nfcb_gates)); - -free_info: - kfree_skb(skb_pipe_info); - kfree_skb(skb_pipe_list); - return r; -} -EXPORT_SYMBOL_GPL(st21nfcb_hci_load_session); - -static void st21nfcb_hci_admin_event_received(struct nci_dev *ndev, - u8 event, struct sk_buff *skb) -{ - struct st21nfcb_nci_info *info = nci_get_drvdata(ndev); - - switch (event) { - case ST21NFCB_EVT_HOT_PLUG: - if (info->se_info.se_active) { - if (!ST21NFCB_EVT_HOT_PLUG_IS_INHIBITED(skb)) { - del_timer_sync(&info->se_info.se_active_timer); - info->se_info.se_active = false; - complete(&info->se_info.req_completion); - } else { - mod_timer(&info->se_info.se_active_timer, - jiffies + - msecs_to_jiffies(ST21NFCB_SE_TO_PIPES)); - } - } - break; - } -} - -static int st21nfcb_hci_apdu_reader_event_received(struct nci_dev *ndev, - u8 event, - struct sk_buff *skb) -{ - int r = 0; - struct st21nfcb_nci_info *info = nci_get_drvdata(ndev); - - pr_debug("apdu reader gate event: %x\n", event); - - switch (event) { - case ST21NFCB_EVT_TRANSMIT_DATA: - del_timer_sync(&info->se_info.bwi_timer); - info->se_info.bwi_active = false; - info->se_info.cb(info->se_info.cb_context, - skb->data, skb->len, 0); - break; - case ST21NFCB_EVT_WTX_REQUEST: - mod_timer(&info->se_info.bwi_timer, jiffies + - msecs_to_jiffies(info->se_info.wt_timeout)); - break; - } - - kfree_skb(skb); - return r; -} - -/* - * Returns: - * <= 0: driver handled the event, skb consumed - * 1: driver does not handle the event, please do standard processing - */ -static int st21nfcb_hci_connectivity_event_received(struct nci_dev *ndev, - u8 host, u8 event, - struct sk_buff *skb) -{ - int r = 0; - struct device *dev = &ndev->nfc_dev->dev; - struct nfc_evt_transaction *transaction; - - pr_debug("connectivity gate event: %x\n", event); - - switch (event) { - case ST21NFCB_EVT_CONNECTIVITY: - - break; - case ST21NFCB_EVT_TRANSACTION: - /* According to specification etsi 102 622 - * 11.2.2.4 EVT_TRANSACTION Table 52 - * Description Tag Length - * AID 81 5 to 16 - * PARAMETERS 82 0 to 255 - */ - if (skb->len < NFC_MIN_AID_LENGTH + 2 && - skb->data[0] != NFC_EVT_TRANSACTION_AID_TAG) - return -EPROTO; - - transaction = (struct nfc_evt_transaction *)devm_kzalloc(dev, - skb->len - 2, GFP_KERNEL); - - transaction->aid_len = skb->data[1]; - memcpy(transaction->aid, &skb->data[2], transaction->aid_len); - - /* Check next byte is PARAMETERS tag (82) */ - if (skb->data[transaction->aid_len + 2] != - NFC_EVT_TRANSACTION_PARAMS_TAG) - return -EPROTO; - - transaction->params_len = skb->data[transaction->aid_len + 3]; - memcpy(transaction->params, skb->data + - transaction->aid_len + 4, transaction->params_len); - - r = nfc_se_transaction(ndev->nfc_dev, host, transaction); - break; - default: - return 1; - } - kfree_skb(skb); - return r; -} - -void st21nfcb_hci_event_received(struct nci_dev *ndev, u8 pipe, - u8 event, struct sk_buff *skb) -{ - u8 gate = ndev->hci_dev->pipes[pipe].gate; - u8 host = ndev->hci_dev->pipes[pipe].host; - - switch (gate) { - case NCI_HCI_ADMIN_GATE: - st21nfcb_hci_admin_event_received(ndev, event, skb); - break; - case ST21NFCB_APDU_READER_GATE: - st21nfcb_hci_apdu_reader_event_received(ndev, event, skb); - break; - case ST21NFCB_CONNECTIVITY_GATE: - st21nfcb_hci_connectivity_event_received(ndev, host, event, - skb); - break; - } -} -EXPORT_SYMBOL_GPL(st21nfcb_hci_event_received); - - -void st21nfcb_hci_cmd_received(struct nci_dev *ndev, u8 pipe, u8 cmd, - struct sk_buff *skb) -{ - struct st21nfcb_nci_info *info = nci_get_drvdata(ndev); - u8 gate = ndev->hci_dev->pipes[pipe].gate; - - pr_debug("cmd: %x\n", cmd); - - switch (cmd) { - case NCI_HCI_ANY_OPEN_PIPE: - if (gate != ST21NFCB_APDU_READER_GATE && - ndev->hci_dev->pipes[pipe].host != ST21NFCB_UICC_HOST_ID) - ndev->hci_dev->count_pipes++; - - if (ndev->hci_dev->count_pipes == - ndev->hci_dev->expected_pipes) { - del_timer_sync(&info->se_info.se_active_timer); - info->se_info.se_active = false; - ndev->hci_dev->count_pipes = 0; - complete(&info->se_info.req_completion); - } - break; - } -} -EXPORT_SYMBOL_GPL(st21nfcb_hci_cmd_received); - -/* - * Remarks: On some early st21nfcb firmware, nci_nfcee_mode_set(0) - * is rejected - */ -static int st21nfcb_nci_control_se(struct nci_dev *ndev, u8 se_idx, - u8 state) -{ - struct st21nfcb_nci_info *info = nci_get_drvdata(ndev); - int r; - struct sk_buff *sk_host_list; - u8 host_id; - - switch (se_idx) { - case ST21NFCB_UICC_HOST_ID: - ndev->hci_dev->count_pipes = 0; - ndev->hci_dev->expected_pipes = ST21NFCB_SE_COUNT_PIPE_UICC; - break; - case ST21NFCB_ESE_HOST_ID: - ndev->hci_dev->count_pipes = 0; - ndev->hci_dev->expected_pipes = ST21NFCB_SE_COUNT_PIPE_EMBEDDED; - break; - default: - return -EINVAL; - } - - /* - * Wait for an EVT_HOT_PLUG in order to - * retrieve a relevant host list. - */ - reinit_completion(&info->se_info.req_completion); - r = nci_nfcee_mode_set(ndev, se_idx, NCI_NFCEE_ENABLE); - if (r != NCI_STATUS_OK) - return r; - - mod_timer(&info->se_info.se_active_timer, jiffies + - msecs_to_jiffies(ST21NFCB_SE_TO_HOT_PLUG)); - info->se_info.se_active = true; - - /* Ignore return value and check in any case the host_list */ - wait_for_completion_interruptible(&info->se_info.req_completion); - - /* There might be some "collision" after receiving a HOT_PLUG event - * This may cause the CLF to not answer to the next hci command. - * There is no possible synchronization to prevent this. - * Adding a small delay is the only way to solve the issue. - */ - usleep_range(3000, 5000); - - r = nci_hci_get_param(ndev, NCI_HCI_ADMIN_GATE, - NCI_HCI_ADMIN_PARAM_HOST_LIST, &sk_host_list); - if (r != NCI_HCI_ANY_OK) - return r; - - host_id = sk_host_list->data[sk_host_list->len - 1]; - kfree_skb(sk_host_list); - if (state == ST21NFCB_SE_MODE_ON && host_id == se_idx) - return se_idx; - else if (state == ST21NFCB_SE_MODE_OFF && host_id != se_idx) - return se_idx; - - return -1; -} - -int st21nfcb_nci_disable_se(struct nci_dev *ndev, u32 se_idx) -{ - int r; - - pr_debug("st21nfcb_nci_disable_se\n"); - - if (se_idx == NFC_SE_EMBEDDED) { - r = nci_hci_send_event(ndev, ST21NFCB_APDU_READER_GATE, - ST21NFCB_EVT_SE_END_OF_APDU_TRANSFER, NULL, 0); - if (r < 0) - return r; - } - - return 0; -} -EXPORT_SYMBOL_GPL(st21nfcb_nci_disable_se); - -int st21nfcb_nci_enable_se(struct nci_dev *ndev, u32 se_idx) -{ - int r; - - pr_debug("st21nfcb_nci_enable_se\n"); - - if (se_idx == ST21NFCB_HCI_HOST_ID_ESE) { - r = nci_hci_send_event(ndev, ST21NFCB_APDU_READER_GATE, - ST21NFCB_EVT_SE_SOFT_RESET, NULL, 0); - if (r < 0) - return r; - } - - return 0; -} -EXPORT_SYMBOL_GPL(st21nfcb_nci_enable_se); - -static int st21nfcb_hci_network_init(struct nci_dev *ndev) -{ - struct core_conn_create_dest_spec_params *dest_params; - struct dest_spec_params spec_params; - struct nci_conn_info *conn_info; - int r, dev_num; - - dest_params = - kzalloc(sizeof(struct core_conn_create_dest_spec_params) + - sizeof(struct dest_spec_params), GFP_KERNEL); - if (dest_params == NULL) { - r = -ENOMEM; - goto exit; - } - - dest_params->type = NCI_DESTINATION_SPECIFIC_PARAM_NFCEE_TYPE; - dest_params->length = sizeof(struct dest_spec_params); - spec_params.id = ndev->hci_dev->nfcee_id; - spec_params.protocol = NCI_NFCEE_INTERFACE_HCI_ACCESS; - memcpy(dest_params->value, &spec_params, sizeof(struct dest_spec_params)); - r = nci_core_conn_create(ndev, NCI_DESTINATION_NFCEE, 1, - sizeof(struct core_conn_create_dest_spec_params) + - sizeof(struct dest_spec_params), - dest_params); - if (r != NCI_STATUS_OK) - goto free_dest_params; - - conn_info = ndev->hci_dev->conn_info; - if (!conn_info) - goto free_dest_params; - - memcpy(ndev->hci_dev->init_data.gates, st21nfcb_gates, - sizeof(st21nfcb_gates)); - - /* - * Session id must include the driver name + i2c bus addr - * persistent info to discriminate 2 identical chips - */ - dev_num = find_first_zero_bit(dev_mask, ST21NFCB_NUM_DEVICES); - if (dev_num >= ST21NFCB_NUM_DEVICES) { - r = -ENODEV; - goto free_dest_params; - } - - scnprintf(ndev->hci_dev->init_data.session_id, - sizeof(ndev->hci_dev->init_data.session_id), - "%s%2x", "ST21BH", dev_num); - - r = nci_hci_dev_session_init(ndev); - if (r != NCI_HCI_ANY_OK) - goto free_dest_params; - - r = nci_nfcee_mode_set(ndev, ndev->hci_dev->conn_info->id, - NCI_NFCEE_ENABLE); - if (r != NCI_STATUS_OK) - goto free_dest_params; - -free_dest_params: - kfree(dest_params); - -exit: - return r; -} - -int st21nfcb_nci_discover_se(struct nci_dev *ndev) -{ - u8 param[2]; - int r; - int se_count = 0; - - pr_debug("st21nfcb_nci_discover_se\n"); - - r = st21nfcb_hci_network_init(ndev); - if (r != 0) - return r; - - param[0] = ST21NFCB_UICC_HOST_ID; - param[1] = ST21NFCB_HCI_HOST_ID_ESE; - r = nci_hci_set_param(ndev, NCI_HCI_ADMIN_GATE, - NCI_HCI_ADMIN_PARAM_WHITELIST, - param, sizeof(param)); - if (r != NCI_HCI_ANY_OK) - return r; - - r = st21nfcb_nci_control_se(ndev, ST21NFCB_UICC_HOST_ID, - ST21NFCB_SE_MODE_ON); - if (r == ST21NFCB_UICC_HOST_ID) { - nfc_add_se(ndev->nfc_dev, ST21NFCB_UICC_HOST_ID, NFC_SE_UICC); - se_count++; - } - - /* Try to enable eSE in order to check availability */ - r = st21nfcb_nci_control_se(ndev, ST21NFCB_HCI_HOST_ID_ESE, - ST21NFCB_SE_MODE_ON); - if (r == ST21NFCB_HCI_HOST_ID_ESE) { - nfc_add_se(ndev->nfc_dev, ST21NFCB_HCI_HOST_ID_ESE, - NFC_SE_EMBEDDED); - se_count++; - st21nfcb_se_get_atr(ndev); - } - - return !se_count; -} -EXPORT_SYMBOL_GPL(st21nfcb_nci_discover_se); - -int st21nfcb_nci_se_io(struct nci_dev *ndev, u32 se_idx, - u8 *apdu, size_t apdu_length, - se_io_cb_t cb, void *cb_context) -{ - struct st21nfcb_nci_info *info = nci_get_drvdata(ndev); - - pr_debug("\n"); - - switch (se_idx) { - case ST21NFCB_HCI_HOST_ID_ESE: - info->se_info.cb = cb; - info->se_info.cb_context = cb_context; - mod_timer(&info->se_info.bwi_timer, jiffies + - msecs_to_jiffies(info->se_info.wt_timeout)); - info->se_info.bwi_active = true; - return nci_hci_send_event(ndev, ST21NFCB_APDU_READER_GATE, - ST21NFCB_EVT_TRANSMIT_DATA, apdu, - apdu_length); - default: - return -ENODEV; - } -} -EXPORT_SYMBOL(st21nfcb_nci_se_io); - -static void st21nfcb_se_wt_timeout(unsigned long data) -{ - /* - * No answer from the secure element - * within the defined timeout. - * Let's send a reset request as recovery procedure. - * According to the situation, we first try to send a software reset - * to the secure element. If the next command is still not - * answering in time, we send to the CLF a secure element hardware - * reset request. - */ - /* hardware reset managed through VCC_UICC_OUT power supply */ - u8 param = 0x01; - struct st21nfcb_nci_info *info = (struct st21nfcb_nci_info *) data; - - pr_debug("\n"); - - info->se_info.bwi_active = false; - - if (!info->se_info.xch_error) { - info->se_info.xch_error = true; - nci_hci_send_event(info->ndlc->ndev, ST21NFCB_APDU_READER_GATE, - ST21NFCB_EVT_SE_SOFT_RESET, NULL, 0); - } else { - info->se_info.xch_error = false; - nci_hci_send_event(info->ndlc->ndev, ST21NFCB_DEVICE_MGNT_GATE, - ST21NFCB_EVT_SE_HARD_RESET, ¶m, 1); - } - info->se_info.cb(info->se_info.cb_context, NULL, 0, -ETIME); -} - -static void st21nfcb_se_activation_timeout(unsigned long data) -{ - struct st21nfcb_nci_info *info = (struct st21nfcb_nci_info *) data; - - pr_debug("\n"); - - info->se_info.se_active = false; - - complete(&info->se_info.req_completion); -} - -int st21nfcb_se_init(struct nci_dev *ndev) -{ - struct st21nfcb_nci_info *info = nci_get_drvdata(ndev); - - init_completion(&info->se_info.req_completion); - /* initialize timers */ - init_timer(&info->se_info.bwi_timer); - info->se_info.bwi_timer.data = (unsigned long)info; - info->se_info.bwi_timer.function = st21nfcb_se_wt_timeout; - info->se_info.bwi_active = false; - - init_timer(&info->se_info.se_active_timer); - info->se_info.se_active_timer.data = (unsigned long)info; - info->se_info.se_active_timer.function = - st21nfcb_se_activation_timeout; - info->se_info.se_active = false; - - info->se_info.xch_error = false; - - info->se_info.wt_timeout = - ST21NFCB_BWI_TO_TIMEOUT(ST21NFCB_ATR_DEFAULT_BWI); - - return 0; -} -EXPORT_SYMBOL(st21nfcb_se_init); - -void st21nfcb_se_deinit(struct nci_dev *ndev) -{ - struct st21nfcb_nci_info *info = nci_get_drvdata(ndev); - - if (info->se_info.bwi_active) - del_timer_sync(&info->se_info.bwi_timer); - if (info->se_info.se_active) - del_timer_sync(&info->se_info.se_active_timer); - - info->se_info.se_active = false; - info->se_info.bwi_active = false; -} -EXPORT_SYMBOL(st21nfcb_se_deinit); - diff --git a/drivers/nfc/st21nfcb/st21nfcb_se.h b/drivers/nfc/st21nfcb/st21nfcb_se.h deleted file mode 100644 index 52a323872bea..000000000000 --- a/drivers/nfc/st21nfcb/st21nfcb_se.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * NCI based Driver for STMicroelectronics NFC Chip - * - * Copyright (C) 2014 STMicroelectronics SAS. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - */ -#ifndef __LOCAL_ST21NFCB_SE_H_ -#define __LOCAL_ST21NFCB_SE_H_ - -/* - * ref ISO7816-3 chap 8.1. the initial character TS is followed by a - * sequence of at most 32 characters. - */ -#define ST21NFCB_ESE_MAX_LENGTH 33 -#define ST21NFCB_HCI_HOST_ID_ESE 0xc0 - -struct st21nfcb_se_info { - u8 atr[ST21NFCB_ESE_MAX_LENGTH]; - struct completion req_completion; - - struct timer_list bwi_timer; - int wt_timeout; /* in msecs */ - bool bwi_active; - - struct timer_list se_active_timer; - bool se_active; - - bool xch_error; - - se_io_cb_t cb; - void *cb_context; -}; - -int st21nfcb_se_init(struct nci_dev *ndev); -void st21nfcb_se_deinit(struct nci_dev *ndev); - -int st21nfcb_nci_discover_se(struct nci_dev *ndev); -int st21nfcb_nci_enable_se(struct nci_dev *ndev, u32 se_idx); -int st21nfcb_nci_disable_se(struct nci_dev *ndev, u32 se_idx); -int st21nfcb_nci_se_io(struct nci_dev *ndev, u32 se_idx, - u8 *apdu, size_t apdu_length, - se_io_cb_t cb, void *cb_context); -int st21nfcb_hci_load_session(struct nci_dev *ndev); -void st21nfcb_hci_event_received(struct nci_dev *ndev, u8 pipe, - u8 event, struct sk_buff *skb); -void st21nfcb_hci_cmd_received(struct nci_dev *ndev, u8 pipe, u8 cmd, - struct sk_buff *skb); - - -#endif /* __LOCAL_ST21NFCB_NCI_H_ */ diff --git a/include/linux/platform_data/st-nci.h b/include/linux/platform_data/st-nci.h new file mode 100644 index 000000000000..d9d400a297bd --- /dev/null +++ b/include/linux/platform_data/st-nci.h @@ -0,0 +1,29 @@ +/* + * Driver include for ST NCI NFC chip family. + * + * Copyright (C) 2014-2015 STMicroelectronics SAS. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef _ST_NCI_H_ +#define _ST_NCI_H_ + +#define ST_NCI_DRIVER_NAME "st_nci" + +struct st_nci_nfc_platform_data { + unsigned int gpio_reset; + unsigned int irq_polarity; +}; + +#endif /* _ST_NCI_H_ */ diff --git a/include/linux/platform_data/st21nfcb.h b/include/linux/platform_data/st21nfcb.h deleted file mode 100644 index b023373d9874..000000000000 --- a/include/linux/platform_data/st21nfcb.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Driver include for the ST21NFCB NFC chip. - * - * Copyright (C) 2014 STMicroelectronics SAS. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - */ - -#ifndef _ST21NFCB_NCI_H_ -#define _ST21NFCB_NCI_H_ - -#define ST21NFCB_NCI_DRIVER_NAME "st21nfcb_nci" - -struct st21nfcb_nfc_platform_data { - unsigned int gpio_reset; - unsigned int irq_polarity; -}; - -#endif /* _ST21NFCB_NCI_H_ */ diff --git a/include/linux/platform_data/st_nci.h b/include/linux/platform_data/st_nci.h new file mode 100644 index 000000000000..d9d400a297bd --- /dev/null +++ b/include/linux/platform_data/st_nci.h @@ -0,0 +1,29 @@ +/* + * Driver include for ST NCI NFC chip family. + * + * Copyright (C) 2014-2015 STMicroelectronics SAS. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef _ST_NCI_H_ +#define _ST_NCI_H_ + +#define ST_NCI_DRIVER_NAME "st_nci" + +struct st_nci_nfc_platform_data { + unsigned int gpio_reset; + unsigned int irq_polarity; +}; + +#endif /* _ST_NCI_H_ */ -- cgit v1.2.3 From 4f822c625f9c68267ffee7519519e304858a46c3 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 10 Jun 2015 18:07:57 -0700 Subject: net: phy: broadcom: include phy.h for brcmphy.h We utilize inline functions from the PHY library, make sure that we do include phy.h in brcmphy.h in order for the code including brcmphy.h not to have to resolve this inclusion dependency. Fixes: 705314797b8b ("net: phy: broadcom: move shadow 0x1C register accessors to brcmphy.h") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/brcmphy.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 656da2a12ffe..abb6106f839d 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -1,6 +1,8 @@ #ifndef _LINUX_BRCMPHY_H #define _LINUX_BRCMPHY_H +#include + #define PHY_ID_BCM50610 0x0143bd60 #define PHY_ID_BCM50610M 0x0143bd70 #define PHY_ID_BCM5241 0x0143bc30 -- cgit v1.2.3 From 8bc84b79265f66d5af736473db582e74b28e099d Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 10 Jun 2015 18:07:58 -0700 Subject: net: phy: broadcom: define Broadcom pseudo-PHY address in brcmphy.h Define the pseudo-PHY address (30) which is used by all Broadcom Ethernet switches in a shared header file. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/brcmphy.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index abb6106f839d..697ca7795bd9 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -3,6 +3,11 @@ #include +/* All Broadcom Ethernet switches have a pseudo-PHY at address 30 which is used + * to configure the switch internal registers via MDIO accesses. + */ +#define BRCM_PSEUDO_PHY_ADDR 30 + #define PHY_ID_BCM50610 0x0143bd60 #define PHY_ID_BCM50610M 0x0143bd70 #define PHY_ID_BCM5241 0x0143bc30 -- cgit v1.2.3 From dc14bdef8762a8098b1da881b611d722e24fe787 Mon Sep 17 00:00:00 2001 From: Vincent Cuissard Date: Thu, 11 Jun 2015 14:00:19 +0200 Subject: NFC: nfcmrvl: add platform_data and DT configuration Declare nfcmrvl platform_data structure and few DT parameters for nfcmrvl driver. Signed-off-by: Vincent Cuissard Signed-off-by: Samuel Ortiz --- drivers/nfc/nfcmrvl/main.c | 66 +++++++++++++++++++++++++++-------- drivers/nfc/nfcmrvl/nfcmrvl.h | 36 +++++++++++++------ drivers/nfc/nfcmrvl/usb.c | 6 +++- include/linux/platform_data/nfcmrvl.h | 31 ++++++++++++++++ 4 files changed, 113 insertions(+), 26 deletions(-) create mode 100644 include/linux/platform_data/nfcmrvl.h (limited to 'include/linux') diff --git a/drivers/nfc/nfcmrvl/main.c b/drivers/nfc/nfcmrvl/main.c index 708aad28c567..e317a69a4560 100644 --- a/drivers/nfc/nfcmrvl/main.c +++ b/drivers/nfc/nfcmrvl/main.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -65,7 +66,7 @@ static int nfcmrvl_nci_send(struct nci_dev *ndev, struct sk_buff *skb) if (!test_bit(NFCMRVL_NCI_RUNNING, &priv->flags)) return -EBUSY; - if (priv->hci_muxed) { + if (priv->config.hci_muxed) { unsigned char *hdr; unsigned char len = skb->len; @@ -92,9 +93,9 @@ static struct nci_ops nfcmrvl_nci_ops = { }; struct nfcmrvl_private *nfcmrvl_nci_register_dev(void *drv_data, - struct nfcmrvl_if_ops *ops, - struct device *dev, - unsigned int flags) + struct nfcmrvl_if_ops *ops, + struct device *dev, + struct nfcmrvl_platform_data *pdata) { struct nfcmrvl_private *priv; int rc; @@ -108,23 +109,24 @@ struct nfcmrvl_private *nfcmrvl_nci_register_dev(void *drv_data, priv->drv_data = drv_data; priv->if_ops = ops; priv->dev = dev; - priv->hci_muxed = (flags & NFCMRVL_DEV_FLAG_HCI_MUXED) ? 1 : 0; - priv->reset_n_io = NFCMRVL_DEV_FLAG_GET_RESET_N_IO(flags); - if (priv->reset_n_io) { + memcpy(&priv->config, pdata, sizeof(*pdata)); + + if (priv->config.reset_n_io) { rc = devm_gpio_request_one(dev, - priv->reset_n_io, + priv->config.reset_n_io, GPIOF_OUT_INIT_LOW, "nfcmrvl_reset_n"); if (rc < 0) nfc_err(dev, "failed to request reset_n io\n"); } - if (priv->hci_muxed) + if (priv->config.hci_muxed) headroom = NFCMRVL_HCI_EVENT_HEADER_SIZE; protocols = NFC_PROTO_JEWEL_MASK - | NFC_PROTO_MIFARE_MASK | NFC_PROTO_FELICA_MASK + | NFC_PROTO_MIFARE_MASK + | NFC_PROTO_FELICA_MASK | NFC_PROTO_ISO14443_MASK | NFC_PROTO_ISO14443_B_MASK | NFC_PROTO_NFC_DEP_MASK; @@ -169,7 +171,7 @@ EXPORT_SYMBOL_GPL(nfcmrvl_nci_unregister_dev); int nfcmrvl_nci_recv_frame(struct nfcmrvl_private *priv, struct sk_buff *skb) { - if (priv->hci_muxed) { + if (priv->config.hci_muxed) { if (skb->data[0] == NFCMRVL_HCI_EVENT_CODE && skb->data[1] == NFCMRVL_HCI_NFC_EVENT_CODE) { /* Data packet, let's extract NCI payload */ @@ -200,15 +202,51 @@ void nfcmrvl_chip_reset(struct nfcmrvl_private *priv) * To be improved. */ - if (priv->reset_n_io) { + if (priv->config.reset_n_io) { nfc_info(priv->dev, "reset the chip\n"); - gpio_set_value(priv->reset_n_io, 0); + gpio_set_value(priv->config.reset_n_io, 0); usleep_range(5000, 10000); - gpio_set_value(priv->reset_n_io, 1); + gpio_set_value(priv->config.reset_n_io, 1); } else nfc_info(priv->dev, "no reset available on this interface\n"); } +#ifdef CONFIG_OF + +int nfcmrvl_parse_dt(struct device_node *node, + struct nfcmrvl_platform_data *pdata) +{ + int reset_n_io; + + reset_n_io = of_get_named_gpio(node, "reset-n-io", 0); + if (reset_n_io < 0) { + pr_info("no reset-n-io config\n"); + reset_n_io = 0; + } else if (!gpio_is_valid(reset_n_io)) { + pr_err("invalid reset-n-io GPIO\n"); + return reset_n_io; + } + pdata->reset_n_io = reset_n_io; + + if (of_find_property(node, "hci-muxed", NULL)) + pdata->hci_muxed = 1; + else + pdata->hci_muxed = 0; + + return 0; +} + +#else + +int nfcmrvl_parse_dt(struct device_node *node, + struct nfcmrvl_platform_data *pdata) +{ + return -ENODEV; +} + +#endif +EXPORT_SYMBOL_GPL(nfcmrvl_parse_dt); + MODULE_AUTHOR("Marvell International Ltd."); MODULE_DESCRIPTION("Marvell NFC driver ver " VERSION); MODULE_VERSION(VERSION); diff --git a/drivers/nfc/nfcmrvl/nfcmrvl.h b/drivers/nfc/nfcmrvl/nfcmrvl.h index 2edae9a4b6bd..214412bd6110 100644 --- a/drivers/nfc/nfcmrvl/nfcmrvl.h +++ b/drivers/nfc/nfcmrvl/nfcmrvl.h @@ -16,6 +16,11 @@ * this warranty disclaimer. **/ +#ifndef _NFCMRVL_H_ +#define _NFCMRVL_H_ + +#include + /* Define private flags: */ #define NFCMRVL_NCI_RUNNING 1 @@ -38,22 +43,25 @@ #define NFCMRVL_HCI_OGF 0x81 #define NFCMRVL_HCI_OCF 0xFE -#define NFCMRVL_DEV_FLAG_HCI_MUXED (1 << 0) -#define NFCMRVL_DEV_FLAG_SET_RESET_N_IO(X) ((X) << 16) -#define NFCMRVL_DEV_FLAG_GET_RESET_N_IO(X) ((X) >> 16) struct nfcmrvl_private { - /* Tell if NCI packets are encapsulated in HCI ones */ - int hci_muxed; + unsigned long flags; + + /* Platform configuration */ + struct nfcmrvl_platform_data config; + struct nci_dev *ndev; - /* Reset IO (0 if not available) */ - int reset_n_io; + /* + ** PHY related information + */ - unsigned long flags; + /* PHY driver context */ void *drv_data; + /* PHY device */ struct device *dev; + /* Low level driver ops */ struct nfcmrvl_if_ops *if_ops; }; @@ -66,8 +74,14 @@ struct nfcmrvl_if_ops { void nfcmrvl_nci_unregister_dev(struct nfcmrvl_private *priv); int nfcmrvl_nci_recv_frame(struct nfcmrvl_private *priv, struct sk_buff *skb); struct nfcmrvl_private *nfcmrvl_nci_register_dev(void *drv_data, - struct nfcmrvl_if_ops *ops, - struct device *dev, - unsigned int flags); + struct nfcmrvl_if_ops *ops, + struct device *dev, + struct nfcmrvl_platform_data *pdata); + void nfcmrvl_chip_reset(struct nfcmrvl_private *priv); + +int nfcmrvl_parse_dt(struct device_node *node, + struct nfcmrvl_platform_data *pdata); + +#endif diff --git a/drivers/nfc/nfcmrvl/usb.c b/drivers/nfc/nfcmrvl/usb.c index c4046b681bfa..aa3f3c1cff0a 100644 --- a/drivers/nfc/nfcmrvl/usb.c +++ b/drivers/nfc/nfcmrvl/usb.c @@ -302,6 +302,10 @@ static int nfcmrvl_probe(struct usb_interface *intf, struct nfcmrvl_private *priv; int i; struct usb_device *udev = interface_to_usbdev(intf); + struct nfcmrvl_platform_data config; + + /* No configuration for USB */ + memset(&config, 0, sizeof(config)); nfc_info(&udev->dev, "intf %p id %p\n", intf, id); @@ -339,7 +343,7 @@ static int nfcmrvl_probe(struct usb_interface *intf, init_usb_anchor(&drv_data->deferred); priv = nfcmrvl_nci_register_dev(drv_data, &usb_ops, - &drv_data->udev->dev, 0); + &drv_data->udev->dev, &config); if (IS_ERR(priv)) return PTR_ERR(priv); diff --git a/include/linux/platform_data/nfcmrvl.h b/include/linux/platform_data/nfcmrvl.h new file mode 100644 index 000000000000..106cfe5ed589 --- /dev/null +++ b/include/linux/platform_data/nfcmrvl.h @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2015, Marvell International Ltd. + * + * This software file (the "File") is distributed by Marvell International + * Ltd. under the terms of the GNU General Public License Version 2, June 1991 + * (the "License"). You may use, redistribute and/or modify this File in + * accordance with the terms and conditions of the License, a copy of which + * is available on the worldwide web at + * http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt. + * + * THE FILE IS DISTRIBUTED AS-IS, WITHOUT WARRANTY OF ANY KIND, AND THE + * IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE + * ARE EXPRESSLY DISCLAIMED. The License provides additional details about + * this warranty disclaimer. + */ + +#ifndef _NFCMRVL_PTF_H_ +#define _NFCMRVL_PTF_H_ + +struct nfcmrvl_platform_data { + /* + * Generic + */ + + /* GPIO that is wired to RESET_N signal */ + unsigned int reset_n_io; + /* Tell if transport is muxed in HCI one */ + unsigned int hci_muxed; +}; + +#endif /* _NFCMRVL_PTF_H_ */ -- cgit v1.2.3 From e097dc624f784debbde49701a493bf920bc422c7 Mon Sep 17 00:00:00 2001 From: Vincent Cuissard Date: Thu, 11 Jun 2015 14:00:20 +0200 Subject: NFC: nfcmrvl: add UART driver Add support of Marvell NFC chip controlled over UART Signed-off-by: Vincent Cuissard Signed-off-by: Samuel Ortiz --- .../devicetree/bindings/net/nfc/nfcmrvl.txt | 29 +++ drivers/nfc/nfcmrvl/Kconfig | 11 + drivers/nfc/nfcmrvl/Makefile | 3 + drivers/nfc/nfcmrvl/nfcmrvl.h | 7 + drivers/nfc/nfcmrvl/uart.c | 225 +++++++++++++++++++++ include/linux/platform_data/nfcmrvl.h | 9 + net/nfc/nci/uart.c | 1 - 7 files changed, 284 insertions(+), 1 deletion(-) create mode 100644 Documentation/devicetree/bindings/net/nfc/nfcmrvl.txt create mode 100644 drivers/nfc/nfcmrvl/uart.c (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/net/nfc/nfcmrvl.txt b/Documentation/devicetree/bindings/net/nfc/nfcmrvl.txt new file mode 100644 index 000000000000..7c4a0cc370cf --- /dev/null +++ b/Documentation/devicetree/bindings/net/nfc/nfcmrvl.txt @@ -0,0 +1,29 @@ +* Marvell International Ltd. NCI NFC Controller + +Required properties: +- compatible: Should be "mrvl,nfc-uart". + +Optional SoC specific properties: +- pinctrl-names: Contains only one value - "default". +- pintctrl-0: Specifies the pin control groups used for this controller. +- reset-n-io: Output GPIO pin used to reset the chip (active low). +- hci-muxed: Specifies that the chip is muxing NCI over HCI frames. + +Optional UART-based chip specific properties: +- flow-control: Specifies that the chip is using RTS/CTS. +- break-control: Specifies that the chip needs specific break management. + +Example (for ARM-based BeagleBoard Black with 88W8887 on UART5): + +&uart5 { + status = "okay"; + + nfcmrvluart: nfcmrvluart@5 { + compatible = "mrvl,nfc-uart"; + + reset-n-io = <&gpio3 16 0>; + + hci-muxed; + flow-control; + } +}; diff --git a/drivers/nfc/nfcmrvl/Kconfig b/drivers/nfc/nfcmrvl/Kconfig index 5e18afd9abe2..796be2411440 100644 --- a/drivers/nfc/nfcmrvl/Kconfig +++ b/drivers/nfc/nfcmrvl/Kconfig @@ -21,3 +21,14 @@ config NFC_MRVL_USB Say Y here to compile support for Marvell NFC-over-USB driver into the kernel or say M to compile it as module. + +config NFC_MRVL_UART + tristate "Marvell NFC-over-UART driver" + depends on NFC_MRVL && NFC_NCI_UART + help + Marvell NFC-over-UART driver. + + This driver provides support for Marvell NFC-over-UART devices + + Say Y here to compile support for Marvell NFC-over-UART driver + into the kernel or say M to compile it as module. diff --git a/drivers/nfc/nfcmrvl/Makefile b/drivers/nfc/nfcmrvl/Makefile index 97a0de72dc01..775196274d1f 100644 --- a/drivers/nfc/nfcmrvl/Makefile +++ b/drivers/nfc/nfcmrvl/Makefile @@ -7,3 +7,6 @@ obj-$(CONFIG_NFC_MRVL) += nfcmrvl.o nfcmrvl_usb-y += usb.o obj-$(CONFIG_NFC_MRVL_USB) += nfcmrvl_usb.o + +nfcmrvl_uart-y += uart.o +obj-$(CONFIG_NFC_MRVL_UART) += nfcmrvl_uart.o diff --git a/drivers/nfc/nfcmrvl/nfcmrvl.h b/drivers/nfc/nfcmrvl/nfcmrvl.h index 214412bd6110..09780d57c9b8 100644 --- a/drivers/nfc/nfcmrvl/nfcmrvl.h +++ b/drivers/nfc/nfcmrvl/nfcmrvl.h @@ -43,6 +43,11 @@ #define NFCMRVL_HCI_OGF 0x81 #define NFCMRVL_HCI_OCF 0xFE +enum nfcmrvl_phy { + NFCMRVL_PHY_USB = 0, + NFCMRVL_PHY_UART = 1, +}; + struct nfcmrvl_private { @@ -61,6 +66,8 @@ struct nfcmrvl_private { void *drv_data; /* PHY device */ struct device *dev; + /* PHY type */ + enum nfcmrvl_phy phy; /* Low level driver ops */ struct nfcmrvl_if_ops *if_ops; }; diff --git a/drivers/nfc/nfcmrvl/uart.c b/drivers/nfc/nfcmrvl/uart.c new file mode 100644 index 000000000000..61442d6528a6 --- /dev/null +++ b/drivers/nfc/nfcmrvl/uart.c @@ -0,0 +1,225 @@ +/** + * Marvell NFC-over-UART driver + * + * Copyright (C) 2015, Marvell International Ltd. + * + * This software file (the "File") is distributed by Marvell International + * Ltd. under the terms of the GNU General Public License Version 2, June 1991 + * (the "License"). You may use, redistribute and/or modify this File in + * accordance with the terms and conditions of the License, a copy of which + * is available on the worldwide web at + * http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt. + * + * THE FILE IS DISTRIBUTED AS-IS, WITHOUT WARRANTY OF ANY KIND, AND THE + * IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE + * ARE EXPRESSLY DISCLAIMED. The License provides additional details about + * this warranty disclaimer. + */ + +#include +#include +#include +#include +#include +#include "nfcmrvl.h" + +static unsigned int hci_muxed; +static unsigned int flow_control; +static unsigned int break_control; +static unsigned int reset_n_io; + +/* +** NFCMRVL NCI OPS +*/ + +static int nfcmrvl_uart_nci_open(struct nfcmrvl_private *priv) +{ + return 0; +} + +static int nfcmrvl_uart_nci_close(struct nfcmrvl_private *priv) +{ + return 0; +} + +static int nfcmrvl_uart_nci_send(struct nfcmrvl_private *priv, + struct sk_buff *skb) +{ + struct nci_uart *nu = priv->drv_data; + + return nu->ops.send(nu, skb); +} + +static struct nfcmrvl_if_ops uart_ops = { + .nci_open = nfcmrvl_uart_nci_open, + .nci_close = nfcmrvl_uart_nci_close, + .nci_send = nfcmrvl_uart_nci_send, +}; + +#ifdef CONFIG_OF + +static int nfcmrvl_uart_parse_dt(struct device_node *node, + struct nfcmrvl_platform_data *pdata) +{ + struct device_node *matched_node; + int ret; + + matched_node = of_find_compatible_node(node, NULL, "mrvl,nfc-uart"); + if (!matched_node) + return -ENODEV; + + ret = nfcmrvl_parse_dt(matched_node, pdata); + if (ret < 0) { + pr_err("Failed to get generic entries\n"); + return ret; + } + + if (of_find_property(matched_node, "flow-control", NULL)) + pdata->flow_control = 1; + else + pdata->flow_control = 0; + + if (of_find_property(matched_node, "break-control", NULL)) + pdata->break_control = 1; + else + pdata->break_control = 0; + + return 0; +} + +#else + +static int nfcmrvl_uart_parse_dt(struct device_node *node, + struct nfcmrvl_platform_data *pdata) +{ + return -ENODEV; +} + +#endif + +/* +** NCI UART OPS +*/ + +static int nfcmrvl_nci_uart_open(struct nci_uart *nu) +{ + struct nfcmrvl_private *priv; + struct nfcmrvl_platform_data *pdata = NULL; + struct nfcmrvl_platform_data config; + + /* + * Platform data cannot be used here since usually it is already used + * by low level serial driver. We can try to retrieve serial device + * and check if DT entries were added. + */ + + if (nu->tty->dev->parent && nu->tty->dev->parent->of_node) + if (nfcmrvl_uart_parse_dt(nu->tty->dev->parent->of_node, + &config) == 0) + pdata = &config; + + if (!pdata) { + pr_info("No platform data / DT -> fallback to module params\n"); + config.hci_muxed = hci_muxed; + config.reset_n_io = reset_n_io; + config.flow_control = flow_control; + config.break_control = break_control; + pdata = &config; + } + + priv = nfcmrvl_nci_register_dev(nu, &uart_ops, nu->tty->dev, pdata); + if (IS_ERR(priv)) + return PTR_ERR(priv); + + priv->phy = NFCMRVL_PHY_UART; + + nu->drv_data = priv; + nu->ndev = priv->ndev; + + /* Set BREAK */ + if (priv->config.break_control && nu->tty->ops->break_ctl) + nu->tty->ops->break_ctl(nu->tty, -1); + + return 0; +} + +static void nfcmrvl_nci_uart_close(struct nci_uart *nu) +{ + nfcmrvl_nci_unregister_dev((struct nfcmrvl_private *)nu->drv_data); +} + +static int nfcmrvl_nci_uart_recv(struct nci_uart *nu, struct sk_buff *skb) +{ + return nfcmrvl_nci_recv_frame((struct nfcmrvl_private *)nu->drv_data, + skb); +} + +static void nfcmrvl_nci_uart_tx_start(struct nci_uart *nu) +{ + struct nfcmrvl_private *priv = (struct nfcmrvl_private *)nu->drv_data; + + /* Remove BREAK to wake up the NFCC */ + if (priv->config.break_control && nu->tty->ops->break_ctl) { + nu->tty->ops->break_ctl(nu->tty, 0); + usleep_range(3000, 5000); + } +} + +static void nfcmrvl_nci_uart_tx_done(struct nci_uart *nu) +{ + struct nfcmrvl_private *priv = (struct nfcmrvl_private *)nu->drv_data; + + /* + ** To ensure that if the NFCC goes in DEEP SLEEP sate we can wake him + ** up. we set BREAK. Once we will be ready to send again we will remove + ** it. + */ + if (priv->config.break_control && nu->tty->ops->break_ctl) + nu->tty->ops->break_ctl(nu->tty, -1); +} + +static struct nci_uart nfcmrvl_nci_uart = { + .owner = THIS_MODULE, + .name = "nfcmrvl_uart", + .driver = NCI_UART_DRIVER_MARVELL, + .ops = { + .open = nfcmrvl_nci_uart_open, + .close = nfcmrvl_nci_uart_close, + .recv = nfcmrvl_nci_uart_recv, + .tx_start = nfcmrvl_nci_uart_tx_start, + .tx_done = nfcmrvl_nci_uart_tx_done, + } +}; + +/* +** Module init +*/ + +static int nfcmrvl_uart_init_module(void) +{ + return nci_uart_register(&nfcmrvl_nci_uart); +} + +static void nfcmrvl_uart_exit_module(void) +{ + nci_uart_unregister(&nfcmrvl_nci_uart); +} + +module_init(nfcmrvl_uart_init_module); +module_exit(nfcmrvl_uart_exit_module); + +MODULE_AUTHOR("Marvell International Ltd."); +MODULE_DESCRIPTION("Marvell NFC-over-UART"); +MODULE_LICENSE("GPL v2"); + +module_param(flow_control, uint, 0); +MODULE_PARM_DESC(flow_control, "Tell if UART needs flow control at init."); + +module_param(break_control, uint, 0); +MODULE_PARM_DESC(break_control, "Tell if UART driver must drive break signal."); + +module_param(hci_muxed, uint, 0); +MODULE_PARM_DESC(hci_muxed, "Tell if transport is muxed in HCI one."); + +module_param(reset_n_io, uint, 0); +MODULE_PARM_DESC(reset_n_io, "GPIO that is wired to RESET_N signal."); diff --git a/include/linux/platform_data/nfcmrvl.h b/include/linux/platform_data/nfcmrvl.h index 106cfe5ed589..ac91707dabcb 100644 --- a/include/linux/platform_data/nfcmrvl.h +++ b/include/linux/platform_data/nfcmrvl.h @@ -26,6 +26,15 @@ struct nfcmrvl_platform_data { unsigned int reset_n_io; /* Tell if transport is muxed in HCI one */ unsigned int hci_muxed; + + /* + * UART specific + */ + + /* Tell if UART needs flow control at init */ + unsigned int flow_control; + /* Tell if firmware supports break control for power management */ + unsigned int break_control; }; #endif /* _NFCMRVL_PTF_H_ */ diff --git a/net/nfc/nci/uart.c b/net/nfc/nci/uart.c index 70c279543074..2c48810af5bb 100644 --- a/net/nfc/nci/uart.c +++ b/net/nfc/nci/uart.c @@ -12,7 +12,6 @@ * IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE * ARE EXPRESSLY DISCLAIMED. The License provides additional details about * this warranty disclaimer. - */ /* Inspired (hugely) by HCI LDISC implementation in Bluetooth. -- cgit v1.2.3 From facc9699f0fe7d65a92cc09e175662659306066d Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 11 Jun 2015 14:47:27 +0300 Subject: net/mlx5e: Fix HW MTU settings Previously we configured HW MTU to be netdev->mtu, actually we need to configure netdev->mtu + (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN). Also, query MTU can not fail, hence make the relevant helper a void functionm, add mlx5e_set_dev_port_mtu, helper function to handle MTU setting. Signed-off-by: Saeed Mahameed Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/infiniband/hw/mlx5/main.c | 8 +-- drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 - drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 61 ++++++++++++----------- drivers/net/ethernet/mellanox/mlx5/core/port.c | 36 ++++++------- include/linux/mlx5/driver.h | 10 ++-- 5 files changed, 55 insertions(+), 61 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index d4dea86052d6..79dadd627e9c 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -446,15 +446,11 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port, if (err) goto out; - err = mlx5_query_port_max_mtu(mdev, &max_mtu, port); - if (err) - goto out; + mlx5_query_port_max_mtu(mdev, &max_mtu, port); props->max_mtu = mlx5_mtu_to_ib_mtu(max_mtu); - err = mlx5_query_port_oper_mtu(mdev, &oper_mtu, port); - if (err) - goto out; + mlx5_query_port_oper_mtu(mdev, &oper_mtu, port); props->active_mtu = mlx5_mtu_to_ib_mtu(oper_mtu); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index e9edb7210de1..71f38bb5dbfc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -57,7 +57,6 @@ #define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS 0x20 #define MLX5E_PARAMS_DEFAULT_MIN_RX_WQES 0x80 #define MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ 0x7 -#define MLX5E_PARAMS_MIN_MTU 46 #define MLX5E_TX_CQ_POLL_BUDGET 128 #define MLX5E_UPDATE_STATS_INTERVAL 200 /* msecs */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 075e5175e779..22b2665e0328 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -277,6 +277,9 @@ static void mlx5e_send_nop(struct mlx5e_sq *sq) mlx5e_tx_notify_hw(sq, wqe); } +#define MLX5E_HW2SW_MTU(hwmtu) (hwmtu - (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN)) +#define MLX5E_SW2HW_MTU(swmtu) (swmtu + (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN)) + static int mlx5e_create_rq(struct mlx5e_channel *c, struct mlx5e_rq_param *param, struct mlx5e_rq *rq) @@ -305,7 +308,7 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, } rq->wqe_sz = (priv->params.lro_en) ? priv->params.lro_wqe_sz : - priv->netdev->mtu + ETH_HLEN + VLAN_HLEN; + MLX5E_SW2HW_MTU(priv->netdev->mtu); for (i = 0; i < wq_sz; i++) { struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i); @@ -1367,11 +1370,30 @@ static void mlx5e_close_tirs(struct mlx5e_priv *priv) mlx5e_close_tir(priv, i); } -int mlx5e_open_locked(struct net_device *netdev) +static int mlx5e_set_dev_port_mtu(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; - int actual_mtu; + int hw_mtu; + int err; + + err = mlx5_set_port_mtu(mdev, MLX5E_SW2HW_MTU(netdev->mtu), 1); + if (err) + return err; + + mlx5_query_port_oper_mtu(mdev, &hw_mtu, 1); + + if (MLX5E_HW2SW_MTU(hw_mtu) != netdev->mtu) + netdev_warn(netdev, "%s: Port MTU %d is different than netdev mtu %d\n", + __func__, MLX5E_HW2SW_MTU(hw_mtu), netdev->mtu); + + netdev->mtu = MLX5E_HW2SW_MTU(hw_mtu); + return 0; +} + +int mlx5e_open_locked(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); int num_txqs; int err; @@ -1380,25 +1402,9 @@ int mlx5e_open_locked(struct net_device *netdev) netif_set_real_num_tx_queues(netdev, num_txqs); netif_set_real_num_rx_queues(netdev, priv->params.num_channels); - err = mlx5_set_port_mtu(mdev, netdev->mtu); - if (err) { - netdev_err(netdev, "%s: mlx5_set_port_mtu failed %d\n", - __func__, err); + err = mlx5e_set_dev_port_mtu(netdev); + if (err) return err; - } - - err = mlx5_query_port_oper_mtu(mdev, &actual_mtu, 1); - if (err) { - netdev_err(netdev, "%s: mlx5_query_port_oper_mtu failed %d\n", - __func__, err); - return err; - } - - if (actual_mtu != netdev->mtu) - netdev_warn(netdev, "%s: Failed to set MTU to %d\n", - __func__, netdev->mtu); - - netdev->mtu = actual_mtu; err = mlx5e_open_tises(priv); if (err) { @@ -1613,15 +1619,14 @@ static int mlx5e_change_mtu(struct net_device *netdev, int new_mtu) struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; int max_mtu; - int err = 0; + int err; - err = mlx5_query_port_max_mtu(mdev, &max_mtu, 1); - if (err) - return err; + mlx5_query_port_max_mtu(mdev, &max_mtu, 1); - if (new_mtu > max_mtu || new_mtu < MLX5E_PARAMS_MIN_MTU) { - netdev_err(netdev, "%s: Bad MTU size, mtu must be [%d-%d]\n", - __func__, MLX5E_PARAMS_MIN_MTU, max_mtu); + if (new_mtu > max_mtu) { + netdev_err(netdev, + "%s: Bad MTU (%d) > (%d) Max\n", + __func__, new_mtu, max_mtu); return -EINVAL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index 619d3baf19ea..70147999f657 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -248,22 +248,18 @@ int mlx5_query_port_status(struct mlx5_core_dev *dev, u8 *status) return err; } -static int mlx5_query_port_mtu(struct mlx5_core_dev *dev, - int *admin_mtu, int *max_mtu, int *oper_mtu, - u8 local_port) +static void mlx5_query_port_mtu(struct mlx5_core_dev *dev, int *admin_mtu, + int *max_mtu, int *oper_mtu, u8 port) { u32 in[MLX5_ST_SZ_DW(pmtu_reg)]; u32 out[MLX5_ST_SZ_DW(pmtu_reg)]; - int err; memset(in, 0, sizeof(in)); - MLX5_SET(pmtu_reg, in, local_port, local_port); + MLX5_SET(pmtu_reg, in, local_port, port); - err = mlx5_core_access_reg(dev, in, sizeof(in), out, - sizeof(out), MLX5_REG_PMTU, 0, 0); - if (err) - return err; + mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_PMTU, 0, 0); if (max_mtu) *max_mtu = MLX5_GET(pmtu_reg, out, max_mtu); @@ -271,11 +267,9 @@ static int mlx5_query_port_mtu(struct mlx5_core_dev *dev, *oper_mtu = MLX5_GET(pmtu_reg, out, oper_mtu); if (admin_mtu) *admin_mtu = MLX5_GET(pmtu_reg, out, admin_mtu); - - return 0; } -int mlx5_set_port_mtu(struct mlx5_core_dev *dev, int mtu) +int mlx5_set_port_mtu(struct mlx5_core_dev *dev, int mtu, u8 port) { u32 in[MLX5_ST_SZ_DW(pmtu_reg)]; u32 out[MLX5_ST_SZ_DW(pmtu_reg)]; @@ -283,24 +277,24 @@ int mlx5_set_port_mtu(struct mlx5_core_dev *dev, int mtu) memset(in, 0, sizeof(in)); MLX5_SET(pmtu_reg, in, admin_mtu, mtu); - MLX5_SET(pmtu_reg, in, local_port, 1); + MLX5_SET(pmtu_reg, in, local_port, port); - return mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), - MLX5_REG_PMTU, 0, 1); + return mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_PMTU, 0, 1); } EXPORT_SYMBOL_GPL(mlx5_set_port_mtu); -int mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, int *max_mtu, - u8 local_port) +void mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, int *max_mtu, + u8 port) { - return mlx5_query_port_mtu(dev, NULL, max_mtu, NULL, local_port); + mlx5_query_port_mtu(dev, NULL, max_mtu, NULL, port); } EXPORT_SYMBOL_GPL(mlx5_query_port_max_mtu); -int mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, int *oper_mtu, - u8 local_port) +void mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, int *oper_mtu, + u8 port) { - return mlx5_query_port_mtu(dev, NULL, NULL, oper_mtu, local_port); + mlx5_query_port_mtu(dev, NULL, NULL, oper_mtu, port); } EXPORT_SYMBOL_GPL(mlx5_query_port_oper_mtu); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 6093bde16b94..c0930f8d7021 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -756,11 +756,11 @@ int mlx5_set_port_status(struct mlx5_core_dev *dev, enum mlx5_port_status status); int mlx5_query_port_status(struct mlx5_core_dev *dev, u8 *status); -int mlx5_set_port_mtu(struct mlx5_core_dev *dev, int mtu); -int mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, int *max_mtu, - u8 local_port); -int mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, int *oper_mtu, - u8 local_port); +int mlx5_set_port_mtu(struct mlx5_core_dev *dev, int mtu, u8 port); +void mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, int *max_mtu, u8 port); +void mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, int *oper_mtu, + u8 port); + int mlx5_query_port_vl_hw_cap(struct mlx5_core_dev *dev, u8 *vl_hw_cap, u8 local_port); -- cgit v1.2.3 From fc11fbf9a785b25c5d07f05a30d4169ec39818da Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 11 Jun 2015 14:47:28 +0300 Subject: net/mlx5e: Add HW cacheline start padding Enable HW cacheline start padding and align RX WQE size to cacheline while considering HW start padding. Also, fix dma_unmap call to use the correct SKB data buffer size. Signed-off-by: Saeed Mahameed Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 5 ++++- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 10 +++++----- include/linux/mlx5/device.h | 4 ++++ 3 files changed, 13 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 22b2665e0328..1c62af69ca29 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -309,12 +309,15 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, rq->wqe_sz = (priv->params.lro_en) ? priv->params.lro_wqe_sz : MLX5E_SW2HW_MTU(priv->netdev->mtu); + rq->wqe_sz = SKB_DATA_ALIGN(rq->wqe_sz + MLX5E_NET_IP_ALIGN); for (i = 0; i < wq_sz; i++) { struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i); + u32 byte_count = rq->wqe_sz - MLX5E_NET_IP_ALIGN; wqe->data.lkey = c->mkey_be; - wqe->data.byte_count = cpu_to_be32(rq->wqe_sz); + wqe->data.byte_count = + cpu_to_be32(byte_count | MLX5_HW_START_PADDING); } rq->pdev = c->pdev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index ce1317cdabd7..06e7c744ed4a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -45,18 +45,18 @@ static inline int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, if (unlikely(!skb)) return -ENOMEM; - skb_reserve(skb, MLX5E_NET_IP_ALIGN); - dma_addr = dma_map_single(rq->pdev, /* hw start padding */ - skb->data - MLX5E_NET_IP_ALIGN, - /* hw end padding */ + skb->data, + /* hw end padding */ rq->wqe_sz, DMA_FROM_DEVICE); if (unlikely(dma_mapping_error(rq->pdev, dma_addr))) goto err_free_skb; + skb_reserve(skb, MLX5E_NET_IP_ALIGN); + *((dma_addr_t *)skb->cb) = dma_addr; wqe->data.addr = cpu_to_be64(dma_addr + MLX5E_NET_IP_ALIGN); @@ -217,7 +217,7 @@ bool mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) dma_unmap_single(rq->pdev, *((dma_addr_t *)skb->cb), - skb_end_offset(skb), + rq->wqe_sz, DMA_FROM_DEVICE); if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) { diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index b2c43508a737..b943cd9e2097 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -131,6 +131,10 @@ enum { MLX5_INLINE_SEG = 0x80000000, }; +enum { + MLX5_HW_START_PADDING = MLX5_INLINE_SEG, +}; + enum { MLX5_MIN_PKEY_TABLE_SIZE = 128, MLX5_MAX_LOG_PKEY_TABLE = 5, -- cgit v1.2.3 From 72b31f7271df34c6aab36c01305287924826678f Mon Sep 17 00:00:00 2001 From: Bernhard Thaler Date: Sat, 30 May 2015 15:27:40 +0200 Subject: netfilter: bridge: detect NAT66 correctly and change MAC address IPv4 iptables allows to REDIRECT/DNAT/SNAT any traffic over a bridge. e.g. REDIRECT $ sysctl -w net.bridge.bridge-nf-call-iptables=1 $ iptables -t nat -A PREROUTING -p tcp -m tcp --dport 8080 \ -j REDIRECT --to-ports 81 This does not work with ip6tables on a bridge in NAT66 scenario because the REDIRECT/DNAT/SNAT is not correctly detected. The bridge pre-routing (finish) netfilter hook has to check for a possible redirect and then fix the destination mac address. This allows to use the ip6tables rules for local REDIRECT/DNAT/SNAT REDIRECT similar to the IPv4 iptables version. e.g. REDIRECT $ sysctl -w net.bridge.bridge-nf-call-ip6tables=1 $ ip6tables -t nat -A PREROUTING -p tcp -m tcp --dport 8080 \ -j REDIRECT --to-ports 81 This patch makes it possible to use IPv6 NAT66 on a bridge. It was tested on a bridge with two interfaces using SNAT/DNAT NAT66 rules. Reported-by: Artie Hamilton Signed-off-by: Sven Eckelmann [bernhard.thaler@wvnet.at: rebased, add indirect call to ip6_route_input()] [bernhard.thaler@wvnet.at: rebased, split into separate patches] Signed-off-by: Bernhard Thaler Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_ipv6.h | 1 + include/linux/skbuff.h | 6 ++++- net/bridge/br_netfilter.c | 55 +++++++++++++++++++++++++++++++++++------- net/ipv6/netfilter.c | 1 + 4 files changed, 53 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index 64dad1cc1a4b..e2d19694ee8f 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -25,6 +25,7 @@ void ipv6_netfilter_fini(void); struct nf_ipv6_ops { int (*chk_addr)(struct net *net, const struct in6_addr *addr, const struct net_device *dev, int strict); + void (*route_input)(struct sk_buff *skb); }; extern const struct nf_ipv6_ops __rcu *nf_ipv6_ops; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index cc612fc0a894..f70fc0e6bf7b 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -36,6 +36,7 @@ #include #include #include +#include /* A. Checksumming of received packets by device. * @@ -179,7 +180,10 @@ struct nf_bridge_info { struct net_device *physoutdev; char neigh_header[8]; }; - __be32 ipv4_daddr; + union { + __be32 ipv4_daddr; + struct in6_addr ipv6_daddr; + }; }; #endif diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 6cb642c43451..9ac0c6417c77 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -326,30 +326,63 @@ free_skb: static bool daddr_was_changed(const struct sk_buff *skb, const struct nf_bridge_info *nf_bridge) { - return ip_hdr(skb)->daddr != nf_bridge->ipv4_daddr; + switch (skb->protocol) { + case htons(ETH_P_IP): + return ip_hdr(skb)->daddr != nf_bridge->ipv4_daddr; + case htons(ETH_P_IPV6): + return memcmp(&nf_bridge->ipv6_daddr, &ipv6_hdr(skb)->daddr, + sizeof(ipv6_hdr(skb)->daddr)) != 0; + default: + return false; + } } -/* PF_BRIDGE/PRE_ROUTING *********************************************/ -/* Undo the changes made for ip6tables PREROUTING and continue the - * bridge PRE_ROUTING hook. +/* PF_BRIDGE/PRE_ROUTING: Undo the changes made for ip6tables + * PREROUTING and continue the bridge PRE_ROUTING hook. See comment + * for br_nf_pre_routing_finish(), same logic is used here but + * equivalent IPv6 function ip6_route_input() called indirectly. */ static int br_nf_pre_routing_finish_ipv6(struct sock *sk, struct sk_buff *skb) { struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); struct rtable *rt; + struct net_device *dev = skb->dev; + const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops(); if (nf_bridge->pkt_otherhost) { skb->pkt_type = PACKET_OTHERHOST; nf_bridge->pkt_otherhost = false; } nf_bridge->mask &= ~BRNF_NF_BRIDGE_PREROUTING; + if (daddr_was_changed(skb, nf_bridge)) { + skb_dst_drop(skb); + v6ops->route_input(skb); - rt = bridge_parent_rtable(nf_bridge->physindev); - if (!rt) { - kfree_skb(skb); - return 0; + if (skb_dst(skb)->error) { + kfree_skb(skb); + return 0; + } + + if (skb_dst(skb)->dev == dev) { + skb->dev = nf_bridge->physindev; + nf_bridge_update_protocol(skb); + nf_bridge_push_encap_header(skb); + NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, + sk, skb, skb->dev, NULL, + br_nf_pre_routing_finish_bridge, + 1); + return 0; + } + ether_addr_copy(eth_hdr(skb)->h_dest, dev->dev_addr); + skb->pkt_type = PACKET_HOST; + } else { + rt = bridge_parent_rtable(nf_bridge->physindev); + if (!rt) { + kfree_skb(skb); + return 0; + } + skb_dst_set_noref(skb, &rt->dst); } - skb_dst_set_noref(skb, &rt->dst); skb->dev = nf_bridge->physindev; nf_bridge_update_protocol(skb); @@ -579,6 +612,7 @@ static unsigned int br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state) { + struct nf_bridge_info *nf_bridge; const struct ipv6hdr *hdr; u32 pkt_len; @@ -610,6 +644,9 @@ static unsigned int br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops, if (!setup_pre_routing(skb)) return NF_DROP; + nf_bridge = nf_bridge_info_get(skb); + nf_bridge->ipv6_daddr = ipv6_hdr(skb)->daddr; + skb->protocol = htons(ETH_P_IPV6); NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, state->sk, skb, skb->dev, NULL, diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index d958718b5031..bbca09fe9553 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -191,6 +191,7 @@ static __sum16 nf_ip6_checksum_partial(struct sk_buff *skb, unsigned int hook, static const struct nf_ipv6_ops ipv6ops = { .chk_addr = ipv6_chk_addr, + .route_input = ip6_route_input }; static const struct nf_afinfo nf_ip6_afinfo = { -- cgit v1.2.3 From 411ffb4fde80705a9a8db4c2d38dbeef6f5bd689 Mon Sep 17 00:00:00 2001 From: Bernhard Thaler Date: Sat, 30 May 2015 15:28:28 +0200 Subject: netfilter: bridge: refactor frag_max_size Currently frag_max_size is member of br_input_skb_cb and copied back and forth using IPCB(skb) and BR_INPUT_SKB_CB(skb) each time it is changed or used. Attach frag_max_size to nf_bridge_info and set value in pre_routing and forward functions. Use its value in forward and xmit functions. Signed-off-by: Bernhard Thaler Signed-off-by: Pablo Neira Ayuso --- include/linux/skbuff.h | 1 + net/bridge/br_netfilter.c | 20 +++++++------------- net/bridge/br_private.h | 1 - 3 files changed, 8 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f70fc0e6bf7b..32b105e682b3 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -174,6 +174,7 @@ struct nf_bridge_info { BRNF_PROTO_PPPOE } orig_proto:8; bool pkt_otherhost; + __u16 frag_max_size; unsigned int mask; struct net_device *physindev; union { diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 9ac0c6417c77..1f30b28745de 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -440,10 +440,8 @@ static int br_nf_pre_routing_finish(struct sock *sk, struct sk_buff *skb) struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); struct rtable *rt; int err; - int frag_max_size; - frag_max_size = IPCB(skb)->frag_max_size; - BR_INPUT_SKB_CB(skb)->frag_max_size = frag_max_size; + nf_bridge->frag_max_size = IPCB(skb)->frag_max_size; if (nf_bridge->pkt_otherhost) { skb->pkt_type = PACKET_OTHERHOST; @@ -738,11 +736,9 @@ static int br_nf_forward_finish(struct sock *sk, struct sk_buff *skb) struct net_device *in; if (!IS_ARP(skb) && !IS_VLAN_ARP(skb)) { - int frag_max_size; if (skb->protocol == htons(ETH_P_IP)) { - frag_max_size = IPCB(skb)->frag_max_size; - BR_INPUT_SKB_CB(skb)->frag_max_size = frag_max_size; + nf_bridge->frag_max_size = IPCB(skb)->frag_max_size; } in = nf_bridge->physindev; @@ -806,12 +802,9 @@ static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops, } if (pf == NFPROTO_IPV4) { - int frag_max = BR_INPUT_SKB_CB(skb)->frag_max_size; - if (br_parse_ip_options(skb)) return NF_DROP; - - IPCB(skb)->frag_max_size = frag_max; + IPCB(skb)->frag_max_size = nf_bridge->frag_max_size; } nf_bridge->physoutdev = skb->dev; @@ -904,7 +897,7 @@ static int br_nf_ip_fragment(struct sock *sk, struct sk_buff *skb, static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) { int ret; - int frag_max_size; + struct nf_bridge_info *nf_bridge; unsigned int mtu_reserved; if (skb_is_gso(skb) || skb->protocol != htons(ETH_P_IP)) { @@ -913,17 +906,18 @@ static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) } mtu_reserved = nf_bridge_mtu_reduction(skb); + nf_bridge = nf_bridge_info_get(skb); /* This is wrong! We should preserve the original fragment * boundaries by preserving frag_list rather than refragmenting. */ if (skb->len + mtu_reserved > skb->dev->mtu) { struct brnf_frag_data *data; - frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size; if (br_parse_ip_options(skb)) /* Drop invalid packet */ return NF_DROP; - IPCB(skb)->frag_max_size = frag_max_size; + + IPCB(skb)->frag_max_size = nf_bridge->frag_max_size; nf_bridge_update_protocol(skb); diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 1f36fa70639b..8cde96e68fd5 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -304,7 +304,6 @@ struct br_input_skb_cb { int mrouters_only; #endif - u16 frag_max_size; bool proxyarp_replied; #ifdef CONFIG_BRIDGE_VLAN_FILTERING -- cgit v1.2.3 From efb6de9b4ba0092b2c55f6a52d16294a8a698edd Mon Sep 17 00:00:00 2001 From: Bernhard Thaler Date: Sat, 30 May 2015 15:30:16 +0200 Subject: netfilter: bridge: forward IPv6 fragmented packets IPv6 fragmented packets are not forwarded on an ethernet bridge with netfilter ip6_tables loaded. e.g. steps to reproduce 1) create a simple bridge like this modprobe br_netfilter brctl addbr br0 brctl addif br0 eth0 brctl addif br0 eth2 ifconfig eth0 up ifconfig eth2 up ifconfig br0 up 2) place a host with an IPv6 address on each side of the bridge set IPv6 address on host A: ip -6 addr add fd01:2345:6789:1::1/64 dev eth0 set IPv6 address on host B: ip -6 addr add fd01:2345:6789:1::2/64 dev eth0 3) run a simple ping command on host A with packets > MTU ping6 -s 4000 fd01:2345:6789:1::2 4) wait some time and run e.g. "ip6tables -t nat -nvL" on the bridge IPv6 fragmented packets traverse the bridge cleanly until somebody runs. "ip6tables -t nat -nvL". As soon as it is run (and netfilter modules are loaded) IPv6 fragmented packets do not traverse the bridge any more (you see no more responses in ping's output). After applying this patch IPv6 fragmented packets traverse the bridge cleanly in above scenario. Signed-off-by: Bernhard Thaler [pablo@netfilter.org: small changes to br_nf_dev_queue_xmit] Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_ipv6.h | 2 + net/bridge/br_netfilter.c | 139 +++++++++++++++++++++++++++++------------ net/bridge/br_private.h | 6 +- net/ipv6/netfilter.c | 3 +- 4 files changed, 108 insertions(+), 42 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index e2d19694ee8f..8b7d28f3aada 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -26,6 +26,8 @@ struct nf_ipv6_ops { int (*chk_addr)(struct net *net, const struct in6_addr *addr, const struct net_device *dev, int strict); void (*route_input)(struct sk_buff *skb); + int (*fragment)(struct sock *sk, struct sk_buff *skb, + int (*output)(struct sock *, struct sk_buff *)); }; extern const struct nf_ipv6_ops __rcu *nf_ipv6_ops; diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index d201ea4440c9..535f9dae743e 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -34,6 +34,7 @@ #include #include +#include #include #include @@ -320,6 +321,55 @@ bad: return -1; } +/* Equivalent to br_validate_ipv4 for IPv6 */ +static int br_validate_ipv6(struct sk_buff *skb) +{ + const struct ipv6hdr *hdr; + struct net_device *dev = skb->dev; + struct inet6_dev *idev = in6_dev_get(skb->dev); + u32 pkt_len; + u8 ip6h_len = sizeof(struct ipv6hdr); + + if (!pskb_may_pull(skb, ip6h_len)) + goto inhdr_error; + + if (skb->len < ip6h_len) + goto drop; + + hdr = ipv6_hdr(skb); + + if (hdr->version != 6) + goto inhdr_error; + + pkt_len = ntohs(hdr->payload_len); + + if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) { + if (pkt_len + ip6h_len > skb->len) { + IP6_INC_STATS_BH(dev_net(dev), idev, + IPSTATS_MIB_INTRUNCATEDPKTS); + goto drop; + } + if (pskb_trim_rcsum(skb, pkt_len + ip6h_len)) { + IP6_INC_STATS_BH(dev_net(dev), idev, + IPSTATS_MIB_INDISCARDS); + goto drop; + } + } + if (hdr->nexthdr == NEXTHDR_HOP && check_hbh_len(skb)) + goto drop; + + memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm)); + /* No IP options in IPv6 header; however it should be + * checked if some next headers need special treatment + */ + return 0; + +inhdr_error: + IP6_INC_STATS_BH(dev_net(dev), idev, IPSTATS_MIB_INHDRERRORS); +drop: + return -1; +} + static void nf_bridge_update_protocol(struct sk_buff *skb) { switch (skb->nf_bridge->orig_proto) { @@ -405,6 +455,8 @@ static int br_nf_pre_routing_finish_ipv6(struct sock *sk, struct sk_buff *skb) struct net_device *dev = skb->dev; const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops(); + nf_bridge->frag_max_size = IP6CB(skb)->frag_max_size; + if (nf_bridge->pkt_otherhost) { skb->pkt_type = PACKET_OTHERHOST; nf_bridge->pkt_otherhost = false; @@ -606,35 +658,15 @@ static struct net_device *setup_pre_routing(struct sk_buff *skb) } /* Replicate the checks that IPv6 does on packet reception and pass the packet - * to ip6tables, which doesn't support NAT, so things are fairly simple. */ + * to ip6tables. + */ static unsigned int br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state) { struct nf_bridge_info *nf_bridge; - const struct ipv6hdr *hdr; - u32 pkt_len; - - if (skb->len < sizeof(struct ipv6hdr)) - return NF_DROP; - - if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) - return NF_DROP; - - hdr = ipv6_hdr(skb); - - if (hdr->version != 6) - return NF_DROP; - pkt_len = ntohs(hdr->payload_len); - - if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) { - if (pkt_len + sizeof(struct ipv6hdr) > skb->len) - return NF_DROP; - if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr))) - return NF_DROP; - } - if (hdr->nexthdr == NEXTHDR_HOP && check_hbh_len(skb)) + if (br_validate_ipv6(skb)) return NF_DROP; nf_bridge_put(skb->nf_bridge); @@ -738,9 +770,11 @@ static int br_nf_forward_finish(struct sock *sk, struct sk_buff *skb) if (!IS_ARP(skb) && !IS_VLAN_ARP(skb)) { - if (skb->protocol == htons(ETH_P_IP)) { + if (skb->protocol == htons(ETH_P_IP)) nf_bridge->frag_max_size = IPCB(skb)->frag_max_size; - } + + if (skb->protocol == htons(ETH_P_IPV6)) + nf_bridge->frag_max_size = IP6CB(skb)->frag_max_size; in = nf_bridge->physindev; if (nf_bridge->pkt_otherhost) { @@ -808,6 +842,12 @@ static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops, IPCB(skb)->frag_max_size = nf_bridge->frag_max_size; } + if (pf == NFPROTO_IPV6) { + if (br_validate_ipv6(skb)) + return NF_DROP; + IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size; + } + nf_bridge->physoutdev = skb->dev; if (pf == NFPROTO_IPV4) skb->protocol = htons(ETH_P_IP); @@ -855,7 +895,7 @@ static unsigned int br_nf_forward_arp(const struct nf_hook_ops *ops, return NF_STOLEN; } -#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) || IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) static int br_nf_push_frag_xmit(struct sock *sk, struct sk_buff *skb) { struct brnf_frag_data *data; @@ -875,6 +915,7 @@ static int br_nf_push_frag_xmit(struct sock *sk, struct sk_buff *skb) nf_bridge_info_free(skb); return br_dev_queue_push_xmit(sk, skb); } +#endif static int br_nf_ip_fragment(struct sock *sk, struct sk_buff *skb, int (*output)(struct sock *, struct sk_buff *)) @@ -897,21 +938,23 @@ static int br_nf_ip_fragment(struct sock *sk, struct sk_buff *skb, static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) { - int ret; struct nf_bridge_info *nf_bridge; unsigned int mtu_reserved; - if (skb_is_gso(skb) || skb->protocol != htons(ETH_P_IP)) { + mtu_reserved = nf_bridge_mtu_reduction(skb); + + if (skb_is_gso(skb) || skb->len + mtu_reserved <= skb->dev->mtu) { nf_bridge_info_free(skb); return br_dev_queue_push_xmit(sk, skb); } - mtu_reserved = nf_bridge_mtu_reduction(skb); nf_bridge = nf_bridge_info_get(skb); + +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) /* This is wrong! We should preserve the original fragment * boundaries by preserving frag_list rather than refragmenting. */ - if (skb->len + mtu_reserved > skb->dev->mtu) { + if (skb->protocol == htons(ETH_P_IP)) { struct brnf_frag_data *data; if (br_validate_ipv4(skb)) @@ -928,21 +971,37 @@ static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) skb_copy_from_linear_data_offset(skb, -data->size, data->mac, data->size); - ret = br_nf_ip_fragment(sk, skb, br_nf_push_frag_xmit); - } else { - nf_bridge_info_free(skb); - ret = br_dev_queue_push_xmit(sk, skb); + return br_nf_ip_fragment(sk, skb, br_nf_push_frag_xmit); } +#endif +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) + if (skb->protocol == htons(ETH_P_IPV6)) { + const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops(); + struct brnf_frag_data *data; - return ret; -} -#else -static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) -{ + if (br_validate_ipv6(skb)) + return NF_DROP; + + IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size; + + nf_bridge_update_protocol(skb); + + data = this_cpu_ptr(&brnf_frag_data_storage); + data->encap_size = nf_bridge_encap_header_len(skb); + data->size = ETH_HLEN + data->encap_size; + + skb_copy_from_linear_data_offset(skb, -data->size, data->mac, + data->size); + + if (v6ops) + return v6ops->fragment(sk, skb, br_nf_push_frag_xmit); + else + return -EMSGSIZE; + } +#endif nf_bridge_info_free(skb); return br_dev_queue_push_xmit(sk, skb); } -#endif /* PF_BRIDGE/POST_ROUTING ********************************************/ static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops, diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 8cde96e68fd5..5dccced71269 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -18,6 +18,7 @@ #include #include #include +#include #include #define BR_HASH_BITS 8 @@ -214,7 +215,10 @@ struct net_bridge spinlock_t hash_lock; struct hlist_head hash[BR_HASH_SIZE]; #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) - struct rtable fake_rtable; + union { + struct rtable fake_rtable; + struct rt6_info fake_rt6_info; + }; bool nf_call_iptables; bool nf_call_ip6tables; bool nf_call_arptables; diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index bbca09fe9553..b4de08a83e0b 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -191,7 +191,8 @@ static __sum16 nf_ip6_checksum_partial(struct sk_buff *skb, unsigned int hook, static const struct nf_ipv6_ops ipv6ops = { .chk_addr = ipv6_chk_addr, - .route_input = ip6_route_input + .route_input = ip6_route_input, + .fragment = ip6_fragment }; static const struct nf_afinfo nf_ip6_afinfo = { -- cgit v1.2.3 From 33b1f31392861947fa2a2a57c3a39ab63b8c9f9d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 5 Jun 2015 13:28:38 +0200 Subject: net: ip_fragment: remove BRIDGE_NETFILTER mtu special handling since commit d6b915e29f4adea9 ("ip_fragment: don't forward defragmented DF packet") the largest fragment size is available in the IPCB. Therefore we no longer need to care about 'encapsulation' overhead of stripped PPPOE/VLAN headers since ip_do_fragment doesn't use device mtu in such cases. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_bridge.h | 7 ------- net/bridge/br_netfilter.c | 7 +++++++ net/ipv4/ip_output.c | 4 ---- 3 files changed, 7 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h index f2fdb5a52070..6d80fc686323 100644 --- a/include/linux/netfilter_bridge.h +++ b/include/linux/netfilter_bridge.h @@ -20,13 +20,6 @@ enum nf_br_hook_priorities { #define BRNF_BRIDGED_DNAT 0x02 #define BRNF_NF_BRIDGE_PREROUTING 0x08 -static inline unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb) -{ - if (skb->nf_bridge->orig_proto == BRNF_PROTO_PPPOE) - return PPPOE_SES_HLEN; - return 0; -} - int br_handle_frame_finish(struct sock *sk, struct sk_buff *skb); static inline void br_drop_fake_rtable(struct sk_buff *skb) diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 535f9dae743e..1e62ae5d8f4e 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -936,6 +936,13 @@ static int br_nf_ip_fragment(struct sock *sk, struct sk_buff *skb, return ip_do_fragment(sk, skb, output); } +static unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb) +{ + if (skb->nf_bridge->orig_proto == BRNF_PROTO_PPPOE) + return PPPOE_SES_HLEN; + return 0; +} + static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) { struct nf_bridge_info *nf_bridge; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index f5f5ef1cebd5..19d7e43b5370 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -549,10 +549,6 @@ int ip_do_fragment(struct sock *sk, struct sk_buff *skb, hlen = iph->ihl * 4; mtu = mtu - hlen; /* Size of data space */ -#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) - if (skb->nf_bridge) - mtu -= nf_bridge_mtu_reduction(skb); -#endif IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE; /* When frag_list is given, use it. First, check its validity: -- cgit v1.2.3 From 71ae0dff02d756e4d2ca710b79f2ff5390029a5f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 11 Jun 2015 01:34:54 +0200 Subject: netfilter: xtables: use percpu rule counters The binary arp/ip/ip6tables ruleset is stored per cpu. The only reason left as to why we need percpu duplication are the rule counters embedded into ipt_entry et al -- since each cpu has its own copy of the rules, all counters can be lockless. The downside is that the more cpus are supported, the more memory is required. Rules are not just duplicated per online cpu but for each possible cpu, i.e. if maxcpu is 144, then rule is duplicated 144 times, not for the e.g. 64 cores present. To save some memory and also improve utilization of shared caches it would be preferable to only store the rule blob once. So we first need to separate counters and the rule blob. Instead of using entry->counters, allocate this percpu and store the percpu address in entry->counters.pcnt on CONFIG_SMP. This change makes no sense as-is; it is merely an intermediate step to remove the percpu duplication of the rule set in a followup patch. Suggested-by: Eric Dumazet Acked-by: Jesper Dangaard Brouer Reported-by: Marcelo Ricardo Leitner Signed-off-by: Florian Westphal Acked-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 49 ++++++++++++++++++++++++++++++++++++++ net/ipv4/netfilter/arp_tables.c | 32 +++++++++++++++++++++---- net/ipv4/netfilter/ip_tables.c | 31 ++++++++++++++++++++---- net/ipv6/netfilter/ip6_tables.c | 31 ++++++++++++++++++++---- 4 files changed, 129 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 09f38206c18f..b77ab9f17641 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -353,6 +353,55 @@ static inline unsigned long ifname_compare_aligned(const char *_a, return ret; } + +/* On SMP, ip(6)t_entry->counters.pcnt holds address of the + * real (percpu) counter. On !SMP, its just the packet count, + * so nothing needs to be done there. + * + * xt_percpu_counter_alloc returns the address of the percpu + * counter, or 0 on !SMP. + * + * Hence caller must use IS_ERR_VALUE to check for error, this + * allows us to return 0 for single core systems without forcing + * callers to deal with SMP vs. NONSMP issues. + */ +static inline u64 xt_percpu_counter_alloc(void) +{ + if (nr_cpu_ids > 1) { + void __percpu *res = alloc_percpu(struct xt_counters); + + if (res == NULL) + return (u64) -ENOMEM; + + return (__force u64) res; + } + + return 0; +} +static inline void xt_percpu_counter_free(u64 pcnt) +{ + if (nr_cpu_ids > 1) + free_percpu((void __percpu *) pcnt); +} + +static inline struct xt_counters * +xt_get_this_cpu_counter(struct xt_counters *cnt) +{ + if (nr_cpu_ids > 1) + return this_cpu_ptr((void __percpu *) cnt->pcnt); + + return cnt; +} + +static inline struct xt_counters * +xt_get_per_cpu_counter(struct xt_counters *cnt, unsigned int cpu) +{ + if (nr_cpu_ids > 1) + return per_cpu_ptr((void __percpu *) cnt->pcnt, cpu); + + return cnt; +} + struct nf_hook_ops *xt_hook_link(const struct xt_table *, nf_hookfn *); void xt_hook_unlink(const struct xt_table *, struct nf_hook_ops *); diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index a61200754f4b..0ada09ae6e81 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -289,13 +289,15 @@ unsigned int arpt_do_table(struct sk_buff *skb, arp = arp_hdr(skb); do { const struct xt_entry_target *t; + struct xt_counters *counter; if (!arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) { e = arpt_next_entry(e); continue; } - ADD_COUNTER(e->counters, arp_hdr_len(skb->dev), 1); + counter = xt_get_this_cpu_counter(&e->counters); + ADD_COUNTER(*counter, arp_hdr_len(skb->dev), 1); t = arpt_get_target_c(e); @@ -521,6 +523,10 @@ find_check_entry(struct arpt_entry *e, const char *name, unsigned int size) if (ret) return ret; + e->counters.pcnt = xt_percpu_counter_alloc(); + if (IS_ERR_VALUE(e->counters.pcnt)) + return -ENOMEM; + t = arpt_get_target(e); target = xt_request_find_target(NFPROTO_ARP, t->u.user.name, t->u.user.revision); @@ -538,6 +544,8 @@ find_check_entry(struct arpt_entry *e, const char *name, unsigned int size) err: module_put(t->u.kernel.target->me); out: + xt_percpu_counter_free(e->counters.pcnt); + return ret; } @@ -614,6 +622,7 @@ static inline void cleanup_entry(struct arpt_entry *e) if (par.target->destroy != NULL) par.target->destroy(&par); module_put(par.target->me); + xt_percpu_counter_free(e->counters.pcnt); } /* Checks and translates the user-supplied table segment (held in @@ -723,13 +732,15 @@ static void get_counters(const struct xt_table_info *t, i = 0; xt_entry_foreach(iter, t->entries[cpu], t->size) { + struct xt_counters *tmp; u64 bcnt, pcnt; unsigned int start; + tmp = xt_get_per_cpu_counter(&iter->counters, cpu); do { start = read_seqcount_begin(s); - bcnt = iter->counters.bcnt; - pcnt = iter->counters.pcnt; + bcnt = tmp->bcnt; + pcnt = tmp->pcnt; } while (read_seqcount_retry(s, start)); ADD_COUNTER(counters[i], bcnt, pcnt); @@ -1186,7 +1197,10 @@ static int do_add_counters(struct net *net, const void __user *user, loc_cpu_entry = private->entries[curcpu]; addend = xt_write_recseq_begin(); xt_entry_foreach(iter, loc_cpu_entry, private->size) { - ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt); + struct xt_counters *tmp; + + tmp = xt_get_this_cpu_counter(&iter->counters); + ADD_COUNTER(*tmp, paddc[i].bcnt, paddc[i].pcnt); ++i; } xt_write_recseq_end(addend); @@ -1416,9 +1430,17 @@ static int translate_compat_table(const char *name, i = 0; xt_entry_foreach(iter1, entry1, newinfo->size) { + iter1->counters.pcnt = xt_percpu_counter_alloc(); + if (IS_ERR_VALUE(iter1->counters.pcnt)) { + ret = -ENOMEM; + break; + } + ret = check_target(iter1, name); - if (ret != 0) + if (ret != 0) { + xt_percpu_counter_free(iter1->counters.pcnt); break; + } ++i; if (strcmp(arpt_get_target(iter1)->u.user.name, XT_ERROR_TARGET) == 0) diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index e7abf5145edc..d190b10dabfe 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -345,6 +345,7 @@ ipt_do_table(struct sk_buff *skb, do { const struct xt_entry_target *t; const struct xt_entry_match *ematch; + struct xt_counters *counter; IP_NF_ASSERT(e); if (!ip_packet_match(ip, indev, outdev, @@ -361,7 +362,8 @@ ipt_do_table(struct sk_buff *skb, goto no_match; } - ADD_COUNTER(e->counters, skb->len, 1); + counter = xt_get_this_cpu_counter(&e->counters); + ADD_COUNTER(*counter, skb->len, 1); t = ipt_get_target(e); IP_NF_ASSERT(t->u.kernel.target); @@ -665,6 +667,10 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name, if (ret) return ret; + e->counters.pcnt = xt_percpu_counter_alloc(); + if (IS_ERR_VALUE(e->counters.pcnt)) + return -ENOMEM; + j = 0; mtpar.net = net; mtpar.table = name; @@ -691,6 +697,7 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name, ret = check_target(e, net, name); if (ret) goto err; + return 0; err: module_put(t->u.kernel.target->me); @@ -700,6 +707,9 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name, break; cleanup_match(ematch, net); } + + xt_percpu_counter_free(e->counters.pcnt); + return ret; } @@ -784,6 +794,7 @@ cleanup_entry(struct ipt_entry *e, struct net *net) if (par.target->destroy != NULL) par.target->destroy(&par); module_put(par.target->me); + xt_percpu_counter_free(e->counters.pcnt); } /* Checks and translates the user-supplied table segment (held in @@ -888,13 +899,15 @@ get_counters(const struct xt_table_info *t, i = 0; xt_entry_foreach(iter, t->entries[cpu], t->size) { + struct xt_counters *tmp; u64 bcnt, pcnt; unsigned int start; + tmp = xt_get_per_cpu_counter(&iter->counters, cpu); do { start = read_seqcount_begin(s); - bcnt = iter->counters.bcnt; - pcnt = iter->counters.pcnt; + bcnt = tmp->bcnt; + pcnt = tmp->pcnt; } while (read_seqcount_retry(s, start)); ADD_COUNTER(counters[i], bcnt, pcnt); @@ -1374,7 +1387,10 @@ do_add_counters(struct net *net, const void __user *user, loc_cpu_entry = private->entries[curcpu]; addend = xt_write_recseq_begin(); xt_entry_foreach(iter, loc_cpu_entry, private->size) { - ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt); + struct xt_counters *tmp; + + tmp = xt_get_this_cpu_counter(&iter->counters); + ADD_COUNTER(*tmp, paddc[i].bcnt, paddc[i].pcnt); ++i; } xt_write_recseq_end(addend); @@ -1608,6 +1624,10 @@ compat_check_entry(struct ipt_entry *e, struct net *net, const char *name) unsigned int j; int ret = 0; + e->counters.pcnt = xt_percpu_counter_alloc(); + if (IS_ERR_VALUE(e->counters.pcnt)) + return -ENOMEM; + j = 0; mtpar.net = net; mtpar.table = name; @@ -1632,6 +1652,9 @@ compat_check_entry(struct ipt_entry *e, struct net *net, const char *name) break; cleanup_match(ematch, net); } + + xt_percpu_counter_free(e->counters.pcnt); + return ret; } diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index cdd085f8b770..a1190ee14723 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -367,6 +367,7 @@ ip6t_do_table(struct sk_buff *skb, do { const struct xt_entry_target *t; const struct xt_entry_match *ematch; + struct xt_counters *counter; IP_NF_ASSERT(e); acpar.thoff = 0; @@ -384,7 +385,8 @@ ip6t_do_table(struct sk_buff *skb, goto no_match; } - ADD_COUNTER(e->counters, skb->len, 1); + counter = xt_get_this_cpu_counter(&e->counters); + ADD_COUNTER(*counter, skb->len, 1); t = ip6t_get_target_c(e); IP_NF_ASSERT(t->u.kernel.target); @@ -679,6 +681,10 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name, if (ret) return ret; + e->counters.pcnt = xt_percpu_counter_alloc(); + if (IS_ERR_VALUE(e->counters.pcnt)) + return -ENOMEM; + j = 0; mtpar.net = net; mtpar.table = name; @@ -714,6 +720,9 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name, break; cleanup_match(ematch, net); } + + xt_percpu_counter_free(e->counters.pcnt); + return ret; } @@ -797,6 +806,8 @@ static void cleanup_entry(struct ip6t_entry *e, struct net *net) if (par.target->destroy != NULL) par.target->destroy(&par); module_put(par.target->me); + + xt_percpu_counter_free(e->counters.pcnt); } /* Checks and translates the user-supplied table segment (held in @@ -901,13 +912,15 @@ get_counters(const struct xt_table_info *t, i = 0; xt_entry_foreach(iter, t->entries[cpu], t->size) { + struct xt_counters *tmp; u64 bcnt, pcnt; unsigned int start; + tmp = xt_get_per_cpu_counter(&iter->counters, cpu); do { start = read_seqcount_begin(s); - bcnt = iter->counters.bcnt; - pcnt = iter->counters.pcnt; + bcnt = tmp->bcnt; + pcnt = tmp->pcnt; } while (read_seqcount_retry(s, start)); ADD_COUNTER(counters[i], bcnt, pcnt); @@ -1374,7 +1387,6 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len, goto free; } - local_bh_disable(); private = t->private; if (private->number != num_counters) { @@ -1388,7 +1400,10 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len, addend = xt_write_recseq_begin(); loc_cpu_entry = private->entries[curcpu]; xt_entry_foreach(iter, loc_cpu_entry, private->size) { - ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt); + struct xt_counters *tmp; + + tmp = xt_get_this_cpu_counter(&iter->counters); + ADD_COUNTER(*tmp, paddc[i].bcnt, paddc[i].pcnt); ++i; } xt_write_recseq_end(addend); @@ -1621,6 +1636,9 @@ static int compat_check_entry(struct ip6t_entry *e, struct net *net, struct xt_mtchk_param mtpar; struct xt_entry_match *ematch; + e->counters.pcnt = xt_percpu_counter_alloc(); + if (IS_ERR_VALUE(e->counters.pcnt)) + return -ENOMEM; j = 0; mtpar.net = net; mtpar.table = name; @@ -1645,6 +1663,9 @@ static int compat_check_entry(struct ip6t_entry *e, struct net *net, break; cleanup_match(ematch, net); } + + xt_percpu_counter_free(e->counters.pcnt); + return ret; } -- cgit v1.2.3 From 482cfc318559e2527dfd8513582d2fdb276e47c2 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 11 Jun 2015 01:34:55 +0200 Subject: netfilter: xtables: avoid percpu ruleset duplication We store the rule blob per (possible) cpu. Unfortunately this means we can waste lot of memory on big smp machines. ipt_entry structure ('rule head') is 112 byte, so e.g. with maxcpu=64 one single rule eats close to 8k RAM. Since previous patch made counters percpu it appears there is nothing left in the rule blob that needs to be percpu. On my test system (144 possible cpus, 400k dummy rules) this change saves close to 9 Gigabyte of RAM. Reported-by: Marcelo Ricardo Leitner Acked-by: Jesper Dangaard Brouer Signed-off-by: Florian Westphal Acked-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 4 +-- net/ipv4/netfilter/arp_tables.c | 50 +++++++++-------------------- net/ipv4/netfilter/ip_tables.c | 64 ++++++++++--------------------------- net/ipv6/netfilter/ip6_tables.c | 65 ++++++++++---------------------------- net/netfilter/x_tables.c | 23 +++++--------- 5 files changed, 57 insertions(+), 149 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index b77ab9f17641..9969d79dcde1 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -224,9 +224,9 @@ struct xt_table_info { unsigned int stacksize; unsigned int __percpu *stackptr; void ***jumpstack; - /* ipt_entry tables: one per CPU */ + /* Note : this field MUST be the last one, see XT_TABLE_INFO_SZ */ - void *entries[1]; + void *entries; }; #define XT_TABLE_INFO_SZ (offsetof(struct xt_table_info, entries) \ diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 0ada09ae6e81..d75c139393fc 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -275,7 +275,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, * pointer. */ smp_read_barrier_depends(); - table_base = private->entries[smp_processor_id()]; + table_base = private->entries; e = get_entry(table_base, private->hook_entry[hook]); back = get_entry(table_base, private->underflow[hook]); @@ -711,12 +711,6 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0, return ret; } - /* And one copy for every other CPU */ - for_each_possible_cpu(i) { - if (newinfo->entries[i] && newinfo->entries[i] != entry0) - memcpy(newinfo->entries[i], entry0, newinfo->size); - } - return ret; } @@ -731,7 +725,7 @@ static void get_counters(const struct xt_table_info *t, seqcount_t *s = &per_cpu(xt_recseq, cpu); i = 0; - xt_entry_foreach(iter, t->entries[cpu], t->size) { + xt_entry_foreach(iter, t->entries, t->size) { struct xt_counters *tmp; u64 bcnt, pcnt; unsigned int start; @@ -785,7 +779,7 @@ static int copy_entries_to_user(unsigned int total_size, if (IS_ERR(counters)) return PTR_ERR(counters); - loc_cpu_entry = private->entries[raw_smp_processor_id()]; + loc_cpu_entry = private->entries; /* ... then copy entire thing ... */ if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) { ret = -EFAULT; @@ -880,10 +874,10 @@ static int compat_table_info(const struct xt_table_info *info, if (!newinfo || !info) return -EINVAL; - /* we dont care about newinfo->entries[] */ + /* we dont care about newinfo->entries */ memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); newinfo->initial_entries = 0; - loc_cpu_entry = info->entries[raw_smp_processor_id()]; + loc_cpu_entry = info->entries; xt_compat_init_offsets(NFPROTO_ARP, info->number); xt_entry_foreach(iter, loc_cpu_entry, info->size) { ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); @@ -1048,7 +1042,7 @@ static int __do_replace(struct net *net, const char *name, get_counters(oldinfo, counters); /* Decrease module usage counts and free resource */ - loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; + loc_cpu_old_entry = oldinfo->entries; xt_entry_foreach(iter, loc_cpu_old_entry, oldinfo->size) cleanup_entry(iter); @@ -1095,8 +1089,7 @@ static int do_replace(struct net *net, const void __user *user, if (!newinfo) return -ENOMEM; - /* choose the copy that is on our node/cpu */ - loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; + loc_cpu_entry = newinfo->entries; if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), tmp.size) != 0) { ret = -EFAULT; @@ -1126,7 +1119,7 @@ static int do_replace(struct net *net, const void __user *user, static int do_add_counters(struct net *net, const void __user *user, unsigned int len, int compat) { - unsigned int i, curcpu; + unsigned int i; struct xt_counters_info tmp; struct xt_counters *paddc; unsigned int num_counters; @@ -1136,7 +1129,6 @@ static int do_add_counters(struct net *net, const void __user *user, struct xt_table *t; const struct xt_table_info *private; int ret = 0; - void *loc_cpu_entry; struct arpt_entry *iter; unsigned int addend; #ifdef CONFIG_COMPAT @@ -1192,11 +1184,9 @@ static int do_add_counters(struct net *net, const void __user *user, } i = 0; - /* Choose the copy that is on our node */ - curcpu = smp_processor_id(); - loc_cpu_entry = private->entries[curcpu]; + addend = xt_write_recseq_begin(); - xt_entry_foreach(iter, loc_cpu_entry, private->size) { + xt_entry_foreach(iter, private->entries, private->size) { struct xt_counters *tmp; tmp = xt_get_this_cpu_counter(&iter->counters); @@ -1410,7 +1400,7 @@ static int translate_compat_table(const char *name, newinfo->hook_entry[i] = info->hook_entry[i]; newinfo->underflow[i] = info->underflow[i]; } - entry1 = newinfo->entries[raw_smp_processor_id()]; + entry1 = newinfo->entries; pos = entry1; size = total_size; xt_entry_foreach(iter0, entry0, total_size) { @@ -1470,11 +1460,6 @@ static int translate_compat_table(const char *name, return ret; } - /* And one copy for every other CPU */ - for_each_possible_cpu(i) - if (newinfo->entries[i] && newinfo->entries[i] != entry1) - memcpy(newinfo->entries[i], entry1, newinfo->size); - *pinfo = newinfo; *pentry0 = entry1; xt_free_table_info(info); @@ -1533,8 +1518,7 @@ static int compat_do_replace(struct net *net, void __user *user, if (!newinfo) return -ENOMEM; - /* choose the copy that is on our node/cpu */ - loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; + loc_cpu_entry = newinfo->entries; if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), tmp.size) != 0) { ret = -EFAULT; goto free_newinfo; @@ -1631,7 +1615,6 @@ static int compat_copy_entries_to_user(unsigned int total_size, void __user *pos; unsigned int size; int ret = 0; - void *loc_cpu_entry; unsigned int i = 0; struct arpt_entry *iter; @@ -1639,11 +1622,9 @@ static int compat_copy_entries_to_user(unsigned int total_size, if (IS_ERR(counters)) return PTR_ERR(counters); - /* choose the copy on our node/cpu */ - loc_cpu_entry = private->entries[raw_smp_processor_id()]; pos = userptr; size = total_size; - xt_entry_foreach(iter, loc_cpu_entry, total_size) { + xt_entry_foreach(iter, private->entries, total_size) { ret = compat_copy_entry_to_user(iter, &pos, &size, counters, i++); if (ret != 0) @@ -1812,8 +1793,7 @@ struct xt_table *arpt_register_table(struct net *net, goto out; } - /* choose the copy on our node/cpu */ - loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; + loc_cpu_entry = newinfo->entries; memcpy(loc_cpu_entry, repl->entries, repl->size); ret = translate_table(newinfo, loc_cpu_entry, repl); @@ -1844,7 +1824,7 @@ void arpt_unregister_table(struct xt_table *table) private = xt_unregister_table(table); /* Decrease module usage counts and free resources */ - loc_cpu_entry = private->entries[raw_smp_processor_id()]; + loc_cpu_entry = private->entries; xt_entry_foreach(iter, loc_cpu_entry, private->size) cleanup_entry(iter); if (private->number > private->initial_entries) diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index d190b10dabfe..6151500c2df8 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -254,15 +254,13 @@ static void trace_packet(const struct sk_buff *skb, const struct xt_table_info *private, const struct ipt_entry *e) { - const void *table_base; const struct ipt_entry *root; const char *hookname, *chainname, *comment; const struct ipt_entry *iter; unsigned int rulenum = 0; struct net *net = dev_net(in ? in : out); - table_base = private->entries[smp_processor_id()]; - root = get_entry(table_base, private->hook_entry[hook]); + root = get_entry(private->entries, private->hook_entry[hook]); hookname = chainname = hooknames[hook]; comment = comments[NF_IP_TRACE_COMMENT_RULE]; @@ -331,7 +329,7 @@ ipt_do_table(struct sk_buff *skb, * pointer. */ smp_read_barrier_depends(); - table_base = private->entries[cpu]; + table_base = private->entries; jumpstack = (struct ipt_entry **)private->jumpstack[cpu]; stackptr = per_cpu_ptr(private->stackptr, cpu); origptr = *stackptr; @@ -877,12 +875,6 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, return ret; } - /* And one copy for every other CPU */ - for_each_possible_cpu(i) { - if (newinfo->entries[i] && newinfo->entries[i] != entry0) - memcpy(newinfo->entries[i], entry0, newinfo->size); - } - return ret; } @@ -898,7 +890,7 @@ get_counters(const struct xt_table_info *t, seqcount_t *s = &per_cpu(xt_recseq, cpu); i = 0; - xt_entry_foreach(iter, t->entries[cpu], t->size) { + xt_entry_foreach(iter, t->entries, t->size) { struct xt_counters *tmp; u64 bcnt, pcnt; unsigned int start; @@ -946,17 +938,13 @@ copy_entries_to_user(unsigned int total_size, struct xt_counters *counters; const struct xt_table_info *private = table->private; int ret = 0; - const void *loc_cpu_entry; + void *loc_cpu_entry; counters = alloc_counters(table); if (IS_ERR(counters)) return PTR_ERR(counters); - /* choose the copy that is on our node/cpu, ... - * This choice is lazy (because current thread is - * allowed to migrate to another cpu) - */ - loc_cpu_entry = private->entries[raw_smp_processor_id()]; + loc_cpu_entry = private->entries; if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) { ret = -EFAULT; goto free_counters; @@ -1070,10 +1058,10 @@ static int compat_table_info(const struct xt_table_info *info, if (!newinfo || !info) return -EINVAL; - /* we dont care about newinfo->entries[] */ + /* we dont care about newinfo->entries */ memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); newinfo->initial_entries = 0; - loc_cpu_entry = info->entries[raw_smp_processor_id()]; + loc_cpu_entry = info->entries; xt_compat_init_offsets(AF_INET, info->number); xt_entry_foreach(iter, loc_cpu_entry, info->size) { ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); @@ -1194,7 +1182,6 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, struct xt_table *t; struct xt_table_info *oldinfo; struct xt_counters *counters; - void *loc_cpu_old_entry; struct ipt_entry *iter; ret = 0; @@ -1237,8 +1224,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, get_counters(oldinfo, counters); /* Decrease module usage counts and free resource */ - loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; - xt_entry_foreach(iter, loc_cpu_old_entry, oldinfo->size) + xt_entry_foreach(iter, oldinfo->entries, oldinfo->size) cleanup_entry(iter, net); xt_free_table_info(oldinfo); @@ -1284,8 +1270,7 @@ do_replace(struct net *net, const void __user *user, unsigned int len) if (!newinfo) return -ENOMEM; - /* choose the copy that is on our node/cpu */ - loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; + loc_cpu_entry = newinfo->entries; if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), tmp.size) != 0) { ret = -EFAULT; @@ -1316,7 +1301,7 @@ static int do_add_counters(struct net *net, const void __user *user, unsigned int len, int compat) { - unsigned int i, curcpu; + unsigned int i; struct xt_counters_info tmp; struct xt_counters *paddc; unsigned int num_counters; @@ -1326,7 +1311,6 @@ do_add_counters(struct net *net, const void __user *user, struct xt_table *t; const struct xt_table_info *private; int ret = 0; - void *loc_cpu_entry; struct ipt_entry *iter; unsigned int addend; #ifdef CONFIG_COMPAT @@ -1382,11 +1366,8 @@ do_add_counters(struct net *net, const void __user *user, } i = 0; - /* Choose the copy that is on our node */ - curcpu = smp_processor_id(); - loc_cpu_entry = private->entries[curcpu]; addend = xt_write_recseq_begin(); - xt_entry_foreach(iter, loc_cpu_entry, private->size) { + xt_entry_foreach(iter, private->entries, private->size) { struct xt_counters *tmp; tmp = xt_get_this_cpu_counter(&iter->counters); @@ -1739,7 +1720,7 @@ translate_compat_table(struct net *net, newinfo->hook_entry[i] = info->hook_entry[i]; newinfo->underflow[i] = info->underflow[i]; } - entry1 = newinfo->entries[raw_smp_processor_id()]; + entry1 = newinfo->entries; pos = entry1; size = total_size; xt_entry_foreach(iter0, entry0, total_size) { @@ -1791,11 +1772,6 @@ translate_compat_table(struct net *net, return ret; } - /* And one copy for every other CPU */ - for_each_possible_cpu(i) - if (newinfo->entries[i] && newinfo->entries[i] != entry1) - memcpy(newinfo->entries[i], entry1, newinfo->size); - *pinfo = newinfo; *pentry0 = entry1; xt_free_table_info(info); @@ -1842,8 +1818,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) if (!newinfo) return -ENOMEM; - /* choose the copy that is on our node/cpu */ - loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; + loc_cpu_entry = newinfo->entries; if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), tmp.size) != 0) { ret = -EFAULT; @@ -1914,7 +1889,6 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table, void __user *pos; unsigned int size; int ret = 0; - const void *loc_cpu_entry; unsigned int i = 0; struct ipt_entry *iter; @@ -1922,14 +1896,9 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table, if (IS_ERR(counters)) return PTR_ERR(counters); - /* choose the copy that is on our node/cpu, ... - * This choice is lazy (because current thread is - * allowed to migrate to another cpu) - */ - loc_cpu_entry = private->entries[raw_smp_processor_id()]; pos = userptr; size = total_size; - xt_entry_foreach(iter, loc_cpu_entry, total_size) { + xt_entry_foreach(iter, private->entries, total_size) { ret = compat_copy_entry_to_user(iter, &pos, &size, counters, i++); if (ret != 0) @@ -2104,8 +2073,7 @@ struct xt_table *ipt_register_table(struct net *net, goto out; } - /* choose the copy on our node/cpu, but dont care about preemption */ - loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; + loc_cpu_entry = newinfo->entries; memcpy(loc_cpu_entry, repl->entries, repl->size); ret = translate_table(net, newinfo, loc_cpu_entry, repl); @@ -2136,7 +2104,7 @@ void ipt_unregister_table(struct net *net, struct xt_table *table) private = xt_unregister_table(table); /* Decrease module usage counts and free resources */ - loc_cpu_entry = private->entries[raw_smp_processor_id()]; + loc_cpu_entry = private->entries; xt_entry_foreach(iter, loc_cpu_entry, private->size) cleanup_entry(iter, net); if (private->number > private->initial_entries) diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index a1190ee14723..80a7f0d38aa4 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -283,15 +283,13 @@ static void trace_packet(const struct sk_buff *skb, const struct xt_table_info *private, const struct ip6t_entry *e) { - const void *table_base; const struct ip6t_entry *root; const char *hookname, *chainname, *comment; const struct ip6t_entry *iter; unsigned int rulenum = 0; struct net *net = dev_net(in ? in : out); - table_base = private->entries[smp_processor_id()]; - root = get_entry(table_base, private->hook_entry[hook]); + root = get_entry(private->entries, private->hook_entry[hook]); hookname = chainname = hooknames[hook]; comment = comments[NF_IP6_TRACE_COMMENT_RULE]; @@ -357,7 +355,7 @@ ip6t_do_table(struct sk_buff *skb, */ smp_read_barrier_depends(); cpu = smp_processor_id(); - table_base = private->entries[cpu]; + table_base = private->entries; jumpstack = (struct ip6t_entry **)private->jumpstack[cpu]; stackptr = per_cpu_ptr(private->stackptr, cpu); origptr = *stackptr; @@ -890,12 +888,6 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, return ret; } - /* And one copy for every other CPU */ - for_each_possible_cpu(i) { - if (newinfo->entries[i] && newinfo->entries[i] != entry0) - memcpy(newinfo->entries[i], entry0, newinfo->size); - } - return ret; } @@ -911,7 +903,7 @@ get_counters(const struct xt_table_info *t, seqcount_t *s = &per_cpu(xt_recseq, cpu); i = 0; - xt_entry_foreach(iter, t->entries[cpu], t->size) { + xt_entry_foreach(iter, t->entries, t->size) { struct xt_counters *tmp; u64 bcnt, pcnt; unsigned int start; @@ -959,17 +951,13 @@ copy_entries_to_user(unsigned int total_size, struct xt_counters *counters; const struct xt_table_info *private = table->private; int ret = 0; - const void *loc_cpu_entry; + void *loc_cpu_entry; counters = alloc_counters(table); if (IS_ERR(counters)) return PTR_ERR(counters); - /* choose the copy that is on our node/cpu, ... - * This choice is lazy (because current thread is - * allowed to migrate to another cpu) - */ - loc_cpu_entry = private->entries[raw_smp_processor_id()]; + loc_cpu_entry = private->entries; if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) { ret = -EFAULT; goto free_counters; @@ -1083,10 +1071,10 @@ static int compat_table_info(const struct xt_table_info *info, if (!newinfo || !info) return -EINVAL; - /* we dont care about newinfo->entries[] */ + /* we dont care about newinfo->entries */ memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); newinfo->initial_entries = 0; - loc_cpu_entry = info->entries[raw_smp_processor_id()]; + loc_cpu_entry = info->entries; xt_compat_init_offsets(AF_INET6, info->number); xt_entry_foreach(iter, loc_cpu_entry, info->size) { ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); @@ -1207,7 +1195,6 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, struct xt_table *t; struct xt_table_info *oldinfo; struct xt_counters *counters; - const void *loc_cpu_old_entry; struct ip6t_entry *iter; ret = 0; @@ -1250,8 +1237,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, get_counters(oldinfo, counters); /* Decrease module usage counts and free resource */ - loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; - xt_entry_foreach(iter, loc_cpu_old_entry, oldinfo->size) + xt_entry_foreach(iter, oldinfo->entries, oldinfo->size) cleanup_entry(iter, net); xt_free_table_info(oldinfo); @@ -1297,8 +1283,7 @@ do_replace(struct net *net, const void __user *user, unsigned int len) if (!newinfo) return -ENOMEM; - /* choose the copy that is on our node/cpu */ - loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; + loc_cpu_entry = newinfo->entries; if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), tmp.size) != 0) { ret = -EFAULT; @@ -1329,7 +1314,7 @@ static int do_add_counters(struct net *net, const void __user *user, unsigned int len, int compat) { - unsigned int i, curcpu; + unsigned int i; struct xt_counters_info tmp; struct xt_counters *paddc; unsigned int num_counters; @@ -1339,7 +1324,6 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len, struct xt_table *t; const struct xt_table_info *private; int ret = 0; - const void *loc_cpu_entry; struct ip6t_entry *iter; unsigned int addend; #ifdef CONFIG_COMPAT @@ -1395,11 +1379,8 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len, } i = 0; - /* Choose the copy that is on our node */ - curcpu = smp_processor_id(); addend = xt_write_recseq_begin(); - loc_cpu_entry = private->entries[curcpu]; - xt_entry_foreach(iter, loc_cpu_entry, private->size) { + xt_entry_foreach(iter, private->entries, private->size) { struct xt_counters *tmp; tmp = xt_get_this_cpu_counter(&iter->counters); @@ -1407,7 +1388,6 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len, ++i; } xt_write_recseq_end(addend); - unlock_up_free: local_bh_enable(); xt_table_unlock(t); @@ -1750,7 +1730,7 @@ translate_compat_table(struct net *net, newinfo->hook_entry[i] = info->hook_entry[i]; newinfo->underflow[i] = info->underflow[i]; } - entry1 = newinfo->entries[raw_smp_processor_id()]; + entry1 = newinfo->entries; pos = entry1; size = total_size; xt_entry_foreach(iter0, entry0, total_size) { @@ -1802,11 +1782,6 @@ translate_compat_table(struct net *net, return ret; } - /* And one copy for every other CPU */ - for_each_possible_cpu(i) - if (newinfo->entries[i] && newinfo->entries[i] != entry1) - memcpy(newinfo->entries[i], entry1, newinfo->size); - *pinfo = newinfo; *pentry0 = entry1; xt_free_table_info(info); @@ -1853,8 +1828,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) if (!newinfo) return -ENOMEM; - /* choose the copy that is on our node/cpu */ - loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; + loc_cpu_entry = newinfo->entries; if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), tmp.size) != 0) { ret = -EFAULT; @@ -1925,7 +1899,6 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table, void __user *pos; unsigned int size; int ret = 0; - const void *loc_cpu_entry; unsigned int i = 0; struct ip6t_entry *iter; @@ -1933,14 +1906,9 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table, if (IS_ERR(counters)) return PTR_ERR(counters); - /* choose the copy that is on our node/cpu, ... - * This choice is lazy (because current thread is - * allowed to migrate to another cpu) - */ - loc_cpu_entry = private->entries[raw_smp_processor_id()]; pos = userptr; size = total_size; - xt_entry_foreach(iter, loc_cpu_entry, total_size) { + xt_entry_foreach(iter, private->entries, total_size) { ret = compat_copy_entry_to_user(iter, &pos, &size, counters, i++); if (ret != 0) @@ -2115,8 +2083,7 @@ struct xt_table *ip6t_register_table(struct net *net, goto out; } - /* choose the copy on our node/cpu, but dont care about preemption */ - loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; + loc_cpu_entry = newinfo->entries; memcpy(loc_cpu_entry, repl->entries, repl->size); ret = translate_table(net, newinfo, loc_cpu_entry, repl); @@ -2146,7 +2113,7 @@ void ip6t_unregister_table(struct net *net, struct xt_table *table) private = xt_unregister_table(table); /* Decrease module usage counts and free resources */ - loc_cpu_entry = private->entries[raw_smp_processor_id()]; + loc_cpu_entry = private->entries; xt_entry_foreach(iter, loc_cpu_entry, private->size) cleanup_entry(iter, net); if (private->number > private->initial_entries) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 83032464a4bd..6062ce3e862c 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -659,7 +659,6 @@ EXPORT_SYMBOL_GPL(xt_compat_target_to_user); struct xt_table_info *xt_alloc_table_info(unsigned int size) { struct xt_table_info *newinfo; - int cpu; /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */ if ((SMP_ALIGN(size) >> PAGE_SHIFT) + 2 > totalram_pages) @@ -671,19 +670,14 @@ struct xt_table_info *xt_alloc_table_info(unsigned int size) newinfo->size = size; - for_each_possible_cpu(cpu) { - if (size <= PAGE_SIZE) - newinfo->entries[cpu] = kmalloc_node(size, - GFP_KERNEL, - cpu_to_node(cpu)); - else - newinfo->entries[cpu] = vmalloc_node(size, - cpu_to_node(cpu)); + if (size <= PAGE_SIZE) + newinfo->entries = kmalloc(size, GFP_KERNEL); + else + newinfo->entries = vmalloc(size); - if (newinfo->entries[cpu] == NULL) { - xt_free_table_info(newinfo); - return NULL; - } + if (newinfo->entries == NULL) { + xt_free_table_info(newinfo); + return NULL; } return newinfo; @@ -694,8 +688,7 @@ void xt_free_table_info(struct xt_table_info *info) { int cpu; - for_each_possible_cpu(cpu) - kvfree(info->entries[cpu]); + kvfree(info->entries); if (info->jumpstack != NULL) { for_each_possible_cpu(cpu) -- cgit v1.2.3 From 1e98a0f08abddde87f0f93237f10629ecb4880ef Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 12 Jun 2015 19:31:32 -0700 Subject: flow_dissector: fix ipv6 dst, hop-by-hop and routing ext hdrs __skb_header_pointer() returns a pointer that must be checked. Fixes infinite loop reported by Alexei, and add __must_check to catch these errors earlier. Fixes: 6a74fcf426f5 ("flow_dissector: add support for dst, hop-by-hop and routing ext hdrs") Reported-by: Alexei Starovoitov Tested-by: Alexei Starovoitov Signed-off-by: Eric Dumazet Acked-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/skbuff.h | 9 +++++---- net/core/flow_dissector.c | 6 ++++-- 2 files changed, 9 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index cc612fc0a894..a7acc92aa668 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2743,8 +2743,9 @@ __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len, __wsum skb_checksum(const struct sk_buff *skb, int offset, int len, __wsum csum); -static inline void *__skb_header_pointer(const struct sk_buff *skb, int offset, - int len, void *data, int hlen, void *buffer) +static inline void * __must_check +__skb_header_pointer(const struct sk_buff *skb, int offset, + int len, void *data, int hlen, void *buffer) { if (hlen - offset >= len) return data + offset; @@ -2756,8 +2757,8 @@ static inline void *__skb_header_pointer(const struct sk_buff *skb, int offset, return buffer; } -static inline void *skb_header_pointer(const struct sk_buff *skb, int offset, - int len, void *buffer) +static inline void * __must_check +skb_header_pointer(const struct sk_buff *skb, int offset, int len, void *buffer) { return __skb_header_pointer(skb, offset, len, skb->data, skb_headlen(skb), buffer); diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 22e4dffa0c8b..476e5dda59e1 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -394,9 +394,11 @@ ip_proto_again: opthdr = __skb_header_pointer(skb, nhoff, sizeof(_opthdr), data, hlen, &_opthdr); + if (!opthdr) + return false; - ip_proto = _opthdr[0]; - nhoff += (_opthdr[1] + 1) << 3; + ip_proto = opthdr[0]; + nhoff += (opthdr[1] + 1) << 3; goto ip_proto_again; } -- cgit v1.2.3 From aaeb6e24f5b6cb6a664fbdec6e08b65c3173c1b3 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Fri, 12 Jun 2015 21:07:54 +0200 Subject: netfilter: ipset: Use MSEC_PER_SEC consistently Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set_timeout.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set_timeout.h b/include/linux/netfilter/ipset/ip_set_timeout.h index 83c2f9e0886c..3c8842bcedaa 100644 --- a/include/linux/netfilter/ipset/ip_set_timeout.h +++ b/include/linux/netfilter/ipset/ip_set_timeout.h @@ -61,7 +61,7 @@ ip_set_timeout_set(unsigned long *timeout, u32 t) return; } - *timeout = msecs_to_jiffies(t * 1000) + jiffies; + *timeout = msecs_to_jiffies(t * MSEC_PER_SEC) + jiffies; if (*timeout == IPSET_ELEM_PERMANENT) /* Bingo! :-) */ (*timeout)--; @@ -71,7 +71,7 @@ static inline u32 ip_set_timeout_get(unsigned long *timeout) { return *timeout == IPSET_ELEM_PERMANENT ? 0 : - jiffies_to_msecs(*timeout - jiffies)/1000; + jiffies_to_msecs(*timeout - jiffies)/MSEC_PER_SEC; } #endif /* __KERNEL__ */ -- cgit v1.2.3 From f690cbaed9fe4d77592e24139db7ad790641c4fd Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Fri, 12 Jun 2015 22:11:00 +0200 Subject: netfilter: ipset: Fix cidr handling for hash:*net* types Commit "Simplify cidr handling for hash:*net* types" broke the cidr handling for the hash:*net* types when the sets were used by the SET target: entries with invalid cidr values were added to the sets. Reported by Jonathan Johnson. Testsuite entry is added to verify the fix. Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set.h | 2 -- net/netfilter/ipset/ip_set_hash_gen.h | 44 ++++++++++++++++------------ net/netfilter/ipset/ip_set_hash_ipportnet.c | 4 +-- net/netfilter/ipset/ip_set_hash_net.c | 4 +-- net/netfilter/ipset/ip_set_hash_netiface.c | 4 +-- net/netfilter/ipset/ip_set_hash_netnet.c | 8 ++--- net/netfilter/ipset/ip_set_hash_netport.c | 4 +-- net/netfilter/ipset/ip_set_hash_netportnet.c | 8 ++--- 8 files changed, 42 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index ffdfdc24952a..a6fe1ce96437 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -545,8 +545,6 @@ ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set, { .bytes = ULLONG_MAX, .packets = ULLONG_MAX, \ .timeout = (set)->timeout } -#define IP_SET_INIT_CIDR(a, b) ((a) ? (a) : (b)) - #define IPSET_CONCAT(a, b) a##b #define IPSET_TOKEN(a, b) IPSET_CONCAT(a, b) diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 8dd82db05ed0..2f1985e71f6c 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -149,17 +149,21 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize) #endif /* cidr + 1 is stored in net_prefixes to support /0 */ -#define SCIDR(cidr, i) (__CIDR(cidr, i) + 1) +#define NCIDR_PUT(cidr) ((cidr) + 1) +#define NCIDR_GET(cidr) ((cidr) - 1) #ifdef IP_SET_HASH_WITH_NETS_PACKED /* When cidr is packed with nomatch, cidr - 1 is stored in the data entry */ -#define GCIDR(cidr, i) (__CIDR(cidr, i) + 1) -#define NCIDR(cidr) (cidr) +#define DCIDR_PUT(cidr) ((cidr) - 1) +#define DCIDR_GET(cidr, i) (__CIDR(cidr, i) + 1) #else -#define GCIDR(cidr, i) (__CIDR(cidr, i)) -#define NCIDR(cidr) (cidr - 1) +#define DCIDR_PUT(cidr) (cidr) +#define DCIDR_GET(cidr, i) __CIDR(cidr, i) #endif +#define INIT_CIDR(cidr, host_mask) \ + DCIDR_PUT(((cidr) ? NCIDR_GET(cidr) : host_mask)) + #define SET_HOST_MASK(family) (family == AF_INET ? 32 : 128) #ifdef IP_SET_HASH_WITH_NET0 @@ -303,7 +307,8 @@ struct htype { #ifdef IP_SET_HASH_WITH_NETS /* Network cidr size book keeping when the hash stores different - * sized networks */ + * sized networks. cidr == real cidr + 1 to support /0. + */ static void mtype_add_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n) { @@ -498,8 +503,10 @@ mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize) pr_debug("expired %u/%u\n", i, j); #ifdef IP_SET_HASH_WITH_NETS for (k = 0; k < IPSET_NET_COUNT; k++) - mtype_del_cidr(h, SCIDR(data->cidr, k), - nets_length, k); + mtype_del_cidr(h, + NCIDR_PUT(DCIDR_GET(data->cidr, + k)), + nets_length, k); #endif ip_set_ext_destroy(set, data); if (j != n->pos - 1) @@ -692,9 +699,9 @@ reuse_slot: data = ahash_data(n, j, set->dsize); #ifdef IP_SET_HASH_WITH_NETS for (i = 0; i < IPSET_NET_COUNT; i++) { - mtype_del_cidr(h, SCIDR(data->cidr, i), + mtype_del_cidr(h, NCIDR_PUT(DCIDR_GET(data->cidr, i)), NLEN(set->family), i); - mtype_add_cidr(h, SCIDR(d->cidr, i), + mtype_add_cidr(h, NCIDR_PUT(DCIDR_GET(d->cidr, i)), NLEN(set->family), i); } #endif @@ -711,8 +718,8 @@ reuse_slot: data = ahash_data(n, n->pos++, set->dsize); #ifdef IP_SET_HASH_WITH_NETS for (i = 0; i < IPSET_NET_COUNT; i++) - mtype_add_cidr(h, SCIDR(d->cidr, i), NLEN(set->family), - i); + mtype_add_cidr(h, NCIDR_PUT(DCIDR_GET(d->cidr, i)), + NLEN(set->family), i); #endif h->elements++; } @@ -772,8 +779,8 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, h->elements--; #ifdef IP_SET_HASH_WITH_NETS for (j = 0; j < IPSET_NET_COUNT; j++) - mtype_del_cidr(h, SCIDR(d->cidr, j), NLEN(set->family), - j); + mtype_del_cidr(h, NCIDR_PUT(DCIDR_GET(d->cidr, j)), + NLEN(set->family), j); #endif ip_set_ext_destroy(set, data); if (n->pos + AHASH_INIT_SIZE < n->size) { @@ -836,12 +843,13 @@ mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d, for (; j < nets_length && h->nets[j].cidr[0] && !multi; j++) { #if IPSET_NET_COUNT == 2 mtype_data_reset_elem(d, &orig); - mtype_data_netmask(d, NCIDR(h->nets[j].cidr[0]), false); + mtype_data_netmask(d, NCIDR_GET(h->nets[j].cidr[0]), false); for (k = 0; k < nets_length && h->nets[k].cidr[1] && !multi; k++) { - mtype_data_netmask(d, NCIDR(h->nets[k].cidr[1]), true); + mtype_data_netmask(d, NCIDR_GET(h->nets[k].cidr[1]), + true); #else - mtype_data_netmask(d, NCIDR(h->nets[j].cidr[0])); + mtype_data_netmask(d, NCIDR_GET(h->nets[j].cidr[0])); #endif key = HKEY(d, h->initval, t->htable_bits); n = hbucket(t, key); @@ -889,7 +897,7 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext, /* If we test an IP address and not a network address, * try all possible network sizes */ for (i = 0; i < IPSET_NET_COUNT; i++) - if (GCIDR(d->cidr, i) != SET_HOST_MASK(set->family)) + if (DCIDR_GET(d->cidr, i) != SET_HOST_MASK(set->family)) break; if (i == IPSET_NET_COUNT) { ret = mtype_test_cidrs(set, d, ext, mext, flags); diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index e38a029f3002..50248debdc8b 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -141,7 +141,7 @@ hash_ipportnet4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct hash_ipportnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportnet4_elem e = { - .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1, + .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); @@ -389,7 +389,7 @@ hash_ipportnet6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct hash_ipportnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportnet6_elem e = { - .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1, + .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index d19926ac3a03..089b23fd1a94 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -120,7 +120,7 @@ hash_net4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct hash_net *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_net4_elem e = { - .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), + .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); @@ -288,7 +288,7 @@ hash_net6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct hash_net *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_net6_elem e = { - .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), + .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index 7b69fa28d774..aac20768f6f0 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -235,7 +235,7 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb, struct hash_netiface *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface4_elem e = { - .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), + .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), .elem = 1, }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); @@ -469,7 +469,7 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb, struct hash_netiface *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface6_elem e = { - .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), + .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), .elem = 1, }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c index 11eee0077b8b..ed9cc45084dd 100644 --- a/net/netfilter/ipset/ip_set_hash_netnet.c +++ b/net/netfilter/ipset/ip_set_hash_netnet.c @@ -141,8 +141,8 @@ hash_netnet4_kadt(struct ip_set *set, const struct sk_buff *skb, struct hash_netnet4_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); - e.cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK); - e.cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK); + e.cidr[0] = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK); + e.cidr[1] = INIT_CIDR(h->nets[0].cidr[1], HOST_MASK); if (adt == IPSET_TEST) e.ccmp = (HOST_MASK << (sizeof(e.cidr[0]) * 8)) | HOST_MASK; @@ -364,8 +364,8 @@ hash_netnet6_kadt(struct ip_set *set, const struct sk_buff *skb, struct hash_netnet6_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); - e.cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK); - e.cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK); + e.cidr[0] = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK); + e.cidr[1] = INIT_CIDR(h->nets[0].cidr[1], HOST_MASK); if (adt == IPSET_TEST) e.ccmp = (HOST_MASK << (sizeof(u8)*8)) | HOST_MASK; diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index 7a6448cbd8fb..fbaf8138e5d4 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -136,7 +136,7 @@ hash_netport4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct hash_netport *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netport4_elem e = { - .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1, + .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); @@ -348,7 +348,7 @@ hash_netport6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct hash_netport *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netport6_elem e = { - .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1, + .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c index 6eb5f8798304..a828cbc8bed7 100644 --- a/net/netfilter/ipset/ip_set_hash_netportnet.c +++ b/net/netfilter/ipset/ip_set_hash_netportnet.c @@ -152,8 +152,8 @@ hash_netportnet4_kadt(struct ip_set *set, const struct sk_buff *skb, struct hash_netportnet4_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); - e.cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK); - e.cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK); + e.cidr[0] = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK); + e.cidr[1] = INIT_CIDR(h->nets[0].cidr[1], HOST_MASK); if (adt == IPSET_TEST) e.ccmp = (HOST_MASK << (sizeof(e.cidr[0]) * 8)) | HOST_MASK; @@ -418,8 +418,8 @@ hash_netportnet6_kadt(struct ip_set *set, const struct sk_buff *skb, struct hash_netportnet6_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); - e.cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK); - e.cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK); + e.cidr[0] = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK); + e.cidr[1] = INIT_CIDR(h->nets[0].cidr[1], HOST_MASK); if (adt == IPSET_TEST) e.ccmp = (HOST_MASK << (sizeof(u8) * 8)) | HOST_MASK; -- cgit v1.2.3 From c4c997839cf92cb1037e43a85cdb4cbf44ed39a5 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Sat, 13 Jun 2015 11:59:45 +0200 Subject: netfilter: ipset: Fix parallel resizing and listing of the same set When elements added to a hash:* type of set and resizing triggered, parallel listing could start to list the original set (before resizing) and "continue" with listing the new set. Fix it by references and using the original hash table for listing. Therefore the destroying of the original hash table may happen from the resizing or listing functions. Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set.h | 13 ++++++---- net/netfilter/ipset/ip_set_core.c | 29 ++++++++++++---------- net/netfilter/ipset/ip_set_hash_gen.h | 44 +++++++++++++++++++++++++++++++--- 3 files changed, 66 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index a6fe1ce96437..5674b6ac6646 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -176,6 +176,9 @@ struct ip_set_type_variant { /* List elements */ int (*list)(const struct ip_set *set, struct sk_buff *skb, struct netlink_callback *cb); + /* Keep listing private when resizing runs parallel */ + void (*uref)(struct ip_set *set, struct netlink_callback *cb, + bool start); /* Return true if "b" set is the same as "a" * according to the create set parameters */ @@ -380,12 +383,12 @@ ip_set_init_counter(struct ip_set_counter *counter, /* Netlink CB args */ enum { - IPSET_CB_NET = 0, - IPSET_CB_DUMP, - IPSET_CB_INDEX, - IPSET_CB_ARG0, + IPSET_CB_NET = 0, /* net namespace */ + IPSET_CB_DUMP, /* dump single set/all sets */ + IPSET_CB_INDEX, /* set index */ + IPSET_CB_PRIVATE, /* set private data */ + IPSET_CB_ARG0, /* type specific */ IPSET_CB_ARG1, - IPSET_CB_ARG2, }; /* register and unregister set references */ diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 68ae551f2b39..777cac6fd64d 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1211,12 +1211,16 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb, static int ip_set_dump_done(struct netlink_callback *cb) { - struct ip_set_net *inst = (struct ip_set_net *)cb->args[IPSET_CB_NET]; if (cb->args[IPSET_CB_ARG0]) { - pr_debug("release set %s\n", - ip_set(inst, cb->args[IPSET_CB_INDEX])->name); - __ip_set_put_byindex(inst, - (ip_set_id_t) cb->args[IPSET_CB_INDEX]); + struct ip_set_net *inst = + (struct ip_set_net *)cb->args[IPSET_CB_NET]; + ip_set_id_t index = (ip_set_id_t)cb->args[IPSET_CB_INDEX]; + struct ip_set *set = ip_set(inst, index); + + if (set->variant->uref) + set->variant->uref(set, cb, false); + pr_debug("release set %s\n", set->name); + __ip_set_put_byindex(inst, index); } return 0; } @@ -1247,12 +1251,6 @@ dump_init(struct netlink_callback *cb, struct ip_set_net *inst) nla_parse(cda, IPSET_ATTR_CMD_MAX, attr, nlh->nlmsg_len - min_len, ip_set_setname_policy); - /* cb->args[IPSET_CB_NET]: net namespace - * [IPSET_CB_DUMP]: dump single set/all sets - * [IPSET_CB_INDEX]: set index - * [IPSET_CB_ARG0]: type specific - */ - if (cda[IPSET_ATTR_SETNAME]) { struct ip_set *set; @@ -1359,6 +1357,8 @@ dump_last: goto release_refcount; if (dump_flags & IPSET_FLAG_LIST_HEADER) goto next_set; + if (set->variant->uref) + set->variant->uref(set, cb, true); /* Fall through and add elements */ default: read_lock_bh(&set->lock); @@ -1375,6 +1375,8 @@ dump_last: dump_type = DUMP_LAST; cb->args[IPSET_CB_DUMP] = dump_type | (dump_flags << 16); cb->args[IPSET_CB_INDEX] = 0; + if (set && set->variant->uref) + set->variant->uref(set, cb, false); goto dump_last; } goto out; @@ -1389,7 +1391,10 @@ next_set: release_refcount: /* If there was an error or set is done, release set */ if (ret || !cb->args[IPSET_CB_ARG0]) { - pr_debug("release set %s\n", ip_set(inst, index)->name); + set = ip_set(inst, index); + if (set->variant->uref) + set->variant->uref(set, cb, false); + pr_debug("release set %s\n", set->name); __ip_set_put_byindex(inst, index); cb->args[IPSET_CB_ARG0] = 0; } diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 2f1985e71f6c..5fcf70b0ebc2 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -71,6 +71,8 @@ struct hbucket { /* The hash table: the table size stored here in order to make resizing easy */ struct htable { + atomic_t ref; /* References for resizing */ + atomic_t uref; /* References for dumping */ u8 htable_bits; /* size of hash table == 2^htable_bits */ struct hbucket bucket[0]; /* hashtable buckets */ }; @@ -207,6 +209,7 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize) #undef mtype_del #undef mtype_test_cidrs #undef mtype_test +#undef mtype_uref #undef mtype_expire #undef mtype_resize #undef mtype_head @@ -248,6 +251,7 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize) #define mtype_del IPSET_TOKEN(MTYPE, _del) #define mtype_test_cidrs IPSET_TOKEN(MTYPE, _test_cidrs) #define mtype_test IPSET_TOKEN(MTYPE, _test) +#define mtype_uref IPSET_TOKEN(MTYPE, _uref) #define mtype_expire IPSET_TOKEN(MTYPE, _expire) #define mtype_resize IPSET_TOKEN(MTYPE, _resize) #define mtype_head IPSET_TOKEN(MTYPE, _head) @@ -595,6 +599,9 @@ retry: t->htable_bits = htable_bits; read_lock_bh(&set->lock); + /* There can't be another parallel resizing, but dumping is possible */ + atomic_set(&orig->ref, 1); + atomic_inc(&orig->uref); for (i = 0; i < jhash_size(orig->htable_bits); i++) { n = hbucket(orig, i); for (j = 0; j < n->pos; j++) { @@ -609,6 +616,8 @@ retry: #ifdef IP_SET_HASH_WITH_NETS mtype_data_reset_flags(data, &flags); #endif + atomic_set(&orig->ref, 0); + atomic_dec(&orig->uref); read_unlock_bh(&set->lock); mtype_ahash_destroy(set, t, false); if (ret == -EAGAIN) @@ -631,7 +640,11 @@ retry: pr_debug("set %s resized from %u (%p) to %u (%p)\n", set->name, orig->htable_bits, orig, t->htable_bits, t); - mtype_ahash_destroy(set, orig, false); + /* If there's nobody else dumping the table, destroy it */ + if (atomic_dec_and_test(&orig->uref)) { + pr_debug("Table destroy by resize %p\n", orig); + mtype_ahash_destroy(set, orig, false); + } return 0; } @@ -961,13 +974,36 @@ nla_put_failure: return -EMSGSIZE; } +/* Make possible to run dumping parallel with resizing */ +static void +mtype_uref(struct ip_set *set, struct netlink_callback *cb, bool start) +{ + struct htype *h = set->data; + struct htable *t; + + if (start) { + rcu_read_lock_bh(); + t = rcu_dereference_bh_nfnl(h->table); + atomic_inc(&t->uref); + cb->args[IPSET_CB_PRIVATE] = (unsigned long)t; + rcu_read_unlock_bh(); + } else if (cb->args[IPSET_CB_PRIVATE]) { + t = (struct htable *)cb->args[IPSET_CB_PRIVATE]; + if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) { + /* Resizing didn't destroy the hash table */ + pr_debug("Table destroy by dump: %p\n", t); + mtype_ahash_destroy(set, t, false); + } + cb->args[IPSET_CB_PRIVATE] = 0; + } +} + /* Reply a LIST/SAVE request: dump the elements of the specified set */ static int mtype_list(const struct ip_set *set, struct sk_buff *skb, struct netlink_callback *cb) { - const struct htype *h = set->data; - const struct htable *t = rcu_dereference_bh_nfnl(h->table); + const struct htable *t; struct nlattr *atd, *nested; const struct hbucket *n; const struct mtype_elem *e; @@ -980,6 +1016,7 @@ mtype_list(const struct ip_set *set, if (!atd) return -EMSGSIZE; pr_debug("list hash set %s\n", set->name); + t = (const struct htable *)cb->args[IPSET_CB_PRIVATE]; for (; cb->args[IPSET_CB_ARG0] < jhash_size(t->htable_bits); cb->args[IPSET_CB_ARG0]++) { incomplete = skb_tail_pointer(skb); @@ -1047,6 +1084,7 @@ static const struct ip_set_type_variant mtype_variant = { .flush = mtype_flush, .head = mtype_head, .list = mtype_list, + .uref = mtype_uref, .resize = mtype_resize, .same_set = mtype_same_set, }; -- cgit v1.2.3 From b57b2d1fa53fe8563bdfc66a33b844463b9af285 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Sat, 13 Jun 2015 14:22:25 +0200 Subject: netfilter: ipset: Prepare the ipset core to use RCU at set level Replace rwlock_t with spinlock_t in "struct ip_set" and change the locking accordingly. Convert the comment extension into an rcu-avare object. Also, simplify the timeout routines. Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set.h | 9 ++++-- include/linux/netfilter/ipset/ip_set_comment.h | 38 +++++++++++++++------- include/linux/netfilter/ipset/ip_set_timeout.h | 25 ++++++--------- net/netfilter/ipset/ip_set_core.c | 44 +++++++++++++------------- 4 files changed, 66 insertions(+), 50 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 5674b6ac6646..19b4969a25fe 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -108,8 +108,13 @@ struct ip_set_counter { atomic64_t packets; }; +struct ip_set_comment_rcu { + struct rcu_head rcu; + char str[0]; +}; + struct ip_set_comment { - char *str; + struct ip_set_comment_rcu __rcu *c; }; struct ip_set_skbinfo { @@ -226,7 +231,7 @@ struct ip_set { /* The name of the set */ char name[IPSET_MAXNAMELEN]; /* Lock protecting the set data */ - rwlock_t lock; + spinlock_t lock; /* References to the set */ u32 ref; /* The core set type */ diff --git a/include/linux/netfilter/ipset/ip_set_comment.h b/include/linux/netfilter/ipset/ip_set_comment.h index 21217ea008d7..8d0248525957 100644 --- a/include/linux/netfilter/ipset/ip_set_comment.h +++ b/include/linux/netfilter/ipset/ip_set_comment.h @@ -16,41 +16,57 @@ ip_set_comment_uget(struct nlattr *tb) return nla_data(tb); } +/* Called from uadd only, protected by the set spinlock. + * The kadt functions don't use the comment extensions in any way. + */ static inline void ip_set_init_comment(struct ip_set_comment *comment, const struct ip_set_ext *ext) { + struct ip_set_comment_rcu *c = rcu_dereference_protected(comment->c, 1); size_t len = ext->comment ? strlen(ext->comment) : 0; - if (unlikely(comment->str)) { - kfree(comment->str); - comment->str = NULL; + if (unlikely(c)) { + kfree_rcu(c, rcu); + rcu_assign_pointer(comment->c, NULL); } if (!len) return; if (unlikely(len > IPSET_MAX_COMMENT_SIZE)) len = IPSET_MAX_COMMENT_SIZE; - comment->str = kzalloc(len + 1, GFP_ATOMIC); - if (unlikely(!comment->str)) + c = kzalloc(sizeof(*c) + len + 1, GFP_ATOMIC); + if (unlikely(!c)) return; - strlcpy(comment->str, ext->comment, len + 1); + strlcpy(c->str, ext->comment, len + 1); + rcu_assign_pointer(comment->c, c); } +/* Used only when dumping a set, protected by rcu_read_lock_bh() */ static inline int ip_set_put_comment(struct sk_buff *skb, struct ip_set_comment *comment) { - if (!comment->str) + struct ip_set_comment_rcu *c = rcu_dereference_bh(comment->c); + + if (!c) return 0; - return nla_put_string(skb, IPSET_ATTR_COMMENT, comment->str); + return nla_put_string(skb, IPSET_ATTR_COMMENT, c->str); } +/* Called from uadd/udel, flush or the garbage collectors protected + * by the set spinlock. + * Called when the set is destroyed and when there can't be any user + * of the set data anymore. + */ static inline void ip_set_comment_free(struct ip_set_comment *comment) { - if (unlikely(!comment->str)) + struct ip_set_comment_rcu *c; + + c = rcu_dereference_protected(comment->c, 1); + if (unlikely(!c)) return; - kfree(comment->str); - comment->str = NULL; + kfree_rcu(c, rcu); + rcu_assign_pointer(comment->c, NULL); } #endif diff --git a/include/linux/netfilter/ipset/ip_set_timeout.h b/include/linux/netfilter/ipset/ip_set_timeout.h index 3c8842bcedaa..1d6a935c1ac5 100644 --- a/include/linux/netfilter/ipset/ip_set_timeout.h +++ b/include/linux/netfilter/ipset/ip_set_timeout.h @@ -40,31 +40,26 @@ ip_set_timeout_uget(struct nlattr *tb) } static inline bool -ip_set_timeout_test(unsigned long timeout) +ip_set_timeout_expired(unsigned long *t) { - return timeout == IPSET_ELEM_PERMANENT || - time_is_after_jiffies(timeout); -} - -static inline bool -ip_set_timeout_expired(unsigned long *timeout) -{ - return *timeout != IPSET_ELEM_PERMANENT && - time_is_before_jiffies(*timeout); + return *t != IPSET_ELEM_PERMANENT && time_is_before_jiffies(*t); } static inline void -ip_set_timeout_set(unsigned long *timeout, u32 t) +ip_set_timeout_set(unsigned long *timeout, u32 value) { - if (!t) { + unsigned long t; + + if (!value) { *timeout = IPSET_ELEM_PERMANENT; return; } - *timeout = msecs_to_jiffies(t * MSEC_PER_SEC) + jiffies; - if (*timeout == IPSET_ELEM_PERMANENT) + t = msecs_to_jiffies(value * MSEC_PER_SEC) + jiffies; + if (t == IPSET_ELEM_PERMANENT) /* Bingo! :-) */ - (*timeout)--; + t--; + *timeout = t; } static inline u32 diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 87b4182660ba..2b21a1983a98 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -209,15 +209,15 @@ ip_set_type_register(struct ip_set_type *type) pr_warn("ip_set type %s, family %s with revision min %u already registered!\n", type->name, family_name(type->family), type->revision_min); - ret = -EINVAL; - goto unlock; + ip_set_type_unlock(); + return -EINVAL; } list_add_rcu(&type->list, &ip_set_type_list); pr_debug("type %s, family %s, revision %u:%u registered.\n", type->name, family_name(type->family), type->revision_min, type->revision_max); -unlock: ip_set_type_unlock(); + return ret; } EXPORT_SYMBOL_GPL(ip_set_type_register); @@ -231,12 +231,12 @@ ip_set_type_unregister(struct ip_set_type *type) pr_warn("ip_set type %s, family %s with revision min %u not registered\n", type->name, family_name(type->family), type->revision_min); - goto unlock; + ip_set_type_unlock(); + return; } list_del_rcu(&type->list); pr_debug("type %s, family %s with revision min %u unregistered.\n", type->name, family_name(type->family), type->revision_min); -unlock: ip_set_type_unlock(); synchronize_rcu(); @@ -531,16 +531,16 @@ ip_set_test(ip_set_id_t index, const struct sk_buff *skb, !(opt->family == set->family || set->family == NFPROTO_UNSPEC)) return 0; - read_lock_bh(&set->lock); + rcu_read_lock_bh(); ret = set->variant->kadt(set, skb, par, IPSET_TEST, opt); - read_unlock_bh(&set->lock); + rcu_read_unlock_bh(); if (ret == -EAGAIN) { /* Type requests element to be completed */ pr_debug("element must be completed, ADD is triggered\n"); - write_lock_bh(&set->lock); + spin_lock_bh(&set->lock); set->variant->kadt(set, skb, par, IPSET_ADD, opt); - write_unlock_bh(&set->lock); + spin_unlock_bh(&set->lock); ret = 1; } else { /* --return-nomatch: invert matched element */ @@ -570,9 +570,9 @@ ip_set_add(ip_set_id_t index, const struct sk_buff *skb, !(opt->family == set->family || set->family == NFPROTO_UNSPEC)) return -IPSET_ERR_TYPE_MISMATCH; - write_lock_bh(&set->lock); + spin_lock_bh(&set->lock); ret = set->variant->kadt(set, skb, par, IPSET_ADD, opt); - write_unlock_bh(&set->lock); + spin_unlock_bh(&set->lock); return ret; } @@ -593,9 +593,9 @@ ip_set_del(ip_set_id_t index, const struct sk_buff *skb, !(opt->family == set->family || set->family == NFPROTO_UNSPEC)) return -IPSET_ERR_TYPE_MISMATCH; - write_lock_bh(&set->lock); + spin_lock_bh(&set->lock); ret = set->variant->kadt(set, skb, par, IPSET_DEL, opt); - write_unlock_bh(&set->lock); + spin_unlock_bh(&set->lock); return ret; } @@ -880,7 +880,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, set = kzalloc(sizeof(struct ip_set), GFP_KERNEL); if (!set) return -ENOMEM; - rwlock_init(&set->lock); + spin_lock_init(&set->lock); strlcpy(set->name, name, IPSET_MAXNAMELEN); set->family = family; set->revision = revision; @@ -1062,9 +1062,9 @@ ip_set_flush_set(struct ip_set *set) { pr_debug("set: %s\n", set->name); - write_lock_bh(&set->lock); + spin_lock_bh(&set->lock); set->variant->flush(set); - write_unlock_bh(&set->lock); + spin_unlock_bh(&set->lock); } static int @@ -1377,9 +1377,9 @@ dump_last: set->variant->uref(set, cb, true); /* Fall through and add elements */ default: - read_lock_bh(&set->lock); + rcu_read_lock_bh(); ret = set->variant->list(set, skb, cb); - read_unlock_bh(&set->lock); + rcu_read_unlock_bh(); if (!cb->args[IPSET_CB_ARG0]) /* Set is done, proceed with next one */ goto next_set; @@ -1462,9 +1462,9 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set, bool eexist = flags & IPSET_FLAG_EXIST, retried = false; do { - write_lock_bh(&set->lock); + spin_lock_bh(&set->lock); ret = set->variant->uadt(set, tb, adt, &lineno, flags, retried); - write_unlock_bh(&set->lock); + spin_unlock_bh(&set->lock); retried = true; } while (ret == -EAGAIN && set->variant->resize && @@ -1644,9 +1644,9 @@ ip_set_utest(struct sock *ctnl, struct sk_buff *skb, set->type->adt_policy)) return -IPSET_ERR_PROTOCOL; - read_lock_bh(&set->lock); + rcu_read_lock_bh(); ret = set->variant->uadt(set, tb, IPSET_TEST, NULL, 0, 0); - read_unlock_bh(&set->lock); + rcu_read_unlock_bh(); /* Userspace can't trigger element to be re-added */ if (ret == -EAGAIN) ret = 1; -- cgit v1.2.3 From ca0f6a5cd99e0c6ba4bb78dc402817f636370f26 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Sat, 13 Jun 2015 19:45:33 +0200 Subject: netfilter: ipset: Fix coding styles reported by checkpatch.pl Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set.h | 5 +- include/uapi/linux/netfilter/ipset/ip_set.h | 6 +- net/netfilter/ipset/ip_set_bitmap_gen.h | 11 +- net/netfilter/ipset/ip_set_bitmap_ip.c | 12 +- net/netfilter/ipset/ip_set_bitmap_ipmac.c | 21 +-- net/netfilter/ipset/ip_set_bitmap_port.c | 7 +- net/netfilter/ipset/ip_set_core.c | 201 +++++++++++++-------------- net/netfilter/ipset/ip_set_getport.c | 13 +- net/netfilter/ipset/ip_set_hash_gen.h | 55 ++++---- net/netfilter/ipset/ip_set_hash_ip.c | 4 +- net/netfilter/ipset/ip_set_hash_ipmark.c | 9 +- net/netfilter/ipset/ip_set_hash_ipport.c | 14 +- net/netfilter/ipset/ip_set_hash_ipportip.c | 16 ++- net/netfilter/ipset/ip_set_hash_ipportnet.c | 19 ++- net/netfilter/ipset/ip_set_hash_mac.c | 6 +- net/netfilter/ipset/ip_set_hash_net.c | 8 +- net/netfilter/ipset/ip_set_hash_netiface.c | 25 ++-- net/netfilter/ipset/ip_set_hash_netnet.c | 46 +++--- net/netfilter/ipset/ip_set_hash_netport.c | 19 ++- net/netfilter/ipset/ip_set_hash_netportnet.c | 54 +++---- net/netfilter/ipset/ip_set_list_set.c | 11 +- net/netfilter/ipset/pfxlen.c | 16 +-- net/netfilter/xt_set.c | 44 +++--- 23 files changed, 327 insertions(+), 295 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 19b4969a25fe..48bb01edcf30 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -349,12 +349,11 @@ ip_set_put_skbinfo(struct sk_buff *skb, struct ip_set_skbinfo *skbinfo) cpu_to_be64((u64)skbinfo->skbmark << 32 | skbinfo->skbmarkmask))) || (skbinfo->skbprio && - nla_put_net32(skb, IPSET_ATTR_SKBPRIO, + nla_put_net32(skb, IPSET_ATTR_SKBPRIO, cpu_to_be32(skbinfo->skbprio))) || (skbinfo->skbqueue && - nla_put_net16(skb, IPSET_ATTR_SKBQUEUE, + nla_put_net16(skb, IPSET_ATTR_SKBQUEUE, cpu_to_be16(skbinfo->skbqueue))); - } static inline void diff --git a/include/uapi/linux/netfilter/ipset/ip_set.h b/include/uapi/linux/netfilter/ipset/ip_set.h index 5ab4e60894cf..63b2e34f1b60 100644 --- a/include/uapi/linux/netfilter/ipset/ip_set.h +++ b/include/uapi/linux/netfilter/ipset/ip_set.h @@ -15,12 +15,12 @@ /* The protocol version */ #define IPSET_PROTOCOL 6 -/* The maximum permissible comment length we will accept over netlink */ -#define IPSET_MAX_COMMENT_SIZE 255 - /* The max length of strings including NUL: set and type identifiers */ #define IPSET_MAXNAMELEN 32 +/* The maximum permissible comment length we will accept over netlink */ +#define IPSET_MAX_COMMENT_SIZE 255 + /* Message types and commands */ enum ipset_cmd { IPSET_CMD_NONE, diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index 86429f369128..d05e759ed0fa 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -41,7 +41,7 @@ mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) struct mtype *map = set->data; init_timer(&map->gc); - map->gc.data = (unsigned long) set; + map->gc.data = (unsigned long)set; map->gc.function = gc; map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ; add_timer(&map->gc); @@ -223,7 +223,7 @@ mtype_list(const struct ip_set *set, if (!test_bit(id, map->members) || (SET_WITH_TIMEOUT(set) && #ifdef IP_SET_BITMAP_STORED_TIMEOUT - mtype_is_filled((const struct mtype_elem *) x) && + mtype_is_filled((const struct mtype_elem *)x) && #endif ip_set_timeout_expired(ext_timeout(x, set)))) continue; @@ -240,7 +240,7 @@ mtype_list(const struct ip_set *set, if (mtype_do_list(skb, map, id, set->dsize)) goto nla_put_failure; if (ip_set_put_extensions(skb, set, x, - mtype_is_filled((const struct mtype_elem *) x))) + mtype_is_filled((const struct mtype_elem *)x))) goto nla_put_failure; ipset_nest_end(skb, nested); } @@ -266,13 +266,14 @@ out: static void mtype_gc(unsigned long ul_set) { - struct ip_set *set = (struct ip_set *) ul_set; + struct ip_set *set = (struct ip_set *)ul_set; struct mtype *map = set->data; void *x; u32 id; /* We run parallel with other readers (test element) - * but adding/deleting new entries is locked out */ + * but adding/deleting new entries is locked out + */ spin_lock_bh(&set->lock); for (id = 0; id < map->elements; id++) if (mtype_gc_test(id, map, set->dsize)) { diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c index b8ce474c038d..64a564334418 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ip.c +++ b/net/netfilter/ipset/ip_set_bitmap_ip.c @@ -59,7 +59,7 @@ struct bitmap_ip_adt_elem { static inline u32 ip_to_id(const struct bitmap_ip *m, u32 ip) { - return ((ip & ip_set_hostmask(m->netmask)) - m->first_ip)/m->hosts; + return ((ip & ip_set_hostmask(m->netmask)) - m->first_ip) / m->hosts; } /* Common functions */ @@ -175,8 +175,9 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[], if (!cidr || cidr > HOST_MASK) return -IPSET_ERR_INVALID_CIDR; ip_set_mask_from_to(ip, ip_to, cidr); - } else + } else { ip_to = ip; + } if (ip_to > map->last_ip) return -IPSET_ERR_BITMAP_RANGE; @@ -187,8 +188,8 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[], if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; } return ret; } @@ -278,8 +279,9 @@ bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[], if (cidr >= HOST_MASK) return -IPSET_ERR_INVALID_CIDR; ip_set_mask_from_to(first_ip, last_ip, cidr); - } else + } else { return -IPSET_ERR_PROTOCOL; + } if (tb[IPSET_ATTR_NETMASK]) { netmask = nla_get_u8(tb[IPSET_ATTR_NETMASK]); diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index fe00e87decc8..1430535118fb 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -90,7 +90,7 @@ bitmap_ipmac_do_test(const struct bitmap_ipmac_adt_elem *e, return 0; elem = get_elem(map->extensions, e->id, dsize); if (elem->filled == MAC_FILLED) - return e->ether == NULL || + return !e->ether || ether_addr_equal(e->ether, elem->ether); /* Trigger kernel to fill out the ethernet address */ return -EAGAIN; @@ -131,7 +131,8 @@ bitmap_ipmac_add_timeout(unsigned long *timeout, /* If MAC is unset yet, we store plain timeout value * because the timer is not activated yet * and we can reuse it later when MAC is filled out, - * possibly by the kernel */ + * possibly by the kernel + */ if (e->ether) ip_set_timeout_set(timeout, t); else @@ -155,7 +156,7 @@ bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e, /* memcpy isn't atomic */ clear_bit(e->id, map->members); smp_mb__after_atomic(); - memcpy(elem->ether, e->ether, ETH_ALEN); + ether_addr_copy(elem->ether, e->ether); } return IPSET_ADD_FAILED; } else if (!e->ether) @@ -164,19 +165,18 @@ bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e, /* Fill the MAC address and trigger the timer activation */ clear_bit(e->id, map->members); smp_mb__after_atomic(); - memcpy(elem->ether, e->ether, ETH_ALEN); + ether_addr_copy(elem->ether, e->ether); elem->filled = MAC_FILLED; return IPSET_ADD_START_STORED_TIMEOUT; } else if (e->ether) { /* We can store MAC too */ - memcpy(elem->ether, e->ether, ETH_ALEN); + ether_addr_copy(elem->ether, e->ether); elem->filled = MAC_FILLED; return 0; - } else { - elem->filled = MAC_UNSET; - /* MAC is not stored yet, don't start timer */ - return IPSET_ADD_STORE_PLAIN_TIMEOUT; } + elem->filled = MAC_UNSET; + /* MAC is not stored yet, don't start timer */ + return IPSET_ADD_STORE_PLAIN_TIMEOUT; } static inline int @@ -352,8 +352,9 @@ bitmap_ipmac_create(struct net *net, struct ip_set *set, struct nlattr *tb[], if (cidr >= HOST_MASK) return -IPSET_ERR_INVALID_CIDR; ip_set_mask_from_to(first_ip, last_ip, cidr); - } else + } else { return -IPSET_ERR_PROTOCOL; + } elements = (u64)last_ip - first_ip + 1; diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c index 2d360f951d18..5338ccd5da46 100644 --- a/net/netfilter/ipset/ip_set_bitmap_port.c +++ b/net/netfilter/ipset/ip_set_bitmap_port.c @@ -162,8 +162,9 @@ bitmap_port_uadt(struct ip_set *set, struct nlattr *tb[], if (port < map->first_port) return -IPSET_ERR_BITMAP_RANGE; } - } else + } else { port_to = port; + } if (port_to > map->last_port) return -IPSET_ERR_BITMAP_RANGE; @@ -174,8 +175,8 @@ bitmap_port_uadt(struct ip_set *set, struct nlattr *tb[], if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; } return ret; } diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 2b21a1983a98..338b4047776f 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -35,6 +35,7 @@ struct ip_set_net { bool is_deleted; /* deleted by ip_set_net_exit */ bool is_destroyed; /* all sets are destroyed */ }; + static int ip_set_net_id __read_mostly; static inline struct ip_set_net *ip_set_pernet(struct net *net) @@ -60,8 +61,7 @@ MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET); #define ip_set(inst, id) \ ip_set_dereference((inst)->ip_set_list)[id] -/* - * The set types are implemented in modules and registered set types +/* The set types are implemented in modules and registered set types * can be found in ip_set_type_list. Adding/deleting types is * serialized by ip_set_type_mutex. */ @@ -131,7 +131,8 @@ __find_set_type_get(const char *name, u8 family, u8 revision, goto unlock; } /* Make sure the type is already loaded - * but we don't support the revision */ + * but we don't support the revision + */ list_for_each_entry_rcu(type, &ip_set_type_list, list) if (STRNCMP(type->name, name)) { err = -IPSET_ERR_FIND_TYPE; @@ -290,7 +291,7 @@ static const struct nla_policy ipaddr_policy[IPSET_ATTR_IPADDR_MAX + 1] = { int ip_set_get_ipaddr4(struct nlattr *nla, __be32 *ipaddr) { - struct nlattr *tb[IPSET_ATTR_IPADDR_MAX+1]; + struct nlattr *tb[IPSET_ATTR_IPADDR_MAX + 1]; if (unlikely(!flag_nested(nla))) return -IPSET_ERR_PROTOCOL; @@ -307,7 +308,7 @@ EXPORT_SYMBOL_GPL(ip_set_get_ipaddr4); int ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr) { - struct nlattr *tb[IPSET_ATTR_IPADDR_MAX+1]; + struct nlattr *tb[IPSET_ATTR_IPADDR_MAX + 1]; if (unlikely(!flag_nested(nla))) return -IPSET_ERR_PROTOCOL; @@ -318,7 +319,7 @@ ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr) return -IPSET_ERR_PROTOCOL; memcpy(ipaddr, nla_data(tb[IPSET_ATTR_IPADDR_IPV6]), - sizeof(struct in6_addr)); + sizeof(struct in6_addr)); return 0; } EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6); @@ -467,8 +468,7 @@ ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set, } EXPORT_SYMBOL_GPL(ip_set_put_extensions); -/* - * Creating/destroying/renaming/swapping affect the existence and +/* Creating/destroying/renaming/swapping affect the existence and * the properties of a set. All of these can be executed from userspace * only and serialized by the nfnl mutex indirectly from nfnetlink. * @@ -495,8 +495,7 @@ __ip_set_put(struct ip_set *set) write_unlock_bh(&ip_set_ref_lock); } -/* - * Add, del and test set entries from kernel. +/* Add, del and test set entries from kernel. * * The set behind the index must exist and must be referenced * so it can't be destroyed (or changed) under our foot. @@ -524,7 +523,7 @@ ip_set_test(ip_set_id_t index, const struct sk_buff *skb, dev_net(par->in ? par->in : par->out), index); int ret = 0; - BUG_ON(set == NULL); + BUG_ON(!set); pr_debug("set %s, index %u\n", set->name, index); if (opt->dim < set->type->dimension || @@ -563,7 +562,7 @@ ip_set_add(ip_set_id_t index, const struct sk_buff *skb, dev_net(par->in ? par->in : par->out), index); int ret; - BUG_ON(set == NULL); + BUG_ON(!set); pr_debug("set %s, index %u\n", set->name, index); if (opt->dim < set->type->dimension || @@ -586,7 +585,7 @@ ip_set_del(ip_set_id_t index, const struct sk_buff *skb, dev_net(par->in ? par->in : par->out), index); int ret = 0; - BUG_ON(set == NULL); + BUG_ON(!set); pr_debug("set %s, index %u\n", set->name, index); if (opt->dim < set->type->dimension || @@ -601,8 +600,7 @@ ip_set_del(ip_set_id_t index, const struct sk_buff *skb, } EXPORT_SYMBOL_GPL(ip_set_del); -/* - * Find set by name, reference it once. The reference makes sure the +/* Find set by name, reference it once. The reference makes sure the * thing pointed to, does not go away under our feet. * */ @@ -616,7 +614,7 @@ ip_set_get_byname(struct net *net, const char *name, struct ip_set **set) rcu_read_lock(); for (i = 0; i < inst->ip_set_max; i++) { s = rcu_dereference(inst->ip_set_list)[i]; - if (s != NULL && STRNCMP(s->name, name)) { + if (s && STRNCMP(s->name, name)) { __ip_set_get(s); index = i; *set = s; @@ -629,8 +627,7 @@ ip_set_get_byname(struct net *net, const char *name, struct ip_set **set) } EXPORT_SYMBOL_GPL(ip_set_get_byname); -/* - * If the given set pointer points to a valid set, decrement +/* If the given set pointer points to a valid set, decrement * reference count by 1. The caller shall not assume the index * to be valid, after calling this function. * @@ -643,7 +640,7 @@ __ip_set_put_byindex(struct ip_set_net *inst, ip_set_id_t index) rcu_read_lock(); set = rcu_dereference(inst->ip_set_list)[index]; - if (set != NULL) + if (set) __ip_set_put(set); rcu_read_unlock(); } @@ -657,8 +654,7 @@ ip_set_put_byindex(struct net *net, ip_set_id_t index) } EXPORT_SYMBOL_GPL(ip_set_put_byindex); -/* - * Get the name of a set behind a set index. +/* Get the name of a set behind a set index. * We assume the set is referenced, so it does exist and * can't be destroyed. The set cannot be renamed due to * the referencing either. @@ -669,7 +665,7 @@ ip_set_name_byindex(struct net *net, ip_set_id_t index) { const struct ip_set *set = ip_set_rcu_get(net, index); - BUG_ON(set == NULL); + BUG_ON(!set); BUG_ON(set->ref == 0); /* Referenced, so it's safe */ @@ -677,13 +673,11 @@ ip_set_name_byindex(struct net *net, ip_set_id_t index) } EXPORT_SYMBOL_GPL(ip_set_name_byindex); -/* - * Routines to call by external subsystems, which do not +/* Routines to call by external subsystems, which do not * call nfnl_lock for us. */ -/* - * Find set by index, reference it once. The reference makes sure the +/* Find set by index, reference it once. The reference makes sure the * thing pointed to, does not go away under our feet. * * The nfnl mutex is used in the function. @@ -709,8 +703,7 @@ ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index) } EXPORT_SYMBOL_GPL(ip_set_nfnl_get_byindex); -/* - * If the given set pointer points to a valid set, decrement +/* If the given set pointer points to a valid set, decrement * reference count by 1. The caller shall not assume the index * to be valid, after calling this function. * @@ -725,15 +718,14 @@ ip_set_nfnl_put(struct net *net, ip_set_id_t index) nfnl_lock(NFNL_SUBSYS_IPSET); if (!inst->is_deleted) { /* already deleted from ip_set_net_exit() */ set = ip_set(inst, index); - if (set != NULL) + if (set) __ip_set_put(set); } nfnl_unlock(NFNL_SUBSYS_IPSET); } EXPORT_SYMBOL_GPL(ip_set_nfnl_put); -/* - * Communication protocol with userspace over netlink. +/* Communication protocol with userspace over netlink. * * The commands are serialized by the nfnl mutex. */ @@ -760,7 +752,7 @@ start_msg(struct sk_buff *skb, u32 portid, u32 seq, unsigned int flags, nlh = nlmsg_put(skb, portid, seq, cmd | (NFNL_SUBSYS_IPSET << 8), sizeof(*nfmsg), flags); - if (nlh == NULL) + if (!nlh) return NULL; nfmsg = nlmsg_data(nlh); @@ -793,7 +785,7 @@ find_set_and_id(struct ip_set_net *inst, const char *name, ip_set_id_t *id) *id = IPSET_INVALID_ID; for (i = 0; i < inst->ip_set_max; i++) { set = ip_set(inst, i); - if (set != NULL && STRNCMP(set->name, name)) { + if (set && STRNCMP(set->name, name)) { *id = i; break; } @@ -819,7 +811,7 @@ find_free_id(struct ip_set_net *inst, const char *name, ip_set_id_t *index, *index = IPSET_INVALID_ID; for (i = 0; i < inst->ip_set_max; i++) { s = ip_set(inst, i); - if (s == NULL) { + if (!s) { if (*index == IPSET_INVALID_ID) *index = i; } else if (STRNCMP(name, s->name)) { @@ -851,18 +843,18 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, struct ip_set_net *inst = ip_set_pernet(net); struct ip_set *set, *clash = NULL; ip_set_id_t index = IPSET_INVALID_ID; - struct nlattr *tb[IPSET_ATTR_CREATE_MAX+1] = {}; + struct nlattr *tb[IPSET_ATTR_CREATE_MAX + 1] = {}; const char *name, *typename; u8 family, revision; u32 flags = flag_exist(nlh); int ret = 0; if (unlikely(protocol_failed(attr) || - attr[IPSET_ATTR_SETNAME] == NULL || - attr[IPSET_ATTR_TYPENAME] == NULL || - attr[IPSET_ATTR_REVISION] == NULL || - attr[IPSET_ATTR_FAMILY] == NULL || - (attr[IPSET_ATTR_DATA] != NULL && + !attr[IPSET_ATTR_SETNAME] || + !attr[IPSET_ATTR_TYPENAME] || + !attr[IPSET_ATTR_REVISION] || + !attr[IPSET_ATTR_FAMILY] || + (attr[IPSET_ATTR_DATA] && !flag_nested(attr[IPSET_ATTR_DATA])))) return -IPSET_ERR_PROTOCOL; @@ -873,11 +865,10 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, pr_debug("setname: %s, typename: %s, family: %s, revision: %u\n", name, typename, family_name(family), revision); - /* - * First, and without any locks, allocate and initialize + /* First, and without any locks, allocate and initialize * a normal base set structure. */ - set = kzalloc(sizeof(struct ip_set), GFP_KERNEL); + set = kzalloc(sizeof(*set), GFP_KERNEL); if (!set) return -ENOMEM; spin_lock_init(&set->lock); @@ -885,21 +876,18 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, set->family = family; set->revision = revision; - /* - * Next, check that we know the type, and take + /* Next, check that we know the type, and take * a reference on the type, to make sure it stays available * while constructing our new set. * * After referencing the type, we try to create the type * specific part of the set without holding any locks. */ - ret = find_set_type_get(typename, family, revision, &(set->type)); + ret = find_set_type_get(typename, family, revision, &set->type); if (ret) goto out; - /* - * Without holding any locks, create private part. - */ + /* Without holding any locks, create private part. */ if (attr[IPSET_ATTR_DATA] && nla_parse_nested(tb, IPSET_ATTR_CREATE_MAX, attr[IPSET_ATTR_DATA], set->type->create_policy)) { @@ -913,8 +901,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, /* BTW, ret==0 here. */ - /* - * Here, we have a valid, constructed set and we are protected + /* Here, we have a valid, constructed set and we are protected * by the nfnl mutex. Find the first free index in ip_set_list * and check clashing. */ @@ -937,7 +924,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, /* Wraparound */ goto cleanup; - list = kzalloc(sizeof(struct ip_set *) * i, GFP_KERNEL); + list = kcalloc(i, sizeof(struct ip_set *), GFP_KERNEL); if (!list) goto cleanup; /* nfnl mutex is held, both lists are valid */ @@ -951,12 +938,11 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, inst->ip_set_max = i; kfree(tmp); ret = 0; - } else if (ret) + } else if (ret) { goto cleanup; + } - /* - * Finally! Add our shiny new set to the list, and be done. - */ + /* Finally! Add our shiny new set to the list, and be done. */ pr_debug("create: '%s' created with index %u!\n", set->name, index); ip_set(inst, index) = set; @@ -1018,7 +1004,7 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb, if (!attr[IPSET_ATTR_SETNAME]) { for (i = 0; i < inst->ip_set_max; i++) { s = ip_set(inst, i); - if (s != NULL && s->ref) { + if (s && s->ref) { ret = -IPSET_ERR_BUSY; goto out; } @@ -1037,7 +1023,7 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb, } else { s = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]), &i); - if (s == NULL) { + if (!s) { ret = -ENOENT; goto out; } else if (s->ref) { @@ -1082,12 +1068,12 @@ ip_set_flush(struct sock *ctnl, struct sk_buff *skb, if (!attr[IPSET_ATTR_SETNAME]) { for (i = 0; i < inst->ip_set_max; i++) { s = ip_set(inst, i); - if (s != NULL) + if (s) ip_set_flush_set(s); } } else { s = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); - if (s == NULL) + if (!s) return -ENOENT; ip_set_flush_set(s); @@ -1119,12 +1105,12 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb, int ret = 0; if (unlikely(protocol_failed(attr) || - attr[IPSET_ATTR_SETNAME] == NULL || - attr[IPSET_ATTR_SETNAME2] == NULL)) + !attr[IPSET_ATTR_SETNAME] || + !attr[IPSET_ATTR_SETNAME2])) return -IPSET_ERR_PROTOCOL; set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); - if (set == NULL) + if (!set) return -ENOENT; read_lock_bh(&ip_set_ref_lock); @@ -1136,7 +1122,7 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb, name2 = nla_data(attr[IPSET_ATTR_SETNAME2]); for (i = 0; i < inst->ip_set_max; i++) { s = ip_set(inst, i); - if (s != NULL && STRNCMP(s->name, name2)) { + if (s && STRNCMP(s->name, name2)) { ret = -IPSET_ERR_EXIST_SETNAME2; goto out; } @@ -1168,23 +1154,24 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb, char from_name[IPSET_MAXNAMELEN]; if (unlikely(protocol_failed(attr) || - attr[IPSET_ATTR_SETNAME] == NULL || - attr[IPSET_ATTR_SETNAME2] == NULL)) + !attr[IPSET_ATTR_SETNAME] || + !attr[IPSET_ATTR_SETNAME2])) return -IPSET_ERR_PROTOCOL; from = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]), &from_id); - if (from == NULL) + if (!from) return -ENOENT; to = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME2]), &to_id); - if (to == NULL) + if (!to) return -IPSET_ERR_EXIST_SETNAME2; /* Features must not change. - * Not an artificial restriction anymore, as we must prevent - * possible loops created by swapping in setlist type of sets. */ + * Not an artifical restriction anymore, as we must prevent + * possible loops created by swapping in setlist type of sets. + */ if (!(from->type->features == to->type->features && from->family == to->family)) return -IPSET_ERR_TYPE_MISMATCH; @@ -1246,7 +1233,7 @@ dump_init(struct netlink_callback *cb, struct ip_set_net *inst) { struct nlmsghdr *nlh = nlmsg_hdr(cb->skb); int min_len = nlmsg_total_size(sizeof(struct nfgenmsg)); - struct nlattr *cda[IPSET_ATTR_CMD_MAX+1]; + struct nlattr *cda[IPSET_ATTR_CMD_MAX + 1]; struct nlattr *attr = (void *)nlh + min_len; u32 dump_type; ip_set_id_t index; @@ -1260,16 +1247,18 @@ dump_init(struct netlink_callback *cb, struct ip_set_net *inst) set = find_set_and_id(inst, nla_data(cda[IPSET_ATTR_SETNAME]), &index); - if (set == NULL) + if (!set) return -ENOENT; dump_type = DUMP_ONE; cb->args[IPSET_CB_INDEX] = index; - } else + } else { dump_type = DUMP_ALL; + } if (cda[IPSET_ATTR_FLAGS]) { u32 f = ip_set_get_h32(cda[IPSET_ATTR_FLAGS]); + dump_type |= (f << 16); } cb->args[IPSET_CB_NET] = (unsigned long)inst; @@ -1295,7 +1284,8 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) if (ret < 0) { nlh = nlmsg_hdr(cb->skb); /* We have to create and send the error message - * manually :-( */ + * manually :-( + */ if (nlh->nlmsg_flags & NLM_F_ACK) netlink_ack(cb->skb, nlh, ret); return ret; @@ -1313,7 +1303,7 @@ dump_last: pr_debug("dump type, flag: %u %u index: %ld\n", dump_type, dump_flags, cb->args[IPSET_CB_INDEX]); for (; cb->args[IPSET_CB_INDEX] < max; cb->args[IPSET_CB_INDEX]++) { - index = (ip_set_id_t) cb->args[IPSET_CB_INDEX]; + index = (ip_set_id_t)cb->args[IPSET_CB_INDEX]; write_lock_bh(&ip_set_ref_lock); set = ip_set(inst, index); is_destroyed = inst->is_destroyed; @@ -1480,12 +1470,12 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set, size_t payload = min(SIZE_MAX, sizeof(*errmsg) + nlmsg_len(nlh)); int min_len = nlmsg_total_size(sizeof(struct nfgenmsg)); - struct nlattr *cda[IPSET_ATTR_CMD_MAX+1]; + struct nlattr *cda[IPSET_ATTR_CMD_MAX + 1]; struct nlattr *cmdattr; u32 *errline; skb2 = nlmsg_new(payload, GFP_KERNEL); - if (skb2 == NULL) + if (!skb2) return -ENOMEM; rep = __nlmsg_put(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, NLMSG_ERROR, payload, 0); @@ -1502,7 +1492,8 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set, *errline = lineno; - netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); + netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, + MSG_DONTWAIT); /* Signal netlink not to send its ACK/errmsg. */ return -EINTR; } @@ -1517,25 +1508,25 @@ ip_set_uadd(struct sock *ctnl, struct sk_buff *skb, { struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); struct ip_set *set; - struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {}; + struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {}; const struct nlattr *nla; u32 flags = flag_exist(nlh); bool use_lineno; int ret = 0; if (unlikely(protocol_failed(attr) || - attr[IPSET_ATTR_SETNAME] == NULL || + !attr[IPSET_ATTR_SETNAME] || !((attr[IPSET_ATTR_DATA] != NULL) ^ (attr[IPSET_ATTR_ADT] != NULL)) || - (attr[IPSET_ATTR_DATA] != NULL && + (attr[IPSET_ATTR_DATA] && !flag_nested(attr[IPSET_ATTR_DATA])) || - (attr[IPSET_ATTR_ADT] != NULL && + (attr[IPSET_ATTR_ADT] && (!flag_nested(attr[IPSET_ATTR_ADT]) || - attr[IPSET_ATTR_LINENO] == NULL)))) + !attr[IPSET_ATTR_LINENO])))) return -IPSET_ERR_PROTOCOL; set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); - if (set == NULL) + if (!set) return -ENOENT; use_lineno = !!attr[IPSET_ATTR_LINENO]; @@ -1572,25 +1563,25 @@ ip_set_udel(struct sock *ctnl, struct sk_buff *skb, { struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); struct ip_set *set; - struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {}; + struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {}; const struct nlattr *nla; u32 flags = flag_exist(nlh); bool use_lineno; int ret = 0; if (unlikely(protocol_failed(attr) || - attr[IPSET_ATTR_SETNAME] == NULL || + !attr[IPSET_ATTR_SETNAME] || !((attr[IPSET_ATTR_DATA] != NULL) ^ (attr[IPSET_ATTR_ADT] != NULL)) || - (attr[IPSET_ATTR_DATA] != NULL && + (attr[IPSET_ATTR_DATA] && !flag_nested(attr[IPSET_ATTR_DATA])) || - (attr[IPSET_ATTR_ADT] != NULL && + (attr[IPSET_ATTR_ADT] && (!flag_nested(attr[IPSET_ATTR_ADT]) || - attr[IPSET_ATTR_LINENO] == NULL)))) + !attr[IPSET_ATTR_LINENO])))) return -IPSET_ERR_PROTOCOL; set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); - if (set == NULL) + if (!set) return -ENOENT; use_lineno = !!attr[IPSET_ATTR_LINENO]; @@ -1627,17 +1618,17 @@ ip_set_utest(struct sock *ctnl, struct sk_buff *skb, { struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); struct ip_set *set; - struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {}; + struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {}; int ret = 0; if (unlikely(protocol_failed(attr) || - attr[IPSET_ATTR_SETNAME] == NULL || - attr[IPSET_ATTR_DATA] == NULL || + !attr[IPSET_ATTR_SETNAME] || + !attr[IPSET_ATTR_DATA] || !flag_nested(attr[IPSET_ATTR_DATA]))) return -IPSET_ERR_PROTOCOL; set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); - if (set == NULL) + if (!set) return -ENOENT; if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, attr[IPSET_ATTR_DATA], @@ -1668,15 +1659,15 @@ ip_set_header(struct sock *ctnl, struct sk_buff *skb, int ret = 0; if (unlikely(protocol_failed(attr) || - attr[IPSET_ATTR_SETNAME] == NULL)) + !attr[IPSET_ATTR_SETNAME])) return -IPSET_ERR_PROTOCOL; set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); - if (set == NULL) + if (!set) return -ENOENT; skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (skb2 == NULL) + if (!skb2) return -ENOMEM; nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, @@ -1725,8 +1716,8 @@ ip_set_type(struct sock *ctnl, struct sk_buff *skb, int ret = 0; if (unlikely(protocol_failed(attr) || - attr[IPSET_ATTR_TYPENAME] == NULL || - attr[IPSET_ATTR_FAMILY] == NULL)) + !attr[IPSET_ATTR_TYPENAME] || + !attr[IPSET_ATTR_FAMILY])) return -IPSET_ERR_PROTOCOL; family = nla_get_u8(attr[IPSET_ATTR_FAMILY]); @@ -1736,7 +1727,7 @@ ip_set_type(struct sock *ctnl, struct sk_buff *skb, return ret; skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (skb2 == NULL) + if (!skb2) return -ENOMEM; nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, @@ -1781,11 +1772,11 @@ ip_set_protocol(struct sock *ctnl, struct sk_buff *skb, struct nlmsghdr *nlh2; int ret = 0; - if (unlikely(attr[IPSET_ATTR_PROTOCOL] == NULL)) + if (unlikely(!attr[IPSET_ATTR_PROTOCOL])) return -IPSET_ERR_PROTOCOL; skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (skb2 == NULL) + if (!skb2) return -ENOMEM; nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, @@ -1913,7 +1904,7 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) ret = -EFAULT; goto done; } - op = (unsigned int *) data; + op = (unsigned int *)data; if (*op < IP_SET_OP_VERSION) { /* Check the version at the beginning of operations */ @@ -2025,7 +2016,7 @@ ip_set_net_init(struct net *net) if (inst->ip_set_max >= IPSET_INVALID_ID) inst->ip_set_max = IPSET_INVALID_ID - 1; - list = kzalloc(sizeof(struct ip_set *) * inst->ip_set_max, GFP_KERNEL); + list = kcalloc(inst->ip_set_max, sizeof(struct ip_set *), GFP_KERNEL); if (!list) return -ENOMEM; inst->is_deleted = false; @@ -2061,11 +2052,11 @@ static struct pernet_operations ip_set_net_ops = { .size = sizeof(struct ip_set_net) }; - static int __init ip_set_init(void) { int ret = nfnetlink_subsys_register(&ip_set_netlink_subsys); + if (ret != 0) { pr_err("ip_set: cannot register with nfnetlink.\n"); return ret; diff --git a/net/netfilter/ipset/ip_set_getport.c b/net/netfilter/ipset/ip_set_getport.c index 1981f021cc60..42c3e3ba1b94 100644 --- a/net/netfilter/ipset/ip_set_getport.c +++ b/net/netfilter/ipset/ip_set_getport.c @@ -30,7 +30,7 @@ get_port(const struct sk_buff *skb, int protocol, unsigned int protooff, const struct tcphdr *th; th = skb_header_pointer(skb, protooff, sizeof(_tcph), &_tcph); - if (th == NULL) + if (!th) /* No choice either */ return false; @@ -42,7 +42,7 @@ get_port(const struct sk_buff *skb, int protocol, unsigned int protooff, const sctp_sctphdr_t *sh; sh = skb_header_pointer(skb, protooff, sizeof(_sh), &_sh); - if (sh == NULL) + if (!sh) /* No choice either */ return false; @@ -55,7 +55,7 @@ get_port(const struct sk_buff *skb, int protocol, unsigned int protooff, const struct udphdr *uh; uh = skb_header_pointer(skb, protooff, sizeof(_udph), &_udph); - if (uh == NULL) + if (!uh) /* No choice either */ return false; @@ -67,7 +67,7 @@ get_port(const struct sk_buff *skb, int protocol, unsigned int protooff, const struct icmphdr *ic; ic = skb_header_pointer(skb, protooff, sizeof(_ich), &_ich); - if (ic == NULL) + if (!ic) return false; *port = (__force __be16)htons((ic->type << 8) | ic->code); @@ -78,7 +78,7 @@ get_port(const struct sk_buff *skb, int protocol, unsigned int protooff, const struct icmp6hdr *ic; ic = skb_header_pointer(skb, protooff, sizeof(_ich), &_ich); - if (ic == NULL) + if (!ic) return false; *port = (__force __be16) @@ -116,7 +116,8 @@ ip_set_get_ip4_port(const struct sk_buff *skb, bool src, return false; default: /* Other protocols doesn't have ports, - so we can match fragments */ + * so we can match fragments. + */ *proto = protocol; return true; } diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index f352cc022010..afe905c208af 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -35,7 +35,7 @@ /* Number of elements to store in an initial array block */ #define AHASH_INIT_SIZE 4 /* Max number of elements to store in an array block */ -#define AHASH_MAX_SIZE (3*AHASH_INIT_SIZE) +#define AHASH_MAX_SIZE (3 * AHASH_INIT_SIZE) /* Max muber of elements in the array block when tuned */ #define AHASH_MAX_TUNED 64 @@ -57,6 +57,7 @@ tune_ahash_max(u8 curr, u32 multi) */ return n > curr && n <= AHASH_MAX_TUNED ? n : curr; } + #define TUNE_AHASH_MAX(h, multi) \ ((h)->ahash_max = tune_ahash_max((h)->ahash_max, multi)) #else @@ -256,7 +257,7 @@ htable_bits(u32 hashsize) #endif #define HKEY(data, initval, htable_bits) \ -(jhash2((u32 *)(data), HKEY_DATALEN/sizeof(u32), initval) \ +(jhash2((u32 *)(data), HKEY_DATALEN / sizeof(u32), initval) \ & jhash_mask(htable_bits)) #ifndef htype @@ -299,11 +300,11 @@ mtype_add_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n) /* Add in increasing prefix order, so larger cidr first */ for (i = 0, j = -1; i < nets_length && h->nets[i].cidr[n]; i++) { - if (j != -1) + if (j != -1) { continue; - else if (h->nets[i].cidr[n] < cidr) + } else if (h->nets[i].cidr[n] < cidr) { j = i; - else if (h->nets[i].cidr[n] == cidr) { + } else if (h->nets[i].cidr[n] == cidr) { h->nets[cidr - 1].nets[n]++; return; } @@ -322,15 +323,15 @@ mtype_del_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n) u8 i, j, net_end = nets_length - 1; for (i = 0; i < nets_length; i++) { - if (h->nets[i].cidr[n] != cidr) - continue; + if (h->nets[i].cidr[n] != cidr) + continue; h->nets[cidr - 1].nets[n]--; if (h->nets[cidr - 1].nets[n] > 0) - return; + return; for (j = i; j < net_end && h->nets[j].cidr[n]; j++) - h->nets[j].cidr[n] = h->nets[j + 1].cidr[n]; + h->nets[j].cidr[n] = h->nets[j + 1].cidr[n]; h->nets[j].cidr[n] = 0; - return; + return; } } #endif @@ -426,8 +427,8 @@ mtype_destroy(struct ip_set *set) if (SET_WITH_TIMEOUT(set)) del_timer_sync(&h->gc); - mtype_ahash_destroy(set, __ipset_dereference_protected(h->table, 1), - true); + mtype_ahash_destroy(set, + __ipset_dereference_protected(h->table, 1), true); kfree(h); set->data = NULL; @@ -439,7 +440,7 @@ mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) struct htype *h = set->data; init_timer(&h->gc); - h->gc.data = (unsigned long) set; + h->gc.data = (unsigned long)set; h->gc.function = gc; h->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ; add_timer(&h->gc); @@ -530,7 +531,7 @@ mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize) static void mtype_gc(unsigned long ul_set) { - struct ip_set *set = (struct ip_set *) ul_set; + struct ip_set *set = (struct ip_set *)ul_set; struct htype *h = set->data; pr_debug("called\n"); @@ -544,7 +545,8 @@ mtype_gc(unsigned long ul_set) /* Resize a hash: create a new hash table with doubling the hashsize * and inserting the elements to it. Repeat until we succeed or - * fail due to memory pressures. */ + * fail due to memory pressures. + */ static int mtype_resize(struct ip_set *set, bool retried) { @@ -687,7 +689,8 @@ cleanup: } /* Add an element to a hash and update the internal counters when succeeded, - * otherwise report the proper error code. */ + * otherwise report the proper error code. + */ static int mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, struct ip_set_ext *mext, u32 flags) @@ -926,7 +929,8 @@ mtype_data_match(struct mtype_elem *data, const struct ip_set_ext *ext, #ifdef IP_SET_HASH_WITH_NETS /* Special test function which takes into account the different network - * sizes added to the set */ + * sizes added to the set + */ static int mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d, const struct ip_set_ext *ext, @@ -1004,7 +1008,8 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext, t = rcu_dereference_bh(h->table); #ifdef IP_SET_HASH_WITH_NETS /* If we test an IP address and not a network address, - * try all possible network sizes */ + * try all possible network sizes + */ for (i = 0; i < IPSET_NET_COUNT; i++) if (DCIDR_GET(d->cidr, i) != SET_HOST_MASK(set->family)) break; @@ -1148,8 +1153,8 @@ mtype_list(const struct ip_set *set, nla_nest_cancel(skb, atd); ret = -EMSGSIZE; goto out; - } else - goto nla_put_failure; + } + goto nla_put_failure; } if (mtype_data_list(skb, e)) goto nla_put_failure; @@ -1171,8 +1176,9 @@ nla_put_failure: set->name); cb->args[IPSET_CB_ARG0] = 0; ret = -EMSGSIZE; - } else + } else { ipset_nest_end(skb, atd); + } out: rcu_read_unlock(); return ret; @@ -1180,12 +1186,13 @@ out: static int IPSET_TOKEN(MTYPE, _kadt)(struct ip_set *set, const struct sk_buff *skb, - const struct xt_action_param *par, - enum ipset_adt adt, struct ip_set_adt_opt *opt); + const struct xt_action_param *par, + enum ipset_adt adt, struct ip_set_adt_opt *opt); static int IPSET_TOKEN(MTYPE, _uadt)(struct ip_set *set, struct nlattr *tb[], - enum ipset_adt adt, u32 *lineno, u32 flags, bool retried); + enum ipset_adt adt, u32 *lineno, u32 flags, + bool retried); static const struct ip_set_type_variant mtype_variant = { .kadt = mtype_kadt, diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index f54d7069d633..9d6bf19f7b78 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -158,8 +158,8 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; } return ret; } diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c index f8fbc325ad34..a0695a2ab585 100644 --- a/net/netfilter/ipset/ip_set_hash_ipmark.c +++ b/net/netfilter/ipset/ip_set_hash_ipmark.c @@ -155,8 +155,8 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[], if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; } return ret; } @@ -206,7 +206,6 @@ hash_ipmark6_data_next(struct hash_ipmark4_elem *next, #define IP_SET_EMIT_CREATE #include "ip_set_hash_gen.h" - static int hash_ipmark6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, @@ -268,10 +267,8 @@ hash_ipmark6_uadt(struct ip_set *set, struct nlattr *tb[], ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; - return ret; + return 0; } static struct ip_set_type hash_ipmark_type __read_mostly = { diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index 9a31db8ccca6..9d84b3dff603 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -140,8 +140,9 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], if (e.proto == 0) return -IPSET_ERR_INVALID_PROTO; - } else + } else { return -IPSET_ERR_MISSING_PROTO; + } if (!(with_ports || e.proto == IPPROTO_ICMP)) e.port = 0; @@ -187,8 +188,8 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; } } return ret; @@ -305,8 +306,9 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[], if (e.proto == 0) return -IPSET_ERR_INVALID_PROTO; - } else + } else { return -IPSET_ERR_MISSING_PROTO; + } if (!(with_ports || e.proto == IPPROTO_ICMPV6)) e.port = 0; @@ -329,8 +331,8 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[], if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; } return ret; } diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index fc42489f8795..215b7b942038 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -63,7 +63,7 @@ hash_ipportip4_data_equal(const struct hash_ipportip4_elem *ip1, static bool hash_ipportip4_data_list(struct sk_buff *skb, - const struct hash_ipportip4_elem *data) + const struct hash_ipportip4_elem *data) { if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip) || nla_put_ipaddr4(skb, IPSET_ATTR_IP2, data->ip2) || @@ -147,8 +147,9 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], if (e.proto == 0) return -IPSET_ERR_INVALID_PROTO; - } else + } else { return -IPSET_ERR_MISSING_PROTO; + } if (!(with_ports || e.proto == IPPROTO_ICMP)) e.port = 0; @@ -194,8 +195,8 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; } } return ret; @@ -320,8 +321,9 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[], if (e.proto == 0) return -IPSET_ERR_INVALID_PROTO; - } else + } else { return -IPSET_ERR_MISSING_PROTO; + } if (!(with_ports || e.proto == IPPROTO_ICMPV6)) e.port = 0; @@ -344,8 +346,8 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[], if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; } return ret; } diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index 2a69b9bf66b8..9ca719625ea3 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -209,14 +209,16 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], if (e.proto == 0) return -IPSET_ERR_INVALID_PROTO; - } else + } else { return -IPSET_ERR_MISSING_PROTO; + } if (!(with_ports || e.proto == IPPROTO_ICMP)) e.port = 0; if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_NOMATCH) flags |= (IPSET_FLAG_NOMATCH << 16); } @@ -263,8 +265,9 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], swap(ip2_from, ip2_to); if (ip2_from + UINT_MAX == ip2_to) return -IPSET_ERR_HASH_RANGE; - } else + } else { ip_set_mask_from_to(ip2_from, ip2_to, e.cidr + 1); + } if (retried) ip = ntohl(h->next.ip); @@ -287,8 +290,8 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; ip2 = ip2_last + 1; } } @@ -466,14 +469,16 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[], if (e.proto == 0) return -IPSET_ERR_INVALID_PROTO; - } else + } else { return -IPSET_ERR_MISSING_PROTO; + } if (!(with_ports || e.proto == IPPROTO_ICMPV6)) e.port = 0; if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_NOMATCH) flags |= (IPSET_FLAG_NOMATCH << 16); } @@ -497,8 +502,8 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[], if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; } return ret; } diff --git a/net/netfilter/ipset/ip_set_hash_mac.c b/net/netfilter/ipset/ip_set_hash_mac.c index 112aff3cda96..f1e7d2c0f685 100644 --- a/net/netfilter/ipset/ip_set_hash_mac.c +++ b/net/netfilter/ipset/ip_set_hash_mac.c @@ -89,10 +89,10 @@ hash_mac4_kadt(struct ip_set *set, const struct sk_buff *skb, return 0; if (skb_mac_header(skb) < skb->head || - (skb_mac_header(skb) + ETH_HLEN) > skb->data) + (skb_mac_header(skb) + ETH_HLEN) > skb->data) return -EINVAL; - memcpy(e.ether, eth_hdr(skb)->h_source, ETH_ALEN); + ether_addr_copy(e.ether, eth_hdr(skb)->h_source); if (memcmp(e.ether, invalid_ether, ETH_ALEN) == 0) return -EINVAL; return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); @@ -116,7 +116,7 @@ hash_mac4_uadt(struct ip_set *set, struct nlattr *tb[], ret = ip_set_get_extensions(set, tb, &ext); if (ret) return ret; - memcpy(e.ether, nla_data(tb[IPSET_ATTR_ETHER]), ETH_ALEN); + ether_addr_copy(e.ether, nla_data(tb[IPSET_ATTR_ETHER])); if (memcmp(e.ether, invalid_ether, ETH_ALEN) == 0) return -IPSET_ERR_HASH_ELEM; diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index e49b1d010d30..3e4bffdc1cc0 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -169,6 +169,7 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_NOMATCH) flags |= (IPSET_FLAG_NOMATCH << 16); } @@ -176,7 +177,7 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], if (adt == IPSET_TEST || !tb[IPSET_ATTR_IP_TO]) { e.ip = htonl(ip & ip_set_hostmask(e.cidr)); ret = adtfn(set, &e, &ext, &ext, flags); - return ip_set_enomatch(ret, flags, adt, set) ? -ret: + return ip_set_enomatch(ret, flags, adt, set) ? -ret : ip_set_eexist(ret, flags) ? 0 : ret; } @@ -198,8 +199,8 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; ip = last + 1; } return ret; @@ -339,6 +340,7 @@ hash_net6_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_NOMATCH) flags |= (IPSET_FLAG_NOMATCH << 16); } diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index 42c893e08842..43d8c9896fa3 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -143,7 +143,7 @@ static const char *get_physindev_name(const struct sk_buff *skb) return dev ? dev->name : NULL; } -static const char *get_phyoutdev_name(const struct sk_buff *skb) +static const char *get_physoutdev_name(const struct sk_buff *skb) { struct net_device *dev = nf_bridge_get_physoutdev(skb); @@ -178,15 +178,16 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb, if (opt->cmdflags & IPSET_FLAG_PHYSDEV) { #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) const char *eiface = SRCDIR ? get_physindev_name(skb) : - get_phyoutdev_name(skb); + get_physoutdev_name(skb); if (!eiface) return -EINVAL; STRLCPY(e.iface, eiface); e.physdev = 1; #endif - } else + } else { STRLCPY(e.iface, SRCDIR ? IFACE(in) : IFACE(out)); + } if (strlen(e.iface) == 0) return -EINVAL; @@ -229,6 +230,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_PHYSDEV) e.physdev = 1; if (cadt_flags & IPSET_FLAG_NOMATCH) @@ -249,8 +251,9 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], swap(ip, ip_to); if (ip + UINT_MAX == ip_to) return -IPSET_ERR_HASH_RANGE; - } else + } else { ip_set_mask_from_to(ip, ip_to, e.cidr); + } if (retried) ip = ntohl(h->next.ip); @@ -261,8 +264,8 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; ip = last + 1; } return ret; @@ -385,15 +388,16 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb, if (opt->cmdflags & IPSET_FLAG_PHYSDEV) { #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) const char *eiface = SRCDIR ? get_physindev_name(skb) : - get_phyoutdev_name(skb); + get_physoutdev_name(skb); + if (!eiface) return -EINVAL; - STRLCPY(e.iface, eiface); e.physdev = 1; #endif - } else + } else { STRLCPY(e.iface, SRCDIR ? IFACE(in) : IFACE(out)); + } if (strlen(e.iface) == 0) return -EINVAL; @@ -403,7 +407,7 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb, static int hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[], - enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface6_elem e = { .cidr = HOST_MASK, .elem = 1 }; @@ -440,6 +444,7 @@ hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_PHYSDEV) e.physdev = 1; if (cadt_flags & IPSET_FLAG_NOMATCH) diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c index b5428be1f159..3c862c0a76d1 100644 --- a/net/netfilter/ipset/ip_set_hash_netnet.c +++ b/net/netfilter/ipset/ip_set_hash_netnet.c @@ -57,8 +57,8 @@ struct hash_netnet4_elem { static inline bool hash_netnet4_data_equal(const struct hash_netnet4_elem *ip1, - const struct hash_netnet4_elem *ip2, - u32 *multi) + const struct hash_netnet4_elem *ip2, + u32 *multi) { return ip1->ipcmp == ip2->ipcmp && ip1->ccmp == ip2->ccmp; @@ -84,7 +84,7 @@ hash_netnet4_data_reset_flags(struct hash_netnet4_elem *elem, u8 *flags) static inline void hash_netnet4_data_reset_elem(struct hash_netnet4_elem *elem, - struct hash_netnet4_elem *orig) + struct hash_netnet4_elem *orig) { elem->ip[1] = orig->ip[1]; } @@ -103,7 +103,7 @@ hash_netnet4_data_netmask(struct hash_netnet4_elem *elem, u8 cidr, bool inner) static bool hash_netnet4_data_list(struct sk_buff *skb, - const struct hash_netnet4_elem *data) + const struct hash_netnet4_elem *data) { u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0; @@ -122,7 +122,7 @@ nla_put_failure: static inline void hash_netnet4_data_next(struct hash_netnet4_elem *next, - const struct hash_netnet4_elem *d) + const struct hash_netnet4_elem *d) { next->ipcmp = d->ipcmp; } @@ -133,8 +133,8 @@ hash_netnet4_data_next(struct hash_netnet4_elem *next, static int hash_netnet4_kadt(struct ip_set *set, const struct sk_buff *skb, - const struct xt_action_param *par, - enum ipset_adt adt, struct ip_set_adt_opt *opt) + const struct xt_action_param *par, + enum ipset_adt adt, struct ip_set_adt_opt *opt) { const struct hash_netnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; @@ -156,7 +156,7 @@ hash_netnet4_kadt(struct ip_set *set, const struct sk_buff *skb, static int hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], - enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { const struct hash_netnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; @@ -199,6 +199,7 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_NOMATCH) flags |= (IPSET_FLAG_NOMATCH << 16); } @@ -221,8 +222,9 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], swap(ip, ip_to); if (unlikely(ip + UINT_MAX == ip_to)) return -IPSET_ERR_HASH_RANGE; - } else + } else { ip_set_mask_from_to(ip, ip_to, e.cidr[0]); + } ip2_to = ip2_from; if (tb[IPSET_ATTR_IP2_TO]) { @@ -233,8 +235,9 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], swap(ip2_from, ip2_to); if (unlikely(ip2_from + UINT_MAX == ip2_to)) return -IPSET_ERR_HASH_RANGE; - } else + } else { ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]); + } if (retried) ip = ntohl(h->next.ip[0]); @@ -251,8 +254,8 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; ip2 = last2 + 1; } ip = last + 1; @@ -276,8 +279,8 @@ struct hash_netnet6_elem { static inline bool hash_netnet6_data_equal(const struct hash_netnet6_elem *ip1, - const struct hash_netnet6_elem *ip2, - u32 *multi) + const struct hash_netnet6_elem *ip2, + u32 *multi) { return ipv6_addr_equal(&ip1->ip[0].in6, &ip2->ip[0].in6) && ipv6_addr_equal(&ip1->ip[1].in6, &ip2->ip[1].in6) && @@ -304,7 +307,7 @@ hash_netnet6_data_reset_flags(struct hash_netnet6_elem *elem, u8 *flags) static inline void hash_netnet6_data_reset_elem(struct hash_netnet6_elem *elem, - struct hash_netnet6_elem *orig) + struct hash_netnet6_elem *orig) { elem->ip[1] = orig->ip[1]; } @@ -323,7 +326,7 @@ hash_netnet6_data_netmask(struct hash_netnet6_elem *elem, u8 cidr, bool inner) static bool hash_netnet6_data_list(struct sk_buff *skb, - const struct hash_netnet6_elem *data) + const struct hash_netnet6_elem *data) { u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0; @@ -342,7 +345,7 @@ nla_put_failure: static inline void hash_netnet6_data_next(struct hash_netnet4_elem *next, - const struct hash_netnet6_elem *d) + const struct hash_netnet6_elem *d) { } @@ -356,8 +359,8 @@ hash_netnet6_data_next(struct hash_netnet4_elem *next, static int hash_netnet6_kadt(struct ip_set *set, const struct sk_buff *skb, - const struct xt_action_param *par, - enum ipset_adt adt, struct ip_set_adt_opt *opt) + const struct xt_action_param *par, + enum ipset_adt adt, struct ip_set_adt_opt *opt) { const struct hash_netnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; @@ -367,7 +370,7 @@ hash_netnet6_kadt(struct ip_set *set, const struct sk_buff *skb, e.cidr[0] = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK); e.cidr[1] = INIT_CIDR(h->nets[0].cidr[1], HOST_MASK); if (adt == IPSET_TEST) - e.ccmp = (HOST_MASK << (sizeof(u8)*8)) | HOST_MASK; + e.ccmp = (HOST_MASK << (sizeof(u8) * 8)) | HOST_MASK; ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip[0].in6); ip6addrptr(skb, opt->flags & IPSET_DIM_TWO_SRC, &e.ip[1].in6); @@ -379,7 +382,7 @@ hash_netnet6_kadt(struct ip_set *set, const struct sk_buff *skb, static int hash_netnet6_uadt(struct ip_set *set, struct nlattr *tb[], - enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netnet6_elem e = { .cidr = { HOST_MASK, HOST_MASK, }, }; @@ -424,6 +427,7 @@ hash_netnet6_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_NOMATCH) flags |= (IPSET_FLAG_NOMATCH << 16); } diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index 27307d0a8a5d..731813e0f08c 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -198,8 +198,9 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], if (e.proto == 0) return -IPSET_ERR_INVALID_PROTO; - } else + } else { return -IPSET_ERR_MISSING_PROTO; + } if (!(with_ports || e.proto == IPPROTO_ICMP)) e.port = 0; @@ -208,6 +209,7 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_NOMATCH) flags |= (IPSET_FLAG_NOMATCH << 16); } @@ -233,8 +235,9 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], swap(ip, ip_to); if (ip + UINT_MAX == ip_to) return -IPSET_ERR_HASH_RANGE; - } else + } else { ip_set_mask_from_to(ip, ip_to, e.cidr + 1); + } if (retried) ip = ntohl(h->next.ip); @@ -250,8 +253,8 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; } ip = last + 1; } @@ -413,14 +416,16 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[], if (e.proto == 0) return -IPSET_ERR_INVALID_PROTO; - } else + } else { return -IPSET_ERR_MISSING_PROTO; + } if (!(with_ports || e.proto == IPPROTO_ICMPV6)) e.port = 0; if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_NOMATCH) flags |= (IPSET_FLAG_NOMATCH << 16); } @@ -444,8 +449,8 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[], if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; } return ret; } diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c index 1e0e47ae40a4..0c68734f5cc4 100644 --- a/net/netfilter/ipset/ip_set_hash_netportnet.c +++ b/net/netfilter/ipset/ip_set_hash_netportnet.c @@ -62,8 +62,8 @@ struct hash_netportnet4_elem { static inline bool hash_netportnet4_data_equal(const struct hash_netportnet4_elem *ip1, - const struct hash_netportnet4_elem *ip2, - u32 *multi) + const struct hash_netportnet4_elem *ip2, + u32 *multi) { return ip1->ipcmp == ip2->ipcmp && ip1->ccmp == ip2->ccmp && @@ -91,7 +91,7 @@ hash_netportnet4_data_reset_flags(struct hash_netportnet4_elem *elem, u8 *flags) static inline void hash_netportnet4_data_reset_elem(struct hash_netportnet4_elem *elem, - struct hash_netportnet4_elem *orig) + struct hash_netportnet4_elem *orig) { elem->ip[1] = orig->ip[1]; } @@ -111,7 +111,7 @@ hash_netportnet4_data_netmask(struct hash_netportnet4_elem *elem, static bool hash_netportnet4_data_list(struct sk_buff *skb, - const struct hash_netportnet4_elem *data) + const struct hash_netportnet4_elem *data) { u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0; @@ -132,7 +132,7 @@ nla_put_failure: static inline void hash_netportnet4_data_next(struct hash_netportnet4_elem *next, - const struct hash_netportnet4_elem *d) + const struct hash_netportnet4_elem *d) { next->ipcmp = d->ipcmp; next->port = d->port; @@ -144,8 +144,8 @@ hash_netportnet4_data_next(struct hash_netportnet4_elem *next, static int hash_netportnet4_kadt(struct ip_set *set, const struct sk_buff *skb, - const struct xt_action_param *par, - enum ipset_adt adt, struct ip_set_adt_opt *opt) + const struct xt_action_param *par, + enum ipset_adt adt, struct ip_set_adt_opt *opt) { const struct hash_netportnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; @@ -171,7 +171,7 @@ hash_netportnet4_kadt(struct ip_set *set, const struct sk_buff *skb, static int hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], - enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { const struct hash_netportnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; @@ -223,14 +223,16 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], if (e.proto == 0) return -IPSET_ERR_INVALID_PROTO; - } else + } else { return -IPSET_ERR_MISSING_PROTO; + } if (!(with_ports || e.proto == IPPROTO_ICMP)) e.port = 0; if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_NOMATCH) flags |= (IPSET_FLAG_NOMATCH << 16); } @@ -254,8 +256,9 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], swap(ip, ip_to); if (unlikely(ip + UINT_MAX == ip_to)) return -IPSET_ERR_HASH_RANGE; - } else + } else { ip_set_mask_from_to(ip, ip_to, e.cidr[0]); + } port_to = port = ntohs(e.port); if (tb[IPSET_ATTR_PORT_TO]) { @@ -273,8 +276,9 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], swap(ip2_from, ip2_to); if (unlikely(ip2_from + UINT_MAX == ip2_to)) return -IPSET_ERR_HASH_RANGE; - } else + } else { ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]); + } if (retried) ip = ntohl(h->next.ip[0]); @@ -296,8 +300,8 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; ip2 = ip2_last + 1; } } @@ -324,8 +328,8 @@ struct hash_netportnet6_elem { static inline bool hash_netportnet6_data_equal(const struct hash_netportnet6_elem *ip1, - const struct hash_netportnet6_elem *ip2, - u32 *multi) + const struct hash_netportnet6_elem *ip2, + u32 *multi) { return ipv6_addr_equal(&ip1->ip[0].in6, &ip2->ip[0].in6) && ipv6_addr_equal(&ip1->ip[1].in6, &ip2->ip[1].in6) && @@ -354,7 +358,7 @@ hash_netportnet6_data_reset_flags(struct hash_netportnet6_elem *elem, u8 *flags) static inline void hash_netportnet6_data_reset_elem(struct hash_netportnet6_elem *elem, - struct hash_netportnet6_elem *orig) + struct hash_netportnet6_elem *orig) { elem->ip[1] = orig->ip[1]; } @@ -374,7 +378,7 @@ hash_netportnet6_data_netmask(struct hash_netportnet6_elem *elem, static bool hash_netportnet6_data_list(struct sk_buff *skb, - const struct hash_netportnet6_elem *data) + const struct hash_netportnet6_elem *data) { u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0; @@ -395,7 +399,7 @@ nla_put_failure: static inline void hash_netportnet6_data_next(struct hash_netportnet4_elem *next, - const struct hash_netportnet6_elem *d) + const struct hash_netportnet6_elem *d) { next->port = d->port; } @@ -410,8 +414,8 @@ hash_netportnet6_data_next(struct hash_netportnet4_elem *next, static int hash_netportnet6_kadt(struct ip_set *set, const struct sk_buff *skb, - const struct xt_action_param *par, - enum ipset_adt adt, struct ip_set_adt_opt *opt) + const struct xt_action_param *par, + enum ipset_adt adt, struct ip_set_adt_opt *opt) { const struct hash_netportnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; @@ -437,7 +441,7 @@ hash_netportnet6_kadt(struct ip_set *set, const struct sk_buff *skb, static int hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[], - enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { const struct hash_netportnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; @@ -493,14 +497,16 @@ hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[], if (e.proto == 0) return -IPSET_ERR_INVALID_PROTO; - } else + } else { return -IPSET_ERR_MISSING_PROTO; + } if (!(with_ports || e.proto == IPPROTO_ICMPV6)) e.port = 0; if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_NOMATCH) flags |= (IPSET_FLAG_NOMATCH << 16); } @@ -524,8 +530,8 @@ hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[], if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; } return ret; } diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 9f624ee9a41e..a1fe5377a2b3 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -206,14 +206,15 @@ list_set_utest(struct ip_set *set, void *value, const struct ip_set_ext *ext, continue; } - if (d->before == 0) + if (d->before == 0) { ret = 1; - else if (d->before > 0) { + } else if (d->before > 0) { next = list_next_entry(e, list); ret = !list_is_last(&e->list, &map->members) && next->id == d->refid; - } else + } else { ret = prev && prev->id == d->refid; + } return ret; } return 0; @@ -558,7 +559,7 @@ static const struct ip_set_type_variant set_variant = { static void list_set_gc(unsigned long ul_set) { - struct ip_set *set = (struct ip_set *) ul_set; + struct ip_set *set = (struct ip_set *)ul_set; struct list_set *map = set->data; spin_lock_bh(&set->lock); @@ -575,7 +576,7 @@ list_set_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) struct list_set *map = set->data; init_timer(&map->gc); - map->gc.data = (unsigned long) set; + map->gc.data = (unsigned long)set; map->gc.function = gc; map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ; add_timer(&map->gc); diff --git a/net/netfilter/ipset/pfxlen.c b/net/netfilter/ipset/pfxlen.c index 04d15fdc99ee..1c8a42c1056c 100644 --- a/net/netfilter/ipset/pfxlen.c +++ b/net/netfilter/ipset/pfxlen.c @@ -1,9 +1,7 @@ #include #include -/* - * Prefixlen maps for fast conversions, by Jan Engelhardt. - */ +/* Prefixlen maps for fast conversions, by Jan Engelhardt. */ #define E(a, b, c, d) \ {.ip6 = { \ @@ -11,8 +9,7 @@ htonl(c), htonl(d), \ } } -/* - * This table works for both IPv4 and IPv6; +/* This table works for both IPv4 and IPv6; * just use prefixlen_netmask_map[prefixlength].ip. */ const union nf_inet_addr ip_set_netmask_map[] = { @@ -149,13 +146,12 @@ const union nf_inet_addr ip_set_netmask_map[] = { EXPORT_SYMBOL_GPL(ip_set_netmask_map); #undef E -#define E(a, b, c, d) \ - {.ip6 = { (__force __be32) a, (__force __be32) b, \ - (__force __be32) c, (__force __be32) d, \ +#define E(a, b, c, d) \ + {.ip6 = { (__force __be32)a, (__force __be32)b, \ + (__force __be32)c, (__force __be32)d, \ } } -/* - * This table works for both IPv4 and IPv6; +/* This table works for both IPv4 and IPv6; * just use prefixlen_hostmask_map[prefixlength].ip. */ const union nf_inet_addr ip_set_hostmask_map[] = { diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c index b103e9627716..5669e5b453f4 100644 --- a/net/netfilter/xt_set.c +++ b/net/netfilter/xt_set.c @@ -9,7 +9,8 @@ */ /* Kernel module which implements the set match and SET target - * for netfilter/iptables. */ + * for netfilter/iptables. + */ #include #include @@ -53,6 +54,7 @@ static bool set_match_v0(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_set_info_match_v0 *info = par->matchinfo; + ADT_OPT(opt, par->family, info->match_set.u.compat.dim, info->match_set.u.compat.flags, 0, UINT_MAX); @@ -69,10 +71,10 @@ compat_flags(struct xt_set_info_v0 *info) info->u.compat.dim = IPSET_DIM_ZERO; if (info->u.flags[0] & IPSET_MATCH_INV) info->u.compat.flags |= IPSET_INV_MATCH; - for (i = 0; i < IPSET_DIM_MAX-1 && info->u.flags[i]; i++) { + for (i = 0; i < IPSET_DIM_MAX - 1 && info->u.flags[i]; i++) { info->u.compat.dim++; if (info->u.flags[i] & IPSET_SRC) - info->u.compat.flags |= (1<u.compat.dim); + info->u.compat.flags |= (1 << info->u.compat.dim); } } @@ -89,7 +91,7 @@ set_match_v0_checkentry(const struct xt_mtchk_param *par) info->match_set.index); return -ENOENT; } - if (info->match_set.u.flags[IPSET_DIM_MAX-1] != 0) { + if (info->match_set.u.flags[IPSET_DIM_MAX - 1] != 0) { pr_warn("Protocol error: set match dimension is over the limit!\n"); ip_set_nfnl_put(par->net, info->match_set.index); return -ERANGE; @@ -115,6 +117,7 @@ static bool set_match_v1(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_set_info_match_v1 *info = par->matchinfo; + ADT_OPT(opt, par->family, info->match_set.dim, info->match_set.flags, 0, UINT_MAX); @@ -179,9 +182,10 @@ static bool set_match_v3(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_set_info_match_v3 *info = par->matchinfo; + int ret; + ADT_OPT(opt, par->family, info->match_set.dim, info->match_set.flags, info->flags, UINT_MAX); - int ret; if (info->packets.op != IPSET_COUNTER_NONE || info->bytes.op != IPSET_COUNTER_NONE) @@ -225,9 +229,10 @@ static bool set_match_v4(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_set_info_match_v4 *info = par->matchinfo; + int ret; + ADT_OPT(opt, par->family, info->match_set.dim, info->match_set.flags, info->flags, UINT_MAX); - int ret; if (info->packets.op != IPSET_COUNTER_NONE || info->bytes.op != IPSET_COUNTER_NONE) @@ -253,6 +258,7 @@ static unsigned int set_target_v0(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_set_info_target_v0 *info = par->targinfo; + ADT_OPT(add_opt, par->family, info->add_set.u.compat.dim, info->add_set.u.compat.flags, 0, UINT_MAX); ADT_OPT(del_opt, par->family, info->del_set.u.compat.dim, @@ -291,8 +297,8 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par) return -ENOENT; } } - if (info->add_set.u.flags[IPSET_DIM_MAX-1] != 0 || - info->del_set.u.flags[IPSET_DIM_MAX-1] != 0) { + if (info->add_set.u.flags[IPSET_DIM_MAX - 1] != 0 || + info->del_set.u.flags[IPSET_DIM_MAX - 1] != 0) { pr_warn("Protocol error: SET target dimension is over the limit!\n"); if (info->add_set.index != IPSET_INVALID_ID) ip_set_nfnl_put(par->net, info->add_set.index); @@ -325,6 +331,7 @@ static unsigned int set_target_v1(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_set_info_target_v1 *info = par->targinfo; + ADT_OPT(add_opt, par->family, info->add_set.dim, info->add_set.flags, 0, UINT_MAX); ADT_OPT(del_opt, par->family, info->del_set.dim, @@ -393,6 +400,7 @@ static unsigned int set_target_v2(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_set_info_target_v2 *info = par->targinfo; + ADT_OPT(add_opt, par->family, info->add_set.dim, info->add_set.flags, info->flags, info->timeout); ADT_OPT(del_opt, par->family, info->del_set.dim, @@ -400,8 +408,8 @@ set_target_v2(struct sk_buff *skb, const struct xt_action_param *par) /* Normalize to fit into jiffies */ if (add_opt.ext.timeout != IPSET_NO_TIMEOUT && - add_opt.ext.timeout > UINT_MAX/MSEC_PER_SEC) - add_opt.ext.timeout = UINT_MAX/MSEC_PER_SEC; + add_opt.ext.timeout > UINT_MAX / MSEC_PER_SEC) + add_opt.ext.timeout = UINT_MAX / MSEC_PER_SEC; if (info->add_set.index != IPSET_INVALID_ID) ip_set_add(info->add_set.index, skb, par, &add_opt); if (info->del_set.index != IPSET_INVALID_ID) @@ -419,6 +427,8 @@ static unsigned int set_target_v3(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_set_info_target_v3 *info = par->targinfo; + int ret; + ADT_OPT(add_opt, par->family, info->add_set.dim, info->add_set.flags, info->flags, info->timeout); ADT_OPT(del_opt, par->family, info->del_set.dim, @@ -426,12 +436,10 @@ set_target_v3(struct sk_buff *skb, const struct xt_action_param *par) ADT_OPT(map_opt, par->family, info->map_set.dim, info->map_set.flags, 0, UINT_MAX); - int ret; - /* Normalize to fit into jiffies */ if (add_opt.ext.timeout != IPSET_NO_TIMEOUT && - add_opt.ext.timeout > UINT_MAX/MSEC_PER_SEC) - add_opt.ext.timeout = UINT_MAX/MSEC_PER_SEC; + add_opt.ext.timeout > UINT_MAX / MSEC_PER_SEC) + add_opt.ext.timeout = UINT_MAX / MSEC_PER_SEC; if (info->add_set.index != IPSET_INVALID_ID) ip_set_add(info->add_set.index, skb, par, &add_opt); if (info->del_set.index != IPSET_INVALID_ID) @@ -457,7 +465,6 @@ set_target_v3(struct sk_buff *skb, const struct xt_action_param *par) return XT_CONTINUE; } - static int set_target_v3_checkentry(const struct xt_tgchk_param *par) { @@ -497,8 +504,7 @@ set_target_v3_checkentry(const struct xt_tgchk_param *par) !(par->hook_mask & (1 << NF_INET_FORWARD | 1 << NF_INET_LOCAL_OUT | 1 << NF_INET_POST_ROUTING))) { - pr_warn("mapping of prio or/and queue is allowed only" - "from OUTPUT/FORWARD/POSTROUTING chains\n"); + pr_warn("mapping of prio or/and queue is allowed only from OUTPUT/FORWARD/POSTROUTING chains\n"); return -EINVAL; } index = ip_set_nfnl_get_byindex(par->net, @@ -519,8 +525,7 @@ set_target_v3_checkentry(const struct xt_tgchk_param *par) if (info->add_set.dim > IPSET_DIM_MAX || info->del_set.dim > IPSET_DIM_MAX || info->map_set.dim > IPSET_DIM_MAX) { - pr_warn("Protocol error: SET target dimension " - "is over the limit!\n"); + pr_warn("Protocol error: SET target dimension is over the limit!\n"); if (info->add_set.index != IPSET_INVALID_ID) ip_set_nfnl_put(par->net, info->add_set.index); if (info->del_set.index != IPSET_INVALID_ID) @@ -546,7 +551,6 @@ set_target_v3_destroy(const struct xt_tgdtor_param *par) ip_set_nfnl_put(par->net, info->map_set.index); } - static struct xt_match set_matches[] __read_mostly = { { .name = "set", -- cgit v1.2.3 From 711bdde6a884354ddae8da2fcb495b2a9364cc90 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 15 Jun 2015 09:57:30 -0700 Subject: netfilter: x_tables: remove XT_TABLE_INFO_SZ and a dereference. After Florian patches, there is no need for XT_TABLE_INFO_SZ anymore : Only one copy of table is kept, instead of one copy per cpu. We also can avoid a dereference if we put table data right after xt_table_info. It reduces register pressure and helps compiler. Then, we attempt a kmalloc() if total size is under order-3 allocation, to reduce TLB pressure, as in many cases, rules fit in 32 KB. Signed-off-by: Eric Dumazet Cc: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 5 +---- net/ipv4/netfilter/arp_tables.c | 4 ++-- net/ipv4/netfilter/ip_tables.c | 4 ++-- net/ipv6/netfilter/ip6_tables.c | 4 ++-- net/netfilter/x_tables.c | 32 ++++++++++++-------------------- 5 files changed, 19 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 9969d79dcde1..95693c4cebdd 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -225,12 +225,9 @@ struct xt_table_info { unsigned int __percpu *stackptr; void ***jumpstack; - /* Note : this field MUST be the last one, see XT_TABLE_INFO_SZ */ - void *entries; + unsigned char entries[0] __aligned(8); }; -#define XT_TABLE_INFO_SZ (offsetof(struct xt_table_info, entries) \ - + nr_cpu_ids * sizeof(char *)) int xt_register_target(struct xt_target *target); void xt_unregister_target(struct xt_target *target); int xt_register_targets(struct xt_target *target, unsigned int n); diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index d75c139393fc..95c9b6eece25 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -256,7 +256,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, const struct arphdr *arp; struct arpt_entry *e, *back; const char *indev, *outdev; - void *table_base; + const void *table_base; const struct xt_table_info *private; struct xt_action_param acpar; unsigned int addend; @@ -868,7 +868,7 @@ static int compat_table_info(const struct xt_table_info *info, struct xt_table_info *newinfo) { struct arpt_entry *iter; - void *loc_cpu_entry; + const void *loc_cpu_entry; int ret; if (!newinfo || !info) diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 6151500c2df8..6c72fbb7b49e 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -938,7 +938,7 @@ copy_entries_to_user(unsigned int total_size, struct xt_counters *counters; const struct xt_table_info *private = table->private; int ret = 0; - void *loc_cpu_entry; + const void *loc_cpu_entry; counters = alloc_counters(table); if (IS_ERR(counters)) @@ -1052,7 +1052,7 @@ static int compat_table_info(const struct xt_table_info *info, struct xt_table_info *newinfo) { struct ipt_entry *iter; - void *loc_cpu_entry; + const void *loc_cpu_entry; int ret; if (!newinfo || !info) diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 80a7f0d38aa4..3c35ced39b42 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -951,7 +951,7 @@ copy_entries_to_user(unsigned int total_size, struct xt_counters *counters; const struct xt_table_info *private = table->private; int ret = 0; - void *loc_cpu_entry; + const void *loc_cpu_entry; counters = alloc_counters(table); if (IS_ERR(counters)) @@ -1065,7 +1065,7 @@ static int compat_table_info(const struct xt_table_info *info, struct xt_table_info *newinfo) { struct ip6t_entry *iter; - void *loc_cpu_entry; + const void *loc_cpu_entry; int ret; if (!newinfo || !info) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 6062ce3e862c..d324fe71260c 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -658,29 +658,23 @@ EXPORT_SYMBOL_GPL(xt_compat_target_to_user); struct xt_table_info *xt_alloc_table_info(unsigned int size) { - struct xt_table_info *newinfo; + struct xt_table_info *info = NULL; + size_t sz = sizeof(*info) + size; /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */ if ((SMP_ALIGN(size) >> PAGE_SHIFT) + 2 > totalram_pages) return NULL; - newinfo = kzalloc(XT_TABLE_INFO_SZ, GFP_KERNEL); - if (!newinfo) - return NULL; - - newinfo->size = size; - - if (size <= PAGE_SIZE) - newinfo->entries = kmalloc(size, GFP_KERNEL); - else - newinfo->entries = vmalloc(size); - - if (newinfo->entries == NULL) { - xt_free_table_info(newinfo); - return NULL; + if (sz <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) + info = kmalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY); + if (!info) { + info = vmalloc(sz); + if (!info) + return NULL; } - - return newinfo; + memset(info, 0, sizeof(*info)); + info->size = size; + return info; } EXPORT_SYMBOL(xt_alloc_table_info); @@ -688,8 +682,6 @@ void xt_free_table_info(struct xt_table_info *info) { int cpu; - kvfree(info->entries); - if (info->jumpstack != NULL) { for_each_possible_cpu(cpu) kvfree(info->jumpstack[cpu]); @@ -698,7 +690,7 @@ void xt_free_table_info(struct xt_table_info *info) free_percpu(info->stackptr); - kfree(info); + kvfree(info); } EXPORT_SYMBOL(xt_free_table_info); -- cgit v1.2.3 From ffeedafbf0236f03aeb2e8db273b3e5ae5f5bc89 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 12 Jun 2015 19:39:12 -0700 Subject: bpf: introduce current->pid, tgid, uid, gid, comm accessors eBPF programs attached to kprobes need to filter based on current->pid, uid and other fields, so introduce helper functions: u64 bpf_get_current_pid_tgid(void) Return: current->tgid << 32 | current->pid u64 bpf_get_current_uid_gid(void) Return: current_gid << 32 | current_uid bpf_get_current_comm(char *buf, int size_of_buf) stores current->comm into buf They can be used from the programs attached to TC as well to classify packets based on current task fields. Update tracex2 example to print histogram of write syscalls for each process instead of aggregated for all. Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 3 +++ include/uapi/linux/bpf.h | 19 +++++++++++++ kernel/bpf/core.c | 3 +++ kernel/bpf/helpers.c | 58 +++++++++++++++++++++++++++++++++++++++ kernel/trace/bpf_trace.c | 6 +++++ net/core/filter.c | 6 +++++ samples/bpf/bpf_helpers.h | 6 +++++ samples/bpf/tracex2_kern.c | 24 +++++++++++++---- samples/bpf/tracex2_user.c | 67 +++++++++++++++++++++++++++++++++++++++------- 9 files changed, 178 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 2235aee8096a..1b9a3f5b27f6 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -188,5 +188,8 @@ extern const struct bpf_func_proto bpf_get_prandom_u32_proto; extern const struct bpf_func_proto bpf_get_smp_processor_id_proto; extern const struct bpf_func_proto bpf_tail_call_proto; extern const struct bpf_func_proto bpf_ktime_get_ns_proto; +extern const struct bpf_func_proto bpf_get_current_pid_tgid_proto; +extern const struct bpf_func_proto bpf_get_current_uid_gid_proto; +extern const struct bpf_func_proto bpf_get_current_comm_proto; #endif /* _LINUX_BPF_H */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 602f05b7a275..29ef6f99e43d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -230,6 +230,25 @@ enum bpf_func_id { * Return: 0 on success */ BPF_FUNC_clone_redirect, + + /** + * u64 bpf_get_current_pid_tgid(void) + * Return: current->tgid << 32 | current->pid + */ + BPF_FUNC_get_current_pid_tgid, + + /** + * u64 bpf_get_current_uid_gid(void) + * Return: current_gid << 32 | current_uid + */ + BPF_FUNC_get_current_uid_gid, + + /** + * bpf_get_current_comm(char *buf, int size_of_buf) + * stores current->comm into buf + * Return: 0 on success + */ + BPF_FUNC_get_current_comm, __BPF_FUNC_MAX_ID, }; diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 1e00aa3316dc..1fc45cc83076 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -730,6 +730,9 @@ const struct bpf_func_proto bpf_map_delete_elem_proto __weak; const struct bpf_func_proto bpf_get_prandom_u32_proto __weak; const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak; const struct bpf_func_proto bpf_ktime_get_ns_proto __weak; +const struct bpf_func_proto bpf_get_current_pid_tgid_proto __weak; +const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak; +const struct bpf_func_proto bpf_get_current_comm_proto __weak; /* Always built-in helper functions. */ const struct bpf_func_proto bpf_tail_call_proto = { diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 7ad5d8842d5b..1447ec09421e 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -14,6 +14,8 @@ #include #include #include +#include +#include /* If kernel subsystem is allowing eBPF programs to call this function, * inside its own verifier_ops->get_func_proto() callback it should return @@ -124,3 +126,59 @@ const struct bpf_func_proto bpf_ktime_get_ns_proto = { .gpl_only = true, .ret_type = RET_INTEGER, }; + +static u64 bpf_get_current_pid_tgid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + struct task_struct *task = current; + + if (!task) + return -EINVAL; + + return (u64) task->tgid << 32 | task->pid; +} + +const struct bpf_func_proto bpf_get_current_pid_tgid_proto = { + .func = bpf_get_current_pid_tgid, + .gpl_only = false, + .ret_type = RET_INTEGER, +}; + +static u64 bpf_get_current_uid_gid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + struct task_struct *task = current; + kuid_t uid; + kgid_t gid; + + if (!task) + return -EINVAL; + + current_uid_gid(&uid, &gid); + return (u64) from_kgid(&init_user_ns, gid) << 32 | + from_kuid(&init_user_ns, uid); +} + +const struct bpf_func_proto bpf_get_current_uid_gid_proto = { + .func = bpf_get_current_uid_gid, + .gpl_only = false, + .ret_type = RET_INTEGER, +}; + +static u64 bpf_get_current_comm(u64 r1, u64 size, u64 r3, u64 r4, u64 r5) +{ + struct task_struct *task = current; + char *buf = (char *) (long) r1; + + if (!task) + return -EINVAL; + + memcpy(buf, task->comm, min_t(size_t, size, sizeof(task->comm))); + return 0; +} + +const struct bpf_func_proto bpf_get_current_comm_proto = { + .func = bpf_get_current_comm, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_STACK, + .arg2_type = ARG_CONST_STACK_SIZE, +}; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 50c4015a8ad3..3a17638cdf46 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -162,6 +162,12 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func return &bpf_ktime_get_ns_proto; case BPF_FUNC_tail_call: return &bpf_tail_call_proto; + case BPF_FUNC_get_current_pid_tgid: + return &bpf_get_current_pid_tgid_proto; + case BPF_FUNC_get_current_uid_gid: + return &bpf_get_current_uid_gid_proto; + case BPF_FUNC_get_current_comm: + return &bpf_get_current_comm_proto; case BPF_FUNC_trace_printk: /* diff --git a/net/core/filter.c b/net/core/filter.c index d271c06bf01f..20aa51ccbf9d 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1459,6 +1459,12 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) return &bpf_l4_csum_replace_proto; case BPF_FUNC_clone_redirect: return &bpf_clone_redirect_proto; + case BPF_FUNC_get_current_pid_tgid: + return &bpf_get_current_pid_tgid_proto; + case BPF_FUNC_get_current_uid_gid: + return &bpf_get_current_uid_gid_proto; + case BPF_FUNC_get_current_comm: + return &bpf_get_current_comm_proto; default: return sk_filter_func_proto(func_id); } diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h index f531a0b3282d..bdf1c1607b80 100644 --- a/samples/bpf/bpf_helpers.h +++ b/samples/bpf/bpf_helpers.h @@ -25,6 +25,12 @@ static void (*bpf_tail_call)(void *ctx, void *map, int index) = (void *) BPF_FUNC_tail_call; static unsigned long long (*bpf_get_smp_processor_id)(void) = (void *) BPF_FUNC_get_smp_processor_id; +static unsigned long long (*bpf_get_current_pid_tgid)(void) = + (void *) BPF_FUNC_get_current_pid_tgid; +static unsigned long long (*bpf_get_current_uid_gid)(void) = + (void *) BPF_FUNC_get_current_uid_gid; +static int (*bpf_get_current_comm)(void *buf, int buf_size) = + (void *) BPF_FUNC_get_current_comm; /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions diff --git a/samples/bpf/tracex2_kern.c b/samples/bpf/tracex2_kern.c index 19ec1cfc45db..dc50f4f2943f 100644 --- a/samples/bpf/tracex2_kern.c +++ b/samples/bpf/tracex2_kern.c @@ -62,11 +62,18 @@ static unsigned int log2l(unsigned long v) return log2(v); } +struct hist_key { + char comm[16]; + u64 pid_tgid; + u64 uid_gid; + u32 index; +}; + struct bpf_map_def SEC("maps") my_hist_map = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(u32), + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(struct hist_key), .value_size = sizeof(long), - .max_entries = 64, + .max_entries = 1024, }; SEC("kprobe/sys_write") @@ -75,11 +82,18 @@ int bpf_prog3(struct pt_regs *ctx) long write_size = ctx->dx; /* arg3 */ long init_val = 1; long *value; - u32 index = log2l(write_size); + struct hist_key key = {}; + + key.index = log2l(write_size); + key.pid_tgid = bpf_get_current_pid_tgid(); + key.uid_gid = bpf_get_current_uid_gid(); + bpf_get_current_comm(&key.comm, sizeof(key.comm)); - value = bpf_map_lookup_elem(&my_hist_map, &index); + value = bpf_map_lookup_elem(&my_hist_map, &key); if (value) __sync_fetch_and_add(value, 1); + else + bpf_map_update_elem(&my_hist_map, &key, &init_val, BPF_ANY); return 0; } char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/tracex2_user.c b/samples/bpf/tracex2_user.c index 91b8d0896fbb..cd0241c1447a 100644 --- a/samples/bpf/tracex2_user.c +++ b/samples/bpf/tracex2_user.c @@ -3,6 +3,7 @@ #include #include #include +#include #include "libbpf.h" #include "bpf_load.h" @@ -20,23 +21,42 @@ static void stars(char *str, long val, long max, int width) str[i] = '\0'; } -static void print_hist(int fd) +struct task { + char comm[16]; + __u64 pid_tgid; + __u64 uid_gid; +}; + +struct hist_key { + struct task t; + __u32 index; +}; + +#define SIZE sizeof(struct task) + +static void print_hist_for_pid(int fd, void *task) { - int key; + struct hist_key key = {}, next_key; + char starstr[MAX_STARS]; long value; long data[MAX_INDEX] = {}; - char starstr[MAX_STARS]; - int i; int max_ind = -1; long max_value = 0; + int i, ind; - for (key = 0; key < MAX_INDEX; key++) { - bpf_lookup_elem(fd, &key, &value); - data[key] = value; - if (value && key > max_ind) - max_ind = key; + while (bpf_get_next_key(fd, &key, &next_key) == 0) { + if (memcmp(&next_key, task, SIZE)) { + key = next_key; + continue; + } + bpf_lookup_elem(fd, &next_key, &value); + ind = next_key.index; + data[ind] = value; + if (value && ind > max_ind) + max_ind = ind; if (value > max_value) max_value = value; + key = next_key; } printf(" syscall write() stats\n"); @@ -48,6 +68,35 @@ static void print_hist(int fd) MAX_STARS, starstr); } } + +static void print_hist(int fd) +{ + struct hist_key key = {}, next_key; + static struct task tasks[1024]; + int task_cnt = 0; + int i; + + while (bpf_get_next_key(fd, &key, &next_key) == 0) { + int found = 0; + + for (i = 0; i < task_cnt; i++) + if (memcmp(&tasks[i], &next_key, SIZE) == 0) + found = 1; + if (!found) + memcpy(&tasks[task_cnt++], &next_key, SIZE); + key = next_key; + } + + for (i = 0; i < task_cnt; i++) { + printf("\npid %d cmd %s uid %d\n", + (__u32) tasks[i].pid_tgid, + tasks[i].comm, + (__u32) tasks[i].uid_gid); + print_hist_for_pid(fd, &tasks[i]); + } + +} + static void int_exit(int sig) { print_hist(map_fd[1]); -- cgit v1.2.3 From 0756ea3e85139d23a8148ebaa95411c2f0aa4f11 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 12 Jun 2015 19:39:13 -0700 Subject: bpf: allow networking programs to use bpf_trace_printk() for debugging bpf_trace_printk() is a helper function used to debug eBPF programs. Let socket and TC programs use it as well. Note, it's DEBUG ONLY helper. If it's used in the program, the kernel will print warning banner to make sure users don't use it in production. Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 1 + kernel/bpf/core.c | 4 ++++ kernel/trace/bpf_trace.c | 20 ++++++++++++-------- net/core/filter.c | 2 ++ 4 files changed, 19 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 1b9a3f5b27f6..4383476a0d48 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -150,6 +150,7 @@ struct bpf_array { u64 bpf_tail_call(u64 ctx, u64 r2, u64 index, u64 r4, u64 r5); void bpf_prog_array_map_clear(struct bpf_map *map); bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp); +const struct bpf_func_proto *bpf_get_trace_printk_proto(void); #ifdef CONFIG_BPF_SYSCALL void bpf_register_prog_type(struct bpf_prog_type_list *tl); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 1fc45cc83076..c5bedc82bc1c 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -733,6 +733,10 @@ const struct bpf_func_proto bpf_ktime_get_ns_proto __weak; const struct bpf_func_proto bpf_get_current_pid_tgid_proto __weak; const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak; const struct bpf_func_proto bpf_get_current_comm_proto __weak; +const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void) +{ + return NULL; +} /* Always built-in helper functions. */ const struct bpf_func_proto bpf_tail_call_proto = { diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 3a17638cdf46..4f9b5d41869b 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -147,6 +147,17 @@ static const struct bpf_func_proto bpf_trace_printk_proto = { .arg2_type = ARG_CONST_STACK_SIZE, }; +const struct bpf_func_proto *bpf_get_trace_printk_proto(void) +{ + /* + * this program might be calling bpf_trace_printk, + * so allocate per-cpu printk buffers + */ + trace_printk_init_buffers(); + + return &bpf_trace_printk_proto; +} + static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func_id) { switch (func_id) { @@ -168,15 +179,8 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func return &bpf_get_current_uid_gid_proto; case BPF_FUNC_get_current_comm: return &bpf_get_current_comm_proto; - case BPF_FUNC_trace_printk: - /* - * this program might be calling bpf_trace_printk, - * so allocate per-cpu printk buffers - */ - trace_printk_init_buffers(); - - return &bpf_trace_printk_proto; + return bpf_get_trace_printk_proto(); default: return NULL; } diff --git a/net/core/filter.c b/net/core/filter.c index 20aa51ccbf9d..65ff107d3d29 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1442,6 +1442,8 @@ sk_filter_func_proto(enum bpf_func_id func_id) return &bpf_tail_call_proto; case BPF_FUNC_ktime_get_ns: return &bpf_ktime_get_ns_proto; + case BPF_FUNC_trace_printk: + return bpf_get_trace_printk_proto(); default: return NULL; } -- cgit v1.2.3 From 9464ca650008615d28d9aaf7de99b4edf61f0109 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 12 Jun 2015 19:44:48 -0700 Subject: net: make u64_stats_init() a function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using a function instead of a macro is cleaner and remove following W=1 warnings (extract) In file included from net/ipv6/ip6_vti.c:29:0: net/ipv6/ip6_vti.c: In function ‘vti6_dev_init_gen’: include/linux/netdevice.h:2029:18: warning: variable ‘stat’ set but not used [-Wunused-but-set-variable] typeof(type) *stat; \ ^ net/ipv6/ip6_vti.c:862:16: note: in expansion of macro ‘netdev_alloc_pcpu_stats’ dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); ^ CC [M] net/ipv6/sit.o In file included from net/ipv6/sit.c:30:0: net/ipv6/sit.c: In function ‘ipip6_tunnel_init’: include/linux/netdevice.h:2029:18: warning: variable ‘stat’ set but not used [-Wunused-but-set-variable] typeof(type) *stat; \ ^ Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/u64_stats_sync.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h index 4b4439e75f45..df89c9bcba7d 100644 --- a/include/linux/u64_stats_sync.h +++ b/include/linux/u64_stats_sync.h @@ -68,11 +68,12 @@ struct u64_stats_sync { }; +static inline void u64_stats_init(struct u64_stats_sync *syncp) +{ #if BITS_PER_LONG == 32 && defined(CONFIG_SMP) -# define u64_stats_init(syncp) seqcount_init(syncp.seq) -#else -# define u64_stats_init(syncp) do { } while (0) + seqcount_init(&syncp->seq); #endif +} static inline void u64_stats_update_begin(struct u64_stats_sync *syncp) { -- cgit v1.2.3 From 47d8417f5914012c794684f651213ffae1b91619 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Mon, 15 Jun 2015 17:58:58 +0300 Subject: net/mlx4_core: Add sink counter Reserve the last valid counter index for "sink" counter, when a new counter cannot be allocated, the driver will use this counter. In order to avoid allocating this counter on any other flow, fix the indices bitmap allocation range, and reserve the sink counter index. Add macro for the sink counter index and replace all appearences of the index with the macro. Signed-off-by: Eran Ben Elisha Signed-off-by: Hadar Hen Zion Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/infiniband/hw/mlx4/qp.c | 3 ++- drivers/net/ethernet/mellanox/mlx4/en_resources.c | 2 +- drivers/net/ethernet/mellanox/mlx4/main.c | 11 +++++++---- include/linux/mlx4/device.h | 1 + 4 files changed, 11 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 02fc91c68027..c7d916fab68a 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1544,7 +1544,8 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, dev->counters[qp->port - 1]; optpar |= MLX4_QP_OPTPAR_COUNTER_INDEX; } else - context->pri_path.counter_index = 0xff; + context->pri_path.counter_index = + MLX4_SINK_COUNTER_INDEX(dev->dev); if (qp->flags & MLX4_IB_QP_NETIF) { mlx4_ib_steer_qp_reg(dev, qp, 1); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_resources.c b/drivers/net/ethernet/mellanox/mlx4/en_resources.c index 34f2fdf4fe5d..97bcb9135f8d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_resources.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_resources.c @@ -66,7 +66,7 @@ void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride, context->pri_path.sched_queue |= user_prio << 3; context->pri_path.feup = MLX4_FEUP_FORCE_ETH_UP; } - context->pri_path.counter_index = 0xff; + context->pri_path.counter_index = MLX4_SINK_COUNTER_INDEX(mdev->dev); context->cqn_send = cpu_to_be32(cqn); context->cqn_recv = cpu_to_be32(cqn); context->db_rec_addr = cpu_to_be64(priv->res.db.dma << 2); diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 1072dc1054dd..2bf54687f9fb 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -479,7 +479,7 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) } } - dev->caps.max_counters = 1 << ilog2(dev_cap->max_counters); + dev->caps.max_counters = dev_cap->max_counters; dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] = dev_cap->reserved_qps; dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] = @@ -2193,13 +2193,16 @@ err_free_icm: static int mlx4_init_counters_table(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); - int nent; + int nent_pow2; if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS)) return -ENOENT; - nent = dev->caps.max_counters; - return mlx4_bitmap_init(&priv->counters_bitmap, nent, nent - 1, 0, 0); + nent_pow2 = roundup_pow_of_two(dev->caps.max_counters); + /* reserve last counter index for sink counter */ + return mlx4_bitmap_init(&priv->counters_bitmap, nent_pow2, + nent_pow2 - 1, 0, + nent_pow2 - dev->caps.max_counters + 1); } static void mlx4_cleanup_counters_table(struct mlx4_dev *dev) diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index ad31e476873f..312c50420dde 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -957,6 +957,7 @@ struct mlx4_mad_ifc { ((dev)->caps.flags & MLX4_DEV_CAP_FLAG_IBOE)) #define MLX4_INVALID_SLAVE_ID 0xFF +#define MLX4_SINK_COUNTER_INDEX(dev) (dev->caps.max_counters - 1) void handle_port_mgmt_change_event(struct work_struct *work); -- cgit v1.2.3 From 6de5f7f6a1fa2288552d46b3effbb6d5571413e5 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Mon, 15 Jun 2015 17:59:02 +0300 Subject: net/mlx4_core: Allocate default counter per port Default counter per port will be allocated at the mlx4 core driver load. Every QP opened by the Ethernet driver will be attached to the port's default counter. This is an infrastructure step to collect VF statistics from the PF. Signed-off-by: Eran Ben Elisha Signed-off-by: Hadar Hen Zion Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 5 ++ drivers/net/ethernet/mellanox/mlx4/en_resources.c | 2 +- drivers/net/ethernet/mellanox/mlx4/main.c | 71 ++++++++++++++++++++++- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 1 + drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 1 + include/linux/mlx4/device.h | 1 + 6 files changed, 77 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 98efb5842fca..048fca0ca9a4 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1597,6 +1597,9 @@ int mlx4_en_start_port(struct net_device *dev) } mdev->mac_removed[priv->port] = 0; + priv->counter_index = + mlx4_get_default_counter_index(mdev->dev, priv->port); + err = mlx4_en_config_rss_steer(priv); if (err) { en_err(priv, "Failed configuring rss steering\n"); @@ -1755,6 +1758,7 @@ void mlx4_en_stop_port(struct net_device *dev, int detach) /* Set port as not active */ priv->port_up = false; + priv->counter_index = MLX4_SINK_COUNTER_INDEX(mdev->dev); /* Promsicuous mode */ if (mdev->dev->caps.steering_mode == @@ -2778,6 +2782,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, priv = netdev_priv(dev); memset(priv, 0, sizeof(struct mlx4_en_priv)); + priv->counter_index = MLX4_SINK_COUNTER_INDEX(mdev->dev); spin_lock_init(&priv->stats_lock); INIT_WORK(&priv->rx_mode_task, mlx4_en_do_set_rx_mode); INIT_WORK(&priv->watchdog_task, mlx4_en_restart); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_resources.c b/drivers/net/ethernet/mellanox/mlx4/en_resources.c index 97bcb9135f8d..e482fa1bb741 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_resources.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_resources.c @@ -66,7 +66,7 @@ void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride, context->pri_path.sched_queue |= user_prio << 3; context->pri_path.feup = MLX4_FEUP_FORCE_ETH_UP; } - context->pri_path.counter_index = MLX4_SINK_COUNTER_INDEX(mdev->dev); + context->pri_path.counter_index = priv->counter_index; context->cqn_send = cpu_to_be32(cqn); context->cqn_recv = cpu_to_be32(cqn); context->db_rec_addr = cpu_to_be64(priv->res.db.dma << 2); diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 8b27bfcf809d..4e69cf52a579 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -2219,6 +2219,47 @@ static void mlx4_cleanup_counters_table(struct mlx4_dev *dev) mlx4_bitmap_cleanup(&mlx4_priv(dev)->counters_bitmap); } +static void mlx4_cleanup_default_counters(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int port; + + for (port = 0; port < dev->caps.num_ports; port++) + if (priv->def_counter[port] != -1) + mlx4_counter_free(dev, priv->def_counter[port]); +} + +static int mlx4_allocate_default_counters(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int port, err = 0; + u32 idx; + + for (port = 0; port < dev->caps.num_ports; port++) + priv->def_counter[port] = -1; + + for (port = 0; port < dev->caps.num_ports; port++) { + err = mlx4_counter_alloc(dev, &idx); + + if (!err || err == -ENOSPC) { + priv->def_counter[port] = idx; + } else if (err == -ENOENT) { + err = 0; + continue; + } else { + mlx4_err(dev, "%s: failed to allocate default counter port %d err %d\n", + __func__, port + 1, err); + mlx4_cleanup_default_counters(dev); + return err; + } + + mlx4_dbg(dev, "%s: default counter index %d for port %d\n", + __func__, priv->def_counter[port], port + 1); + } + + return err; +} + int __mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -2227,8 +2268,10 @@ int __mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx) return -ENOENT; *idx = mlx4_bitmap_alloc(&priv->counters_bitmap); - if (*idx == -1) - return -ENOMEM; + if (*idx == -1) { + *idx = MLX4_SINK_COUNTER_INDEX(dev); + return -ENOSPC; + } return 0; } @@ -2275,6 +2318,9 @@ void __mlx4_counter_free(struct mlx4_dev *dev, u32 idx) if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS)) return; + if (idx == MLX4_SINK_COUNTER_INDEX(dev)) + return; + __mlx4_clear_if_stat(dev, idx); mlx4_bitmap_free(&mlx4_priv(dev)->counters_bitmap, idx, MLX4_USE_RR); @@ -2296,6 +2342,14 @@ void mlx4_counter_free(struct mlx4_dev *dev, u32 idx) } EXPORT_SYMBOL_GPL(mlx4_counter_free); +int mlx4_get_default_counter_index(struct mlx4_dev *dev, int port) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + + return priv->def_counter[port - 1]; +} +EXPORT_SYMBOL_GPL(mlx4_get_default_counter_index); + void mlx4_set_admin_guid(struct mlx4_dev *dev, __be64 guid, int entry, int port) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -2439,6 +2493,12 @@ static int mlx4_setup_hca(struct mlx4_dev *dev) } } + err = mlx4_allocate_default_counters(dev); + if (err) { + mlx4_err(dev, "Failed to allocate default counters, aborting\n"); + goto err_counters_table_free; + } + if (!mlx4_is_slave(dev)) { for (port = 1; port <= dev->caps.num_ports; port++) { ib_port_default_caps = 0; @@ -2470,13 +2530,16 @@ static int mlx4_setup_hca(struct mlx4_dev *dev) if (err) { mlx4_err(dev, "Failed to set port %d, aborting\n", port); - goto err_counters_table_free; + goto err_default_countes_free; } } } return 0; +err_default_countes_free: + mlx4_cleanup_default_counters(dev); + err_counters_table_free: if (!mlx4_is_slave(dev)) mlx4_cleanup_counters_table(dev); @@ -3212,6 +3275,7 @@ err_port: for (--port; port >= 1; --port) mlx4_cleanup_port_info(&priv->port[port]); + mlx4_cleanup_default_counters(dev); if (!mlx4_is_slave(dev)) mlx4_cleanup_counters_table(dev); mlx4_cleanup_qp_table(dev); @@ -3511,6 +3575,7 @@ static void mlx4_unload_one(struct pci_dev *pdev) mlx4_free_resource_tracker(dev, RES_TR_FREE_SLAVES_ONLY); + mlx4_cleanup_default_counters(dev); if (!mlx4_is_slave(dev)) mlx4_cleanup_counters_table(dev); mlx4_cleanup_qp_table(dev); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 8d2d3594bf8f..e9e94bc0e75d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -876,6 +876,7 @@ struct mlx4_priv { struct mlx4_qp_table qp_table; struct mlx4_mcg_table mcg_table; struct mlx4_bitmap counters_bitmap; + int def_counter[MLX4_MAX_PORTS]; struct mlx4_catas_err catas_err; diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index edd8fd69ec9a..f6c3e128cc0c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -582,6 +582,7 @@ struct mlx4_en_priv { int base_tx_qpn; struct hlist_head mac_hash[MLX4_EN_MAC_HASH_SIZE]; struct hwtstamp_config hwtstamp_config; + u32 counter_index; #ifdef CONFIG_MLX4_EN_DCB struct ieee_ets ets; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 312c50420dde..4820080ac394 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -1348,6 +1348,7 @@ int mlx4_wol_write(struct mlx4_dev *dev, u64 config, int port); int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx); void mlx4_counter_free(struct mlx4_dev *dev, u32 idx); +int mlx4_get_default_counter_index(struct mlx4_dev *dev, int port); void mlx4_set_admin_guid(struct mlx4_dev *dev, __be64 guid, int entry, int port); -- cgit v1.2.3 From 9616982f3fcc9e6577d7f41009c4ef2df19a71ec Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Mon, 15 Jun 2015 17:59:05 +0300 Subject: net/mlx4_core: Add helper to query counters This is an infrastructure step for querying VF and PF counters. This code was in the IB driver, move it to the mlx4 core driver so it will be accessible for more use cases. Signed-off-by: Eran Ben Elisha Signed-off-by: Hadar Hen Zion Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/infiniband/hw/mlx4/mad.c | 32 +++++------------- drivers/net/ethernet/mellanox/mlx4/cmd.c | 57 ++++++++++++++++++++++++++++++++ include/linux/mlx4/cmd.h | 3 ++ include/linux/mlx4/device.h | 8 +++++ 4 files changed, 76 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index bc698b14683f..bc09b4e1f57c 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -64,14 +64,6 @@ enum { #define GUID_TBL_BLK_NUM_ENTRIES 8 #define GUID_TBL_BLK_SIZE (GUID_TBL_ENTRY_SIZE * GUID_TBL_BLK_NUM_ENTRIES) -/* Counters should be saturate once they reach their maximum value */ -#define ASSIGN_32BIT_COUNTER(counter, value) do {\ - if ((value) > U32_MAX) \ - counter = cpu_to_be32(U32_MAX); \ - else \ - counter = cpu_to_be32(value); \ -} while (0) - struct mlx4_mad_rcv_buf { struct ib_grh grh; u8 payload[256]; @@ -828,31 +820,25 @@ static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, struct ib_wc *in_wc, struct ib_grh *in_grh, struct ib_mad *in_mad, struct ib_mad *out_mad) { - struct mlx4_cmd_mailbox *mailbox; + struct mlx4_counter counter_stats; struct mlx4_ib_dev *dev = to_mdev(ibdev); int err; - u32 inmod = dev->counters[port_num - 1].index & 0xffff; - u8 mode; if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_PERF_MGMT) return -EINVAL; - mailbox = mlx4_alloc_cmd_mailbox(dev->dev); - if (IS_ERR(mailbox)) - return IB_MAD_RESULT_FAILURE; - - err = mlx4_cmd_box(dev->dev, 0, mailbox->dma, inmod, 0, - MLX4_CMD_QUERY_IF_STAT, MLX4_CMD_TIME_CLASS_C, - MLX4_CMD_WRAPPED); + memset(&counter_stats, 0, sizeof(counter_stats)); + err = mlx4_get_counter_stats(dev->dev, + dev->counters[port_num - 1].index, + &counter_stats, 0); if (err) err = IB_MAD_RESULT_FAILURE; else { memset(out_mad->data, 0, sizeof out_mad->data); - mode = ((struct mlx4_counter *)mailbox->buf)->counter_mode; - switch (mode & 0xf) { + switch (counter_stats.counter_mode & 0xf) { case 0: - edit_counter(mailbox->buf, - (void *)(out_mad->data + 40)); + edit_counter(&counter_stats, + (void *)(out_mad->data + 40)); err = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; break; default: @@ -860,8 +846,6 @@ static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, } } - mlx4_free_cmd_mailbox(dev->dev, mailbox); - return err; } diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 68ae765873a9..44b8f7715ade 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -49,6 +49,7 @@ #include "mlx4.h" #include "fw.h" #include "fw_qos.h" +#include "mlx4_stats.h" #define CMD_POLL_TOKEN 0xffff #define INBOX_MASK 0xffffffffffffff00ULL @@ -3166,6 +3167,62 @@ int mlx4_set_vf_link_state(struct mlx4_dev *dev, int port, int vf, int link_stat } EXPORT_SYMBOL_GPL(mlx4_set_vf_link_state); +int mlx4_get_counter_stats(struct mlx4_dev *dev, int counter_index, + struct mlx4_counter *counter_stats, int reset) +{ + struct mlx4_cmd_mailbox *mailbox = NULL; + struct mlx4_counter *tmp_counter; + int err; + u32 if_stat_in_mod; + + if (!counter_stats) + return -EINVAL; + + if (counter_index == MLX4_SINK_COUNTER_INDEX(dev)) + return 0; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + memset(mailbox->buf, 0, sizeof(struct mlx4_counter)); + if_stat_in_mod = counter_index; + if (reset) + if_stat_in_mod |= MLX4_QUERY_IF_STAT_RESET; + err = mlx4_cmd_box(dev, 0, mailbox->dma, + if_stat_in_mod, 0, + MLX4_CMD_QUERY_IF_STAT, + MLX4_CMD_TIME_CLASS_C, + MLX4_CMD_NATIVE); + if (err) { + mlx4_dbg(dev, "%s: failed to read statistics for counter index %d\n", + __func__, counter_index); + goto if_stat_out; + } + tmp_counter = (struct mlx4_counter *)mailbox->buf; + counter_stats->counter_mode = tmp_counter->counter_mode; + if (counter_stats->counter_mode == 0) { + counter_stats->rx_frames = + cpu_to_be64(be64_to_cpu(counter_stats->rx_frames) + + be64_to_cpu(tmp_counter->rx_frames)); + counter_stats->tx_frames = + cpu_to_be64(be64_to_cpu(counter_stats->tx_frames) + + be64_to_cpu(tmp_counter->tx_frames)); + counter_stats->rx_bytes = + cpu_to_be64(be64_to_cpu(counter_stats->rx_bytes) + + be64_to_cpu(tmp_counter->rx_bytes)); + counter_stats->tx_bytes = + cpu_to_be64(be64_to_cpu(counter_stats->tx_bytes) + + be64_to_cpu(tmp_counter->tx_bytes)); + } + +if_stat_out: + mlx4_free_cmd_mailbox(dev, mailbox); + + return err; +} +EXPORT_SYMBOL_GPL(mlx4_get_counter_stats); + int mlx4_vf_smi_enabled(struct mlx4_dev *dev, int slave, int port) { struct mlx4_priv *priv = mlx4_priv(dev); diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index f62e7cf227c6..5dffc869988b 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -35,6 +35,7 @@ #include #include +#include enum { /* initialization and general commands */ @@ -300,6 +301,8 @@ static inline int mlx4_cmd_imm(struct mlx4_dev *dev, u64 in_param, u64 *out_para struct mlx4_cmd_mailbox *mlx4_alloc_cmd_mailbox(struct mlx4_dev *dev); void mlx4_free_cmd_mailbox(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox); +int mlx4_get_counter_stats(struct mlx4_dev *dev, int counter_index, + struct mlx4_counter *counter_stats, int reset); u32 mlx4_comm_get_version(void); int mlx4_set_vf_mac(struct mlx4_dev *dev, int port, int vf, u64 mac); int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos); diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 4820080ac394..efe80c754b2f 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -771,6 +771,14 @@ union mlx4_ext_av { struct mlx4_eth_av eth; }; +/* Counters should be saturate once they reach their maximum value */ +#define ASSIGN_32BIT_COUNTER(counter, value) do { \ + if ((value) > U32_MAX) \ + counter = cpu_to_be32(U32_MAX); \ + else \ + counter = cpu_to_be32(value); \ +} while (0) + struct mlx4_counter { u8 reserved1[3]; u8 counter_mode; -- cgit v1.2.3 From 3b766cd832328fcb87db3507e7b98cf42f21689d Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Mon, 15 Jun 2015 17:59:07 +0300 Subject: net/core: Add reading VF statistics through the PF netdevice Add ndo_get_vf_stats where the PF retrieves and fills the VFs traffic statistics. We encode the VF stats in a nested manner to allow for future extensions. Signed-off-by: Eran Ben Elisha Signed-off-by: Hadar Hen Zion Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/if_link.h | 9 ++++++++ include/linux/netdevice.h | 4 ++++ include/uapi/linux/if_link.h | 13 +++++++++++ net/core/rtnetlink.c | 51 ++++++++++++++++++++++++++++++++++++++++++-- 4 files changed, 75 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_link.h b/include/linux/if_link.h index da4929927f69..ae5d0d22955d 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -5,6 +5,15 @@ /* We don't want this structure exposed to user space */ +struct ifla_vf_stats { + __u64 rx_packets; + __u64 tx_packets; + __u64 rx_bytes; + __u64 tx_bytes; + __u64 broadcast; + __u64 multicast; +}; + struct ifla_vf_info { __u32 vf; __u8 mac[32]; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 6f5f71ff5169..e20979dfd6a9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1100,6 +1100,10 @@ struct net_device_ops { struct ifla_vf_info *ivf); int (*ndo_set_vf_link_state)(struct net_device *dev, int vf, int link_state); + int (*ndo_get_vf_stats)(struct net_device *dev, + int vf, + struct ifla_vf_stats + *vf_stats); int (*ndo_set_vf_port)(struct net_device *dev, int vf, struct nlattr *port[]); diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 510efb360580..2c7e8e3d3981 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -484,6 +484,7 @@ enum { IFLA_VF_RSS_QUERY_EN, /* RSS Redirection Table and Hash Key query * on/off switch */ + IFLA_VF_STATS, /* network device statistics */ __IFLA_VF_MAX, }; @@ -533,6 +534,18 @@ struct ifla_vf_rss_query_en { __u32 setting; }; +enum { + IFLA_VF_STATS_RX_PACKETS, + IFLA_VF_STATS_TX_PACKETS, + IFLA_VF_STATS_RX_BYTES, + IFLA_VF_STATS_TX_BYTES, + IFLA_VF_STATS_BROADCAST, + IFLA_VF_STATS_MULTICAST, + __IFLA_VF_STATS_MAX, +}; + +#define IFLA_VF_STATS_MAX (__IFLA_VF_STATS_MAX - 1) + /* VF ports management section * * Nested layout of set/get msg is: diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 077b6d280371..2d102ce1474f 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -819,7 +819,19 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev, nla_total_size(sizeof(struct ifla_vf_spoofchk)) + nla_total_size(sizeof(struct ifla_vf_rate)) + nla_total_size(sizeof(struct ifla_vf_link_state)) + - nla_total_size(sizeof(struct ifla_vf_rss_query_en))); + nla_total_size(sizeof(struct ifla_vf_rss_query_en)) + + /* IFLA_VF_STATS_RX_PACKETS */ + nla_total_size(sizeof(__u64)) + + /* IFLA_VF_STATS_TX_PACKETS */ + nla_total_size(sizeof(__u64)) + + /* IFLA_VF_STATS_RX_BYTES */ + nla_total_size(sizeof(__u64)) + + /* IFLA_VF_STATS_TX_BYTES */ + nla_total_size(sizeof(__u64)) + + /* IFLA_VF_STATS_BROADCAST */ + nla_total_size(sizeof(__u64)) + + /* IFLA_VF_STATS_MULTICAST */ + nla_total_size(sizeof(__u64))); return size; } else return 0; @@ -1123,7 +1135,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, && (ext_filter_mask & RTEXT_FILTER_VF)) { int i; - struct nlattr *vfinfo, *vf; + struct nlattr *vfinfo, *vf, *vfstats; int num_vfs = dev_num_vf(dev->dev.parent); vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST); @@ -1138,6 +1150,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, struct ifla_vf_spoofchk vf_spoofchk; struct ifla_vf_link_state vf_linkstate; struct ifla_vf_rss_query_en vf_rss_query_en; + struct ifla_vf_stats vf_stats; /* * Not all SR-IOV capable drivers support the @@ -1190,6 +1203,30 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, sizeof(vf_rss_query_en), &vf_rss_query_en)) goto nla_put_failure; + memset(&vf_stats, 0, sizeof(vf_stats)); + if (dev->netdev_ops->ndo_get_vf_stats) + dev->netdev_ops->ndo_get_vf_stats(dev, i, + &vf_stats); + vfstats = nla_nest_start(skb, IFLA_VF_STATS); + if (!vfstats) { + nla_nest_cancel(skb, vf); + nla_nest_cancel(skb, vfinfo); + goto nla_put_failure; + } + if (nla_put_u64(skb, IFLA_VF_STATS_RX_PACKETS, + vf_stats.rx_packets) || + nla_put_u64(skb, IFLA_VF_STATS_TX_PACKETS, + vf_stats.tx_packets) || + nla_put_u64(skb, IFLA_VF_STATS_RX_BYTES, + vf_stats.rx_bytes) || + nla_put_u64(skb, IFLA_VF_STATS_TX_BYTES, + vf_stats.tx_bytes) || + nla_put_u64(skb, IFLA_VF_STATS_BROADCAST, + vf_stats.broadcast) || + nla_put_u64(skb, IFLA_VF_STATS_MULTICAST, + vf_stats.multicast)) + goto nla_put_failure; + nla_nest_end(skb, vfstats); nla_nest_end(skb, vf); } nla_nest_end(skb, vfinfo); @@ -1303,6 +1340,16 @@ static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = { [IFLA_VF_RATE] = { .len = sizeof(struct ifla_vf_rate) }, [IFLA_VF_LINK_STATE] = { .len = sizeof(struct ifla_vf_link_state) }, [IFLA_VF_RSS_QUERY_EN] = { .len = sizeof(struct ifla_vf_rss_query_en) }, + [IFLA_VF_STATS] = { .type = NLA_NESTED }, +}; + +static const struct nla_policy ifla_vf_stats_policy[IFLA_VF_STATS_MAX + 1] = { + [IFLA_VF_STATS_RX_PACKETS] = { .type = NLA_U64 }, + [IFLA_VF_STATS_TX_PACKETS] = { .type = NLA_U64 }, + [IFLA_VF_STATS_RX_BYTES] = { .type = NLA_U64 }, + [IFLA_VF_STATS_TX_BYTES] = { .type = NLA_U64 }, + [IFLA_VF_STATS_BROADCAST] = { .type = NLA_U64 }, + [IFLA_VF_STATS_MULTICAST] = { .type = NLA_U64 }, }; static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = { -- cgit v1.2.3 From 62a890557f57e6cbebe9cc6c32aef045405d4fa2 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Mon, 15 Jun 2015 17:59:08 +0300 Subject: net/mlx4_en: Support ndo_get_vf_stats Implement the ndo to gather VF statistics through the PF. All counters related to this VF are stored in a per slave list, run over the slave's list and collect all statistics. Signed-off-by: Eran Ben Elisha Signed-off-by: Hadar Hen Zion Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/cmd.c | 30 ++++++++++++++ drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 10 +++++ drivers/net/ethernet/mellanox/mlx4/mlx4.h | 2 + .../net/ethernet/mellanox/mlx4/resource_tracker.c | 48 ++++++++++++++++++++++ include/linux/mlx4/cmd.h | 3 ++ 5 files changed, 93 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 44b8f7715ade..82040137d7d9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -3223,6 +3223,36 @@ if_stat_out: } EXPORT_SYMBOL_GPL(mlx4_get_counter_stats); +int mlx4_get_vf_stats(struct mlx4_dev *dev, int port, int vf_idx, + struct ifla_vf_stats *vf_stats) +{ + struct mlx4_counter tmp_vf_stats; + int slave; + int err = 0; + + if (!vf_stats) + return -EINVAL; + + if (!mlx4_is_master(dev)) + return -EPROTONOSUPPORT; + + slave = mlx4_get_slave_indx(dev, vf_idx); + if (slave < 0) + return -EINVAL; + + port = mlx4_slaves_closest_port(dev, slave, port); + err = mlx4_calc_vf_counters(dev, slave, port, &tmp_vf_stats); + if (!err && tmp_vf_stats.counter_mode == 0) { + vf_stats->rx_packets = be64_to_cpu(tmp_vf_stats.rx_frames); + vf_stats->tx_packets = be64_to_cpu(tmp_vf_stats.tx_frames); + vf_stats->rx_bytes = be64_to_cpu(tmp_vf_stats.rx_bytes); + vf_stats->tx_bytes = be64_to_cpu(tmp_vf_stats.tx_bytes); + } + + return err; +} +EXPORT_SYMBOL_GPL(mlx4_get_vf_stats); + int mlx4_vf_smi_enabled(struct mlx4_dev *dev, int slave, int port) { struct mlx4_priv *priv = mlx4_priv(dev); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index f9142f22d630..77179d7ae4cc 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2292,6 +2292,15 @@ static int mlx4_en_set_vf_link_state(struct net_device *dev, int vf, int link_st return mlx4_set_vf_link_state(mdev->dev, en_priv->port, vf, link_state); } +static int mlx4_en_get_vf_stats(struct net_device *dev, int vf, + struct ifla_vf_stats *vf_stats) +{ + struct mlx4_en_priv *en_priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = en_priv->mdev; + + return mlx4_get_vf_stats(mdev->dev, en_priv->port, vf, vf_stats); +} + #define PORT_ID_BYTE_LEN 8 static int mlx4_en_get_phys_port_id(struct net_device *dev, struct netdev_phys_item_id *ppid) @@ -2489,6 +2498,7 @@ static const struct net_device_ops mlx4_netdev_ops_master = { .ndo_set_vf_rate = mlx4_en_set_vf_rate, .ndo_set_vf_spoofchk = mlx4_en_set_vf_spoofchk, .ndo_set_vf_link_state = mlx4_en_set_vf_link_state, + .ndo_get_vf_stats = mlx4_en_get_vf_stats, .ndo_get_vf_config = mlx4_en_get_vf_config, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = mlx4_en_netpoll, diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index e9e94bc0e75d..a092c5c34d43 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -1010,6 +1010,8 @@ int __mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, int start_index, int npages, u64 *page_list); int __mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx); void __mlx4_counter_free(struct mlx4_dev *dev, u32 idx); +int mlx4_calc_vf_counters(struct mlx4_dev *dev, int slave, int port, + struct mlx4_counter *data); int __mlx4_xrcd_alloc(struct mlx4_dev *dev, u32 *xrcdn); void __mlx4_xrcd_free(struct mlx4_dev *dev, u32 xrcdn); diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 73db584bc240..731423ca575d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -46,6 +46,7 @@ #include "mlx4.h" #include "fw.h" +#include "mlx4_stats.h" #define MLX4_MAC_VALID (1ull << 63) #define MLX4_PF_COUNTERS_PER_PORT 2 @@ -1147,6 +1148,53 @@ static struct res_common *alloc_tr(u64 id, enum mlx4_resource type, int slave, return ret; } +int mlx4_calc_vf_counters(struct mlx4_dev *dev, int slave, int port, + struct mlx4_counter *data) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; + struct res_common *tmp; + struct res_counter *counter; + int *counters_arr; + int i = 0, err = 0; + + memset(data, 0, sizeof(*data)); + + counters_arr = kmalloc_array(dev->caps.max_counters, + sizeof(*counters_arr), GFP_KERNEL); + if (!counters_arr) + return -ENOMEM; + + spin_lock_irq(mlx4_tlock(dev)); + list_for_each_entry(tmp, + &tracker->slave_list[slave].res_list[RES_COUNTER], + list) { + counter = container_of(tmp, struct res_counter, com); + if (counter->port == port) { + counters_arr[i] = (int)tmp->res_id; + i++; + } + } + spin_unlock_irq(mlx4_tlock(dev)); + counters_arr[i] = -1; + + i = 0; + + while (counters_arr[i] != -1) { + err = mlx4_get_counter_stats(dev, counters_arr[i], data, + 0); + if (err) { + memset(data, 0, sizeof(*data)); + goto table_changed; + } + i++; + } + +table_changed: + kfree(counters_arr); + return 0; +} + static int add_res_range(struct mlx4_dev *dev, int slave, u64 base, int count, enum mlx4_resource type, int extra) { diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index 5dffc869988b..58391f2e0414 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -36,6 +36,7 @@ #include #include #include +#include enum { /* initialization and general commands */ @@ -303,6 +304,8 @@ void mlx4_free_cmd_mailbox(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbo int mlx4_get_counter_stats(struct mlx4_dev *dev, int counter_index, struct mlx4_counter *counter_stats, int reset); +int mlx4_get_vf_stats(struct mlx4_dev *dev, int port, int vf_idx, + struct ifla_vf_stats *vf_stats); u32 mlx4_comm_get_version(void); int mlx4_set_vf_mac(struct mlx4_dev *dev, int port, int vf, u64 mac); int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos); -- cgit v1.2.3 From eb4cb008529ca08e0d8c0fa54e8f739520197a65 Mon Sep 17 00:00:00 2001 From: Craig Gallek Date: Mon, 15 Jun 2015 11:26:18 -0400 Subject: sock_diag: define destruction multicast groups These groups will contain socket-destruction events for AF_INET/AF_INET6, IPPROTO_TCP/IPPROTO_UDP. Near the end of socket destruction, a check for listeners is performed. In the presence of a listener, rather than completely cleanup the socket, a unit of work will be added to a private work queue which will first broadcast information about the socket and then finish the cleanup operation. Signed-off-by: Craig Gallek Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/sock_diag.h | 42 +++++++++++++++++++++ include/net/sock.h | 1 + include/uapi/linux/sock_diag.h | 10 +++++ net/core/sock.c | 11 +++++- net/core/sock_diag.c | 85 ++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 148 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h index 083ac388098e..fddebc617469 100644 --- a/include/linux/sock_diag.h +++ b/include/linux/sock_diag.h @@ -1,7 +1,10 @@ #ifndef __SOCK_DIAG_H__ #define __SOCK_DIAG_H__ +#include #include +#include +#include #include struct sk_buff; @@ -11,6 +14,7 @@ struct sock; struct sock_diag_handler { __u8 family; int (*dump)(struct sk_buff *skb, struct nlmsghdr *nlh); + int (*get_info)(struct sk_buff *skb, struct sock *sk); }; int sock_diag_register(const struct sock_diag_handler *h); @@ -26,4 +30,42 @@ int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attr); int sock_diag_put_filterinfo(bool may_report_filterinfo, struct sock *sk, struct sk_buff *skb, int attrtype); +static inline +enum sknetlink_groups sock_diag_destroy_group(const struct sock *sk) +{ + switch (sk->sk_family) { + case AF_INET: + switch (sk->sk_protocol) { + case IPPROTO_TCP: + return SKNLGRP_INET_TCP_DESTROY; + case IPPROTO_UDP: + return SKNLGRP_INET_UDP_DESTROY; + default: + return SKNLGRP_NONE; + } + case AF_INET6: + switch (sk->sk_protocol) { + case IPPROTO_TCP: + return SKNLGRP_INET6_TCP_DESTROY; + case IPPROTO_UDP: + return SKNLGRP_INET6_UDP_DESTROY; + default: + return SKNLGRP_NONE; + } + default: + return SKNLGRP_NONE; + } +} + +static inline +bool sock_diag_has_destroy_listeners(const struct sock *sk) +{ + const struct net *n = sock_net(sk); + const enum sknetlink_groups group = sock_diag_destroy_group(sk); + + return group != SKNLGRP_NONE && n->diag_nlsk && + netlink_has_listeners(n->diag_nlsk, group); +} +void sock_diag_broadcast_destroy(struct sock *sk); + #endif diff --git a/include/net/sock.h b/include/net/sock.h index 26c1c3171e00..3e8258699270 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1518,6 +1518,7 @@ static inline void unlock_sock_fast(struct sock *sk, bool slow) struct sock *sk_alloc(struct net *net, int family, gfp_t priority, struct proto *prot, int kern); void sk_free(struct sock *sk); +void sk_destruct(struct sock *sk); struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority); struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, diff --git a/include/uapi/linux/sock_diag.h b/include/uapi/linux/sock_diag.h index b00e29efb161..49230d36f9ce 100644 --- a/include/uapi/linux/sock_diag.h +++ b/include/uapi/linux/sock_diag.h @@ -23,4 +23,14 @@ enum { SK_MEMINFO_VARS, }; +enum sknetlink_groups { + SKNLGRP_NONE, + SKNLGRP_INET_TCP_DESTROY, + SKNLGRP_INET_UDP_DESTROY, + SKNLGRP_INET6_TCP_DESTROY, + SKNLGRP_INET6_UDP_DESTROY, + __SKNLGRP_MAX, +}; +#define SKNLGRP_MAX (__SKNLGRP_MAX - 1) + #endif /* _UAPI__SOCK_DIAG_H__ */ diff --git a/net/core/sock.c b/net/core/sock.c index 7063c329c1b6..1e1fe9a68d83 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -131,6 +131,7 @@ #include #include #include +#include #include @@ -1423,7 +1424,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, } EXPORT_SYMBOL(sk_alloc); -static void __sk_free(struct sock *sk) +void sk_destruct(struct sock *sk) { struct sk_filter *filter; @@ -1451,6 +1452,14 @@ static void __sk_free(struct sock *sk) sk_prot_free(sk->sk_prot_creator, sk); } +static void __sk_free(struct sock *sk) +{ + if (unlikely(sock_diag_has_destroy_listeners(sk))) + sock_diag_broadcast_destroy(sk); + else + sk_destruct(sk); +} + void sk_free(struct sock *sk) { /* diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index 74dddf84adcd..d79866c5f8bc 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -5,6 +5,9 @@ #include #include #include +#include +#include +#include #include #include @@ -12,6 +15,7 @@ static const struct sock_diag_handler *sock_diag_handlers[AF_MAX]; static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh); static DEFINE_MUTEX(sock_diag_table_mutex); +static struct workqueue_struct *broadcast_wq; static u64 sock_gen_cookie(struct sock *sk) { @@ -101,6 +105,62 @@ out: } EXPORT_SYMBOL(sock_diag_put_filterinfo); +struct broadcast_sk { + struct sock *sk; + struct work_struct work; +}; + +static size_t sock_diag_nlmsg_size(void) +{ + return NLMSG_ALIGN(sizeof(struct inet_diag_msg) + + nla_total_size(sizeof(u8)) /* INET_DIAG_PROTOCOL */ + + nla_total_size(sizeof(struct tcp_info))); /* INET_DIAG_INFO */ +} + +static void sock_diag_broadcast_destroy_work(struct work_struct *work) +{ + struct broadcast_sk *bsk = + container_of(work, struct broadcast_sk, work); + struct sock *sk = bsk->sk; + const struct sock_diag_handler *hndl; + struct sk_buff *skb; + const enum sknetlink_groups group = sock_diag_destroy_group(sk); + int err = -1; + + WARN_ON(group == SKNLGRP_NONE); + + skb = nlmsg_new(sock_diag_nlmsg_size(), GFP_KERNEL); + if (!skb) + goto out; + + mutex_lock(&sock_diag_table_mutex); + hndl = sock_diag_handlers[sk->sk_family]; + if (hndl && hndl->get_info) + err = hndl->get_info(skb, sk); + mutex_unlock(&sock_diag_table_mutex); + + if (!err) + nlmsg_multicast(sock_net(sk)->diag_nlsk, skb, 0, group, + GFP_KERNEL); + else + kfree_skb(skb); +out: + sk_destruct(sk); + kfree(bsk); +} + +void sock_diag_broadcast_destroy(struct sock *sk) +{ + /* Note, this function is often called from an interrupt context. */ + struct broadcast_sk *bsk = + kmalloc(sizeof(struct broadcast_sk), GFP_ATOMIC); + if (!bsk) + return sk_destruct(sk); + bsk->sk = sk; + INIT_WORK(&bsk->work, sock_diag_broadcast_destroy_work); + queue_work(broadcast_wq, &bsk->work); +} + void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh)) { mutex_lock(&sock_diag_table_mutex); @@ -211,10 +271,32 @@ static void sock_diag_rcv(struct sk_buff *skb) mutex_unlock(&sock_diag_mutex); } +static int sock_diag_bind(struct net *net, int group) +{ + switch (group) { + case SKNLGRP_INET_TCP_DESTROY: + case SKNLGRP_INET_UDP_DESTROY: + if (!sock_diag_handlers[AF_INET]) + request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK, + NETLINK_SOCK_DIAG, AF_INET); + break; + case SKNLGRP_INET6_TCP_DESTROY: + case SKNLGRP_INET6_UDP_DESTROY: + if (!sock_diag_handlers[AF_INET6]) + request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK, + NETLINK_SOCK_DIAG, AF_INET); + break; + } + return 0; +} + static int __net_init diag_net_init(struct net *net) { struct netlink_kernel_cfg cfg = { + .groups = SKNLGRP_MAX, .input = sock_diag_rcv, + .bind = sock_diag_bind, + .flags = NL_CFG_F_NONROOT_RECV, }; net->diag_nlsk = netlink_kernel_create(net, NETLINK_SOCK_DIAG, &cfg); @@ -234,12 +316,15 @@ static struct pernet_operations diag_net_ops = { static int __init sock_diag_init(void) { + broadcast_wq = alloc_workqueue("sock_diag_events", 0, 0); + BUG_ON(!broadcast_wq); return register_pernet_subsys(&diag_net_ops); } static void __exit sock_diag_exit(void) { unregister_pernet_subsys(&diag_net_ops); + destroy_workqueue(broadcast_wq); } module_init(sock_diag_init); -- cgit v1.2.3 From 3fd22af808f4d7455ba91596d334438c7ee0f889 Mon Sep 17 00:00:00 2001 From: Craig Gallek Date: Mon, 15 Jun 2015 11:26:19 -0400 Subject: sock_diag: specify info_size per inet protocol Previously, there was no clear distinction between the inet protocols that used struct tcp_info to report information and those that didn't. This change adds a specific size attribute to the inet_diag_handler struct which defines these interfaces. This will make dispatching sock_diag get_info requests identical for all inet protocols in a following patch. Tested: ss -au Tested: ss -at Signed-off-by: Craig Gallek Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/inet_diag.h | 1 + net/dccp/diag.c | 1 + net/ipv4/inet_diag.c | 4 ++-- net/ipv4/tcp_diag.c | 1 + net/ipv4/udp_diag.c | 2 ++ 5 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index ac48b10c9395..0e707f0c1a3e 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -24,6 +24,7 @@ struct inet_diag_handler { struct inet_diag_msg *r, void *info); __u16 idiag_type; + __u16 idiag_info_size; }; struct inet_connection_sock; diff --git a/net/dccp/diag.c b/net/dccp/diag.c index 5a45f8de5d99..2d84303ea6bf 100644 --- a/net/dccp/diag.c +++ b/net/dccp/diag.c @@ -66,6 +66,7 @@ static const struct inet_diag_handler dccp_diag_handler = { .dump_one = dccp_diag_dump_one, .idiag_get_info = dccp_diag_get_info, .idiag_type = IPPROTO_DCCP, + .idiag_info_size = sizeof(struct tcp_info), }; static int __init dccp_diag_init(void) diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 4d32262c7502..b1f01174bf32 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -200,9 +200,9 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, } #undef EXPIRES_IN_MS - if (ext & (1 << (INET_DIAG_INFO - 1))) { + if ((ext & (1 << (INET_DIAG_INFO - 1))) && handler->idiag_info_size) { attr = nla_reserve(skb, INET_DIAG_INFO, - sizeof(struct tcp_info)); + handler->idiag_info_size); if (!attr) goto errout; diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 79b34a0f4a4a..423e3881a40b 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -50,6 +50,7 @@ static const struct inet_diag_handler tcp_diag_handler = { .dump_one = tcp_diag_dump_one, .idiag_get_info = tcp_diag_get_info, .idiag_type = IPPROTO_TCP, + .idiag_info_size = sizeof(struct tcp_info), }; static int __init tcp_diag_init(void) diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index b763c39ae1d7..6116604bf6e8 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -170,6 +170,7 @@ static const struct inet_diag_handler udp_diag_handler = { .dump_one = udp_diag_dump_one, .idiag_get_info = udp_diag_get_info, .idiag_type = IPPROTO_UDP, + .idiag_info_size = 0, }; static void udplite_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, @@ -190,6 +191,7 @@ static const struct inet_diag_handler udplite_diag_handler = { .dump_one = udplite_diag_dump_one, .idiag_get_info = udp_diag_get_info, .idiag_type = IPPROTO_UDPLITE, + .idiag_info_size = 0, }; static int __init udp_diag_init(void) -- cgit v1.2.3 From a1a56aaa0735c7821e85c37268a7c12e132415cb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 15 Jun 2015 18:10:13 -0700 Subject: netfilter: x_tables: align per cpu xt_counter Let's force a 16 bytes alignment on xt_counter percpu allocations, so that bytes and packets sit in same cache line. xt_counter being exported to user space, we cannot add __align(16) on the structure itself. Signed-off-by: Eric Dumazet Cc: Florian Westphal Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 95693c4cebdd..1c97a2204379 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -356,7 +356,8 @@ static inline unsigned long ifname_compare_aligned(const char *_a, * so nothing needs to be done there. * * xt_percpu_counter_alloc returns the address of the percpu - * counter, or 0 on !SMP. + * counter, or 0 on !SMP. We force an alignment of 16 bytes + * so that bytes/packets share a common cache line. * * Hence caller must use IS_ERR_VALUE to check for error, this * allows us to return 0 for single core systems without forcing @@ -365,7 +366,8 @@ static inline unsigned long ifname_compare_aligned(const char *_a, static inline u64 xt_percpu_counter_alloc(void) { if (nr_cpu_ids > 1) { - void __percpu *res = alloc_percpu(struct xt_counters); + void __percpu *res = __alloc_percpu(sizeof(struct xt_counters), + sizeof(struct xt_counters)); if (res == NULL) return (u64) -ENOMEM; -- cgit v1.2.3 From a263653ed798216c0069922d7b5237ca49436007 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 17 Jun 2015 10:28:27 -0500 Subject: netfilter: don't pull include/linux/netfilter.h from netns headers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This pulls the full hook netfilter definitions from all those that include net_namespace.h. Instead let's just include the bare minimum required in the new linux/netfilter_defs.h file, and use it from the netfilter netns header files. I also needed to include in.h and in6.h from linux/netfilter.h otherwise we hit this compilation error: In file included from include/linux/netfilter_defs.h:4:0, from include/net/netns/netfilter.h:4, from include/net/net_namespace.h:22, from include/linux/netdevice.h:43, from net/netfilter/nfnetlink_queue_core.c:23: include/uapi/linux/netfilter.h:76:17: error: field ‘in’ has incomplete type struct in_addr in; And also explicit include linux/netfilter.h in several spots. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Eric W. Biederman --- include/linux/netfilter.h | 6 ++---- include/linux/netfilter_defs.h | 9 +++++++++ include/net/netns/netfilter.h | 2 +- include/net/netns/x_tables.h | 2 +- include/uapi/linux/netfilter.h | 3 ++- net/ipv6/output_core.c | 1 + 6 files changed, 16 insertions(+), 7 deletions(-) create mode 100644 include/linux/netfilter_defs.h (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index f5ff5d156da8..00050dfd9f23 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -10,7 +10,8 @@ #include #include #include -#include +#include + #ifdef CONFIG_NETFILTER static inline int NF_DROP_GETERR(int verdict) { @@ -38,9 +39,6 @@ static inline void nf_inet_addr_mask(const union nf_inet_addr *a1, int netfilter_init(void); -/* Largest hook number + 1 */ -#define NF_MAX_HOOKS 8 - struct sk_buff; struct nf_hook_ops; diff --git a/include/linux/netfilter_defs.h b/include/linux/netfilter_defs.h new file mode 100644 index 000000000000..d3a7f8597e82 --- /dev/null +++ b/include/linux/netfilter_defs.h @@ -0,0 +1,9 @@ +#ifndef __LINUX_NETFILTER_CORE_H_ +#define __LINUX_NETFILTER_CORE_H_ + +#include + +/* Largest hook number + 1, see uapi/linux/netfilter_decnet.h */ +#define NF_MAX_HOOKS 8 + +#endif diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h index cf25b5e35f3c..532e4ba64f49 100644 --- a/include/net/netns/netfilter.h +++ b/include/net/netns/netfilter.h @@ -1,7 +1,7 @@ #ifndef __NETNS_NETFILTER_H #define __NETNS_NETFILTER_H -#include +#include struct proc_dir_entry; struct nf_logger; diff --git a/include/net/netns/x_tables.h b/include/net/netns/x_tables.h index 4d6597ad6067..c8a7681efa6a 100644 --- a/include/net/netns/x_tables.h +++ b/include/net/netns/x_tables.h @@ -2,7 +2,7 @@ #define __NETNS_X_TABLES_H #include -#include +#include struct ebt_table; diff --git a/include/uapi/linux/netfilter.h b/include/uapi/linux/netfilter.h index 177027cce6b3..d93f949d1d9a 100644 --- a/include/uapi/linux/netfilter.h +++ b/include/uapi/linux/netfilter.h @@ -4,7 +4,8 @@ #include #include #include - +#include +#include /* Responses from hook functions. */ #define NF_DROP 0 diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index 21678acd4521..928a0fb0b744 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -8,6 +8,7 @@ #include #include #include +#include static u32 __ipv6_select_ident(struct net *net, u32 hashrnd, const struct in6_addr *dst, -- cgit v1.2.3 From dcb8f5c8139ef945cdfd55900fae265c4dbefc02 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 17 Jun 2015 23:58:28 +0200 Subject: netfilter: xtables: fix warnings on 32bit platforms On 32bit archs gcc complains due to cast from void* to u64. Add intermediate casts to long to silence these warnings. include/linux/netfilter/x_tables.h:376:10: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] include/linux/netfilter/x_tables.h:384:15: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] include/linux/netfilter/x_tables.h:391:23: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] include/linux/netfilter/x_tables.h:400:22: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] Fixes: 71ae0dff02d756e ("netfilter: xtables: use percpu rule counters") Reported-by: kbuild test robot Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 1c97a2204379..286098a5667f 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -372,7 +372,7 @@ static inline u64 xt_percpu_counter_alloc(void) if (res == NULL) return (u64) -ENOMEM; - return (__force u64) res; + return (u64) (__force unsigned long) res; } return 0; @@ -380,14 +380,14 @@ static inline u64 xt_percpu_counter_alloc(void) static inline void xt_percpu_counter_free(u64 pcnt) { if (nr_cpu_ids > 1) - free_percpu((void __percpu *) pcnt); + free_percpu((void __percpu *) (unsigned long) pcnt); } static inline struct xt_counters * xt_get_this_cpu_counter(struct xt_counters *cnt) { if (nr_cpu_ids > 1) - return this_cpu_ptr((void __percpu *) cnt->pcnt); + return this_cpu_ptr((void __percpu *) (unsigned long) cnt->pcnt); return cnt; } @@ -396,7 +396,7 @@ static inline struct xt_counters * xt_get_per_cpu_counter(struct xt_counters *cnt, unsigned int cpu) { if (nr_cpu_ids > 1) - return per_cpu_ptr((void __percpu *) cnt->pcnt, cpu); + return per_cpu_ptr((void __percpu *) (unsigned long) cnt->pcnt, cpu); return cnt; } -- cgit v1.2.3 From dfe816c5e37272f2f3c1311f0e9934e1b4229261 Mon Sep 17 00:00:00 2001 From: Pankaj Gupta Date: Fri, 19 Jun 2015 19:47:53 +0530 Subject: macvtap: Increase limit of macvtap queues Macvtap should be compatible with tuntap for maximum number of queues. commit 'baf71c5c1f80d82e92924050a60b5baaf97e3094 (tuntap: Increase the number of queues in tun.)' removes the limitations and increases number of queues in tuntap. Now, Its safe to increase number of queues in Macvtap as well. This patch also modifies 'macvtap_del_queues' function to avoid extra memory allocation in stack. Changes from v1->v2 : Michael S. Tsirkin, Jason Wang : Better way to use linked list to avoid use of extra memory in stack. Sergei Shtylyov : Specify dependent commit's summary. Signed-off-by: Pankaj Gupta Signed-off-by: David S. Miller --- drivers/net/macvtap.c | 10 ++-------- include/linux/if_macvlan.h | 2 +- 2 files changed, 3 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 483afb19596c..6a64197f5bce 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -263,27 +263,21 @@ out: static void macvtap_del_queues(struct net_device *dev) { struct macvlan_dev *vlan = netdev_priv(dev); - struct macvtap_queue *q, *tmp, *qlist[MAX_MACVTAP_QUEUES]; - int i, j = 0; + struct macvtap_queue *q, *tmp; ASSERT_RTNL(); list_for_each_entry_safe(q, tmp, &vlan->queue_list, next) { list_del_init(&q->next); - qlist[j++] = q; RCU_INIT_POINTER(q->vlan, NULL); if (q->enabled) vlan->numvtaps--; vlan->numqueues--; + sock_put(&q->sk); } - for (i = 0; i < vlan->numvtaps; i++) - RCU_INIT_POINTER(vlan->taps[i], NULL); BUG_ON(vlan->numvtaps); BUG_ON(vlan->numqueues); /* guarantee that any future macvtap_set_queue will fail */ vlan->numvtaps = MAX_MACVTAP_QUEUES; - - for (--j; j >= 0; j--) - sock_put(&qlist[j]->sk); } static rx_handler_result_t macvtap_handle_frame(struct sk_buff **pskb) diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h index 6f6929ea8a0c..a4ccc3122f93 100644 --- a/include/linux/if_macvlan.h +++ b/include/linux/if_macvlan.h @@ -29,7 +29,7 @@ struct macvtap_queue; * Maximum times a macvtap device can be opened. This can be used to * configure the number of receive queue, e.g. for multiqueue virtio. */ -#define MAX_MACVTAP_QUEUES 16 +#define MAX_MACVTAP_QUEUES 256 #define MACVLAN_MC_FILTER_BITS 8 #define MACVLAN_MC_FILTER_SZ (1 << MACVLAN_MC_FILTER_BITS) -- cgit v1.2.3 From 7d4f8d871ab15bd50a5771382ca2c9355b38d73c Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Mon, 22 Jun 2015 00:27:17 -0700 Subject: switchdev; add VLAN support for port's bridge_getlink One more missing piece of the puzzle. Add vlan dump support to switchdev port's bridge_getlink. iproute2 "bridge vlan show" cmd already knows how to show the vlans installed on the bridge and the device , but (until now) no one implemented the port vlan part of the netlink PF_BRIDGE:RTM_GETLINK msg. Before this patch, "bridge vlan show": $ bridge -c vlan show port vlan ids sw1p1 30-34 << bridge side vlans 57 sw1p1 << device side vlans (missing) sw1p2 57 sw1p2 sw1p3 sw1p4 br0 None (When the port is bridged, the output repeats the vlan list for the vlans on the bridge side of the port and the vlans on the device side of the port. The listing above show no vlans for the device side even though they are installed). After this patch: $ bridge -c vlan show port vlan ids sw1p1 30-34 << bridge side vlan 57 sw1p1 30-34 << device side vlans 57 3840 PVID sw1p2 57 sw1p2 57 3840 PVID sw1p3 3842 PVID sw1p4 3843 PVID br0 None I re-used ndo_dflt_bridge_getlink to add vlan fill call-back func. switchdev support adds an obj dump for VLAN objects, using the same call-back scheme as FDB dump. Support included for both compressed and un-compressed vlan dumps. Signed-off-by: Scott Feldman Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_main.c | 2 +- drivers/net/ethernet/intel/i40e/i40e_main.c | 4 +- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 3 +- drivers/net/ethernet/rocker/rocker.c | 25 ++++++ include/linux/rtnetlink.h | 6 +- net/core/rtnetlink.c | 18 +++- net/switchdev/switchdev.c | 123 +++++++++++++++++++++++++- 7 files changed, 172 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index c0f34845cf59..6f642426308c 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -5096,7 +5096,7 @@ static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, return ndo_dflt_bridge_getlink(skb, pid, seq, dev, hsw_mode == PORT_FWD_TYPE_VEPA ? BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB, - 0, 0, nlflags); + 0, 0, nlflags, filter_mask, NULL); } #ifdef CONFIG_BE2NET_VXLAN diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 52d7d8b8f1f9..48a52b35b614 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -8069,7 +8069,7 @@ static int i40e_ndo_bridge_setlink(struct net_device *dev, #ifdef HAVE_BRIDGE_FILTER static int i40e_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, - u32 __always_unused filter_mask, int nlflags) + u32 filter_mask, int nlflags) #else static int i40e_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, int nlflags) @@ -8095,7 +8095,7 @@ static int i40e_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, return 0; return ndo_dflt_bridge_getlink(skb, pid, seq, dev, veb->bridge_mode, - nlflags); + nlflags, 0, 0, filter_mask, NULL); } #endif /* HAVE_BRIDGE_ATTRIBS */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 3bf2f3cfd9f6..9aa6104e34ea 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -8095,7 +8095,8 @@ static int ixgbe_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, return 0; return ndo_dflt_bridge_getlink(skb, pid, seq, dev, - adapter->bridge_mode, 0, 0, nlflags); + adapter->bridge_mode, 0, 0, nlflags, + filter_mask, NULL); } static void *ixgbe_fwd_add(struct net_device *pdev, struct net_device *vdev) diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index 7d5d92a10284..83e913b60d9c 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -4456,6 +4456,28 @@ static int rocker_port_fdb_dump(const struct rocker_port *rocker_port, return err; } +static int rocker_port_vlan_dump(const struct rocker_port *rocker_port, + struct switchdev_obj *obj) +{ + struct switchdev_obj_vlan *vlan = &obj->u.vlan; + u16 vid; + int err = 0; + + for (vid = 1; vid < VLAN_N_VID; vid++) { + if (!test_bit(vid, rocker_port->vlan_bitmap)) + continue; + vlan->flags = 0; + if (rocker_vlan_id_is_internal(htons(vid))) + vlan->flags |= BRIDGE_VLAN_INFO_PVID; + vlan->vid_begin = vlan->vid_end = vid; + err = obj->cb(rocker_port->dev, obj); + if (err) + break; + } + + return err; +} + static int rocker_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj) { @@ -4466,6 +4488,9 @@ static int rocker_port_obj_dump(struct net_device *dev, case SWITCHDEV_OBJ_PORT_FDB: err = rocker_port_fdb_dump(rocker_port, obj); break; + case SWITCHDEV_OBJ_PORT_VLAN: + err = rocker_port_vlan_dump(rocker_port, obj); + break; default: err = -EOPNOTSUPP; break; diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index a2324fb45cf4..39adaa9529eb 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -114,5 +114,9 @@ extern int ndo_dflt_fdb_del(struct ndmsg *ndm, extern int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, u16 mode, - u32 flags, u32 mask, int nlflags); + u32 flags, u32 mask, int nlflags, + u32 filter_mask, + int (*vlan_fill)(struct sk_buff *skb, + struct net_device *dev, + u32 filter_mask)); #endif /* __LINUX_RTNETLINK_H */ diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 2d102ce1474f..01ced4a889e0 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2908,7 +2908,11 @@ static int brport_nla_put_flag(struct sk_buff *skb, u32 flags, u32 mask, int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, u16 mode, - u32 flags, u32 mask, int nlflags) + u32 flags, u32 mask, int nlflags, + u32 filter_mask, + int (*vlan_fill)(struct sk_buff *skb, + struct net_device *dev, + u32 filter_mask)) { struct nlmsghdr *nlh; struct ifinfomsg *ifm; @@ -2916,6 +2920,7 @@ int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct nlattr *protinfo; u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN; struct net_device *br_dev = netdev_master_upper_dev_get(dev); + int err = 0; nlh = nlmsg_put(skb, pid, seq, RTM_NEWLINK, sizeof(*ifm), nlflags); if (nlh == NULL) @@ -2956,6 +2961,13 @@ int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, goto nla_put_failure; } } + if (vlan_fill) { + err = vlan_fill(skb, dev, filter_mask); + if (err) { + nla_nest_cancel(skb, br_afspec); + goto nla_put_failure; + } + } nla_nest_end(skb, br_afspec); protinfo = nla_nest_start(skb, IFLA_PROTINFO | NLA_F_NESTED); @@ -2989,9 +3001,9 @@ int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, return 0; nla_put_failure: nlmsg_cancel(skb, nlh); - return -EMSGSIZE; + return err ? err : -EMSGSIZE; } -EXPORT_SYMBOL(ndo_dflt_bridge_getlink); +EXPORT_SYMBOL_GPL(ndo_dflt_bridge_getlink); static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb) { diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 448d9199cea2..3e4b35a0c8cb 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -391,6 +391,126 @@ int call_switchdev_notifiers(unsigned long val, struct net_device *dev, } EXPORT_SYMBOL_GPL(call_switchdev_notifiers); +struct switchdev_vlan_dump { + struct switchdev_obj obj; + struct sk_buff *skb; + u32 filter_mask; + u16 flags; + u16 begin; + u16 end; +}; + +static int switchdev_port_vlan_dump_put(struct net_device *dev, + struct switchdev_vlan_dump *dump) +{ + struct bridge_vlan_info vinfo; + + vinfo.flags = dump->flags; + + if (dump->begin == 0 && dump->end == 0) { + return 0; + } else if (dump->begin == dump->end) { + vinfo.vid = dump->begin; + if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO, + sizeof(vinfo), &vinfo)) + return -EMSGSIZE; + } else { + vinfo.vid = dump->begin; + vinfo.flags |= BRIDGE_VLAN_INFO_RANGE_BEGIN; + if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO, + sizeof(vinfo), &vinfo)) + return -EMSGSIZE; + vinfo.vid = dump->end; + vinfo.flags &= ~BRIDGE_VLAN_INFO_RANGE_BEGIN; + vinfo.flags |= BRIDGE_VLAN_INFO_RANGE_END; + if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO, + sizeof(vinfo), &vinfo)) + return -EMSGSIZE; + } + + return 0; +} + +static int switchdev_port_vlan_dump_cb(struct net_device *dev, + struct switchdev_obj *obj) +{ + struct switchdev_vlan_dump *dump = + container_of(obj, struct switchdev_vlan_dump, obj); + struct switchdev_obj_vlan *vlan = &dump->obj.u.vlan; + int err = 0; + + if (vlan->vid_begin > vlan->vid_end) + return -EINVAL; + + if (dump->filter_mask & RTEXT_FILTER_BRVLAN) { + dump->flags = vlan->flags; + for (dump->begin = dump->end = vlan->vid_begin; + dump->begin <= vlan->vid_end; + dump->begin++, dump->end++) { + err = switchdev_port_vlan_dump_put(dev, dump); + if (err) + return err; + } + } else if (dump->filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED) { + if (dump->begin > vlan->vid_begin && + dump->begin >= vlan->vid_end) { + if ((dump->begin - 1) == vlan->vid_end && + dump->flags == vlan->flags) { + /* prepend */ + dump->begin = vlan->vid_begin; + } else { + err = switchdev_port_vlan_dump_put(dev, dump); + dump->flags = vlan->flags; + dump->begin = vlan->vid_begin; + dump->end = vlan->vid_end; + } + } else if (dump->end <= vlan->vid_begin && + dump->end < vlan->vid_end) { + if ((dump->end + 1) == vlan->vid_begin && + dump->flags == vlan->flags) { + /* append */ + dump->end = vlan->vid_end; + } else { + err = switchdev_port_vlan_dump_put(dev, dump); + dump->flags = vlan->flags; + dump->begin = vlan->vid_begin; + dump->end = vlan->vid_end; + } + } else { + err = -EINVAL; + } + } + + return err; +} + +static int switchdev_port_vlan_fill(struct sk_buff *skb, struct net_device *dev, + u32 filter_mask) +{ + struct switchdev_vlan_dump dump = { + .obj = { + .id = SWITCHDEV_OBJ_PORT_VLAN, + .cb = switchdev_port_vlan_dump_cb, + }, + .skb = skb, + .filter_mask = filter_mask, + }; + int err = 0; + + if ((filter_mask & RTEXT_FILTER_BRVLAN) || + (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)) { + err = switchdev_port_obj_dump(dev, &dump.obj); + if (err) + goto err_out; + if (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED) + /* last one */ + err = switchdev_port_vlan_dump_put(dev, &dump); + } + +err_out: + return err == -EOPNOTSUPP ? 0 : err; +} + /** * switchdev_port_bridge_getlink - Get bridge port attributes * @@ -415,7 +535,8 @@ int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, return err; return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode, - attr.u.brport_flags, mask, nlflags); + attr.u.brport_flags, mask, nlflags, + filter_mask, switchdev_port_vlan_fill); } EXPORT_SYMBOL_GPL(switchdev_port_bridge_getlink); -- cgit v1.2.3 From 0eeb075fad736fb92620af995c47c204bbb5e829 Mon Sep 17 00:00:00 2001 From: Andy Gospodarek Date: Tue, 23 Jun 2015 13:45:37 -0400 Subject: net: ipv4 sysctl option to ignore routes when nexthop link is down This feature is only enabled with the new per-interface or ipv4 global sysctls called 'ignore_routes_with_linkdown'. net.ipv4.conf.all.ignore_routes_with_linkdown = 0 net.ipv4.conf.default.ignore_routes_with_linkdown = 0 net.ipv4.conf.lo.ignore_routes_with_linkdown = 0 ... When the above sysctls are set, will report to userspace that a route is dead and will no longer resolve to this nexthop when performing a fib lookup. This will signal to userspace that the route will not be selected. The signalling of a RTNH_F_DEAD is only passed to userspace if the sysctl is enabled and link is down. This was done as without it the netlink listeners would have no idea whether or not a nexthop would be selected. The kernel only sets RTNH_F_DEAD internally if the interface has IFF_UP cleared. With the new sysctl set, the following behavior can be observed (interface p8p1 is link-down): default via 10.0.5.2 dev p9p1 10.0.5.0/24 dev p9p1 proto kernel scope link src 10.0.5.15 70.0.0.0/24 dev p7p1 proto kernel scope link src 70.0.0.1 80.0.0.0/24 dev p8p1 proto kernel scope link src 80.0.0.1 dead linkdown 90.0.0.0/24 via 80.0.0.2 dev p8p1 metric 1 dead linkdown 90.0.0.0/24 via 70.0.0.2 dev p7p1 metric 2 90.0.0.1 via 70.0.0.2 dev p7p1 src 70.0.0.1 cache local 80.0.0.1 dev lo src 80.0.0.1 cache 80.0.0.2 via 10.0.5.2 dev p9p1 src 10.0.5.15 cache While the route does remain in the table (so it can be modified if needed rather than being wiped away as it would be if IFF_UP was cleared), the proper next-hop is chosen automatically when the link is down. Now interface p8p1 is linked-up: default via 10.0.5.2 dev p9p1 10.0.5.0/24 dev p9p1 proto kernel scope link src 10.0.5.15 70.0.0.0/24 dev p7p1 proto kernel scope link src 70.0.0.1 80.0.0.0/24 dev p8p1 proto kernel scope link src 80.0.0.1 90.0.0.0/24 via 80.0.0.2 dev p8p1 metric 1 90.0.0.0/24 via 70.0.0.2 dev p7p1 metric 2 192.168.56.0/24 dev p2p1 proto kernel scope link src 192.168.56.2 90.0.0.1 via 80.0.0.2 dev p8p1 src 80.0.0.1 cache local 80.0.0.1 dev lo src 80.0.0.1 cache 80.0.0.2 dev p8p1 src 80.0.0.1 cache and the output changes to what one would expect. If the sysctl is not set, the following output would be expected when p8p1 is down: default via 10.0.5.2 dev p9p1 10.0.5.0/24 dev p9p1 proto kernel scope link src 10.0.5.15 70.0.0.0/24 dev p7p1 proto kernel scope link src 70.0.0.1 80.0.0.0/24 dev p8p1 proto kernel scope link src 80.0.0.1 linkdown 90.0.0.0/24 via 80.0.0.2 dev p8p1 metric 1 linkdown 90.0.0.0/24 via 70.0.0.2 dev p7p1 metric 2 Since the dead flag does not appear, there should be no expectation that the kernel would skip using this route due to link being down. v2: Split kernel changes into 2 patches, this actually makes a behavioral change if the sysctl is set. Also took suggestion from Alex to simplify code by only checking sysctl during fib lookup and suggestion from Scott to add a per-interface sysctl. v3: Code clean-ups to make it more readable and efficient as well as a reverse path check fix. v4: Drop binary sysctl v5: Whitespace fixups from Dave v6: Style changes from Dave and checkpatch suggestions v7: One more checkpatch fixup Signed-off-by: Andy Gospodarek Signed-off-by: Dinesh Dutt Acked-by: Scott Feldman Signed-off-by: David S. Miller --- include/linux/inetdevice.h | 3 +++ include/net/fib_rules.h | 3 ++- include/net/ip_fib.h | 16 +++++++++------- include/uapi/linux/ip.h | 1 + net/ipv4/devinet.c | 2 ++ net/ipv4/fib_frontend.c | 6 +++--- net/ipv4/fib_rules.c | 5 +++-- net/ipv4/fib_semantics.c | 33 ++++++++++++++++++++++++++++----- net/ipv4/fib_trie.c | 6 ++++++ net/ipv4/netfilter/ipt_rpfilter.c | 2 +- net/ipv4/route.c | 10 +++++----- 11 files changed, 63 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 0a21fbefdfbe..a4328cea376a 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -120,6 +120,9 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev) || (!IN_DEV_FORWARD(in_dev) && \ IN_DEV_ORCONF((in_dev), ACCEPT_REDIRECTS))) +#define IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) \ + IN_DEV_CONF_GET((in_dev), IGNORE_ROUTES_WITH_LINKDOWN) + #define IN_DEV_ARPFILTER(in_dev) IN_DEV_ORCONF((in_dev), ARPFILTER) #define IN_DEV_ARP_ACCEPT(in_dev) IN_DEV_ORCONF((in_dev), ARP_ACCEPT) #define IN_DEV_ARP_ANNOUNCE(in_dev) IN_DEV_MAXCONF((in_dev), ARP_ANNOUNCE) diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 6d67383a5114..903a55efbffe 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -36,7 +36,8 @@ struct fib_lookup_arg { void *result; struct fib_rule *rule; int flags; -#define FIB_LOOKUP_NOREF 1 +#define FIB_LOOKUP_NOREF 1 +#define FIB_LOOKUP_IGNORE_LINKSTATE 2 }; struct fib_rules_ops { diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index f73d27c5575a..49c142bdf01e 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -226,7 +226,7 @@ static inline struct fib_table *fib_new_table(struct net *net, u32 id) } static inline int fib_lookup(struct net *net, const struct flowi4 *flp, - struct fib_result *res) + struct fib_result *res, unsigned int flags) { struct fib_table *tb; int err = -ENETUNREACH; @@ -234,7 +234,7 @@ static inline int fib_lookup(struct net *net, const struct flowi4 *flp, rcu_read_lock(); tb = fib_get_table(net, RT_TABLE_MAIN); - if (tb && !fib_table_lookup(tb, flp, res, FIB_LOOKUP_NOREF)) + if (tb && !fib_table_lookup(tb, flp, res, flags | FIB_LOOKUP_NOREF)) err = 0; rcu_read_unlock(); @@ -249,16 +249,18 @@ void __net_exit fib4_rules_exit(struct net *net); struct fib_table *fib_new_table(struct net *net, u32 id); struct fib_table *fib_get_table(struct net *net, u32 id); -int __fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res); +int __fib_lookup(struct net *net, struct flowi4 *flp, + struct fib_result *res, unsigned int flags); static inline int fib_lookup(struct net *net, struct flowi4 *flp, - struct fib_result *res) + struct fib_result *res, unsigned int flags) { struct fib_table *tb; int err; + flags |= FIB_LOOKUP_NOREF; if (net->ipv4.fib_has_custom_rules) - return __fib_lookup(net, flp, res); + return __fib_lookup(net, flp, res, flags); rcu_read_lock(); @@ -266,11 +268,11 @@ static inline int fib_lookup(struct net *net, struct flowi4 *flp, for (err = 0; !err; err = -ENETUNREACH) { tb = rcu_dereference_rtnl(net->ipv4.fib_main); - if (tb && !fib_table_lookup(tb, flp, res, FIB_LOOKUP_NOREF)) + if (tb && !fib_table_lookup(tb, flp, res, flags)) break; tb = rcu_dereference_rtnl(net->ipv4.fib_default); - if (tb && !fib_table_lookup(tb, flp, res, FIB_LOOKUP_NOREF)) + if (tb && !fib_table_lookup(tb, flp, res, flags)) break; } diff --git a/include/uapi/linux/ip.h b/include/uapi/linux/ip.h index 411959405ab6..08f894d2ddbd 100644 --- a/include/uapi/linux/ip.h +++ b/include/uapi/linux/ip.h @@ -164,6 +164,7 @@ enum IPV4_DEVCONF_ROUTE_LOCALNET, IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL, IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL, + IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN, __IPV4_DEVCONF_MAX }; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 419d23c53ec7..7498716e8f54 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -2169,6 +2169,8 @@ static struct devinet_sysctl_table { "igmpv2_unsolicited_report_interval"), DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL, "igmpv3_unsolicited_report_interval"), + DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN, + "ignore_routes_with_linkdown"), DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"), DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"), diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 534eb1485045..6bbc54940eb4 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -280,7 +280,7 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb) fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); fl4.flowi4_scope = scope; fl4.flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0; - if (!fib_lookup(net, &fl4, &res)) + if (!fib_lookup(net, &fl4, &res, 0)) return FIB_RES_PREFSRC(net, res); } else { scope = RT_SCOPE_LINK; @@ -319,7 +319,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, fl4.flowi4_mark = IN_DEV_SRC_VMARK(idev) ? skb->mark : 0; net = dev_net(dev); - if (fib_lookup(net, &fl4, &res)) + if (fib_lookup(net, &fl4, &res, 0)) goto last_resort; if (res.type != RTN_UNICAST && (res.type != RTN_LOCAL || !IN_DEV_ACCEPT_LOCAL(idev))) @@ -354,7 +354,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, fl4.flowi4_oif = dev->ifindex; ret = 0; - if (fib_lookup(net, &fl4, &res) == 0) { + if (fib_lookup(net, &fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE) == 0) { if (res.type == RTN_UNICAST) ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; } diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 56151982f74e..18123d50f576 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -47,11 +47,12 @@ struct fib4_rule { #endif }; -int __fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res) +int __fib_lookup(struct net *net, struct flowi4 *flp, + struct fib_result *res, unsigned int flags) { struct fib_lookup_arg arg = { .result = res, - .flags = FIB_LOOKUP_NOREF, + .flags = flags, }; int err; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index b1b305b1e340..3bfccd83551c 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -623,7 +623,8 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, /* It is not necessary, but requires a bit of thinking */ if (fl4.flowi4_scope < RT_SCOPE_LINK) fl4.flowi4_scope = RT_SCOPE_LINK; - err = fib_lookup(net, &fl4, &res); + err = fib_lookup(net, &fl4, &res, + FIB_LOOKUP_IGNORE_LINKSTATE); if (err) { rcu_read_unlock(); return err; @@ -1035,12 +1036,20 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, nla_put_in_addr(skb, RTA_PREFSRC, fi->fib_prefsrc)) goto nla_put_failure; if (fi->fib_nhs == 1) { + struct in_device *in_dev; + if (fi->fib_nh->nh_gw && nla_put_in_addr(skb, RTA_GATEWAY, fi->fib_nh->nh_gw)) goto nla_put_failure; if (fi->fib_nh->nh_oif && nla_put_u32(skb, RTA_OIF, fi->fib_nh->nh_oif)) goto nla_put_failure; + if (fi->fib_nh->nh_flags & RTNH_F_LINKDOWN) { + in_dev = __in_dev_get_rcu(fi->fib_nh->nh_dev); + if (in_dev && + IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev)) + rtm->rtm_flags |= RTNH_F_DEAD; + } #ifdef CONFIG_IP_ROUTE_CLASSID if (fi->fib_nh[0].nh_tclassid && nla_put_u32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid)) @@ -1057,11 +1066,19 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, goto nla_put_failure; for_nexthops(fi) { + struct in_device *in_dev; + rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh)); if (!rtnh) goto nla_put_failure; rtnh->rtnh_flags = nh->nh_flags & 0xFF; + if (nh->nh_flags & RTNH_F_LINKDOWN) { + in_dev = __in_dev_get_rcu(nh->nh_dev); + if (in_dev && + IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev)) + rtnh->rtnh_flags |= RTNH_F_DEAD; + } rtnh->rtnh_hops = nh->nh_weight - 1; rtnh->rtnh_ifindex = nh->nh_oif; @@ -1310,16 +1327,22 @@ int fib_sync_up(struct net_device *dev, unsigned int nh_flags) void fib_select_multipath(struct fib_result *res) { struct fib_info *fi = res->fi; + struct in_device *in_dev; int w; spin_lock_bh(&fib_multipath_lock); if (fi->fib_power <= 0) { int power = 0; change_nexthops(fi) { - if (!(nexthop_nh->nh_flags & RTNH_F_DEAD)) { - power += nexthop_nh->nh_weight; - nexthop_nh->nh_power = nexthop_nh->nh_weight; - } + in_dev = __in_dev_get_rcu(nexthop_nh->nh_dev); + if (nexthop_nh->nh_flags & RTNH_F_DEAD) + continue; + if (in_dev && + IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && + nexthop_nh->nh_flags & RTNH_F_LINKDOWN) + continue; + power += nexthop_nh->nh_weight; + nexthop_nh->nh_power = nexthop_nh->nh_weight; } endfor_nexthops(fi); fi->fib_power = power; if (power <= 0) { diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 6c666a9f1bd5..15d32612e3c6 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1412,9 +1412,15 @@ found: continue; for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) { const struct fib_nh *nh = &fi->fib_nh[nhsel]; + struct in_device *in_dev = __in_dev_get_rcu(nh->nh_dev); if (nh->nh_flags & RTNH_F_DEAD) continue; + if (in_dev && + IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && + nh->nh_flags & RTNH_F_LINKDOWN && + !(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE)) + continue; if (flp->flowi4_oif && flp->flowi4_oif != nh->nh_oif) continue; diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c index 4bfaedf9b34e..8618fd150c96 100644 --- a/net/ipv4/netfilter/ipt_rpfilter.c +++ b/net/ipv4/netfilter/ipt_rpfilter.c @@ -40,7 +40,7 @@ static bool rpfilter_lookup_reverse(struct flowi4 *fl4, struct net *net = dev_net(dev); int ret __maybe_unused; - if (fib_lookup(net, fl4, &res)) + if (fib_lookup(net, fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE)) return false; if (res.type != RTN_UNICAST) { diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f6055984c307..d0362a2de3d3 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -747,7 +747,7 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow if (!(n->nud_state & NUD_VALID)) { neigh_event_send(n, NULL); } else { - if (fib_lookup(net, fl4, &res) == 0) { + if (fib_lookup(net, fl4, &res, 0) == 0) { struct fib_nh *nh = &FIB_RES_NH(res); update_or_create_fnhe(nh, fl4->daddr, new_gw, @@ -975,7 +975,7 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) return; rcu_read_lock(); - if (fib_lookup(dev_net(dst->dev), fl4, &res) == 0) { + if (fib_lookup(dev_net(dst->dev), fl4, &res, 0) == 0) { struct fib_nh *nh = &FIB_RES_NH(res); update_or_create_fnhe(nh, fl4->daddr, 0, mtu, @@ -1186,7 +1186,7 @@ void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt) fl4.flowi4_mark = skb->mark; rcu_read_lock(); - if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res) == 0) + if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res, 0) == 0) src = FIB_RES_PREFSRC(dev_net(rt->dst.dev), res); else src = inet_select_addr(rt->dst.dev, @@ -1716,7 +1716,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, fl4.flowi4_scope = RT_SCOPE_UNIVERSE; fl4.daddr = daddr; fl4.saddr = saddr; - err = fib_lookup(net, &fl4, &res); + err = fib_lookup(net, &fl4, &res, 0); if (err != 0) { if (!IN_DEV_FORWARD(in_dev)) err = -EHOSTUNREACH; @@ -2123,7 +2123,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) goto make_route; } - if (fib_lookup(net, fl4, &res)) { + if (fib_lookup(net, fl4, &res, 0)) { res.fi = NULL; res.table = NULL; if (fl4->flowi4_oif) { -- cgit v1.2.3