From dd77abf8a03a1ebd4dd3ddebecce312dcb0d1af1 Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Sun, 19 Mar 2017 11:01:28 +0200 Subject: IB/mlx4: Support RAW Ethernet when RoCE is disabled On some environments, such as certain SR-IOV VF configurations, RoCE isn't supported for mlx4 Ethernet ports. Currently the driver will not open IB device on that port. This is problematic since we do want user-space RAW Ethernet QPs functionality to remain in place. For that end, enhance the relevant driver flows such that we do create a device instance in that case. Signed-off-by: Majd Dibbiny Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/linux/mlx4/device.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 1beb1ec2fbdf..74b765ce48ab 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -1011,8 +1011,7 @@ struct mlx4_mad_ifc { #define mlx4_foreach_ib_transport_port(port, dev) \ for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \ if (((dev)->caps.port_mask[port] == MLX4_PORT_TYPE_IB) || \ - ((dev)->caps.flags & MLX4_DEV_CAP_FLAG_IBOE) || \ - ((dev)->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2)) + ((dev)->caps.port_mask[port] == MLX4_PORT_TYPE_ETH)) #define MLX4_INVALID_SLAVE_ID 0xFF #define MLX4_SINK_COUNTER_INDEX(dev) (dev->caps.max_counters - 1) -- cgit v1.2.3 From 19cc75249adc61401aa4b21a6654e0a7f7ea8fe2 Mon Sep 17 00:00:00 2001 From: Ariel Levkovich Date: Mon, 3 Apr 2017 13:11:03 +0300 Subject: IB/mlx5: Use IP version matching to classify IP traffic This change adds the ability for flow steering to classify IPv4/6 packets with MPLS tag (Ethertype 0x8847 and 0x8848) as standard IP packets and hit IPv4/6 classifed steering rules. When user added a flow rule with IP classification, driver was implicitly adding ethertype matching to the created rule in order to distinguish between IPv4 and IPv6 protocols. Since IP packets with MPLS tag header have MPLS ethertype, they missed the rule and ended up hitting the default filters. Such behavior prevented from MPLS packets to undergo inbound traffic load balancing flows (if such were defined by configuring RSS) to achieve higher throughput - the way that non-MPLS IP packets performed. Since our device is able to look past the MPLS tag and identify the next protocol we introduce this solution which replaces Ethertype matching by the device's capability to perform IP version parsing and matching in order to distinguish between IPv4 and IPv6. Therefore, whenever a flow with IP spec is added and device support IP version matching, driver will implicitly add IP version matching to the rule (Based on the IP spec type) without Ethertype matching which will cause relevant MPLS tagged packets to hit this rule as well. Otherwise (device doesn't support IP version matching), we fall back to setting Ethertype matching. If the user's filters specify an L2 ethertype and an IP spec the rule will then match both the ethertype and the IP version. The device's support for IP version matching is reported by the device via dedicated capability bit in query_device_cap and named outer/inner_ip_version. Signed-off-by: Ariel Levkovich Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 66 +++++++++++++++++++++++++++++---------- include/linux/mlx5/mlx5_ifc.h | 6 ++-- 2 files changed, 52 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 1c85c39d1d03..c28b6952b0ab 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1736,8 +1736,11 @@ static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val) offsetof(typeof(filter), field) -\ sizeof(filter.field)) -static int parse_flow_attr(u32 *match_c, u32 *match_v, - const union ib_flow_spec *ib_spec, u32 *tag_id) +#define IPV4_VERSION 4 +#define IPV6_VERSION 6 +static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c, + u32 *match_v, const union ib_flow_spec *ib_spec, + u32 *tag_id) { void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c, misc_parameters); @@ -1745,17 +1748,22 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v, misc_parameters); void *headers_c; void *headers_v; + int match_ipv; if (ib_spec->type & IB_FLOW_SPEC_INNER) { headers_c = MLX5_ADDR_OF(fte_match_param, match_c, inner_headers); headers_v = MLX5_ADDR_OF(fte_match_param, match_v, inner_headers); + match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev, + ft_field_support.inner_ip_version); } else { headers_c = MLX5_ADDR_OF(fte_match_param, match_c, outer_headers); headers_v = MLX5_ADDR_OF(fte_match_param, match_v, outer_headers); + match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev, + ft_field_support.outer_ip_version); } switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) { @@ -1811,10 +1819,17 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v, if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD)) return -EOPNOTSUPP; - MLX5_SET(fte_match_set_lyr_2_4, headers_c, - ethertype, 0xffff); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, - ethertype, ETH_P_IP); + if (match_ipv) { + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + ip_version, 0xf); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + ip_version, IPV4_VERSION); + } else { + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + ethertype, 0xffff); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + ethertype, ETH_P_IP); + } memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, src_ipv4_src_ipv6.ipv4_layout.ipv4), @@ -1843,10 +1858,17 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v, if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD)) return -EOPNOTSUPP; - MLX5_SET(fte_match_set_lyr_2_4, headers_c, - ethertype, 0xffff); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, - ethertype, ETH_P_IPV6); + if (match_ipv) { + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + ip_version, 0xf); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + ip_version, IPV6_VERSION); + } else { + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + ethertype, 0xffff); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + ethertype, ETH_P_IPV6); + } memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, src_ipv4_src_ipv6.ipv6_layout.ipv6), @@ -1968,10 +1990,16 @@ static bool flow_is_multicast_only(struct ib_flow_attr *ib_attr) is_multicast_ether_addr(eth_spec->val.dst_mac); } -static bool is_valid_ethertype(const struct ib_flow_attr *flow_attr, +static bool is_valid_ethertype(struct mlx5_core_dev *mdev, + const struct ib_flow_attr *flow_attr, bool check_inner) { union ib_flow_spec *ib_spec = (union ib_flow_spec *)(flow_attr + 1); + int match_ipv = check_inner ? + MLX5_CAP_FLOWTABLE_NIC_RX(mdev, + ft_field_support.inner_ip_version) : + MLX5_CAP_FLOWTABLE_NIC_RX(mdev, + ft_field_support.outer_ip_version); int inner_bit = check_inner ? IB_FLOW_SPEC_INNER : 0; bool ipv4_spec_valid, ipv6_spec_valid; unsigned int ip_spec_type = 0; @@ -2002,16 +2030,20 @@ static bool is_valid_ethertype(const struct ib_flow_attr *flow_attr, (ip_spec_type == (IB_FLOW_SPEC_IPV4 | inner_bit)); ipv6_spec_valid = (eth_type == ETH_P_IPV6) && (ip_spec_type == (IB_FLOW_SPEC_IPV6 | inner_bit)); - type_valid = ipv4_spec_valid || ipv6_spec_valid; + + type_valid = (ipv4_spec_valid) || (ipv6_spec_valid) || + (((eth_type == ETH_P_MPLS_UC) || + (eth_type == ETH_P_MPLS_MC)) && match_ipv); } return type_valid; } -static bool is_valid_attr(const struct ib_flow_attr *flow_attr) +static bool is_valid_attr(struct mlx5_core_dev *mdev, + const struct ib_flow_attr *flow_attr) { - return is_valid_ethertype(flow_attr, false) && - is_valid_ethertype(flow_attr, true); + return is_valid_ethertype(mdev, flow_attr, false) && + is_valid_ethertype(mdev, flow_attr, true); } static void put_flow_table(struct mlx5_ib_dev *dev, @@ -2154,7 +2186,7 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, u32 flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; int err = 0; - if (!is_valid_attr(flow_attr)) + if (!is_valid_attr(dev->mdev, flow_attr)) return ERR_PTR(-EINVAL); spec = mlx5_vzalloc(sizeof(*spec)); @@ -2167,7 +2199,7 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, INIT_LIST_HEAD(&handler->list); for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) { - err = parse_flow_attr(spec->match_criteria, + err = parse_flow_attr(dev->mdev, spec->match_criteria, spec->match_value, ib_flow, &flow_tag); if (err < 0) goto free; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 7c50bd39b297..4da6e803b627 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -236,7 +236,7 @@ struct mlx5_ifc_flow_table_fields_supported_bits { u8 outer_dmac[0x1]; u8 outer_smac[0x1]; u8 outer_ether_type[0x1]; - u8 reserved_at_3[0x1]; + u8 outer_ip_version[0x1]; u8 outer_first_prio[0x1]; u8 outer_first_cfi[0x1]; u8 outer_first_vid[0x1]; @@ -265,7 +265,7 @@ struct mlx5_ifc_flow_table_fields_supported_bits { u8 inner_dmac[0x1]; u8 inner_smac[0x1]; u8 inner_ether_type[0x1]; - u8 reserved_at_23[0x1]; + u8 inner_ip_version[0x1]; u8 inner_first_prio[0x1]; u8 inner_first_cfi[0x1]; u8 inner_first_vid[0x1]; @@ -371,7 +371,7 @@ struct mlx5_ifc_fte_match_set_lyr_2_4_bits { u8 cvlan_tag[0x1]; u8 svlan_tag[0x1]; u8 frag[0x1]; - u8 reserved_at_93[0x4]; + u8 ip_version[0x4]; u8 tcp_flags[0x9]; u8 tcp_sport[0x10]; -- cgit v1.2.3 From e1f24a79f424ddb03828de7c0152668c9a30146e Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Sun, 16 Apr 2017 07:29:29 +0300 Subject: IB/mlx5: Support congestion related counters This patch adds support to query the congestion related hardware counters through new command and links them with other hw counters being available in hw_counters sysfs location. In order to reuse existing infrastructure it renames related q_counter data structures to more generic counters to reflect q_counters and congestion counters and maybe some other counters in the future. New hardware counters: * rp_cnp_handled - CNP packets handled by the reaction point * rp_cnp_ignored - CNP packets ignored by the reaction point * np_cnp_sent - CNP packets sent by notification point to respond to CE marked RoCE packets * np_ecn_marked_roce_packets - CE marked RoCE packets received by notification point It also avoids returning ENOSYS which is specific for invalid system call and produces the following checkpatch.pl warning. WARNING: ENOSYS means 'invalid syscall nr' and nothing else + return -ENOSYS; Signed-off-by: Parav Pandit Reviewed-by: Eli Cohen Reviewed-by: Daniel Jurgens Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/cmd.c | 11 +++ drivers/infiniband/hw/mlx5/cmd.h | 2 + drivers/infiniband/hw/mlx5/main.c | 172 ++++++++++++++++++++++++----------- drivers/infiniband/hw/mlx5/mlx5_ib.h | 7 +- drivers/infiniband/hw/mlx5/qp.c | 7 +- include/linux/mlx5/mlx5_ifc.h | 18 ++-- 6 files changed, 150 insertions(+), 67 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c index cdc2d3017da7..18d5e1db93ed 100644 --- a/drivers/infiniband/hw/mlx5/cmd.c +++ b/drivers/infiniband/hw/mlx5/cmd.c @@ -46,3 +46,14 @@ int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey) null_mkey); return err; } + +int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev, + bool reset, void *out, int out_size) +{ + u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = { }; + + MLX5_SET(query_cong_statistics_in, in, opcode, + MLX5_CMD_OP_QUERY_CONG_STATISTICS); + MLX5_SET(query_cong_statistics_in, in, clear, reset); + return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size); +} diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h index 7ca8a7b6434d..fa09228193a6 100644 --- a/drivers/infiniband/hw/mlx5/cmd.h +++ b/drivers/infiniband/hw/mlx5/cmd.h @@ -37,4 +37,6 @@ #include int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey); +int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev, + bool reset, void *out, int out_size); #endif /* MLX5_IB_CMD_H */ diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index cca3848a2671..e81391901260 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -57,6 +57,7 @@ #include #include #include "mlx5_ib.h" +#include "cmd.h" #define DRIVER_NAME "mlx5_ib" #define DRIVER_VERSION "2.2-1" @@ -3213,7 +3214,7 @@ static void mlx5_disable_eth(struct mlx5_ib_dev *dev) mlx5_nic_vport_disable_roce(dev->mdev); } -struct mlx5_ib_q_counter { +struct mlx5_ib_counter { const char *name; size_t offset; }; @@ -3221,18 +3222,18 @@ struct mlx5_ib_q_counter { #define INIT_Q_COUNTER(_name) \ { .name = #_name, .offset = MLX5_BYTE_OFF(query_q_counter_out, _name)} -static const struct mlx5_ib_q_counter basic_q_cnts[] = { +static const struct mlx5_ib_counter basic_q_cnts[] = { INIT_Q_COUNTER(rx_write_requests), INIT_Q_COUNTER(rx_read_requests), INIT_Q_COUNTER(rx_atomic_requests), INIT_Q_COUNTER(out_of_buffer), }; -static const struct mlx5_ib_q_counter out_of_seq_q_cnts[] = { +static const struct mlx5_ib_counter out_of_seq_q_cnts[] = { INIT_Q_COUNTER(out_of_sequence), }; -static const struct mlx5_ib_q_counter retrans_q_cnts[] = { +static const struct mlx5_ib_counter retrans_q_cnts[] = { INIT_Q_COUNTER(duplicate_request), INIT_Q_COUNTER(rnr_nak_retry_err), INIT_Q_COUNTER(packet_seq_err), @@ -3240,22 +3241,31 @@ static const struct mlx5_ib_q_counter retrans_q_cnts[] = { INIT_Q_COUNTER(local_ack_timeout_err), }; -static void mlx5_ib_dealloc_q_counters(struct mlx5_ib_dev *dev) +#define INIT_CONG_COUNTER(_name) \ + { .name = #_name, .offset = \ + MLX5_BYTE_OFF(query_cong_statistics_out, _name ## _high)} + +static const struct mlx5_ib_counter cong_cnts[] = { + INIT_CONG_COUNTER(rp_cnp_ignored), + INIT_CONG_COUNTER(rp_cnp_handled), + INIT_CONG_COUNTER(np_ecn_marked_roce_packets), + INIT_CONG_COUNTER(np_cnp_sent), +}; + +static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev) { unsigned int i; for (i = 0; i < dev->num_ports; i++) { mlx5_core_dealloc_q_counter(dev->mdev, - dev->port[i].q_cnts.set_id); - kfree(dev->port[i].q_cnts.names); - kfree(dev->port[i].q_cnts.offsets); + dev->port[i].cnts.set_id); + kfree(dev->port[i].cnts.names); + kfree(dev->port[i].cnts.offsets); } } -static int __mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev, - const char ***names, - size_t **offsets, - u32 *num) +static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev, + struct mlx5_ib_counters *cnts) { u32 num_counters; @@ -3266,27 +3276,32 @@ static int __mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev, if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) num_counters += ARRAY_SIZE(retrans_q_cnts); + cnts->num_q_counters = num_counters; - *names = kcalloc(num_counters, sizeof(**names), GFP_KERNEL); - if (!*names) + if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { + cnts->num_cong_counters = ARRAY_SIZE(cong_cnts); + num_counters += ARRAY_SIZE(cong_cnts); + } + + cnts->names = kcalloc(num_counters, sizeof(cnts->names), GFP_KERNEL); + if (!cnts->names) return -ENOMEM; - *offsets = kcalloc(num_counters, sizeof(**offsets), GFP_KERNEL); - if (!*offsets) + cnts->offsets = kcalloc(num_counters, + sizeof(cnts->offsets), GFP_KERNEL); + if (!cnts->offsets) goto err_names; - *num = num_counters; - return 0; err_names: - kfree(*names); + kfree(cnts->names); return -ENOMEM; } -static void mlx5_ib_fill_q_counters(struct mlx5_ib_dev *dev, - const char **names, - size_t *offsets) +static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev, + const char **names, + size_t *offsets) { int i; int j = 0; @@ -3309,9 +3324,16 @@ static void mlx5_ib_fill_q_counters(struct mlx5_ib_dev *dev, offsets[j] = retrans_q_cnts[i].offset; } } + + if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { + for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) { + names[j] = cong_cnts[i].name; + offsets[j] = cong_cnts[i].offset; + } + } } -static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev) +static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev) { int i; int ret; @@ -3320,7 +3342,7 @@ static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev) struct mlx5_ib_port *port = &dev->port[i]; ret = mlx5_core_alloc_q_counter(dev->mdev, - &port->q_cnts.set_id); + &port->cnts.set_id); if (ret) { mlx5_ib_warn(dev, "couldn't allocate queue counter for port %d, err %d\n", @@ -3328,15 +3350,12 @@ static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev) goto dealloc_counters; } - ret = __mlx5_ib_alloc_q_counters(dev, - &port->q_cnts.names, - &port->q_cnts.offsets, - &port->q_cnts.num_counters); + ret = __mlx5_ib_alloc_counters(dev, &port->cnts); if (ret) goto dealloc_counters; - mlx5_ib_fill_q_counters(dev, port->q_cnts.names, - port->q_cnts.offsets); + mlx5_ib_fill_counters(dev, port->cnts.names, + port->cnts.offsets); } return 0; @@ -3344,7 +3363,7 @@ static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev) dealloc_counters: while (--i >= 0) mlx5_core_dealloc_q_counter(dev->mdev, - dev->port[i].q_cnts.set_id); + dev->port[i].cnts.set_id); return ret; } @@ -3359,44 +3378,93 @@ static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev, if (port_num == 0) return NULL; - return rdma_alloc_hw_stats_struct(port->q_cnts.names, - port->q_cnts.num_counters, + return rdma_alloc_hw_stats_struct(port->cnts.names, + port->cnts.num_q_counters + + port->cnts.num_cong_counters, RDMA_HW_STATS_DEFAULT_LIFESPAN); } -static int mlx5_ib_get_hw_stats(struct ib_device *ibdev, - struct rdma_hw_stats *stats, - u8 port_num, int index) +static int mlx5_ib_query_q_counters(struct mlx5_ib_dev *dev, + struct mlx5_ib_port *port, + struct rdma_hw_stats *stats) { - struct mlx5_ib_dev *dev = to_mdev(ibdev); - struct mlx5_ib_port *port = &dev->port[port_num - 1]; int outlen = MLX5_ST_SZ_BYTES(query_q_counter_out); void *out; __be32 val; - int ret; - int i; - - if (!stats) - return -ENOSYS; + int ret, i; out = mlx5_vzalloc(outlen); if (!out) return -ENOMEM; ret = mlx5_core_query_q_counter(dev->mdev, - port->q_cnts.set_id, 0, + port->cnts.set_id, 0, out, outlen); if (ret) goto free; - for (i = 0; i < port->q_cnts.num_counters; i++) { - val = *(__be32 *)(out + port->q_cnts.offsets[i]); + for (i = 0; i < port->cnts.num_q_counters; i++) { + val = *(__be32 *)(out + port->cnts.offsets[i]); stats->value[i] = (u64)be32_to_cpu(val); } free: kvfree(out); - return port->q_cnts.num_counters; + return ret; +} + +static int mlx5_ib_query_cong_counters(struct mlx5_ib_dev *dev, + struct mlx5_ib_port *port, + struct rdma_hw_stats *stats) +{ + int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out); + void *out; + int ret, i; + int offset = port->cnts.num_q_counters; + + out = mlx5_vzalloc(outlen); + if (!out) + return -ENOMEM; + + ret = mlx5_cmd_query_cong_counter(dev->mdev, false, out, outlen); + if (ret) + goto free; + + for (i = 0; i < port->cnts.num_cong_counters; i++) { + stats->value[i + offset] = + be64_to_cpup((__be64 *)(out + + port->cnts.offsets[i + offset])); + } + +free: + kvfree(out); + return ret; +} + +static int mlx5_ib_get_hw_stats(struct ib_device *ibdev, + struct rdma_hw_stats *stats, + u8 port_num, int index) +{ + struct mlx5_ib_dev *dev = to_mdev(ibdev); + struct mlx5_ib_port *port = &dev->port[port_num - 1]; + int ret, num_counters; + + if (!stats) + return -EINVAL; + + ret = mlx5_ib_query_q_counters(dev, port, stats); + if (ret) + return ret; + num_counters = port->cnts.num_q_counters; + + if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { + ret = mlx5_ib_query_cong_counters(dev, port, stats); + if (ret) + return ret; + num_counters += port->cnts.num_cong_counters; + } + + return num_counters; } static void *mlx5_ib_add(struct mlx5_core_dev *mdev) @@ -3603,14 +3671,14 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) goto err_rsrc; if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) { - err = mlx5_ib_alloc_q_counters(dev); + err = mlx5_ib_alloc_counters(dev); if (err) goto err_odp; } dev->mdev->priv.uar = mlx5_get_uars_page(dev->mdev); if (!dev->mdev->priv.uar) - goto err_q_cnt; + goto err_cnt; err = mlx5_alloc_bfreg(dev->mdev, &dev->bfreg, false, false); if (err) @@ -3654,9 +3722,9 @@ err_bfreg: err_uar_page: mlx5_put_uars_page(dev->mdev, dev->mdev->priv.uar); -err_q_cnt: +err_cnt: if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) - mlx5_ib_dealloc_q_counters(dev); + mlx5_ib_dealloc_counters(dev); err_odp: mlx5_ib_odp_remove_one(dev); @@ -3690,7 +3758,7 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context) mlx5_free_bfreg(dev->mdev, &dev->bfreg); mlx5_put_uars_page(dev->mdev, mdev->priv.uar); if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) - mlx5_ib_dealloc_q_counters(dev); + mlx5_ib_dealloc_counters(dev); destroy_umrc_res(dev); mlx5_ib_odp_remove_one(dev); destroy_dev_resources(&dev->devr); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index ce8ba617d46e..191b82b038d7 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -595,15 +595,16 @@ struct mlx5_ib_resources { struct mutex mutex; }; -struct mlx5_ib_q_counters { +struct mlx5_ib_counters { const char **names; size_t *offsets; - u32 num_counters; + u32 num_q_counters; + u32 num_cong_counters; u16 set_id; }; struct mlx5_ib_port { - struct mlx5_ib_q_counters q_cnts; + struct mlx5_ib_counters cnts; }; struct mlx5_roce { diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index ed6320186f89..4e5a811d33c7 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -2799,7 +2799,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, qp->port) - 1; mibport = &dev->port[port_num]; context->qp_counter_set_usr_page |= - cpu_to_be32((u32)(mibport->q_cnts.set_id) << 24); + cpu_to_be32((u32)(mibport->cnts.set_id) << 24); } if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) @@ -2827,7 +2827,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, raw_qp_param.operation = op; if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { - raw_qp_param.rq_q_ctr_id = mibport->q_cnts.set_id; + raw_qp_param.rq_q_ctr_id = mibport->cnts.set_id; raw_qp_param.set_mask |= MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID; } @@ -4965,7 +4965,8 @@ int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, if (MLX5_CAP_GEN(dev->mdev, modify_rq_counter_set_id)) { MLX5_SET64(modify_rq_in, in, modify_bitmask, MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_RQ_COUNTER_SET_ID); - MLX5_SET(rqc, rqc, counter_set_id, dev->port->q_cnts.set_id); + MLX5_SET(rqc, rqc, counter_set_id, + dev->port->cnts.set_id); } else pr_info_once("%s: Receive WQ counters are not supported on current FW\n", dev->ib_dev.name); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 4da6e803b627..954f42c268a4 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -4735,17 +4735,17 @@ struct mlx5_ifc_query_cong_statistics_out_bits { u8 reserved_at_40[0x40]; - u8 cur_flows[0x20]; + u8 rp_cur_flows[0x20]; u8 sum_flows[0x20]; - u8 cnp_ignored_high[0x20]; + u8 rp_cnp_ignored_high[0x20]; - u8 cnp_ignored_low[0x20]; + u8 rp_cnp_ignored_low[0x20]; - u8 cnp_handled_high[0x20]; + u8 rp_cnp_handled_high[0x20]; - u8 cnp_handled_low[0x20]; + u8 rp_cnp_handled_low[0x20]; u8 reserved_at_140[0x100]; @@ -4755,13 +4755,13 @@ struct mlx5_ifc_query_cong_statistics_out_bits { u8 accumulators_period[0x20]; - u8 ecn_marked_roce_packets_high[0x20]; + u8 np_ecn_marked_roce_packets_high[0x20]; - u8 ecn_marked_roce_packets_low[0x20]; + u8 np_ecn_marked_roce_packets_low[0x20]; - u8 cnps_sent_high[0x20]; + u8 np_cnp_sent_high[0x20]; - u8 cnps_sent_low[0x20]; + u8 np_cnp_sent_low[0x20]; u8 reserved_at_320[0x560]; }; -- cgit v1.2.3 From 133bea04ff6fd715d8140edca9d6c7337249571b Mon Sep 17 00:00:00 2001 From: Tim Wright Date: Mon, 1 May 2017 17:30:08 +0100 Subject: IB/mlx5: Add port_xmit_wait to counter registers read Add port_xmit_wait to the error counters read by mlx5_ib_process_mad to ensure sysfs port counter provides correct value for PortXmitWait. Otherwise the sysfs port_xmit_wait file always contains zero. The previous MAD_IFC implementation populated this counter, but it was removed during the migration to PPCNT for error counters (32-bit only). Signed-off-by: Tim Wright Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/mad.c | 2 ++ include/linux/mlx5/mlx5_ifc.h | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index 8dacb49eabd9..f1b56de64871 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -187,6 +187,8 @@ static void pma_cnt_assign(struct ib_pma_portcounters *pma_cnt, port_xmit_discards); MLX5_ASSIGN_PMA_CNTR(pma_cnt->port_xmit_constraint_errors, port_xmit_constraint_errors); + MLX5_ASSIGN_PMA_CNTR(pma_cnt->port_xmit_wait, + port_xmit_wait); MLX5_ASSIGN_PMA_CNTR(pma_cnt->port_rcv_constraint_errors, port_rcv_constraint_errors); MLX5_ASSIGN_PMA_CNTR(pma_cnt->link_overrun_errors, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 954f42c268a4..32de0724b400 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1456,7 +1456,9 @@ struct mlx5_ifc_ib_port_cntrs_grp_data_layout_bits { u8 vl_15_dropped[0x10]; - u8 reserved_at_a0[0xa0]; + u8 reserved_at_a0[0x80]; + + u8 port_xmit_wait[0x20]; }; struct mlx5_ifc_eth_per_traffic_grp_data_layout_bits { -- cgit v1.2.3