Diffstat (limited to 'drivers/infiniband/hw/mlx5')
-rw-r--r--   drivers/infiniband/hw/mlx5/Makefile     |   2
-rw-r--r--   drivers/infiniband/hw/mlx5/ah.c         |  14
-rw-r--r--   drivers/infiniband/hw/mlx5/counters.c   | 195
-rw-r--r--   drivers/infiniband/hw/mlx5/counters.h   |  15
-rw-r--r--   drivers/infiniband/hw/mlx5/cq.c         |   2
-rw-r--r--   drivers/infiniband/hw/mlx5/devx.c       |  41
-rw-r--r--   drivers/infiniband/hw/mlx5/devx.h       |   5
-rw-r--r--   drivers/infiniband/hw/mlx5/fs.c         | 637
-rw-r--r--   drivers/infiniband/hw/mlx5/fs.h         |  17
-rw-r--r--   drivers/infiniband/hw/mlx5/main.c       |  77
-rw-r--r--   drivers/infiniband/hw/mlx5/mlx5_ib.h    |  23
-rw-r--r--   drivers/infiniband/hw/mlx5/mr.c         |  52
-rw-r--r--   drivers/infiniband/hw/mlx5/odp.c        |  10
13 files changed, 1002 insertions(+), 88 deletions(-)
diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile index b38961f5058e..11878ddf7cc7 100644 --- a/drivers/infiniband/hw/mlx5/Makefile +++ b/drivers/infiniband/hw/mlx5/Makefile @@ -9,6 +9,7 @@ mlx5_ib-y := ah.o \ data_direct.o \ dm.o \ doorbell.o \ + fs.o \ gsi.o \ ib_virt.o \ mad.o \ @@ -26,7 +27,6 @@ mlx5_ib-y := ah.o \ mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o mlx5_ib-$(CONFIG_MLX5_ESWITCH) += ib_rep.o mlx5_ib-$(CONFIG_INFINIBAND_USER_ACCESS) += devx.o \ - fs.o \ qos.o \ std_types.o mlx5_ib-$(CONFIG_MLX5_MACSEC) += macsec.o diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c index 99036afb3aef..531a57f9ee7e 100644 --- a/drivers/infiniband/hw/mlx5/ah.c +++ b/drivers/infiniband/hw/mlx5/ah.c @@ -50,11 +50,12 @@ static __be16 mlx5_ah_get_udp_sport(const struct mlx5_ib_dev *dev, return sport; } -static void create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah, +static int create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah, struct rdma_ah_init_attr *init_attr) { struct rdma_ah_attr *ah_attr = init_attr->ah_attr; enum ib_gid_type gid_type; + int rate_val; if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) { const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr); @@ -67,8 +68,10 @@ static void create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah, ah->av.tclass = grh->traffic_class; } - ah->av.stat_rate_sl = - (mlx5r_ib_rate(dev, rdma_ah_get_static_rate(ah_attr)) << 4); + rate_val = mlx5r_ib_rate(dev, rdma_ah_get_static_rate(ah_attr)); + if (rate_val < 0) + return rate_val; + ah->av.stat_rate_sl = rate_val << 4; if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) { if (init_attr->xmit_slave) @@ -89,6 +92,8 @@ static void create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah, ah->av.fl_mlid = rdma_ah_get_path_bits(ah_attr) & 0x7f; ah->av.stat_rate_sl |= (rdma_ah_get_sl(ah_attr) & 0xf); } + + return 0; } int mlx5_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, @@ -121,8 +126,7 @@ int mlx5_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, return err; } - create_ib_ah(dev, ah, init_attr); - return 0; + return create_ib_ah(dev, ah, init_attr); } int mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr) diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c index 81cfa74147a1..b847084dcd99 100644 --- a/drivers/infiniband/hw/mlx5/counters.c +++ b/drivers/infiniband/hw/mlx5/counters.c @@ -140,6 +140,13 @@ static const struct mlx5_ib_counter rdmatx_cnp_op_cnts[] = { INIT_OP_COUNTER(cc_tx_cnp_pkts, CC_TX_CNP_PKTS), }; +static const struct mlx5_ib_counter packets_op_cnts[] = { + INIT_OP_COUNTER(rdma_tx_packets, RDMA_TX_PACKETS), + INIT_OP_COUNTER(rdma_tx_bytes, RDMA_TX_BYTES), + INIT_OP_COUNTER(rdma_rx_packets, RDMA_RX_PACKETS), + INIT_OP_COUNTER(rdma_rx_bytes, RDMA_RX_BYTES), +}; + static int mlx5_ib_read_counters(struct ib_counters *counters, struct ib_counters_read_attr *read_attr, struct uverbs_attr_bundle *attrs) @@ -427,6 +434,52 @@ done: return num_counters; } +static bool is_rdma_bytes_counter(u32 type) +{ + if (type == MLX5_IB_OPCOUNTER_RDMA_TX_BYTES || + type == MLX5_IB_OPCOUNTER_RDMA_RX_BYTES || + type == MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP || + type == MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP) + return true; + + return false; +} + +static int do_per_qp_get_op_stat(struct rdma_counter *counter) +{ + struct mlx5_ib_dev *dev = to_mdev(counter->device); + const struct mlx5_ib_counters *cnts = get_counters(dev, 
counter->port); + struct mlx5_rdma_counter *mcounter = to_mcounter(counter); + int i, ret, index, num_hw_counters; + u64 packets = 0, bytes = 0; + + for (i = MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP; + i <= MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP; i++) { + if (!mcounter->fc[i]) + continue; + + ret = mlx5_fc_query(dev->mdev, mcounter->fc[i], + &packets, &bytes); + if (ret) + return ret; + + num_hw_counters = cnts->num_q_counters + + cnts->num_cong_counters + + cnts->num_ext_ppcnt_counters; + + index = i - MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP + + num_hw_counters; + + if (is_rdma_bytes_counter(i)) + counter->stats->value[index] = bytes; + else + counter->stats->value[index] = packets; + + clear_bit(index, counter->stats->is_disabled); + } + return 0; +} + static int do_get_op_stat(struct ib_device *ibdev, struct rdma_hw_stats *stats, u32 port_num, int index) @@ -434,7 +487,7 @@ static int do_get_op_stat(struct ib_device *ibdev, struct mlx5_ib_dev *dev = to_mdev(ibdev); const struct mlx5_ib_counters *cnts; const struct mlx5_ib_op_fc *opfcs; - u64 packets = 0, bytes; + u64 packets, bytes; u32 type; int ret; @@ -453,8 +506,11 @@ static int do_get_op_stat(struct ib_device *ibdev, if (ret) return ret; + if (is_rdma_bytes_counter(type)) + stats->value[index] = bytes; + else + stats->value[index] = packets; out: - stats->value[index] = packets; return index; } @@ -523,19 +579,30 @@ static int mlx5_ib_counter_update_stats(struct rdma_counter *counter) { struct mlx5_ib_dev *dev = to_mdev(counter->device); const struct mlx5_ib_counters *cnts = get_counters(dev, counter->port); + int ret; + + ret = mlx5_ib_query_q_counters(dev->mdev, cnts, counter->stats, + counter->id); + if (ret) + return ret; + + if (!counter->mode.bind_opcnt) + return 0; - return mlx5_ib_query_q_counters(dev->mdev, cnts, - counter->stats, counter->id); + return do_per_qp_get_op_stat(counter); } static int mlx5_ib_counter_dealloc(struct rdma_counter *counter) { + struct mlx5_rdma_counter *mcounter = to_mcounter(counter); struct mlx5_ib_dev *dev = to_mdev(counter->device); u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {}; if (!counter->id) return 0; + WARN_ON(!xa_empty(&mcounter->qpn_opfc_xa)); + mlx5r_fs_destroy_fcs(dev, counter); MLX5_SET(dealloc_q_counter_in, in, opcode, MLX5_CMD_OP_DEALLOC_Q_COUNTER); MLX5_SET(dealloc_q_counter_in, in, counter_set_id, counter->id); @@ -543,7 +610,7 @@ static int mlx5_ib_counter_dealloc(struct rdma_counter *counter) } static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter, - struct ib_qp *qp) + struct ib_qp *qp, u32 port) { struct mlx5_ib_dev *dev = to_mdev(qp->device); bool new = false; @@ -568,8 +635,14 @@ static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter, if (err) goto fail_set_counter; + err = mlx5r_fs_bind_op_fc(qp, counter, port); + if (err) + goto fail_bind_op_fc; + return 0; +fail_bind_op_fc: + mlx5_ib_qp_set_counter(qp, NULL); fail_set_counter: if (new) { mlx5_ib_counter_dealloc(counter); @@ -579,9 +652,22 @@ fail_set_counter: return err; } -static int mlx5_ib_counter_unbind_qp(struct ib_qp *qp) +static int mlx5_ib_counter_unbind_qp(struct ib_qp *qp, u32 port) { - return mlx5_ib_qp_set_counter(qp, NULL); + struct rdma_counter *counter = qp->counter; + int err; + + mlx5r_fs_unbind_op_fc(qp, counter); + + err = mlx5_ib_qp_set_counter(qp, NULL); + if (err) + goto fail_set_counter; + + return 0; + +fail_set_counter: + mlx5r_fs_bind_op_fc(qp, counter, port); + return err; } static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev, @@ -681,6 +767,12 @@ static void 
mlx5_ib_fill_counters(struct mlx5_ib_dev *dev, descs[j].priv = &rdmatx_cnp_op_cnts[i].type; } } + + for (i = 0; i < ARRAY_SIZE(packets_op_cnts); i++, j++) { + descs[j].name = packets_op_cnts[i].name; + descs[j].flags |= IB_STAT_FLAG_OPTIONAL; + descs[j].priv = &packets_op_cnts[i].type; + } } @@ -731,6 +823,8 @@ static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev, num_op_counters = ARRAY_SIZE(basic_op_cnts); + num_op_counters += ARRAY_SIZE(packets_op_cnts); + if (MLX5_CAP_FLOWTABLE(dev->mdev, ft_field_support_2_nic_receive_rdma.bth_opcode)) num_op_counters += ARRAY_SIZE(rdmarx_cnp_op_cnts); @@ -760,10 +854,58 @@ err: return -ENOMEM; } +/* + * Checks if the given flow counter type should be sharing the same flow counter + * with another type and if it should, checks if that other type flow counter + * was already created, if both conditions are met return true and the counter + * else return false. + */ +bool mlx5r_is_opfc_shared_and_in_use(struct mlx5_ib_op_fc *opfcs, u32 type, + struct mlx5_ib_op_fc **opfc) +{ + u32 shared_fc_type; + + switch (type) { + case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS: + shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_TX_BYTES; + break; + case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES: + shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS; + break; + case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS: + shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_RX_BYTES; + break; + case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES: + shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS; + break; + case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP: + shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP; + break; + case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP: + shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP; + break; + case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP: + shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP; + break; + case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP: + shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP; + break; + default: + return false; + } + + *opfc = &opfcs[shared_fc_type]; + if (!(*opfc)->fc) + return false; + + return true; +} + static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev) { u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {}; int num_cnt_ports = dev->num_ports; + struct mlx5_ib_op_fc *in_use_opfc; int i, j; if (is_mdev_switchdev_mode(dev->mdev)) @@ -785,11 +927,15 @@ static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev) if (!dev->port[i].cnts.opfcs[j].fc) continue; - if (IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)) - mlx5_ib_fs_remove_op_fc(dev, - &dev->port[i].cnts.opfcs[j], j); + if (mlx5r_is_opfc_shared_and_in_use( + dev->port[i].cnts.opfcs, j, &in_use_opfc)) + goto skip; + + mlx5_ib_fs_remove_op_fc(dev, + &dev->port[i].cnts.opfcs[j], j); mlx5_fc_destroy(dev->mdev, dev->port[i].cnts.opfcs[j].fc); +skip: dev->port[i].cnts.opfcs[j].fc = NULL; } } @@ -983,8 +1129,8 @@ static int mlx5_ib_modify_stat(struct ib_device *device, u32 port, unsigned int index, bool enable) { struct mlx5_ib_dev *dev = to_mdev(device); + struct mlx5_ib_op_fc *opfc, *in_use_opfc; struct mlx5_ib_counters *cnts; - struct mlx5_ib_op_fc *opfc; u32 num_hw_counters, type; int ret; @@ -1008,6 +1154,13 @@ static int mlx5_ib_modify_stat(struct ib_device *device, u32 port, if (opfc->fc) return -EEXIST; + if (mlx5r_is_opfc_shared_and_in_use(cnts->opfcs, type, + &in_use_opfc)) { + opfc->fc = in_use_opfc->fc; + opfc->rule[0] = in_use_opfc->rule[0]; + return 0; + } + opfc->fc = mlx5_fc_create(dev->mdev, false); if (IS_ERR(opfc->fc)) return PTR_ERR(opfc->fc); @@ -1023,12 +1176,23 @@ static int 
mlx5_ib_modify_stat(struct ib_device *device, u32 port, if (!opfc->fc) return -EINVAL; + if (mlx5r_is_opfc_shared_and_in_use(cnts->opfcs, type, &in_use_opfc)) + goto out; + mlx5_ib_fs_remove_op_fc(dev, opfc, type); mlx5_fc_destroy(dev->mdev, opfc->fc); +out: opfc->fc = NULL; return 0; } +static void mlx5_ib_counter_init(struct rdma_counter *counter) +{ + struct mlx5_rdma_counter *mcounter = to_mcounter(counter); + + xa_init(&mcounter->qpn_opfc_xa); +} + static const struct ib_device_ops hw_stats_ops = { .alloc_hw_port_stats = mlx5_ib_alloc_hw_port_stats, .get_hw_stats = mlx5_ib_get_hw_stats, @@ -1037,8 +1201,10 @@ static const struct ib_device_ops hw_stats_ops = { .counter_dealloc = mlx5_ib_counter_dealloc, .counter_alloc_stats = mlx5_ib_counter_alloc_stats, .counter_update_stats = mlx5_ib_counter_update_stats, - .modify_hw_stat = IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) ? - mlx5_ib_modify_stat : NULL, + .modify_hw_stat = mlx5_ib_modify_stat, + .counter_init = mlx5_ib_counter_init, + + INIT_RDMA_OBJ_SIZE(rdma_counter, mlx5_rdma_counter, rdma_counter), }; static const struct ib_device_ops hw_switchdev_vport_op = { @@ -1053,6 +1219,9 @@ static const struct ib_device_ops hw_switchdev_stats_ops = { .counter_dealloc = mlx5_ib_counter_dealloc, .counter_alloc_stats = mlx5_ib_counter_alloc_stats, .counter_update_stats = mlx5_ib_counter_update_stats, + .counter_init = mlx5_ib_counter_init, + + INIT_RDMA_OBJ_SIZE(rdma_counter, mlx5_rdma_counter, rdma_counter), }; static const struct ib_device_ops counters_ops = { diff --git a/drivers/infiniband/hw/mlx5/counters.h b/drivers/infiniband/hw/mlx5/counters.h index 6bcaaa52e2b2..bd03cee42014 100644 --- a/drivers/infiniband/hw/mlx5/counters.h +++ b/drivers/infiniband/hw/mlx5/counters.h @@ -8,10 +8,25 @@ #include "mlx5_ib.h" +struct mlx5_rdma_counter { + struct rdma_counter rdma_counter; + + struct mlx5_fc *fc[MLX5_IB_OPCOUNTER_MAX]; + struct xarray qpn_opfc_xa; +}; + +static inline struct mlx5_rdma_counter * +to_mcounter(struct rdma_counter *counter) +{ + return container_of(counter, struct mlx5_rdma_counter, rdma_counter); +} + int mlx5_ib_counters_init(struct mlx5_ib_dev *dev); void mlx5_ib_counters_cleanup(struct mlx5_ib_dev *dev); void mlx5_ib_counters_clear_description(struct ib_counters *counters); int mlx5_ib_flow_counters_set_data(struct ib_counters *ibcounters, struct mlx5_ib_create_flow *ucmd); u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u32 port_num); +bool mlx5r_is_opfc_shared_and_in_use(struct mlx5_ib_op_fc *opfcs, u32 type, + struct mlx5_ib_op_fc **opfc); #endif /* _MLX5_IB_COUNTERS_H */ diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 4c54dc578069..1aa5311b03e9 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -490,7 +490,7 @@ repoll: } qpn = ntohl(cqe64->sop_drop_qpn) & 0xffffff; - if (!*cur_qp || (qpn != (*cur_qp)->ibqp.qp_num)) { + if (!*cur_qp || (qpn != (*cur_qp)->trans_qp.base.mqp.qpn)) { /* We do not have to take the QP table lock here, * because CQs will be locked while QPs are removed * from the table. 
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 4186884c66e1..2479da8620ca 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -13,6 +13,7 @@ #include <rdma/uverbs_std_types.h> #include <linux/mlx5/driver.h> #include <linux/mlx5/fs.h> +#include <rdma/ib_ucaps.h> #include "mlx5_ib.h" #include "devx.h" #include "qp.h" @@ -122,7 +123,27 @@ devx_ufile2uctx(const struct uverbs_attr_bundle *attrs) return to_mucontext(ib_uverbs_get_ucontext(attrs)); } -int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user) +static int set_uctx_ucaps(struct mlx5_ib_dev *dev, u64 req_ucaps, u32 *cap) +{ + if (UCAP_ENABLED(req_ucaps, RDMA_UCAP_MLX5_CTRL_LOCAL)) { + if (MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RDMA_CTRL) + *cap |= MLX5_UCTX_CAP_RDMA_CTRL; + else + return -EOPNOTSUPP; + } + + if (UCAP_ENABLED(req_ucaps, RDMA_UCAP_MLX5_CTRL_OTHER_VHCA)) { + if (MLX5_CAP_GEN(dev->mdev, uctx_cap) & + MLX5_UCTX_CAP_RDMA_CTRL_OTHER_VHCA) + *cap |= MLX5_UCTX_CAP_RDMA_CTRL_OTHER_VHCA; + else + return -EOPNOTSUPP; + } + + return 0; +} + +int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user, u64 req_ucaps) { u32 in[MLX5_ST_SZ_DW(create_uctx_in)] = {}; u32 out[MLX5_ST_SZ_DW(create_uctx_out)] = {}; @@ -136,14 +157,22 @@ int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user) return -EINVAL; uctx = MLX5_ADDR_OF(create_uctx_in, in, uctx); - if (is_user && capable(CAP_NET_RAW) && - (MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RAW_TX)) + if (is_user && + (MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RAW_TX) && + capable(CAP_NET_RAW)) cap |= MLX5_UCTX_CAP_RAW_TX; - if (is_user && capable(CAP_SYS_RAWIO) && + if (is_user && (MLX5_CAP_GEN(dev->mdev, uctx_cap) & - MLX5_UCTX_CAP_INTERNAL_DEV_RES)) + MLX5_UCTX_CAP_INTERNAL_DEV_RES) && + capable(CAP_SYS_RAWIO)) cap |= MLX5_UCTX_CAP_INTERNAL_DEV_RES; + if (req_ucaps) { + err = set_uctx_ucaps(dev, req_ucaps, &cap); + if (err) + return err; + } + MLX5_SET(create_uctx_in, in, opcode, MLX5_CMD_OP_CREATE_UCTX); MLX5_SET(uctx, uctx, cap, cap); @@ -2573,7 +2602,7 @@ int mlx5_ib_devx_init(struct mlx5_ib_dev *dev) struct mlx5_devx_event_table *table = &dev->devx_event_table; int uid; - uid = mlx5_ib_devx_create(dev, false); + uid = mlx5_ib_devx_create(dev, false, 0); if (uid > 0) { dev->devx_whitelist_uid = uid; xa_init(&table->event_xa); diff --git a/drivers/infiniband/hw/mlx5/devx.h b/drivers/infiniband/hw/mlx5/devx.h index 1344bf4c9d21..ee9e7d3af93f 100644 --- a/drivers/infiniband/hw/mlx5/devx.h +++ b/drivers/infiniband/hw/mlx5/devx.h @@ -24,13 +24,14 @@ struct devx_obj { struct list_head event_sub; /* holds devx_event_subscription entries */ }; #if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) -int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user); +int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user, u64 req_ucaps); void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid); int mlx5_ib_devx_init(struct mlx5_ib_dev *dev); void mlx5_ib_devx_cleanup(struct mlx5_ib_dev *dev); void mlx5_ib_ufile_hw_cleanup(struct ib_uverbs_file *ufile); #else -static inline int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user) +static inline int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user, + u64 req_ucaps) { return -EOPNOTSUPP; } diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c index 162814ae8cb4..251246c73b33 100644 --- a/drivers/infiniband/hw/mlx5/fs.c +++ b/drivers/infiniband/hw/mlx5/fs.c @@ -12,6 +12,7 @@ #include 
<rdma/mlx5_user_ioctl_verbs.h> #include <rdma/ib_hdrs.h> #include <rdma/ib_umem.h> +#include <rdma/ib_ucaps.h> #include <linux/mlx5/driver.h> #include <linux/mlx5/fs.h> #include <linux/mlx5/fs_helpers.h> @@ -32,6 +33,11 @@ enum { MATCH_CRITERIA_ENABLE_MISC2_BIT }; + +struct mlx5_per_qp_opfc { + struct mlx5_ib_op_fc opfcs[MLX5_IB_OPCOUNTER_MAX]; +}; + #define HEADER_IS_ZERO(match_criteria, headers) \ !(memchr_inv(MLX5_ADDR_OF(fte_match_param, match_criteria, headers), \ 0, MLX5_FLD_SZ_BYTES(fte_match_param, headers))) \ @@ -678,7 +684,7 @@ enum flow_table_type { #define MLX5_FS_MAX_TYPES 6 #define MLX5_FS_MAX_ENTRIES BIT(16) -static bool mlx5_ib_shared_ft_allowed(struct ib_device *device) +static bool __maybe_unused mlx5_ib_shared_ft_allowed(struct ib_device *device) { struct mlx5_ib_dev *dev = to_mdev(device); @@ -690,7 +696,7 @@ static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_ib_dev *dev, struct mlx5_ib_flow_prio *prio, int priority, int num_entries, int num_groups, - u32 flags) + u32 flags, u16 vport) { struct mlx5_flow_table_attr ft_attr = {}; struct mlx5_flow_table *ft; @@ -698,6 +704,7 @@ static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_ib_dev *dev, ft_attr.prio = priority; ft_attr.max_fte = num_entries; ft_attr.flags = flags; + ft_attr.vport = vport; ft_attr.autogroup.max_num_groups = num_groups; ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); if (IS_ERR(ft)) @@ -792,18 +799,25 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, ft = prio->flow_table; if (!ft) return _get_prio(dev, ns, prio, priority, max_table_size, - num_groups, flags); + num_groups, flags, 0); return prio; } enum { + RDMA_RX_ECN_OPCOUNTER_PER_QP_PRIO, + RDMA_RX_CNP_OPCOUNTER_PER_QP_PRIO, + RDMA_RX_PKTS_BYTES_OPCOUNTER_PER_QP_PRIO, RDMA_RX_ECN_OPCOUNTER_PRIO, RDMA_RX_CNP_OPCOUNTER_PRIO, + RDMA_RX_PKTS_BYTES_OPCOUNTER_PRIO, }; enum { + RDMA_TX_CNP_OPCOUNTER_PER_QP_PRIO, + RDMA_TX_PKTS_BYTES_OPCOUNTER_PER_QP_PRIO, RDMA_TX_CNP_OPCOUNTER_PRIO, + RDMA_TX_PKTS_BYTES_OPCOUNTER_PRIO, }; static int set_vhca_port_spec(struct mlx5_ib_dev *dev, u32 port_num, @@ -867,6 +881,344 @@ static int set_cnp_spec(struct mlx5_ib_dev *dev, u32 port_num, return 0; } +/* Returns the prio we should use for the given optional counter type, + * whereas for bytes type we use the packet type, since they share the same + * resources. 
+ */ +static struct mlx5_ib_flow_prio *get_opfc_prio(struct mlx5_ib_dev *dev, + u32 type) +{ + u32 prio_type; + + switch (type) { + case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES: + prio_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS; + break; + case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES: + prio_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS; + break; + case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP: + prio_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP; + break; + case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP: + prio_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP; + break; + default: + prio_type = type; + } + + return &dev->flow_db->opfcs[prio_type]; +} + +static void put_per_qp_prio(struct mlx5_ib_dev *dev, + enum mlx5_ib_optional_counter_type type) +{ + enum mlx5_ib_optional_counter_type per_qp_type; + struct mlx5_ib_flow_prio *prio; + + switch (type) { + case MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS: + per_qp_type = MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP; + break; + case MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS: + per_qp_type = MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS_PER_QP; + break; + case MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS: + per_qp_type = MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS_PER_QP; + break; + case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS: + per_qp_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP; + break; + case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES: + per_qp_type = MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP; + break; + case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS: + per_qp_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP; + break; + case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES: + per_qp_type = MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP; + break; + default: + return; + } + + prio = get_opfc_prio(dev, per_qp_type); + put_flow_table(dev, prio, true); +} + +static int get_per_qp_prio(struct mlx5_ib_dev *dev, + enum mlx5_ib_optional_counter_type type) +{ + enum mlx5_ib_optional_counter_type per_qp_type; + enum mlx5_flow_namespace_type fn_type; + struct mlx5_flow_namespace *ns; + struct mlx5_ib_flow_prio *prio; + int priority; + + switch (type) { + case MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS: + fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS; + priority = RDMA_RX_ECN_OPCOUNTER_PER_QP_PRIO; + per_qp_type = MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP; + break; + case MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS: + fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS; + priority = RDMA_RX_CNP_OPCOUNTER_PER_QP_PRIO; + per_qp_type = MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS_PER_QP; + break; + case MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS: + fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS; + priority = RDMA_TX_CNP_OPCOUNTER_PER_QP_PRIO; + per_qp_type = MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS_PER_QP; + break; + case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS: + fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS; + priority = RDMA_TX_PKTS_BYTES_OPCOUNTER_PER_QP_PRIO; + per_qp_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP; + break; + case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES: + fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS; + priority = RDMA_TX_PKTS_BYTES_OPCOUNTER_PER_QP_PRIO; + per_qp_type = MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP; + break; + case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS: + fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS; + priority = RDMA_RX_PKTS_BYTES_OPCOUNTER_PER_QP_PRIO; + per_qp_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP; + break; + case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES: + fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS; + priority = RDMA_RX_PKTS_BYTES_OPCOUNTER_PER_QP_PRIO; + per_qp_type = MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP; + break; + default: + return -EINVAL; + } + + ns = mlx5_get_flow_namespace(dev->mdev, fn_type); + if (!ns) + 
return -EOPNOTSUPP; + + prio = get_opfc_prio(dev, per_qp_type); + if (prio->flow_table) + return 0; + + prio = _get_prio(dev, ns, prio, priority, MLX5_FS_MAX_POOL_SIZE, 1, 0, 0); + if (IS_ERR(prio)) + return PTR_ERR(prio); + + prio->refcount = 1; + + return 0; +} + +static struct mlx5_per_qp_opfc * +get_per_qp_opfc(struct mlx5_rdma_counter *mcounter, u32 qp_num, bool *new) +{ + struct mlx5_per_qp_opfc *per_qp_opfc; + + *new = false; + + per_qp_opfc = xa_load(&mcounter->qpn_opfc_xa, qp_num); + if (per_qp_opfc) + return per_qp_opfc; + per_qp_opfc = kzalloc(sizeof(*per_qp_opfc), GFP_KERNEL); + + if (!per_qp_opfc) + return NULL; + + *new = true; + return per_qp_opfc; +} + +static int add_op_fc_rules(struct mlx5_ib_dev *dev, + struct mlx5_rdma_counter *mcounter, + struct mlx5_per_qp_opfc *per_qp_opfc, + struct mlx5_ib_flow_prio *prio, + enum mlx5_ib_optional_counter_type type, + u32 qp_num, u32 port_num) +{ + struct mlx5_ib_op_fc *opfc = &per_qp_opfc->opfcs[type], *in_use_opfc; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_destination dst; + struct mlx5_flow_spec *spec; + int i, err, spec_num; + bool is_tx; + + if (opfc->fc) + return -EEXIST; + + if (mlx5r_is_opfc_shared_and_in_use(per_qp_opfc->opfcs, type, + &in_use_opfc)) { + opfc->fc = in_use_opfc->fc; + opfc->rule[0] = in_use_opfc->rule[0]; + return 0; + } + + opfc->fc = mcounter->fc[type]; + + spec = kcalloc(MAX_OPFC_RULES, sizeof(*spec), GFP_KERNEL); + if (!spec) { + err = -ENOMEM; + goto null_fc; + } + + switch (type) { + case MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP: + if (set_ecn_ce_spec(dev, port_num, &spec[0], + MLX5_FS_IPV4_VERSION) || + set_ecn_ce_spec(dev, port_num, &spec[1], + MLX5_FS_IPV6_VERSION)) { + err = -EOPNOTSUPP; + goto free_spec; + } + spec_num = 2; + is_tx = false; + + MLX5_SET_TO_ONES(fte_match_param, spec[1].match_criteria, + misc_parameters.bth_dst_qp); + MLX5_SET(fte_match_param, spec[1].match_value, + misc_parameters.bth_dst_qp, qp_num); + spec[1].match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS; + break; + case MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS_PER_QP: + if (!MLX5_CAP_FLOWTABLE( + dev->mdev, + ft_field_support_2_nic_receive_rdma.bth_opcode) || + set_cnp_spec(dev, port_num, &spec[0])) { + err = -EOPNOTSUPP; + goto free_spec; + } + spec_num = 1; + is_tx = false; + break; + case MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS_PER_QP: + if (!MLX5_CAP_FLOWTABLE( + dev->mdev, + ft_field_support_2_nic_transmit_rdma.bth_opcode) || + set_cnp_spec(dev, port_num, &spec[0])) { + err = -EOPNOTSUPP; + goto free_spec; + } + spec_num = 1; + is_tx = true; + break; + case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP: + case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP: + spec_num = 1; + is_tx = true; + break; + case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP: + case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP: + spec_num = 1; + is_tx = false; + break; + default: + err = -EINVAL; + goto free_spec; + } + + if (is_tx) { + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + misc_parameters.source_sqn); + MLX5_SET(fte_match_param, spec->match_value, + misc_parameters.source_sqn, qp_num); + } else { + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + misc_parameters.bth_dst_qp); + MLX5_SET(fte_match_param, spec->match_value, + misc_parameters.bth_dst_qp, qp_num); + } + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS; + + dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dst.counter = opfc->fc; + + flow_act.action = + MLX5_FLOW_CONTEXT_ACTION_COUNT | MLX5_FLOW_CONTEXT_ACTION_ALLOW; + + for (i = 0; i < spec_num; i++) { + opfc->rule[i] 
= mlx5_add_flow_rules(prio->flow_table, &spec[i], + &flow_act, &dst, 1); + if (IS_ERR(opfc->rule[i])) { + err = PTR_ERR(opfc->rule[i]); + goto del_rules; + } + } + prio->refcount += spec_num; + + err = xa_err(xa_store(&mcounter->qpn_opfc_xa, qp_num, per_qp_opfc, + GFP_KERNEL)); + if (err) + goto del_rules; + + kfree(spec); + + return 0; + +del_rules: + while (i--) + mlx5_del_flow_rules(opfc->rule[i]); + put_flow_table(dev, prio, false); +free_spec: + kfree(spec); +null_fc: + opfc->fc = NULL; + return err; +} + +static bool is_fc_shared_and_in_use(struct mlx5_rdma_counter *mcounter, + u32 type, struct mlx5_fc **fc) +{ + u32 shared_fc_type; + + switch (type) { + case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP: + shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP; + break; + case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP: + shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP; + break; + case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP: + shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP; + break; + case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP: + shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP; + break; + default: + return false; + } + + *fc = mcounter->fc[shared_fc_type]; + if (!(*fc)) + return false; + + return true; +} + +void mlx5r_fs_destroy_fcs(struct mlx5_ib_dev *dev, + struct rdma_counter *counter) +{ + struct mlx5_rdma_counter *mcounter = to_mcounter(counter); + struct mlx5_fc *in_use_fc; + int i; + + for (i = MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP; + i <= MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP; i++) { + if (!mcounter->fc[i]) + continue; + + if (is_fc_shared_and_in_use(mcounter, i, &in_use_fc)) { + mcounter->fc[i] = NULL; + continue; + } + + mlx5_fc_destroy(dev->mdev, mcounter->fc[i]); + mcounter->fc[i] = NULL; + } +} + int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num, struct mlx5_ib_op_fc *opfc, enum mlx5_ib_optional_counter_type type) @@ -921,6 +1273,20 @@ int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num, priority = RDMA_TX_CNP_OPCOUNTER_PRIO; break; + case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS: + case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES: + spec_num = 1; + fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS; + priority = RDMA_TX_PKTS_BYTES_OPCOUNTER_PRIO; + break; + + case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS: + case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES: + spec_num = 1; + fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS; + priority = RDMA_RX_PKTS_BYTES_OPCOUNTER_PRIO; + break; + default: err = -EOPNOTSUPP; goto free; @@ -932,13 +1298,17 @@ int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num, goto free; } - prio = &dev->flow_db->opfcs[type]; + prio = get_opfc_prio(dev, type); if (!prio->flow_table) { + err = get_per_qp_prio(dev, type); + if (err) + goto free; + prio = _get_prio(dev, ns, prio, priority, - dev->num_ports * MAX_OPFC_RULES, 1, 0); + dev->num_ports * MAX_OPFC_RULES, 1, 0, 0); if (IS_ERR(prio)) { err = PTR_ERR(prio); - goto free; + goto put_prio; } } @@ -965,6 +1335,8 @@ del_rules: for (i -= 1; i >= 0; i--) mlx5_del_flow_rules(opfc->rule[i]); put_flow_table(dev, prio, false); +put_prio: + put_per_qp_prio(dev, type); free: kfree(spec); return err; @@ -974,12 +1346,115 @@ void mlx5_ib_fs_remove_op_fc(struct mlx5_ib_dev *dev, struct mlx5_ib_op_fc *opfc, enum mlx5_ib_optional_counter_type type) { + struct mlx5_ib_flow_prio *prio; int i; + prio = get_opfc_prio(dev, type); + for (i = 0; i < MAX_OPFC_RULES && opfc->rule[i]; i++) { mlx5_del_flow_rules(opfc->rule[i]); - put_flow_table(dev, &dev->flow_db->opfcs[type], true); + put_flow_table(dev, 
prio, true); } + + put_per_qp_prio(dev, type); +} + +void mlx5r_fs_unbind_op_fc(struct ib_qp *qp, struct rdma_counter *counter) +{ + struct mlx5_rdma_counter *mcounter = to_mcounter(counter); + struct mlx5_ib_dev *dev = to_mdev(counter->device); + struct mlx5_per_qp_opfc *per_qp_opfc; + struct mlx5_ib_op_fc *in_use_opfc; + struct mlx5_ib_flow_prio *prio; + int i, j; + + per_qp_opfc = xa_load(&mcounter->qpn_opfc_xa, qp->qp_num); + if (!per_qp_opfc) + return; + + for (i = MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP; + i <= MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP; i++) { + if (!per_qp_opfc->opfcs[i].fc) + continue; + + if (mlx5r_is_opfc_shared_and_in_use(per_qp_opfc->opfcs, i, + &in_use_opfc)) { + per_qp_opfc->opfcs[i].fc = NULL; + continue; + } + + for (j = 0; j < MAX_OPFC_RULES; j++) { + if (!per_qp_opfc->opfcs[i].rule[j]) + continue; + mlx5_del_flow_rules(per_qp_opfc->opfcs[i].rule[j]); + prio = get_opfc_prio(dev, i); + put_flow_table(dev, prio, true); + } + per_qp_opfc->opfcs[i].fc = NULL; + } + + kfree(per_qp_opfc); + xa_erase(&mcounter->qpn_opfc_xa, qp->qp_num); +} + +int mlx5r_fs_bind_op_fc(struct ib_qp *qp, struct rdma_counter *counter, + u32 port) +{ + struct mlx5_rdma_counter *mcounter = to_mcounter(counter); + struct mlx5_ib_dev *dev = to_mdev(qp->device); + struct mlx5_per_qp_opfc *per_qp_opfc; + struct mlx5_ib_flow_prio *prio; + struct mlx5_ib_counters *cnts; + struct mlx5_ib_op_fc *opfc; + struct mlx5_fc *in_use_fc; + int i, err, per_qp_type; + bool new; + + if (!counter->mode.bind_opcnt) + return 0; + + cnts = &dev->port[port - 1].cnts; + + for (i = 0; i <= MLX5_IB_OPCOUNTER_RDMA_RX_BYTES; i++) { + opfc = &cnts->opfcs[i]; + if (!opfc->fc) + continue; + + per_qp_type = i + MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP; + prio = get_opfc_prio(dev, per_qp_type); + WARN_ON(!prio->flow_table); + + if (is_fc_shared_and_in_use(mcounter, per_qp_type, &in_use_fc)) + mcounter->fc[per_qp_type] = in_use_fc; + + if (!mcounter->fc[per_qp_type]) { + mcounter->fc[per_qp_type] = mlx5_fc_create(dev->mdev, + false); + if (IS_ERR(mcounter->fc[per_qp_type])) + return PTR_ERR(mcounter->fc[per_qp_type]); + } + + per_qp_opfc = get_per_qp_opfc(mcounter, qp->qp_num, &new); + if (!per_qp_opfc) { + err = -ENOMEM; + goto free_fc; + } + err = add_op_fc_rules(dev, mcounter, per_qp_opfc, prio, + per_qp_type, qp->qp_num, port); + if (err) + goto del_rules; + } + + return 0; + +del_rules: + mlx5r_fs_unbind_op_fc(qp, counter); + if (new) + kfree(per_qp_opfc); +free_fc: + if (xa_empty(&mcounter->qpn_opfc_xa)) + mlx5r_fs_destroy_fcs(dev, counter); + return err; } static void set_underlay_qp(struct mlx5_ib_dev *dev, @@ -1413,17 +1888,51 @@ free_ucmd: return ERR_PTR(err); } +static int mlx5_ib_fill_transport_ns_info(struct mlx5_ib_dev *dev, + enum mlx5_flow_namespace_type type, + u32 *flags, u16 *vport_idx, + u16 *vport, + struct mlx5_core_dev **ft_mdev, + u32 ib_port) +{ + struct mlx5_core_dev *esw_mdev; + + if (!is_mdev_switchdev_mode(dev->mdev)) + return 0; + + if (!MLX5_CAP_ADV_RDMA(dev->mdev, rdma_transport_manager)) + return -EOPNOTSUPP; + + if (!dev->port[ib_port - 1].rep) + return -EINVAL; + + esw_mdev = mlx5_eswitch_get_core_dev(dev->port[ib_port - 1].rep->esw); + if (esw_mdev != dev->mdev) + return -EOPNOTSUPP; + + *flags |= MLX5_FLOW_TABLE_OTHER_VPORT; + *ft_mdev = esw_mdev; + *vport = dev->port[ib_port - 1].rep->vport; + *vport_idx = dev->port[ib_port - 1].rep->vport_index; + + return 0; +} + static struct mlx5_ib_flow_prio * _get_flow_table(struct mlx5_ib_dev *dev, u16 user_priority, enum mlx5_flow_namespace_type 
ns_type, - bool mcast) + bool mcast, u32 ib_port) { + struct mlx5_core_dev *ft_mdev = dev->mdev; struct mlx5_flow_namespace *ns = NULL; struct mlx5_ib_flow_prio *prio = NULL; int max_table_size = 0; + u16 vport_idx = 0; bool esw_encap; u32 flags = 0; + u16 vport = 0; int priority; + int ret; if (mcast) priority = MLX5_IB_FLOW_MCAST_PRIO; @@ -1471,13 +1980,38 @@ _get_flow_table(struct mlx5_ib_dev *dev, u16 user_priority, MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev, log_max_ft_size)); priority = user_priority; break; + case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX: + case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX: + if (ib_port == 0 || user_priority > MLX5_RDMA_TRANSPORT_BYPASS_PRIO) + return ERR_PTR(-EINVAL); + ret = mlx5_ib_fill_transport_ns_info(dev, ns_type, &flags, + &vport_idx, &vport, + &ft_mdev, ib_port); + if (ret) + return ERR_PTR(ret); + + if (ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX) + max_table_size = + BIT(MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_RX( + ft_mdev, log_max_ft_size)); + else + max_table_size = + BIT(MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_TX( + ft_mdev, log_max_ft_size)); + priority = user_priority; + break; default: break; } max_table_size = min_t(int, max_table_size, MLX5_FS_MAX_ENTRIES); - ns = mlx5_get_flow_namespace(dev->mdev, ns_type); + if (ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX || + ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX) + ns = mlx5_get_flow_vport_namespace(ft_mdev, ns_type, vport_idx); + else + ns = mlx5_get_flow_namespace(ft_mdev, ns_type); + if (!ns) return ERR_PTR(-EOPNOTSUPP); @@ -1497,6 +2031,12 @@ _get_flow_table(struct mlx5_ib_dev *dev, u16 user_priority, case MLX5_FLOW_NAMESPACE_RDMA_TX: prio = &dev->flow_db->rdma_tx[priority]; break; + case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX: + prio = &dev->flow_db->rdma_transport_rx[ib_port - 1]; + break; + case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX: + prio = &dev->flow_db->rdma_transport_tx[ib_port - 1]; + break; default: return ERR_PTR(-EINVAL); } @@ -1507,7 +2047,7 @@ _get_flow_table(struct mlx5_ib_dev *dev, u16 user_priority, return prio; return _get_prio(dev, ns, prio, priority, max_table_size, - MLX5_FS_MAX_TYPES, flags); + MLX5_FS_MAX_TYPES, flags, vport); } static struct mlx5_ib_flow_handler * @@ -1626,7 +2166,8 @@ static struct mlx5_ib_flow_handler *raw_fs_rule_add( mutex_lock(&dev->flow_db->lock); ft_prio = _get_flow_table(dev, fs_matcher->priority, - fs_matcher->ns_type, mcast); + fs_matcher->ns_type, mcast, + fs_matcher->ib_port); if (IS_ERR(ft_prio)) { err = PTR_ERR(ft_prio); goto unlock; @@ -1742,6 +2283,12 @@ mlx5_ib_ft_type_to_namespace(enum mlx5_ib_uapi_flow_table_type table_type, case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TX: *namespace = MLX5_FLOW_NAMESPACE_RDMA_TX; break; + case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TRANSPORT_RX: + *namespace = MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX; + break; + case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TRANSPORT_TX: + *namespace = MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX; + break; default: return -EINVAL; } @@ -1831,7 +2378,8 @@ static int get_dests(struct uverbs_attr_bundle *attrs, return -EINVAL; /* Allow only DEVX object or QP as dest when inserting to RDMA_RX */ - if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) && + if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX || + fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX) && ((!dest_devx && !dest_qp) || (dest_devx && dest_qp))) return -EINVAL; @@ -1848,7 +2396,8 @@ static int get_dests(struct uverbs_attr_bundle *attrs, return -EINVAL; /* Allow only flow table as dest when inserting to FDB or 
RDMA_RX */ if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB_BYPASS || - fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) && + fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX || + fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX) && *dest_type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) return -EINVAL; } else if (dest_qp) { @@ -1869,14 +2418,16 @@ static int get_dests(struct uverbs_attr_bundle *attrs, *dest_id = mqp->raw_packet_qp.rq.tirn; *dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR; } else if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS || - fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX) && + fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX || + fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX) && !(*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)) { *dest_type = MLX5_FLOW_DESTINATION_TYPE_PORT; } if (*dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR && (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS || - fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX)) + fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX || + fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX)) return -EINVAL; return 0; @@ -2353,6 +2904,15 @@ static int mlx5_ib_matcher_ns(struct uverbs_attr_bundle *attrs, return 0; } +static bool verify_context_caps(struct mlx5_ib_dev *dev, u64 enabled_caps) +{ + if (is_mdev_switchdev_mode(dev->mdev)) + return UCAP_ENABLED(enabled_caps, + RDMA_UCAP_MLX5_CTRL_OTHER_VHCA); + + return UCAP_ENABLED(enabled_caps, RDMA_UCAP_MLX5_CTRL_LOCAL); +} + static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)( struct uverbs_attr_bundle *attrs) { @@ -2401,6 +2961,26 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)( goto end; } + if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_IB_PORT)) { + err = uverbs_copy_from(&obj->ib_port, attrs, + MLX5_IB_ATTR_FLOW_MATCHER_IB_PORT); + if (err) + goto end; + if (!rdma_is_port_valid(&dev->ib_dev, obj->ib_port)) { + err = -EINVAL; + goto end; + } + if (obj->ns_type != MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX && + obj->ns_type != MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX) { + err = -EINVAL; + goto end; + } + if (!verify_context_caps(dev, uobj->context->enabled_caps)) { + err = -EOPNOTSUPP; + goto end; + } + } + uobj->object = obj; obj->mdev = dev->mdev; atomic_set(&obj->usecnt, 0); @@ -2448,7 +3028,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_STEERING_ANCHOR_CREATE)( mutex_lock(&dev->flow_db->lock); - ft_prio = _get_flow_table(dev, priority, ns_type, 0); + ft_prio = _get_flow_table(dev, priority, ns_type, 0, 0); if (IS_ERR(ft_prio)) { err = PTR_ERR(ft_prio); goto free_obj; @@ -2834,7 +3414,10 @@ DECLARE_UVERBS_NAMED_METHOD( UA_OPTIONAL), UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE, enum mlx5_ib_uapi_flow_table_type, - UA_OPTIONAL)); + UA_OPTIONAL), + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_FLOW_MATCHER_IB_PORT, + UVERBS_ATTR_TYPE(u32), + UA_OPTIONAL)); DECLARE_UVERBS_NAMED_METHOD_DESTROY( MLX5_IB_METHOD_FLOW_MATCHER_DESTROY, @@ -2878,6 +3461,7 @@ DECLARE_UVERBS_NAMED_OBJECT( &UVERBS_METHOD(MLX5_IB_METHOD_STEERING_ANCHOR_DESTROY)); const struct uapi_definition mlx5_ib_flow_defs[] = { +#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) UAPI_DEF_CHAIN_OBJ_TREE_NAMED( MLX5_IB_OBJECT_FLOW_MATCHER), UAPI_DEF_CHAIN_OBJ_TREE( @@ -2888,6 +3472,7 @@ const struct uapi_definition mlx5_ib_flow_defs[] = { UAPI_DEF_CHAIN_OBJ_TREE_NAMED( MLX5_IB_OBJECT_STEERING_ANCHOR, UAPI_DEF_IS_OBJ_SUPPORTED(mlx5_ib_shared_ft_allowed)), +#endif {}, }; @@ -2904,8 +3489,26 @@ int mlx5_ib_fs_init(struct mlx5_ib_dev *dev) if 
(!dev->flow_db) return -ENOMEM; + dev->flow_db->rdma_transport_rx = kcalloc(dev->num_ports, + sizeof(struct mlx5_ib_flow_prio), + GFP_KERNEL); + if (!dev->flow_db->rdma_transport_rx) + goto free_flow_db; + + dev->flow_db->rdma_transport_tx = kcalloc(dev->num_ports, + sizeof(struct mlx5_ib_flow_prio), + GFP_KERNEL); + if (!dev->flow_db->rdma_transport_tx) + goto free_rdma_transport_rx; + mutex_init(&dev->flow_db->lock); ib_set_device_ops(&dev->ib_dev, &flow_ops); return 0; + +free_rdma_transport_rx: + kfree(dev->flow_db->rdma_transport_rx); +free_flow_db: + kfree(dev->flow_db); + return -ENOMEM; } diff --git a/drivers/infiniband/hw/mlx5/fs.h b/drivers/infiniband/hw/mlx5/fs.h index b9734904f5f0..2ebe86e5be10 100644 --- a/drivers/infiniband/hw/mlx5/fs.h +++ b/drivers/infiniband/hw/mlx5/fs.h @@ -8,23 +8,8 @@ #include "mlx5_ib.h" -#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) int mlx5_ib_fs_init(struct mlx5_ib_dev *dev); void mlx5_ib_fs_cleanup_anchor(struct mlx5_ib_dev *dev); -#else -static inline int mlx5_ib_fs_init(struct mlx5_ib_dev *dev) -{ - dev->flow_db = kzalloc(sizeof(*dev->flow_db), GFP_KERNEL); - - if (!dev->flow_db) - return -ENOMEM; - - mutex_init(&dev->flow_db->lock); - return 0; -} - -inline void mlx5_ib_fs_cleanup_anchor(struct mlx5_ib_dev *dev) {} -#endif static inline void mlx5_ib_fs_cleanup(struct mlx5_ib_dev *dev) { @@ -40,6 +25,8 @@ static inline void mlx5_ib_fs_cleanup(struct mlx5_ib_dev *dev) * is a safe assumption that all references are gone. */ mlx5_ib_fs_cleanup_anchor(dev); + kfree(dev->flow_db->rdma_transport_tx); + kfree(dev->flow_db->rdma_transport_rx); kfree(dev->flow_db); } #endif /* _MLX5_IB_FS_H */ diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 81849eb671a1..d07cacaa0abd 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -47,6 +47,7 @@ #include <rdma/uverbs_ioctl.h> #include <rdma/mlx5_user_ioctl_verbs.h> #include <rdma/mlx5_user_ioctl_cmds.h> +#include <rdma/ib_ucaps.h> #include "macsec.h" #include "data_direct.h" @@ -1934,6 +1935,12 @@ static int set_ucontext_resp(struct ib_ucontext *uctx, return 0; } +static bool uctx_rdma_ctrl_is_enabled(u64 enabled_caps) +{ + return UCAP_ENABLED(enabled_caps, RDMA_UCAP_MLX5_CTRL_LOCAL) || + UCAP_ENABLED(enabled_caps, RDMA_UCAP_MLX5_CTRL_OTHER_VHCA); +} + static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) { @@ -1976,10 +1983,17 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx, return -EINVAL; if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) { - err = mlx5_ib_devx_create(dev, true); + err = mlx5_ib_devx_create(dev, true, uctx->enabled_caps); if (err < 0) goto out_ctx; context->devx_uid = err; + + if (uctx_rdma_ctrl_is_enabled(uctx->enabled_caps)) { + err = mlx5_cmd_add_privileged_uid(dev->mdev, + context->devx_uid); + if (err) + goto out_devx; + } } lib_uar_4k = req.lib_caps & MLX5_LIB_CAP_4K_UAR; @@ -1994,7 +2008,7 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx, /* updates req->total_num_bfregs */ err = calc_total_bfregs(dev, lib_uar_4k, &req, bfregi); if (err) - goto out_devx; + goto out_ucap; mutex_init(&bfregi->lock); bfregi->lib_uar_4k = lib_uar_4k; @@ -2002,7 +2016,7 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx, GFP_KERNEL); if (!bfregi->count) { err = -ENOMEM; - goto out_devx; + goto out_ucap; } bfregi->sys_pages = kcalloc(bfregi->num_sys_pages, @@ -2066,6 +2080,11 @@ out_sys_pages: out_count: kfree(bfregi->count); +out_ucap: + if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX 
&& + uctx_rdma_ctrl_is_enabled(uctx->enabled_caps)) + mlx5_cmd_remove_privileged_uid(dev->mdev, context->devx_uid); + out_devx: if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) mlx5_ib_devx_destroy(dev, context->devx_uid); @@ -2110,8 +2129,12 @@ static void mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) kfree(bfregi->sys_pages); kfree(bfregi->count); - if (context->devx_uid) + if (context->devx_uid) { + if (uctx_rdma_ctrl_is_enabled(ibcontext->enabled_caps)) + mlx5_cmd_remove_privileged_uid(dev->mdev, + context->devx_uid); mlx5_ib_devx_destroy(dev, context->devx_uid); + } } static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev, @@ -4201,8 +4224,47 @@ static int mlx5_ib_init_var_table(struct mlx5_ib_dev *dev) return (var_table->bitmap) ? 0 : -ENOMEM; } +static void mlx5_ib_cleanup_ucaps(struct mlx5_ib_dev *dev) +{ + if (MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RDMA_CTRL) + ib_remove_ucap(RDMA_UCAP_MLX5_CTRL_LOCAL); + + if (MLX5_CAP_GEN(dev->mdev, uctx_cap) & + MLX5_UCTX_CAP_RDMA_CTRL_OTHER_VHCA) + ib_remove_ucap(RDMA_UCAP_MLX5_CTRL_OTHER_VHCA); +} + +static int mlx5_ib_init_ucaps(struct mlx5_ib_dev *dev) +{ + int ret; + + if (MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RDMA_CTRL) { + ret = ib_create_ucap(RDMA_UCAP_MLX5_CTRL_LOCAL); + if (ret) + return ret; + } + + if (MLX5_CAP_GEN(dev->mdev, uctx_cap) & + MLX5_UCTX_CAP_RDMA_CTRL_OTHER_VHCA) { + ret = ib_create_ucap(RDMA_UCAP_MLX5_CTRL_OTHER_VHCA); + if (ret) + goto remove_local; + } + + return 0; + +remove_local: + if (MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RDMA_CTRL) + ib_remove_ucap(RDMA_UCAP_MLX5_CTRL_LOCAL); + return ret; +} + static void mlx5_ib_stage_caps_cleanup(struct mlx5_ib_dev *dev) { + if (MLX5_CAP_GEN_2_64(dev->mdev, general_obj_types_127_64) & + MLX5_HCA_CAP_2_GENERAL_OBJECT_TYPES_RDMA_CTRL) + mlx5_ib_cleanup_ucaps(dev); + bitmap_free(dev->var_table.bitmap); } @@ -4253,6 +4315,13 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) return err; } + if (MLX5_CAP_GEN_2_64(dev->mdev, general_obj_types_127_64) & + MLX5_HCA_CAP_2_GENERAL_OBJECT_TYPES_RDMA_CTRL) { + err = mlx5_ib_init_ucaps(dev); + if (err) + return err; + } + dev->ib_dev.use_cq_dim = true; return 0; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 974a45c92fbb..ace2df3e1d9f 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -276,6 +276,7 @@ struct mlx5_ib_flow_matcher { struct mlx5_core_dev *mdev; atomic_t usecnt; u8 match_criteria_enable; + u32 ib_port; }; struct mlx5_ib_steering_anchor { @@ -293,6 +294,18 @@ enum mlx5_ib_optional_counter_type { MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS, MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS, MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS, + MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS, + MLX5_IB_OPCOUNTER_RDMA_TX_BYTES, + MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS, + MLX5_IB_OPCOUNTER_RDMA_RX_BYTES, + + MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP, + MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS_PER_QP, + MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS_PER_QP, + MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP, + MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP, + MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP, + MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP, MLX5_IB_OPCOUNTER_MAX, }; @@ -307,6 +320,8 @@ struct mlx5_ib_flow_db { struct mlx5_ib_flow_prio rdma_tx[MLX5_IB_NUM_FLOW_FT]; struct mlx5_ib_flow_prio opfcs[MLX5_IB_OPCOUNTER_MAX]; struct mlx5_flow_table *lag_demux_ft; + struct mlx5_ib_flow_prio *rdma_transport_rx; + struct mlx5_ib_flow_prio *rdma_transport_tx; /* Protect flow steering bypass flow tables * 
when add/del flow rules. * only single add/removal of flow steering rule could be done @@ -883,6 +898,14 @@ void mlx5_ib_fs_remove_op_fc(struct mlx5_ib_dev *dev, struct mlx5_ib_op_fc *opfc, enum mlx5_ib_optional_counter_type type); +int mlx5r_fs_bind_op_fc(struct ib_qp *qp, struct rdma_counter *counter, + u32 port); + +void mlx5r_fs_unbind_op_fc(struct ib_qp *qp, struct rdma_counter *counter); + +void mlx5r_fs_destroy_fcs(struct mlx5_ib_dev *dev, + struct rdma_counter *counter); + struct mlx5_ib_multiport_info; struct mlx5_ib_multiport { diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 753faa9ad06a..b7c8c926c578 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -56,7 +56,7 @@ static void create_mkey_callback(int status, struct mlx5_async_work *context); static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem, u64 iova, int access_flags, - unsigned int page_size, bool populate, + unsigned long page_size, bool populate, int access_mode); static int __mlx5_ib_dereg_mr(struct ib_mr *ibmr); @@ -718,8 +718,7 @@ mkey_cache_ent_from_rb_key(struct mlx5_ib_dev *dev, } static struct mlx5_ib_mr *_mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, - struct mlx5_cache_ent *ent, - int access_flags) + struct mlx5_cache_ent *ent) { struct mlx5_ib_mr *mr; int err; @@ -794,7 +793,7 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, if (!ent) return ERR_PTR(-EOPNOTSUPP); - return _mlx5_mr_cache_alloc(dev, ent, access_flags); + return _mlx5_mr_cache_alloc(dev, ent); } static void mlx5_mkey_cache_debugfs_cleanup(struct mlx5_ib_dev *dev) @@ -919,6 +918,25 @@ mkeys_err: return ERR_PTR(ret); } +static void mlx5r_destroy_cache_entries(struct mlx5_ib_dev *dev) +{ + struct rb_root *root = &dev->cache.rb_root; + struct mlx5_cache_ent *ent; + struct rb_node *node; + + mutex_lock(&dev->cache.rb_lock); + node = rb_first(root); + while (node) { + ent = rb_entry(node, struct mlx5_cache_ent, node); + node = rb_next(node); + clean_keys(dev, ent); + rb_erase(&ent->node, root); + mlx5r_mkeys_uninit(ent); + kfree(ent); + } + mutex_unlock(&dev->cache.rb_lock); +} + int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev) { struct mlx5_mkey_cache *cache = &dev->cache; @@ -970,6 +988,8 @@ int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev) err: mutex_unlock(&cache->rb_lock); mlx5_mkey_cache_debugfs_cleanup(dev); + mlx5r_destroy_cache_entries(dev); + destroy_workqueue(cache->wq); mlx5_ib_warn(dev, "failed to create mkey cache entry\n"); return ret; } @@ -1003,17 +1023,7 @@ void mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev) mlx5_cmd_cleanup_async_ctx(&dev->async_ctx); /* At this point all entries are disabled and have no concurrent work. 
*/ - mutex_lock(&dev->cache.rb_lock); - node = rb_first(root); - while (node) { - ent = rb_entry(node, struct mlx5_cache_ent, node); - node = rb_next(node); - clean_keys(dev, ent); - rb_erase(&ent->node, root); - mlx5r_mkeys_uninit(ent); - kfree(ent); - } - mutex_unlock(&dev->cache.rb_lock); + mlx5r_destroy_cache_entries(dev); destroy_workqueue(dev->cache.wq); del_timer_sync(&dev->delay_timer); @@ -1115,7 +1125,7 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd, struct mlx5r_cache_rb_key rb_key = {}; struct mlx5_cache_ent *ent; struct mlx5_ib_mr *mr; - unsigned int page_size; + unsigned long page_size; if (umem->is_dmabuf) page_size = mlx5_umem_dmabuf_default_pgsz(umem, iova); @@ -1144,7 +1154,7 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd, return mr; } - mr = _mlx5_mr_cache_alloc(dev, ent, access_flags); + mr = _mlx5_mr_cache_alloc(dev, ent); if (IS_ERR(mr)) return mr; @@ -1219,7 +1229,7 @@ err_1: */ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem, u64 iova, int access_flags, - unsigned int page_size, bool populate, + unsigned long page_size, bool populate, int access_mode) { struct mlx5_ib_dev *dev = to_mdev(pd->device); @@ -1425,7 +1435,7 @@ static struct ib_mr *create_real_mr(struct ib_pd *pd, struct ib_umem *umem, mr = alloc_cacheable_mr(pd, umem, iova, access_flags, MLX5_MKC_ACCESS_MODE_MTT); } else { - unsigned int page_size = + unsigned long page_size = mlx5_umem_mkc_find_best_pgsz(dev, umem, iova); mutex_lock(&dev->slow_path_mutex); @@ -1957,7 +1967,6 @@ static int cache_ent_find_and_store(struct mlx5_ib_dev *dev, if (mr->mmkey.cache_ent) { spin_lock_irq(&mr->mmkey.cache_ent->mkeys_queue.lock); - mr->mmkey.cache_ent->in_use--; goto end; } @@ -2025,6 +2034,7 @@ static int mlx5_revoke_mr(struct mlx5_ib_mr *mr) bool is_odp = is_odp_mr(mr); bool is_odp_dma_buf = is_dmabuf_mr(mr) && !to_ib_umem_dmabuf(mr->umem)->pinned; + bool from_cache = !!ent; int ret = 0; if (is_odp) @@ -2037,6 +2047,8 @@ static int mlx5_revoke_mr(struct mlx5_ib_mr *mr) ent = mr->mmkey.cache_ent; /* upon storing to a clean temp entry - schedule its cleanup */ spin_lock_irq(&ent->mkeys_queue.lock); + if (from_cache) + ent->in_use--; if (ent->is_tmp && !ent->tmp_cleanup_scheduled) { mod_delayed_work(ent->dev->cache.wq, &ent->dwork, msecs_to_jiffies(30 * 1000)); diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index e77c9280c07e..86d8fa63bf69 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -309,9 +309,6 @@ static bool mlx5_ib_invalidate_range(struct mmu_interval_notifier *mni, blk_start_idx = idx; in_block = 1; } - - /* Count page invalidations */ - invalidations += idx - blk_start_idx + 1; } else { u64 umr_offset = idx & umr_block_mask; @@ -321,14 +318,19 @@ static bool mlx5_ib_invalidate_range(struct mmu_interval_notifier *mni, MLX5_IB_UPD_XLT_ZAP | MLX5_IB_UPD_XLT_ATOMIC); in_block = 0; + /* Count page invalidations */ + invalidations += idx - blk_start_idx + 1; } } } - if (in_block) + if (in_block) { mlx5r_umr_update_xlt(mr, blk_start_idx, idx - blk_start_idx + 1, 0, MLX5_IB_UPD_XLT_ZAP | MLX5_IB_UPD_XLT_ATOMIC); + /* Count page invalidations */ + invalidations += idx - blk_start_idx + 1; + } mlx5_update_odp_stats_with_handled(mr, invalidations, invalidations); |
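
For reference, a standalone user-space sketch of the counter-sharing scheme the counters.c/fs.c hunks introduce: each rdma_{tx,rx}_bytes optional counter reuses the flow counter of its rdma_{tx,rx}_packets sibling (and vice versa), and the query path then reports either the packets or the bytes field of that shared counter. This is only an illustration of the pairing logic; the enum, struct, and function names below are simplified stand-ins, not the driver's identifiers.

/* Standalone sketch (not the kernel code): packets/bytes optional counters
 * share one backing flow counter, mirroring mlx5r_is_opfc_shared_and_in_use()
 * and the bytes-vs-packets selection in do_get_op_stat() after this patch.
 */
#include <stdbool.h>
#include <stdio.h>

enum opcounter_type {            /* stand-in for mlx5_ib_optional_counter_type */
	OPC_RDMA_TX_PACKETS,
	OPC_RDMA_TX_BYTES,
	OPC_RDMA_RX_PACKETS,
	OPC_RDMA_RX_BYTES,
	OPC_MAX,
};

struct flow_counter {            /* stand-in for struct mlx5_fc plus its query result */
	bool allocated;
	unsigned long long packets;
	unsigned long long bytes;
};

/* A bytes counter shares the flow counter of its packets counter and vice versa. */
static enum opcounter_type shared_sibling(enum opcounter_type type)
{
	switch (type) {
	case OPC_RDMA_TX_PACKETS: return OPC_RDMA_TX_BYTES;
	case OPC_RDMA_TX_BYTES:   return OPC_RDMA_TX_PACKETS;
	case OPC_RDMA_RX_PACKETS: return OPC_RDMA_RX_BYTES;
	default:                  return OPC_RDMA_RX_PACKETS;
	}
}

/* Enabling a counter reuses the sibling's flow counter when it already exists,
 * so one hardware counter backs both the packets and the bytes statistic. */
static struct flow_counter *enable_counter(struct flow_counter *fcs,
					   enum opcounter_type type)
{
	struct flow_counter *sibling = &fcs[shared_sibling(type)];

	if (sibling->allocated)
		return sibling;
	fcs[type].allocated = true;   /* the driver would call mlx5_fc_create() here */
	return &fcs[type];
}

/* The query path reads one {packets, bytes} pair and reports the field that
 * matches the requested statistic. */
static unsigned long long read_stat(const struct flow_counter *fc,
				    enum opcounter_type type)
{
	bool is_bytes = (type == OPC_RDMA_TX_BYTES || type == OPC_RDMA_RX_BYTES);

	return is_bytes ? fc->bytes : fc->packets;
}

int main(void)
{
	struct flow_counter fcs[OPC_MAX] = {};
	struct flow_counter *tx_pkts = enable_counter(fcs, OPC_RDMA_TX_PACKETS);
	struct flow_counter *tx_bytes = enable_counter(fcs, OPC_RDMA_TX_BYTES);

	/* Both statistics resolve to the same backing counter. */
	tx_pkts->packets = 3;
	tx_pkts->bytes = 4096;
	printf("shared backing counter: %d\n", tx_pkts == tx_bytes);
	printf("rdma_tx_packets: %llu\n", read_stat(tx_pkts, OPC_RDMA_TX_PACKETS));
	printf("rdma_tx_bytes:   %llu\n", read_stat(tx_bytes, OPC_RDMA_TX_BYTES));
	return 0;
}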