Diffstat (limited to 'drivers/infiniband/hw')
-rw-r--r--  drivers/infiniband/hw/bnxt_re/bnxt_re.h | 12
-rw-r--r--  drivers/infiniband/hw/bnxt_re/debugfs.c | 215
-rw-r--r--  drivers/infiniband/hw/bnxt_re/debugfs.h | 15
-rw-r--r--  drivers/infiniband/hw/bnxt_re/hw_counters.c | 92
-rw-r--r--  drivers/infiniband/hw/bnxt_re/ib_verbs.c | 36
-rw-r--r--  drivers/infiniband/hw/bnxt_re/ib_verbs.h | 6
-rw-r--r--  drivers/infiniband/hw/bnxt_re/main.c | 4
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_fp.c | 2
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 10
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_rcfw.h | 6
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_res.c | 9
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_res.h | 12
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_sp.c | 3
-rw-r--r--  drivers/infiniband/hw/bnxt_re/roce_hsi.h | 3
-rw-r--r--  drivers/infiniband/hw/erdma/erdma_cm.c | 1
-rw-r--r--  drivers/infiniband/hw/hfi1/chip.c | 18
-rw-r--r--  drivers/infiniband/hw/hfi1/chip.h | 1
-rw-r--r--  drivers/infiniband/hw/hfi1/driver.c | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/init.c | 5
-rw-r--r--  drivers/infiniband/hw/hfi1/mad.c | 4
-rw-r--r--  drivers/infiniband/hw/hfi1/qsfp.c | 20
-rw-r--r--  drivers/infiniband/hw/hfi1/qsfp.h | 2
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_alloc.c | 4
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_cq.c | 1
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hem.c | 16
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_main.c | 2
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_mr.c | 2
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_qp.c | 22
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_srq.c | 2
-rw-r--r--  drivers/infiniband/hw/irdma/Kconfig | 1
-rw-r--r--  drivers/infiniband/hw/irdma/hw.c | 2
-rw-r--r--  drivers/infiniband/hw/irdma/main.c | 46
-rw-r--r--  drivers/infiniband/hw/irdma/main.h | 4
-rw-r--r--  drivers/infiniband/hw/irdma/osdep.h | 6
-rw-r--r--  drivers/infiniband/hw/irdma/puda.c | 19
-rw-r--r--  drivers/infiniband/hw/irdma/puda.h | 5
-rw-r--r--  drivers/infiniband/hw/irdma/utils.c | 47
-rw-r--r--  drivers/infiniband/hw/mana/Makefile | 2
-rw-r--r--  drivers/infiniband/hw/mana/ah.c | 58
-rw-r--r--  drivers/infiniband/hw/mana/counters.c | 105
-rw-r--r--  drivers/infiniband/hw/mana/counters.h | 44
-rw-r--r--  drivers/infiniband/hw/mana/cq.c | 228
-rw-r--r--  drivers/infiniband/hw/mana/device.c | 82
-rw-r--r--  drivers/infiniband/hw/mana/main.c | 103
-rw-r--r--  drivers/infiniband/hw/mana/mana_ib.h | 210
-rw-r--r--  drivers/infiniband/hw/mana/mr.c | 105
-rw-r--r--  drivers/infiniband/hw/mana/qp.c | 245
-rw-r--r--  drivers/infiniband/hw/mana/shadow_queue.h | 115
-rw-r--r--  drivers/infiniband/hw/mana/wr.c | 168
-rw-r--r--  drivers/infiniband/hw/mlx5/Makefile | 2
-rw-r--r--  drivers/infiniband/hw/mlx5/ah.c | 14
-rw-r--r--  drivers/infiniband/hw/mlx5/counters.c | 195
-rw-r--r--  drivers/infiniband/hw/mlx5/counters.h | 15
-rw-r--r--  drivers/infiniband/hw/mlx5/cq.c | 2
-rw-r--r--  drivers/infiniband/hw/mlx5/devx.c | 41
-rw-r--r--  drivers/infiniband/hw/mlx5/devx.h | 5
-rw-r--r--  drivers/infiniband/hw/mlx5/fs.c | 637
-rw-r--r--  drivers/infiniband/hw/mlx5/fs.h | 17
-rw-r--r--  drivers/infiniband/hw/mlx5/main.c | 77
-rw-r--r--  drivers/infiniband/hw/mlx5/mlx5_ib.h | 23
-rw-r--r--  drivers/infiniband/hw/mlx5/mr.c | 52
-rw-r--r--  drivers/infiniband/hw/mlx5/odp.c | 10
-rw-r--r--  drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c | 28
-rw-r--r--  drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h | 2
64 files changed, 2921 insertions, 321 deletions
diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
index 3721446c6ba4..6df5a2738c95 100644
--- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h
+++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
@@ -53,12 +53,6 @@
#define BNXT_RE_MAX_MR_SIZE_HIGH BIT_ULL(39)
#define BNXT_RE_MAX_MR_SIZE BNXT_RE_MAX_MR_SIZE_HIGH
-#define BNXT_RE_MAX_QPC_COUNT (64 * 1024)
-#define BNXT_RE_MAX_MRW_COUNT (64 * 1024)
-#define BNXT_RE_MAX_SRQC_COUNT (64 * 1024)
-#define BNXT_RE_MAX_CQ_COUNT (64 * 1024)
-#define BNXT_RE_MAX_MRW_COUNT_64K (64 * 1024)
-#define BNXT_RE_MAX_MRW_COUNT_256K (256 * 1024)
/* Number of MRs to reserve for PF, leaving remainder for VFs */
#define BNXT_RE_RESVD_MR_FOR_PF (32 * 1024)
@@ -231,6 +225,8 @@ struct bnxt_re_dev {
unsigned long event_bitmap;
struct bnxt_qplib_cc_param cc_param;
struct workqueue_struct *dcb_wq;
+ struct dentry *cc_config;
+ struct bnxt_re_dbg_cc_config_params *cc_config_params;
};
#define to_bnxt_re_dev(ptr, member) \
@@ -243,6 +239,10 @@ struct bnxt_re_dev {
#define BNXT_RE_CHECK_RC(x) ((x) && ((x) != -ETIMEDOUT))
void bnxt_re_pacing_alert(struct bnxt_re_dev *rdev);
+int bnxt_re_assign_pma_port_counters(struct bnxt_re_dev *rdev, struct ib_mad *out_mad);
+int bnxt_re_assign_pma_port_ext_counters(struct bnxt_re_dev *rdev,
+ struct ib_mad *out_mad);
+
static inline struct device *rdev_to_dev(struct bnxt_re_dev *rdev)
{
if (rdev)
diff --git a/drivers/infiniband/hw/bnxt_re/debugfs.c b/drivers/infiniband/hw/bnxt_re/debugfs.c
index 7c47039044ef..af91d16c3c77 100644
--- a/drivers/infiniband/hw/bnxt_re/debugfs.c
+++ b/drivers/infiniband/hw/bnxt_re/debugfs.c
@@ -22,6 +22,23 @@
static struct dentry *bnxt_re_debugfs_root;
+static const char * const bnxt_re_cc_gen0_name[] = {
+ "enable_cc",
+ "run_avg_weight_g",
+ "num_phase_per_state",
+ "init_cr",
+ "init_tr",
+ "tos_ecn",
+ "tos_dscp",
+ "alt_vlan_pcp",
+ "alt_vlan_dscp",
+ "rtt",
+ "cc_mode",
+ "tcp_cp",
+ "tx_queue",
+ "inactivity_cp",
+};
+
static inline const char *bnxt_re_qp_state_str(u8 state)
{
switch (state) {
@@ -110,19 +127,215 @@ void bnxt_re_debug_rem_qpinfo(struct bnxt_re_dev *rdev, struct bnxt_re_qp *qp)
debugfs_remove(qp->dentry);
}
+static int map_cc_config_offset_gen0_ext0(u32 offset, struct bnxt_qplib_cc_param *ccparam, u32 *val)
+{
+ u64 map_offset;
+
+ map_offset = BIT(offset);
+
+ switch (map_offset) {
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ENABLE_CC:
+ *val = ccparam->enable;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_G:
+ *val = ccparam->g;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_NUMPHASEPERSTATE:
+ *val = ccparam->nph_per_state;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_INIT_CR:
+ *val = ccparam->init_cr;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_INIT_TR:
+ *val = ccparam->init_tr;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_ECN:
+ *val = ccparam->tos_ecn;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_DSCP:
+ *val = ccparam->tos_dscp;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ALT_VLAN_PCP:
+ *val = ccparam->alt_vlan_pcp;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ALT_TOS_DSCP:
+ *val = ccparam->alt_tos_dscp;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_RTT:
+ *val = ccparam->rtt;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_CC_MODE:
+ *val = ccparam->cc_mode;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TCP_CP:
+ *val = ccparam->tcp_cp;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static ssize_t bnxt_re_cc_config_get(struct file *filp, char __user *buffer,
+ size_t usr_buf_len, loff_t *ppos)
+{
+ struct bnxt_re_cc_param *dbg_cc_param = filp->private_data;
+ struct bnxt_re_dev *rdev = dbg_cc_param->rdev;
+ struct bnxt_qplib_cc_param ccparam = {};
+ u32 offset = dbg_cc_param->offset;
+ char buf[16];
+ u32 val;
+ int rc;
+
+ rc = bnxt_qplib_query_cc_param(&rdev->qplib_res, &ccparam);
+ if (rc)
+ return rc;
+
+ rc = map_cc_config_offset_gen0_ext0(offset, &ccparam, &val);
+ if (rc)
+ return rc;
+
+ rc = snprintf(buf, sizeof(buf), "%d\n", val);
+ if (rc < 0)
+ return rc;
+
+ return simple_read_from_buffer(buffer, usr_buf_len, ppos, (u8 *)(buf), rc);
+}
+
+static void bnxt_re_fill_gen0_ext0(struct bnxt_qplib_cc_param *ccparam, u32 offset, u32 val)
+{
+ u32 modify_mask;
+
+ modify_mask = BIT(offset);
+
+ switch (modify_mask) {
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ENABLE_CC:
+ ccparam->enable = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_G:
+ ccparam->g = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_NUMPHASEPERSTATE:
+ ccparam->nph_per_state = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_INIT_CR:
+ ccparam->init_cr = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_INIT_TR:
+ ccparam->init_tr = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_ECN:
+ ccparam->tos_ecn = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_DSCP:
+ ccparam->tos_dscp = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ALT_VLAN_PCP:
+ ccparam->alt_vlan_pcp = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ALT_TOS_DSCP:
+ ccparam->alt_tos_dscp = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_RTT:
+ ccparam->rtt = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_CC_MODE:
+ ccparam->cc_mode = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TCP_CP:
+ ccparam->tcp_cp = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TX_QUEUE:
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_INACTIVITY_CP:
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TIME_PER_PHASE:
+ ccparam->time_pph = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_PKTS_PER_PHASE:
+ ccparam->pkts_pph = val;
+ break;
+ }
+
+ ccparam->mask = modify_mask;
+}
+
+static int bnxt_re_configure_cc(struct bnxt_re_dev *rdev, u32 gen_ext, u32 offset, u32 val)
+{
+ struct bnxt_qplib_cc_param ccparam = { };
+
+ /* Supporting only Gen 0 now */
+ if (gen_ext == CC_CONFIG_GEN0_EXT0)
+ bnxt_re_fill_gen0_ext0(&ccparam, offset, val);
+ else
+ return -EINVAL;
+
+ bnxt_qplib_modify_cc(&rdev->qplib_res, &ccparam);
+ return 0;
+}
+
+static ssize_t bnxt_re_cc_config_set(struct file *filp, const char __user *buffer,
+ size_t count, loff_t *ppos)
+{
+ struct bnxt_re_cc_param *dbg_cc_param = filp->private_data;
+ struct bnxt_re_dev *rdev = dbg_cc_param->rdev;
+ u32 offset = dbg_cc_param->offset;
+ u8 cc_gen = dbg_cc_param->cc_gen;
+ char buf[16];
+ u32 val;
+ int rc;
+
+ if (count >= sizeof(buf))
+ return -EINVAL;
+
+ if (copy_from_user(buf, buffer, count))
+ return -EFAULT;
+
+ buf[count] = '\0';
+ if (kstrtou32(buf, 0, &val))
+ return -EINVAL;
+
+ rc = bnxt_re_configure_cc(rdev, cc_gen, offset, val);
+ return rc ? rc : count;
+}
+
+static const struct file_operations bnxt_re_cc_config_ops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .read = bnxt_re_cc_config_get,
+ .write = bnxt_re_cc_config_set,
+};
+
void bnxt_re_debugfs_add_pdev(struct bnxt_re_dev *rdev)
{
struct pci_dev *pdev = rdev->en_dev->pdev;
+ struct bnxt_re_dbg_cc_config_params *cc_params;
+ int i;
rdev->dbg_root = debugfs_create_dir(dev_name(&pdev->dev), bnxt_re_debugfs_root);
rdev->qp_debugfs = debugfs_create_dir("QPs", rdev->dbg_root);
+ rdev->cc_config = debugfs_create_dir("cc_config", rdev->dbg_root);
+
+ rdev->cc_config_params = kzalloc(sizeof(*cc_params), GFP_KERNEL);
+
+ for (i = 0; i < BNXT_RE_CC_PARAM_GEN0; i++) {
+ struct bnxt_re_cc_param *tmp_params = &rdev->cc_config_params->gen0_parms[i];
+
+ tmp_params->rdev = rdev;
+ tmp_params->offset = i;
+ tmp_params->cc_gen = CC_CONFIG_GEN0_EXT0;
+ tmp_params->dentry = debugfs_create_file(bnxt_re_cc_gen0_name[i], 0400,
+ rdev->cc_config, tmp_params,
+ &bnxt_re_cc_config_ops);
+ }
}
void bnxt_re_debugfs_rem_pdev(struct bnxt_re_dev *rdev)
{
debugfs_remove_recursive(rdev->qp_debugfs);
-
+ debugfs_remove_recursive(rdev->cc_config);
+ kfree(rdev->cc_config_params);
debugfs_remove_recursive(rdev->dbg_root);
rdev->dbg_root = NULL;
}
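
[Annotation, not part of the patch] The new cc_config directory exposes one debugfs file per Gen0 congestion-control parameter, named after bnxt_re_cc_gen0_name[]; the array index doubles as the bit offset into the CMDQ_MODIFY_ROCE_CC modify mask (BIT(offset) in map_cc_config_offset_gen0_ext0()). A minimal userspace sketch for reading and optionally writing one of these knobs follows; the debugfs path is an assumption (neither the bnxt_re root directory name nor the PCI device name is shown in this diff), so adjust it to the real device.

/* Hedged userspace sketch: read and update one bnxt_re cc_config knob.
 * The path below is an ASSUMPTION based on the directories created in
 * bnxt_re_debugfs_add_pdev(); replace it with the actual device path.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	const char *path = argc > 1 ? argv[1] :
		"/sys/kernel/debug/bnxt_re/0000:21:00.0/cc_config/cc_mode";
	char buf[16] = {};
	ssize_t n;
	int fd;

	fd = open(path, O_RDONLY);
	if (fd < 0) { perror("open"); return 1; }
	n = read(fd, buf, sizeof(buf) - 1);
	if (n < 0) { perror("read"); close(fd); return 1; }
	printf("current value: %s", buf);
	close(fd);

	if (argc > 2) {			/* optional: write a new value */
		fd = open(path, O_WRONLY);
		if (fd < 0) { perror("open for write"); return 1; }
		if (write(fd, argv[2], strlen(argv[2])) < 0)
			perror("write");
		close(fd);
	}
	return 0;
}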
diff --git a/drivers/infiniband/hw/bnxt_re/debugfs.h b/drivers/infiniband/hw/bnxt_re/debugfs.h
index cd3be0a9ec7e..8f101df4e838 100644
--- a/drivers/infiniband/hw/bnxt_re/debugfs.h
+++ b/drivers/infiniband/hw/bnxt_re/debugfs.h
@@ -18,4 +18,19 @@ void bnxt_re_debugfs_rem_pdev(struct bnxt_re_dev *rdev);
void bnxt_re_register_debugfs(void);
void bnxt_re_unregister_debugfs(void);
+#define CC_CONFIG_GEN_EXT(x, y) (((x) << 16) | (y))
+#define CC_CONFIG_GEN0_EXT0 CC_CONFIG_GEN_EXT(0, 0)
+
+#define BNXT_RE_CC_PARAM_GEN0 14
+
+struct bnxt_re_cc_param {
+ struct bnxt_re_dev *rdev;
+ struct dentry *dentry;
+ u32 offset;
+ u8 cc_gen;
+};
+
+struct bnxt_re_dbg_cc_config_params {
+ struct bnxt_re_cc_param gen0_parms[BNXT_RE_CC_PARAM_GEN0];
+};
#endif
diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.c b/drivers/infiniband/hw/bnxt_re/hw_counters.c
index f039aefcaf67..44bb082e0a60 100644
--- a/drivers/infiniband/hw/bnxt_re/hw_counters.c
+++ b/drivers/infiniband/hw/bnxt_re/hw_counters.c
@@ -39,6 +39,8 @@
#include <linux/types.h>
#include <linux/pci.h>
+#include <rdma/ib_mad.h>
+#include <rdma/ib_pma.h>
#include "roce_hsi.h"
#include "qplib_res.h"
@@ -285,6 +287,96 @@ static void bnxt_re_copy_db_pacing_stats(struct bnxt_re_dev *rdev,
readl(rdev->en_dev->bar0 + rdev->pacing.dbr_db_fifo_reg_off);
}
+int bnxt_re_assign_pma_port_ext_counters(struct bnxt_re_dev *rdev, struct ib_mad *out_mad)
+{
+ struct ib_pma_portcounters_ext *pma_cnt_ext;
+ struct bnxt_qplib_ext_stat *estat = &rdev->stats.rstat.ext_stat;
+ struct ctx_hw_stats *hw_stats = NULL;
+ int rc;
+
+ hw_stats = rdev->qplib_ctx.stats.dma;
+
+ pma_cnt_ext = (struct ib_pma_portcounters_ext *)(out_mad->data + 40);
+ if (_is_ext_stats_supported(rdev->dev_attr->dev_cap_flags)) {
+ u32 fid = PCI_FUNC(rdev->en_dev->pdev->devfn);
+
+ rc = bnxt_qplib_qext_stat(&rdev->rcfw, fid, estat);
+ if (rc)
+ return rc;
+ }
+
+ pma_cnt_ext = (struct ib_pma_portcounters_ext *)(out_mad->data + 40);
+ if ((bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) && rdev->is_virtfn) ||
+ !bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx)) {
+ pma_cnt_ext->port_xmit_data =
+ cpu_to_be64(le64_to_cpu(hw_stats->tx_ucast_bytes) / 4);
+ pma_cnt_ext->port_rcv_data =
+ cpu_to_be64(le64_to_cpu(hw_stats->rx_ucast_bytes) / 4);
+ pma_cnt_ext->port_xmit_packets =
+ cpu_to_be64(le64_to_cpu(hw_stats->tx_ucast_pkts));
+ pma_cnt_ext->port_rcv_packets =
+ cpu_to_be64(le64_to_cpu(hw_stats->rx_ucast_pkts));
+ pma_cnt_ext->port_unicast_rcv_packets =
+ cpu_to_be64(le64_to_cpu(hw_stats->rx_ucast_pkts));
+ pma_cnt_ext->port_unicast_xmit_packets =
+ cpu_to_be64(le64_to_cpu(hw_stats->tx_ucast_pkts));
+
+ } else {
+ pma_cnt_ext->port_rcv_packets = cpu_to_be64(estat->rx_roce_good_pkts);
+ pma_cnt_ext->port_rcv_data = cpu_to_be64(estat->rx_roce_good_bytes / 4);
+ pma_cnt_ext->port_xmit_packets = cpu_to_be64(estat->tx_roce_pkts);
+ pma_cnt_ext->port_xmit_data = cpu_to_be64(estat->tx_roce_bytes / 4);
+ pma_cnt_ext->port_unicast_rcv_packets = cpu_to_be64(estat->rx_roce_good_pkts);
+ pma_cnt_ext->port_unicast_xmit_packets = cpu_to_be64(estat->tx_roce_pkts);
+ }
+ return 0;
+}
+
+int bnxt_re_assign_pma_port_counters(struct bnxt_re_dev *rdev, struct ib_mad *out_mad)
+{
+ struct bnxt_qplib_ext_stat *estat = &rdev->stats.rstat.ext_stat;
+ struct ib_pma_portcounters *pma_cnt;
+ struct ctx_hw_stats *hw_stats = NULL;
+ int rc;
+
+ hw_stats = rdev->qplib_ctx.stats.dma;
+
+ pma_cnt = (struct ib_pma_portcounters *)(out_mad->data + 40);
+ if (_is_ext_stats_supported(rdev->dev_attr->dev_cap_flags)) {
+ u32 fid = PCI_FUNC(rdev->en_dev->pdev->devfn);
+
+ rc = bnxt_qplib_qext_stat(&rdev->rcfw, fid, estat);
+ if (rc)
+ return rc;
+ }
+ if ((bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) && rdev->is_virtfn) ||
+ !bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx)) {
+ pma_cnt->port_rcv_packets =
+ cpu_to_be32((u32)(le64_to_cpu(hw_stats->rx_ucast_pkts)) & 0xFFFFFFFF);
+ pma_cnt->port_rcv_data =
+ cpu_to_be32((u32)((le64_to_cpu(hw_stats->rx_ucast_bytes) &
+ 0xFFFFFFFF) / 4));
+ pma_cnt->port_xmit_packets =
+ cpu_to_be32((u32)(le64_to_cpu(hw_stats->tx_ucast_pkts)) & 0xFFFFFFFF);
+ pma_cnt->port_xmit_data =
+ cpu_to_be32((u32)((le64_to_cpu(hw_stats->tx_ucast_bytes)
+ & 0xFFFFFFFF) / 4));
+ } else {
+ pma_cnt->port_rcv_packets = cpu_to_be32(estat->rx_roce_good_pkts);
+ pma_cnt->port_rcv_data = cpu_to_be32((estat->rx_roce_good_bytes / 4));
+ pma_cnt->port_xmit_packets = cpu_to_be32(estat->tx_roce_pkts);
+ pma_cnt->port_xmit_data = cpu_to_be32((estat->tx_roce_bytes / 4));
+ }
+ pma_cnt->port_rcv_constraint_errors = (u8)(le64_to_cpu(hw_stats->rx_discard_pkts) & 0xFF);
+ pma_cnt->port_rcv_errors = cpu_to_be16((u16)(le64_to_cpu(hw_stats->rx_error_pkts)
+ & 0xFFFF));
+ pma_cnt->port_xmit_constraint_errors = (u8)(le64_to_cpu(hw_stats->tx_error_pkts) & 0xFF);
+ pma_cnt->port_xmit_discards = cpu_to_be16((u16)(le64_to_cpu(hw_stats->tx_discard_pkts)
+ & 0xFFFF));
+
+ return 0;
+}
+
int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev,
struct rdma_hw_stats *stats,
u32 port, int index)
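
[Annotation, not part of the patch] The divisions by 4 above follow the PMA convention: PortXmitData/PortRcvData count 4-byte words rather than octets, and the legacy PortCounters attribute carries only 32-bit fields, so the 64-bit hardware byte counters are truncated to their low word before conversion; the driver then stores the results big-endian via cpu_to_be64/cpu_to_be32. A small standalone sketch of the same arithmetic (the sample counter value is made up):

/* Hedged sketch of the PMA unit handling used above. */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t tx_bytes = 0x123456789abcULL;	/* example HW byte counter */

	/* PortCountersExt: full 64-bit counter, in 4-byte-word units */
	uint64_t ext_xmit_data = tx_bytes / 4;
	/* PortCounters: low 32 bits only, also in 4-byte-word units */
	uint32_t xmit_data = (uint32_t)((tx_bytes & 0xFFFFFFFF) / 4);

	printf("PortXmitData (ext):    %" PRIu64 "\n", ext_xmit_data);
	printf("PortXmitData (32-bit): %" PRIu32 "\n", xmit_data);
	return 0;
}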
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 6f5db32082dd..9082b3fd2b47 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -49,6 +49,7 @@
#include <rdma/ib_addr.h>
#include <rdma/ib_mad.h>
#include <rdma/ib_cache.h>
+#include <rdma/ib_pma.h>
#include <rdma/uverbs_ioctl.h>
#include <linux/hashtable.h>
@@ -4491,6 +4492,41 @@ void bnxt_re_mmap_free(struct rdma_user_mmap_entry *rdma_entry)
kfree(bnxt_entry);
}
+int bnxt_re_process_mad(struct ib_device *ibdev, int mad_flags,
+ u32 port_num, const struct ib_wc *in_wc,
+ const struct ib_grh *in_grh,
+ const struct ib_mad *in_mad, struct ib_mad *out_mad,
+ size_t *out_mad_size, u16 *out_mad_pkey_index)
+{
+ struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
+ struct ib_class_port_info cpi = {};
+ int ret = IB_MAD_RESULT_SUCCESS;
+ int rc = 0;
+
+ if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_PERF_MGMT)
+ return ret;
+
+ switch (in_mad->mad_hdr.attr_id) {
+ case IB_PMA_CLASS_PORT_INFO:
+ cpi.capability_mask = IB_PMA_CLASS_CAP_EXT_WIDTH;
+ memcpy((out_mad->data + 40), &cpi, sizeof(cpi));
+ break;
+ case IB_PMA_PORT_COUNTERS_EXT:
+ rc = bnxt_re_assign_pma_port_ext_counters(rdev, out_mad);
+ break;
+ case IB_PMA_PORT_COUNTERS:
+ rc = bnxt_re_assign_pma_port_counters(rdev, out_mad);
+ break;
+ default:
+ rc = -EINVAL;
+ break;
+ }
+ if (rc)
+ return IB_MAD_RESULT_FAILURE;
+ ret |= IB_MAD_RESULT_REPLY;
+ return ret;
+}
+
static int UVERBS_HANDLER(BNXT_RE_METHOD_NOTIFY_DRV)(struct uverbs_attr_bundle *attrs)
{
struct bnxt_re_ucontext *uctx;
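
[Annotation, not part of the patch] The PMA reply payload is written at out_mad->data + 40, i.e. byte 64 of the 256-byte MAD: 24 bytes of common MAD header plus the 40 bytes the performance-management class reserves before its attribute data. A hedged layout sketch; the structs below are simplified stand-ins for illustration, not the kernel definitions.

/* Hedged layout sketch: why the PMA payload lands at out_mad->data + 40. */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct mad_hdr {		/* simplified stand-in for struct ib_mad_hdr */
	uint8_t	 base_version, mgmt_class, class_version, method;
	uint16_t status, class_specific;
	uint64_t tid;
	uint16_t attr_id, resv;
	uint32_t attr_mod;
} __attribute__((packed));

struct mad {			/* simplified stand-in for struct ib_mad */
	struct mad_hdr hdr;
	uint8_t data[232];
} __attribute__((packed));

int main(void)
{
	_Static_assert(sizeof(struct mad_hdr) == 24, "MAD header is 24 bytes");
	_Static_assert(sizeof(struct mad) == 256, "MAD is 256 bytes");
	/* data + 40 == byte 64 of the MAD, where PMA attribute data begins */
	printf("PMA attribute data offset: %zu\n",
	       offsetof(struct mad, data) + 40);
	return 0;
}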
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
index fbb16a411d6a..22c9eb8e9cfc 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
@@ -268,6 +268,12 @@ void bnxt_re_dealloc_ucontext(struct ib_ucontext *context);
int bnxt_re_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
void bnxt_re_mmap_free(struct rdma_user_mmap_entry *rdma_entry);
+int bnxt_re_process_mad(struct ib_device *device, int process_mad_flags,
+ u32 port_num, const struct ib_wc *in_wc,
+ const struct ib_grh *in_grh,
+ const struct ib_mad *in_mad, struct ib_mad *out_mad,
+ size_t *out_mad_size, u16 *out_mad_pkey_index);
+
static inline u32 __to_ib_port_num(u16 port_id)
{
return (u32)port_id + 1;
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index a94c8c5387d9..293b0a96c8e3 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -1285,6 +1285,7 @@ static const struct ib_device_ops bnxt_re_dev_ops = {
.post_recv = bnxt_re_post_recv,
.post_send = bnxt_re_post_send,
.post_srq_recv = bnxt_re_post_srq_recv,
+ .process_mad = bnxt_re_process_mad,
.query_ah = bnxt_re_query_ah,
.query_device = bnxt_re_query_device,
.modify_device = bnxt_re_modify_device,
@@ -2130,8 +2131,7 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 op_type)
* memory for the function and all child VFs
*/
rc = bnxt_qplib_alloc_rcfw_channel(&rdev->qplib_res, &rdev->rcfw,
- &rdev->qplib_ctx,
- BNXT_RE_MAX_QPC_COUNT);
+ &rdev->qplib_ctx);
if (rc) {
ibdev_err(&rdev->ibdev,
"Failed to allocate RCFW Channel: %#x\n", rc);
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
index 5336f74297f8..457eecb99f96 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
@@ -1217,8 +1217,6 @@ static void __modify_flags_from_init_state(struct bnxt_qplib_qp *qp)
qp->path_mtu =
CMDQ_MODIFY_QP_PATH_MTU_MTU_2048;
}
- qp->modify_flags &=
- ~CMDQ_MODIFY_QP_MODIFY_MASK_VLAN_ID;
/* Bono FW require the max_dest_rd_atomic to be >= 1 */
if (qp->max_dest_rd_atomic < 1)
qp->max_dest_rd_atomic = 1;
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
index 17e62f22683b..d23074383428 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
@@ -915,7 +915,6 @@ skip_ctx_setup:
void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw)
{
- kfree(rcfw->qp_tbl);
kfree(rcfw->crsqe_tbl);
bnxt_qplib_free_hwq(rcfw->res, &rcfw->cmdq.hwq);
bnxt_qplib_free_hwq(rcfw->res, &rcfw->creq.hwq);
@@ -924,8 +923,7 @@ void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw)
int bnxt_qplib_alloc_rcfw_channel(struct bnxt_qplib_res *res,
struct bnxt_qplib_rcfw *rcfw,
- struct bnxt_qplib_ctx *ctx,
- int qp_tbl_sz)
+ struct bnxt_qplib_ctx *ctx)
{
struct bnxt_qplib_hwq_attr hwq_attr = {};
struct bnxt_qplib_sg_info sginfo = {};
@@ -969,12 +967,6 @@ int bnxt_qplib_alloc_rcfw_channel(struct bnxt_qplib_res *res,
if (!rcfw->crsqe_tbl)
goto fail;
- /* Allocate one extra to hold the QP1 entries */
- rcfw->qp_tbl_size = qp_tbl_sz + 1;
- rcfw->qp_tbl = kcalloc(rcfw->qp_tbl_size, sizeof(struct bnxt_qplib_qp_node),
- GFP_KERNEL);
- if (!rcfw->qp_tbl)
- goto fail;
spin_lock_init(&rcfw->tbl_lock);
rcfw->max_timeout = res->cctx->hwrm_cmd_max_timeout;
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
index 88814cb3aa74..ff873c5f1b25 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
@@ -262,8 +262,7 @@ static inline void bnxt_qplib_fill_cmdqmsg(struct bnxt_qplib_cmdqmsg *msg,
void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw);
int bnxt_qplib_alloc_rcfw_channel(struct bnxt_qplib_res *res,
struct bnxt_qplib_rcfw *rcfw,
- struct bnxt_qplib_ctx *ctx,
- int qp_tbl_sz);
+ struct bnxt_qplib_ctx *ctx);
void bnxt_qplib_rcfw_stop_irq(struct bnxt_qplib_rcfw *rcfw, bool kill);
void bnxt_qplib_disable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw);
int bnxt_qplib_rcfw_start_irq(struct bnxt_qplib_rcfw *rcfw, int msix_vector,
@@ -285,9 +284,10 @@ int bnxt_qplib_deinit_rcfw(struct bnxt_qplib_rcfw *rcfw);
int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw,
struct bnxt_qplib_ctx *ctx, int is_virtfn);
void bnxt_qplib_mark_qp_error(void *qp_handle);
+
static inline u32 map_qp_id_to_tbl_indx(u32 qid, struct bnxt_qplib_rcfw *rcfw)
{
/* Last index of the qp_tbl is for QP1 ie. qp_tbl_size - 1*/
- return (qid == 1) ? rcfw->qp_tbl_size - 1 : qid % rcfw->qp_tbl_size - 2;
+ return (qid == 1) ? rcfw->qp_tbl_size - 1 : (qid % (rcfw->qp_tbl_size - 2));
}
#endif /* __BNXT_QPLIB_RCFW_H__ */
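
[Annotation, not part of the patch] The map_qp_id_to_tbl_indx() change is an operator-precedence fix: '%' binds tighter than '-', so the old expression computed (qid % qp_tbl_size) - 2 and underflowed to a huge unsigned index whenever qid % qp_tbl_size was less than 2 (for qid != 1). The new form wraps qid within the qp_tbl_size - 2 ordinary slots, leaving the last slot for QP1. A tiny standalone demonstration with a made-up table size:

/* Hedged demo of the precedence fix; QP_TBL_SIZE is arbitrary. */
#include <stdio.h>

#define QP_TBL_SIZE 8U

static unsigned int old_map(unsigned int qid)
{
	return qid == 1 ? QP_TBL_SIZE - 1 : qid % QP_TBL_SIZE - 2;
}

static unsigned int new_map(unsigned int qid)
{
	return qid == 1 ? QP_TBL_SIZE - 1 : qid % (QP_TBL_SIZE - 2);
}

int main(void)
{
	/* qid 0, 8, 9, ... underflow with the old expression */
	for (unsigned int qid = 0; qid < 12; qid++)
		printf("qid %2u: old %10u  new %u\n", qid,
		       old_map(qid), new_map(qid));
	return 0;
}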
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c
index 02922a0987ad..6cd05207ffed 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_res.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c
@@ -871,6 +871,7 @@ int bnxt_qplib_init_res(struct bnxt_qplib_res *res)
void bnxt_qplib_free_res(struct bnxt_qplib_res *res)
{
+ kfree(res->rcfw->qp_tbl);
bnxt_qplib_free_sgid_tbl(res, &res->sgid_tbl);
bnxt_qplib_free_pd_tbl(&res->pd_tbl);
bnxt_qplib_free_dpi_tbl(res, &res->dpi_tbl);
@@ -878,12 +879,20 @@ void bnxt_qplib_free_res(struct bnxt_qplib_res *res)
int bnxt_qplib_alloc_res(struct bnxt_qplib_res *res, struct net_device *netdev)
{
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
struct bnxt_qplib_dev_attr *dev_attr;
int rc;
res->netdev = netdev;
dev_attr = res->dattr;
+ /* Allocate one extra to hold the QP1 entries */
+ rcfw->qp_tbl_size = max_t(u32, BNXT_RE_MAX_QPC_COUNT + 1, dev_attr->max_qp);
+ rcfw->qp_tbl = kcalloc(rcfw->qp_tbl_size, sizeof(struct bnxt_qplib_qp_node),
+ GFP_KERNEL);
+ if (!rcfw->qp_tbl)
+ return -ENOMEM;
+
rc = bnxt_qplib_alloc_sgid_tbl(res, &res->sgid_tbl, dev_attr->max_sgid);
if (rc)
goto fail;
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h
index 711990232de1..6a13927674b4 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_res.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h
@@ -49,6 +49,13 @@ extern const struct bnxt_qplib_gid bnxt_qplib_gid_zero;
#define CHIP_NUM_58818 0xd818
#define CHIP_NUM_57608 0x1760
+#define BNXT_RE_MAX_QPC_COUNT (64 * 1024)
+#define BNXT_RE_MAX_MRW_COUNT (64 * 1024)
+#define BNXT_RE_MAX_SRQC_COUNT (64 * 1024)
+#define BNXT_RE_MAX_CQ_COUNT (64 * 1024)
+#define BNXT_RE_MAX_MRW_COUNT_64K (64 * 1024)
+#define BNXT_RE_MAX_MRW_COUNT_256K (256 * 1024)
+
#define BNXT_QPLIB_DBR_VALID (0x1UL << 26)
#define BNXT_QPLIB_DBR_EPOCH_SHIFT 24
#define BNXT_QPLIB_DBR_TOGGLE_SHIFT 25
@@ -600,4 +607,9 @@ static inline bool _is_cq_coalescing_supported(u16 dev_cap_ext_flags2)
return dev_cap_ext_flags2 & CREQ_QUERY_FUNC_RESP_SB_CQ_COALESCING_SUPPORTED;
}
+static inline bool _is_max_srq_ext_supported(u16 dev_cap_ext_flags_2)
+{
+ return !!(dev_cap_ext_flags_2 & CREQ_QUERY_FUNC_RESP_SB_MAX_SRQ_EXTENDED);
+}
+
#endif /* __BNXT_QPLIB_RES_H__ */
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
index 4ccd4405355a..f231e886ad9d 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
@@ -176,6 +176,9 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw)
attr->dev_cap_flags = le16_to_cpu(sb->dev_cap_flags);
attr->dev_cap_flags2 = le16_to_cpu(sb->dev_cap_ext_flags_2);
+ if (_is_max_srq_ext_supported(attr->dev_cap_flags2))
+ attr->max_srq += le16_to_cpu(sb->max_srq_ext);
+
bnxt_qplib_query_version(rcfw, attr->fw_ver);
for (i = 0; i < MAX_TQM_ALLOC_REQ / 4; i++) {
diff --git a/drivers/infiniband/hw/bnxt_re/roce_hsi.h b/drivers/infiniband/hw/bnxt_re/roce_hsi.h
index 0ee60fdc18b3..7eceb3e9f4ce 100644
--- a/drivers/infiniband/hw/bnxt_re/roce_hsi.h
+++ b/drivers/infiniband/hw/bnxt_re/roce_hsi.h
@@ -2215,11 +2215,12 @@ struct creq_query_func_resp_sb {
#define CREQ_QUERY_FUNC_RESP_SB_REQ_RETRANSMISSION_SUPPORT_IQM_MSN_TABLE (0x2UL << 4)
#define CREQ_QUERY_FUNC_RESP_SB_REQ_RETRANSMISSION_SUPPORT_LAST \
CREQ_QUERY_FUNC_RESP_SB_REQ_RETRANSMISSION_SUPPORT_IQM_MSN_TABLE
+ #define CREQ_QUERY_FUNC_RESP_SB_MAX_SRQ_EXTENDED 0x40UL
#define CREQ_QUERY_FUNC_RESP_SB_MIN_RNR_RTR_RTS_OPT_SUPPORTED 0x1000UL
__le16 max_xp_qp_size;
__le16 create_qp_batch_size;
__le16 destroy_qp_batch_size;
- __le16 reserved16;
+ __le16 max_srq_ext;
__le64 reserved64;
};
diff --git a/drivers/infiniband/hw/erdma/erdma_cm.c b/drivers/infiniband/hw/erdma/erdma_cm.c
index 1b23c698ec25..e0acc185e719 100644
--- a/drivers/infiniband/hw/erdma/erdma_cm.c
+++ b/drivers/infiniband/hw/erdma/erdma_cm.c
@@ -709,7 +709,6 @@ error:
erdma_cancel_mpatimer(new_cep);
erdma_cep_put(new_cep);
- new_cep->sock = NULL;
}
if (new_s) {
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index a442eca498b8..368b6be3226f 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -12882,22 +12882,6 @@ u32 chip_to_opa_pstate(struct hfi1_devdata *dd, u32 chip_pstate)
}
}
-/* return the OPA port logical state name */
-const char *opa_lstate_name(u32 lstate)
-{
- static const char * const port_logical_names[] = {
- "PORT_NOP",
- "PORT_DOWN",
- "PORT_INIT",
- "PORT_ARMED",
- "PORT_ACTIVE",
- "PORT_ACTIVE_DEFER",
- };
- if (lstate < ARRAY_SIZE(port_logical_names))
- return port_logical_names[lstate];
- return "unknown";
-}
-
/* return the OPA port physical state name */
const char *opa_pstate_name(u32 pstate)
{
@@ -12956,8 +12940,6 @@ static void update_statusp(struct hfi1_pportdata *ppd, u32 state)
break;
}
}
- dd_dev_info(ppd->dd, "logical state changed to %s (0x%x)\n",
- opa_lstate_name(state), state);
}
/**
diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h
index 8841db16bde7..6992f6d40255 100644
--- a/drivers/infiniband/hw/hfi1/chip.h
+++ b/drivers/infiniband/hw/hfi1/chip.h
@@ -771,7 +771,6 @@ int is_bx(struct hfi1_devdata *dd);
bool is_urg_masked(struct hfi1_ctxtdata *rcd);
u32 read_physical_state(struct hfi1_devdata *dd);
u32 chip_to_opa_pstate(struct hfi1_devdata *dd, u32 chip_pstate);
-const char *opa_lstate_name(u32 lstate);
const char *opa_pstate_name(u32 pstate);
u32 driver_pstate(struct hfi1_pportdata *ppd);
u32 driver_lstate(struct hfi1_pportdata *ppd);
diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c
index 37a6794885d3..50826e7cdb7e 100644
--- a/drivers/infiniband/hw/hfi1/driver.c
+++ b/drivers/infiniband/hw/hfi1/driver.c
@@ -968,7 +968,7 @@ static bool __set_armed_to_active(struct hfi1_packet *packet)
if (hwstate != IB_PORT_ACTIVE) {
dd_dev_info(packet->rcd->dd,
"Unexpected link state %s\n",
- opa_lstate_name(hwstate));
+ ib_port_state_to_str(hwstate));
return false;
}
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index cbac4a442d9e..d6fbd9c2b8b4 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -635,12 +635,11 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
spin_lock_init(&ppd->cca_timer_lock);
for (i = 0; i < OPA_MAX_SLS; i++) {
- hrtimer_init(&ppd->cca_timer[i].hrtimer, CLOCK_MONOTONIC,
- HRTIMER_MODE_REL);
ppd->cca_timer[i].ppd = ppd;
ppd->cca_timer[i].sl = i;
ppd->cca_timer[i].ccti = 0;
- ppd->cca_timer[i].hrtimer.function = cca_timer_fn;
+ hrtimer_setup(&ppd->cca_timer[i].hrtimer, cca_timer_fn, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL);
}
ppd->cc_max_table_entries = IB_CC_TABLE_CAP_DEFAULT;
diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c
index a9883295f4af..b39f63ce6dfc 100644
--- a/drivers/infiniband/hw/hfi1/mad.c
+++ b/drivers/infiniband/hw/hfi1/mad.c
@@ -1160,8 +1160,8 @@ static int port_states_transition_allowed(struct hfi1_pportdata *ppd,
if (ret == HFI_TRANSITION_DISALLOWED ||
ret == HFI_TRANSITION_UNDEFINED) {
pr_warn("invalid logical state transition %s -> %s\n",
- opa_lstate_name(logical_old),
- opa_lstate_name(logical_new));
+ ib_port_state_to_str(logical_old),
+ ib_port_state_to_str(logical_new));
return ret;
}
diff --git a/drivers/infiniband/hw/hfi1/qsfp.c b/drivers/infiniband/hw/hfi1/qsfp.c
index 52cce1c8b76a..3b7842a7f634 100644
--- a/drivers/infiniband/hw/hfi1/qsfp.c
+++ b/drivers/infiniband/hw/hfi1/qsfp.c
@@ -405,26 +405,6 @@ int qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
}
/*
- * Perform a stand-alone single QSFP write. Acquire the resource, do the
- * write, then release the resource.
- */
-int one_qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
- int len)
-{
- struct hfi1_devdata *dd = ppd->dd;
- u32 resource = qsfp_resource(dd);
- int ret;
-
- ret = acquire_chip_resource(dd, resource, QSFP_WAIT);
- if (ret)
- return ret;
- ret = qsfp_write(ppd, target, addr, bp, len);
- release_chip_resource(dd, resource);
-
- return ret;
-}
-
-/*
* Access page n, offset m of QSFP memory as defined by SFF 8636
* by reading @addr = ((256 * n) + m)
*
diff --git a/drivers/infiniband/hw/hfi1/qsfp.h b/drivers/infiniband/hw/hfi1/qsfp.h
index df1389bad86b..5c59d53fcb63 100644
--- a/drivers/infiniband/hw/hfi1/qsfp.h
+++ b/drivers/infiniband/hw/hfi1/qsfp.h
@@ -195,8 +195,6 @@ int qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
int len);
int qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
int len);
-int one_qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
- int len);
int one_qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
int len);
struct hfi1_asic_data;
diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c
index 950c133d4220..6ee911f6885b 100644
--- a/drivers/infiniband/hw/hns/hns_roce_alloc.c
+++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c
@@ -175,8 +175,10 @@ void hns_roce_cleanup_bitmap(struct hns_roce_dev *hr_dev)
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_XRC)
ida_destroy(&hr_dev->xrcd_ida.ida);
- if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ)
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) {
ida_destroy(&hr_dev->srq_table.srq_ida.ida);
+ xa_destroy(&hr_dev->srq_table.xa);
+ }
hns_roce_cleanup_qp_table(hr_dev);
hns_roce_cleanup_cq_table(hr_dev);
ida_destroy(&hr_dev->mr_table.mtpt_ida.ida);
diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c
index 4106423a1b39..3a5c93c9fb3e 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cq.c
@@ -537,5 +537,6 @@ void hns_roce_cleanup_cq_table(struct hns_roce_dev *hr_dev)
for (i = 0; i < HNS_ROCE_CQ_BANK_NUM; i++)
ida_destroy(&hr_dev->cq_table.bank[i].ida);
+ xa_destroy(&hr_dev->cq_table.array);
mutex_destroy(&hr_dev->cq_table.bank_mutex);
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c
index 605562122ecc..ca0798224e56 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hem.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hem.c
@@ -1361,6 +1361,11 @@ static int hem_list_alloc_root_bt(struct hns_roce_dev *hr_dev,
return ret;
}
+/* This is the bottom bt pages number of a 100G MR on 4K OS, assuming
+ * the bt page size is not expanded by cal_best_bt_pg_sz()
+ */
+#define RESCHED_LOOP_CNT_THRESHOLD_ON_4K 12800
+
/* construct the base address table and link them by address hop config */
int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_list *hem_list,
@@ -1369,6 +1374,7 @@ int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev,
{
const struct hns_roce_buf_region *r;
int ofs, end;
+ int loop;
int unit;
int ret;
int i;
@@ -1386,7 +1392,10 @@ int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev,
continue;
end = r->offset + r->count;
- for (ofs = r->offset; ofs < end; ofs += unit) {
+ for (ofs = r->offset, loop = 1; ofs < end; ofs += unit, loop++) {
+ if (!(loop % RESCHED_LOOP_CNT_THRESHOLD_ON_4K))
+ cond_resched();
+
ret = hem_list_alloc_mid_bt(hr_dev, r, unit, ofs,
hem_list->mid_bt[i],
&hem_list->btm_bt);
@@ -1443,9 +1452,14 @@ void *hns_roce_hem_list_find_mtt(struct hns_roce_dev *hr_dev,
struct list_head *head = &hem_list->btm_bt;
struct hns_roce_hem_item *hem, *temp_hem;
void *cpu_base = NULL;
+ int loop = 1;
int nr = 0;
list_for_each_entry_safe(hem, temp_hem, head, sibling) {
+ if (!(loop % RESCHED_LOOP_CNT_THRESHOLD_ON_4K))
+ cond_resched();
+ loop++;
+
if (hem_list_page_is_in_range(hem, offset)) {
nr = offset - hem->start;
cpu_base = hem->addr + nr * BA_BYTE_LEN;
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index ae24c81c9812..cf89a8db4f64 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -183,7 +183,7 @@ static int hns_roce_query_device(struct ib_device *ib_dev,
IB_DEVICE_RC_RNR_NAK_GEN;
props->max_send_sge = hr_dev->caps.max_sq_sg;
props->max_recv_sge = hr_dev->caps.max_rq_sg;
- props->max_sge_rd = 1;
+ props->max_sge_rd = hr_dev->caps.max_sq_sg;
props->max_cq = hr_dev->caps.num_cqs;
props->max_cqe = hr_dev->caps.max_cqes;
props->max_mr = hr_dev->caps.num_mtpts;
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index 55b9283bfc6f..09da3496843b 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -998,7 +998,7 @@ static bool is_buf_attr_valid(struct hns_roce_dev *hr_dev,
if (attr->region_count > ARRAY_SIZE(attr->region) ||
attr->region_count < 1 || attr->page_shift < HNS_HW_PAGE_SHIFT) {
ibdev_err(ibdev,
- "invalid buf attr, region count %d, page shift %u.\n",
+ "invalid buf attr, region count %u, page shift %u.\n",
attr->region_count, attr->page_shift);
return false;
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index 9e2e76c59406..9f376a2232b0 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -868,12 +868,14 @@ static int alloc_user_qp_db(struct hns_roce_dev *hr_dev,
struct hns_roce_ib_create_qp *ucmd,
struct hns_roce_ib_create_qp_resp *resp)
{
+ bool has_sdb = user_qp_has_sdb(hr_dev, init_attr, udata, resp, ucmd);
struct hns_roce_ucontext *uctx = rdma_udata_to_drv_context(udata,
struct hns_roce_ucontext, ibucontext);
+ bool has_rdb = user_qp_has_rdb(hr_dev, init_attr, udata, resp);
struct ib_device *ibdev = &hr_dev->ib_dev;
int ret;
- if (user_qp_has_sdb(hr_dev, init_attr, udata, resp, ucmd)) {
+ if (has_sdb) {
ret = hns_roce_db_map_user(uctx, ucmd->sdb_addr, &hr_qp->sdb);
if (ret) {
ibdev_err(ibdev,
@@ -884,7 +886,7 @@ static int alloc_user_qp_db(struct hns_roce_dev *hr_dev,
hr_qp->en_flags |= HNS_ROCE_QP_CAP_SQ_RECORD_DB;
}
- if (user_qp_has_rdb(hr_dev, init_attr, udata, resp)) {
+ if (has_rdb) {
ret = hns_roce_db_map_user(uctx, ucmd->db_addr, &hr_qp->rdb);
if (ret) {
ibdev_err(ibdev,
@@ -898,7 +900,7 @@ static int alloc_user_qp_db(struct hns_roce_dev *hr_dev,
return 0;
err_sdb:
- if (hr_qp->en_flags & HNS_ROCE_QP_CAP_SQ_RECORD_DB)
+ if (has_sdb)
hns_roce_db_unmap_user(uctx, &hr_qp->sdb);
err_out:
return ret;
@@ -1119,24 +1121,23 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
ibucontext);
hr_qp->config = uctx->config;
ret = set_user_sq_size(hr_dev, &init_attr->cap, hr_qp, ucmd);
- if (ret)
+ if (ret) {
ibdev_err(ibdev,
"failed to set user SQ size, ret = %d.\n",
ret);
+ return ret;
+ }
ret = set_congest_param(hr_dev, hr_qp, ucmd);
- if (ret)
- return ret;
} else {
if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
hr_qp->config = HNS_ROCE_EXSGE_FLAGS;
+ default_congest_type(hr_dev, hr_qp);
ret = set_kernel_sq_size(hr_dev, &init_attr->cap, hr_qp);
if (ret)
ibdev_err(ibdev,
"failed to set kernel SQ size, ret = %d.\n",
ret);
-
- default_congest_type(hr_dev, hr_qp);
}
return ret;
@@ -1219,7 +1220,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
min(udata->outlen, sizeof(resp)));
if (ret) {
ibdev_err(ibdev, "copy qp resp failed!\n");
- goto err_store;
+ goto err_flow_ctrl;
}
}
@@ -1319,7 +1320,7 @@ int hns_roce_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *init_attr,
ret = hns_roce_create_qp_common(hr_dev, init_attr, udata, hr_qp);
if (ret)
- ibdev_err(ibdev, "create QP type 0x%x failed(%d)\n",
+ ibdev_err(ibdev, "create QP type %d failed(%d)\n",
init_attr->qp_type, ret);
err_out:
@@ -1602,6 +1603,7 @@ void hns_roce_cleanup_qp_table(struct hns_roce_dev *hr_dev)
for (i = 0; i < HNS_ROCE_QP_BANK_NUM; i++)
ida_destroy(&hr_dev->qp_table.bank[i].ida);
xa_destroy(&hr_dev->qp_table.dip_xa);
+ xa_destroy(&hr_dev->qp_table_xa);
mutex_destroy(&hr_dev->qp_table.bank_mutex);
mutex_destroy(&hr_dev->qp_table.scc_mutex);
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c
index 70c06ef65603..1090051f493b 100644
--- a/drivers/infiniband/hw/hns/hns_roce_srq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_srq.c
@@ -51,7 +51,7 @@ static void hns_roce_ib_srq_event(struct hns_roce_srq *srq,
break;
default:
dev_err(hr_dev->dev,
- "hns_roce:Unexpected event type 0x%x on SRQ %06lx\n",
+ "hns_roce:Unexpected event type %d on SRQ %06lx\n",
event_type, srq->srqn);
return;
}
diff --git a/drivers/infiniband/hw/irdma/Kconfig b/drivers/infiniband/hw/irdma/Kconfig
index b6f9c41bca51..5f49a58590ed 100644
--- a/drivers/infiniband/hw/irdma/Kconfig
+++ b/drivers/infiniband/hw/irdma/Kconfig
@@ -7,6 +7,7 @@ config INFINIBAND_IRDMA
depends on ICE && I40E
select GENERIC_ALLOCATOR
select AUXILIARY_BUS
+ select CRC32
help
This is an Intel(R) Ethernet Protocol Driver for RDMA driver
that support E810 (iWARP/RoCE) and X722 (iWARP) network devices.
diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c
index ad50b77282f8..69ce1862eabe 100644
--- a/drivers/infiniband/hw/irdma/hw.c
+++ b/drivers/infiniband/hw/irdma/hw.c
@@ -498,8 +498,6 @@ static int irdma_save_msix_info(struct irdma_pci_f *rf)
iw_qvlist->num_vectors = rf->msix_count;
if (rf->msix_count <= num_online_cpus())
rf->msix_shared = true;
- else if (rf->msix_count > num_online_cpus() + 1)
- rf->msix_count = num_online_cpus() + 1;
pmsix = rf->msix_entries;
for (i = 0, ceq_idx = 0; i < rf->msix_count; i++, iw_qvinfo++) {
diff --git a/drivers/infiniband/hw/irdma/main.c b/drivers/infiniband/hw/irdma/main.c
index 3f13200ff71b..1ee8969595d3 100644
--- a/drivers/infiniband/hw/irdma/main.c
+++ b/drivers/infiniband/hw/irdma/main.c
@@ -206,6 +206,43 @@ static void irdma_lan_unregister_qset(struct irdma_sc_vsi *vsi,
ibdev_dbg(&iwdev->ibdev, "WS: LAN free_res for rdma qset failed.\n");
}
+static int irdma_init_interrupts(struct irdma_pci_f *rf, struct ice_pf *pf)
+{
+ int i;
+
+ rf->msix_count = num_online_cpus() + IRDMA_NUM_AEQ_MSIX;
+ rf->msix_entries = kcalloc(rf->msix_count, sizeof(*rf->msix_entries),
+ GFP_KERNEL);
+ if (!rf->msix_entries)
+ return -ENOMEM;
+
+ for (i = 0; i < rf->msix_count; i++)
+ if (ice_alloc_rdma_qvector(pf, &rf->msix_entries[i]))
+ break;
+
+ if (i < IRDMA_MIN_MSIX) {
+ for (; i > 0; i--)
+ ice_free_rdma_qvector(pf, &rf->msix_entries[i]);
+
+ kfree(rf->msix_entries);
+ return -ENOMEM;
+ }
+
+ rf->msix_count = i;
+
+ return 0;
+}
+
+static void irdma_deinit_interrupts(struct irdma_pci_f *rf, struct ice_pf *pf)
+{
+ int i;
+
+ for (i = 0; i < rf->msix_count; i++)
+ ice_free_rdma_qvector(pf, &rf->msix_entries[i]);
+
+ kfree(rf->msix_entries);
+}
+
static void irdma_remove(struct auxiliary_device *aux_dev)
{
struct iidc_auxiliary_dev *iidc_adev = container_of(aux_dev,
@@ -216,6 +253,7 @@ static void irdma_remove(struct auxiliary_device *aux_dev)
irdma_ib_unregister_device(iwdev);
ice_rdma_update_vsi_filter(pf, iwdev->vsi_num, false);
+ irdma_deinit_interrupts(iwdev->rf, pf);
pr_debug("INIT: Gen2 PF[%d] device remove success\n", PCI_FUNC(pf->pdev->devfn));
}
@@ -230,9 +268,7 @@ static void irdma_fill_device_info(struct irdma_device *iwdev, struct ice_pf *pf
rf->gen_ops.unregister_qset = irdma_lan_unregister_qset;
rf->hw.hw_addr = pf->hw.hw_addr;
rf->pcidev = pf->pdev;
- rf->msix_count = pf->num_rdma_msix;
rf->pf_id = pf->hw.pf_id;
- rf->msix_entries = &pf->msix_entries[pf->rdma_base_vector];
rf->default_vsi.vsi_idx = vsi->vsi_num;
rf->protocol_used = pf->rdma_mode & IIDC_RDMA_PROTOCOL_ROCEV2 ?
IRDMA_ROCE_PROTOCOL_ONLY : IRDMA_IWARP_PROTOCOL_ONLY;
@@ -281,6 +317,10 @@ static int irdma_probe(struct auxiliary_device *aux_dev, const struct auxiliary_
irdma_fill_device_info(iwdev, pf, vsi);
rf = iwdev->rf;
+ err = irdma_init_interrupts(rf, pf);
+ if (err)
+ goto err_init_interrupts;
+
err = irdma_ctrl_init_hw(rf);
if (err)
goto err_ctrl_init;
@@ -311,6 +351,8 @@ err_ibreg:
err_rt_init:
irdma_ctrl_deinit_hw(rf);
err_ctrl_init:
+ irdma_deinit_interrupts(rf, pf);
+err_init_interrupts:
kfree(iwdev->rf);
ib_dealloc_device(&iwdev->ibdev);
diff --git a/drivers/infiniband/hw/irdma/main.h b/drivers/infiniband/hw/irdma/main.h
index 9f0ed6e84471..bb0b6494ccb2 100644
--- a/drivers/infiniband/hw/irdma/main.h
+++ b/drivers/infiniband/hw/irdma/main.h
@@ -30,7 +30,6 @@
#endif
#include <linux/auxiliary_bus.h>
#include <linux/net/intel/iidc.h>
-#include <crypto/hash.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_pack.h>
@@ -117,6 +116,9 @@ extern struct auxiliary_driver i40iw_auxiliary_drv;
#define IRDMA_IRQ_NAME_STR_LEN (64)
+#define IRDMA_NUM_AEQ_MSIX 1
+#define IRDMA_MIN_MSIX 2
+
enum init_completion_state {
INVALID_STATE = 0,
INITIAL_STATE,
diff --git a/drivers/infiniband/hw/irdma/osdep.h b/drivers/infiniband/hw/irdma/osdep.h
index ddf02a462efa..4b4f78288d12 100644
--- a/drivers/infiniband/hw/irdma/osdep.h
+++ b/drivers/infiniband/hw/irdma/osdep.h
@@ -6,7 +6,6 @@
#include <linux/pci.h>
#include <linux/bitfield.h>
#include <linux/net/intel/iidc.h>
-#include <crypto/hash.h>
#include <rdma/ib_verbs.h>
#define STATS_TIMER_DELAY 60000
@@ -43,15 +42,12 @@ enum irdma_status_code irdma_vf_wait_vchnl_resp(struct irdma_sc_dev *dev);
bool irdma_vf_clear_to_send(struct irdma_sc_dev *dev);
void irdma_add_dev_ref(struct irdma_sc_dev *dev);
void irdma_put_dev_ref(struct irdma_sc_dev *dev);
-int irdma_ieq_check_mpacrc(struct shash_desc *desc, void *addr, u32 len,
- u32 val);
+int irdma_ieq_check_mpacrc(const void *addr, u32 len, u32 val);
struct irdma_sc_qp *irdma_ieq_get_qp(struct irdma_sc_dev *dev,
struct irdma_puda_buf *buf);
void irdma_send_ieq_ack(struct irdma_sc_qp *qp);
void irdma_ieq_update_tcpip_info(struct irdma_puda_buf *buf, u16 len,
u32 seqnum);
-void irdma_free_hash_desc(struct shash_desc *hash_desc);
-int irdma_init_hash_desc(struct shash_desc **hash_desc);
int irdma_puda_get_tcpip_info(struct irdma_puda_cmpl_info *info,
struct irdma_puda_buf *buf);
int irdma_cqp_sds_cmd(struct irdma_sc_dev *dev,
diff --git a/drivers/infiniband/hw/irdma/puda.c b/drivers/infiniband/hw/irdma/puda.c
index 7e3f9bca2c23..694e5a9ed15d 100644
--- a/drivers/infiniband/hw/irdma/puda.c
+++ b/drivers/infiniband/hw/irdma/puda.c
@@ -923,8 +923,6 @@ void irdma_puda_dele_rsrc(struct irdma_sc_vsi *vsi, enum puda_rsrc_type type,
switch (rsrc->cmpl) {
case PUDA_HASH_CRC_COMPLETE:
- irdma_free_hash_desc(rsrc->hash_desc);
- fallthrough;
case PUDA_QP_CREATED:
irdma_qp_rem_qos(&rsrc->qp);
@@ -1095,15 +1093,12 @@ int irdma_puda_create_rsrc(struct irdma_sc_vsi *vsi,
goto error;
if (info->type == IRDMA_PUDA_RSRC_TYPE_IEQ) {
- if (!irdma_init_hash_desc(&rsrc->hash_desc)) {
- rsrc->check_crc = true;
- rsrc->cmpl = PUDA_HASH_CRC_COMPLETE;
- ret = 0;
- }
+ rsrc->check_crc = true;
+ rsrc->cmpl = PUDA_HASH_CRC_COMPLETE;
}
irdma_sc_ccq_arm(&rsrc->cq);
- return ret;
+ return 0;
error:
irdma_puda_dele_rsrc(vsi, info->type, false);
@@ -1396,8 +1391,8 @@ static int irdma_ieq_handle_partial(struct irdma_puda_rsrc *ieq,
crcptr = txbuf->data + fpdu_len - 4;
mpacrc = *(u32 *)crcptr;
if (ieq->check_crc) {
- status = irdma_ieq_check_mpacrc(ieq->hash_desc, txbuf->data,
- (fpdu_len - 4), mpacrc);
+ status = irdma_ieq_check_mpacrc(txbuf->data, fpdu_len - 4,
+ mpacrc);
if (status) {
ibdev_dbg(to_ibdev(ieq->dev), "IEQ: error bad crc\n");
goto error;
@@ -1465,8 +1460,8 @@ static int irdma_ieq_process_buf(struct irdma_puda_rsrc *ieq,
crcptr = datap + fpdu_len - 4;
mpacrc = *(u32 *)crcptr;
if (ieq->check_crc)
- ret = irdma_ieq_check_mpacrc(ieq->hash_desc, datap,
- fpdu_len - 4, mpacrc);
+ ret = irdma_ieq_check_mpacrc(datap, fpdu_len - 4,
+ mpacrc);
if (ret) {
list_add(&buf->list, rxlist);
ibdev_dbg(to_ibdev(ieq->dev),
diff --git a/drivers/infiniband/hw/irdma/puda.h b/drivers/infiniband/hw/irdma/puda.h
index bc6d9514c9c1..2fc638f2b143 100644
--- a/drivers/infiniband/hw/irdma/puda.h
+++ b/drivers/infiniband/hw/irdma/puda.h
@@ -119,7 +119,6 @@ struct irdma_puda_rsrc {
u32 rx_wqe_idx;
u32 rxq_invalid_cnt;
u32 tx_wqe_avail_cnt;
- struct shash_desc *hash_desc;
struct list_head txpend;
struct list_head bufpool; /* free buffers pool list for recv and xmit */
u32 alloc_buf_count;
@@ -163,10 +162,8 @@ struct irdma_sc_qp *irdma_ieq_get_qp(struct irdma_sc_dev *dev,
struct irdma_puda_buf *buf);
int irdma_puda_get_tcpip_info(struct irdma_puda_cmpl_info *info,
struct irdma_puda_buf *buf);
-int irdma_ieq_check_mpacrc(struct shash_desc *desc, void *addr, u32 len, u32 val);
-int irdma_init_hash_desc(struct shash_desc **desc);
+int irdma_ieq_check_mpacrc(const void *addr, u32 len, u32 val);
void irdma_ieq_mpa_crc_ae(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp);
-void irdma_free_hash_desc(struct shash_desc *desc);
void irdma_ieq_update_tcpip_info(struct irdma_puda_buf *buf, u16 len, u32 seqnum);
int irdma_cqp_qp_create_cmd(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp);
int irdma_cqp_cq_create_cmd(struct irdma_sc_dev *dev, struct irdma_sc_cq *cq);
diff --git a/drivers/infiniband/hw/irdma/utils.c b/drivers/infiniband/hw/irdma/utils.c
index 0e594122baa7..e73b14fd95ef 100644
--- a/drivers/infiniband/hw/irdma/utils.c
+++ b/drivers/infiniband/hw/irdma/utils.c
@@ -1274,57 +1274,14 @@ void irdma_ieq_mpa_crc_ae(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp)
}
/**
- * irdma_init_hash_desc - initialize hash for crc calculation
- * @desc: cryption type
- */
-int irdma_init_hash_desc(struct shash_desc **desc)
-{
- struct crypto_shash *tfm;
- struct shash_desc *tdesc;
-
- tfm = crypto_alloc_shash("crc32c", 0, 0);
- if (IS_ERR(tfm))
- return -EINVAL;
-
- tdesc = kzalloc(sizeof(*tdesc) + crypto_shash_descsize(tfm),
- GFP_KERNEL);
- if (!tdesc) {
- crypto_free_shash(tfm);
- return -EINVAL;
- }
-
- tdesc->tfm = tfm;
- *desc = tdesc;
-
- return 0;
-}
-
-/**
- * irdma_free_hash_desc - free hash desc
- * @desc: to be freed
- */
-void irdma_free_hash_desc(struct shash_desc *desc)
-{
- if (desc) {
- crypto_free_shash(desc->tfm);
- kfree(desc);
- }
-}
-
-/**
* irdma_ieq_check_mpacrc - check if mpa crc is OK
- * @desc: desc for hash
* @addr: address of buffer for crc
* @len: length of buffer
* @val: value to be compared
*/
-int irdma_ieq_check_mpacrc(struct shash_desc *desc, void *addr, u32 len,
- u32 val)
+int irdma_ieq_check_mpacrc(const void *addr, u32 len, u32 val)
{
- u32 crc = 0;
-
- crypto_shash_digest(desc, addr, len, (u8 *)&crc);
- if (crc != val)
+ if ((__force u32)cpu_to_le32(~crc32c(~0, addr, len)) != val)
return -EINVAL;
return 0;
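
[Annotation, not part of the patch] With the crypto shash removed, the MPA CRC check calls the crc32c() library helper directly, which is why the irdma Kconfig hunk above adds 'select CRC32'. The kernel helper applies no final inversion, so ~crc32c(~0, addr, len) is the standard CRC-32C of the buffer. A hedged userspace sketch with a plain bitwise stand-in for the helper:

/* Hedged sketch of the CRC-32C form used above; crc32c_sw() mimics the
 * kernel's crc32c(seed, buf, len), which applies no final inversion.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint32_t crc32c_sw(uint32_t crc, const void *buf, size_t len)
{
	const uint8_t *p = buf;

	while (len--) {
		crc ^= *p++;
		for (int i = 0; i < 8; i++)
			crc = (crc >> 1) ^ (0x82F63B78U & -(crc & 1));
	}
	return crc;
}

int main(void)
{
	const char *msg = "123456789";
	uint32_t crc = ~crc32c_sw(~0U, msg, strlen(msg));

	/* The standard CRC-32C check value for "123456789" is 0xE3069283. */
	printf("crc32c(\"%s\") = 0x%08X\n", msg, crc);
	return 0;
}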
diff --git a/drivers/infiniband/hw/mana/Makefile b/drivers/infiniband/hw/mana/Makefile
index 88655fe5e398..921c05e08b11 100644
--- a/drivers/infiniband/hw/mana/Makefile
+++ b/drivers/infiniband/hw/mana/Makefile
@@ -1,4 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_MANA_INFINIBAND) += mana_ib.o
-mana_ib-y := device.o main.o wq.o qp.o cq.o mr.o
+mana_ib-y := device.o main.o wq.o qp.o cq.o mr.o ah.o wr.o counters.o
diff --git a/drivers/infiniband/hw/mana/ah.c b/drivers/infiniband/hw/mana/ah.c
new file mode 100644
index 000000000000..f56952eebbaa
--- /dev/null
+++ b/drivers/infiniband/hw/mana/ah.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2024, Microsoft Corporation. All rights reserved.
+ */
+
+#include "mana_ib.h"
+
+int mana_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *attr,
+ struct ib_udata *udata)
+{
+ struct mana_ib_dev *mdev = container_of(ibah->device, struct mana_ib_dev, ib_dev);
+ struct mana_ib_ah *ah = container_of(ibah, struct mana_ib_ah, ibah);
+ struct rdma_ah_attr *ah_attr = attr->ah_attr;
+ const struct ib_global_route *grh;
+ enum rdma_network_type ntype;
+
+ if (ah_attr->type != RDMA_AH_ATTR_TYPE_ROCE ||
+ !(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH))
+ return -EINVAL;
+
+ if (udata)
+ return -EINVAL;
+
+ ah->av = dma_pool_zalloc(mdev->av_pool, GFP_ATOMIC, &ah->dma_handle);
+ if (!ah->av)
+ return -ENOMEM;
+
+ grh = rdma_ah_read_grh(ah_attr);
+ ntype = rdma_gid_attr_network_type(grh->sgid_attr);
+
+ copy_in_reverse(ah->av->dest_mac, ah_attr->roce.dmac, ETH_ALEN);
+ ah->av->udp_src_port = rdma_flow_label_to_udp_sport(grh->flow_label);
+ ah->av->hop_limit = grh->hop_limit;
+ ah->av->dscp = (grh->traffic_class >> 2) & 0x3f;
+ ah->av->is_ipv6 = (ntype == RDMA_NETWORK_IPV6);
+
+ if (ah->av->is_ipv6) {
+ copy_in_reverse(ah->av->dest_ip, grh->dgid.raw, 16);
+ copy_in_reverse(ah->av->src_ip, grh->sgid_attr->gid.raw, 16);
+ } else {
+ ah->av->dest_ip[10] = 0xFF;
+ ah->av->dest_ip[11] = 0xFF;
+ copy_in_reverse(&ah->av->dest_ip[12], &grh->dgid.raw[12], 4);
+ copy_in_reverse(&ah->av->src_ip[12], &grh->sgid_attr->gid.raw[12], 4);
+ }
+
+ return 0;
+}
+
+int mana_ib_destroy_ah(struct ib_ah *ibah, u32 flags)
+{
+ struct mana_ib_dev *mdev = container_of(ibah->device, struct mana_ib_dev, ib_dev);
+ struct mana_ib_ah *ah = container_of(ibah, struct mana_ib_ah, ibah);
+
+ dma_pool_free(mdev->av_pool, ah->av, ah->dma_handle);
+
+ return 0;
+}
diff --git a/drivers/infiniband/hw/mana/counters.c b/drivers/infiniband/hw/mana/counters.c
new file mode 100644
index 000000000000..e533ce21013d
--- /dev/null
+++ b/drivers/infiniband/hw/mana/counters.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2024, Microsoft Corporation. All rights reserved.
+ */
+
+#include "counters.h"
+
+static const struct rdma_stat_desc mana_ib_port_stats_desc[] = {
+ [MANA_IB_REQUESTER_TIMEOUT].name = "requester_timeout",
+ [MANA_IB_REQUESTER_OOS_NAK].name = "requester_oos_nak",
+ [MANA_IB_REQUESTER_RNR_NAK].name = "requester_rnr_nak",
+ [MANA_IB_RESPONDER_RNR_NAK].name = "responder_rnr_nak",
+ [MANA_IB_RESPONDER_OOS].name = "responder_oos",
+ [MANA_IB_RESPONDER_DUP_REQUEST].name = "responder_dup_request",
+ [MANA_IB_REQUESTER_IMPLICIT_NAK].name = "requester_implicit_nak",
+ [MANA_IB_REQUESTER_READRESP_PSN_MISMATCH].name = "requester_readresp_psn_mismatch",
+ [MANA_IB_NAK_INV_REQ].name = "nak_inv_req",
+ [MANA_IB_NAK_ACCESS_ERR].name = "nak_access_error",
+ [MANA_IB_NAK_OPP_ERR].name = "nak_opp_error",
+ [MANA_IB_NAK_INV_READ].name = "nak_inv_read",
+ [MANA_IB_RESPONDER_LOCAL_LEN_ERR].name = "responder_local_len_error",
+ [MANA_IB_REQUESTOR_LOCAL_PROT_ERR].name = "requestor_local_prot_error",
+ [MANA_IB_RESPONDER_REM_ACCESS_ERR].name = "responder_rem_access_error",
+ [MANA_IB_RESPONDER_LOCAL_QP_ERR].name = "responder_local_qp_error",
+ [MANA_IB_RESPONDER_MALFORMED_WQE].name = "responder_malformed_wqe",
+ [MANA_IB_GENERAL_HW_ERR].name = "general_hw_error",
+ [MANA_IB_REQUESTER_RNR_NAK_RETRIES_EXCEEDED].name = "requester_rnr_nak_retries_exceeded",
+ [MANA_IB_REQUESTER_RETRIES_EXCEEDED].name = "requester_retries_exceeded",
+ [MANA_IB_TOTAL_FATAL_ERR].name = "total_fatal_error",
+ [MANA_IB_RECEIVED_CNPS].name = "received_cnps",
+ [MANA_IB_NUM_QPS_CONGESTED].name = "num_qps_congested",
+ [MANA_IB_RATE_INC_EVENTS].name = "rate_inc_events",
+ [MANA_IB_NUM_QPS_RECOVERED].name = "num_qps_recovered",
+ [MANA_IB_CURRENT_RATE].name = "current_rate",
+};
+
+struct rdma_hw_stats *mana_ib_alloc_hw_port_stats(struct ib_device *ibdev,
+ u32 port_num)
+{
+ return rdma_alloc_hw_stats_struct(mana_ib_port_stats_desc,
+ ARRAY_SIZE(mana_ib_port_stats_desc),
+ RDMA_HW_STATS_DEFAULT_LIFESPAN);
+}
+
+int mana_ib_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
+ u32 port_num, int index)
+{
+ struct mana_ib_dev *mdev = container_of(ibdev, struct mana_ib_dev,
+ ib_dev);
+ struct mana_rnic_query_vf_cntrs_resp resp = {};
+ struct mana_rnic_query_vf_cntrs_req req = {};
+ int err;
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_QUERY_VF_COUNTERS,
+ sizeof(req), sizeof(resp));
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
+ req.adapter = mdev->adapter_handle;
+
+ err = mana_gd_send_request(mdev_to_gc(mdev), sizeof(req), &req,
+ sizeof(resp), &resp);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to query vf counters err %d",
+ err);
+ return err;
+ }
+
+ stats->value[MANA_IB_REQUESTER_TIMEOUT] = resp.requester_timeout;
+ stats->value[MANA_IB_REQUESTER_OOS_NAK] = resp.requester_oos_nak;
+ stats->value[MANA_IB_REQUESTER_RNR_NAK] = resp.requester_rnr_nak;
+ stats->value[MANA_IB_RESPONDER_RNR_NAK] = resp.responder_rnr_nak;
+ stats->value[MANA_IB_RESPONDER_OOS] = resp.responder_oos;
+ stats->value[MANA_IB_RESPONDER_DUP_REQUEST] = resp.responder_dup_request;
+ stats->value[MANA_IB_REQUESTER_IMPLICIT_NAK] =
+ resp.requester_implicit_nak;
+ stats->value[MANA_IB_REQUESTER_READRESP_PSN_MISMATCH] =
+ resp.requester_readresp_psn_mismatch;
+ stats->value[MANA_IB_NAK_INV_REQ] = resp.nak_inv_req;
+ stats->value[MANA_IB_NAK_ACCESS_ERR] = resp.nak_access_err;
+ stats->value[MANA_IB_NAK_OPP_ERR] = resp.nak_opp_err;
+ stats->value[MANA_IB_NAK_INV_READ] = resp.nak_inv_read;
+ stats->value[MANA_IB_RESPONDER_LOCAL_LEN_ERR] =
+ resp.responder_local_len_err;
+ stats->value[MANA_IB_REQUESTOR_LOCAL_PROT_ERR] =
+ resp.requestor_local_prot_err;
+ stats->value[MANA_IB_RESPONDER_REM_ACCESS_ERR] =
+ resp.responder_rem_access_err;
+ stats->value[MANA_IB_RESPONDER_LOCAL_QP_ERR] =
+ resp.responder_local_qp_err;
+ stats->value[MANA_IB_RESPONDER_MALFORMED_WQE] =
+ resp.responder_malformed_wqe;
+ stats->value[MANA_IB_GENERAL_HW_ERR] = resp.general_hw_err;
+ stats->value[MANA_IB_REQUESTER_RNR_NAK_RETRIES_EXCEEDED] =
+ resp.requester_rnr_nak_retries_exceeded;
+ stats->value[MANA_IB_REQUESTER_RETRIES_EXCEEDED] =
+ resp.requester_retries_exceeded;
+ stats->value[MANA_IB_TOTAL_FATAL_ERR] = resp.total_fatal_err;
+
+ stats->value[MANA_IB_RECEIVED_CNPS] = resp.received_cnps;
+ stats->value[MANA_IB_NUM_QPS_CONGESTED] = resp.num_qps_congested;
+ stats->value[MANA_IB_RATE_INC_EVENTS] = resp.rate_inc_events;
+ stats->value[MANA_IB_NUM_QPS_RECOVERED] = resp.num_qps_recovered;
+ stats->value[MANA_IB_CURRENT_RATE] = resp.current_rate;
+
+ return ARRAY_SIZE(mana_ib_port_stats_desc);
+}
diff --git a/drivers/infiniband/hw/mana/counters.h b/drivers/infiniband/hw/mana/counters.h
new file mode 100644
index 000000000000..7ff92d27f6c3
--- /dev/null
+++ b/drivers/infiniband/hw/mana/counters.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2024 Microsoft Corporation. All rights reserved.
+ */
+
+#ifndef _COUNTERS_H_
+#define _COUNTERS_H_
+
+#include "mana_ib.h"
+
+enum mana_ib_port_counters {
+ MANA_IB_REQUESTER_TIMEOUT,
+ MANA_IB_REQUESTER_OOS_NAK,
+ MANA_IB_REQUESTER_RNR_NAK,
+ MANA_IB_RESPONDER_RNR_NAK,
+ MANA_IB_RESPONDER_OOS,
+ MANA_IB_RESPONDER_DUP_REQUEST,
+ MANA_IB_REQUESTER_IMPLICIT_NAK,
+ MANA_IB_REQUESTER_READRESP_PSN_MISMATCH,
+ MANA_IB_NAK_INV_REQ,
+ MANA_IB_NAK_ACCESS_ERR,
+ MANA_IB_NAK_OPP_ERR,
+ MANA_IB_NAK_INV_READ,
+ MANA_IB_RESPONDER_LOCAL_LEN_ERR,
+ MANA_IB_REQUESTOR_LOCAL_PROT_ERR,
+ MANA_IB_RESPONDER_REM_ACCESS_ERR,
+ MANA_IB_RESPONDER_LOCAL_QP_ERR,
+ MANA_IB_RESPONDER_MALFORMED_WQE,
+ MANA_IB_GENERAL_HW_ERR,
+ MANA_IB_REQUESTER_RNR_NAK_RETRIES_EXCEEDED,
+ MANA_IB_REQUESTER_RETRIES_EXCEEDED,
+ MANA_IB_TOTAL_FATAL_ERR,
+ MANA_IB_RECEIVED_CNPS,
+ MANA_IB_NUM_QPS_CONGESTED,
+ MANA_IB_RATE_INC_EVENTS,
+ MANA_IB_NUM_QPS_RECOVERED,
+ MANA_IB_CURRENT_RATE,
+};
+
+struct rdma_hw_stats *mana_ib_alloc_hw_port_stats(struct ib_device *ibdev,
+ u32 port_num);
+int mana_ib_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
+ u32 port_num, int index);
+#endif /* _COUNTERS_H_ */
diff --git a/drivers/infiniband/hw/mana/cq.c b/drivers/infiniband/hw/mana/cq.c
index f04a679d2871..0fc4e2679218 100644
--- a/drivers/infiniband/hw/mana/cq.c
+++ b/drivers/infiniband/hw/mana/cq.c
@@ -15,42 +15,58 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
struct ib_device *ibdev = ibcq->device;
struct mana_ib_create_cq ucmd = {};
struct mana_ib_dev *mdev;
+ struct gdma_context *gc;
bool is_rnic_cq;
u32 doorbell;
+ u32 buf_size;
int err;
mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+ gc = mdev_to_gc(mdev);
cq->comp_vector = attr->comp_vector % ibdev->num_comp_vectors;
cq->cq_handle = INVALID_MANA_HANDLE;
- if (udata->inlen < offsetof(struct mana_ib_create_cq, flags))
- return -EINVAL;
+ if (udata) {
+ if (udata->inlen < offsetof(struct mana_ib_create_cq, flags))
+ return -EINVAL;
- err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen));
- if (err) {
- ibdev_dbg(ibdev,
- "Failed to copy from udata for create cq, %d\n", err);
- return err;
- }
+ err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen));
+ if (err) {
+ ibdev_dbg(ibdev, "Failed to copy from udata for create cq, %d\n", err);
+ return err;
+ }
- is_rnic_cq = !!(ucmd.flags & MANA_IB_CREATE_RNIC_CQ);
+ is_rnic_cq = !!(ucmd.flags & MANA_IB_CREATE_RNIC_CQ);
- if (!is_rnic_cq && attr->cqe > mdev->adapter_caps.max_qp_wr) {
- ibdev_dbg(ibdev, "CQE %d exceeding limit\n", attr->cqe);
- return -EINVAL;
- }
+ if ((!is_rnic_cq && attr->cqe > mdev->adapter_caps.max_qp_wr) ||
+ attr->cqe > U32_MAX / COMP_ENTRY_SIZE) {
+ ibdev_dbg(ibdev, "CQE %d exceeding limit\n", attr->cqe);
+ return -EINVAL;
+ }
- cq->cqe = attr->cqe;
- err = mana_ib_create_queue(mdev, ucmd.buf_addr, cq->cqe * COMP_ENTRY_SIZE, &cq->queue);
- if (err) {
- ibdev_dbg(ibdev, "Failed to create queue for create cq, %d\n", err);
- return err;
- }
+ cq->cqe = attr->cqe;
+ err = mana_ib_create_queue(mdev, ucmd.buf_addr, cq->cqe * COMP_ENTRY_SIZE,
+ &cq->queue);
+ if (err) {
+ ibdev_dbg(ibdev, "Failed to create queue for create cq, %d\n", err);
+ return err;
+ }
- mana_ucontext = rdma_udata_to_drv_context(udata, struct mana_ib_ucontext,
- ibucontext);
- doorbell = mana_ucontext->doorbell;
+ mana_ucontext = rdma_udata_to_drv_context(udata, struct mana_ib_ucontext,
+ ibucontext);
+ doorbell = mana_ucontext->doorbell;
+ } else {
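+ /* Kernel clients always get an RNIC CQ; size the buffer to a
+ * page-aligned power of two.
+ */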
+ is_rnic_cq = true;
+ buf_size = MANA_PAGE_ALIGN(roundup_pow_of_two(attr->cqe * COMP_ENTRY_SIZE));
+ cq->cqe = buf_size / COMP_ENTRY_SIZE;
+ err = mana_ib_create_kernel_queue(mdev, buf_size, GDMA_CQ, &cq->queue);
+ if (err) {
+ ibdev_dbg(ibdev, "Failed to create kernel queue for create cq, %d\n", err);
+ return err;
+ }
+ doorbell = gc->mana_ib.doorbell;
+ }
if (is_rnic_cq) {
err = mana_ib_gd_create_cq(mdev, cq, doorbell);
@@ -66,13 +82,19 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
}
}
- resp.cqid = cq->queue.id;
- err = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen));
- if (err) {
- ibdev_dbg(&mdev->ib_dev, "Failed to copy to udata, %d\n", err);
- goto err_remove_cq_cb;
+ if (udata) {
+ resp.cqid = cq->queue.id;
+ err = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen));
+ if (err) {
+ ibdev_dbg(&mdev->ib_dev, "Failed to copy to udata, %d\n", err);
+ goto err_remove_cq_cb;
+ }
}
+ spin_lock_init(&cq->cq_lock);
+ INIT_LIST_HEAD(&cq->list_send_qp);
+ INIT_LIST_HEAD(&cq->list_recv_qp);
+
return 0;
err_remove_cq_cb:
@@ -122,7 +144,10 @@ int mana_ib_install_cq_cb(struct mana_ib_dev *mdev, struct mana_ib_cq *cq)
return -EINVAL;
/* Create CQ table entry */
WARN_ON(gc->cq_table[cq->queue.id]);
- gdma_cq = kzalloc(sizeof(*gdma_cq), GFP_KERNEL);
+ if (cq->queue.kmem)
+ gdma_cq = cq->queue.kmem;
+ else
+ gdma_cq = kzalloc(sizeof(*gdma_cq), GFP_KERNEL);
if (!gdma_cq)
return -ENOMEM;
@@ -141,6 +166,153 @@ void mana_ib_remove_cq_cb(struct mana_ib_dev *mdev, struct mana_ib_cq *cq)
if (cq->queue.id >= gc->max_num_cqs || cq->queue.id == INVALID_QUEUE_ID)
return;
+ if (cq->queue.kmem)
+ /* Kernel-owned queue: the gdma_queue is cleaned up and freed by the mana core driver */
+ return;
+
kfree(gc->cq_table[cq->queue.id]);
gc->cq_table[cq->queue.id] = NULL;
}
+
+int mana_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
+{
+ struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
+ struct gdma_queue *gdma_cq = cq->queue.kmem;
+
+ if (!gdma_cq)
+ return -EINVAL;
+
+ mana_gd_ring_cq(gdma_cq, SET_ARM_BIT);
+ return 0;
+}
+
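+/* A send completion retires the oldest posted send WQE and records its vendor error code */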
+static inline void handle_ud_sq_cqe(struct mana_ib_qp *qp, struct gdma_comp *cqe)
+{
+ struct mana_rdma_cqe *rdma_cqe = (struct mana_rdma_cqe *)cqe->cqe_data;
+ struct gdma_queue *wq = qp->ud_qp.queues[MANA_UD_SEND_QUEUE].kmem;
+ struct ud_sq_shadow_wqe *shadow_wqe;
+
+ shadow_wqe = shadow_queue_get_next_to_complete(&qp->shadow_sq);
+ if (!shadow_wqe)
+ return;
+
+ shadow_wqe->header.error_code = rdma_cqe->ud_send.vendor_error;
+
+ wq->tail += shadow_wqe->header.posted_wqe_size;
+ shadow_queue_advance_next_to_complete(&qp->shadow_sq);
+}
+
+static inline void handle_ud_rq_cqe(struct mana_ib_qp *qp, struct gdma_comp *cqe)
+{
+ struct mana_rdma_cqe *rdma_cqe = (struct mana_rdma_cqe *)cqe->cqe_data;
+ struct gdma_queue *wq = qp->ud_qp.queues[MANA_UD_RECV_QUEUE].kmem;
+ struct ud_rq_shadow_wqe *shadow_wqe;
+
+ shadow_wqe = shadow_queue_get_next_to_complete(&qp->shadow_rq);
+ if (!shadow_wqe)
+ return;
+
+ shadow_wqe->byte_len = rdma_cqe->ud_recv.msg_len;
+ shadow_wqe->src_qpn = rdma_cqe->ud_recv.src_qpn;
+ shadow_wqe->header.error_code = IB_WC_SUCCESS;
+
+ wq->tail += shadow_wqe->header.posted_wqe_size;
+ shadow_queue_advance_next_to_complete(&qp->shadow_rq);
+}
+
+static void mana_handle_cqe(struct mana_ib_dev *mdev, struct gdma_comp *cqe)
+{
+ struct mana_ib_qp *qp = mana_get_qp_ref(mdev, cqe->wq_num, cqe->is_sq);
+
+ if (!qp)
+ return;
+
+ if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_UD) {
+ if (cqe->is_sq)
+ handle_ud_sq_cqe(qp, cqe);
+ else
+ handle_ud_rq_cqe(qp, cqe);
+ }
+
+ mana_put_qp_ref(qp);
+}
+
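+/* Translate a completed shadow WQE into an ib_wc entry for the caller */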
+static void fill_verbs_from_shadow_wqe(struct mana_ib_qp *qp, struct ib_wc *wc,
+ const struct shadow_wqe_header *shadow_wqe)
+{
+ const struct ud_rq_shadow_wqe *ud_wqe = (const struct ud_rq_shadow_wqe *)shadow_wqe;
+
+ wc->wr_id = shadow_wqe->wr_id;
+ wc->status = shadow_wqe->error_code;
+ wc->opcode = shadow_wqe->opcode;
+ wc->vendor_err = shadow_wqe->error_code;
+ wc->wc_flags = 0;
+ wc->qp = &qp->ibqp;
+ wc->pkey_index = 0;
+
+ if (shadow_wqe->opcode == IB_WC_RECV) {
+ wc->byte_len = ud_wqe->byte_len;
+ wc->src_qp = ud_wqe->src_qpn;
+ wc->wc_flags |= IB_WC_GRH;
+ }
+}
+
+static int mana_process_completions(struct mana_ib_cq *cq, int nwc, struct ib_wc *wc)
+{
+ struct shadow_wqe_header *shadow_wqe;
+ struct mana_ib_qp *qp;
+ int wc_index = 0;
+
+ /* process send shadow queue completions */
+ list_for_each_entry(qp, &cq->list_send_qp, cq_send_list) {
+ while ((shadow_wqe = shadow_queue_get_next_to_consume(&qp->shadow_sq))
+ != NULL) {
+ if (wc_index >= nwc)
+ goto out;
+
+ fill_verbs_from_shadow_wqe(qp, &wc[wc_index], shadow_wqe);
+ shadow_queue_advance_consumer(&qp->shadow_sq);
+ wc_index++;
+ }
+ }
+
+ /* process recv shadow queue completions */
+ list_for_each_entry(qp, &cq->list_recv_qp, cq_recv_list) {
+ while ((shadow_wqe = shadow_queue_get_next_to_consume(&qp->shadow_rq))
+ != NULL) {
+ if (wc_index >= nwc)
+ goto out;
+
+ fill_verbs_from_shadow_wqe(qp, &wc[wc_index], shadow_wqe);
+ shadow_queue_advance_consumer(&qp->shadow_rq);
+ wc_index++;
+ }
+ }
+
+out:
+ return wc_index;
+}
+
+int mana_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
+{
+ struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
+ struct mana_ib_dev *mdev = container_of(ibcq->device, struct mana_ib_dev, ib_dev);
+ struct gdma_queue *queue = cq->queue.kmem;
+ struct gdma_comp gdma_cqe;
+ unsigned long flags;
+ int num_polled = 0;
+ int comp_read, i;
+
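+ /* Drain HW CQEs into the per-QP shadow queues, then build
+ * work completions from the shadow entries.
+ */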
+ spin_lock_irqsave(&cq->cq_lock, flags);
+ for (i = 0; i < num_entries; i++) {
+ comp_read = mana_gd_poll_cq(queue, &gdma_cqe, 1);
+ if (comp_read < 1)
+ break;
+ mana_handle_cqe(mdev, &gdma_cqe);
+ }
+
+ num_polled = mana_process_completions(cq, num_entries, wc);
+ spin_unlock_irqrestore(&cq->cq_lock, flags);
+
+ return num_polled;
+}
diff --git a/drivers/infiniband/hw/mana/device.c b/drivers/infiniband/hw/mana/device.c
index 3416a85f8738..b31089320aa5 100644
--- a/drivers/infiniband/hw/mana/device.c
+++ b/drivers/infiniband/hw/mana/device.c
@@ -19,6 +19,7 @@ static const struct ib_device_ops mana_ib_dev_ops = {
.add_gid = mana_ib_gd_add_gid,
.alloc_pd = mana_ib_alloc_pd,
.alloc_ucontext = mana_ib_alloc_ucontext,
+ .create_ah = mana_ib_create_ah,
.create_cq = mana_ib_create_cq,
.create_qp = mana_ib_create_qp,
.create_rwq_ind_table = mana_ib_create_rwq_ind_table,
@@ -27,22 +28,30 @@ static const struct ib_device_ops mana_ib_dev_ops = {
.dealloc_ucontext = mana_ib_dealloc_ucontext,
.del_gid = mana_ib_gd_del_gid,
.dereg_mr = mana_ib_dereg_mr,
+ .destroy_ah = mana_ib_destroy_ah,
.destroy_cq = mana_ib_destroy_cq,
.destroy_qp = mana_ib_destroy_qp,
.destroy_rwq_ind_table = mana_ib_destroy_rwq_ind_table,
.destroy_wq = mana_ib_destroy_wq,
.disassociate_ucontext = mana_ib_disassociate_ucontext,
+ .get_dma_mr = mana_ib_get_dma_mr,
.get_link_layer = mana_ib_get_link_layer,
.get_port_immutable = mana_ib_get_port_immutable,
.mmap = mana_ib_mmap,
.modify_qp = mana_ib_modify_qp,
.modify_wq = mana_ib_modify_wq,
+ .poll_cq = mana_ib_poll_cq,
+ .post_recv = mana_ib_post_recv,
+ .post_send = mana_ib_post_send,
.query_device = mana_ib_query_device,
.query_gid = mana_ib_query_gid,
.query_pkey = mana_ib_query_pkey,
.query_port = mana_ib_query_port,
.reg_user_mr = mana_ib_reg_user_mr,
+ .reg_user_mr_dmabuf = mana_ib_reg_user_mr_dmabuf,
+ .req_notify_cq = mana_ib_arm_cq,
+ INIT_RDMA_OBJ_SIZE(ib_ah, mana_ib_ah, ibah),
INIT_RDMA_OBJ_SIZE(ib_cq, mana_ib_cq, ibcq),
INIT_RDMA_OBJ_SIZE(ib_pd, mana_ib_pd, ibpd),
INIT_RDMA_OBJ_SIZE(ib_qp, mana_ib_qp, ibqp),
@@ -51,6 +60,43 @@ static const struct ib_device_ops mana_ib_dev_ops = {
ib_ind_table),
};
+static const struct ib_device_ops mana_ib_stats_ops = {
+ .alloc_hw_port_stats = mana_ib_alloc_hw_port_stats,
+ .get_hw_stats = mana_ib_get_hw_stats,
+};
+
+static int mana_ib_netdev_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct mana_ib_dev *dev = container_of(this, struct mana_ib_dev, nb);
+ struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);
+ struct gdma_context *gc = dev->gdma_dev->gdma_context;
+ struct mana_context *mc = gc->mana.driver_data;
+ struct net_device *ndev;
+
+ /* Only process events from our parent device */
+ if (event_dev != mc->ports[0])
+ return NOTIFY_DONE;
+
+ switch (event) {
+ case NETDEV_CHANGEUPPER:
+ ndev = mana_get_primary_netdev(mc, 0, &dev->dev_tracker);
+ /*
+ * The RDMA core sets up GIDs based on the updated netdev.
+ * This cannot race with the core because the rtnl lock is held.
+ */
+ ib_device_set_netdev(&dev->ib_dev, ndev, 1);
+
+ /* mana_get_primary_netdev() returns ndev with refcount held */
+ netdev_put(ndev, &dev->dev_tracker);
+
+ return NOTIFY_OK;
+ default:
+ return NOTIFY_DONE;
+ }
+}
+
static int mana_ib_probe(struct auxiliary_device *adev,
const struct auxiliary_device_id *id)
{
@@ -84,10 +130,8 @@ static int mana_ib_probe(struct auxiliary_device *adev,
dev->ib_dev.num_comp_vectors = mdev->gdma_context->max_num_queues;
dev->ib_dev.dev.parent = mdev->gdma_context->dev;
- rcu_read_lock(); /* required to get primary netdev */
- ndev = mana_get_primary_netdev_rcu(mc, 0);
+ ndev = mana_get_primary_netdev(mc, 0, &dev->dev_tracker);
if (!ndev) {
- rcu_read_unlock();
ret = -ENODEV;
ibdev_err(&dev->ib_dev, "Failed to get netdev for IB port 1");
goto free_ib_device;
@@ -95,7 +139,8 @@ static int mana_ib_probe(struct auxiliary_device *adev,
ether_addr_copy(mac_addr, ndev->dev_addr);
addrconf_addr_eui48((u8 *)&dev->ib_dev.node_guid, ndev->dev_addr);
ret = ib_device_set_netdev(&dev->ib_dev, ndev, 1);
- rcu_read_unlock();
+ /* mana_get_primary_netdev() returns ndev with refcount held */
+ netdev_put(ndev, &dev->dev_tracker);
if (ret) {
ibdev_err(&dev->ib_dev, "Failed to set ib netdev, ret %d", ret);
goto free_ib_device;
@@ -109,17 +154,27 @@ static int mana_ib_probe(struct auxiliary_device *adev,
}
dev->gdma_dev = &mdev->gdma_context->mana_ib;
+ dev->nb.notifier_call = mana_ib_netdev_event;
+ ret = register_netdevice_notifier(&dev->nb);
+ if (ret) {
+ ibdev_err(&dev->ib_dev, "Failed to register net notifier, %d",
+ ret);
+ goto deregister_device;
+ }
+
ret = mana_ib_gd_query_adapter_caps(dev);
if (ret) {
ibdev_err(&dev->ib_dev, "Failed to query device caps, ret %d",
ret);
- goto deregister_device;
+ goto deregister_net_notifier;
}
+ ib_set_device_ops(&dev->ib_dev, &mana_ib_stats_ops);
+
ret = mana_ib_create_eqs(dev);
if (ret) {
ibdev_err(&dev->ib_dev, "Failed to create EQs, ret %d", ret);
- goto deregister_device;
+ goto deregister_net_notifier;
}
ret = mana_ib_gd_create_rnic_adapter(dev);
@@ -134,20 +189,31 @@ static int mana_ib_probe(struct auxiliary_device *adev,
goto destroy_rnic;
}
+ dev->av_pool = dma_pool_create("mana_ib_av", mdev->gdma_context->dev,
+ MANA_AV_BUFFER_SIZE, MANA_AV_BUFFER_SIZE, 0);
+ if (!dev->av_pool) {
+ ret = -ENOMEM;
+ goto destroy_rnic;
+ }
+
ret = ib_register_device(&dev->ib_dev, "mana_%d",
mdev->gdma_context->dev);
if (ret)
- goto destroy_rnic;
+ goto deallocate_pool;
dev_set_drvdata(&adev->dev, dev);
return 0;
+deallocate_pool:
+ dma_pool_destroy(dev->av_pool);
destroy_rnic:
xa_destroy(&dev->qp_table_wq);
mana_ib_gd_destroy_rnic_adapter(dev);
destroy_eqs:
mana_ib_destroy_eqs(dev);
+deregister_net_notifier:
+ unregister_netdevice_notifier(&dev->nb);
deregister_device:
mana_gd_deregister_device(dev->gdma_dev);
free_ib_device:
@@ -160,9 +226,11 @@ static void mana_ib_remove(struct auxiliary_device *adev)
struct mana_ib_dev *dev = dev_get_drvdata(&adev->dev);
ib_unregister_device(&dev->ib_dev);
+ dma_pool_destroy(dev->av_pool);
xa_destroy(&dev->qp_table_wq);
mana_ib_gd_destroy_rnic_adapter(dev);
mana_ib_destroy_eqs(dev);
+ unregister_netdevice_notifier(&dev->nb);
mana_gd_deregister_device(dev->gdma_dev);
ib_dealloc_device(&dev->ib_dev);
}
diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c
index 457cea6d9909..eda9c5b971de 100644
--- a/drivers/infiniband/hw/mana/main.c
+++ b/drivers/infiniband/hw/mana/main.c
@@ -82,6 +82,9 @@ int mana_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
mana_gd_init_req_hdr(&req.hdr, GDMA_CREATE_PD, sizeof(req),
sizeof(resp));
+ if (!udata)
+ flags |= GDMA_PD_FLAG_ALLOW_GPA_MR;
+
req.flags = flags;
err = mana_gd_send_request(gc, sizeof(req), &req,
sizeof(resp), &resp);
@@ -237,6 +240,27 @@ void mana_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
ibdev_dbg(ibdev, "Failed to destroy doorbell page %d\n", ret);
}
+int mana_ib_create_kernel_queue(struct mana_ib_dev *mdev, u32 size, enum gdma_queue_type type,
+ struct mana_ib_queue *queue)
+{
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ struct gdma_queue_spec spec = {};
+ int err;
+
+ queue->id = INVALID_QUEUE_ID;
+ queue->gdma_region = GDMA_INVALID_DMA_REGION;
+ spec.type = type;
+ spec.monitor_avl_buf = false;
+ spec.queue_size = size;
+ err = mana_gd_create_mana_wq_cq(&gc->mana_ib, &spec, &queue->kmem);
+ if (err)
+ return err;
+ /* Transfer ownership of the DMA region from mana to mana_ib */
+ queue->gdma_region = queue->kmem->mem_info.dma_region_handle;
+ queue->kmem->mem_info.dma_region_handle = GDMA_INVALID_DMA_REGION;
+ return 0;
+}
+
int mana_ib_create_queue(struct mana_ib_dev *mdev, u64 addr, u32 size,
struct mana_ib_queue *queue)
{
@@ -276,6 +300,8 @@ void mana_ib_destroy_queue(struct mana_ib_dev *mdev, struct mana_ib_queue *queue
*/
mana_ib_gd_destroy_dma_region(mdev, queue->gdma_region);
ib_umem_release(queue->umem);
+ if (queue->kmem)
+ mana_gd_destroy_queue(mdev_to_gc(mdev), queue->kmem);
}
static int
@@ -358,7 +384,7 @@ static int mana_ib_gd_create_dma_region(struct mana_ib_dev *dev, struct ib_umem
unsigned int tail = 0;
u64 *page_addr_list;
void *request_buf;
- int err;
+ int err = 0;
gc = mdev_to_gc(dev);
hwc = gc->hwc.driver_data;
@@ -535,8 +561,10 @@ int mana_ib_get_port_immutable(struct ib_device *ibdev, u32 port_num,
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
immutable->core_cap_flags = RDMA_CORE_PORT_RAW_PACKET;
- if (port_num == 1)
+ if (port_num == 1) {
immutable->core_cap_flags |= RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+ immutable->max_mad_size = IB_MGMT_MAD_SIZE;
+ }
return 0;
}
@@ -595,8 +623,11 @@ int mana_ib_query_port(struct ib_device *ibdev, u32 port,
props->active_width = IB_WIDTH_4X;
props->active_speed = IB_SPEED_EDR;
props->pkey_tbl_len = 1;
- if (port == 1)
+ if (port == 1) {
props->gid_tbl_len = 16;
+ props->port_cap_flags = IB_PORT_CM_SUP;
+ props->ip_gids = true;
+ }
return 0;
}
@@ -634,7 +665,7 @@ int mana_ib_gd_query_adapter_caps(struct mana_ib_dev *dev)
mana_gd_init_req_hdr(&req.hdr, MANA_IB_GET_ADAPTER_CAP, sizeof(req),
sizeof(resp));
- req.hdr.resp.msg_version = GDMA_MESSAGE_V3;
+ req.hdr.resp.msg_version = GDMA_MESSAGE_V4;
req.hdr.dev_id = dev->gdma_dev->dev_id;
err = mana_gd_send_request(mdev_to_gc(dev), sizeof(req),
@@ -663,6 +694,7 @@ int mana_ib_gd_query_adapter_caps(struct mana_ib_dev *dev)
caps->max_inline_data_size = resp.max_inline_data_size;
caps->max_send_sge_count = resp.max_send_sge_count;
caps->max_recv_sge_count = resp.max_recv_sge_count;
+ caps->feature_flags = resp.feature_flags;
return 0;
}
@@ -678,7 +710,7 @@ mana_ib_event_handler(void *ctx, struct gdma_queue *q, struct gdma_event *event)
switch (event->type) {
case GDMA_EQE_RNIC_QP_FATAL:
qpn = event->details[0];
- qp = mana_get_qp_ref(mdev, qpn);
+ qp = mana_get_qp_ref(mdev, qpn, false);
if (!qp)
break;
if (qp->ibqp.event_handler) {
@@ -762,6 +794,9 @@ int mana_ib_gd_create_rnic_adapter(struct mana_ib_dev *mdev)
req.hdr.dev_id = gc->mana_ib.dev_id;
req.notify_eq_id = mdev->fatal_err_eq->id;
+ if (mdev->adapter_caps.feature_flags & MANA_IB_FEATURE_CLIENT_ERROR_CQE_SUPPORT)
+ req.feature_flags |= MANA_IB_FEATURE_CLIENT_ERROR_CQE_REQUEST;
+
err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
if (err) {
ibdev_err(&mdev->ib_dev, "Failed to create RNIC adapter err %d", err);
@@ -987,3 +1022,61 @@ int mana_ib_gd_destroy_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
}
return 0;
}
+
+int mana_ib_gd_create_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp,
+ struct ib_qp_init_attr *attr, u32 doorbell, u32 type)
+{
+ struct mana_ib_cq *send_cq = container_of(qp->ibqp.send_cq, struct mana_ib_cq, ibcq);
+ struct mana_ib_cq *recv_cq = container_of(qp->ibqp.recv_cq, struct mana_ib_cq, ibcq);
+ struct mana_ib_pd *pd = container_of(qp->ibqp.pd, struct mana_ib_pd, ibpd);
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ struct mana_rnic_create_udqp_resp resp = {};
+ struct mana_rnic_create_udqp_req req = {};
+ int err, i;
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_CREATE_UD_QP, sizeof(req), sizeof(resp));
+ req.hdr.dev_id = gc->mana_ib.dev_id;
+ req.adapter = mdev->adapter_handle;
+ req.pd_handle = pd->pd_handle;
+ req.send_cq_handle = send_cq->cq_handle;
+ req.recv_cq_handle = recv_cq->cq_handle;
+ for (i = 0; i < MANA_UD_QUEUE_TYPE_MAX; i++)
+ req.dma_region[i] = qp->ud_qp.queues[i].gdma_region;
+ req.doorbell_page = doorbell;
+ req.max_send_wr = attr->cap.max_send_wr;
+ req.max_recv_wr = attr->cap.max_recv_wr;
+ req.max_send_sge = attr->cap.max_send_sge;
+ req.max_recv_sge = attr->cap.max_recv_sge;
+ req.qp_type = type;
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to create ud qp err %d", err);
+ return err;
+ }
+ qp->qp_handle = resp.qp_handle;
+ for (i = 0; i < MANA_UD_QUEUE_TYPE_MAX; i++) {
+ qp->ud_qp.queues[i].id = resp.queue_ids[i];
+ /* The GDMA regions are now owned by the RNIC QP handle */
+ qp->ud_qp.queues[i].gdma_region = GDMA_INVALID_DMA_REGION;
+ }
+ return 0;
+}
+
+int mana_ib_gd_destroy_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
+{
+ struct mana_rnic_destroy_udqp_resp resp = {0};
+ struct mana_rnic_destroy_udqp_req req = {0};
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ int err;
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_DESTROY_UD_QP, sizeof(req), sizeof(resp));
+ req.hdr.dev_id = gc->mana_ib.dev_id;
+ req.adapter = mdev->adapter_handle;
+ req.qp_handle = qp->qp_handle;
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to destroy ud qp err %d", err);
+ return err;
+ }
+ return 0;
+}
diff --git a/drivers/infiniband/hw/mana/mana_ib.h b/drivers/infiniband/hw/mana/mana_ib.h
index b53a5b4de908..6903946677e5 100644
--- a/drivers/infiniband/hw/mana/mana_ib.h
+++ b/drivers/infiniband/hw/mana/mana_ib.h
@@ -11,8 +11,11 @@
#include <rdma/ib_umem.h>
#include <rdma/mana-abi.h>
#include <rdma/uverbs_ioctl.h>
+#include <linux/dmapool.h>
#include <net/mana/mana.h>
+#include "shadow_queue.h"
+#include "counters.h"
#define PAGE_SZ_BM \
(SZ_4K | SZ_8K | SZ_16K | SZ_32K | SZ_64K | SZ_128K | SZ_256K | \
@@ -21,6 +24,9 @@
/* MANA doesn't have any limit for MR size */
#define MANA_IB_MAX_MR_SIZE U64_MAX
+/* Send queue ID mask */
+#define MANA_SENDQ_MASK BIT(31)
+
/*
* The hardware limit of number of MRs is greater than maximum number of MRs
* that can possibly represent in 24 bits
@@ -32,6 +38,11 @@
*/
#define MANA_CA_ACK_DELAY 16
+/*
+ * Size of the buffer used for writing an address vector (AV)
+ */
+#define MANA_AV_BUFFER_SIZE 64
+
struct mana_ib_adapter_caps {
u32 max_sq_id;
u32 max_rq_id;
@@ -48,10 +59,12 @@ struct mana_ib_adapter_caps {
u32 max_send_sge_count;
u32 max_recv_sge_count;
u32 max_inline_data_size;
+ u64 feature_flags;
};
struct mana_ib_queue {
struct ib_umem *umem;
+ struct gdma_queue *kmem;
u64 gdma_region;
u64 id;
};
@@ -64,6 +77,9 @@ struct mana_ib_dev {
struct gdma_queue **eqs;
struct xarray qp_table_wq;
struct mana_ib_adapter_caps adapter_caps;
+ struct dma_pool *av_pool;
+ netdevice_tracker dev_tracker;
+ struct notifier_block nb;
};
struct mana_ib_wq {
@@ -87,6 +103,25 @@ struct mana_ib_pd {
u32 tx_vp_offset;
};
+struct mana_ib_av {
+ u8 dest_ip[16];
+ u8 dest_mac[ETH_ALEN];
+ u16 udp_src_port;
+ u8 src_ip[16];
+ u32 hop_limit : 8;
+ u32 reserved1 : 12;
+ u32 dscp : 6;
+ u32 reserved2 : 5;
+ u32 is_ipv6 : 1;
+ u32 reserved3 : 32;
+};
+
+struct mana_ib_ah {
+ struct ib_ah ibah;
+ struct mana_ib_av *av;
+ dma_addr_t dma_handle;
+};
+
struct mana_ib_mr {
struct ib_mr ibmr;
struct ib_umem *umem;
@@ -96,6 +131,10 @@ struct mana_ib_mr {
struct mana_ib_cq {
struct ib_cq ibcq;
struct mana_ib_queue queue;
+ /* protects CQ polling */
+ spinlock_t cq_lock;
+ struct list_head list_send_qp;
+ struct list_head list_recv_qp;
int cqe;
u32 comp_vector;
mana_handle_t cq_handle;
@@ -114,6 +153,17 @@ struct mana_ib_rc_qp {
struct mana_ib_queue queues[MANA_RC_QUEUE_TYPE_MAX];
};
+enum mana_ud_queue_type {
+ MANA_UD_SEND_QUEUE = 0,
+ MANA_UD_RECV_QUEUE,
+ MANA_UD_QUEUE_TYPE_MAX,
+};
+
+struct mana_ib_ud_qp {
+ struct mana_ib_queue queues[MANA_UD_QUEUE_TYPE_MAX];
+ u32 sq_psn;
+};
+
struct mana_ib_qp {
struct ib_qp ibqp;
@@ -121,11 +171,17 @@ struct mana_ib_qp {
union {
struct mana_ib_queue raw_sq;
struct mana_ib_rc_qp rc_qp;
+ struct mana_ib_ud_qp ud_qp;
};
/* The port on the IB device, starting with 1 */
u32 port;
+ struct list_head cq_send_list;
+ struct list_head cq_recv_list;
+ struct shadow_queue shadow_rq;
+ struct shadow_queue shadow_sq;
+
refcount_t refcount;
struct completion free;
};
@@ -145,17 +201,24 @@ enum mana_ib_command_code {
MANA_IB_DESTROY_ADAPTER = 0x30003,
MANA_IB_CONFIG_IP_ADDR = 0x30004,
MANA_IB_CONFIG_MAC_ADDR = 0x30005,
+ MANA_IB_CREATE_UD_QP = 0x30006,
+ MANA_IB_DESTROY_UD_QP = 0x30007,
MANA_IB_CREATE_CQ = 0x30008,
MANA_IB_DESTROY_CQ = 0x30009,
MANA_IB_CREATE_RC_QP = 0x3000a,
MANA_IB_DESTROY_RC_QP = 0x3000b,
MANA_IB_SET_QP_STATE = 0x3000d,
+ MANA_IB_QUERY_VF_COUNTERS = 0x30022,
};
struct mana_ib_query_adapter_caps_req {
struct gdma_req_hdr hdr;
}; /*HW Data */
+enum mana_ib_adapter_features {
+ MANA_IB_FEATURE_CLIENT_ERROR_CQE_SUPPORT = BIT(4),
+};
+
struct mana_ib_query_adapter_caps_resp {
struct gdma_resp_hdr hdr;
u32 max_sq_id;
@@ -176,8 +239,13 @@ struct mana_ib_query_adapter_caps_resp {
u32 max_send_sge_count;
u32 max_recv_sge_count;
u32 max_inline_data_size;
+ u64 feature_flags;
}; /* HW Data */
+enum mana_ib_adapter_features_request {
+ MANA_IB_FEATURE_CLIENT_ERROR_CQE_REQUEST = BIT(1),
+}; /* HW Data */
+
struct mana_rnic_create_adapter_req {
struct gdma_req_hdr hdr;
u32 notify_eq_id;
@@ -296,6 +364,37 @@ struct mana_rnic_destroy_rc_qp_resp {
struct gdma_resp_hdr hdr;
}; /* HW Data */
+struct mana_rnic_create_udqp_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t adapter;
+ mana_handle_t pd_handle;
+ mana_handle_t send_cq_handle;
+ mana_handle_t recv_cq_handle;
+ u64 dma_region[MANA_UD_QUEUE_TYPE_MAX];
+ u32 qp_type;
+ u32 doorbell_page;
+ u32 max_send_wr;
+ u32 max_recv_wr;
+ u32 max_send_sge;
+ u32 max_recv_sge;
+}; /* HW Data */
+
+struct mana_rnic_create_udqp_resp {
+ struct gdma_resp_hdr hdr;
+ mana_handle_t qp_handle;
+ u32 queue_ids[MANA_UD_QUEUE_TYPE_MAX];
+}; /* HW Data */
+
+struct mana_rnic_destroy_udqp_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t adapter;
+ mana_handle_t qp_handle;
+}; /* HW Data */
+
+struct mana_rnic_destroy_udqp_resp {
+ struct gdma_resp_hdr hdr;
+}; /* HW Data */
+
struct mana_ib_ah_attr {
u8 src_addr[16];
u8 dest_addr[16];
@@ -332,17 +431,104 @@ struct mana_rnic_set_qp_state_resp {
struct gdma_resp_hdr hdr;
}; /* HW Data */
+enum WQE_OPCODE_TYPES {
+ WQE_TYPE_UD_SEND = 0,
+ WQE_TYPE_UD_RECV = 8,
+}; /* HW DATA */
+
+struct rdma_send_oob {
+ u32 wqe_type : 5;
+ u32 fence : 1;
+ u32 signaled : 1;
+ u32 solicited : 1;
+ u32 psn : 24;
+
+ u32 ssn_or_rqpn : 24;
+ u32 reserved1 : 8;
+ union {
+ struct {
+ u32 remote_qkey;
+ u32 immediate;
+ u32 reserved1;
+ u32 reserved2;
+ } ud_send;
+ };
+}; /* HW DATA */
+
+struct mana_rdma_cqe {
+ union {
+ struct {
+ u8 cqe_type;
+ u8 data[GDMA_COMP_DATA_SIZE - 1];
+ };
+ struct {
+ u32 cqe_type : 8;
+ u32 vendor_error : 9;
+ u32 reserved1 : 15;
+ u32 sge_offset : 5;
+ u32 tx_wqe_offset : 27;
+ } ud_send;
+ struct {
+ u32 cqe_type : 8;
+ u32 reserved1 : 24;
+ u32 msg_len;
+ u32 src_qpn : 24;
+ u32 reserved2 : 8;
+ u32 imm_data;
+ u32 rx_wqe_offset;
+ } ud_recv;
+ };
+}; /* HW DATA */
+
+struct mana_rnic_query_vf_cntrs_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t adapter;
+}; /* HW Data */
+
+struct mana_rnic_query_vf_cntrs_resp {
+ struct gdma_resp_hdr hdr;
+ u64 requester_timeout;
+ u64 requester_oos_nak;
+ u64 requester_rnr_nak;
+ u64 responder_rnr_nak;
+ u64 responder_oos;
+ u64 responder_dup_request;
+ u64 requester_implicit_nak;
+ u64 requester_readresp_psn_mismatch;
+ u64 nak_inv_req;
+ u64 nak_access_err;
+ u64 nak_opp_err;
+ u64 nak_inv_read;
+ u64 responder_local_len_err;
+ u64 requestor_local_prot_err;
+ u64 responder_rem_access_err;
+ u64 responder_local_qp_err;
+ u64 responder_malformed_wqe;
+ u64 general_hw_err;
+ u64 requester_rnr_nak_retries_exceeded;
+ u64 requester_retries_exceeded;
+ u64 total_fatal_err;
+ u64 received_cnps;
+ u64 num_qps_congested;
+ u64 rate_inc_events;
+ u64 num_qps_recovered;
+ u64 current_rate;
+}; /* HW Data */
+
static inline struct gdma_context *mdev_to_gc(struct mana_ib_dev *mdev)
{
return mdev->gdma_dev->gdma_context;
}
static inline struct mana_ib_qp *mana_get_qp_ref(struct mana_ib_dev *mdev,
- uint32_t qid)
+ u32 qid, bool is_sq)
{
struct mana_ib_qp *qp;
unsigned long flag;
+ if (is_sq)
+ qid |= MANA_SENDQ_MASK;
+
xa_lock_irqsave(&mdev->qp_table_wq, flag);
qp = xa_load(&mdev->qp_table_wq, qid);
if (qp)
@@ -388,6 +574,8 @@ int mana_ib_create_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem,
int mana_ib_gd_destroy_dma_region(struct mana_ib_dev *dev,
mana_handle_t gdma_region);
+int mana_ib_create_kernel_queue(struct mana_ib_dev *mdev, u32 size, enum gdma_queue_type type,
+ struct mana_ib_queue *queue);
int mana_ib_create_queue(struct mana_ib_dev *mdev, u64 addr, u32 size,
struct mana_ib_queue *queue);
void mana_ib_destroy_queue(struct mana_ib_dev *mdev, struct mana_ib_queue *queue);
@@ -480,4 +668,24 @@ int mana_ib_gd_destroy_cq(struct mana_ib_dev *mdev, struct mana_ib_cq *cq);
int mana_ib_gd_create_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp,
struct ib_qp_init_attr *attr, u32 doorbell, u64 flags);
int mana_ib_gd_destroy_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp);
+
+int mana_ib_gd_create_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp,
+ struct ib_qp_init_attr *attr, u32 doorbell, u32 type);
+int mana_ib_gd_destroy_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp);
+
+int mana_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
+ struct ib_udata *udata);
+int mana_ib_destroy_ah(struct ib_ah *ah, u32 flags);
+
+int mana_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr);
+int mana_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+ const struct ib_send_wr **bad_wr);
+
+int mana_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
+int mana_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
+
+struct ib_mr *mana_ib_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 start, u64 length,
+ u64 iova, int fd, int mr_access_flags,
+ struct uverbs_attr_bundle *attrs);
#endif
diff --git a/drivers/infiniband/hw/mana/mr.c b/drivers/infiniband/hw/mana/mr.c
index 887b09dd86e7..f99557ec7767 100644
--- a/drivers/infiniband/hw/mana/mr.c
+++ b/drivers/infiniband/hw/mana/mr.c
@@ -8,6 +8,8 @@
#define VALID_MR_FLAGS \
(IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ)
+#define VALID_DMA_MR_FLAGS (IB_ACCESS_LOCAL_WRITE)
+
static enum gdma_mr_access_flags
mana_ib_verbs_to_gdma_access_flags(int access_flags)
{
@@ -39,6 +41,8 @@ static int mana_ib_gd_create_mr(struct mana_ib_dev *dev, struct mana_ib_mr *mr,
req.mr_type = mr_params->mr_type;
switch (mr_params->mr_type) {
+ case GDMA_MR_TYPE_GPA:
+ break;
case GDMA_MR_TYPE_GVA:
req.gva.dma_region_handle = mr_params->gva.dma_region_handle;
req.gva.virtual_address = mr_params->gva.virtual_address;
@@ -169,6 +173,107 @@ err_free:
return ERR_PTR(err);
}
+struct ib_mr *mana_ib_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 start, u64 length,
+ u64 iova, int fd, int access_flags,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd, ibpd);
+ struct gdma_create_mr_params mr_params = {};
+ struct ib_device *ibdev = ibpd->device;
+ struct ib_umem_dmabuf *umem_dmabuf;
+ struct mana_ib_dev *dev;
+ struct mana_ib_mr *mr;
+ u64 dma_region_handle;
+ int err;
+
+ dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+
+ access_flags &= ~IB_ACCESS_OPTIONAL;
+ if (access_flags & ~VALID_MR_FLAGS)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ umem_dmabuf = ib_umem_dmabuf_get_pinned(ibdev, start, length, fd, access_flags);
+ if (IS_ERR(umem_dmabuf)) {
+ err = PTR_ERR(umem_dmabuf);
+ ibdev_dbg(ibdev, "Failed to get dmabuf umem, %d\n", err);
+ goto err_free;
+ }
+
+ mr->umem = &umem_dmabuf->umem;
+
+ err = mana_ib_create_dma_region(dev, mr->umem, &dma_region_handle, iova);
+ if (err) {
+ ibdev_dbg(ibdev, "Failed create dma region for user-mr, %d\n",
+ err);
+ goto err_umem;
+ }
+
+ mr_params.pd_handle = pd->pd_handle;
+ mr_params.mr_type = GDMA_MR_TYPE_GVA;
+ mr_params.gva.dma_region_handle = dma_region_handle;
+ mr_params.gva.virtual_address = iova;
+ mr_params.gva.access_flags =
+ mana_ib_verbs_to_gdma_access_flags(access_flags);
+
+ err = mana_ib_gd_create_mr(dev, mr, &mr_params);
+ if (err)
+ goto err_dma_region;
+
+ /*
+ * There is no need to keep track of dma_region_handle after MR is
+ * successfully created. The dma_region_handle is tracked in the PF
+ * as part of the lifecycle of this MR.
+ */
+
+ return &mr->ibmr;
+
+err_dma_region:
+ mana_gd_destroy_dma_region(mdev_to_gc(dev), dma_region_handle);
+
+err_umem:
+ ib_umem_release(mr->umem);
+
+err_free:
+ kfree(mr);
+ return ERR_PTR(err);
+}
+
+struct ib_mr *mana_ib_get_dma_mr(struct ib_pd *ibpd, int access_flags)
+{
+ struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd, ibpd);
+ struct gdma_create_mr_params mr_params = {};
+ struct ib_device *ibdev = ibpd->device;
+ struct mana_ib_dev *dev;
+ struct mana_ib_mr *mr;
+ int err;
+
+ dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+
+ if (access_flags & ~VALID_DMA_MR_FLAGS)
+ return ERR_PTR(-EINVAL);
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ mr_params.pd_handle = pd->pd_handle;
+ mr_params.mr_type = GDMA_MR_TYPE_GPA;
+
+ err = mana_ib_gd_create_mr(dev, mr, &mr_params);
+ if (err)
+ goto err_free;
+
+ return &mr->ibmr;
+
+err_free:
+ kfree(mr);
+ return ERR_PTR(err);
+}
+
int mana_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
struct mana_ib_mr *mr = container_of(ibmr, struct mana_ib_mr, ibmr);
diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c
index 73d67c853b6f..c928af58f38b 100644
--- a/drivers/infiniband/hw/mana/qp.c
+++ b/drivers/infiniband/hw/mana/qp.c
@@ -398,18 +398,128 @@ err_free_vport:
return err;
}
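+/* One posted WQE: GDMA header + inline OOB + SGEs, rounded up to GDMA_WQE_BU_SIZE */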
+static u32 mana_ib_wqe_size(u32 sge, u32 oob_size)
+{
+ u32 wqe_size = sge * sizeof(struct gdma_sge) + sizeof(struct gdma_wqe) + oob_size;
+
+ return ALIGN(wqe_size, GDMA_WQE_BU_SIZE);
+}
+
+static u32 mana_ib_queue_size(struct ib_qp_init_attr *attr, u32 queue_type)
+{
+ u32 queue_size;
+
+ switch (attr->qp_type) {
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
+ if (queue_type == MANA_UD_SEND_QUEUE)
+ queue_size = attr->cap.max_send_wr *
+ mana_ib_wqe_size(attr->cap.max_send_sge, INLINE_OOB_LARGE_SIZE);
+ else
+ queue_size = attr->cap.max_recv_wr *
+ mana_ib_wqe_size(attr->cap.max_recv_sge, INLINE_OOB_SMALL_SIZE);
+ break;
+ default:
+ return 0;
+ }
+
+ return MANA_PAGE_ALIGN(roundup_pow_of_two(queue_size));
+}
+
+static enum gdma_queue_type mana_ib_queue_type(struct ib_qp_init_attr *attr, u32 queue_type)
+{
+ enum gdma_queue_type type;
+
+ switch (attr->qp_type) {
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
+ if (queue_type == MANA_UD_SEND_QUEUE)
+ type = GDMA_SQ;
+ else
+ type = GDMA_RQ;
+ break;
+ default:
+ type = GDMA_INVALID_QUEUE;
+ }
+ return type;
+}
+
+static int mana_table_store_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
+{
+ return xa_insert_irq(&mdev->qp_table_wq, qp->ibqp.qp_num, qp,
+ GFP_KERNEL);
+}
+
+static void mana_table_remove_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
+{
+ xa_erase_irq(&mdev->qp_table_wq, qp->ibqp.qp_num);
+}
+
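+/*
+ * UD/GSI QPs are looked up by work queue id when a completion arrives;
+ * send queue ids are tagged with MANA_SENDQ_MASK so they do not collide
+ * with receive queue ids in the same xarray.
+ */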
+static int mana_table_store_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
+{
+ u32 qids = qp->ud_qp.queues[MANA_UD_SEND_QUEUE].id | MANA_SENDQ_MASK;
+ u32 qidr = qp->ud_qp.queues[MANA_UD_RECV_QUEUE].id;
+ int err;
+
+ err = xa_insert_irq(&mdev->qp_table_wq, qids, qp, GFP_KERNEL);
+ if (err)
+ return err;
+
+ err = xa_insert_irq(&mdev->qp_table_wq, qidr, qp, GFP_KERNEL);
+ if (err)
+ goto remove_sq;
+
+ return 0;
+
+remove_sq:
+ xa_erase_irq(&mdev->qp_table_wq, qids);
+ return err;
+}
+
+static void mana_table_remove_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
+{
+ u32 qids = qp->ud_qp.queues[MANA_UD_SEND_QUEUE].id | MANA_SENDQ_MASK;
+ u32 qidr = qp->ud_qp.queues[MANA_UD_RECV_QUEUE].id;
+
+ xa_erase_irq(&mdev->qp_table_wq, qids);
+ xa_erase_irq(&mdev->qp_table_wq, qidr);
+}
+
static int mana_table_store_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
{
refcount_set(&qp->refcount, 1);
init_completion(&qp->free);
- return xa_insert_irq(&mdev->qp_table_wq, qp->ibqp.qp_num, qp,
- GFP_KERNEL);
+
+ switch (qp->ibqp.qp_type) {
+ case IB_QPT_RC:
+ return mana_table_store_rc_qp(mdev, qp);
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
+ return mana_table_store_ud_qp(mdev, qp);
+ default:
+ ibdev_dbg(&mdev->ib_dev, "Unknown QP type for storing in mana table, %d\n",
+ qp->ibqp.qp_type);
+ }
+
+ return -EINVAL;
}
static void mana_table_remove_qp(struct mana_ib_dev *mdev,
struct mana_ib_qp *qp)
{
- xa_erase_irq(&mdev->qp_table_wq, qp->ibqp.qp_num);
+ switch (qp->ibqp.qp_type) {
+ case IB_QPT_RC:
+ mana_table_remove_rc_qp(mdev, qp);
+ break;
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
+ mana_table_remove_ud_qp(mdev, qp);
+ break;
+ default:
+ ibdev_dbg(&mdev->ib_dev, "Unknown QP type for removing from mana table, %d\n",
+ qp->ibqp.qp_type);
+ return;
+ }
mana_put_qp_ref(qp);
wait_for_completion(&qp->free);
}
@@ -490,6 +600,105 @@ destroy_queues:
return err;
}
+static void mana_add_qp_to_cqs(struct mana_ib_qp *qp)
+{
+ struct mana_ib_cq *send_cq = container_of(qp->ibqp.send_cq, struct mana_ib_cq, ibcq);
+ struct mana_ib_cq *recv_cq = container_of(qp->ibqp.recv_cq, struct mana_ib_cq, ibcq);
+ unsigned long flags;
+
+ spin_lock_irqsave(&send_cq->cq_lock, flags);
+ list_add_tail(&qp->cq_send_list, &send_cq->list_send_qp);
+ spin_unlock_irqrestore(&send_cq->cq_lock, flags);
+
+ spin_lock_irqsave(&recv_cq->cq_lock, flags);
+ list_add_tail(&qp->cq_recv_list, &recv_cq->list_recv_qp);
+ spin_unlock_irqrestore(&recv_cq->cq_lock, flags);
+}
+
+static void mana_remove_qp_from_cqs(struct mana_ib_qp *qp)
+{
+ struct mana_ib_cq *send_cq = container_of(qp->ibqp.send_cq, struct mana_ib_cq, ibcq);
+ struct mana_ib_cq *recv_cq = container_of(qp->ibqp.recv_cq, struct mana_ib_cq, ibcq);
+ unsigned long flags;
+
+ spin_lock_irqsave(&send_cq->cq_lock, flags);
+ list_del(&qp->cq_send_list);
+ spin_unlock_irqrestore(&send_cq->cq_lock, flags);
+
+ spin_lock_irqsave(&recv_cq->cq_lock, flags);
+ list_del(&qp->cq_recv_list);
+ spin_unlock_irqrestore(&recv_cq->cq_lock, flags);
+}
+
+static int mana_ib_create_ud_qp(struct ib_qp *ibqp, struct ib_pd *ibpd,
+ struct ib_qp_init_attr *attr, struct ib_udata *udata)
+{
+ struct mana_ib_dev *mdev = container_of(ibpd->device, struct mana_ib_dev, ib_dev);
+ struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ u32 doorbell, queue_size;
+ int i, err;
+
+ if (udata) {
+ ibdev_dbg(&mdev->ib_dev, "User-level UD QPs are not supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ for (i = 0; i < MANA_UD_QUEUE_TYPE_MAX; ++i) {
+ queue_size = mana_ib_queue_size(attr, i);
+ err = mana_ib_create_kernel_queue(mdev, queue_size, mana_ib_queue_type(attr, i),
+ &qp->ud_qp.queues[i]);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to create queue %d, err %d\n",
+ i, err);
+ goto destroy_queues;
+ }
+ }
+ doorbell = gc->mana_ib.doorbell;
+
+ err = create_shadow_queue(&qp->shadow_rq, attr->cap.max_recv_wr,
+ sizeof(struct ud_rq_shadow_wqe));
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to create shadow rq err %d\n", err);
+ goto destroy_queues;
+ }
+ err = create_shadow_queue(&qp->shadow_sq, attr->cap.max_send_wr,
+ sizeof(struct ud_sq_shadow_wqe));
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to create shadow sq err %d\n", err);
+ goto destroy_shadow_queues;
+ }
+
+ err = mana_ib_gd_create_ud_qp(mdev, qp, attr, doorbell, attr->qp_type);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to create ud qp %d\n", err);
+ goto destroy_shadow_queues;
+ }
+ qp->ibqp.qp_num = qp->ud_qp.queues[MANA_UD_RECV_QUEUE].id;
+ qp->port = attr->port_num;
+
+ for (i = 0; i < MANA_UD_QUEUE_TYPE_MAX; ++i)
+ qp->ud_qp.queues[i].kmem->id = qp->ud_qp.queues[i].id;
+
+ err = mana_table_store_qp(mdev, qp);
+ if (err)
+ goto destroy_qp;
+
+ mana_add_qp_to_cqs(qp);
+
+ return 0;
+
+destroy_qp:
+ mana_ib_gd_destroy_ud_qp(mdev, qp);
+destroy_shadow_queues:
+ destroy_shadow_queue(&qp->shadow_rq);
+ destroy_shadow_queue(&qp->shadow_sq);
+destroy_queues:
+ while (i-- > 0)
+ mana_ib_destroy_queue(mdev, &qp->ud_qp.queues[i]);
+ return err;
+}
+
int mana_ib_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attr,
struct ib_udata *udata)
{
@@ -503,6 +712,9 @@ int mana_ib_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attr,
return mana_ib_create_qp_raw(ibqp, ibqp->pd, attr, udata);
case IB_QPT_RC:
return mana_ib_create_rc_qp(ibqp, ibqp->pd, attr, udata);
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
+ return mana_ib_create_ud_qp(ibqp, ibqp->pd, attr, udata);
default:
ibdev_dbg(ibqp->device, "Creating QP type %u not supported\n",
attr->qp_type);
@@ -579,6 +791,8 @@ int mana_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
{
switch (ibqp->qp_type) {
case IB_QPT_RC:
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
return mana_ib_gd_modify_qp(ibqp, attr, attr_mask, udata);
default:
ibdev_dbg(ibqp->device, "Modify QP type %u not supported", ibqp->qp_type);
@@ -652,6 +866,28 @@ static int mana_ib_destroy_rc_qp(struct mana_ib_qp *qp, struct ib_udata *udata)
return 0;
}
+static int mana_ib_destroy_ud_qp(struct mana_ib_qp *qp, struct ib_udata *udata)
+{
+ struct mana_ib_dev *mdev =
+ container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev);
+ int i;
+
+ mana_remove_qp_from_cqs(qp);
+ mana_table_remove_qp(mdev, qp);
+
+ destroy_shadow_queue(&qp->shadow_rq);
+ destroy_shadow_queue(&qp->shadow_sq);
+
+ /* Ignore the return code; there is not much we can do about it.
+ * The callee already logs the error message.
+ */
+ mana_ib_gd_destroy_ud_qp(mdev, qp);
+ for (i = 0; i < MANA_UD_QUEUE_TYPE_MAX; ++i)
+ mana_ib_destroy_queue(mdev, &qp->ud_qp.queues[i]);
+
+ return 0;
+}
+
int mana_ib_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
{
struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
@@ -665,6 +901,9 @@ int mana_ib_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
return mana_ib_destroy_qp_raw(qp, udata);
case IB_QPT_RC:
return mana_ib_destroy_rc_qp(qp, udata);
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
+ return mana_ib_destroy_ud_qp(qp, udata);
default:
ibdev_dbg(ibqp->device, "Unexpected QP type %u\n",
ibqp->qp_type);
diff --git a/drivers/infiniband/hw/mana/shadow_queue.h b/drivers/infiniband/hw/mana/shadow_queue.h
new file mode 100644
index 000000000000..a4b3818f9c39
--- /dev/null
+++ b/drivers/infiniband/hw/mana/shadow_queue.h
@@ -0,0 +1,115 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2024, Microsoft Corporation. All rights reserved.
+ */
+
+#ifndef _MANA_SHADOW_QUEUE_H_
+#define _MANA_SHADOW_QUEUE_H_
+
+struct shadow_wqe_header {
+ u16 opcode;
+ u16 error_code;
+ u32 posted_wqe_size;
+ u64 wr_id;
+};
+
+struct ud_rq_shadow_wqe {
+ struct shadow_wqe_header header;
+ u32 byte_len;
+ u32 src_qpn;
+};
+
+struct ud_sq_shadow_wqe {
+ struct shadow_wqe_header header;
+};
+
+struct shadow_queue {
+ /* Unmasked producer index, Incremented on wqe posting */
+ u64 prod_idx;
+ /* Unmasked consumer index, Incremented on cq polling */
+ u64 cons_idx;
+ /* Unmasked index of next-to-complete (from HW) shadow WQE */
+ u64 next_to_complete_idx;
+ /* queue size in wqes */
+ u32 length;
+ /* distance between elements in bytes */
+ u32 stride;
+ /* ring buffer holding wqes */
+ void *buffer;
+};
+
+static inline int create_shadow_queue(struct shadow_queue *queue, uint32_t length, uint32_t stride)
+{
+ queue->buffer = kvmalloc_array(length, stride, GFP_KERNEL);
+ if (!queue->buffer)
+ return -ENOMEM;
+
+ queue->length = length;
+ queue->stride = stride;
+
+ return 0;
+}
+
+static inline void destroy_shadow_queue(struct shadow_queue *queue)
+{
+ kvfree(queue->buffer);
+}
+
+static inline bool shadow_queue_full(struct shadow_queue *queue)
+{
+ return (queue->prod_idx - queue->cons_idx) >= queue->length;
+}
+
+static inline bool shadow_queue_empty(struct shadow_queue *queue)
+{
+ return queue->prod_idx == queue->cons_idx;
+}
+
+static inline void *
+shadow_queue_get_element(const struct shadow_queue *queue, u64 unmasked_index)
+{
+ u32 index = unmasked_index % queue->length;
+
+ return ((u8 *)queue->buffer + index * queue->stride);
+}
+
+static inline void *
+shadow_queue_producer_entry(struct shadow_queue *queue)
+{
+ return shadow_queue_get_element(queue, queue->prod_idx);
+}
+
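+/* Oldest WQE completed by hardware but not yet reported to the consumer */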
+static inline void *
+shadow_queue_get_next_to_consume(const struct shadow_queue *queue)
+{
+ if (queue->cons_idx == queue->next_to_complete_idx)
+ return NULL;
+
+ return shadow_queue_get_element(queue, queue->cons_idx);
+}
+
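+/* Oldest posted WQE still awaiting a hardware completion */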
+static inline void *
+shadow_queue_get_next_to_complete(struct shadow_queue *queue)
+{
+ if (queue->next_to_complete_idx == queue->prod_idx)
+ return NULL;
+
+ return shadow_queue_get_element(queue, queue->next_to_complete_idx);
+}
+
+static inline void shadow_queue_advance_producer(struct shadow_queue *queue)
+{
+ queue->prod_idx++;
+}
+
+static inline void shadow_queue_advance_consumer(struct shadow_queue *queue)
+{
+ queue->cons_idx++;
+}
+
+static inline void shadow_queue_advance_next_to_complete(struct shadow_queue *queue)
+{
+ queue->next_to_complete_idx++;
+}
+
+#endif
diff --git a/drivers/infiniband/hw/mana/wr.c b/drivers/infiniband/hw/mana/wr.c
new file mode 100644
index 000000000000..1813567d3b16
--- /dev/null
+++ b/drivers/infiniband/hw/mana/wr.c
@@ -0,0 +1,168 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2024, Microsoft Corporation. All rights reserved.
+ */
+
+#include "mana_ib.h"
+
+#define MAX_WR_SGL_NUM (2)
+
+static int mana_ib_post_recv_ud(struct mana_ib_qp *qp, const struct ib_recv_wr *wr)
+{
+ struct mana_ib_dev *mdev = container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev);
+ struct gdma_queue *queue = qp->ud_qp.queues[MANA_UD_RECV_QUEUE].kmem;
+ struct gdma_posted_wqe_info wqe_info = {0};
+ struct gdma_sge gdma_sgl[MAX_WR_SGL_NUM];
+ struct gdma_wqe_request wqe_req = {0};
+ struct ud_rq_shadow_wqe *shadow_wqe;
+ int err, i;
+
+ if (shadow_queue_full(&qp->shadow_rq))
+ return -EINVAL;
+
+ if (wr->num_sge > MAX_WR_SGL_NUM)
+ return -EINVAL;
+
+ for (i = 0; i < wr->num_sge; ++i) {
+ gdma_sgl[i].address = wr->sg_list[i].addr;
+ gdma_sgl[i].mem_key = wr->sg_list[i].lkey;
+ gdma_sgl[i].size = wr->sg_list[i].length;
+ }
+ wqe_req.num_sge = wr->num_sge;
+ wqe_req.sgl = gdma_sgl;
+
+ err = mana_gd_post_work_request(queue, &wqe_req, &wqe_info);
+ if (err)
+ return err;
+
+ shadow_wqe = shadow_queue_producer_entry(&qp->shadow_rq);
+ memset(shadow_wqe, 0, sizeof(*shadow_wqe));
+ shadow_wqe->header.opcode = IB_WC_RECV;
+ shadow_wqe->header.wr_id = wr->wr_id;
+ shadow_wqe->header.posted_wqe_size = wqe_info.wqe_size_in_bu;
+ shadow_queue_advance_producer(&qp->shadow_rq);
+
+ mana_gd_wq_ring_doorbell(mdev_to_gc(mdev), queue);
+ return 0;
+}
+
+int mana_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr)
+{
+ struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
+ int err = 0;
+
+ for (; wr; wr = wr->next) {
+ switch (ibqp->qp_type) {
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
+ err = mana_ib_post_recv_ud(qp, wr);
+ if (unlikely(err)) {
+ *bad_wr = wr;
+ return err;
+ }
+ break;
+ default:
+ ibdev_dbg(ibqp->device, "Posting recv wr on qp type %u is not supported\n",
+ ibqp->qp_type);
+ return -EINVAL;
+ }
+ }
+
+ return err;
+}
+
+static int mana_ib_post_send_ud(struct mana_ib_qp *qp, const struct ib_ud_wr *wr)
+{
+ struct mana_ib_dev *mdev = container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev);
+ struct mana_ib_ah *ah = container_of(wr->ah, struct mana_ib_ah, ibah);
+ struct net_device *ndev = mana_ib_get_netdev(&mdev->ib_dev, qp->port);
+ struct gdma_queue *queue = qp->ud_qp.queues[MANA_UD_SEND_QUEUE].kmem;
+ struct gdma_sge gdma_sgl[MAX_WR_SGL_NUM + 1];
+ struct gdma_posted_wqe_info wqe_info = {0};
+ struct gdma_wqe_request wqe_req = {0};
+ struct rdma_send_oob send_oob = {0};
+ struct ud_sq_shadow_wqe *shadow_wqe;
+ int err, i;
+
+ if (!ndev) {
+ ibdev_dbg(&mdev->ib_dev, "Invalid port %u in QP %u\n",
+ qp->port, qp->ibqp.qp_num);
+ return -EINVAL;
+ }
+
+ if (wr->wr.opcode != IB_WR_SEND)
+ return -EINVAL;
+
+ if (shadow_queue_full(&qp->shadow_sq))
+ return -EINVAL;
+
+ if (wr->wr.num_sge > MAX_WR_SGL_NUM)
+ return -EINVAL;
+
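+ /* sge 0 carries the address vector; the caller's payload SGEs follow */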
+ gdma_sgl[0].address = ah->dma_handle;
+ gdma_sgl[0].mem_key = qp->ibqp.pd->local_dma_lkey;
+ gdma_sgl[0].size = sizeof(struct mana_ib_av);
+ for (i = 0; i < wr->wr.num_sge; ++i) {
+ gdma_sgl[i + 1].address = wr->wr.sg_list[i].addr;
+ gdma_sgl[i + 1].mem_key = wr->wr.sg_list[i].lkey;
+ gdma_sgl[i + 1].size = wr->wr.sg_list[i].length;
+ }
+
+ wqe_req.num_sge = wr->wr.num_sge + 1;
+ wqe_req.sgl = gdma_sgl;
+ wqe_req.inline_oob_size = sizeof(struct rdma_send_oob);
+ wqe_req.inline_oob_data = &send_oob;
+ wqe_req.flags = GDMA_WR_OOB_IN_SGL;
+ wqe_req.client_data_unit = ib_mtu_enum_to_int(ib_mtu_int_to_enum(ndev->mtu));
+
+ send_oob.wqe_type = WQE_TYPE_UD_SEND;
+ send_oob.fence = !!(wr->wr.send_flags & IB_SEND_FENCE);
+ send_oob.signaled = !!(wr->wr.send_flags & IB_SEND_SIGNALED);
+ send_oob.solicited = !!(wr->wr.send_flags & IB_SEND_SOLICITED);
+ send_oob.psn = qp->ud_qp.sq_psn;
+ send_oob.ssn_or_rqpn = wr->remote_qpn;
+ send_oob.ud_send.remote_qkey =
+ qp->ibqp.qp_type == IB_QPT_GSI ? IB_QP1_QKEY : wr->remote_qkey;
+
+ err = mana_gd_post_work_request(queue, &wqe_req, &wqe_info);
+ if (err)
+ return err;
+
+ qp->ud_qp.sq_psn++;
+ shadow_wqe = shadow_queue_producer_entry(&qp->shadow_sq);
+ memset(shadow_wqe, 0, sizeof(*shadow_wqe));
+ shadow_wqe->header.opcode = IB_WC_SEND;
+ shadow_wqe->header.wr_id = wr->wr.wr_id;
+ shadow_wqe->header.posted_wqe_size = wqe_info.wqe_size_in_bu;
+ shadow_queue_advance_producer(&qp->shadow_sq);
+
+ mana_gd_wq_ring_doorbell(mdev_to_gc(mdev), queue);
+ return 0;
+}
+
+int mana_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+ const struct ib_send_wr **bad_wr)
+{
+ struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
+ int err = 0;
+
+ for (; wr; wr = wr->next) {
+ switch (ibqp->qp_type) {
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
+ err = mana_ib_post_send_ud(qp, ud_wr(wr));
+ if (unlikely(err)) {
+ *bad_wr = wr;
+ return err;
+ }
+ break;
+ default:
+ ibdev_dbg(ibqp->device, "Posting send wr on qp type %u is not supported\n",
+ ibqp->qp_type);
+ return -EINVAL;
+ }
+ }
+
+ return err;
+}
diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile
index b38961f5058e..11878ddf7cc7 100644
--- a/drivers/infiniband/hw/mlx5/Makefile
+++ b/drivers/infiniband/hw/mlx5/Makefile
@@ -9,6 +9,7 @@ mlx5_ib-y := ah.o \
data_direct.o \
dm.o \
doorbell.o \
+ fs.o \
gsi.o \
ib_virt.o \
mad.o \
@@ -26,7 +27,6 @@ mlx5_ib-y := ah.o \
mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o
mlx5_ib-$(CONFIG_MLX5_ESWITCH) += ib_rep.o
mlx5_ib-$(CONFIG_INFINIBAND_USER_ACCESS) += devx.o \
- fs.o \
qos.o \
std_types.o
mlx5_ib-$(CONFIG_MLX5_MACSEC) += macsec.o
diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c
index 99036afb3aef..531a57f9ee7e 100644
--- a/drivers/infiniband/hw/mlx5/ah.c
+++ b/drivers/infiniband/hw/mlx5/ah.c
@@ -50,11 +50,12 @@ static __be16 mlx5_ah_get_udp_sport(const struct mlx5_ib_dev *dev,
return sport;
}
-static void create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah,
+static int create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah,
struct rdma_ah_init_attr *init_attr)
{
struct rdma_ah_attr *ah_attr = init_attr->ah_attr;
enum ib_gid_type gid_type;
+ int rate_val;
if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) {
const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
@@ -67,8 +68,10 @@ static void create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah,
ah->av.tclass = grh->traffic_class;
}
- ah->av.stat_rate_sl =
- (mlx5r_ib_rate(dev, rdma_ah_get_static_rate(ah_attr)) << 4);
+ rate_val = mlx5r_ib_rate(dev, rdma_ah_get_static_rate(ah_attr));
+ if (rate_val < 0)
+ return rate_val;
+ ah->av.stat_rate_sl = rate_val << 4;
if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) {
if (init_attr->xmit_slave)
@@ -89,6 +92,8 @@ static void create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah,
ah->av.fl_mlid = rdma_ah_get_path_bits(ah_attr) & 0x7f;
ah->av.stat_rate_sl |= (rdma_ah_get_sl(ah_attr) & 0xf);
}
+
+ return 0;
}
int mlx5_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
@@ -121,8 +126,7 @@ int mlx5_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
return err;
}
- create_ib_ah(dev, ah, init_attr);
- return 0;
+ return create_ib_ah(dev, ah, init_attr);
}
int mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr)
diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c
index 81cfa74147a1..b847084dcd99 100644
--- a/drivers/infiniband/hw/mlx5/counters.c
+++ b/drivers/infiniband/hw/mlx5/counters.c
@@ -140,6 +140,13 @@ static const struct mlx5_ib_counter rdmatx_cnp_op_cnts[] = {
INIT_OP_COUNTER(cc_tx_cnp_pkts, CC_TX_CNP_PKTS),
};
+static const struct mlx5_ib_counter packets_op_cnts[] = {
+ INIT_OP_COUNTER(rdma_tx_packets, RDMA_TX_PACKETS),
+ INIT_OP_COUNTER(rdma_tx_bytes, RDMA_TX_BYTES),
+ INIT_OP_COUNTER(rdma_rx_packets, RDMA_RX_PACKETS),
+ INIT_OP_COUNTER(rdma_rx_bytes, RDMA_RX_BYTES),
+};
+
static int mlx5_ib_read_counters(struct ib_counters *counters,
struct ib_counters_read_attr *read_attr,
struct uverbs_attr_bundle *attrs)
@@ -427,6 +434,52 @@ done:
return num_counters;
}
+static bool is_rdma_bytes_counter(u32 type)
+{
+ if (type == MLX5_IB_OPCOUNTER_RDMA_TX_BYTES ||
+ type == MLX5_IB_OPCOUNTER_RDMA_RX_BYTES ||
+ type == MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP ||
+ type == MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP)
+ return true;
+
+ return false;
+}
+
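+/* Copy the per-QP optional flow-counter values (packets or
+ * bytes) into the rdma counter's hw stats array.
+ */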
+static int do_per_qp_get_op_stat(struct rdma_counter *counter)
+{
+ struct mlx5_ib_dev *dev = to_mdev(counter->device);
+ const struct mlx5_ib_counters *cnts = get_counters(dev, counter->port);
+ struct mlx5_rdma_counter *mcounter = to_mcounter(counter);
+ int i, ret, index, num_hw_counters;
+ u64 packets = 0, bytes = 0;
+
+ for (i = MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP;
+ i <= MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP; i++) {
+ if (!mcounter->fc[i])
+ continue;
+
+ ret = mlx5_fc_query(dev->mdev, mcounter->fc[i],
+ &packets, &bytes);
+ if (ret)
+ return ret;
+
+ num_hw_counters = cnts->num_q_counters +
+ cnts->num_cong_counters +
+ cnts->num_ext_ppcnt_counters;
+
+ index = i - MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP +
+ num_hw_counters;
+
+ if (is_rdma_bytes_counter(i))
+ counter->stats->value[index] = bytes;
+ else
+ counter->stats->value[index] = packets;
+
+ clear_bit(index, counter->stats->is_disabled);
+ }
+ return 0;
+}
+
static int do_get_op_stat(struct ib_device *ibdev,
struct rdma_hw_stats *stats,
u32 port_num, int index)
@@ -434,7 +487,7 @@ static int do_get_op_stat(struct ib_device *ibdev,
struct mlx5_ib_dev *dev = to_mdev(ibdev);
const struct mlx5_ib_counters *cnts;
const struct mlx5_ib_op_fc *opfcs;
- u64 packets = 0, bytes;
+ u64 packets, bytes;
u32 type;
int ret;
@@ -453,8 +506,11 @@ static int do_get_op_stat(struct ib_device *ibdev,
if (ret)
return ret;
+ if (is_rdma_bytes_counter(type))
+ stats->value[index] = bytes;
+ else
+ stats->value[index] = packets;
out:
- stats->value[index] = packets;
return index;
}
@@ -523,19 +579,30 @@ static int mlx5_ib_counter_update_stats(struct rdma_counter *counter)
{
struct mlx5_ib_dev *dev = to_mdev(counter->device);
const struct mlx5_ib_counters *cnts = get_counters(dev, counter->port);
+ int ret;
+
+ ret = mlx5_ib_query_q_counters(dev->mdev, cnts, counter->stats,
+ counter->id);
+ if (ret)
+ return ret;
+
+ if (!counter->mode.bind_opcnt)
+ return 0;
- return mlx5_ib_query_q_counters(dev->mdev, cnts,
- counter->stats, counter->id);
+ return do_per_qp_get_op_stat(counter);
}
static int mlx5_ib_counter_dealloc(struct rdma_counter *counter)
{
+ struct mlx5_rdma_counter *mcounter = to_mcounter(counter);
struct mlx5_ib_dev *dev = to_mdev(counter->device);
u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
if (!counter->id)
return 0;
+ WARN_ON(!xa_empty(&mcounter->qpn_opfc_xa));
+ mlx5r_fs_destroy_fcs(dev, counter);
MLX5_SET(dealloc_q_counter_in, in, opcode,
MLX5_CMD_OP_DEALLOC_Q_COUNTER);
MLX5_SET(dealloc_q_counter_in, in, counter_set_id, counter->id);
@@ -543,7 +610,7 @@ static int mlx5_ib_counter_dealloc(struct rdma_counter *counter)
}
static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter,
- struct ib_qp *qp)
+ struct ib_qp *qp, u32 port)
{
struct mlx5_ib_dev *dev = to_mdev(qp->device);
bool new = false;
@@ -568,8 +635,14 @@ static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter,
if (err)
goto fail_set_counter;
+ err = mlx5r_fs_bind_op_fc(qp, counter, port);
+ if (err)
+ goto fail_bind_op_fc;
+
return 0;
+fail_bind_op_fc:
+ mlx5_ib_qp_set_counter(qp, NULL);
fail_set_counter:
if (new) {
mlx5_ib_counter_dealloc(counter);
@@ -579,9 +652,22 @@ fail_set_counter:
return err;
}
-static int mlx5_ib_counter_unbind_qp(struct ib_qp *qp)
+static int mlx5_ib_counter_unbind_qp(struct ib_qp *qp, u32 port)
{
- return mlx5_ib_qp_set_counter(qp, NULL);
+ struct rdma_counter *counter = qp->counter;
+ int err;
+
+ mlx5r_fs_unbind_op_fc(qp, counter);
+
+ err = mlx5_ib_qp_set_counter(qp, NULL);
+ if (err)
+ goto fail_set_counter;
+
+ return 0;
+
+fail_set_counter:
+ mlx5r_fs_bind_op_fc(qp, counter, port);
+ return err;
}
static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
@@ -681,6 +767,12 @@ static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
descs[j].priv = &rdmatx_cnp_op_cnts[i].type;
}
}
+
+ for (i = 0; i < ARRAY_SIZE(packets_op_cnts); i++, j++) {
+ descs[j].name = packets_op_cnts[i].name;
+ descs[j].flags |= IB_STAT_FLAG_OPTIONAL;
+ descs[j].priv = &packets_op_cnts[i].type;
+ }
}
@@ -731,6 +823,8 @@ static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
num_op_counters = ARRAY_SIZE(basic_op_cnts);
+ num_op_counters += ARRAY_SIZE(packets_op_cnts);
+
if (MLX5_CAP_FLOWTABLE(dev->mdev,
ft_field_support_2_nic_receive_rdma.bth_opcode))
num_op_counters += ARRAY_SIZE(rdmarx_cnp_op_cnts);
@@ -760,10 +854,58 @@ err:
return -ENOMEM;
}
+/*
+ * Check whether the given optional counter type shares its flow counter with
+ * another type. If it does and that other type's flow counter has already
+ * been created, return true and pass the sharing counter back through @opfc;
+ * otherwise return false.
+ */
+bool mlx5r_is_opfc_shared_and_in_use(struct mlx5_ib_op_fc *opfcs, u32 type,
+ struct mlx5_ib_op_fc **opfc)
+{
+ u32 shared_fc_type;
+
+ switch (type) {
+ case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS:
+ shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_TX_BYTES;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES:
+ shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS:
+ shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_RX_BYTES;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES:
+ shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP:
+ shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP:
+ shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP:
+ shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP:
+ shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP;
+ break;
+ default:
+ return false;
+ }
+
+ *opfc = &opfcs[shared_fc_type];
+ if (!(*opfc)->fc)
+ return false;
+
+ return true;
+}
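
Packets and bytes counters of the same direction read one hardware flow counter, so the mlx5_fc is created only once for the pair and destroyed only when the last of the two is disabled; the second type simply reuses the sibling's counter and first rule, as mlx5_ib_modify_stat does below. A minimal user-space sketch of that reuse-or-create decision, with hypothetical types and structs standing in for the MLX5_IB_OPCOUNTER_* values and struct mlx5_ib_op_fc:

#include <stdbool.h>
#include <stdio.h>

enum opfc_type { TX_PACKETS, TX_BYTES, RX_PACKETS, RX_BYTES, OPFC_MAX };

struct opfc { bool fc; };	/* stand-in for struct mlx5_ib_op_fc */

/* Each type shares its flow counter with its packets/bytes sibling. */
static bool shared_and_in_use(struct opfc *opfcs, enum opfc_type type,
			      struct opfc **shared)
{
	static const enum opfc_type sibling[OPFC_MAX] = {
		[TX_PACKETS] = TX_BYTES, [TX_BYTES] = TX_PACKETS,
		[RX_PACKETS] = RX_BYTES, [RX_BYTES] = RX_PACKETS,
	};

	*shared = &opfcs[sibling[type]];
	return (*shared)->fc;
}

int main(void)
{
	struct opfc opfcs[OPFC_MAX] = { [TX_BYTES] = { .fc = true } };
	struct opfc *shared;

	/* Enabling TX_PACKETS reuses the counter TX_BYTES already created. */
	if (shared_and_in_use(opfcs, TX_PACKETS, &shared))
		opfcs[TX_PACKETS] = *shared;	/* reuse the sibling's counter */
	else
		opfcs[TX_PACKETS].fc = true;	/* create a new one */

	printf("TX_PACKETS counter in use: %d\n", opfcs[TX_PACKETS].fc);
	return 0;
}
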
+
static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
{
u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
int num_cnt_ports = dev->num_ports;
+ struct mlx5_ib_op_fc *in_use_opfc;
int i, j;
if (is_mdev_switchdev_mode(dev->mdev))
@@ -785,11 +927,15 @@ static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
if (!dev->port[i].cnts.opfcs[j].fc)
continue;
- if (IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS))
- mlx5_ib_fs_remove_op_fc(dev,
- &dev->port[i].cnts.opfcs[j], j);
+ if (mlx5r_is_opfc_shared_and_in_use(
+ dev->port[i].cnts.opfcs, j, &in_use_opfc))
+ goto skip;
+
+ mlx5_ib_fs_remove_op_fc(dev,
+ &dev->port[i].cnts.opfcs[j], j);
mlx5_fc_destroy(dev->mdev,
dev->port[i].cnts.opfcs[j].fc);
+skip:
dev->port[i].cnts.opfcs[j].fc = NULL;
}
}
@@ -983,8 +1129,8 @@ static int mlx5_ib_modify_stat(struct ib_device *device, u32 port,
unsigned int index, bool enable)
{
struct mlx5_ib_dev *dev = to_mdev(device);
+ struct mlx5_ib_op_fc *opfc, *in_use_opfc;
struct mlx5_ib_counters *cnts;
- struct mlx5_ib_op_fc *opfc;
u32 num_hw_counters, type;
int ret;
@@ -1008,6 +1154,13 @@ static int mlx5_ib_modify_stat(struct ib_device *device, u32 port,
if (opfc->fc)
return -EEXIST;
+ if (mlx5r_is_opfc_shared_and_in_use(cnts->opfcs, type,
+ &in_use_opfc)) {
+ opfc->fc = in_use_opfc->fc;
+ opfc->rule[0] = in_use_opfc->rule[0];
+ return 0;
+ }
+
opfc->fc = mlx5_fc_create(dev->mdev, false);
if (IS_ERR(opfc->fc))
return PTR_ERR(opfc->fc);
@@ -1023,12 +1176,23 @@ static int mlx5_ib_modify_stat(struct ib_device *device, u32 port,
if (!opfc->fc)
return -EINVAL;
+ if (mlx5r_is_opfc_shared_and_in_use(cnts->opfcs, type, &in_use_opfc))
+ goto out;
+
mlx5_ib_fs_remove_op_fc(dev, opfc, type);
mlx5_fc_destroy(dev->mdev, opfc->fc);
+out:
opfc->fc = NULL;
return 0;
}
+static void mlx5_ib_counter_init(struct rdma_counter *counter)
+{
+ struct mlx5_rdma_counter *mcounter = to_mcounter(counter);
+
+ xa_init(&mcounter->qpn_opfc_xa);
+}
+
static const struct ib_device_ops hw_stats_ops = {
.alloc_hw_port_stats = mlx5_ib_alloc_hw_port_stats,
.get_hw_stats = mlx5_ib_get_hw_stats,
@@ -1037,8 +1201,10 @@ static const struct ib_device_ops hw_stats_ops = {
.counter_dealloc = mlx5_ib_counter_dealloc,
.counter_alloc_stats = mlx5_ib_counter_alloc_stats,
.counter_update_stats = mlx5_ib_counter_update_stats,
- .modify_hw_stat = IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) ?
- mlx5_ib_modify_stat : NULL,
+ .modify_hw_stat = mlx5_ib_modify_stat,
+ .counter_init = mlx5_ib_counter_init,
+
+ INIT_RDMA_OBJ_SIZE(rdma_counter, mlx5_rdma_counter, rdma_counter),
};
static const struct ib_device_ops hw_switchdev_vport_op = {
@@ -1053,6 +1219,9 @@ static const struct ib_device_ops hw_switchdev_stats_ops = {
.counter_dealloc = mlx5_ib_counter_dealloc,
.counter_alloc_stats = mlx5_ib_counter_alloc_stats,
.counter_update_stats = mlx5_ib_counter_update_stats,
+ .counter_init = mlx5_ib_counter_init,
+
+ INIT_RDMA_OBJ_SIZE(rdma_counter, mlx5_rdma_counter, rdma_counter),
};
static const struct ib_device_ops counters_ops = {
diff --git a/drivers/infiniband/hw/mlx5/counters.h b/drivers/infiniband/hw/mlx5/counters.h
index 6bcaaa52e2b2..bd03cee42014 100644
--- a/drivers/infiniband/hw/mlx5/counters.h
+++ b/drivers/infiniband/hw/mlx5/counters.h
@@ -8,10 +8,25 @@
#include "mlx5_ib.h"
+struct mlx5_rdma_counter {
+ struct rdma_counter rdma_counter;
+
+ struct mlx5_fc *fc[MLX5_IB_OPCOUNTER_MAX];
+ struct xarray qpn_opfc_xa;
+};
+
+static inline struct mlx5_rdma_counter *
+to_mcounter(struct rdma_counter *counter)
+{
+ return container_of(counter, struct mlx5_rdma_counter, rdma_counter);
+}
+
int mlx5_ib_counters_init(struct mlx5_ib_dev *dev);
void mlx5_ib_counters_cleanup(struct mlx5_ib_dev *dev);
void mlx5_ib_counters_clear_description(struct ib_counters *counters);
int mlx5_ib_flow_counters_set_data(struct ib_counters *ibcounters,
struct mlx5_ib_create_flow *ucmd);
u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u32 port_num);
+bool mlx5r_is_opfc_shared_and_in_use(struct mlx5_ib_op_fc *opfcs, u32 type,
+ struct mlx5_ib_op_fc **opfc);
#endif /* _MLX5_IB_COUNTERS_H */
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 4c54dc578069..1aa5311b03e9 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -490,7 +490,7 @@ repoll:
}
qpn = ntohl(cqe64->sop_drop_qpn) & 0xffffff;
- if (!*cur_qp || (qpn != (*cur_qp)->ibqp.qp_num)) {
+ if (!*cur_qp || (qpn != (*cur_qp)->trans_qp.base.mqp.qpn)) {
/* We do not have to take the QP table lock here,
* because CQs will be locked while QPs are removed
* from the table.
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index 4186884c66e1..2479da8620ca 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -13,6 +13,7 @@
#include <rdma/uverbs_std_types.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/fs.h>
+#include <rdma/ib_ucaps.h>
#include "mlx5_ib.h"
#include "devx.h"
#include "qp.h"
@@ -122,7 +123,27 @@ devx_ufile2uctx(const struct uverbs_attr_bundle *attrs)
return to_mucontext(ib_uverbs_get_ucontext(attrs));
}
-int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user)
+static int set_uctx_ucaps(struct mlx5_ib_dev *dev, u64 req_ucaps, u32 *cap)
+{
+ if (UCAP_ENABLED(req_ucaps, RDMA_UCAP_MLX5_CTRL_LOCAL)) {
+ if (MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RDMA_CTRL)
+ *cap |= MLX5_UCTX_CAP_RDMA_CTRL;
+ else
+ return -EOPNOTSUPP;
+ }
+
+ if (UCAP_ENABLED(req_ucaps, RDMA_UCAP_MLX5_CTRL_OTHER_VHCA)) {
+ if (MLX5_CAP_GEN(dev->mdev, uctx_cap) &
+ MLX5_UCTX_CAP_RDMA_CTRL_OTHER_VHCA)
+ *cap |= MLX5_UCTX_CAP_RDMA_CTRL_OTHER_VHCA;
+ else
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user, u64 req_ucaps)
{
u32 in[MLX5_ST_SZ_DW(create_uctx_in)] = {};
u32 out[MLX5_ST_SZ_DW(create_uctx_out)] = {};
@@ -136,14 +157,22 @@ int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user)
return -EINVAL;
uctx = MLX5_ADDR_OF(create_uctx_in, in, uctx);
- if (is_user && capable(CAP_NET_RAW) &&
- (MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RAW_TX))
+ if (is_user &&
+ (MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RAW_TX) &&
+ capable(CAP_NET_RAW))
cap |= MLX5_UCTX_CAP_RAW_TX;
- if (is_user && capable(CAP_SYS_RAWIO) &&
+ if (is_user &&
(MLX5_CAP_GEN(dev->mdev, uctx_cap) &
- MLX5_UCTX_CAP_INTERNAL_DEV_RES))
+ MLX5_UCTX_CAP_INTERNAL_DEV_RES) &&
+ capable(CAP_SYS_RAWIO))
cap |= MLX5_UCTX_CAP_INTERNAL_DEV_RES;
+ if (req_ucaps) {
+ err = set_uctx_ucaps(dev, req_ucaps, &cap);
+ if (err)
+ return err;
+ }
+
MLX5_SET(create_uctx_in, in, opcode, MLX5_CMD_OP_CREATE_UCTX);
MLX5_SET(uctx, uctx, cap, cap);
@@ -2573,7 +2602,7 @@ int mlx5_ib_devx_init(struct mlx5_ib_dev *dev)
struct mlx5_devx_event_table *table = &dev->devx_event_table;
int uid;
- uid = mlx5_ib_devx_create(dev, false);
+ uid = mlx5_ib_devx_create(dev, false, 0);
if (uid > 0) {
dev->devx_whitelist_uid = uid;
xa_init(&table->event_xa);
diff --git a/drivers/infiniband/hw/mlx5/devx.h b/drivers/infiniband/hw/mlx5/devx.h
index 1344bf4c9d21..ee9e7d3af93f 100644
--- a/drivers/infiniband/hw/mlx5/devx.h
+++ b/drivers/infiniband/hw/mlx5/devx.h
@@ -24,13 +24,14 @@ struct devx_obj {
struct list_head event_sub; /* holds devx_event_subscription entries */
};
#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
-int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user);
+int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user, u64 req_ucaps);
void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid);
int mlx5_ib_devx_init(struct mlx5_ib_dev *dev);
void mlx5_ib_devx_cleanup(struct mlx5_ib_dev *dev);
void mlx5_ib_ufile_hw_cleanup(struct ib_uverbs_file *ufile);
#else
-static inline int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user)
+static inline int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user,
+ u64 req_ucaps)
{
return -EOPNOTSUPP;
}
diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c
index 162814ae8cb4..251246c73b33 100644
--- a/drivers/infiniband/hw/mlx5/fs.c
+++ b/drivers/infiniband/hw/mlx5/fs.c
@@ -12,6 +12,7 @@
#include <rdma/mlx5_user_ioctl_verbs.h>
#include <rdma/ib_hdrs.h>
#include <rdma/ib_umem.h>
+#include <rdma/ib_ucaps.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/fs.h>
#include <linux/mlx5/fs_helpers.h>
@@ -32,6 +33,11 @@ enum {
MATCH_CRITERIA_ENABLE_MISC2_BIT
};
+
+struct mlx5_per_qp_opfc {
+ struct mlx5_ib_op_fc opfcs[MLX5_IB_OPCOUNTER_MAX];
+};
+
#define HEADER_IS_ZERO(match_criteria, headers) \
!(memchr_inv(MLX5_ADDR_OF(fte_match_param, match_criteria, headers), \
0, MLX5_FLD_SZ_BYTES(fte_match_param, headers))) \
@@ -678,7 +684,7 @@ enum flow_table_type {
#define MLX5_FS_MAX_TYPES 6
#define MLX5_FS_MAX_ENTRIES BIT(16)
-static bool mlx5_ib_shared_ft_allowed(struct ib_device *device)
+static bool __maybe_unused mlx5_ib_shared_ft_allowed(struct ib_device *device)
{
struct mlx5_ib_dev *dev = to_mdev(device);
@@ -690,7 +696,7 @@ static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_ib_dev *dev,
struct mlx5_ib_flow_prio *prio,
int priority,
int num_entries, int num_groups,
- u32 flags)
+ u32 flags, u16 vport)
{
struct mlx5_flow_table_attr ft_attr = {};
struct mlx5_flow_table *ft;
@@ -698,6 +704,7 @@ static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_ib_dev *dev,
ft_attr.prio = priority;
ft_attr.max_fte = num_entries;
ft_attr.flags = flags;
+ ft_attr.vport = vport;
ft_attr.autogroup.max_num_groups = num_groups;
ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
if (IS_ERR(ft))
@@ -792,18 +799,25 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
ft = prio->flow_table;
if (!ft)
return _get_prio(dev, ns, prio, priority, max_table_size,
- num_groups, flags);
+ num_groups, flags, 0);
return prio;
}
enum {
+ RDMA_RX_ECN_OPCOUNTER_PER_QP_PRIO,
+ RDMA_RX_CNP_OPCOUNTER_PER_QP_PRIO,
+ RDMA_RX_PKTS_BYTES_OPCOUNTER_PER_QP_PRIO,
RDMA_RX_ECN_OPCOUNTER_PRIO,
RDMA_RX_CNP_OPCOUNTER_PRIO,
+ RDMA_RX_PKTS_BYTES_OPCOUNTER_PRIO,
};
enum {
+ RDMA_TX_CNP_OPCOUNTER_PER_QP_PRIO,
+ RDMA_TX_PKTS_BYTES_OPCOUNTER_PER_QP_PRIO,
RDMA_TX_CNP_OPCOUNTER_PRIO,
+ RDMA_TX_PKTS_BYTES_OPCOUNTER_PRIO,
};
static int set_vhca_port_spec(struct mlx5_ib_dev *dev, u32 port_num,
@@ -867,6 +881,344 @@ static int set_cnp_spec(struct mlx5_ib_dev *dev, u32 port_num,
return 0;
}
+/* Return the prio to use for the given optional counter type. For bytes
+ * types we return the prio of the corresponding packets type, since both
+ * share the same flow table resources.
+ */
+static struct mlx5_ib_flow_prio *get_opfc_prio(struct mlx5_ib_dev *dev,
+ u32 type)
+{
+ u32 prio_type;
+
+ switch (type) {
+ case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES:
+ prio_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES:
+ prio_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP:
+ prio_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP:
+ prio_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP;
+ break;
+ default:
+ prio_type = type;
+ }
+
+ return &dev->flow_db->opfcs[prio_type];
+}
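
Since bytes types collapse onto their packets slot here, both counters of a pair end up on one flow table per direction. The table follows the usual _get_prio()/put_flow_table() refcount pattern: it is created when the first rule needs it and destroyed when the last rule referencing it is removed. A small standalone sketch of that pattern, with hypothetical names in place of the real helpers:

#include <stdio.h>

struct prio { int refcount; };	/* stand-in for struct mlx5_ib_flow_prio */

static void add_rule(struct prio *p)
{
	if (!p->refcount)
		printf("creating flow table\n");
	p->refcount++;
}

static void del_rule(struct prio *p)
{
	if (!--p->refcount)
		printf("destroying flow table\n");
}

int main(void)
{
	struct prio slot = { 0 };	/* one shared prio slot */

	add_rule(&slot);	/* first rule creates the table */
	add_rule(&slot);	/* further rules just take a reference */

	del_rule(&slot);
	del_rule(&slot);	/* last rule removed: table is destroyed */
	return 0;
}
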
+
+static void put_per_qp_prio(struct mlx5_ib_dev *dev,
+ enum mlx5_ib_optional_counter_type type)
+{
+ enum mlx5_ib_optional_counter_type per_qp_type;
+ struct mlx5_ib_flow_prio *prio;
+
+ switch (type) {
+ case MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS:
+ per_qp_type = MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS:
+ per_qp_type = MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS:
+ per_qp_type = MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS:
+ per_qp_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES:
+ per_qp_type = MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS:
+ per_qp_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES:
+ per_qp_type = MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP;
+ break;
+ default:
+ return;
+ }
+
+ prio = get_opfc_prio(dev, per_qp_type);
+ put_flow_table(dev, prio, true);
+}
+
+static int get_per_qp_prio(struct mlx5_ib_dev *dev,
+ enum mlx5_ib_optional_counter_type type)
+{
+ enum mlx5_ib_optional_counter_type per_qp_type;
+ enum mlx5_flow_namespace_type fn_type;
+ struct mlx5_flow_namespace *ns;
+ struct mlx5_ib_flow_prio *prio;
+ int priority;
+
+ switch (type) {
+ case MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS:
+ fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
+ priority = RDMA_RX_ECN_OPCOUNTER_PER_QP_PRIO;
+ per_qp_type = MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS:
+ fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
+ priority = RDMA_RX_CNP_OPCOUNTER_PER_QP_PRIO;
+ per_qp_type = MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS:
+ fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS;
+ priority = RDMA_TX_CNP_OPCOUNTER_PER_QP_PRIO;
+ per_qp_type = MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS:
+ fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS;
+ priority = RDMA_TX_PKTS_BYTES_OPCOUNTER_PER_QP_PRIO;
+ per_qp_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES:
+ fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS;
+ priority = RDMA_TX_PKTS_BYTES_OPCOUNTER_PER_QP_PRIO;
+ per_qp_type = MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS:
+ fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
+ priority = RDMA_RX_PKTS_BYTES_OPCOUNTER_PER_QP_PRIO;
+ per_qp_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES:
+ fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
+ priority = RDMA_RX_PKTS_BYTES_OPCOUNTER_PER_QP_PRIO;
+ per_qp_type = MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ ns = mlx5_get_flow_namespace(dev->mdev, fn_type);
+ if (!ns)
+ return -EOPNOTSUPP;
+
+ prio = get_opfc_prio(dev, per_qp_type);
+ if (prio->flow_table)
+ return 0;
+
+ prio = _get_prio(dev, ns, prio, priority, MLX5_FS_MAX_POOL_SIZE, 1, 0, 0);
+ if (IS_ERR(prio))
+ return PTR_ERR(prio);
+
+ prio->refcount = 1;
+
+ return 0;
+}
+
+static struct mlx5_per_qp_opfc *
+get_per_qp_opfc(struct mlx5_rdma_counter *mcounter, u32 qp_num, bool *new)
+{
+ struct mlx5_per_qp_opfc *per_qp_opfc;
+
+ *new = false;
+
+ per_qp_opfc = xa_load(&mcounter->qpn_opfc_xa, qp_num);
+ if (per_qp_opfc)
+ return per_qp_opfc;
+
+ per_qp_opfc = kzalloc(sizeof(*per_qp_opfc), GFP_KERNEL);
+ if (!per_qp_opfc)
+ return NULL;
+
+ *new = true;
+ return per_qp_opfc;
+}
+
+static int add_op_fc_rules(struct mlx5_ib_dev *dev,
+ struct mlx5_rdma_counter *mcounter,
+ struct mlx5_per_qp_opfc *per_qp_opfc,
+ struct mlx5_ib_flow_prio *prio,
+ enum mlx5_ib_optional_counter_type type,
+ u32 qp_num, u32 port_num)
+{
+ struct mlx5_ib_op_fc *opfc = &per_qp_opfc->opfcs[type], *in_use_opfc;
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_destination dst;
+ struct mlx5_flow_spec *spec;
+ int i, err, spec_num;
+ bool is_tx;
+
+ if (opfc->fc)
+ return -EEXIST;
+
+ if (mlx5r_is_opfc_shared_and_in_use(per_qp_opfc->opfcs, type,
+ &in_use_opfc)) {
+ opfc->fc = in_use_opfc->fc;
+ opfc->rule[0] = in_use_opfc->rule[0];
+ return 0;
+ }
+
+ opfc->fc = mcounter->fc[type];
+
+ spec = kcalloc(MAX_OPFC_RULES, sizeof(*spec), GFP_KERNEL);
+ if (!spec) {
+ err = -ENOMEM;
+ goto null_fc;
+ }
+
+ switch (type) {
+ case MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP:
+ if (set_ecn_ce_spec(dev, port_num, &spec[0],
+ MLX5_FS_IPV4_VERSION) ||
+ set_ecn_ce_spec(dev, port_num, &spec[1],
+ MLX5_FS_IPV6_VERSION)) {
+ err = -EOPNOTSUPP;
+ goto free_spec;
+ }
+ spec_num = 2;
+ is_tx = false;
+
+ MLX5_SET_TO_ONES(fte_match_param, spec[1].match_criteria,
+ misc_parameters.bth_dst_qp);
+ MLX5_SET(fte_match_param, spec[1].match_value,
+ misc_parameters.bth_dst_qp, qp_num);
+ spec[1].match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
+ break;
+ case MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS_PER_QP:
+ if (!MLX5_CAP_FLOWTABLE(
+ dev->mdev,
+ ft_field_support_2_nic_receive_rdma.bth_opcode) ||
+ set_cnp_spec(dev, port_num, &spec[0])) {
+ err = -EOPNOTSUPP;
+ goto free_spec;
+ }
+ spec_num = 1;
+ is_tx = false;
+ break;
+ case MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS_PER_QP:
+ if (!MLX5_CAP_FLOWTABLE(
+ dev->mdev,
+ ft_field_support_2_nic_transmit_rdma.bth_opcode) ||
+ set_cnp_spec(dev, port_num, &spec[0])) {
+ err = -EOPNOTSUPP;
+ goto free_spec;
+ }
+ spec_num = 1;
+ is_tx = true;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP:
+ case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP:
+ spec_num = 1;
+ is_tx = true;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP:
+ case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP:
+ spec_num = 1;
+ is_tx = false;
+ break;
+ default:
+ err = -EINVAL;
+ goto free_spec;
+ }
+
+ if (is_tx) {
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ misc_parameters.source_sqn);
+ MLX5_SET(fte_match_param, spec->match_value,
+ misc_parameters.source_sqn, qp_num);
+ } else {
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ misc_parameters.bth_dst_qp);
+ MLX5_SET(fte_match_param, spec->match_value,
+ misc_parameters.bth_dst_qp, qp_num);
+ }
+
+ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
+
+ dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ dst.counter = opfc->fc;
+
+ flow_act.action =
+ MLX5_FLOW_CONTEXT_ACTION_COUNT | MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+
+ for (i = 0; i < spec_num; i++) {
+ opfc->rule[i] = mlx5_add_flow_rules(prio->flow_table, &spec[i],
+ &flow_act, &dst, 1);
+ if (IS_ERR(opfc->rule[i])) {
+ err = PTR_ERR(opfc->rule[i]);
+ goto del_rules;
+ }
+ }
+ prio->refcount += spec_num;
+
+ err = xa_err(xa_store(&mcounter->qpn_opfc_xa, qp_num, per_qp_opfc,
+ GFP_KERNEL));
+ if (err)
+ goto del_rules;
+
+ kfree(spec);
+
+ return 0;
+
+del_rules:
+ while (i--)
+ mlx5_del_flow_rules(opfc->rule[i]);
+ put_flow_table(dev, prio, false);
+free_spec:
+ kfree(spec);
+null_fc:
+ opfc->fc = NULL;
+ return err;
+}
+
+static bool is_fc_shared_and_in_use(struct mlx5_rdma_counter *mcounter,
+ u32 type, struct mlx5_fc **fc)
+{
+ u32 shared_fc_type;
+
+ switch (type) {
+ case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP:
+ shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP:
+ shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP:
+ shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP:
+ shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP;
+ break;
+ default:
+ return false;
+ }
+
+ *fc = mcounter->fc[shared_fc_type];
+ if (!(*fc))
+ return false;
+
+ return true;
+}
+
+void mlx5r_fs_destroy_fcs(struct mlx5_ib_dev *dev,
+ struct rdma_counter *counter)
+{
+ struct mlx5_rdma_counter *mcounter = to_mcounter(counter);
+ struct mlx5_fc *in_use_fc;
+ int i;
+
+ for (i = MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP;
+ i <= MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP; i++) {
+ if (!mcounter->fc[i])
+ continue;
+
+ if (is_fc_shared_and_in_use(mcounter, i, &in_use_fc)) {
+ mcounter->fc[i] = NULL;
+ continue;
+ }
+
+ mlx5_fc_destroy(dev->mdev, mcounter->fc[i]);
+ mcounter->fc[i] = NULL;
+ }
+}
+
int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num,
struct mlx5_ib_op_fc *opfc,
enum mlx5_ib_optional_counter_type type)
@@ -921,6 +1273,20 @@ int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num,
priority = RDMA_TX_CNP_OPCOUNTER_PRIO;
break;
+ case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS:
+ case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES:
+ spec_num = 1;
+ fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS;
+ priority = RDMA_TX_PKTS_BYTES_OPCOUNTER_PRIO;
+ break;
+
+ case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS:
+ case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES:
+ spec_num = 1;
+ fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
+ priority = RDMA_RX_PKTS_BYTES_OPCOUNTER_PRIO;
+ break;
+
default:
err = -EOPNOTSUPP;
goto free;
@@ -932,13 +1298,17 @@ int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num,
goto free;
}
- prio = &dev->flow_db->opfcs[type];
+ prio = get_opfc_prio(dev, type);
if (!prio->flow_table) {
+ err = get_per_qp_prio(dev, type);
+ if (err)
+ goto free;
+
prio = _get_prio(dev, ns, prio, priority,
- dev->num_ports * MAX_OPFC_RULES, 1, 0);
+ dev->num_ports * MAX_OPFC_RULES, 1, 0, 0);
if (IS_ERR(prio)) {
err = PTR_ERR(prio);
- goto free;
+ goto put_prio;
}
}
@@ -965,6 +1335,8 @@ del_rules:
for (i -= 1; i >= 0; i--)
mlx5_del_flow_rules(opfc->rule[i]);
put_flow_table(dev, prio, false);
+put_prio:
+ put_per_qp_prio(dev, type);
free:
kfree(spec);
return err;
@@ -974,12 +1346,115 @@ void mlx5_ib_fs_remove_op_fc(struct mlx5_ib_dev *dev,
struct mlx5_ib_op_fc *opfc,
enum mlx5_ib_optional_counter_type type)
{
+ struct mlx5_ib_flow_prio *prio;
int i;
+ prio = get_opfc_prio(dev, type);
+
for (i = 0; i < MAX_OPFC_RULES && opfc->rule[i]; i++) {
mlx5_del_flow_rules(opfc->rule[i]);
- put_flow_table(dev, &dev->flow_db->opfcs[type], true);
+ put_flow_table(dev, prio, true);
}
+
+ put_per_qp_prio(dev, type);
+}
+
+void mlx5r_fs_unbind_op_fc(struct ib_qp *qp, struct rdma_counter *counter)
+{
+ struct mlx5_rdma_counter *mcounter = to_mcounter(counter);
+ struct mlx5_ib_dev *dev = to_mdev(counter->device);
+ struct mlx5_per_qp_opfc *per_qp_opfc;
+ struct mlx5_ib_op_fc *in_use_opfc;
+ struct mlx5_ib_flow_prio *prio;
+ int i, j;
+
+ per_qp_opfc = xa_load(&mcounter->qpn_opfc_xa, qp->qp_num);
+ if (!per_qp_opfc)
+ return;
+
+ for (i = MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP;
+ i <= MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP; i++) {
+ if (!per_qp_opfc->opfcs[i].fc)
+ continue;
+
+ if (mlx5r_is_opfc_shared_and_in_use(per_qp_opfc->opfcs, i,
+ &in_use_opfc)) {
+ per_qp_opfc->opfcs[i].fc = NULL;
+ continue;
+ }
+
+ for (j = 0; j < MAX_OPFC_RULES; j++) {
+ if (!per_qp_opfc->opfcs[i].rule[j])
+ continue;
+ mlx5_del_flow_rules(per_qp_opfc->opfcs[i].rule[j]);
+ prio = get_opfc_prio(dev, i);
+ put_flow_table(dev, prio, true);
+ }
+ per_qp_opfc->opfcs[i].fc = NULL;
+ }
+
+ kfree(per_qp_opfc);
+ xa_erase(&mcounter->qpn_opfc_xa, qp->qp_num);
+}
+
+int mlx5r_fs_bind_op_fc(struct ib_qp *qp, struct rdma_counter *counter,
+ u32 port)
+{
+ struct mlx5_rdma_counter *mcounter = to_mcounter(counter);
+ struct mlx5_ib_dev *dev = to_mdev(qp->device);
+ struct mlx5_per_qp_opfc *per_qp_opfc;
+ struct mlx5_ib_flow_prio *prio;
+ struct mlx5_ib_counters *cnts;
+ struct mlx5_ib_op_fc *opfc;
+ struct mlx5_fc *in_use_fc;
+ int i, err, per_qp_type;
+ bool new;
+
+ if (!counter->mode.bind_opcnt)
+ return 0;
+
+ cnts = &dev->port[port - 1].cnts;
+
+ for (i = 0; i <= MLX5_IB_OPCOUNTER_RDMA_RX_BYTES; i++) {
+ opfc = &cnts->opfcs[i];
+ if (!opfc->fc)
+ continue;
+
+ per_qp_type = i + MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP;
+ prio = get_opfc_prio(dev, per_qp_type);
+ WARN_ON(!prio->flow_table);
+
+ if (is_fc_shared_and_in_use(mcounter, per_qp_type, &in_use_fc))
+ mcounter->fc[per_qp_type] = in_use_fc;
+
+ if (!mcounter->fc[per_qp_type]) {
+ mcounter->fc[per_qp_type] = mlx5_fc_create(dev->mdev,
+ false);
+ if (IS_ERR(mcounter->fc[per_qp_type]))
+ return PTR_ERR(mcounter->fc[per_qp_type]);
+ }
+
+ per_qp_opfc = get_per_qp_opfc(mcounter, qp->qp_num, &new);
+ if (!per_qp_opfc) {
+ err = -ENOMEM;
+ goto free_fc;
+ }
+ err = add_op_fc_rules(dev, mcounter, per_qp_opfc, prio,
+ per_qp_type, qp->qp_num, port);
+ if (err)
+ goto del_rules;
+ }
+
+ return 0;
+
+del_rules:
+ mlx5r_fs_unbind_op_fc(qp, counter);
+ if (new)
+ kfree(per_qp_opfc);
+free_fc:
+ if (xa_empty(&mcounter->qpn_opfc_xa))
+ mlx5r_fs_destroy_fcs(dev, counter);
+ return err;
}
static void set_underlay_qp(struct mlx5_ib_dev *dev,
@@ -1413,17 +1888,51 @@ free_ucmd:
return ERR_PTR(err);
}
+static int mlx5_ib_fill_transport_ns_info(struct mlx5_ib_dev *dev,
+ enum mlx5_flow_namespace_type type,
+ u32 *flags, u16 *vport_idx,
+ u16 *vport,
+ struct mlx5_core_dev **ft_mdev,
+ u32 ib_port)
+{
+ struct mlx5_core_dev *esw_mdev;
+
+ if (!is_mdev_switchdev_mode(dev->mdev))
+ return 0;
+
+ if (!MLX5_CAP_ADV_RDMA(dev->mdev, rdma_transport_manager))
+ return -EOPNOTSUPP;
+
+ if (!dev->port[ib_port - 1].rep)
+ return -EINVAL;
+
+ esw_mdev = mlx5_eswitch_get_core_dev(dev->port[ib_port - 1].rep->esw);
+ if (esw_mdev != dev->mdev)
+ return -EOPNOTSUPP;
+
+ *flags |= MLX5_FLOW_TABLE_OTHER_VPORT;
+ *ft_mdev = esw_mdev;
+ *vport = dev->port[ib_port - 1].rep->vport;
+ *vport_idx = dev->port[ib_port - 1].rep->vport_index;
+
+ return 0;
+}
+
static struct mlx5_ib_flow_prio *
_get_flow_table(struct mlx5_ib_dev *dev, u16 user_priority,
enum mlx5_flow_namespace_type ns_type,
- bool mcast)
+ bool mcast, u32 ib_port)
{
+ struct mlx5_core_dev *ft_mdev = dev->mdev;
struct mlx5_flow_namespace *ns = NULL;
struct mlx5_ib_flow_prio *prio = NULL;
int max_table_size = 0;
+ u16 vport_idx = 0;
bool esw_encap;
u32 flags = 0;
+ u16 vport = 0;
int priority;
+ int ret;
if (mcast)
priority = MLX5_IB_FLOW_MCAST_PRIO;
@@ -1471,13 +1980,38 @@ _get_flow_table(struct mlx5_ib_dev *dev, u16 user_priority,
MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev, log_max_ft_size));
priority = user_priority;
break;
+ case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX:
+ case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX:
+ if (ib_port == 0 || user_priority > MLX5_RDMA_TRANSPORT_BYPASS_PRIO)
+ return ERR_PTR(-EINVAL);
+ ret = mlx5_ib_fill_transport_ns_info(dev, ns_type, &flags,
+ &vport_idx, &vport,
+ &ft_mdev, ib_port);
+ if (ret)
+ return ERR_PTR(ret);
+
+ if (ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX)
+ max_table_size =
+ BIT(MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_RX(
+ ft_mdev, log_max_ft_size));
+ else
+ max_table_size =
+ BIT(MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_TX(
+ ft_mdev, log_max_ft_size));
+ priority = user_priority;
+ break;
default:
break;
}
max_table_size = min_t(int, max_table_size, MLX5_FS_MAX_ENTRIES);
- ns = mlx5_get_flow_namespace(dev->mdev, ns_type);
+ if (ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX ||
+ ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX)
+ ns = mlx5_get_flow_vport_namespace(ft_mdev, ns_type, vport_idx);
+ else
+ ns = mlx5_get_flow_namespace(ft_mdev, ns_type);
+
if (!ns)
return ERR_PTR(-EOPNOTSUPP);
@@ -1497,6 +2031,12 @@ _get_flow_table(struct mlx5_ib_dev *dev, u16 user_priority,
case MLX5_FLOW_NAMESPACE_RDMA_TX:
prio = &dev->flow_db->rdma_tx[priority];
break;
+ case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX:
+ prio = &dev->flow_db->rdma_transport_rx[ib_port - 1];
+ break;
+ case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX:
+ prio = &dev->flow_db->rdma_transport_tx[ib_port - 1];
+ break;
default: return ERR_PTR(-EINVAL);
}
@@ -1507,7 +2047,7 @@ _get_flow_table(struct mlx5_ib_dev *dev, u16 user_priority,
return prio;
return _get_prio(dev, ns, prio, priority, max_table_size,
- MLX5_FS_MAX_TYPES, flags);
+ MLX5_FS_MAX_TYPES, flags, vport);
}
static struct mlx5_ib_flow_handler *
@@ -1626,7 +2166,8 @@ static struct mlx5_ib_flow_handler *raw_fs_rule_add(
mutex_lock(&dev->flow_db->lock);
ft_prio = _get_flow_table(dev, fs_matcher->priority,
- fs_matcher->ns_type, mcast);
+ fs_matcher->ns_type, mcast,
+ fs_matcher->ib_port);
if (IS_ERR(ft_prio)) {
err = PTR_ERR(ft_prio);
goto unlock;
@@ -1742,6 +2283,12 @@ mlx5_ib_ft_type_to_namespace(enum mlx5_ib_uapi_flow_table_type table_type,
case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TX:
*namespace = MLX5_FLOW_NAMESPACE_RDMA_TX;
break;
+ case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TRANSPORT_RX:
+ *namespace = MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX;
+ break;
+ case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TRANSPORT_TX:
+ *namespace = MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX;
+ break;
default:
return -EINVAL;
}
@@ -1831,7 +2378,8 @@ static int get_dests(struct uverbs_attr_bundle *attrs,
return -EINVAL;
/* Allow only DEVX object or QP as dest when inserting to RDMA_RX */
- if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) &&
+ if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX ||
+ fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX) &&
((!dest_devx && !dest_qp) || (dest_devx && dest_qp)))
return -EINVAL;
@@ -1848,7 +2396,8 @@ static int get_dests(struct uverbs_attr_bundle *attrs,
return -EINVAL;
/* Allow only flow table as dest when inserting to FDB or RDMA_RX */
if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB_BYPASS ||
- fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) &&
+ fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX ||
+ fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX) &&
*dest_type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE)
return -EINVAL;
} else if (dest_qp) {
@@ -1869,14 +2418,16 @@ static int get_dests(struct uverbs_attr_bundle *attrs,
*dest_id = mqp->raw_packet_qp.rq.tirn;
*dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR;
} else if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS ||
- fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX) &&
+ fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX ||
+ fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX) &&
!(*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)) {
*dest_type = MLX5_FLOW_DESTINATION_TYPE_PORT;
}
if (*dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
(fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS ||
- fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX))
+ fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX ||
+ fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX))
return -EINVAL;
return 0;
@@ -2353,6 +2904,15 @@ static int mlx5_ib_matcher_ns(struct uverbs_attr_bundle *attrs,
return 0;
}
+static bool verify_context_caps(struct mlx5_ib_dev *dev, u64 enabled_caps)
+{
+ if (is_mdev_switchdev_mode(dev->mdev))
+ return UCAP_ENABLED(enabled_caps,
+ RDMA_UCAP_MLX5_CTRL_OTHER_VHCA);
+
+ return UCAP_ENABLED(enabled_caps, RDMA_UCAP_MLX5_CTRL_LOCAL);
+}
+
static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)(
struct uverbs_attr_bundle *attrs)
{
@@ -2401,6 +2961,26 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)(
goto end;
}
+ if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_IB_PORT)) {
+ err = uverbs_copy_from(&obj->ib_port, attrs,
+ MLX5_IB_ATTR_FLOW_MATCHER_IB_PORT);
+ if (err)
+ goto end;
+ if (!rdma_is_port_valid(&dev->ib_dev, obj->ib_port)) {
+ err = -EINVAL;
+ goto end;
+ }
+ if (obj->ns_type != MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX &&
+ obj->ns_type != MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX) {
+ err = -EINVAL;
+ goto end;
+ }
+ if (!verify_context_caps(dev, uobj->context->enabled_caps)) {
+ err = -EOPNOTSUPP;
+ goto end;
+ }
+ }
+
uobj->object = obj;
obj->mdev = dev->mdev;
atomic_set(&obj->usecnt, 0);
@@ -2448,7 +3028,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_STEERING_ANCHOR_CREATE)(
mutex_lock(&dev->flow_db->lock);
- ft_prio = _get_flow_table(dev, priority, ns_type, 0);
+ ft_prio = _get_flow_table(dev, priority, ns_type, 0, 0);
if (IS_ERR(ft_prio)) {
err = PTR_ERR(ft_prio);
goto free_obj;
@@ -2834,7 +3414,10 @@ DECLARE_UVERBS_NAMED_METHOD(
UA_OPTIONAL),
UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE,
enum mlx5_ib_uapi_flow_table_type,
- UA_OPTIONAL));
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_FLOW_MATCHER_IB_PORT,
+ UVERBS_ATTR_TYPE(u32),
+ UA_OPTIONAL));
DECLARE_UVERBS_NAMED_METHOD_DESTROY(
MLX5_IB_METHOD_FLOW_MATCHER_DESTROY,
@@ -2878,6 +3461,7 @@ DECLARE_UVERBS_NAMED_OBJECT(
&UVERBS_METHOD(MLX5_IB_METHOD_STEERING_ANCHOR_DESTROY));
const struct uapi_definition mlx5_ib_flow_defs[] = {
+#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
MLX5_IB_OBJECT_FLOW_MATCHER),
UAPI_DEF_CHAIN_OBJ_TREE(
@@ -2888,6 +3472,7 @@ const struct uapi_definition mlx5_ib_flow_defs[] = {
UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
MLX5_IB_OBJECT_STEERING_ANCHOR,
UAPI_DEF_IS_OBJ_SUPPORTED(mlx5_ib_shared_ft_allowed)),
+#endif
{},
};
@@ -2904,8 +3489,26 @@ int mlx5_ib_fs_init(struct mlx5_ib_dev *dev)
if (!dev->flow_db)
return -ENOMEM;
+ dev->flow_db->rdma_transport_rx = kcalloc(dev->num_ports,
+ sizeof(struct mlx5_ib_flow_prio),
+ GFP_KERNEL);
+ if (!dev->flow_db->rdma_transport_rx)
+ goto free_flow_db;
+
+ dev->flow_db->rdma_transport_tx = kcalloc(dev->num_ports,
+ sizeof(struct mlx5_ib_flow_prio),
+ GFP_KERNEL);
+ if (!dev->flow_db->rdma_transport_tx)
+ goto free_rdma_transport_rx;
+
mutex_init(&dev->flow_db->lock);
ib_set_device_ops(&dev->ib_dev, &flow_ops);
return 0;
+
+free_rdma_transport_rx:
+ kfree(dev->flow_db->rdma_transport_rx);
+free_flow_db:
+ kfree(dev->flow_db);
+ return -ENOMEM;
}
diff --git a/drivers/infiniband/hw/mlx5/fs.h b/drivers/infiniband/hw/mlx5/fs.h
index b9734904f5f0..2ebe86e5be10 100644
--- a/drivers/infiniband/hw/mlx5/fs.h
+++ b/drivers/infiniband/hw/mlx5/fs.h
@@ -8,23 +8,8 @@
#include "mlx5_ib.h"
-#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
int mlx5_ib_fs_init(struct mlx5_ib_dev *dev);
void mlx5_ib_fs_cleanup_anchor(struct mlx5_ib_dev *dev);
-#else
-static inline int mlx5_ib_fs_init(struct mlx5_ib_dev *dev)
-{
- dev->flow_db = kzalloc(sizeof(*dev->flow_db), GFP_KERNEL);
-
- if (!dev->flow_db)
- return -ENOMEM;
-
- mutex_init(&dev->flow_db->lock);
- return 0;
-}
-
-inline void mlx5_ib_fs_cleanup_anchor(struct mlx5_ib_dev *dev) {}
-#endif
static inline void mlx5_ib_fs_cleanup(struct mlx5_ib_dev *dev)
{
@@ -40,6 +25,8 @@ static inline void mlx5_ib_fs_cleanup(struct mlx5_ib_dev *dev)
* is a safe assumption that all references are gone.
*/
mlx5_ib_fs_cleanup_anchor(dev);
+ kfree(dev->flow_db->rdma_transport_tx);
+ kfree(dev->flow_db->rdma_transport_rx);
kfree(dev->flow_db);
}
#endif /* _MLX5_IB_FS_H */
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 81849eb671a1..d07cacaa0abd 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -47,6 +47,7 @@
#include <rdma/uverbs_ioctl.h>
#include <rdma/mlx5_user_ioctl_verbs.h>
#include <rdma/mlx5_user_ioctl_cmds.h>
+#include <rdma/ib_ucaps.h>
#include "macsec.h"
#include "data_direct.h"
@@ -1934,6 +1935,12 @@ static int set_ucontext_resp(struct ib_ucontext *uctx,
return 0;
}
+static bool uctx_rdma_ctrl_is_enabled(u64 enabled_caps)
+{
+ return UCAP_ENABLED(enabled_caps, RDMA_UCAP_MLX5_CTRL_LOCAL) ||
+ UCAP_ENABLED(enabled_caps, RDMA_UCAP_MLX5_CTRL_OTHER_VHCA);
+}
+
static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
struct ib_udata *udata)
{
@@ -1976,10 +1983,17 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
return -EINVAL;
if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) {
- err = mlx5_ib_devx_create(dev, true);
+ err = mlx5_ib_devx_create(dev, true, uctx->enabled_caps);
if (err < 0)
goto out_ctx;
context->devx_uid = err;
+
+ if (uctx_rdma_ctrl_is_enabled(uctx->enabled_caps)) {
+ err = mlx5_cmd_add_privileged_uid(dev->mdev,
+ context->devx_uid);
+ if (err)
+ goto out_devx;
+ }
}
lib_uar_4k = req.lib_caps & MLX5_LIB_CAP_4K_UAR;
@@ -1994,7 +2008,7 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
/* updates req->total_num_bfregs */
err = calc_total_bfregs(dev, lib_uar_4k, &req, bfregi);
if (err)
- goto out_devx;
+ goto out_ucap;
mutex_init(&bfregi->lock);
bfregi->lib_uar_4k = lib_uar_4k;
@@ -2002,7 +2016,7 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
GFP_KERNEL);
if (!bfregi->count) {
err = -ENOMEM;
- goto out_devx;
+ goto out_ucap;
}
bfregi->sys_pages = kcalloc(bfregi->num_sys_pages,
@@ -2066,6 +2080,11 @@ out_sys_pages:
out_count:
kfree(bfregi->count);
+out_ucap:
+ if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX &&
+ uctx_rdma_ctrl_is_enabled(uctx->enabled_caps))
+ mlx5_cmd_remove_privileged_uid(dev->mdev, context->devx_uid);
+
out_devx:
if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX)
mlx5_ib_devx_destroy(dev, context->devx_uid);
@@ -2110,8 +2129,12 @@ static void mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
kfree(bfregi->sys_pages);
kfree(bfregi->count);
- if (context->devx_uid)
+ if (context->devx_uid) {
+ if (uctx_rdma_ctrl_is_enabled(ibcontext->enabled_caps))
+ mlx5_cmd_remove_privileged_uid(dev->mdev,
+ context->devx_uid);
mlx5_ib_devx_destroy(dev, context->devx_uid);
+ }
}
static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev,
@@ -4201,8 +4224,47 @@ static int mlx5_ib_init_var_table(struct mlx5_ib_dev *dev)
return (var_table->bitmap) ? 0 : -ENOMEM;
}
+static void mlx5_ib_cleanup_ucaps(struct mlx5_ib_dev *dev)
+{
+ if (MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RDMA_CTRL)
+ ib_remove_ucap(RDMA_UCAP_MLX5_CTRL_LOCAL);
+
+ if (MLX5_CAP_GEN(dev->mdev, uctx_cap) &
+ MLX5_UCTX_CAP_RDMA_CTRL_OTHER_VHCA)
+ ib_remove_ucap(RDMA_UCAP_MLX5_CTRL_OTHER_VHCA);
+}
+
+static int mlx5_ib_init_ucaps(struct mlx5_ib_dev *dev)
+{
+ int ret;
+
+ if (MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RDMA_CTRL) {
+ ret = ib_create_ucap(RDMA_UCAP_MLX5_CTRL_LOCAL);
+ if (ret)
+ return ret;
+ }
+
+ if (MLX5_CAP_GEN(dev->mdev, uctx_cap) &
+ MLX5_UCTX_CAP_RDMA_CTRL_OTHER_VHCA) {
+ ret = ib_create_ucap(RDMA_UCAP_MLX5_CTRL_OTHER_VHCA);
+ if (ret)
+ goto remove_local;
+ }
+
+ return 0;
+
+remove_local:
+ if (MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RDMA_CTRL)
+ ib_remove_ucap(RDMA_UCAP_MLX5_CTRL_LOCAL);
+ return ret;
+}
+
static void mlx5_ib_stage_caps_cleanup(struct mlx5_ib_dev *dev)
{
+ if (MLX5_CAP_GEN_2_64(dev->mdev, general_obj_types_127_64) &
+ MLX5_HCA_CAP_2_GENERAL_OBJECT_TYPES_RDMA_CTRL)
+ mlx5_ib_cleanup_ucaps(dev);
+
bitmap_free(dev->var_table.bitmap);
}
@@ -4253,6 +4315,13 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
return err;
}
+ if (MLX5_CAP_GEN_2_64(dev->mdev, general_obj_types_127_64) &
+ MLX5_HCA_CAP_2_GENERAL_OBJECT_TYPES_RDMA_CTRL) {
+ err = mlx5_ib_init_ucaps(dev);
+ if (err)
+ return err;
+ }
+
dev->ib_dev.use_cq_dim = true;
return 0;
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 974a45c92fbb..ace2df3e1d9f 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -276,6 +276,7 @@ struct mlx5_ib_flow_matcher {
struct mlx5_core_dev *mdev;
atomic_t usecnt;
u8 match_criteria_enable;
+ u32 ib_port;
};
struct mlx5_ib_steering_anchor {
@@ -293,6 +294,18 @@ enum mlx5_ib_optional_counter_type {
MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS,
MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS,
MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS,
+ MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS,
+ MLX5_IB_OPCOUNTER_RDMA_TX_BYTES,
+ MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS,
+ MLX5_IB_OPCOUNTER_RDMA_RX_BYTES,
+
+ MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP,
+ MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS_PER_QP,
+ MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS_PER_QP,
+ MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP,
+ MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP,
+ MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP,
+ MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP,
MLX5_IB_OPCOUNTER_MAX,
};
@@ -307,6 +320,8 @@ struct mlx5_ib_flow_db {
struct mlx5_ib_flow_prio rdma_tx[MLX5_IB_NUM_FLOW_FT];
struct mlx5_ib_flow_prio opfcs[MLX5_IB_OPCOUNTER_MAX];
struct mlx5_flow_table *lag_demux_ft;
+ struct mlx5_ib_flow_prio *rdma_transport_rx;
+ struct mlx5_ib_flow_prio *rdma_transport_tx;
/* Protect flow steering bypass flow tables
* when add/del flow rules.
* only single add/removal of flow steering rule could be done
@@ -883,6 +898,14 @@ void mlx5_ib_fs_remove_op_fc(struct mlx5_ib_dev *dev,
struct mlx5_ib_op_fc *opfc,
enum mlx5_ib_optional_counter_type type);
+int mlx5r_fs_bind_op_fc(struct ib_qp *qp, struct rdma_counter *counter,
+ u32 port);
+
+void mlx5r_fs_unbind_op_fc(struct ib_qp *qp, struct rdma_counter *counter);
+
+void mlx5r_fs_destroy_fcs(struct mlx5_ib_dev *dev,
+ struct rdma_counter *counter);
+
struct mlx5_ib_multiport_info;
struct mlx5_ib_multiport {
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 753faa9ad06a..b7c8c926c578 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -56,7 +56,7 @@ static void
create_mkey_callback(int status, struct mlx5_async_work *context);
static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem,
u64 iova, int access_flags,
- unsigned int page_size, bool populate,
+ unsigned long page_size, bool populate,
int access_mode);
static int __mlx5_ib_dereg_mr(struct ib_mr *ibmr);
@@ -718,8 +718,7 @@ mkey_cache_ent_from_rb_key(struct mlx5_ib_dev *dev,
}
static struct mlx5_ib_mr *_mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
- struct mlx5_cache_ent *ent,
- int access_flags)
+ struct mlx5_cache_ent *ent)
{
struct mlx5_ib_mr *mr;
int err;
@@ -794,7 +793,7 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
if (!ent)
return ERR_PTR(-EOPNOTSUPP);
- return _mlx5_mr_cache_alloc(dev, ent, access_flags);
+ return _mlx5_mr_cache_alloc(dev, ent);
}
static void mlx5_mkey_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
@@ -919,6 +918,25 @@ mkeys_err:
return ERR_PTR(ret);
}
+static void mlx5r_destroy_cache_entries(struct mlx5_ib_dev *dev)
+{
+ struct rb_root *root = &dev->cache.rb_root;
+ struct mlx5_cache_ent *ent;
+ struct rb_node *node;
+
+ mutex_lock(&dev->cache.rb_lock);
+ node = rb_first(root);
+ while (node) {
+ ent = rb_entry(node, struct mlx5_cache_ent, node);
+ node = rb_next(node);
+ clean_keys(dev, ent);
+ rb_erase(&ent->node, root);
+ mlx5r_mkeys_uninit(ent);
+ kfree(ent);
+ }
+ mutex_unlock(&dev->cache.rb_lock);
+}
+
int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
{
struct mlx5_mkey_cache *cache = &dev->cache;
@@ -970,6 +988,8 @@ int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
err:
mutex_unlock(&cache->rb_lock);
mlx5_mkey_cache_debugfs_cleanup(dev);
+ mlx5r_destroy_cache_entries(dev);
+ destroy_workqueue(cache->wq);
mlx5_ib_warn(dev, "failed to create mkey cache entry\n");
return ret;
}
@@ -1003,17 +1023,7 @@ void mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev)
mlx5_cmd_cleanup_async_ctx(&dev->async_ctx);
/* At this point all entries are disabled and have no concurrent work. */
- mutex_lock(&dev->cache.rb_lock);
- node = rb_first(root);
- while (node) {
- ent = rb_entry(node, struct mlx5_cache_ent, node);
- node = rb_next(node);
- clean_keys(dev, ent);
- rb_erase(&ent->node, root);
- mlx5r_mkeys_uninit(ent);
- kfree(ent);
- }
- mutex_unlock(&dev->cache.rb_lock);
+ mlx5r_destroy_cache_entries(dev);
destroy_workqueue(dev->cache.wq);
del_timer_sync(&dev->delay_timer);
@@ -1115,7 +1125,7 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
struct mlx5r_cache_rb_key rb_key = {};
struct mlx5_cache_ent *ent;
struct mlx5_ib_mr *mr;
- unsigned int page_size;
+ unsigned long page_size;
if (umem->is_dmabuf)
page_size = mlx5_umem_dmabuf_default_pgsz(umem, iova);
@@ -1144,7 +1154,7 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
return mr;
}
- mr = _mlx5_mr_cache_alloc(dev, ent, access_flags);
+ mr = _mlx5_mr_cache_alloc(dev, ent);
if (IS_ERR(mr))
return mr;
@@ -1219,7 +1229,7 @@ err_1:
*/
static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem,
u64 iova, int access_flags,
- unsigned int page_size, bool populate,
+ unsigned long page_size, bool populate,
int access_mode)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
@@ -1425,7 +1435,7 @@ static struct ib_mr *create_real_mr(struct ib_pd *pd, struct ib_umem *umem,
mr = alloc_cacheable_mr(pd, umem, iova, access_flags,
MLX5_MKC_ACCESS_MODE_MTT);
} else {
- unsigned int page_size =
+ unsigned long page_size =
mlx5_umem_mkc_find_best_pgsz(dev, umem, iova);
mutex_lock(&dev->slow_path_mutex);
@@ -1957,7 +1967,6 @@ static int cache_ent_find_and_store(struct mlx5_ib_dev *dev,
if (mr->mmkey.cache_ent) {
spin_lock_irq(&mr->mmkey.cache_ent->mkeys_queue.lock);
- mr->mmkey.cache_ent->in_use--;
goto end;
}
@@ -2025,6 +2034,7 @@ static int mlx5_revoke_mr(struct mlx5_ib_mr *mr)
bool is_odp = is_odp_mr(mr);
bool is_odp_dma_buf = is_dmabuf_mr(mr) &&
!to_ib_umem_dmabuf(mr->umem)->pinned;
+ bool from_cache = !!ent;
int ret = 0;
if (is_odp)
@@ -2037,6 +2047,8 @@ static int mlx5_revoke_mr(struct mlx5_ib_mr *mr)
ent = mr->mmkey.cache_ent;
/* upon storing to a clean temp entry - schedule its cleanup */
spin_lock_irq(&ent->mkeys_queue.lock);
+ if (from_cache)
+ ent->in_use--;
if (ent->is_tmp && !ent->tmp_cleanup_scheduled) {
mod_delayed_work(ent->dev->cache.wq, &ent->dwork,
msecs_to_jiffies(30 * 1000));
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index e77c9280c07e..86d8fa63bf69 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -309,9 +309,6 @@ static bool mlx5_ib_invalidate_range(struct mmu_interval_notifier *mni,
blk_start_idx = idx;
in_block = 1;
}
-
- /* Count page invalidations */
- invalidations += idx - blk_start_idx + 1;
} else {
u64 umr_offset = idx & umr_block_mask;
@@ -321,14 +318,19 @@ static bool mlx5_ib_invalidate_range(struct mmu_interval_notifier *mni,
MLX5_IB_UPD_XLT_ZAP |
MLX5_IB_UPD_XLT_ATOMIC);
in_block = 0;
+ /* Count page invalidations */
+ invalidations += idx - blk_start_idx + 1;
}
}
}
- if (in_block)
+ if (in_block) {
mlx5r_umr_update_xlt(mr, blk_start_idx,
idx - blk_start_idx + 1, 0,
MLX5_IB_UPD_XLT_ZAP |
MLX5_IB_UPD_XLT_ATOMIC);
+ /* Count page invalidations */
+ invalidations += idx - blk_start_idx + 1;
+ }
mlx5_update_odp_stats_with_handled(mr, invalidations, invalidations);
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
index 9f54aa90a35a..bcd43dc30e21 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
@@ -237,34 +237,6 @@ enum rdma_link_layer pvrdma_port_link_layer(struct ib_device *ibdev,
return IB_LINK_LAYER_ETHERNET;
}
-int pvrdma_modify_device(struct ib_device *ibdev, int mask,
- struct ib_device_modify *props)
-{
- unsigned long flags;
-
- if (mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID |
- IB_DEVICE_MODIFY_NODE_DESC)) {
- dev_warn(&to_vdev(ibdev)->pdev->dev,
- "unsupported device modify mask %#x\n", mask);
- return -EOPNOTSUPP;
- }
-
- if (mask & IB_DEVICE_MODIFY_NODE_DESC) {
- spin_lock_irqsave(&to_vdev(ibdev)->desc_lock, flags);
- memcpy(ibdev->node_desc, props->node_desc, 64);
- spin_unlock_irqrestore(&to_vdev(ibdev)->desc_lock, flags);
- }
-
- if (mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID) {
- mutex_lock(&to_vdev(ibdev)->port_mutex);
- to_vdev(ibdev)->sys_image_guid =
- cpu_to_be64(props->sys_image_guid);
- mutex_unlock(&to_vdev(ibdev)->port_mutex);
- }
-
- return 0;
-}
-
/**
* pvrdma_modify_port - modify device port attributes
* @ibdev: the device to modify
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
index 4b9edc03d73d..fd47b0b1df5c 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
@@ -356,8 +356,6 @@ int pvrdma_query_pkey(struct ib_device *ibdev, u32 port,
u16 index, u16 *pkey);
enum rdma_link_layer pvrdma_port_link_layer(struct ib_device *ibdev,
u32 port);
-int pvrdma_modify_device(struct ib_device *ibdev, int mask,
- struct ib_device_modify *props);
int pvrdma_modify_port(struct ib_device *ibdev, u32 port,
int mask, struct ib_port_modify *props);
int pvrdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);