Diffstat (limited to 'drivers/infiniband/hw')
119 files changed, 5118 insertions, 1518 deletions
diff --git a/drivers/infiniband/hw/Makefile b/drivers/infiniband/hw/Makefile index 1211f4317a9f..aba96ca9bce5 100644 --- a/drivers/infiniband/hw/Makefile +++ b/drivers/infiniband/hw/Makefile @@ -11,7 +11,7 @@ obj-$(CONFIG_INFINIBAND_OCRDMA) += ocrdma/ obj-$(CONFIG_INFINIBAND_VMWARE_PVRDMA) += vmw_pvrdma/ obj-$(CONFIG_INFINIBAND_USNIC) += usnic/ obj-$(CONFIG_INFINIBAND_HFI1) += hfi1/ -obj-$(CONFIG_INFINIBAND_HNS) += hns/ +obj-$(CONFIG_INFINIBAND_HNS_HIP08) += hns/ obj-$(CONFIG_INFINIBAND_QEDR) += qedr/ obj-$(CONFIG_INFINIBAND_BNXT_RE) += bnxt_re/ obj-$(CONFIG_INFINIBAND_ERDMA) += erdma/ diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h index 2975b11b79bf..6df5a2738c95 100644 --- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h +++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h @@ -53,12 +53,6 @@ #define BNXT_RE_MAX_MR_SIZE_HIGH BIT_ULL(39) #define BNXT_RE_MAX_MR_SIZE BNXT_RE_MAX_MR_SIZE_HIGH -#define BNXT_RE_MAX_QPC_COUNT (64 * 1024) -#define BNXT_RE_MAX_MRW_COUNT (64 * 1024) -#define BNXT_RE_MAX_SRQC_COUNT (64 * 1024) -#define BNXT_RE_MAX_CQ_COUNT (64 * 1024) -#define BNXT_RE_MAX_MRW_COUNT_64K (64 * 1024) -#define BNXT_RE_MAX_MRW_COUNT_256K (256 * 1024) /* Number of MRs to reserve for PF, leaving remainder for VFs */ #define BNXT_RE_RESVD_MR_FOR_PF (32 * 1024) @@ -187,7 +181,6 @@ struct bnxt_re_dev { #define BNXT_RE_FLAG_ISSUE_ROCE_STATS 29 struct net_device *netdev; struct auxiliary_device *adev; - struct notifier_block nb; unsigned int version, major, minor; struct bnxt_qplib_chip_ctx *chip_ctx; struct bnxt_en_dev *en_dev; @@ -204,7 +197,7 @@ struct bnxt_re_dev { struct bnxt_re_nq_record *nqr; /* Device Resources */ - struct bnxt_qplib_dev_attr dev_attr; + struct bnxt_qplib_dev_attr *dev_attr; struct bnxt_qplib_ctx qplib_ctx; struct bnxt_qplib_res qplib_res; struct bnxt_qplib_dpi dpi_privileged; @@ -229,6 +222,11 @@ struct bnxt_re_dev { DECLARE_HASHTABLE(srq_hash, MAX_SRQ_HASH_BITS); struct dentry *dbg_root; struct dentry *qp_debugfs; + unsigned long event_bitmap; + struct bnxt_qplib_cc_param cc_param; + struct workqueue_struct *dcb_wq; + struct dentry *cc_config; + struct bnxt_re_dbg_cc_config_params *cc_config_params; }; #define to_bnxt_re_dev(ptr, member) \ @@ -241,6 +239,10 @@ struct bnxt_re_dev { #define BNXT_RE_CHECK_RC(x) ((x) && ((x) != -ETIMEDOUT)) void bnxt_re_pacing_alert(struct bnxt_re_dev *rdev); +int bnxt_re_assign_pma_port_counters(struct bnxt_re_dev *rdev, struct ib_mad *out_mad); +int bnxt_re_assign_pma_port_ext_counters(struct bnxt_re_dev *rdev, + struct ib_mad *out_mad); + static inline struct device *rdev_to_dev(struct bnxt_re_dev *rdev) { if (rdev) diff --git a/drivers/infiniband/hw/bnxt_re/debugfs.c b/drivers/infiniband/hw/bnxt_re/debugfs.c index 7c47039044ef..af91d16c3c77 100644 --- a/drivers/infiniband/hw/bnxt_re/debugfs.c +++ b/drivers/infiniband/hw/bnxt_re/debugfs.c @@ -22,6 +22,23 @@ static struct dentry *bnxt_re_debugfs_root; +static const char * const bnxt_re_cc_gen0_name[] = { + "enable_cc", + "run_avg_weight_g", + "num_phase_per_state", + "init_cr", + "init_tr", + "tos_ecn", + "tos_dscp", + "alt_vlan_pcp", + "alt_vlan_dscp", + "rtt", + "cc_mode", + "tcp_cp", + "tx_queue", + "inactivity_cp", +}; + static inline const char *bnxt_re_qp_state_str(u8 state) { switch (state) { @@ -110,19 +127,215 @@ void bnxt_re_debug_rem_qpinfo(struct bnxt_re_dev *rdev, struct bnxt_re_qp *qp) debugfs_remove(qp->dentry); } +static int map_cc_config_offset_gen0_ext0(u32 offset, struct bnxt_qplib_cc_param *ccparam, u32 *val) +{ + u64 map_offset; + + 
map_offset = BIT(offset); + + switch (map_offset) { + case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ENABLE_CC: + *val = ccparam->enable; + break; + case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_G: + *val = ccparam->g; + break; + case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_NUMPHASEPERSTATE: + *val = ccparam->nph_per_state; + break; + case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_INIT_CR: + *val = ccparam->init_cr; + break; + case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_INIT_TR: + *val = ccparam->init_tr; + break; + case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_ECN: + *val = ccparam->tos_ecn; + break; + case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_DSCP: + *val = ccparam->tos_dscp; + break; + case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ALT_VLAN_PCP: + *val = ccparam->alt_vlan_pcp; + break; + case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ALT_TOS_DSCP: + *val = ccparam->alt_tos_dscp; + break; + case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_RTT: + *val = ccparam->rtt; + break; + case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_CC_MODE: + *val = ccparam->cc_mode; + break; + case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TCP_CP: + *val = ccparam->tcp_cp; + break; + default: + return -EINVAL; + } + + return 0; +} + +static ssize_t bnxt_re_cc_config_get(struct file *filp, char __user *buffer, + size_t usr_buf_len, loff_t *ppos) +{ + struct bnxt_re_cc_param *dbg_cc_param = filp->private_data; + struct bnxt_re_dev *rdev = dbg_cc_param->rdev; + struct bnxt_qplib_cc_param ccparam = {}; + u32 offset = dbg_cc_param->offset; + char buf[16]; + u32 val; + int rc; + + rc = bnxt_qplib_query_cc_param(&rdev->qplib_res, &ccparam); + if (rc) + return rc; + + rc = map_cc_config_offset_gen0_ext0(offset, &ccparam, &val); + if (rc) + return rc; + + rc = snprintf(buf, sizeof(buf), "%d\n", val); + if (rc < 0) + return rc; + + return simple_read_from_buffer(buffer, usr_buf_len, ppos, (u8 *)(buf), rc); +} + +static void bnxt_re_fill_gen0_ext0(struct bnxt_qplib_cc_param *ccparam, u32 offset, u32 val) +{ + u32 modify_mask; + + modify_mask = BIT(offset); + + switch (modify_mask) { + case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ENABLE_CC: + ccparam->enable = val; + break; + case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_G: + ccparam->g = val; + break; + case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_NUMPHASEPERSTATE: + ccparam->nph_per_state = val; + break; + case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_INIT_CR: + ccparam->init_cr = val; + break; + case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_INIT_TR: + ccparam->init_tr = val; + break; + case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_ECN: + ccparam->tos_ecn = val; + break; + case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_DSCP: + ccparam->tos_dscp = val; + break; + case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ALT_VLAN_PCP: + ccparam->alt_vlan_pcp = val; + break; + case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ALT_TOS_DSCP: + ccparam->alt_tos_dscp = val; + break; + case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_RTT: + ccparam->rtt = val; + break; + case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_CC_MODE: + ccparam->cc_mode = val; + break; + case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TCP_CP: + ccparam->tcp_cp = val; + break; + case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TX_QUEUE: + case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_INACTIVITY_CP: + break; + case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TIME_PER_PHASE: + ccparam->time_pph = val; + break; + case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_PKTS_PER_PHASE: + ccparam->pkts_pph = val; + break; + } + + ccparam->mask = modify_mask; +} + +static int bnxt_re_configure_cc(struct bnxt_re_dev *rdev, u32 gen_ext, u32 offset, u32 val) +{ + struct bnxt_qplib_cc_param ccparam = { }; + + /* Supporting only Gen 0 now */ + if (gen_ext == CC_CONFIG_GEN0_EXT0) + 
bnxt_re_fill_gen0_ext0(&ccparam, offset, val); + else + return -EINVAL; + + bnxt_qplib_modify_cc(&rdev->qplib_res, &ccparam); + return 0; +} + +static ssize_t bnxt_re_cc_config_set(struct file *filp, const char __user *buffer, + size_t count, loff_t *ppos) +{ + struct bnxt_re_cc_param *dbg_cc_param = filp->private_data; + struct bnxt_re_dev *rdev = dbg_cc_param->rdev; + u32 offset = dbg_cc_param->offset; + u8 cc_gen = dbg_cc_param->cc_gen; + char buf[16]; + u32 val; + int rc; + + if (count >= sizeof(buf)) + return -EINVAL; + + if (copy_from_user(buf, buffer, count)) + return -EFAULT; + + buf[count] = '\0'; + if (kstrtou32(buf, 0, &val)) + return -EINVAL; + + rc = bnxt_re_configure_cc(rdev, cc_gen, offset, val); + return rc ? rc : count; +} + +static const struct file_operations bnxt_re_cc_config_ops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = bnxt_re_cc_config_get, + .write = bnxt_re_cc_config_set, +}; + void bnxt_re_debugfs_add_pdev(struct bnxt_re_dev *rdev) { struct pci_dev *pdev = rdev->en_dev->pdev; + struct bnxt_re_dbg_cc_config_params *cc_params; + int i; rdev->dbg_root = debugfs_create_dir(dev_name(&pdev->dev), bnxt_re_debugfs_root); rdev->qp_debugfs = debugfs_create_dir("QPs", rdev->dbg_root); + rdev->cc_config = debugfs_create_dir("cc_config", rdev->dbg_root); + + rdev->cc_config_params = kzalloc(sizeof(*cc_params), GFP_KERNEL); + + for (i = 0; i < BNXT_RE_CC_PARAM_GEN0; i++) { + struct bnxt_re_cc_param *tmp_params = &rdev->cc_config_params->gen0_parms[i]; + + tmp_params->rdev = rdev; + tmp_params->offset = i; + tmp_params->cc_gen = CC_CONFIG_GEN0_EXT0; + tmp_params->dentry = debugfs_create_file(bnxt_re_cc_gen0_name[i], 0400, + rdev->cc_config, tmp_params, + &bnxt_re_cc_config_ops); + } } void bnxt_re_debugfs_rem_pdev(struct bnxt_re_dev *rdev) { debugfs_remove_recursive(rdev->qp_debugfs); - + debugfs_remove_recursive(rdev->cc_config); + kfree(rdev->cc_config_params); debugfs_remove_recursive(rdev->dbg_root); rdev->dbg_root = NULL; } diff --git a/drivers/infiniband/hw/bnxt_re/debugfs.h b/drivers/infiniband/hw/bnxt_re/debugfs.h index cd3be0a9ec7e..8f101df4e838 100644 --- a/drivers/infiniband/hw/bnxt_re/debugfs.h +++ b/drivers/infiniband/hw/bnxt_re/debugfs.h @@ -18,4 +18,19 @@ void bnxt_re_debugfs_rem_pdev(struct bnxt_re_dev *rdev); void bnxt_re_register_debugfs(void); void bnxt_re_unregister_debugfs(void); +#define CC_CONFIG_GEN_EXT(x, y) (((x) << 16) | (y)) +#define CC_CONFIG_GEN0_EXT0 CC_CONFIG_GEN_EXT(0, 0) + +#define BNXT_RE_CC_PARAM_GEN0 14 + +struct bnxt_re_cc_param { + struct bnxt_re_dev *rdev; + struct dentry *dentry; + u32 offset; + u8 cc_gen; +}; + +struct bnxt_re_dbg_cc_config_params { + struct bnxt_re_cc_param gen0_parms[BNXT_RE_CC_PARAM_GEN0]; +}; #endif diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.c b/drivers/infiniband/hw/bnxt_re/hw_counters.c index 1e63f8091748..44bb082e0a60 100644 --- a/drivers/infiniband/hw/bnxt_re/hw_counters.c +++ b/drivers/infiniband/hw/bnxt_re/hw_counters.c @@ -37,18 +37,11 @@ * */ -#include <linux/interrupt.h> #include <linux/types.h> -#include <linux/spinlock.h> -#include <linux/sched.h> -#include <linux/slab.h> #include <linux/pci.h> -#include <linux/prefetch.h> -#include <linux/delay.h> +#include <rdma/ib_mad.h> +#include <rdma/ib_pma.h> -#include <rdma/ib_addr.h> - -#include "bnxt_ulp.h" #include "roce_hsi.h" #include "qplib_res.h" #include "qplib_sp.h" @@ -294,6 +287,96 @@ static void bnxt_re_copy_db_pacing_stats(struct bnxt_re_dev *rdev, readl(rdev->en_dev->bar0 + rdev->pacing.dbr_db_fifo_reg_off); } +int 
bnxt_re_assign_pma_port_ext_counters(struct bnxt_re_dev *rdev, struct ib_mad *out_mad) +{ + struct ib_pma_portcounters_ext *pma_cnt_ext; + struct bnxt_qplib_ext_stat *estat = &rdev->stats.rstat.ext_stat; + struct ctx_hw_stats *hw_stats = NULL; + int rc; + + hw_stats = rdev->qplib_ctx.stats.dma; + + pma_cnt_ext = (struct ib_pma_portcounters_ext *)(out_mad->data + 40); + if (_is_ext_stats_supported(rdev->dev_attr->dev_cap_flags)) { + u32 fid = PCI_FUNC(rdev->en_dev->pdev->devfn); + + rc = bnxt_qplib_qext_stat(&rdev->rcfw, fid, estat); + if (rc) + return rc; + } + + pma_cnt_ext = (struct ib_pma_portcounters_ext *)(out_mad->data + 40); + if ((bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) && rdev->is_virtfn) || + !bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx)) { + pma_cnt_ext->port_xmit_data = + cpu_to_be64(le64_to_cpu(hw_stats->tx_ucast_bytes) / 4); + pma_cnt_ext->port_rcv_data = + cpu_to_be64(le64_to_cpu(hw_stats->rx_ucast_bytes) / 4); + pma_cnt_ext->port_xmit_packets = + cpu_to_be64(le64_to_cpu(hw_stats->tx_ucast_pkts)); + pma_cnt_ext->port_rcv_packets = + cpu_to_be64(le64_to_cpu(hw_stats->rx_ucast_pkts)); + pma_cnt_ext->port_unicast_rcv_packets = + cpu_to_be64(le64_to_cpu(hw_stats->rx_ucast_pkts)); + pma_cnt_ext->port_unicast_xmit_packets = + cpu_to_be64(le64_to_cpu(hw_stats->tx_ucast_pkts)); + + } else { + pma_cnt_ext->port_rcv_packets = cpu_to_be64(estat->rx_roce_good_pkts); + pma_cnt_ext->port_rcv_data = cpu_to_be64(estat->rx_roce_good_bytes / 4); + pma_cnt_ext->port_xmit_packets = cpu_to_be64(estat->tx_roce_pkts); + pma_cnt_ext->port_xmit_data = cpu_to_be64(estat->tx_roce_bytes / 4); + pma_cnt_ext->port_unicast_rcv_packets = cpu_to_be64(estat->rx_roce_good_pkts); + pma_cnt_ext->port_unicast_xmit_packets = cpu_to_be64(estat->tx_roce_pkts); + } + return 0; +} + +int bnxt_re_assign_pma_port_counters(struct bnxt_re_dev *rdev, struct ib_mad *out_mad) +{ + struct bnxt_qplib_ext_stat *estat = &rdev->stats.rstat.ext_stat; + struct ib_pma_portcounters *pma_cnt; + struct ctx_hw_stats *hw_stats = NULL; + int rc; + + hw_stats = rdev->qplib_ctx.stats.dma; + + pma_cnt = (struct ib_pma_portcounters *)(out_mad->data + 40); + if (_is_ext_stats_supported(rdev->dev_attr->dev_cap_flags)) { + u32 fid = PCI_FUNC(rdev->en_dev->pdev->devfn); + + rc = bnxt_qplib_qext_stat(&rdev->rcfw, fid, estat); + if (rc) + return rc; + } + if ((bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) && rdev->is_virtfn) || + !bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx)) { + pma_cnt->port_rcv_packets = + cpu_to_be32((u32)(le64_to_cpu(hw_stats->rx_ucast_pkts)) & 0xFFFFFFFF); + pma_cnt->port_rcv_data = + cpu_to_be32((u32)((le64_to_cpu(hw_stats->rx_ucast_bytes) & + 0xFFFFFFFF) / 4)); + pma_cnt->port_xmit_packets = + cpu_to_be32((u32)(le64_to_cpu(hw_stats->tx_ucast_pkts)) & 0xFFFFFFFF); + pma_cnt->port_xmit_data = + cpu_to_be32((u32)((le64_to_cpu(hw_stats->tx_ucast_bytes) + & 0xFFFFFFFF) / 4)); + } else { + pma_cnt->port_rcv_packets = cpu_to_be32(estat->rx_roce_good_pkts); + pma_cnt->port_rcv_data = cpu_to_be32((estat->rx_roce_good_bytes / 4)); + pma_cnt->port_xmit_packets = cpu_to_be32(estat->tx_roce_pkts); + pma_cnt->port_xmit_data = cpu_to_be32((estat->tx_roce_bytes / 4)); + } + pma_cnt->port_rcv_constraint_errors = (u8)(le64_to_cpu(hw_stats->rx_discard_pkts) & 0xFF); + pma_cnt->port_rcv_errors = cpu_to_be16((u16)(le64_to_cpu(hw_stats->rx_error_pkts) + & 0xFFFF)); + pma_cnt->port_xmit_constraint_errors = (u8)(le64_to_cpu(hw_stats->tx_error_pkts) & 0xFF); + pma_cnt->port_xmit_discards = cpu_to_be16((u16)(le64_to_cpu(hw_stats->tx_discard_pkts) + & 
0xFFFF)); + + return 0; +} + int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, u32 port, int index) @@ -357,8 +440,8 @@ int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev, goto done; } bnxt_re_copy_err_stats(rdev, stats, err_s); - if (_is_ext_stats_supported(rdev->dev_attr.dev_cap_flags) && - !rdev->is_virtfn) { + if (bnxt_ext_stats_supported(rdev->chip_ctx, rdev->dev_attr->dev_cap_flags, + rdev->is_virtfn)) { rc = bnxt_re_get_ext_stat(rdev, stats); if (rc) { clear_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 82023394e330..9082b3fd2b47 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -49,11 +49,10 @@ #include <rdma/ib_addr.h> #include <rdma/ib_mad.h> #include <rdma/ib_cache.h> +#include <rdma/ib_pma.h> #include <rdma/uverbs_ioctl.h> #include <linux/hashtable.h> -#include "bnxt_ulp.h" - #include "roce_hsi.h" #include "qplib_res.h" #include "qplib_sp.h" @@ -161,7 +160,7 @@ static int __qp_access_flags_to_ib(struct bnxt_qplib_chip_ctx *cctx, u8 qflags) static void bnxt_re_check_and_set_relaxed_ordering(struct bnxt_re_dev *rdev, struct bnxt_qplib_mrw *qplib_mr) { - if (_is_relaxed_ordering_supported(rdev->dev_attr.dev_cap_flags2) && + if (_is_relaxed_ordering_supported(rdev->dev_attr->dev_cap_flags2) && pcie_relaxed_ordering_enabled(rdev->en_dev->pdev)) qplib_mr->flags |= CMDQ_REGISTER_MR_FLAGS_ENABLE_RO; } @@ -186,7 +185,7 @@ int bnxt_re_query_device(struct ib_device *ibdev, struct ib_udata *udata) { struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev); - struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr; + struct bnxt_qplib_dev_attr *dev_attr = rdev->dev_attr; memset(ib_attr, 0, sizeof(*ib_attr)); memcpy(&ib_attr->fw_ver, dev_attr->fw_ver, @@ -199,7 +198,7 @@ int bnxt_re_query_device(struct ib_device *ibdev, ib_attr->vendor_id = rdev->en_dev->pdev->vendor; ib_attr->vendor_part_id = rdev->en_dev->pdev->device; - ib_attr->hw_ver = rdev->en_dev->pdev->subsystem_device; + ib_attr->hw_ver = rdev->en_dev->pdev->revision; ib_attr->max_qp = dev_attr->max_qp; ib_attr->max_qp_wr = dev_attr->max_qp_wqes; ib_attr->device_cap_flags = @@ -275,7 +274,7 @@ int bnxt_re_query_port(struct ib_device *ibdev, u32 port_num, struct ib_port_attr *port_attr) { struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev); - struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr; + struct bnxt_qplib_dev_attr *dev_attr = rdev->dev_attr; int rc; memset(port_attr, 0, sizeof(*port_attr)); @@ -333,8 +332,8 @@ void bnxt_re_query_fw_str(struct ib_device *ibdev, char *str) struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev); snprintf(str, IB_FW_VERSION_NAME_MAX, "%d.%d.%d.%d", - rdev->dev_attr.fw_ver[0], rdev->dev_attr.fw_ver[1], - rdev->dev_attr.fw_ver[2], rdev->dev_attr.fw_ver[3]); + rdev->dev_attr->fw_ver[0], rdev->dev_attr->fw_ver[1], + rdev->dev_attr->fw_ver[2], rdev->dev_attr->fw_ver[3]); } int bnxt_re_query_pkey(struct ib_device *ibdev, u32 port_num, @@ -585,7 +584,7 @@ static int bnxt_re_create_fence_mr(struct bnxt_re_pd *pd) mr->qplib_mr.pd = &pd->qplib_pd; mr->qplib_mr.type = CMDQ_ALLOCATE_MRW_MRW_FLAGS_PMR; mr->qplib_mr.access_flags = __from_ib_access_flags(mr_access_flags); - if (!_is_alloc_mr_unified(rdev->dev_attr.dev_cap_flags)) { + if (!_is_alloc_mr_unified(rdev->dev_attr->dev_cap_flags)) { rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mr->qplib_mr); if (rc) { ibdev_err(&rdev->ibdev, "Failed to alloc fence-HW-MR\n"); @@ -967,13 
+966,13 @@ int bnxt_re_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata) unsigned int flags; int rc; + bnxt_re_debug_rem_qpinfo(rdev, qp); + bnxt_qplib_flush_cqn_wq(&qp->qplib_qp); rc = bnxt_qplib_destroy_qp(&rdev->qplib_res, &qp->qplib_qp); - if (rc) { + if (rc) ibdev_err(&rdev->ibdev, "Failed to destroy HW QP"); - return rc; - } if (rdma_is_kernel_res(&qp->ib_qp.res)) { flags = bnxt_re_lock_cqs(qp); @@ -983,11 +982,8 @@ int bnxt_re_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata) bnxt_qplib_free_qp_res(&rdev->qplib_res, &qp->qplib_qp); - if (ib_qp->qp_type == IB_QPT_GSI && rdev->gsi_ctx.gsi_sqp) { - rc = bnxt_re_destroy_gsi_sqp(qp); - if (rc) - return rc; - } + if (ib_qp->qp_type == IB_QPT_GSI && rdev->gsi_ctx.gsi_sqp) + bnxt_re_destroy_gsi_sqp(qp); mutex_lock(&rdev->qp_lock); list_del(&qp->list); @@ -998,8 +994,6 @@ int bnxt_re_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata) else if (qp->qplib_qp.type == CMDQ_CREATE_QP_TYPE_UD) atomic_dec(&rdev->stats.res.ud_qp_count); - bnxt_re_debug_rem_qpinfo(rdev, qp); - ib_umem_release(qp->rumem); ib_umem_release(qp->sumem); @@ -1062,7 +1056,7 @@ static int bnxt_re_setup_swqe_size(struct bnxt_re_qp *qp, rdev = qp->rdev; qplqp = &qp->qplib_qp; sq = &qplqp->sq; - dev_attr = &rdev->dev_attr; + dev_attr = rdev->dev_attr; align = sizeof(struct sq_send_hdr); ilsize = ALIGN(init_attr->cap.max_inline_data, align); @@ -1282,7 +1276,7 @@ static int bnxt_re_init_rq_attr(struct bnxt_re_qp *qp, rdev = qp->rdev; qplqp = &qp->qplib_qp; rq = &qplqp->rq; - dev_attr = &rdev->dev_attr; + dev_attr = rdev->dev_attr; if (init_attr->srq) { struct bnxt_re_srq *srq; @@ -1319,7 +1313,7 @@ static void bnxt_re_adjust_gsi_rq_attr(struct bnxt_re_qp *qp) rdev = qp->rdev; qplqp = &qp->qplib_qp; - dev_attr = &rdev->dev_attr; + dev_attr = rdev->dev_attr; if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) { qplqp->rq.max_sge = dev_attr->max_qp_sges; @@ -1345,7 +1339,7 @@ static int bnxt_re_init_sq_attr(struct bnxt_re_qp *qp, rdev = qp->rdev; qplqp = &qp->qplib_qp; sq = &qplqp->sq; - dev_attr = &rdev->dev_attr; + dev_attr = rdev->dev_attr; sq->max_sge = init_attr->cap.max_send_sge; entries = init_attr->cap.max_send_wr; @@ -1398,7 +1392,7 @@ static void bnxt_re_adjust_gsi_sq_attr(struct bnxt_re_qp *qp, rdev = qp->rdev; qplqp = &qp->qplib_qp; - dev_attr = &rdev->dev_attr; + dev_attr = rdev->dev_attr; if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) { entries = bnxt_re_init_depth(init_attr->cap.max_send_wr + 1, uctx); @@ -1447,7 +1441,7 @@ static int bnxt_re_init_qp_attr(struct bnxt_re_qp *qp, struct bnxt_re_pd *pd, rdev = qp->rdev; qplqp = &qp->qplib_qp; - dev_attr = &rdev->dev_attr; + dev_attr = rdev->dev_attr; /* Setup misc params */ ether_addr_copy(qplqp->smac, rdev->netdev->dev_addr); @@ -1617,7 +1611,7 @@ int bnxt_re_create_qp(struct ib_qp *ib_qp, struct ib_qp_init_attr *qp_init_attr, ib_pd = ib_qp->pd; pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd); rdev = pd->rdev; - dev_attr = &rdev->dev_attr; + dev_attr = rdev->dev_attr; qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp); uctx = rdma_udata_to_drv_context(udata, struct bnxt_re_ucontext, ib_uctx); @@ -1845,7 +1839,7 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq, ib_pd = ib_srq->pd; pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd); rdev = pd->rdev; - dev_attr = &rdev->dev_attr; + dev_attr = rdev->dev_attr; srq = container_of(ib_srq, struct bnxt_re_srq, ib_srq); if (srq_init_attr->attr.max_wr >= dev_attr->max_srq_wqes) { @@ -1877,6 +1871,8 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq, 
srq->qplib_srq.threshold = srq_init_attr->attr.srq_limit; srq->srq_limit = srq_init_attr->attr.srq_limit; srq->qplib_srq.eventq_hw_ring_id = rdev->nqr->nq[0].ring_id; + srq->qplib_srq.sg_info.pgsize = PAGE_SIZE; + srq->qplib_srq.sg_info.pgshft = PAGE_SHIFT; nq = &rdev->nqr->nq[0]; if (udata) { @@ -2049,7 +2045,7 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr, { struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp); struct bnxt_re_dev *rdev = qp->rdev; - struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr; + struct bnxt_qplib_dev_attr *dev_attr = rdev->dev_attr; enum ib_qp_state curr_qp_state, new_qp_state; int rc, entries; unsigned int flags; @@ -2167,18 +2163,20 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr, } } - if (qp_attr_mask & IB_QP_PATH_MTU) { - qp->qplib_qp.modify_flags |= - CMDQ_MODIFY_QP_MODIFY_MASK_PATH_MTU; - qp->qplib_qp.path_mtu = __from_ib_mtu(qp_attr->path_mtu); - qp->qplib_qp.mtu = ib_mtu_enum_to_int(qp_attr->path_mtu); - } else if (qp_attr->qp_state == IB_QPS_RTR) { - qp->qplib_qp.modify_flags |= - CMDQ_MODIFY_QP_MODIFY_MASK_PATH_MTU; - qp->qplib_qp.path_mtu = - __from_ib_mtu(iboe_get_mtu(rdev->netdev->mtu)); - qp->qplib_qp.mtu = - ib_mtu_enum_to_int(iboe_get_mtu(rdev->netdev->mtu)); + if (qp_attr->qp_state == IB_QPS_RTR) { + enum ib_mtu qpmtu; + + qpmtu = iboe_get_mtu(rdev->netdev->mtu); + if (qp_attr_mask & IB_QP_PATH_MTU) { + if (ib_mtu_enum_to_int(qp_attr->path_mtu) > + ib_mtu_enum_to_int(qpmtu)) + return -EINVAL; + qpmtu = qp_attr->path_mtu; + } + + qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_PATH_MTU; + qp->qplib_qp.path_mtu = __from_ib_mtu(qpmtu); + qp->qplib_qp.mtu = ib_mtu_enum_to_int(qpmtu); } if (qp_attr_mask & IB_QP_TIMEOUT) { @@ -2328,6 +2326,7 @@ int bnxt_re_query_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr, qp_attr->retry_cnt = qplib_qp->retry_cnt; qp_attr->rnr_retry = qplib_qp->rnr_retry; qp_attr->min_rnr_timer = qplib_qp->min_rnr_timer; + qp_attr->port_num = __to_ib_port_num(qplib_qp->port_id); qp_attr->rq_psn = qplib_qp->rq.psn; qp_attr->max_rd_atomic = qplib_qp->max_rd_atomic; qp_attr->sq_psn = qplib_qp->sq.psn; @@ -2824,7 +2823,8 @@ bad: wr = wr->next; } bnxt_qplib_post_send_db(&qp->qplib_qp); - bnxt_ud_qp_hw_stall_workaround(qp); + if (!bnxt_qplib_is_chip_gen_p5_p7(qp->rdev->chip_ctx)) + bnxt_ud_qp_hw_stall_workaround(qp); spin_unlock_irqrestore(&qp->sq_lock, flags); return rc; } @@ -2936,7 +2936,8 @@ bad: wr = wr->next; } bnxt_qplib_post_send_db(&qp->qplib_qp); - bnxt_ud_qp_hw_stall_workaround(qp); + if (!bnxt_qplib_is_chip_gen_p5_p7(qp->rdev->chip_ctx)) + bnxt_ud_qp_hw_stall_workaround(qp); spin_unlock_irqrestore(&qp->sq_lock, flags); return rc; @@ -3091,7 +3092,7 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ib_udata *udata = &attrs->driver_udata; struct bnxt_re_ucontext *uctx = rdma_udata_to_drv_context(udata, struct bnxt_re_ucontext, ib_uctx); - struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr; + struct bnxt_qplib_dev_attr *dev_attr = rdev->dev_attr; struct bnxt_qplib_chip_ctx *cctx; int cqe = attr->cqe; int rc, entries; @@ -3226,7 +3227,7 @@ int bnxt_re_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) cq = container_of(ibcq, struct bnxt_re_cq, ib_cq); rdev = cq->rdev; - dev_attr = &rdev->dev_attr; + dev_attr = rdev->dev_attr; if (!ibcq->uobject) { ibdev_err(&rdev->ibdev, "Kernel CQ Resize not supported"); return -EOPNOTSUPP; @@ -4199,7 +4200,7 @@ static struct ib_mr 
*__bnxt_re_user_reg_mr(struct ib_pd *ib_pd, u64 length, u64 mr->qplib_mr.access_flags = __from_ib_access_flags(mr_access_flags); mr->qplib_mr.type = CMDQ_ALLOCATE_MRW_MRW_FLAGS_MR; - if (!_is_alloc_mr_unified(rdev->dev_attr.dev_cap_flags)) { + if (!_is_alloc_mr_unified(rdev->dev_attr->dev_cap_flags)) { rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mr->qplib_mr); if (rc) { ibdev_err(&rdev->ibdev, "Failed to allocate MR rc = %d", rc); @@ -4291,7 +4292,7 @@ int bnxt_re_alloc_ucontext(struct ib_ucontext *ctx, struct ib_udata *udata) struct bnxt_re_ucontext *uctx = container_of(ctx, struct bnxt_re_ucontext, ib_uctx); struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev); - struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr; + struct bnxt_qplib_dev_attr *dev_attr = rdev->dev_attr; struct bnxt_re_user_mmap_entry *entry; struct bnxt_re_uctx_resp resp = {}; struct bnxt_re_uctx_req ureq = {}; @@ -4467,9 +4468,10 @@ int bnxt_re_mmap(struct ib_ucontext *ib_uctx, struct vm_area_struct *vma) case BNXT_RE_MMAP_TOGGLE_PAGE: /* Driver doesn't expect write access for user space */ if (vma->vm_flags & VM_WRITE) - return -EFAULT; - ret = vm_insert_page(vma, vma->vm_start, - virt_to_page((void *)bnxt_entry->mem_offset)); + ret = -EFAULT; + else + ret = vm_insert_page(vma, vma->vm_start, + virt_to_page((void *)bnxt_entry->mem_offset)); break; default: ret = -EINVAL; @@ -4490,6 +4492,41 @@ void bnxt_re_mmap_free(struct rdma_user_mmap_entry *rdma_entry) kfree(bnxt_entry); } +int bnxt_re_process_mad(struct ib_device *ibdev, int mad_flags, + u32 port_num, const struct ib_wc *in_wc, + const struct ib_grh *in_grh, + const struct ib_mad *in_mad, struct ib_mad *out_mad, + size_t *out_mad_size, u16 *out_mad_pkey_index) +{ + struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev); + struct ib_class_port_info cpi = {}; + int ret = IB_MAD_RESULT_SUCCESS; + int rc = 0; + + if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_PERF_MGMT) + return ret; + + switch (in_mad->mad_hdr.attr_id) { + case IB_PMA_CLASS_PORT_INFO: + cpi.capability_mask = IB_PMA_CLASS_CAP_EXT_WIDTH; + memcpy((out_mad->data + 40), &cpi, sizeof(cpi)); + break; + case IB_PMA_PORT_COUNTERS_EXT: + rc = bnxt_re_assign_pma_port_ext_counters(rdev, out_mad); + break; + case IB_PMA_PORT_COUNTERS: + rc = bnxt_re_assign_pma_port_counters(rdev, out_mad); + break; + default: + rc = -EINVAL; + break; + } + if (rc) + return IB_MAD_RESULT_FAILURE; + ret |= IB_MAD_RESULT_REPLY; + return ret; +} + static int UVERBS_HANDLER(BNXT_RE_METHOD_NOTIFY_DRV)(struct uverbs_attr_bundle *attrs) { struct bnxt_re_ucontext *uctx; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h index ac59f1d73b15..22c9eb8e9cfc 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h @@ -268,6 +268,16 @@ void bnxt_re_dealloc_ucontext(struct ib_ucontext *context); int bnxt_re_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); void bnxt_re_mmap_free(struct rdma_user_mmap_entry *rdma_entry); +int bnxt_re_process_mad(struct ib_device *device, int process_mad_flags, + u32 port_num, const struct ib_wc *in_wc, + const struct ib_grh *in_grh, + const struct ib_mad *in_mad, struct ib_mad *out_mad, + size_t *out_mad_size, u16 *out_mad_pkey_index); + +static inline u32 __to_ib_port_num(u16 port_id) +{ + return (u32)port_id + 1; +} unsigned long bnxt_re_lock_cqs(struct bnxt_re_qp *qp); void bnxt_re_unlock_cqs(struct bnxt_re_qp *qp, unsigned long flags); diff --git a/drivers/infiniband/hw/bnxt_re/main.c 
b/drivers/infiniband/hw/bnxt_re/main.c index b7af0d5ff3b6..293b0a96c8e3 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -79,17 +79,12 @@ MODULE_LICENSE("Dual BSD/GPL"); /* globals */ static DEFINE_MUTEX(bnxt_re_mutex); -static void bnxt_re_stop_irq(void *handle); -static void bnxt_re_dev_stop(struct bnxt_re_dev *rdev); -static int bnxt_re_netdev_event(struct notifier_block *notifier, - unsigned long event, void *ptr); -static struct bnxt_re_dev *bnxt_re_from_netdev(struct net_device *netdev); -static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev, u8 op_type); static int bnxt_re_hwrm_qcaps(struct bnxt_re_dev *rdev); static int bnxt_re_hwrm_qcfg(struct bnxt_re_dev *rdev, u32 *db_len, u32 *offset); -static void bnxt_re_setup_cc(struct bnxt_re_dev *rdev, bool enable); +static void bnxt_re_dispatch_event(struct ib_device *ibdev, struct ib_qp *qp, + u8 port_num, enum ib_event_type event); static void bnxt_re_set_db_offset(struct bnxt_re_dev *rdev) { struct bnxt_qplib_chip_ctx *cctx; @@ -153,6 +148,10 @@ static void bnxt_re_destroy_chip_ctx(struct bnxt_re_dev *rdev) if (!rdev->chip_ctx) return; + + kfree(rdev->dev_attr); + rdev->dev_attr = NULL; + chip_ctx = rdev->chip_ctx; rdev->chip_ctx = NULL; rdev->rcfw.res = NULL; @@ -166,7 +165,7 @@ static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev) { struct bnxt_qplib_chip_ctx *chip_ctx; struct bnxt_en_dev *en_dev; - int rc; + int rc = -ENOMEM; en_dev = rdev->en_dev; @@ -182,7 +181,10 @@ static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev) rdev->qplib_res.cctx = rdev->chip_ctx; rdev->rcfw.res = &rdev->qplib_res; - rdev->qplib_res.dattr = &rdev->dev_attr; + rdev->dev_attr = kzalloc(sizeof(*rdev->dev_attr), GFP_KERNEL); + if (!rdev->dev_attr) + goto free_chip_ctx; + rdev->qplib_res.dattr = rdev->dev_attr; rdev->qplib_res.is_vf = BNXT_EN_VF(en_dev); rdev->qplib_res.en_dev = en_dev; @@ -190,16 +192,20 @@ static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev) bnxt_re_set_db_offset(rdev); rc = bnxt_qplib_map_db_bar(&rdev->qplib_res); - if (rc) { - kfree(rdev->chip_ctx); - rdev->chip_ctx = NULL; - return rc; - } + if (rc) + goto free_dev_attr; if (bnxt_qplib_determine_atomics(en_dev->pdev)) ibdev_info(&rdev->ibdev, "platform doesn't support global atomics."); return 0; +free_dev_attr: + kfree(rdev->dev_attr); + rdev->dev_attr = NULL; +free_chip_ctx: + kfree(rdev->chip_ctx); + rdev->chip_ctx = NULL; + return rc; } /* SR-IOV helper functions */ @@ -221,7 +227,7 @@ static void bnxt_re_limit_pf_res(struct bnxt_re_dev *rdev) struct bnxt_qplib_ctx *ctx; int i; - attr = &rdev->dev_attr; + attr = rdev->dev_attr; ctx = &rdev->qplib_ctx; ctx->qpc_count = min_t(u32, BNXT_RE_MAX_QPC_COUNT, @@ -235,7 +241,7 @@ static void bnxt_re_limit_pf_res(struct bnxt_re_dev *rdev) if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) for (i = 0; i < MAX_TQM_ALLOC_REQ; i++) rdev->qplib_ctx.tqm_ctx.qcount[i] = - rdev->dev_attr.tqm_alloc_reqs[i]; + rdev->dev_attr->tqm_alloc_reqs[i]; } static void bnxt_re_limit_vf_res(struct bnxt_qplib_ctx *qplib_ctx, u32 num_vf) @@ -302,17 +308,128 @@ static void bnxt_re_vf_res_config(struct bnxt_re_dev *rdev) &rdev->qplib_ctx); } -static void bnxt_re_shutdown(struct auxiliary_device *adev) +struct bnxt_re_dcb_work { + struct work_struct work; + struct bnxt_re_dev *rdev; + struct hwrm_async_event_cmpl cmpl; +}; + +static bool bnxt_re_is_qp1_qp(struct bnxt_re_qp *qp) { - struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(adev); + return qp->ib_qp.qp_type == IB_QPT_GSI; +} + +static 
struct bnxt_re_qp *bnxt_re_get_qp1_qp(struct bnxt_re_dev *rdev) +{ + struct bnxt_re_qp *qp; + + mutex_lock(&rdev->qp_lock); + list_for_each_entry(qp, &rdev->qp_list, list) { + if (bnxt_re_is_qp1_qp(qp)) { + mutex_unlock(&rdev->qp_lock); + return qp; + } + } + mutex_unlock(&rdev->qp_lock); + return NULL; +} + +static int bnxt_re_update_qp1_tos_dscp(struct bnxt_re_dev *rdev) +{ + struct bnxt_re_qp *qp; + + if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) + return 0; + + qp = bnxt_re_get_qp1_qp(rdev); + if (!qp) + return 0; + + qp->qplib_qp.modify_flags = CMDQ_MODIFY_QP_MODIFY_MASK_TOS_DSCP; + qp->qplib_qp.tos_dscp = rdev->cc_param.qp1_tos_dscp; + + return bnxt_qplib_modify_qp(&rdev->qplib_res, &qp->qplib_qp); +} + +static void bnxt_re_init_dcb_wq(struct bnxt_re_dev *rdev) +{ + rdev->dcb_wq = create_singlethread_workqueue("bnxt_re_dcb_wq"); +} + +static void bnxt_re_uninit_dcb_wq(struct bnxt_re_dev *rdev) +{ + if (!rdev->dcb_wq) + return; + destroy_workqueue(rdev->dcb_wq); +} + +static void bnxt_re_dcb_wq_task(struct work_struct *work) +{ + struct bnxt_re_dcb_work *dcb_work = + container_of(work, struct bnxt_re_dcb_work, work); + struct bnxt_re_dev *rdev = dcb_work->rdev; + struct bnxt_qplib_cc_param *cc_param; + int rc; + + if (!rdev) + goto free_dcb; + + cc_param = &rdev->cc_param; + rc = bnxt_qplib_query_cc_param(&rdev->qplib_res, cc_param); + if (rc) { + ibdev_dbg(&rdev->ibdev, "Failed to query ccparam rc:%d", rc); + goto free_dcb; + } + if (cc_param->qp1_tos_dscp != cc_param->tos_dscp) { + cc_param->qp1_tos_dscp = cc_param->tos_dscp; + rc = bnxt_re_update_qp1_tos_dscp(rdev); + if (rc) { + ibdev_dbg(&rdev->ibdev, "%s: Failed to modify QP1 rc:%d", + __func__, rc); + goto free_dcb; + } + } + +free_dcb: + kfree(dcb_work); +} + +static void bnxt_re_async_notifier(void *handle, struct hwrm_async_event_cmpl *cmpl) +{ + struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(handle); + struct bnxt_re_dcb_work *dcb_work; struct bnxt_re_dev *rdev; + u32 data1, data2; + u16 event_id; rdev = en_info->rdev; - ib_unregister_device(&rdev->ibdev); - bnxt_re_dev_uninit(rdev, BNXT_RE_COMPLETE_REMOVE); + if (!rdev) + return; + + event_id = le16_to_cpu(cmpl->event_id); + data1 = le32_to_cpu(cmpl->event_data1); + data2 = le32_to_cpu(cmpl->event_data2); + + ibdev_dbg(&rdev->ibdev, "Async event_id = %d data1 = %d data2 = %d", + event_id, data1, data2); + + switch (event_id) { + case ASYNC_EVENT_CMPL_EVENT_ID_DCB_CONFIG_CHANGE: + dcb_work = kzalloc(sizeof(*dcb_work), GFP_ATOMIC); + if (!dcb_work) + break; + + dcb_work->rdev = rdev; + memcpy(&dcb_work->cmpl, cmpl, sizeof(*cmpl)); + INIT_WORK(&dcb_work->work, bnxt_re_dcb_wq_task); + queue_work(rdev->dcb_wq, &dcb_work->work); + break; + default: + break; + } } -static void bnxt_re_stop_irq(void *handle) +static void bnxt_re_stop_irq(void *handle, bool reset) { struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(handle); struct bnxt_qplib_rcfw *rcfw; @@ -321,8 +438,18 @@ static void bnxt_re_stop_irq(void *handle) int indx; rdev = en_info->rdev; + if (!rdev) + return; rcfw = &rdev->rcfw; + if (reset) { + set_bit(ERR_DEVICE_DETACHED, &rdev->rcfw.cmdq.flags); + set_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags); + wake_up_all(&rdev->rcfw.cmdq.waitq); + bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, + IB_EVENT_DEVICE_FATAL); + } + for (indx = BNXT_RE_NQ_IDX; indx < rdev->nqr->num_msix; indx++) { nq = &rdev->nqr->nq[indx - 1]; bnxt_qplib_nq_stop_irq(nq, false); @@ -341,6 +468,8 @@ static void bnxt_re_start_irq(void *handle, struct bnxt_msix_entry *ent) 
int indx, rc; rdev = en_info->rdev; + if (!rdev) + return; msix_ent = rdev->nqr->msix_entries; rcfw = &rdev->rcfw; if (!ent) { @@ -378,6 +507,7 @@ static void bnxt_re_start_irq(void *handle, struct bnxt_msix_entry *ent) } static struct bnxt_ulp_ops bnxt_re_ulp_ops = { + .ulp_async_notifier = bnxt_re_async_notifier, .ulp_irq_stop = bnxt_re_stop_irq, .ulp_irq_restart = bnxt_re_start_irq }; @@ -839,17 +969,6 @@ static void bnxt_re_disassociate_ucontext(struct ib_ucontext *ibcontext) } /* Device */ - -static struct bnxt_re_dev *bnxt_re_from_netdev(struct net_device *netdev) -{ - struct ib_device *ibdev = - ib_device_get_by_netdev(netdev, RDMA_DRIVER_BNXT_RE); - if (!ibdev) - return NULL; - - return container_of(ibdev, struct bnxt_re_dev, ibdev); -} - static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr, char *buf) { @@ -1166,6 +1285,7 @@ static const struct ib_device_ops bnxt_re_dev_ops = { .post_recv = bnxt_re_post_recv, .post_send = bnxt_re_post_send, .post_srq_recv = bnxt_re_post_srq_recv, + .process_mad = bnxt_re_process_mad, .query_ah = bnxt_re_query_ah, .query_device = bnxt_re_query_device, .modify_device = bnxt_re_modify_device, @@ -1240,7 +1360,6 @@ static struct bnxt_re_dev *bnxt_re_dev_add(struct auxiliary_device *adev, return NULL; } /* Default values */ - rdev->nb.notifier_call = NULL; rdev->netdev = en_dev->net; rdev->en_dev = en_dev; rdev->adev = adev; @@ -1627,12 +1746,11 @@ static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev) /* Configure and allocate resources for qplib */ rdev->qplib_res.rcfw = &rdev->rcfw; - rc = bnxt_qplib_get_dev_attr(&rdev->rcfw, &rdev->dev_attr); + rc = bnxt_qplib_get_dev_attr(&rdev->rcfw); if (rc) goto fail; - rc = bnxt_qplib_alloc_res(&rdev->qplib_res, rdev->en_dev->pdev, - rdev->netdev, &rdev->dev_attr); + rc = bnxt_qplib_alloc_res(&rdev->qplib_res, rdev->netdev); if (rc) goto fail; @@ -1715,11 +1833,8 @@ static bool bnxt_re_is_qp1_or_shadow_qp(struct bnxt_re_dev *rdev, static void bnxt_re_dev_stop(struct bnxt_re_dev *rdev) { - int mask = IB_QP_STATE; - struct ib_qp_attr qp_attr; struct bnxt_re_qp *qp; - qp_attr.qp_state = IB_QPS_ERR; mutex_lock(&rdev->qp_lock); list_for_each_entry(qp, &rdev->qp_list, list) { /* Modify the state of all QPs except QP1/Shadow QP */ @@ -1727,12 +1842,9 @@ static void bnxt_re_dev_stop(struct bnxt_re_dev *rdev) if (qp->qplib_qp.state != CMDQ_MODIFY_QP_NEW_STATE_RESET && qp->qplib_qp.state != - CMDQ_MODIFY_QP_NEW_STATE_ERR) { + CMDQ_MODIFY_QP_NEW_STATE_ERR) bnxt_re_dispatch_event(&rdev->ibdev, &qp->ib_qp, 1, IB_EVENT_QP_FATAL); - bnxt_re_modify_qp(&qp->ib_qp, &qp_attr, mask, - NULL); - } } } mutex_unlock(&rdev->qp_lock); @@ -1813,6 +1925,26 @@ static int bnxt_re_setup_qos(struct bnxt_re_dev *rdev) return 0; } +static void bnxt_re_net_unregister_async_event(struct bnxt_re_dev *rdev) +{ + if (rdev->is_virtfn) + return; + + memset(&rdev->event_bitmap, 0, sizeof(rdev->event_bitmap)); + bnxt_register_async_events(rdev->en_dev, &rdev->event_bitmap, + ASYNC_EVENT_CMPL_EVENT_ID_DCB_CONFIG_CHANGE); +} + +static void bnxt_re_net_register_async_event(struct bnxt_re_dev *rdev) +{ + if (rdev->is_virtfn) + return; + + rdev->event_bitmap |= (1 << ASYNC_EVENT_CMPL_EVENT_ID_DCB_CONFIG_CHANGE); + bnxt_register_async_events(rdev->en_dev, &rdev->event_bitmap, + ASYNC_EVENT_CMPL_EVENT_ID_DCB_CONFIG_CHANGE); +} + static void bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev) { struct bnxt_en_dev *en_dev = rdev->en_dev; @@ -1892,6 +2024,9 @@ static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev, u8 op_type) 
bnxt_re_debugfs_rem_pdev(rdev); + bnxt_re_net_unregister_async_event(rdev); + bnxt_re_uninit_dcb_wq(rdev); + if (test_and_clear_bit(BNXT_RE_FLAG_QOS_WORK_REG, &rdev->flags)) cancel_delayed_work_sync(&rdev->worker); @@ -1996,8 +2131,7 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 op_type) * memory for the function and all child VFs */ rc = bnxt_qplib_alloc_rcfw_channel(&rdev->qplib_res, &rdev->rcfw, - &rdev->qplib_ctx, - BNXT_RE_MAX_QPC_COUNT); + &rdev->qplib_ctx); if (rc) { ibdev_err(&rdev->ibdev, "Failed to allocate RCFW Channel: %#x\n", rc); @@ -2038,7 +2172,7 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 op_type) rdev->pacing.dbr_pacing = false; } } - rc = bnxt_qplib_get_dev_attr(&rdev->rcfw, &rdev->dev_attr); + rc = bnxt_qplib_get_dev_attr(&rdev->rcfw); if (rc) goto disable_rcfw; @@ -2087,6 +2221,11 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 op_type) set_bit(BNXT_RE_FLAG_RESOURCES_INITIALIZED, &rdev->flags); if (!rdev->is_virtfn) { + /* Query f/w defaults of CC params */ + rc = bnxt_qplib_query_cc_param(&rdev->qplib_res, &rdev->cc_param); + if (rc) + ibdev_warn(&rdev->ibdev, "Failed to query CC defaults\n"); + rc = bnxt_re_setup_qos(rdev); if (rc) ibdev_info(&rdev->ibdev, @@ -2105,6 +2244,9 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 op_type) bnxt_re_debugfs_add_pdev(rdev); + bnxt_re_init_dcb_wq(rdev); + bnxt_re_net_register_async_event(rdev); + return 0; free_sctx: bnxt_re_net_stats_ctx_free(rdev, rdev->qplib_ctx.stats.fw_id); @@ -2123,6 +2265,30 @@ fail: return rc; } +static void bnxt_re_setup_cc(struct bnxt_re_dev *rdev, bool enable) +{ + struct bnxt_qplib_cc_param cc_param = {}; + + /* Do not enable congestion control on VFs */ + if (rdev->is_virtfn) + return; + + /* Currently enabling only for GenP5 adapters */ + if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) + return; + + if (enable) { + cc_param.enable = 1; + cc_param.tos_ecn = 1; + } + + cc_param.mask = (CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ENABLE_CC | + CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_ECN); + + if (bnxt_qplib_modify_cc(&rdev->qplib_res, &cc_param)) + ibdev_err(&rdev->ibdev, "Failed to setup CC enable = %d\n", enable); +} + static void bnxt_re_update_en_info_rdev(struct bnxt_re_dev *rdev, struct bnxt_re_en_dev_info *en_info, struct auxiliary_device *adev) @@ -2169,20 +2335,10 @@ static int bnxt_re_add_device(struct auxiliary_device *adev, u8 op_type) goto re_dev_uninit; } - rdev->nb.notifier_call = bnxt_re_netdev_event; - rc = register_netdevice_notifier(&rdev->nb); - if (rc) { - rdev->nb.notifier_call = NULL; - pr_err("%s: Cannot register to netdevice_notifier", - ROCE_DRV_MODULE_NAME); - goto re_dev_unreg; - } bnxt_re_setup_cc(rdev, true); return 0; -re_dev_unreg: - ib_unregister_device(&rdev->ibdev); re_dev_uninit: bnxt_re_update_en_info_rdev(NULL, en_info, adev); bnxt_re_dev_uninit(rdev, BNXT_RE_COMPLETE_REMOVE); @@ -2192,93 +2348,11 @@ exit: return rc; } -static void bnxt_re_setup_cc(struct bnxt_re_dev *rdev, bool enable) -{ - struct bnxt_qplib_cc_param cc_param = {}; - - /* Do not enable congestion control on VFs */ - if (rdev->is_virtfn) - return; - - /* Currently enabling only for GenP5 adapters */ - if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) - return; - - if (enable) { - cc_param.enable = 1; - cc_param.tos_ecn = 1; - } - - cc_param.mask = (CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ENABLE_CC | - CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_ECN); - - if (bnxt_qplib_modify_cc(&rdev->qplib_res, &cc_param)) - ibdev_err(&rdev->ibdev, "Failed to setup CC enable = %d\n", 
enable); -} - -/* - * "Notifier chain callback can be invoked for the same chain from - * different CPUs at the same time". - * - * For cases when the netdev is already present, our call to the - * register_netdevice_notifier() will actually get the rtnl_lock() - * before sending NETDEV_REGISTER and (if up) NETDEV_UP - * events. - * - * But for cases when the netdev is not already present, the notifier - * chain is subjected to be invoked from different CPUs simultaneously. - * - * This is protected by the netdev_mutex. - */ -static int bnxt_re_netdev_event(struct notifier_block *notifier, - unsigned long event, void *ptr) -{ - struct net_device *real_dev, *netdev = netdev_notifier_info_to_dev(ptr); - struct bnxt_re_dev *rdev; - - real_dev = rdma_vlan_dev_real_dev(netdev); - if (!real_dev) - real_dev = netdev; - - if (real_dev != netdev) - goto exit; - - rdev = bnxt_re_from_netdev(real_dev); - if (!rdev) - return NOTIFY_DONE; - - - switch (event) { - case NETDEV_UP: - case NETDEV_DOWN: - case NETDEV_CHANGE: - bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, - netif_carrier_ok(real_dev) ? - IB_EVENT_PORT_ACTIVE : - IB_EVENT_PORT_ERR); - break; - default: - break; - } - ib_device_put(&rdev->ibdev); -exit: - return NOTIFY_DONE; -} - #define BNXT_ADEV_NAME "bnxt_en" static void bnxt_re_remove_device(struct bnxt_re_dev *rdev, u8 op_type, struct auxiliary_device *aux_dev) { - if (rdev->nb.notifier_call) { - unregister_netdevice_notifier(&rdev->nb); - rdev->nb.notifier_call = NULL; - } else { - /* If notifier is null, we should have already done a - * clean up before coming here. - */ - return; - } bnxt_re_setup_cc(rdev, false); ib_unregister_device(&rdev->ibdev); bnxt_re_dev_uninit(rdev, op_type); @@ -2322,13 +2396,9 @@ static int bnxt_re_probe(struct auxiliary_device *adev, rc = bnxt_re_add_device(adev, BNXT_RE_COMPLETE_INIT); if (rc) - goto err; - mutex_unlock(&bnxt_re_mutex); - return 0; + kfree(en_info); -err: mutex_unlock(&bnxt_re_mutex); - kfree(en_info); return rc; } @@ -2362,6 +2432,7 @@ static int bnxt_re_suspend(struct auxiliary_device *adev, pm_message_t state) ibdev_info(&rdev->ibdev, "%s: L2 driver notified to stop en_state 0x%lx", __func__, en_dev->en_state); bnxt_re_remove_device(rdev, BNXT_RE_PRE_RECOVERY_REMOVE, adev); + bnxt_re_update_en_info_rdev(NULL, en_info, adev); mutex_unlock(&bnxt_re_mutex); return 0; @@ -2381,6 +2452,16 @@ static int bnxt_re_resume(struct auxiliary_device *adev) return 0; } +static void bnxt_re_shutdown(struct auxiliary_device *adev) +{ + struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(adev); + struct bnxt_re_dev *rdev; + + rdev = en_info->rdev; + ib_unregister_device(&rdev->ibdev); + bnxt_re_dev_uninit(rdev, BNXT_RE_COMPLETE_REMOVE); +} + static const struct auxiliary_device_id bnxt_re_id_table[] = { { .name = BNXT_ADEV_NAME ".rdma", }, {}, diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index e42abf5be6c0..457eecb99f96 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -659,13 +659,6 @@ int bnxt_qplib_create_srq(struct bnxt_qplib_res *res, rc = bnxt_qplib_alloc_init_hwq(&srq->hwq, &hwq_attr); if (rc) return rc; - - srq->swq = kcalloc(srq->hwq.max_elements, sizeof(*srq->swq), - GFP_KERNEL); - if (!srq->swq) { - rc = -ENOMEM; - goto fail; - } srq->dbinfo.flags = 0; bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req, CMDQ_BASE_OPCODE_CREATE_SRQ, @@ -694,9 +687,17 @@ int bnxt_qplib_create_srq(struct bnxt_qplib_res *res, spin_lock_init(&srq->lock); 
srq->start_idx = 0; srq->last_idx = srq->hwq.max_elements - 1; - for (idx = 0; idx < srq->hwq.max_elements; idx++) - srq->swq[idx].next_idx = idx + 1; - srq->swq[srq->last_idx].next_idx = -1; + if (!srq->hwq.is_user) { + srq->swq = kcalloc(srq->hwq.max_elements, sizeof(*srq->swq), + GFP_KERNEL); + if (!srq->swq) { + rc = -ENOMEM; + goto fail; + } + for (idx = 0; idx < srq->hwq.max_elements; idx++) + srq->swq[idx].next_idx = idx + 1; + srq->swq[srq->last_idx].next_idx = -1; + } srq->id = le32_to_cpu(resp.xid); srq->dbinfo.hwq = &srq->hwq; @@ -1000,9 +1001,7 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) u32 tbl_indx; u16 nsge; - if (res->dattr) - qp->is_host_msn_tbl = _is_host_msn_table(res->dattr->dev_cap_flags2); - + qp->is_host_msn_tbl = _is_host_msn_table(res->dattr->dev_cap_flags2); sq->dbinfo.flags = 0; bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req, CMDQ_BASE_OPCODE_CREATE_QP, @@ -1034,7 +1033,12 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) : 0; /* Update msn tbl size */ if (qp->is_host_msn_tbl && psn_sz) { - hwq_attr.aux_depth = roundup_pow_of_two(bnxt_qplib_set_sq_size(sq, qp->wqe_mode)); + if (qp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) + hwq_attr.aux_depth = + roundup_pow_of_two(bnxt_qplib_set_sq_size(sq, qp->wqe_mode)); + else + hwq_attr.aux_depth = + roundup_pow_of_two(bnxt_qplib_set_sq_size(sq, qp->wqe_mode)) / 2; qp->msn_tbl_sz = hwq_attr.aux_depth; qp->msn = 0; } @@ -1044,13 +1048,14 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) if (rc) return rc; - rc = bnxt_qplib_alloc_init_swq(sq); - if (rc) - goto fail_sq; - - if (psn_sz) - bnxt_qplib_init_psn_ptr(qp, psn_sz); + if (!sq->hwq.is_user) { + rc = bnxt_qplib_alloc_init_swq(sq); + if (rc) + goto fail_sq; + if (psn_sz) + bnxt_qplib_init_psn_ptr(qp, psn_sz); + } req.sq_size = cpu_to_le32(bnxt_qplib_set_sq_size(sq, qp->wqe_mode)); pbl = &sq->hwq.pbl[PBL_LVL_0]; req.sq_pbl = cpu_to_le64(pbl->pg_map_arr[0]); @@ -1076,9 +1081,11 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) rc = bnxt_qplib_alloc_init_hwq(&rq->hwq, &hwq_attr); if (rc) goto sq_swq; - rc = bnxt_qplib_alloc_init_swq(rq); - if (rc) - goto fail_rq; + if (!rq->hwq.is_user) { + rc = bnxt_qplib_alloc_init_swq(rq); + if (rc) + goto fail_rq; + } req.rq_size = cpu_to_le32(rq->max_wqe); pbl = &rq->hwq.pbl[PBL_LVL_0]; @@ -1174,9 +1181,11 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) rq->dbinfo.db = qp->dpi->dbr; rq->dbinfo.max_slot = bnxt_qplib_set_rq_max_slot(rq->wqe_size); } + spin_lock_bh(&rcfw->tbl_lock); tbl_indx = map_qp_id_to_tbl_indx(qp->id, rcfw); rcfw->qp_tbl[tbl_indx].qp_id = qp->id; rcfw->qp_tbl[tbl_indx].qp_handle = (void *)qp; + spin_unlock_bh(&rcfw->tbl_lock); return 0; fail: @@ -1208,8 +1217,6 @@ static void __modify_flags_from_init_state(struct bnxt_qplib_qp *qp) qp->path_mtu = CMDQ_MODIFY_QP_PATH_MTU_MTU_2048; } - qp->modify_flags &= - ~CMDQ_MODIFY_QP_MODIFY_MASK_VLAN_ID; /* Bono FW require the max_dest_rd_atomic to be >= 1 */ if (qp->max_dest_rd_atomic < 1) qp->max_dest_rd_atomic = 1; @@ -1283,7 +1290,8 @@ static void __filter_modify_flags(struct bnxt_qplib_qp *qp) } } -static void bnxt_set_mandatory_attributes(struct bnxt_qplib_qp *qp, +static void bnxt_set_mandatory_attributes(struct bnxt_qplib_res *res, + struct bnxt_qplib_qp *qp, struct cmdq_modify_qp *req) { u32 mandatory_flags = 0; @@ -1298,6 +1306,14 @@ static void bnxt_set_mandatory_attributes(struct bnxt_qplib_qp *qp, 
mandatory_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_PKEY; } + if (_is_min_rnr_in_rtr_rts_mandatory(res->dattr->dev_cap_flags2) && + (qp->cur_qp_state == CMDQ_MODIFY_QP_NEW_STATE_RTR && + qp->state == CMDQ_MODIFY_QP_NEW_STATE_RTS)) { + if (qp->type == CMDQ_MODIFY_QP_QP_TYPE_RC) + mandatory_flags |= + CMDQ_MODIFY_QP_MODIFY_MASK_MIN_RNR_TIMER; + } + if (qp->type == CMDQ_MODIFY_QP_QP_TYPE_UD || qp->type == CMDQ_MODIFY_QP_QP_TYPE_GSI) mandatory_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_QKEY; @@ -1338,7 +1354,7 @@ int bnxt_qplib_modify_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) /* Set mandatory attributes for INIT -> RTR and RTR -> RTS transition */ if (_is_optimize_modify_qp_supported(res->dattr->dev_cap_flags2) && is_optimized_state_transition(qp)) - bnxt_set_mandatory_attributes(qp, &req); + bnxt_set_mandatory_attributes(res, qp, &req); } bmask = qp->modify_flags; req.modify_mask = cpu_to_le32(qp->modify_flags); @@ -1521,6 +1537,7 @@ int bnxt_qplib_query_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) qp->dest_qpn = le32_to_cpu(sb->dest_qp_id); memcpy(qp->smac, sb->src_mac, 6); qp->vlan_id = le16_to_cpu(sb->vlan_pcp_vlan_dei_vlan_id); + qp->port_id = le16_to_cpu(sb->port_id); bail: dma_free_coherent(&rcfw->pdev->dev, sbuf.size, sbuf.sb, sbuf.dma_addr); @@ -2667,10 +2684,12 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq, bnxt_qplib_add_flush_qp(qp); } else { /* Before we complete, do WA 9060 */ - if (do_wa9060(qp, cq, cq_cons, sq->swq_last, - cqe_sq_cons)) { - *lib_qp = qp; - goto out; + if (!bnxt_qplib_is_chip_gen_p5_p7(qp->cctx)) { + if (do_wa9060(qp, cq, cq_cons, sq->swq_last, + cqe_sq_cons)) { + *lib_qp = qp; + goto out; + } } if (swq->flags & SQ_SEND_FLAGS_SIGNAL_COMP) { cqe->status = CQ_REQ_STATUS_OK; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h index ef3424c81345..0d9487c889ff 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h @@ -114,7 +114,6 @@ struct bnxt_qplib_sge { u32 size; }; -#define BNXT_QPLIB_QP_MAX_SGL 6 struct bnxt_qplib_swq { u64 wr_id; int next_idx; @@ -154,7 +153,7 @@ struct bnxt_qplib_swqe { #define BNXT_QPLIB_SWQE_FLAGS_UC_FENCE BIT(2) #define BNXT_QPLIB_SWQE_FLAGS_SOLICIT_EVENT BIT(3) #define BNXT_QPLIB_SWQE_FLAGS_INLINE BIT(4) - struct bnxt_qplib_sge sg_list[BNXT_QPLIB_QP_MAX_SGL]; + struct bnxt_qplib_sge sg_list[BNXT_VAR_MAX_SGE]; int num_sge; /* Max inline data is 96 bytes */ u32 inline_len; @@ -299,6 +298,7 @@ struct bnxt_qplib_qp { u32 dest_qpn; u8 smac[6]; u16 vlan_id; + u16 port_id; u8 nw_type; struct bnxt_qplib_ah ah; @@ -343,6 +343,7 @@ struct bnxt_qplib_qp { u32 msn; u32 msn_tbl_sz; bool is_host_msn_tbl; + u8 tos_dscp; }; #define BNXT_QPLIB_MAX_CQE_ENTRY_SIZE sizeof(struct cq_base) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index 5e90ea232de8..804bc773b4ef 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -160,7 +160,7 @@ static int __wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie) wait_event_timeout(cmdq->waitq, !crsqe->is_in_used || test_bit(ERR_DEVICE_DETACHED, &cmdq->flags), - msecs_to_jiffies(rcfw->max_timeout * 1000)); + secs_to_jiffies(rcfw->max_timeout)); if (!crsqe->is_in_used) return 0; @@ -424,7 +424,8 @@ static int __send_message_basic_sanity(struct bnxt_qplib_rcfw *rcfw, /* Prevent posting if f/w is not in a state to process */ if (test_bit(ERR_DEVICE_DETACHED, &rcfw->cmdq.flags)) - return 
bnxt_qplib_map_rc(opcode); + return -ENXIO; + if (test_bit(FIRMWARE_STALL_DETECTED, &cmdq->flags)) return -ETIMEDOUT; @@ -493,7 +494,7 @@ static int __bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw, rc = __send_message_basic_sanity(rcfw, msg, opcode); if (rc) - return rc; + return rc == -ENXIO ? bnxt_qplib_map_rc(opcode) : rc; rc = __send_message(rcfw, msg, opcode); if (rc) @@ -914,7 +915,6 @@ skip_ctx_setup: void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw) { - kfree(rcfw->qp_tbl); kfree(rcfw->crsqe_tbl); bnxt_qplib_free_hwq(rcfw->res, &rcfw->cmdq.hwq); bnxt_qplib_free_hwq(rcfw->res, &rcfw->creq.hwq); @@ -923,8 +923,7 @@ void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw) int bnxt_qplib_alloc_rcfw_channel(struct bnxt_qplib_res *res, struct bnxt_qplib_rcfw *rcfw, - struct bnxt_qplib_ctx *ctx, - int qp_tbl_sz) + struct bnxt_qplib_ctx *ctx) { struct bnxt_qplib_hwq_attr hwq_attr = {}; struct bnxt_qplib_sg_info sginfo = {}; @@ -968,12 +967,6 @@ int bnxt_qplib_alloc_rcfw_channel(struct bnxt_qplib_res *res, if (!rcfw->crsqe_tbl) goto fail; - /* Allocate one extra to hold the QP1 entries */ - rcfw->qp_tbl_size = qp_tbl_sz + 1; - rcfw->qp_tbl = kcalloc(rcfw->qp_tbl_size, sizeof(struct bnxt_qplib_qp_node), - GFP_KERNEL); - if (!rcfw->qp_tbl) - goto fail; spin_lock_init(&rcfw->tbl_lock); rcfw->max_timeout = res->cctx->hwrm_cmd_max_timeout; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h index 88814cb3aa74..ff873c5f1b25 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h @@ -262,8 +262,7 @@ static inline void bnxt_qplib_fill_cmdqmsg(struct bnxt_qplib_cmdqmsg *msg, void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw); int bnxt_qplib_alloc_rcfw_channel(struct bnxt_qplib_res *res, struct bnxt_qplib_rcfw *rcfw, - struct bnxt_qplib_ctx *ctx, - int qp_tbl_sz); + struct bnxt_qplib_ctx *ctx); void bnxt_qplib_rcfw_stop_irq(struct bnxt_qplib_rcfw *rcfw, bool kill); void bnxt_qplib_disable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw); int bnxt_qplib_rcfw_start_irq(struct bnxt_qplib_rcfw *rcfw, int msix_vector, @@ -285,9 +284,10 @@ int bnxt_qplib_deinit_rcfw(struct bnxt_qplib_rcfw *rcfw); int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw, struct bnxt_qplib_ctx *ctx, int is_virtfn); void bnxt_qplib_mark_qp_error(void *qp_handle); + static inline u32 map_qp_id_to_tbl_indx(u32 qid, struct bnxt_qplib_rcfw *rcfw) { /* Last index of the qp_tbl is for QP1 ie. qp_tbl_size - 1*/ - return (qid == 1) ? rcfw->qp_tbl_size - 1 : qid % rcfw->qp_tbl_size - 2; + return (qid == 1) ? 
rcfw->qp_tbl_size - 1 : (qid % (rcfw->qp_tbl_size - 2)); } #endif /* __BNXT_QPLIB_RCFW_H__ */ diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c index 96ceec1e8199..6cd05207ffed 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c @@ -871,19 +871,27 @@ int bnxt_qplib_init_res(struct bnxt_qplib_res *res) void bnxt_qplib_free_res(struct bnxt_qplib_res *res) { + kfree(res->rcfw->qp_tbl); bnxt_qplib_free_sgid_tbl(res, &res->sgid_tbl); bnxt_qplib_free_pd_tbl(&res->pd_tbl); bnxt_qplib_free_dpi_tbl(res, &res->dpi_tbl); } -int bnxt_qplib_alloc_res(struct bnxt_qplib_res *res, struct pci_dev *pdev, - struct net_device *netdev, - struct bnxt_qplib_dev_attr *dev_attr) +int bnxt_qplib_alloc_res(struct bnxt_qplib_res *res, struct net_device *netdev) { + struct bnxt_qplib_rcfw *rcfw = res->rcfw; + struct bnxt_qplib_dev_attr *dev_attr; int rc; - res->pdev = pdev; res->netdev = netdev; + dev_attr = res->dattr; + + /* Allocate one extra to hold the QP1 entries */ + rcfw->qp_tbl_size = max_t(u32, BNXT_RE_MAX_QPC_COUNT + 1, dev_attr->max_qp); + rcfw->qp_tbl = kcalloc(rcfw->qp_tbl_size, sizeof(struct bnxt_qplib_qp_node), + GFP_KERNEL); + if (!rcfw->qp_tbl) + return -ENOMEM; rc = bnxt_qplib_alloc_sgid_tbl(res, &res->sgid_tbl, dev_attr->max_sgid); if (rc) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h index 21fb148713a6..6a13927674b4 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h @@ -49,6 +49,13 @@ extern const struct bnxt_qplib_gid bnxt_qplib_gid_zero; #define CHIP_NUM_58818 0xd818 #define CHIP_NUM_57608 0x1760 +#define BNXT_RE_MAX_QPC_COUNT (64 * 1024) +#define BNXT_RE_MAX_MRW_COUNT (64 * 1024) +#define BNXT_RE_MAX_SRQC_COUNT (64 * 1024) +#define BNXT_RE_MAX_CQ_COUNT (64 * 1024) +#define BNXT_RE_MAX_MRW_COUNT_64K (64 * 1024) +#define BNXT_RE_MAX_MRW_COUNT_256K (256 * 1024) + #define BNXT_QPLIB_DBR_VALID (0x1UL << 26) #define BNXT_QPLIB_DBR_EPOCH_SHIFT 24 #define BNXT_QPLIB_DBR_TOGGLE_SHIFT 25 @@ -424,9 +431,7 @@ int bnxt_qplib_dealloc_dpi(struct bnxt_qplib_res *res, void bnxt_qplib_cleanup_res(struct bnxt_qplib_res *res); int bnxt_qplib_init_res(struct bnxt_qplib_res *res); void bnxt_qplib_free_res(struct bnxt_qplib_res *res); -int bnxt_qplib_alloc_res(struct bnxt_qplib_res *res, struct pci_dev *pdev, - struct net_device *netdev, - struct bnxt_qplib_dev_attr *dev_attr); +int bnxt_qplib_alloc_res(struct bnxt_qplib_res *res, struct net_device *netdev); void bnxt_qplib_free_ctx(struct bnxt_qplib_res *res, struct bnxt_qplib_ctx *ctx); int bnxt_qplib_alloc_ctx(struct bnxt_qplib_res *res, @@ -549,6 +554,14 @@ static inline bool _is_ext_stats_supported(u16 dev_cap_flags) CREQ_QUERY_FUNC_RESP_SB_EXT_STATS; } +static inline int bnxt_ext_stats_supported(struct bnxt_qplib_chip_ctx *ctx, + u16 flags, bool virtfn) +{ + /* ext stats supported if cap flag is set AND is a PF OR a Thor2 VF */ + return (_is_ext_stats_supported(flags) && + ((virtfn && bnxt_qplib_is_chip_gen_p7(ctx)) || (!virtfn))); +} + static inline bool _is_hw_retx_supported(u16 dev_cap_flags) { return dev_cap_flags & @@ -584,9 +597,19 @@ static inline bool _is_optimize_modify_qp_supported(u16 dev_cap_ext_flags2) return dev_cap_ext_flags2 & CREQ_QUERY_FUNC_RESP_SB_OPTIMIZE_MODIFY_QP_SUPPORTED; } +static inline bool _is_min_rnr_in_rtr_rts_mandatory(u16 dev_cap_ext_flags2) +{ + return !!(dev_cap_ext_flags2 & 
CREQ_QUERY_FUNC_RESP_SB_MIN_RNR_RTR_RTS_OPT_SUPPORTED); +} + static inline bool _is_cq_coalescing_supported(u16 dev_cap_ext_flags2) { return dev_cap_ext_flags2 & CREQ_QUERY_FUNC_RESP_SB_CQ_COALESCING_SUPPORTED; } +static inline bool _is_max_srq_ext_supported(u16 dev_cap_ext_flags_2) +{ + return !!(dev_cap_ext_flags_2 & CREQ_QUERY_FUNC_RESP_SB_MAX_SRQ_EXTENDED); +} + #endif /* __BNXT_QPLIB_RES_H__ */ diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index 7e20ae3d2c4f..f231e886ad9d 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -88,9 +88,9 @@ static void bnxt_qplib_query_version(struct bnxt_qplib_rcfw *rcfw, fw_ver[3] = resp.fw_rsvd; } -int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw, - struct bnxt_qplib_dev_attr *attr) +int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw) { + struct bnxt_qplib_dev_attr *attr = rcfw->res->dattr; struct creq_query_func_resp resp = {}; struct bnxt_qplib_cmdqmsg msg = {}; struct creq_query_func_resp_sb *sb; @@ -129,12 +129,18 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw, attr->max_qp_init_rd_atom = sb->max_qp_init_rd_atom > BNXT_QPLIB_MAX_OUT_RD_ATOM ? BNXT_QPLIB_MAX_OUT_RD_ATOM : sb->max_qp_init_rd_atom; - attr->max_qp_wqes = le16_to_cpu(sb->max_qp_wr); - /* - * 128 WQEs needs to be reserved for the HW (8916). Prevent - * reporting the max number - */ - attr->max_qp_wqes -= BNXT_QPLIB_RESERVED_QP_WRS + 1; + attr->max_qp_wqes = le16_to_cpu(sb->max_qp_wr) - 1; + if (!bnxt_qplib_is_chip_gen_p5_p7(rcfw->res->cctx)) { + /* + * 128 WQEs needs to be reserved for the HW (8916). Prevent + * reporting the max number on legacy devices + */ + attr->max_qp_wqes -= BNXT_QPLIB_RESERVED_QP_WRS + 1; + } + + /* Adjust for max_qp_wqes for variable wqe */ + if (cctx->modes.wqe_mode == BNXT_QPLIB_WQE_MODE_VARIABLE) + attr->max_qp_wqes = BNXT_VAR_MAX_WQE - 1; attr->max_qp_sges = cctx->modes.wqe_mode == BNXT_QPLIB_WQE_MODE_VARIABLE ? 
min_t(u32, sb->max_sge_var_wqe, BNXT_VAR_MAX_SGE) : 6; @@ -170,6 +176,9 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw, attr->dev_cap_flags = le16_to_cpu(sb->dev_cap_flags); attr->dev_cap_flags2 = le16_to_cpu(sb->dev_cap_ext_flags_2); + if (_is_max_srq_ext_supported(attr->dev_cap_flags2)) + attr->max_srq += le16_to_cpu(sb->max_srq_ext); + bnxt_qplib_query_version(rcfw, attr->fw_ver); for (i = 0; i < MAX_TQM_ALLOC_REQ / 4; i++) { @@ -1016,3 +1025,116 @@ free_mem: dma_free_coherent(&rcfw->pdev->dev, sbuf.size, sbuf.sb, sbuf.dma_addr); return rc; } + +static void bnxt_qplib_read_cc_gen1(struct bnxt_qplib_cc_param_ext *cc_ext, + struct creq_query_roce_cc_gen1_resp_sb_tlv *sb) +{ + cc_ext->inact_th_hi = le16_to_cpu(sb->inactivity_th_hi); + cc_ext->min_delta_cnp = le16_to_cpu(sb->min_time_between_cnps); + cc_ext->init_cp = le16_to_cpu(sb->init_cp); + cc_ext->tr_update_mode = sb->tr_update_mode; + cc_ext->tr_update_cyls = sb->tr_update_cycles; + cc_ext->fr_rtt = sb->fr_num_rtts; + cc_ext->ai_rate_incr = sb->ai_rate_increase; + cc_ext->rr_rtt_th = le16_to_cpu(sb->reduction_relax_rtts_th); + cc_ext->ar_cr_th = le16_to_cpu(sb->additional_relax_cr_th); + cc_ext->cr_min_th = le16_to_cpu(sb->cr_min_th); + cc_ext->bw_avg_weight = sb->bw_avg_weight; + cc_ext->cr_factor = sb->actual_cr_factor; + cc_ext->cr_th_max_cp = le16_to_cpu(sb->max_cp_cr_th); + cc_ext->cp_bias_en = sb->cp_bias_en; + cc_ext->cp_bias = sb->cp_bias; + cc_ext->cnp_ecn = sb->cnp_ecn; + cc_ext->rtt_jitter_en = sb->rtt_jitter_en; + cc_ext->bytes_per_usec = le16_to_cpu(sb->link_bytes_per_usec); + cc_ext->cc_cr_reset_th = le16_to_cpu(sb->reset_cc_cr_th); + cc_ext->cr_width = sb->cr_width; + cc_ext->min_quota = sb->quota_period_min; + cc_ext->max_quota = sb->quota_period_max; + cc_ext->abs_max_quota = sb->quota_period_abs_max; + cc_ext->tr_lb = le16_to_cpu(sb->tr_lower_bound); + cc_ext->cr_prob_fac = sb->cr_prob_factor; + cc_ext->tr_prob_fac = sb->tr_prob_factor; + cc_ext->fair_cr_th = le16_to_cpu(sb->fairness_cr_th); + cc_ext->red_div = sb->red_div; + cc_ext->cnp_ratio_th = sb->cnp_ratio_th; + cc_ext->ai_ext_rtt = le16_to_cpu(sb->exp_ai_rtts); + cc_ext->exp_crcp_ratio = sb->exp_ai_cr_cp_ratio; + cc_ext->low_rate_en = sb->use_rate_table; + cc_ext->cpcr_update_th = le16_to_cpu(sb->cp_exp_update_th); + cc_ext->ai_rtt_th1 = le16_to_cpu(sb->high_exp_ai_rtts_th1); + cc_ext->ai_rtt_th2 = le16_to_cpu(sb->high_exp_ai_rtts_th2); + cc_ext->cf_rtt_th = le16_to_cpu(sb->actual_cr_cong_free_rtts_th); + cc_ext->sc_cr_th1 = le16_to_cpu(sb->severe_cong_cr_th1); + cc_ext->sc_cr_th2 = le16_to_cpu(sb->severe_cong_cr_th2); + cc_ext->l64B_per_rtt = le32_to_cpu(sb->link64B_per_rtt); + cc_ext->cc_ack_bytes = sb->cc_ack_bytes; + cc_ext->reduce_cf_rtt_th = le16_to_cpu(sb->reduce_init_cong_free_rtts_th); +} + +int bnxt_qplib_query_cc_param(struct bnxt_qplib_res *res, + struct bnxt_qplib_cc_param *cc_param) +{ + struct bnxt_qplib_tlv_query_rcc_sb *ext_sb; + struct bnxt_qplib_rcfw *rcfw = res->rcfw; + struct creq_query_roce_cc_resp resp = {}; + struct creq_query_roce_cc_resp_sb *sb; + struct bnxt_qplib_cmdqmsg msg = {}; + struct cmdq_query_roce_cc req = {}; + struct bnxt_qplib_rcfw_sbuf sbuf; + size_t resp_size; + int rc; + + /* Query the parameters from chip */ + bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req, CMDQ_BASE_OPCODE_QUERY_ROCE_CC, + sizeof(req)); + if (bnxt_qplib_is_chip_gen_p5_p7(res->cctx)) + resp_size = sizeof(*ext_sb); + else + resp_size = sizeof(*sb); + + sbuf.size = ALIGN(resp_size, BNXT_QPLIB_CMDQE_UNITS); + sbuf.sb = 
dma_alloc_coherent(&rcfw->pdev->dev, sbuf.size, + &sbuf.dma_addr, GFP_KERNEL); + if (!sbuf.sb) + return -ENOMEM; + + req.resp_size = sbuf.size / BNXT_QPLIB_CMDQE_UNITS; + bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, &sbuf, sizeof(req), + sizeof(resp), 0); + rc = bnxt_qplib_rcfw_send_message(res->rcfw, &msg); + if (rc) + goto out; + + ext_sb = sbuf.sb; + sb = bnxt_qplib_is_chip_gen_p5_p7(res->cctx) ? &ext_sb->base_sb : + (struct creq_query_roce_cc_resp_sb *)ext_sb; + + cc_param->enable = sb->enable_cc & CREQ_QUERY_ROCE_CC_RESP_SB_ENABLE_CC; + cc_param->tos_ecn = (sb->tos_dscp_tos_ecn & + CREQ_QUERY_ROCE_CC_RESP_SB_TOS_ECN_MASK) >> + CREQ_QUERY_ROCE_CC_RESP_SB_TOS_ECN_SFT; + cc_param->tos_dscp = (sb->tos_dscp_tos_ecn & + CREQ_QUERY_ROCE_CC_RESP_SB_TOS_DSCP_MASK) >> + CREQ_QUERY_ROCE_CC_RESP_SB_TOS_DSCP_SFT; + cc_param->alt_tos_dscp = sb->alt_tos_dscp; + cc_param->alt_vlan_pcp = sb->alt_vlan_pcp; + + cc_param->g = sb->g; + cc_param->nph_per_state = sb->num_phases_per_state; + cc_param->init_cr = le16_to_cpu(sb->init_cr); + cc_param->init_tr = le16_to_cpu(sb->init_tr); + cc_param->cc_mode = sb->cc_mode; + cc_param->inact_th = le16_to_cpu(sb->inactivity_th); + cc_param->rtt = le16_to_cpu(sb->rtt); + cc_param->tcp_cp = le16_to_cpu(sb->tcp_cp); + cc_param->time_pph = sb->time_per_phase; + cc_param->pkts_pph = sb->pkts_per_phase; + if (bnxt_qplib_is_chip_gen_p5_p7(res->cctx)) { + bnxt_qplib_read_cc_gen1(&cc_param->cc_ext, &ext_sb->gen1_sb); + cc_param->inact_th |= (cc_param->cc_ext.inact_th_hi & 0x3F) << 16; + } +out: + dma_free_coherent(&rcfw->pdev->dev, sbuf.size, sbuf.sb, sbuf.dma_addr); + return rc; +} diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h index e6beeb514b7d..e626b05038a1 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h @@ -296,6 +296,7 @@ struct bnxt_qplib_cc_param_ext { struct bnxt_qplib_cc_param { u8 alt_vlan_pcp; + u8 qp1_tos_dscp; u16 alt_tos_dscp; u8 cc_mode; u8 enable; @@ -325,8 +326,7 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, int bnxt_qplib_update_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, struct bnxt_qplib_gid *gid, u16 gid_idx, const u8 *smac); -int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw, - struct bnxt_qplib_dev_attr *attr); +int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw); int bnxt_qplib_set_func_resources(struct bnxt_qplib_res *res, struct bnxt_qplib_rcfw *rcfw, struct bnxt_qplib_ctx *ctx); @@ -355,6 +355,8 @@ int bnxt_qplib_modify_cc(struct bnxt_qplib_res *res, struct bnxt_qplib_cc_param *cc_param); int bnxt_qplib_read_context(struct bnxt_qplib_rcfw *rcfw, u8 type, u32 xid, u32 resp_size, void *resp_va); +int bnxt_qplib_query_cc_param(struct bnxt_qplib_res *res, + struct bnxt_qplib_cc_param *cc_param); #define BNXT_VAR_MAX_WQE 4352 #define BNXT_VAR_MAX_SLOT_ALIGN 256 diff --git a/drivers/infiniband/hw/bnxt_re/roce_hsi.h b/drivers/infiniband/hw/bnxt_re/roce_hsi.h index a98fc9c2313e..7eceb3e9f4ce 100644 --- a/drivers/infiniband/hw/bnxt_re/roce_hsi.h +++ b/drivers/infiniband/hw/bnxt_re/roce_hsi.h @@ -2215,10 +2215,12 @@ struct creq_query_func_resp_sb { #define CREQ_QUERY_FUNC_RESP_SB_REQ_RETRANSMISSION_SUPPORT_IQM_MSN_TABLE (0x2UL << 4) #define CREQ_QUERY_FUNC_RESP_SB_REQ_RETRANSMISSION_SUPPORT_LAST \ CREQ_QUERY_FUNC_RESP_SB_REQ_RETRANSMISSION_SUPPORT_IQM_MSN_TABLE + #define CREQ_QUERY_FUNC_RESP_SB_MAX_SRQ_EXTENDED 0x40UL + #define CREQ_QUERY_FUNC_RESP_SB_MIN_RNR_RTR_RTS_OPT_SUPPORTED 0x1000UL __le16 max_xp_qp_size; __le16 
create_qp_batch_size; __le16 destroy_qp_batch_size; - __le16 reserved16; + __le16 max_srq_ext; __le64 reserved64; }; diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 8d753e6e0c71..e02721a9e288 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -191,7 +191,7 @@ static void start_ep_timer(struct c4iw_ep *ep) static int stop_ep_timer(struct c4iw_ep *ep) { pr_debug("ep %p stopping\n", ep); - del_timer_sync(&ep->timer); + timer_delete_sync(&ep->timer); if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) { c4iw_put_ep(&ep->com); return 0; diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index 80970a1738f8..034b85c42255 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -1114,8 +1114,10 @@ static inline struct sk_buff *copy_gl_to_skb_pkt(const struct pkt_gl *gl, * The math here assumes sizeof cpl_pass_accept_req >= sizeof * cpl_rx_pkt. */ - skb = alloc_skb(gl->tot_len + sizeof(struct cpl_pass_accept_req) + - sizeof(struct rss_header) - pktshift, GFP_ATOMIC); + skb = alloc_skb(size_add(gl->tot_len, + sizeof(struct cpl_pass_accept_req) + + sizeof(struct rss_header)) - pktshift, + GFP_ATOMIC); if (unlikely(!skb)) return NULL; diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 7b5c4522b426..955f061a55e9 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -1599,6 +1599,7 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp, int count; int rq_flushed = 0, sq_flushed; unsigned long flag; + struct ib_event ev; pr_debug("qhp %p rchp %p schp %p\n", qhp, rchp, schp); @@ -1607,6 +1608,13 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp, if (schp != rchp) spin_lock(&schp->lock); spin_lock(&qhp->lock); + if (qhp->srq && qhp->attr.state == C4IW_QP_STATE_ERROR && + qhp->ibqp.event_handler) { + ev.device = qhp->ibqp.device; + ev.element.qp = &qhp->ibqp; + ev.event = IB_EVENT_QP_LAST_WQE_REACHED; + qhp->ibqp.event_handler(&ev, qhp->ibqp.qp_context); + } if (qhp->wq.flushed) { spin_unlock(&qhp->lock); diff --git a/drivers/infiniband/hw/efa/efa.h b/drivers/infiniband/hw/efa/efa.h index d7fc9d5eeefd..838182d0409c 100644 --- a/drivers/infiniband/hw/efa/efa.h +++ b/drivers/infiniband/hw/efa/efa.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ /* - * Copyright 2018-2024 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2025 Amazon.com, Inc. or its affiliates. All rights reserved. */ #ifndef _EFA_H_ @@ -57,15 +57,15 @@ struct efa_dev { u64 db_bar_addr; u64 db_bar_len; - unsigned int num_irq_vectors; - int admin_msix_vector_idx; + u32 num_irq_vectors; + u32 admin_msix_vector_idx; struct efa_irq admin_irq; struct efa_stats stats; /* Array of completion EQs */ struct efa_eq *eqs; - unsigned int neqs; + u32 neqs; /* Only stores CQs with interrupts enabled */ struct xarray cqs_xa; diff --git a/drivers/infiniband/hw/efa/efa_com.h b/drivers/infiniband/hw/efa/efa_com.h index 77282234ce68..4d9ca97e4296 100644 --- a/drivers/infiniband/hw/efa/efa_com.h +++ b/drivers/infiniband/hw/efa/efa_com.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ /* - * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2025 Amazon.com, Inc. or its affiliates. All rights reserved. 
*/ #ifndef _EFA_COM_H_ @@ -65,7 +65,7 @@ struct efa_com_admin_queue { u16 depth; struct efa_com_admin_cq cq; struct efa_com_admin_sq sq; - u16 msix_vector_idx; + u32 msix_vector_idx; unsigned long state; @@ -89,7 +89,7 @@ struct efa_com_aenq { struct efa_aenq_handlers *aenq_handlers; dma_addr_t dma_addr; u32 cc; /* consumer counter */ - u16 msix_vector_idx; + u32 msix_vector_idx; u16 depth; u8 phase; }; diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c index ad225823e6f2..4f03c0ec819f 100644 --- a/drivers/infiniband/hw/efa/efa_main.c +++ b/drivers/infiniband/hw/efa/efa_main.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause /* - * Copyright 2018-2024 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2025 Amazon.com, Inc. or its affiliates. All rights reserved. */ #include <linux/module.h> @@ -141,8 +141,7 @@ static int efa_request_irq(struct efa_dev *dev, struct efa_irq *irq) return 0; } -static void efa_setup_comp_irq(struct efa_dev *dev, struct efa_eq *eq, - int vector) +static void efa_setup_comp_irq(struct efa_dev *dev, struct efa_eq *eq, u32 vector) { u32 cpu; @@ -305,7 +304,7 @@ static void efa_destroy_eq(struct efa_dev *dev, struct efa_eq *eq) efa_free_irq(dev, &eq->irq); } -static int efa_create_eq(struct efa_dev *dev, struct efa_eq *eq, u8 msix_vec) +static int efa_create_eq(struct efa_dev *dev, struct efa_eq *eq, u32 msix_vec) { int err; @@ -328,21 +327,17 @@ err_free_comp_irq: static int efa_create_eqs(struct efa_dev *dev) { - unsigned int neqs = dev->dev_attr.max_eq; - int err; - int i; - - neqs = min_t(unsigned int, neqs, - dev->num_irq_vectors - EFA_COMP_EQS_VEC_BASE); + u32 neqs = dev->dev_attr.max_eq; + int err, i; + neqs = min_t(u32, neqs, dev->num_irq_vectors - EFA_COMP_EQS_VEC_BASE); dev->neqs = neqs; dev->eqs = kcalloc(neqs, sizeof(*dev->eqs), GFP_KERNEL); if (!dev->eqs) return -ENOMEM; for (i = 0; i < neqs; i++) { - err = efa_create_eq(dev, &dev->eqs[i], - i + EFA_COMP_EQS_VEC_BASE); + err = efa_create_eq(dev, &dev->eqs[i], i + EFA_COMP_EQS_VEC_BASE); if (err) goto err_destroy_eqs; } @@ -470,7 +465,6 @@ static void efa_ib_device_remove(struct efa_dev *dev) ibdev_info(&dev->ibdev, "Unregister ib device\n"); ib_unregister_device(&dev->ibdev); efa_destroy_eqs(dev); - efa_com_dev_reset(&dev->edev, EFA_REGS_RESET_NORMAL); efa_release_doorbell_bar(dev); } @@ -643,12 +637,14 @@ err_disable_device: return ERR_PTR(err); } -static void efa_remove_device(struct pci_dev *pdev) +static void efa_remove_device(struct pci_dev *pdev, + enum efa_regs_reset_reason_types reset_reason) { struct efa_dev *dev = pci_get_drvdata(pdev); struct efa_com_dev *edev; edev = &dev->edev; + efa_com_dev_reset(edev, reset_reason); efa_com_admin_destroy(edev); efa_free_irq(dev, &dev->admin_irq); efa_disable_msix(dev); @@ -676,7 +672,7 @@ static int efa_probe(struct pci_dev *pdev, const struct pci_device_id *ent) return 0; err_remove_device: - efa_remove_device(pdev); + efa_remove_device(pdev, EFA_REGS_RESET_INIT_ERR); return err; } @@ -685,7 +681,7 @@ static void efa_remove(struct pci_dev *pdev) struct efa_dev *dev = pci_get_drvdata(pdev); efa_ib_device_remove(dev); - efa_remove_device(pdev); + efa_remove_device(pdev, EFA_REGS_RESET_NORMAL); } static void efa_shutdown(struct pci_dev *pdev) diff --git a/drivers/infiniband/hw/erdma/Kconfig b/drivers/infiniband/hw/erdma/Kconfig index 169038e3ceb1..267fc1f3c42a 100644 --- a/drivers/infiniband/hw/erdma/Kconfig +++ b/drivers/infiniband/hw/erdma/Kconfig @@ -5,7 +5,7 @@ config 
INFINIBAND_ERDMA depends on INFINIBAND_ADDR_TRANS depends on INFINIBAND_USER_ACCESS help - This is a RDMA/iWarp driver for Alibaba Elastic RDMA Adapter(ERDMA), + This is a RDMA driver for Alibaba Elastic RDMA Adapter(ERDMA), which supports RDMA features in Alibaba cloud environment. To compile this driver as module, choose M here. The module will be diff --git a/drivers/infiniband/hw/erdma/erdma.h b/drivers/infiniband/hw/erdma/erdma.h index 3c166359448d..2a023b99f992 100644 --- a/drivers/infiniband/hw/erdma/erdma.h +++ b/drivers/infiniband/hw/erdma/erdma.h @@ -16,7 +16,7 @@ #include "erdma_hw.h" #define DRV_MODULE_NAME "erdma" -#define ERDMA_NODE_DESC "Elastic RDMA(iWARP) stack" +#define ERDMA_NODE_DESC "Elastic RDMA Adapter stack" struct erdma_eq { void *qbuf; @@ -101,8 +101,6 @@ struct erdma_cmdq { struct erdma_comp_wait *wait_pool; spinlock_t lock; - bool use_event; - struct erdma_cmdq_sq sq; struct erdma_cmdq_cq cq; struct erdma_eq eq; @@ -148,6 +146,8 @@ struct erdma_devattr { u32 max_mr; u32 max_pd; u32 max_mw; + u32 max_gid; + u32 max_ah; u32 local_dma_key; }; @@ -177,7 +177,8 @@ struct erdma_resource_cb { enum { ERDMA_RES_TYPE_PD = 0, ERDMA_RES_TYPE_STAG_IDX = 1, - ERDMA_RES_CNT = 2, + ERDMA_RES_TYPE_AH = 2, + ERDMA_RES_CNT = 3, }; struct erdma_dev { @@ -192,8 +193,6 @@ struct erdma_dev { u8 __iomem *func_bar; struct erdma_devattr attrs; - /* physical port state (only one port per device) */ - enum ib_port_state state; u32 mtu; /* cmdq and aeq use the same msix vector */ @@ -215,6 +214,7 @@ struct erdma_dev { struct dma_pool *db_pool; struct dma_pool *resp_pool; + enum erdma_proto_type proto; }; static inline void *get_queue_entry(void *qbuf, u32 idx, u32 depth, u32 shift) @@ -265,7 +265,7 @@ void erdma_cmdq_destroy(struct erdma_dev *dev); void erdma_cmdq_build_reqhdr(u64 *hdr, u32 mod, u32 op); int erdma_post_cmd_wait(struct erdma_cmdq *cmdq, void *req, u32 req_size, - u64 *resp0, u64 *resp1); + u64 *resp0, u64 *resp1, bool sleepable); void erdma_cmdq_completion_handler(struct erdma_cmdq *cmdq); int erdma_ceqs_init(struct erdma_dev *dev); diff --git a/drivers/infiniband/hw/erdma/erdma_cm.c b/drivers/infiniband/hw/erdma/erdma_cm.c index 771059a8eb7d..e0acc185e719 100644 --- a/drivers/infiniband/hw/erdma/erdma_cm.c +++ b/drivers/infiniband/hw/erdma/erdma_cm.c @@ -567,7 +567,8 @@ reject_conn: static int erdma_proc_mpareply(struct erdma_cep *cep) { - struct erdma_qp_attrs qp_attrs; + enum erdma_qpa_mask_iwarp to_modify_attrs = 0; + struct erdma_mod_qp_params_iwarp params; struct erdma_qp *qp = cep->qp; struct mpa_rr *rep; int ret; @@ -597,26 +598,29 @@ static int erdma_proc_mpareply(struct erdma_cep *cep) return -EINVAL; } - memset(&qp_attrs, 0, sizeof(qp_attrs)); - qp_attrs.irq_size = cep->ird; - qp_attrs.orq_size = cep->ord; - qp_attrs.state = ERDMA_QP_STATE_RTS; + memset(¶ms, 0, sizeof(params)); + params.state = ERDMA_QPS_IWARP_RTS; + params.irq_size = cep->ird; + params.orq_size = cep->ord; down_write(&qp->state_lock); - if (qp->attrs.state > ERDMA_QP_STATE_RTR) { + if (qp->attrs.iwarp.state > ERDMA_QPS_IWARP_RTR) { ret = -EINVAL; up_write(&qp->state_lock); goto out_err; } - qp->attrs.qp_type = ERDMA_QP_ACTIVE; - if (__mpa_ext_cc(cep->mpa.ext_data.bits) != qp->attrs.cc) - qp->attrs.cc = COMPROMISE_CC; + to_modify_attrs = ERDMA_QPA_IWARP_STATE | ERDMA_QPA_IWARP_LLP_HANDLE | + ERDMA_QPA_IWARP_MPA | ERDMA_QPA_IWARP_IRD | + ERDMA_QPA_IWARP_ORD; - ret = erdma_modify_qp_internal(qp, &qp_attrs, - ERDMA_QP_ATTR_STATE | - ERDMA_QP_ATTR_LLP_HANDLE | - ERDMA_QP_ATTR_MPA); + params.qp_type = 
ERDMA_QP_ACTIVE; + if (__mpa_ext_cc(cep->mpa.ext_data.bits) != qp->attrs.cc) { + to_modify_attrs |= ERDMA_QPA_IWARP_CC; + params.cc = COMPROMISE_CC; + } + + ret = erdma_modify_qp_state_iwarp(qp, ¶ms, to_modify_attrs); up_write(&qp->state_lock); @@ -705,7 +709,6 @@ error: erdma_cancel_mpatimer(new_cep); erdma_cep_put(new_cep); - new_cep->sock = NULL; } if (new_s) { @@ -722,7 +725,7 @@ static int erdma_newconn_connected(struct erdma_cep *cep) __mpa_rr_set_revision(&cep->mpa.hdr.params.bits, MPA_REVISION_EXT_1); memcpy(cep->mpa.hdr.key, MPA_KEY_REQ, MPA_KEY_SIZE); - cep->mpa.ext_data.cookie = cpu_to_be32(cep->qp->attrs.cookie); + cep->mpa.ext_data.cookie = cpu_to_be32(cep->qp->attrs.iwarp.cookie); __mpa_ext_set_cc(&cep->mpa.ext_data.bits, cep->qp->attrs.cc); ret = erdma_send_mpareqrep(cep, cep->private_data, cep->pd_len); @@ -1126,10 +1129,11 @@ error_put_qp: int erdma_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params) { - struct erdma_dev *dev = to_edev(id->device); struct erdma_cep *cep = (struct erdma_cep *)id->provider_data; + struct erdma_mod_qp_params_iwarp mod_qp_params; + enum erdma_qpa_mask_iwarp to_modify_attrs = 0; + struct erdma_dev *dev = to_edev(id->device); struct erdma_qp *qp; - struct erdma_qp_attrs qp_attrs; int ret; erdma_cep_set_inuse(cep); @@ -1156,7 +1160,7 @@ int erdma_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params) erdma_qp_get(qp); down_write(&qp->state_lock); - if (qp->attrs.state > ERDMA_QP_STATE_RTR) { + if (qp->attrs.iwarp.state > ERDMA_QPS_IWARP_RTR) { ret = -EINVAL; up_write(&qp->state_lock); goto error; @@ -1181,11 +1185,11 @@ int erdma_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params) cep->cm_id = id; id->add_ref(id); - memset(&qp_attrs, 0, sizeof(qp_attrs)); - qp_attrs.orq_size = params->ord; - qp_attrs.irq_size = params->ird; + memset(&mod_qp_params, 0, sizeof(mod_qp_params)); - qp_attrs.state = ERDMA_QP_STATE_RTS; + mod_qp_params.irq_size = params->ird; + mod_qp_params.orq_size = params->ord; + mod_qp_params.state = ERDMA_QPS_IWARP_RTS; /* Associate QP with CEP */ erdma_cep_get(cep); @@ -1194,19 +1198,21 @@ int erdma_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params) cep->state = ERDMA_EPSTATE_RDMA_MODE; - qp->attrs.qp_type = ERDMA_QP_PASSIVE; - qp->attrs.pd_len = params->private_data_len; + mod_qp_params.qp_type = ERDMA_QP_PASSIVE; + mod_qp_params.pd_len = params->private_data_len; - if (qp->attrs.cc != __mpa_ext_cc(cep->mpa.ext_data.bits)) - qp->attrs.cc = COMPROMISE_CC; + to_modify_attrs = ERDMA_QPA_IWARP_STATE | ERDMA_QPA_IWARP_ORD | + ERDMA_QPA_IWARP_LLP_HANDLE | ERDMA_QPA_IWARP_IRD | + ERDMA_QPA_IWARP_MPA; + + if (qp->attrs.cc != __mpa_ext_cc(cep->mpa.ext_data.bits)) { + to_modify_attrs |= ERDMA_QPA_IWARP_CC; + mod_qp_params.cc = COMPROMISE_CC; + } /* move to rts */ - ret = erdma_modify_qp_internal(qp, &qp_attrs, - ERDMA_QP_ATTR_STATE | - ERDMA_QP_ATTR_ORD | - ERDMA_QP_ATTR_LLP_HANDLE | - ERDMA_QP_ATTR_IRD | - ERDMA_QP_ATTR_MPA); + ret = erdma_modify_qp_state_iwarp(qp, &mod_qp_params, to_modify_attrs); + up_write(&qp->state_lock); if (ret) @@ -1214,7 +1220,7 @@ int erdma_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params) cep->mpa.ext_data.bits = 0; __mpa_ext_set_cc(&cep->mpa.ext_data.bits, qp->attrs.cc); - cep->mpa.ext_data.cookie = cpu_to_be32(cep->qp->attrs.cookie); + cep->mpa.ext_data.cookie = cpu_to_be32(cep->qp->attrs.iwarp.cookie); ret = erdma_send_mpareqrep(cep, params->private_data, params->private_data_len); diff --git a/drivers/infiniband/hw/erdma/erdma_cmdq.c 
b/drivers/infiniband/hw/erdma/erdma_cmdq.c index a3d8922d1ad1..b867aefe83b2 100644 --- a/drivers/infiniband/hw/erdma/erdma_cmdq.c +++ b/drivers/infiniband/hw/erdma/erdma_cmdq.c @@ -182,7 +182,6 @@ int erdma_cmdq_init(struct erdma_dev *dev) int err; cmdq->max_outstandings = ERDMA_CMDQ_MAX_OUTSTANDING; - cmdq->use_event = false; sema_init(&cmdq->credits, cmdq->max_outstandings); @@ -223,8 +222,6 @@ err_destroy_sq: void erdma_finish_cmdq_init(struct erdma_dev *dev) { - /* after device init successfully, change cmdq to event mode. */ - dev->cmdq.use_event = true; arm_cmdq_cq(&dev->cmdq); } @@ -312,8 +309,7 @@ static int erdma_poll_single_cmd_completion(struct erdma_cmdq *cmdq) /* Copy 16B comp data after cqe hdr to outer */ be32_to_cpu_array(comp_wait->comp_data, cqe + 2, 4); - if (cmdq->use_event) - complete(&comp_wait->wait_event); + complete(&comp_wait->wait_event); return 0; } @@ -332,9 +328,6 @@ static void erdma_polling_cmd_completions(struct erdma_cmdq *cmdq) if (erdma_poll_single_cmd_completion(cmdq)) break; - if (comp_num && cmdq->use_event) - arm_cmdq_cq(cmdq); - spin_unlock_irqrestore(&cmdq->cq.lock, flags); } @@ -342,8 +335,7 @@ void erdma_cmdq_completion_handler(struct erdma_cmdq *cmdq) { int got_event = 0; - if (!test_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state) || - !cmdq->use_event) + if (!test_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state)) return; while (get_next_valid_eqe(&cmdq->eq)) { @@ -354,6 +346,7 @@ void erdma_cmdq_completion_handler(struct erdma_cmdq *cmdq) if (got_event) { cmdq->cq.cmdsn++; erdma_polling_cmd_completions(cmdq); + arm_cmdq_cq(cmdq); } notify_eq(&cmdq->eq); @@ -372,7 +365,7 @@ static int erdma_poll_cmd_completion(struct erdma_comp_wait *comp_ctx, if (time_is_before_jiffies(comp_timeout)) return -ETIME; - msleep(20); + udelay(20); } return 0; @@ -403,7 +396,7 @@ void erdma_cmdq_build_reqhdr(u64 *hdr, u32 mod, u32 op) } int erdma_post_cmd_wait(struct erdma_cmdq *cmdq, void *req, u32 req_size, - u64 *resp0, u64 *resp1) + u64 *resp0, u64 *resp1, bool sleepable) { struct erdma_comp_wait *comp_wait; int ret; @@ -411,7 +404,12 @@ int erdma_post_cmd_wait(struct erdma_cmdq *cmdq, void *req, u32 req_size, if (!test_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state)) return -ENODEV; - down(&cmdq->credits); + if (!sleepable) { + while (down_trylock(&cmdq->credits)) + ; + } else { + down(&cmdq->credits); + } comp_wait = get_comp_wait(cmdq); if (IS_ERR(comp_wait)) { @@ -425,7 +423,7 @@ int erdma_post_cmd_wait(struct erdma_cmdq *cmdq, void *req, u32 req_size, push_cmdq_sqe(cmdq, req, req_size, comp_wait); spin_unlock(&cmdq->sq.lock); - if (cmdq->use_event) + if (sleepable) ret = erdma_wait_cmd_completion(comp_wait, cmdq, ERDMA_CMDQ_TIMEOUT_MS); else diff --git a/drivers/infiniband/hw/erdma/erdma_cq.c b/drivers/infiniband/hw/erdma/erdma_cq.c index 70f89f0162aa..1f456327e63c 100644 --- a/drivers/infiniband/hw/erdma/erdma_cq.c +++ b/drivers/infiniband/hw/erdma/erdma_cq.c @@ -105,6 +105,22 @@ static const struct { { ERDMA_WC_RETRY_EXC_ERR, IB_WC_RETRY_EXC_ERR, ERDMA_WC_VENDOR_NO_ERR }, }; +static void erdma_process_ud_cqe(struct erdma_cqe *cqe, struct ib_wc *wc) +{ + u32 ud_info; + + wc->wc_flags |= (IB_WC_GRH | IB_WC_WITH_NETWORK_HDR_TYPE); + ud_info = be32_to_cpu(cqe->ud.info); + wc->network_hdr_type = FIELD_GET(ERDMA_CQE_NTYPE_MASK, ud_info); + if (wc->network_hdr_type == ERDMA_NETWORK_TYPE_IPV4) + wc->network_hdr_type = RDMA_NETWORK_IPV4; + else + wc->network_hdr_type = RDMA_NETWORK_IPV6; + wc->src_qp = FIELD_GET(ERDMA_CQE_SQPN_MASK, ud_info); + wc->sl = 
FIELD_GET(ERDMA_CQE_SL_MASK, ud_info); + wc->pkey_index = 0; +} + #define ERDMA_POLLCQ_NO_QP 1 static int erdma_poll_one_cqe(struct erdma_cq *cq, struct ib_wc *wc) @@ -168,6 +184,10 @@ static int erdma_poll_one_cqe(struct erdma_cq *cq, struct ib_wc *wc) wc->wc_flags |= IB_WC_WITH_INVALIDATE; } + if (erdma_device_rocev2(dev) && + (qp->ibqp.qp_type == IB_QPT_UD || qp->ibqp.qp_type == IB_QPT_GSI)) + erdma_process_ud_cqe(cqe, wc); + if (syndrome >= ERDMA_NUM_WC_STATUS) syndrome = ERDMA_WC_GENERAL_ERR; @@ -201,3 +221,48 @@ int erdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) return npolled; } + +void erdma_remove_cqes_of_qp(struct ib_cq *ibcq, u32 qpn) +{ + struct erdma_cq *cq = to_ecq(ibcq); + struct erdma_cqe *cqe, *dst_cqe; + u32 prev_cq_ci, cur_cq_ci; + u32 ncqe = 0, nqp_cqe = 0; + unsigned long flags; + u8 owner; + + spin_lock_irqsave(&cq->kern_cq.lock, flags); + + prev_cq_ci = cq->kern_cq.ci; + + while (ncqe < cq->depth && (cqe = get_next_valid_cqe(cq)) != NULL) { + ++cq->kern_cq.ci; + ++ncqe; + } + + while (ncqe > 0) { + cur_cq_ci = prev_cq_ci + ncqe - 1; + cqe = get_queue_entry(cq->kern_cq.qbuf, cur_cq_ci, cq->depth, + CQE_SHIFT); + + if (be32_to_cpu(cqe->qpn) == qpn) { + ++nqp_cqe; + } else if (nqp_cqe) { + dst_cqe = get_queue_entry(cq->kern_cq.qbuf, + cur_cq_ci + nqp_cqe, + cq->depth, CQE_SHIFT); + owner = FIELD_GET(ERDMA_CQE_HDR_OWNER_MASK, + be32_to_cpu(dst_cqe->hdr)); + cqe->hdr = cpu_to_be32( + (be32_to_cpu(cqe->hdr) & + ~ERDMA_CQE_HDR_OWNER_MASK) | + FIELD_PREP(ERDMA_CQE_HDR_OWNER_MASK, owner)); + memcpy(dst_cqe, cqe, sizeof(*cqe)); + } + + --ncqe; + } + + cq->kern_cq.ci = prev_cq_ci + nqp_cqe; + spin_unlock_irqrestore(&cq->kern_cq.lock, flags); +} diff --git a/drivers/infiniband/hw/erdma/erdma_eq.c b/drivers/infiniband/hw/erdma/erdma_eq.c index 9a72fec6d5cc..6486234a2360 100644 --- a/drivers/infiniband/hw/erdma/erdma_eq.c +++ b/drivers/infiniband/hw/erdma/erdma_eq.c @@ -236,7 +236,8 @@ static int create_eq_cmd(struct erdma_dev *dev, u32 eqn, struct erdma_eq *eq) req.db_dma_addr_l = lower_32_bits(eq->dbrec_dma); req.db_dma_addr_h = upper_32_bits(eq->dbrec_dma); - return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); + return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL, + false); } static int erdma_ceq_init_one(struct erdma_dev *dev, u16 ceqn) @@ -278,7 +279,8 @@ static void erdma_ceq_uninit_one(struct erdma_dev *dev, u16 ceqn) req.qtype = ERDMA_EQ_TYPE_CEQ; req.vector_idx = ceqn + 1; - err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); + err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL, + false); if (err) return; diff --git a/drivers/infiniband/hw/erdma/erdma_hw.h b/drivers/infiniband/hw/erdma/erdma_hw.h index 05978f3b1475..ea4db53901a4 100644 --- a/drivers/infiniband/hw/erdma/erdma_hw.h +++ b/drivers/infiniband/hw/erdma/erdma_hw.h @@ -9,6 +9,7 @@ #include <linux/kernel.h> #include <linux/types.h> +#include <linux/if_ether.h> /* PCIe device related definition. */ #define ERDMA_PCI_WIDTH 64 @@ -21,8 +22,21 @@ #define ERDMA_NUM_MSIX_VEC 32U #define ERDMA_MSIX_VECTOR_CMDQ 0 +/* RoCEv2 related */ +#define ERDMA_ROCEV2_GID_SIZE 16 +#define ERDMA_MAX_PKEYS 1 +#define ERDMA_DEFAULT_PKEY 0xFFFF + +/* erdma device protocol type */ +enum erdma_proto_type { + ERDMA_PROTO_IWARP = 0, + ERDMA_PROTO_ROCEV2 = 1, + ERDMA_PROTO_COUNT = 2, +}; + /* PCIe Bar0 Registers. 
*/ #define ERDMA_REGS_VERSION_REG 0x0 +#define ERDMA_REGS_DEV_PROTO_REG 0xC #define ERDMA_REGS_DEV_CTRL_REG 0x10 #define ERDMA_REGS_DEV_ST_REG 0x14 #define ERDMA_REGS_NETDEV_MAC_L_REG 0x18 @@ -136,7 +150,11 @@ enum CMDQ_RDMA_OPCODE { CMDQ_OPCODE_DESTROY_CQ = 5, CMDQ_OPCODE_REFLUSH = 6, CMDQ_OPCODE_REG_MR = 8, - CMDQ_OPCODE_DEREG_MR = 9 + CMDQ_OPCODE_DEREG_MR = 9, + CMDQ_OPCODE_SET_GID = 14, + CMDQ_OPCODE_CREATE_AH = 15, + CMDQ_OPCODE_DESTROY_AH = 16, + CMDQ_OPCODE_QUERY_QP = 17, }; enum CMDQ_COMMON_OPCODE { @@ -284,6 +302,36 @@ struct erdma_cmdq_dereg_mr_req { u32 cfg; }; +/* create_av cfg0 */ +#define ERDMA_CMD_CREATE_AV_FL_MASK GENMASK(19, 0) +#define ERDMA_CMD_CREATE_AV_NTYPE_MASK BIT(20) + +struct erdma_av_cfg { + u32 cfg0; + u8 traffic_class; + u8 hop_limit; + u8 sl; + u8 rsvd; + u16 udp_sport; + u16 sgid_index; + u8 dmac[ETH_ALEN]; + u8 padding[2]; + u8 dgid[ERDMA_ROCEV2_GID_SIZE]; +}; + +struct erdma_cmdq_create_ah_req { + u64 hdr; + u32 pdn; + u32 ahn; + struct erdma_av_cfg av_cfg; +}; + +struct erdma_cmdq_destroy_ah_req { + u64 hdr; + u32 pdn; + u32 ahn; +}; + /* modify qp cfg */ #define ERDMA_CMD_MODIFY_QP_STATE_MASK GENMASK(31, 24) #define ERDMA_CMD_MODIFY_QP_CC_MASK GENMASK(23, 20) @@ -301,6 +349,36 @@ struct erdma_cmdq_modify_qp_req { u32 recv_nxt; }; +/* modify qp cfg1 for roce device */ +#define ERDMA_CMD_MODIFY_QP_DQPN_MASK GENMASK(19, 0) + +struct erdma_cmdq_mod_qp_req_rocev2 { + u64 hdr; + u32 cfg0; + u32 cfg1; + u32 attr_mask; + u32 qkey; + u32 rq_psn; + u32 sq_psn; + struct erdma_av_cfg av_cfg; +}; + +/* query qp response mask */ +#define ERDMA_CMD_QUERY_QP_RESP_SQ_PSN_MASK GENMASK_ULL(23, 0) +#define ERDMA_CMD_QUERY_QP_RESP_RQ_PSN_MASK GENMASK_ULL(47, 24) +#define ERDMA_CMD_QUERY_QP_RESP_QP_STATE_MASK GENMASK_ULL(55, 48) +#define ERDMA_CMD_QUERY_QP_RESP_SQ_DRAINING_MASK GENMASK_ULL(56, 56) + +struct erdma_cmdq_query_qp_req_rocev2 { + u64 hdr; + u32 qpn; +}; + +enum erdma_qp_type { + ERDMA_QPT_RC = 0, + ERDMA_QPT_UD = 1, +}; + /* create qp cfg0 */ #define ERDMA_CMD_CREATE_QP_SQ_DEPTH_MASK GENMASK(31, 20) #define ERDMA_CMD_CREATE_QP_QPN_MASK GENMASK(19, 0) @@ -309,6 +387,9 @@ struct erdma_cmdq_modify_qp_req { #define ERDMA_CMD_CREATE_QP_RQ_DEPTH_MASK GENMASK(31, 20) #define ERDMA_CMD_CREATE_QP_PD_MASK GENMASK(19, 0) +/* create qp cfg2 */ +#define ERDMA_CMD_CREATE_QP_TYPE_MASK GENMASK(3, 0) + /* create qp cqn_mtt_cfg */ #define ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK GENMASK(31, 28) #define ERDMA_CMD_CREATE_QP_DB_CFG_MASK BIT(25) @@ -342,6 +423,7 @@ struct erdma_cmdq_create_qp_req { u64 rq_mtt_entry[3]; u32 db_cfg; + u32 cfg2; }; struct erdma_cmdq_destroy_qp_req { @@ -394,10 +476,33 @@ struct erdma_cmdq_query_stats_resp { u64 rx_pps_meter_drop_packets_cnt; }; +enum erdma_network_type { + ERDMA_NETWORK_TYPE_IPV4 = 0, + ERDMA_NETWORK_TYPE_IPV6 = 1, +}; + +enum erdma_set_gid_op { + ERDMA_SET_GID_OP_ADD = 0, + ERDMA_SET_GID_OP_DEL = 1, +}; + +/* set gid cfg */ +#define ERDMA_CMD_SET_GID_SGID_IDX_MASK GENMASK(15, 0) +#define ERDMA_CMD_SET_GID_NTYPE_MASK BIT(16) +#define ERDMA_CMD_SET_GID_OP_MASK BIT(31) + +struct erdma_cmdq_set_gid_req { + u64 hdr; + u32 cfg; + u8 gid[ERDMA_ROCEV2_GID_SIZE]; +}; + /* cap qword 0 definition */ +#define ERDMA_CMD_DEV_CAP_MAX_GID_MASK GENMASK_ULL(51, 48) #define ERDMA_CMD_DEV_CAP_MAX_CQE_MASK GENMASK_ULL(47, 40) #define ERDMA_CMD_DEV_CAP_FLAGS_MASK GENMASK_ULL(31, 24) #define ERDMA_CMD_DEV_CAP_MAX_RECV_WR_MASK GENMASK_ULL(23, 16) +#define ERDMA_CMD_DEV_CAP_MAX_AH_MASK GENMASK_ULL(15, 8) #define ERDMA_CMD_DEV_CAP_MAX_MR_SIZE_MASK GENMASK_ULL(7, 0) /* 
cap qword 1 definition */ @@ -426,6 +531,10 @@ enum { #define ERDMA_CQE_QTYPE_RQ 1 #define ERDMA_CQE_QTYPE_CMDQ 2 +#define ERDMA_CQE_NTYPE_MASK BIT(31) +#define ERDMA_CQE_SL_MASK GENMASK(27, 20) +#define ERDMA_CQE_SQPN_MASK GENMASK(19, 0) + struct erdma_cqe { __be32 hdr; __be32 qe_idx; @@ -435,7 +544,16 @@ struct erdma_cqe { __be32 inv_rkey; }; __be32 size; - __be32 rsvd[3]; + union { + struct { + __be32 rsvd[3]; + } rc; + + struct { + __be32 rsvd[2]; + __be32 info; + } ud; + }; }; struct erdma_sge { @@ -487,7 +605,7 @@ struct erdma_write_sqe { struct erdma_sge sgl[]; }; -struct erdma_send_sqe { +struct erdma_send_sqe_rc { __le64 hdr; union { __be32 imm_data; @@ -498,6 +616,17 @@ struct erdma_send_sqe { struct erdma_sge sgl[]; }; +struct erdma_send_sqe_ud { + __le64 hdr; + __be32 imm_data; + __le32 length; + __le32 qkey; + __le32 dst_qpn; + __le32 ahn; + __le32 rsvd; + struct erdma_sge sgl[]; +}; + struct erdma_readreq_sqe { __le64 hdr; __le32 invalid_stag; diff --git a/drivers/infiniband/hw/erdma/erdma_main.c b/drivers/infiniband/hw/erdma/erdma_main.c index 62f497a71004..f35b30235018 100644 --- a/drivers/infiniband/hw/erdma/erdma_main.c +++ b/drivers/infiniband/hw/erdma/erdma_main.c @@ -26,14 +26,6 @@ static int erdma_netdev_event(struct notifier_block *nb, unsigned long event, goto done; switch (event) { - case NETDEV_UP: - dev->state = IB_PORT_ACTIVE; - erdma_port_event(dev, IB_EVENT_PORT_ACTIVE); - break; - case NETDEV_DOWN: - dev->state = IB_PORT_DOWN; - erdma_port_event(dev, IB_EVENT_PORT_ERR); - break; case NETDEV_CHANGEMTU: if (dev->mtu != netdev->mtu) { erdma_set_mtu(dev, netdev->mtu); @@ -172,6 +164,8 @@ static int erdma_device_init(struct erdma_dev *dev, struct pci_dev *pdev) { int ret; + dev->proto = erdma_reg_read32(dev, ERDMA_REGS_DEV_PROTO_REG); + dev->resp_pool = dma_pool_create("erdma_resp_pool", &pdev->dev, ERDMA_HW_RESP_SIZE, ERDMA_HW_RESP_SIZE, 0); @@ -390,7 +384,7 @@ static int erdma_dev_attrs_init(struct erdma_dev *dev) CMDQ_OPCODE_QUERY_DEVICE); err = erdma_post_cmd_wait(&dev->cmdq, &req_hdr, sizeof(req_hdr), &cap0, - &cap1); + &cap1, true); if (err) return err; @@ -398,6 +392,8 @@ static int erdma_dev_attrs_init(struct erdma_dev *dev) dev->attrs.max_mr_size = 1ULL << ERDMA_GET_CAP(MAX_MR_SIZE, cap0); dev->attrs.max_mw = 1 << ERDMA_GET_CAP(MAX_MW, cap1); dev->attrs.max_recv_wr = 1 << ERDMA_GET_CAP(MAX_RECV_WR, cap0); + dev->attrs.max_gid = 1 << ERDMA_GET_CAP(MAX_GID, cap0); + dev->attrs.max_ah = 1 << ERDMA_GET_CAP(MAX_AH, cap0); dev->attrs.local_dma_key = ERDMA_GET_CAP(DMA_LOCAL_KEY, cap1); dev->attrs.cc = ERDMA_GET_CAP(DEFAULT_CC, cap1); dev->attrs.max_qp = ERDMA_NQP_PER_QBLOCK * ERDMA_GET_CAP(QBLOCK, cap1); @@ -415,12 +411,13 @@ static int erdma_dev_attrs_init(struct erdma_dev *dev) dev->res_cb[ERDMA_RES_TYPE_PD].max_cap = ERDMA_MAX_PD; dev->res_cb[ERDMA_RES_TYPE_STAG_IDX].max_cap = dev->attrs.max_mr; + dev->res_cb[ERDMA_RES_TYPE_AH].max_cap = dev->attrs.max_ah; erdma_cmdq_build_reqhdr(&req_hdr, CMDQ_SUBMOD_COMMON, CMDQ_OPCODE_QUERY_FW_INFO); err = erdma_post_cmd_wait(&dev->cmdq, &req_hdr, sizeof(req_hdr), &cap0, - &cap1); + &cap1, true); if (!err) dev->attrs.fw_version = FIELD_GET(ERDMA_CMD_INFO0_FW_VER_MASK, cap0); @@ -441,7 +438,8 @@ static int erdma_device_config(struct erdma_dev *dev) req.cfg = FIELD_PREP(ERDMA_CMD_CONFIG_DEVICE_PGSHIFT_MASK, PAGE_SHIFT) | FIELD_PREP(ERDMA_CMD_CONFIG_DEVICE_PS_EN_MASK, 1); - return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); + return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL, + true); } 
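For context on the erdma_post_cmd_wait() signature change running through the hunks above: the cmdq no longer carries a global use_event mode; every caller now passes an explicit sleepable flag, so atomic-context callers (for example the EQ create/destroy paths, which pass false) busy-wait on the credits semaphore with down_trylock() and poll the completion with udelay(), while process-context callers pass true and sleep on the completion. A minimal sketch of a sleepable caller, assuming only the helpers and masks visible in this diff (the wrapper name below is illustrative, not part of the patch):

	/*
	 * Illustrative only: issue QUERY_FW_INFO from process context and
	 * sleep until the command completion arrives (sleepable = true).
	 */
	static int example_query_fw_info(struct erdma_dev *dev, u64 *fw_ver)
	{
		u64 hdr, cap0, cap1;
		int err;

		erdma_cmdq_build_reqhdr(&hdr, CMDQ_SUBMOD_COMMON,
					CMDQ_OPCODE_QUERY_FW_INFO);

		/* resp0/resp1 receive the 16B completion payload */
		err = erdma_post_cmd_wait(&dev->cmdq, &hdr, sizeof(hdr),
					  &cap0, &cap1, true);
		if (err)
			return err;

		*fw_ver = FIELD_GET(ERDMA_CMD_INFO0_FW_VER_MASK, cap0);
		return 0;
	}

A caller that may run in atomic context would pass false instead, as the create_eq_cmd() and erdma_ceq_uninit_one() hunks above do.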
static int erdma_res_cb_init(struct erdma_dev *dev) @@ -474,6 +472,29 @@ static void erdma_res_cb_free(struct erdma_dev *dev) bitmap_free(dev->res_cb[i].bitmap); } +static const struct ib_device_ops erdma_device_ops_rocev2 = { + .get_link_layer = erdma_get_link_layer, + .add_gid = erdma_add_gid, + .del_gid = erdma_del_gid, + .query_pkey = erdma_query_pkey, + .create_ah = erdma_create_ah, + .destroy_ah = erdma_destroy_ah, + .query_ah = erdma_query_ah, + + INIT_RDMA_OBJ_SIZE(ib_ah, erdma_ah, ibah), +}; + +static const struct ib_device_ops erdma_device_ops_iwarp = { + .iw_accept = erdma_accept, + .iw_add_ref = erdma_qp_get_ref, + .iw_connect = erdma_connect, + .iw_create_listen = erdma_create_listen, + .iw_destroy_listen = erdma_destroy_listen, + .iw_get_qp = erdma_get_ibqp, + .iw_reject = erdma_reject, + .iw_rem_ref = erdma_qp_put_ref, +}; + static const struct ib_device_ops erdma_device_ops = { .owner = THIS_MODULE, .driver_id = RDMA_DRIVER_ERDMA, @@ -494,18 +515,9 @@ static const struct ib_device_ops erdma_device_ops = { .get_dma_mr = erdma_get_dma_mr, .get_hw_stats = erdma_get_hw_stats, .get_port_immutable = erdma_get_port_immutable, - .iw_accept = erdma_accept, - .iw_add_ref = erdma_qp_get_ref, - .iw_connect = erdma_connect, - .iw_create_listen = erdma_create_listen, - .iw_destroy_listen = erdma_destroy_listen, - .iw_get_qp = erdma_get_ibqp, - .iw_reject = erdma_reject, - .iw_rem_ref = erdma_qp_put_ref, .map_mr_sg = erdma_map_mr_sg, .mmap = erdma_mmap, .mmap_free = erdma_mmap_free, - .modify_qp = erdma_modify_qp, .post_recv = erdma_post_recv, .post_send = erdma_post_send, .poll_cq = erdma_poll_cq, @@ -515,6 +527,7 @@ static const struct ib_device_ops erdma_device_ops = { .query_qp = erdma_query_qp, .req_notify_cq = erdma_req_notify_cq, .reg_user_mr = erdma_reg_user_mr, + .modify_qp = erdma_modify_qp, INIT_RDMA_OBJ_SIZE(ib_cq, erdma_cq, ibcq), INIT_RDMA_OBJ_SIZE(ib_pd, erdma_pd, ibpd), @@ -537,7 +550,14 @@ static int erdma_ib_device_add(struct pci_dev *pdev) if (ret) return ret; - ibdev->node_type = RDMA_NODE_RNIC; + if (erdma_device_iwarp(dev)) { + ibdev->node_type = RDMA_NODE_RNIC; + ib_set_device_ops(ibdev, &erdma_device_ops_iwarp); + } else { + ibdev->node_type = RDMA_NODE_IB_CA; + ib_set_device_ops(ibdev, &erdma_device_ops_rocev2); + } + memcpy(ibdev->node_desc, ERDMA_NODE_DESC, sizeof(ERDMA_NODE_DESC)); /* diff --git a/drivers/infiniband/hw/erdma/erdma_qp.c b/drivers/infiniband/hw/erdma/erdma_qp.c index 4d1f9114cd97..25f6c49aec77 100644 --- a/drivers/infiniband/hw/erdma/erdma_qp.c +++ b/drivers/infiniband/hw/erdma/erdma_qp.c @@ -11,20 +11,20 @@ void erdma_qp_llp_close(struct erdma_qp *qp) { - struct erdma_qp_attrs qp_attrs; + struct erdma_mod_qp_params_iwarp params; down_write(&qp->state_lock); - switch (qp->attrs.state) { - case ERDMA_QP_STATE_RTS: - case ERDMA_QP_STATE_RTR: - case ERDMA_QP_STATE_IDLE: - case ERDMA_QP_STATE_TERMINATE: - qp_attrs.state = ERDMA_QP_STATE_CLOSING; - erdma_modify_qp_internal(qp, &qp_attrs, ERDMA_QP_ATTR_STATE); + switch (qp->attrs.iwarp.state) { + case ERDMA_QPS_IWARP_RTS: + case ERDMA_QPS_IWARP_RTR: + case ERDMA_QPS_IWARP_IDLE: + case ERDMA_QPS_IWARP_TERMINATE: + params.state = ERDMA_QPS_IWARP_CLOSING; + erdma_modify_qp_state_iwarp(qp, ¶ms, ERDMA_QPA_IWARP_STATE); break; - case ERDMA_QP_STATE_CLOSING: - qp->attrs.state = ERDMA_QP_STATE_IDLE; + case ERDMA_QPS_IWARP_CLOSING: + qp->attrs.iwarp.state = ERDMA_QPS_IWARP_IDLE; break; default: break; @@ -48,9 +48,10 @@ struct ib_qp *erdma_get_ibqp(struct ib_device *ibdev, int id) return NULL; } -static int 
erdma_modify_qp_state_to_rts(struct erdma_qp *qp, - struct erdma_qp_attrs *attrs, - enum erdma_qp_attr_mask mask) +static int +erdma_modify_qp_state_to_rts(struct erdma_qp *qp, + struct erdma_mod_qp_params_iwarp *params, + enum erdma_qpa_mask_iwarp mask) { int ret; struct erdma_dev *dev = qp->dev; @@ -59,12 +60,15 @@ static int erdma_modify_qp_state_to_rts(struct erdma_qp *qp, struct erdma_cep *cep = qp->cep; struct sockaddr_storage local_addr, remote_addr; - if (!(mask & ERDMA_QP_ATTR_LLP_HANDLE)) + if (!(mask & ERDMA_QPA_IWARP_LLP_HANDLE)) return -EINVAL; - if (!(mask & ERDMA_QP_ATTR_MPA)) + if (!(mask & ERDMA_QPA_IWARP_MPA)) return -EINVAL; + if (!(mask & ERDMA_QPA_IWARP_CC)) + params->cc = qp->attrs.cc; + ret = getname_local(cep->sock, &local_addr); if (ret < 0) return ret; @@ -73,18 +77,16 @@ static int erdma_modify_qp_state_to_rts(struct erdma_qp *qp, if (ret < 0) return ret; - qp->attrs.state = ERDMA_QP_STATE_RTS; - tp = tcp_sk(qp->cep->sock->sk); erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA, CMDQ_OPCODE_MODIFY_QP); - req.cfg = FIELD_PREP(ERDMA_CMD_MODIFY_QP_STATE_MASK, qp->attrs.state) | - FIELD_PREP(ERDMA_CMD_MODIFY_QP_CC_MASK, qp->attrs.cc) | + req.cfg = FIELD_PREP(ERDMA_CMD_MODIFY_QP_STATE_MASK, params->state) | + FIELD_PREP(ERDMA_CMD_MODIFY_QP_CC_MASK, params->cc) | FIELD_PREP(ERDMA_CMD_MODIFY_QP_QPN_MASK, QP_ID(qp)); - req.cookie = be32_to_cpu(qp->cep->mpa.ext_data.cookie); + req.cookie = be32_to_cpu(cep->mpa.ext_data.cookie); req.dip = to_sockaddr_in(remote_addr).sin_addr.s_addr; req.sip = to_sockaddr_in(local_addr).sin_addr.s_addr; req.dport = to_sockaddr_in(remote_addr).sin_port; @@ -92,33 +94,57 @@ static int erdma_modify_qp_state_to_rts(struct erdma_qp *qp, req.send_nxt = tp->snd_nxt; /* rsvd tcp seq for mpa-rsp in server. */ - if (qp->attrs.qp_type == ERDMA_QP_PASSIVE) - req.send_nxt += MPA_DEFAULT_HDR_LEN + qp->attrs.pd_len; + if (params->qp_type == ERDMA_QP_PASSIVE) + req.send_nxt += MPA_DEFAULT_HDR_LEN + params->pd_len; req.recv_nxt = tp->rcv_nxt; - return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); + ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL, + true); + if (ret) + return ret; + + if (mask & ERDMA_QPA_IWARP_IRD) + qp->attrs.irq_size = params->irq_size; + + if (mask & ERDMA_QPA_IWARP_ORD) + qp->attrs.orq_size = params->orq_size; + + if (mask & ERDMA_QPA_IWARP_CC) + qp->attrs.cc = params->cc; + + qp->attrs.iwarp.state = ERDMA_QPS_IWARP_RTS; + + return 0; } -static int erdma_modify_qp_state_to_stop(struct erdma_qp *qp, - struct erdma_qp_attrs *attrs, - enum erdma_qp_attr_mask mask) +static int +erdma_modify_qp_state_to_stop(struct erdma_qp *qp, + struct erdma_mod_qp_params_iwarp *params, + enum erdma_qpa_mask_iwarp mask) { struct erdma_dev *dev = qp->dev; struct erdma_cmdq_modify_qp_req req; - - qp->attrs.state = attrs->state; + int ret; erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA, CMDQ_OPCODE_MODIFY_QP); - req.cfg = FIELD_PREP(ERDMA_CMD_MODIFY_QP_STATE_MASK, attrs->state) | + req.cfg = FIELD_PREP(ERDMA_CMD_MODIFY_QP_STATE_MASK, params->state) | FIELD_PREP(ERDMA_CMD_MODIFY_QP_QPN_MASK, QP_ID(qp)); - return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); + ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL, + true); + if (ret) + return ret; + + qp->attrs.iwarp.state = params->state; + + return 0; } -int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs, - enum erdma_qp_attr_mask mask) +int erdma_modify_qp_state_iwarp(struct erdma_qp *qp, + struct 
erdma_mod_qp_params_iwarp *params, + int mask) { bool need_reflush = false; int drop_conn, ret = 0; @@ -126,31 +152,31 @@ int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs, if (!mask) return 0; - if (!(mask & ERDMA_QP_ATTR_STATE)) + if (!(mask & ERDMA_QPA_IWARP_STATE)) return 0; - switch (qp->attrs.state) { - case ERDMA_QP_STATE_IDLE: - case ERDMA_QP_STATE_RTR: - if (attrs->state == ERDMA_QP_STATE_RTS) { - ret = erdma_modify_qp_state_to_rts(qp, attrs, mask); - } else if (attrs->state == ERDMA_QP_STATE_ERROR) { - qp->attrs.state = ERDMA_QP_STATE_ERROR; + switch (qp->attrs.iwarp.state) { + case ERDMA_QPS_IWARP_IDLE: + case ERDMA_QPS_IWARP_RTR: + if (params->state == ERDMA_QPS_IWARP_RTS) { + ret = erdma_modify_qp_state_to_rts(qp, params, mask); + } else if (params->state == ERDMA_QPS_IWARP_ERROR) { + qp->attrs.iwarp.state = ERDMA_QPS_IWARP_ERROR; need_reflush = true; if (qp->cep) { erdma_cep_put(qp->cep); qp->cep = NULL; } - ret = erdma_modify_qp_state_to_stop(qp, attrs, mask); + ret = erdma_modify_qp_state_to_stop(qp, params, mask); } break; - case ERDMA_QP_STATE_RTS: + case ERDMA_QPS_IWARP_RTS: drop_conn = 0; - if (attrs->state == ERDMA_QP_STATE_CLOSING || - attrs->state == ERDMA_QP_STATE_TERMINATE || - attrs->state == ERDMA_QP_STATE_ERROR) { - ret = erdma_modify_qp_state_to_stop(qp, attrs, mask); + if (params->state == ERDMA_QPS_IWARP_CLOSING || + params->state == ERDMA_QPS_IWARP_TERMINATE || + params->state == ERDMA_QPS_IWARP_ERROR) { + ret = erdma_modify_qp_state_to_stop(qp, params, mask); drop_conn = 1; need_reflush = true; } @@ -159,17 +185,17 @@ int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs, erdma_qp_cm_drop(qp); break; - case ERDMA_QP_STATE_TERMINATE: - if (attrs->state == ERDMA_QP_STATE_ERROR) - qp->attrs.state = ERDMA_QP_STATE_ERROR; + case ERDMA_QPS_IWARP_TERMINATE: + if (params->state == ERDMA_QPS_IWARP_ERROR) + qp->attrs.iwarp.state = ERDMA_QPS_IWARP_ERROR; break; - case ERDMA_QP_STATE_CLOSING: - if (attrs->state == ERDMA_QP_STATE_IDLE) { - qp->attrs.state = ERDMA_QP_STATE_IDLE; - } else if (attrs->state == ERDMA_QP_STATE_ERROR) { - ret = erdma_modify_qp_state_to_stop(qp, attrs, mask); - qp->attrs.state = ERDMA_QP_STATE_ERROR; - } else if (attrs->state != ERDMA_QP_STATE_CLOSING) { + case ERDMA_QPS_IWARP_CLOSING: + if (params->state == ERDMA_QPS_IWARP_IDLE) { + qp->attrs.iwarp.state = ERDMA_QPS_IWARP_IDLE; + } else if (params->state == ERDMA_QPS_IWARP_ERROR) { + ret = erdma_modify_qp_state_to_stop(qp, params, mask); + qp->attrs.iwarp.state = ERDMA_QPS_IWARP_ERROR; + } else if (params->state != ERDMA_QPS_IWARP_CLOSING) { return -ECONNABORTED; } break; @@ -186,6 +212,98 @@ int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs, return ret; } +static int modify_qp_cmd_rocev2(struct erdma_qp *qp, + struct erdma_mod_qp_params_rocev2 *params, + enum erdma_qpa_mask_rocev2 attr_mask) +{ + struct erdma_cmdq_mod_qp_req_rocev2 req; + + memset(&req, 0, sizeof(req)); + + erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA, + CMDQ_OPCODE_MODIFY_QP); + + req.cfg0 = FIELD_PREP(ERDMA_CMD_MODIFY_QP_QPN_MASK, QP_ID(qp)); + + if (attr_mask & ERDMA_QPA_ROCEV2_STATE) + req.cfg0 |= FIELD_PREP(ERDMA_CMD_MODIFY_QP_STATE_MASK, + params->state); + + if (attr_mask & ERDMA_QPA_ROCEV2_DST_QPN) + req.cfg1 = FIELD_PREP(ERDMA_CMD_MODIFY_QP_DQPN_MASK, + params->dst_qpn); + + if (attr_mask & ERDMA_QPA_ROCEV2_QKEY) + req.qkey = params->qkey; + + if (attr_mask & ERDMA_QPA_ROCEV2_AV) + erdma_set_av_cfg(&req.av_cfg, ¶ms->av); + + if 
(attr_mask & ERDMA_QPA_ROCEV2_SQ_PSN) + req.sq_psn = params->sq_psn; + + if (attr_mask & ERDMA_QPA_ROCEV2_RQ_PSN) + req.rq_psn = params->rq_psn; + + req.attr_mask = attr_mask; + + return erdma_post_cmd_wait(&qp->dev->cmdq, &req, sizeof(req), NULL, + NULL, true); +} + +static void erdma_reset_qp(struct erdma_qp *qp) +{ + qp->kern_qp.sq_pi = 0; + qp->kern_qp.sq_ci = 0; + qp->kern_qp.rq_pi = 0; + qp->kern_qp.rq_ci = 0; + memset(qp->kern_qp.swr_tbl, 0, qp->attrs.sq_size * sizeof(u64)); + memset(qp->kern_qp.rwr_tbl, 0, qp->attrs.rq_size * sizeof(u64)); + memset(qp->kern_qp.sq_buf, 0, qp->attrs.sq_size << SQEBB_SHIFT); + memset(qp->kern_qp.rq_buf, 0, qp->attrs.rq_size << RQE_SHIFT); + erdma_remove_cqes_of_qp(&qp->scq->ibcq, QP_ID(qp)); + if (qp->rcq != qp->scq) + erdma_remove_cqes_of_qp(&qp->rcq->ibcq, QP_ID(qp)); +} + +int erdma_modify_qp_state_rocev2(struct erdma_qp *qp, + struct erdma_mod_qp_params_rocev2 *params, + int attr_mask) +{ + struct erdma_dev *dev = to_edev(qp->ibqp.device); + int ret; + + ret = modify_qp_cmd_rocev2(qp, params, attr_mask); + if (ret) + return ret; + + if (attr_mask & ERDMA_QPA_ROCEV2_STATE) + qp->attrs.rocev2.state = params->state; + + if (attr_mask & ERDMA_QPA_ROCEV2_QKEY) + qp->attrs.rocev2.qkey = params->qkey; + + if (attr_mask & ERDMA_QPA_ROCEV2_DST_QPN) + qp->attrs.rocev2.dst_qpn = params->dst_qpn; + + if (attr_mask & ERDMA_QPA_ROCEV2_AV) + memcpy(&qp->attrs.rocev2.av, ¶ms->av, + sizeof(struct erdma_av)); + + if (rdma_is_kernel_res(&qp->ibqp.res) && + params->state == ERDMA_QPS_ROCEV2_RESET) + erdma_reset_qp(qp); + + if (rdma_is_kernel_res(&qp->ibqp.res) && + params->state == ERDMA_QPS_ROCEV2_ERROR) { + qp->flags |= ERDMA_QP_IN_FLUSHING; + mod_delayed_work(dev->reflush_wq, &qp->reflush_dwork, + usecs_to_jiffies(100)); + } + + return 0; +} + static void erdma_qp_safe_free(struct kref *ref) { struct erdma_qp *qp = container_of(ref, struct erdma_qp, ref); @@ -282,17 +400,57 @@ static int fill_sgl(struct erdma_qp *qp, const struct ib_send_wr *send_wr, return 0; } +static void init_send_sqe_rc(struct erdma_qp *qp, struct erdma_send_sqe_rc *sqe, + const struct ib_send_wr *wr, u32 *hw_op) +{ + u32 op = ERDMA_OP_SEND; + + if (wr->opcode == IB_WR_SEND_WITH_IMM) { + op = ERDMA_OP_SEND_WITH_IMM; + sqe->imm_data = wr->ex.imm_data; + } else if (wr->opcode == IB_WR_SEND_WITH_INV) { + op = ERDMA_OP_SEND_WITH_INV; + sqe->invalid_stag = cpu_to_le32(wr->ex.invalidate_rkey); + } + + *hw_op = op; +} + +static void init_send_sqe_ud(struct erdma_qp *qp, struct erdma_send_sqe_ud *sqe, + const struct ib_send_wr *wr, u32 *hw_op) +{ + const struct ib_ud_wr *uwr = ud_wr(wr); + struct erdma_ah *ah = to_eah(uwr->ah); + u32 op = ERDMA_OP_SEND; + + if (wr->opcode == IB_WR_SEND_WITH_IMM) { + op = ERDMA_OP_SEND_WITH_IMM; + sqe->imm_data = wr->ex.imm_data; + } + + *hw_op = op; + + sqe->ahn = cpu_to_le32(ah->ahn); + sqe->dst_qpn = cpu_to_le32(uwr->remote_qpn); + /* Not allowed to send control qkey */ + if (uwr->remote_qkey & 0x80000000) + sqe->qkey = cpu_to_le32(qp->attrs.rocev2.qkey); + else + sqe->qkey = cpu_to_le32(uwr->remote_qkey); +} + static int erdma_push_one_sqe(struct erdma_qp *qp, u16 *pi, const struct ib_send_wr *send_wr) { u32 wqe_size, wqebb_cnt, hw_op, flags, sgl_offset; u32 idx = *pi & (qp->attrs.sq_size - 1); enum ib_wr_opcode op = send_wr->opcode; + struct erdma_send_sqe_rc *rc_send_sqe; + struct erdma_send_sqe_ud *ud_send_sqe; struct erdma_atomic_sqe *atomic_sqe; struct erdma_readreq_sqe *read_sqe; struct erdma_reg_mr_sqe *regmr_sge; struct erdma_write_sqe *write_sqe; - struct 
erdma_send_sqe *send_sqe; struct ib_rdma_wr *rdma_wr; struct erdma_sge *sge; __le32 *length_field; @@ -301,6 +459,10 @@ static int erdma_push_one_sqe(struct erdma_qp *qp, u16 *pi, u32 attrs; int ret; + if (qp->ibqp.qp_type != IB_QPT_RC && send_wr->opcode != IB_WR_SEND && + send_wr->opcode != IB_WR_SEND_WITH_IMM) + return -EINVAL; + entry = get_queue_entry(qp->kern_qp.sq_buf, idx, qp->attrs.sq_size, SQEBB_SHIFT); @@ -374,21 +536,20 @@ static int erdma_push_one_sqe(struct erdma_qp *qp, u16 *pi, case IB_WR_SEND: case IB_WR_SEND_WITH_IMM: case IB_WR_SEND_WITH_INV: - send_sqe = (struct erdma_send_sqe *)entry; - hw_op = ERDMA_OP_SEND; - if (op == IB_WR_SEND_WITH_IMM) { - hw_op = ERDMA_OP_SEND_WITH_IMM; - send_sqe->imm_data = send_wr->ex.imm_data; - } else if (op == IB_WR_SEND_WITH_INV) { - hw_op = ERDMA_OP_SEND_WITH_INV; - send_sqe->invalid_stag = - cpu_to_le32(send_wr->ex.invalidate_rkey); + if (qp->ibqp.qp_type == IB_QPT_RC) { + rc_send_sqe = (struct erdma_send_sqe_rc *)entry; + init_send_sqe_rc(qp, rc_send_sqe, send_wr, &hw_op); + length_field = &rc_send_sqe->length; + wqe_size = sizeof(struct erdma_send_sqe_rc); + } else { + ud_send_sqe = (struct erdma_send_sqe_ud *)entry; + init_send_sqe_ud(qp, ud_send_sqe, send_wr, &hw_op); + length_field = &ud_send_sqe->length; + wqe_size = sizeof(struct erdma_send_sqe_ud); } - wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, hw_op); - length_field = &send_sqe->length; - wqe_size = sizeof(struct erdma_send_sqe); - sgl_offset = wqe_size; + sgl_offset = wqe_size; + wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, hw_op); break; case IB_WR_REG_MR: wqe_hdr |= diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.c b/drivers/infiniband/hw/erdma/erdma_verbs.c index 51d619edb6c5..af36a8d2df22 100644 --- a/drivers/infiniband/hw/erdma/erdma_verbs.c +++ b/drivers/infiniband/hw/erdma/erdma_verbs.c @@ -55,6 +55,13 @@ static int create_qp_cmd(struct erdma_ucontext *uctx, struct erdma_qp *qp) ilog2(qp->attrs.rq_size)) | FIELD_PREP(ERDMA_CMD_CREATE_QP_PD_MASK, pd->pdn); + if (qp->ibqp.qp_type == IB_QPT_RC) + req.cfg2 = FIELD_PREP(ERDMA_CMD_CREATE_QP_TYPE_MASK, + ERDMA_QPT_RC); + else + req.cfg2 = FIELD_PREP(ERDMA_CMD_CREATE_QP_TYPE_MASK, + ERDMA_QPT_UD); + if (rdma_is_kernel_res(&qp->ibqp.res)) { u32 pgsz_range = ilog2(SZ_1M) - ERDMA_HW_PAGE_SHIFT; @@ -119,10 +126,10 @@ static int create_qp_cmd(struct erdma_ucontext *uctx, struct erdma_qp *qp) } } - err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), &resp0, - &resp1); - if (!err) - qp->attrs.cookie = + err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), &resp0, &resp1, + true); + if (!err && erdma_device_iwarp(dev)) + qp->attrs.iwarp.cookie = FIELD_GET(ERDMA_CMDQ_CREATE_QP_RESP_COOKIE_MASK, resp0); return err; @@ -178,7 +185,8 @@ static int regmr_cmd(struct erdma_dev *dev, struct erdma_mr *mr) } post_cmd: - return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); + return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL, + true); } static int create_cq_cmd(struct erdma_ucontext *uctx, struct erdma_cq *cq) @@ -240,7 +248,8 @@ static int create_cq_cmd(struct erdma_ucontext *uctx, struct erdma_cq *cq) } } - return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); + return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL, + true); } static int erdma_alloc_idx(struct erdma_resource_cb *res_cb) @@ -336,6 +345,11 @@ int erdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr, attr->max_fast_reg_page_list_len = ERDMA_MAX_FRMR_PA; attr->page_size_cap 
= ERDMA_PAGE_SIZE_SUPPORT; + if (erdma_device_rocev2(dev)) { + attr->max_pkeys = ERDMA_MAX_PKEYS; + attr->max_ah = dev->attrs.max_ah; + } + if (dev->attrs.cap_flags & ERDMA_DEV_CAP_FLAGS_ATOMIC) attr->atomic_cap = IB_ATOMIC_GLOB; @@ -367,7 +381,14 @@ int erdma_query_port(struct ib_device *ibdev, u32 port, memset(attr, 0, sizeof(*attr)); - attr->gid_tbl_len = 1; + if (erdma_device_iwarp(dev)) { + attr->gid_tbl_len = 1; + } else { + attr->gid_tbl_len = dev->attrs.max_gid; + attr->ip_gids = true; + attr->pkey_tbl_len = ERDMA_MAX_PKEYS; + } + attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_DEVICE_MGMT_SUP; attr->max_msg_sz = -1; @@ -377,14 +398,10 @@ int erdma_query_port(struct ib_device *ibdev, u32 port, ib_get_eth_speed(ibdev, port, &attr->active_speed, &attr->active_width); attr->max_mtu = ib_mtu_int_to_enum(ndev->mtu); attr->active_mtu = ib_mtu_int_to_enum(ndev->mtu); - if (netif_running(ndev) && netif_carrier_ok(ndev)) - dev->state = IB_PORT_ACTIVE; - else - dev->state = IB_PORT_DOWN; - attr->state = dev->state; + attr->state = ib_get_curr_port_state(ndev); out: - if (dev->state == IB_PORT_ACTIVE) + if (attr->state == IB_PORT_ACTIVE) attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP; else attr->phys_state = IB_PORT_PHYS_STATE_DISABLED; @@ -395,8 +412,18 @@ out: int erdma_get_port_immutable(struct ib_device *ibdev, u32 port, struct ib_port_immutable *port_immutable) { - port_immutable->gid_tbl_len = 1; - port_immutable->core_cap_flags = RDMA_CORE_PORT_IWARP; + struct erdma_dev *dev = to_edev(ibdev); + + if (erdma_device_iwarp(dev)) { + port_immutable->core_cap_flags = RDMA_CORE_PORT_IWARP; + port_immutable->gid_tbl_len = 1; + } else { + port_immutable->core_cap_flags = + RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP; + port_immutable->max_mad_size = IB_MGMT_MAD_SIZE; + port_immutable->gid_tbl_len = dev->attrs.max_gid; + port_immutable->pkey_tbl_len = ERDMA_MAX_PKEYS; + } return 0; } @@ -438,7 +465,8 @@ static void erdma_flush_worker(struct work_struct *work) req.qpn = QP_ID(qp); req.sq_pi = qp->kern_qp.sq_pi; req.rq_pi = qp->kern_qp.rq_pi; - erdma_post_cmd_wait(&qp->dev->cmdq, &req, sizeof(req), NULL, NULL); + erdma_post_cmd_wait(&qp->dev->cmdq, &req, sizeof(req), NULL, NULL, + true); } static int erdma_qp_validate_cap(struct erdma_dev *dev, @@ -459,7 +487,11 @@ static int erdma_qp_validate_cap(struct erdma_dev *dev, static int erdma_qp_validate_attr(struct erdma_dev *dev, struct ib_qp_init_attr *attrs) { - if (attrs->qp_type != IB_QPT_RC) + if (erdma_device_iwarp(dev) && attrs->qp_type != IB_QPT_RC) + return -EOPNOTSUPP; + + if (erdma_device_rocev2(dev) && attrs->qp_type != IB_QPT_RC && + attrs->qp_type != IB_QPT_UD && attrs->qp_type != IB_QPT_GSI) return -EOPNOTSUPP; if (attrs->srq) @@ -937,7 +969,8 @@ int erdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs, udata, struct erdma_ucontext, ibucontext); struct erdma_ureq_create_qp ureq; struct erdma_uresp_create_qp uresp; - int ret; + void *old_entry; + int ret = 0; ret = erdma_qp_validate_cap(dev, attrs); if (ret) @@ -956,9 +989,16 @@ int erdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs, kref_init(&qp->ref); init_completion(&qp->safe_free); - ret = xa_alloc_cyclic(&dev->qp_xa, &qp->ibqp.qp_num, qp, - XA_LIMIT(1, dev->attrs.max_qp - 1), - &dev->next_alloc_qpn, GFP_KERNEL); + if (qp->ibqp.qp_type == IB_QPT_GSI) { + old_entry = xa_store(&dev->qp_xa, 1, qp, GFP_KERNEL); + if (xa_is_err(old_entry)) + ret = xa_err(old_entry); + } else { + ret = xa_alloc_cyclic(&dev->qp_xa, &qp->ibqp.qp_num, qp, + XA_LIMIT(1, dev->attrs.max_qp - 
1), + &dev->next_alloc_qpn, GFP_KERNEL); + } + if (ret < 0) { ret = -ENOMEM; goto err_out; @@ -995,7 +1035,12 @@ int erdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs, qp->attrs.max_send_sge = attrs->cap.max_send_sge; qp->attrs.max_recv_sge = attrs->cap.max_recv_sge; - qp->attrs.state = ERDMA_QP_STATE_IDLE; + + if (erdma_device_iwarp(qp->dev)) + qp->attrs.iwarp.state = ERDMA_QPS_IWARP_IDLE; + else + qp->attrs.rocev2.state = ERDMA_QPS_ROCEV2_RESET; + INIT_DELAYED_WORK(&qp->reflush_dwork, erdma_flush_worker); ret = create_qp_cmd(uctx, qp); @@ -1219,7 +1264,8 @@ int erdma_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) req.cfg = FIELD_PREP(ERDMA_CMD_MR_MPT_IDX_MASK, ibmr->lkey >> 8) | FIELD_PREP(ERDMA_CMD_MR_KEY_MASK, ibmr->lkey & 0xFF); - ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); + ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL, + true); if (ret) return ret; @@ -1244,7 +1290,8 @@ int erdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) CMDQ_OPCODE_DESTROY_CQ); req.cqn = cq->cqn; - err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); + err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL, + true); if (err) return err; @@ -1269,13 +1316,20 @@ int erdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) struct erdma_dev *dev = to_edev(ibqp->device); struct erdma_ucontext *ctx = rdma_udata_to_drv_context( udata, struct erdma_ucontext, ibucontext); - struct erdma_qp_attrs qp_attrs; - int err; struct erdma_cmdq_destroy_qp_req req; + union erdma_mod_qp_params params; + int err; down_write(&qp->state_lock); - qp_attrs.state = ERDMA_QP_STATE_ERROR; - erdma_modify_qp_internal(qp, &qp_attrs, ERDMA_QP_ATTR_STATE); + if (erdma_device_iwarp(dev)) { + params.iwarp.state = ERDMA_QPS_IWARP_ERROR; + erdma_modify_qp_state_iwarp(qp, ¶ms.iwarp, + ERDMA_QPA_IWARP_STATE); + } else { + params.rocev2.state = ERDMA_QPS_ROCEV2_ERROR; + erdma_modify_qp_state_rocev2(qp, ¶ms.rocev2, + ERDMA_QPA_ROCEV2_STATE); + } up_write(&qp->state_lock); cancel_delayed_work_sync(&qp->reflush_dwork); @@ -1284,7 +1338,8 @@ int erdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) CMDQ_OPCODE_DESTROY_QP); req.qpn = QP_ID(qp); - err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); + err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL, + true); if (err) return err; @@ -1382,7 +1437,8 @@ static int alloc_db_resources(struct erdma_dev *dev, struct erdma_ucontext *ctx, FIELD_PREP(ERDMA_CMD_EXT_DB_RQ_EN_MASK, 1) | FIELD_PREP(ERDMA_CMD_EXT_DB_SQ_EN_MASK, 1); - ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), &val0, &val1); + ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), &val0, &val1, + true); if (ret) return ret; @@ -1417,7 +1473,8 @@ static void free_db_resources(struct erdma_dev *dev, struct erdma_ucontext *ctx) req.rdb_off = ctx->ext_db.rdb_off; req.cdb_off = ctx->ext_db.cdb_off; - ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); + ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL, + true); if (ret) ibdev_err_ratelimited(&dev->ibdev, "free db resources failed %d", ret); @@ -1506,69 +1563,248 @@ void erdma_dealloc_ucontext(struct ib_ucontext *ibctx) atomic_dec(&dev->num_ctx); } -static int ib_qp_state_to_erdma_qp_state[IB_QPS_ERR + 1] = { - [IB_QPS_RESET] = ERDMA_QP_STATE_IDLE, - [IB_QPS_INIT] = ERDMA_QP_STATE_IDLE, - [IB_QPS_RTR] = ERDMA_QP_STATE_RTR, - [IB_QPS_RTS] = ERDMA_QP_STATE_RTS, - [IB_QPS_SQD] = ERDMA_QP_STATE_CLOSING, - 
[IB_QPS_SQE] = ERDMA_QP_STATE_TERMINATE, - [IB_QPS_ERR] = ERDMA_QP_STATE_ERROR +static void erdma_attr_to_av(const struct rdma_ah_attr *ah_attr, + struct erdma_av *av, u16 sport) +{ + const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr); + + av->port = rdma_ah_get_port_num(ah_attr); + av->sgid_index = grh->sgid_index; + av->hop_limit = grh->hop_limit; + av->traffic_class = grh->traffic_class; + av->sl = rdma_ah_get_sl(ah_attr); + + av->flow_label = grh->flow_label; + av->udp_sport = sport; + + ether_addr_copy(av->dmac, ah_attr->roce.dmac); + memcpy(av->dgid, grh->dgid.raw, ERDMA_ROCEV2_GID_SIZE); + + if (ipv6_addr_v4mapped((struct in6_addr *)&grh->dgid)) + av->ntype = ERDMA_NETWORK_TYPE_IPV4; + else + av->ntype = ERDMA_NETWORK_TYPE_IPV6; +} + +static void erdma_av_to_attr(struct erdma_av *av, struct rdma_ah_attr *ah_attr) +{ + ah_attr->type = RDMA_AH_ATTR_TYPE_ROCE; + + rdma_ah_set_sl(ah_attr, av->sl); + rdma_ah_set_port_num(ah_attr, av->port); + rdma_ah_set_ah_flags(ah_attr, IB_AH_GRH); + + rdma_ah_set_grh(ah_attr, NULL, av->flow_label, av->sgid_index, + av->hop_limit, av->traffic_class); + rdma_ah_set_dgid_raw(ah_attr, av->dgid); +} + +static int ib_qps_to_erdma_qps[ERDMA_PROTO_COUNT][IB_QPS_ERR + 1] = { + [ERDMA_PROTO_IWARP] = { + [IB_QPS_RESET] = ERDMA_QPS_IWARP_IDLE, + [IB_QPS_INIT] = ERDMA_QPS_IWARP_IDLE, + [IB_QPS_RTR] = ERDMA_QPS_IWARP_RTR, + [IB_QPS_RTS] = ERDMA_QPS_IWARP_RTS, + [IB_QPS_SQD] = ERDMA_QPS_IWARP_CLOSING, + [IB_QPS_SQE] = ERDMA_QPS_IWARP_TERMINATE, + [IB_QPS_ERR] = ERDMA_QPS_IWARP_ERROR, + }, + [ERDMA_PROTO_ROCEV2] = { + [IB_QPS_RESET] = ERDMA_QPS_ROCEV2_RESET, + [IB_QPS_INIT] = ERDMA_QPS_ROCEV2_INIT, + [IB_QPS_RTR] = ERDMA_QPS_ROCEV2_RTR, + [IB_QPS_RTS] = ERDMA_QPS_ROCEV2_RTS, + [IB_QPS_SQD] = ERDMA_QPS_ROCEV2_SQD, + [IB_QPS_SQE] = ERDMA_QPS_ROCEV2_SQE, + [IB_QPS_ERR] = ERDMA_QPS_ROCEV2_ERROR, + }, }; -int erdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, - struct ib_udata *udata) +static int erdma_qps_to_ib_qps[ERDMA_PROTO_COUNT][ERDMA_QPS_ROCEV2_COUNT] = { + [ERDMA_PROTO_IWARP] = { + [ERDMA_QPS_IWARP_IDLE] = IB_QPS_INIT, + [ERDMA_QPS_IWARP_RTR] = IB_QPS_RTR, + [ERDMA_QPS_IWARP_RTS] = IB_QPS_RTS, + [ERDMA_QPS_IWARP_CLOSING] = IB_QPS_ERR, + [ERDMA_QPS_IWARP_TERMINATE] = IB_QPS_ERR, + [ERDMA_QPS_IWARP_ERROR] = IB_QPS_ERR, + }, + [ERDMA_PROTO_ROCEV2] = { + [ERDMA_QPS_ROCEV2_RESET] = IB_QPS_RESET, + [ERDMA_QPS_ROCEV2_INIT] = IB_QPS_INIT, + [ERDMA_QPS_ROCEV2_RTR] = IB_QPS_RTR, + [ERDMA_QPS_ROCEV2_RTS] = IB_QPS_RTS, + [ERDMA_QPS_ROCEV2_SQD] = IB_QPS_SQD, + [ERDMA_QPS_ROCEV2_SQE] = IB_QPS_SQE, + [ERDMA_QPS_ROCEV2_ERROR] = IB_QPS_ERR, + }, +}; + +static inline enum erdma_qps_iwarp ib_to_iwarp_qps(enum ib_qp_state state) { - struct erdma_qp_attrs new_attrs; - enum erdma_qp_attr_mask erdma_attr_mask = 0; - struct erdma_qp *qp = to_eqp(ibqp); - int ret = 0; + return ib_qps_to_erdma_qps[ERDMA_PROTO_IWARP][state]; +} - if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) - return -EOPNOTSUPP; +static inline enum erdma_qps_rocev2 ib_to_rocev2_qps(enum ib_qp_state state) +{ + return ib_qps_to_erdma_qps[ERDMA_PROTO_ROCEV2][state]; +} - memset(&new_attrs, 0, sizeof(new_attrs)); +static inline enum ib_qp_state iwarp_to_ib_qps(enum erdma_qps_iwarp state) +{ + return erdma_qps_to_ib_qps[ERDMA_PROTO_IWARP][state]; +} - if (attr_mask & IB_QP_STATE) { - new_attrs.state = ib_qp_state_to_erdma_qp_state[attr->qp_state]; +static inline enum ib_qp_state rocev2_to_ib_qps(enum erdma_qps_rocev2 state) +{ + return erdma_qps_to_ib_qps[ERDMA_PROTO_ROCEV2][state]; +} - 
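The two lookup tables above translate between the generic IB QP states and the protocol-specific erdma states in both directions. The same idea, reduced to a standalone program: the enum values mirror the RoCEv2 mapping shown in the hunk (which is one-to-one), with local names rather than the real ERDMA_* identifiers.

#include <stdio.h>

enum ib_qps { IB_QPS_RESET, IB_QPS_INIT, IB_QPS_RTR, IB_QPS_RTS,
	      IB_QPS_SQD, IB_QPS_SQE, IB_QPS_ERR, IB_QPS_MAX };

/* RoCEv2 hardware states, as listed in the patch. */
enum rocev2_qps { QPS_RESET, QPS_INIT, QPS_RTR, QPS_RTS,
		  QPS_SQD, QPS_SQE, QPS_ERROR, QPS_COUNT };

static const enum rocev2_qps ib_to_rocev2[IB_QPS_MAX] = {
	[IB_QPS_RESET] = QPS_RESET, [IB_QPS_INIT] = QPS_INIT,
	[IB_QPS_RTR]   = QPS_RTR,   [IB_QPS_RTS]  = QPS_RTS,
	[IB_QPS_SQD]   = QPS_SQD,   [IB_QPS_SQE]  = QPS_SQE,
	[IB_QPS_ERR]   = QPS_ERROR,
};

static const enum ib_qps rocev2_to_ib[QPS_COUNT] = {
	[QPS_RESET] = IB_QPS_RESET, [QPS_INIT] = IB_QPS_INIT,
	[QPS_RTR]   = IB_QPS_RTR,   [QPS_RTS]  = IB_QPS_RTS,
	[QPS_SQD]   = IB_QPS_SQD,   [QPS_SQE]  = IB_QPS_SQE,
	[QPS_ERROR] = IB_QPS_ERR,
};

int main(void)
{
	/* Round-trip every IB state through the hardware encoding. */
	for (int s = 0; s < IB_QPS_MAX; s++)
		printf("IB %d -> hw %d -> IB %d\n",
		       s, ib_to_rocev2[s], rocev2_to_ib[ib_to_rocev2[s]]);
	return 0;
}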
erdma_attr_mask |= ERDMA_QP_ATTR_STATE; +static int erdma_check_qp_attrs(struct erdma_qp *qp, struct ib_qp_attr *attr, + int attr_mask) +{ + enum ib_qp_state cur_state, nxt_state; + struct erdma_dev *dev = qp->dev; + int ret = -EINVAL; + + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) { + ret = -EOPNOTSUPP; + goto out; + } + + if ((attr_mask & IB_QP_PORT) && + !rdma_is_port_valid(&dev->ibdev, attr->port_num)) + goto out; + + if (erdma_device_rocev2(dev)) { + cur_state = (attr_mask & IB_QP_CUR_STATE) ? + attr->cur_qp_state : + rocev2_to_ib_qps(qp->attrs.rocev2.state); + + nxt_state = (attr_mask & IB_QP_STATE) ? attr->qp_state : + cur_state; + + if (!ib_modify_qp_is_ok(cur_state, nxt_state, qp->ibqp.qp_type, + attr_mask)) + goto out; + + if ((attr_mask & IB_QP_AV) && + erdma_check_gid_attr( + rdma_ah_read_grh(&attr->ah_attr)->sgid_attr)) + goto out; + + if ((attr_mask & IB_QP_PKEY_INDEX) && + attr->pkey_index >= ERDMA_MAX_PKEYS) + goto out; + } + + return 0; + +out: + return ret; +} + +static void erdma_init_mod_qp_params_rocev2( + struct erdma_qp *qp, struct erdma_mod_qp_params_rocev2 *params, + int *erdma_attr_mask, struct ib_qp_attr *attr, int ib_attr_mask) +{ + enum erdma_qpa_mask_rocev2 to_modify_attrs = 0; + enum erdma_qps_rocev2 cur_state, nxt_state; + u16 udp_sport; + + if (ib_attr_mask & IB_QP_CUR_STATE) + cur_state = ib_to_rocev2_qps(attr->cur_qp_state); + else + cur_state = qp->attrs.rocev2.state; + + if (ib_attr_mask & IB_QP_STATE) + nxt_state = ib_to_rocev2_qps(attr->qp_state); + else + nxt_state = cur_state; + + to_modify_attrs |= ERDMA_QPA_ROCEV2_STATE; + params->state = nxt_state; + + if (ib_attr_mask & IB_QP_QKEY) { + to_modify_attrs |= ERDMA_QPA_ROCEV2_QKEY; + params->qkey = attr->qkey; + } + + if (ib_attr_mask & IB_QP_SQ_PSN) { + to_modify_attrs |= ERDMA_QPA_ROCEV2_SQ_PSN; + params->sq_psn = attr->sq_psn; + } + + if (ib_attr_mask & IB_QP_RQ_PSN) { + to_modify_attrs |= ERDMA_QPA_ROCEV2_RQ_PSN; + params->rq_psn = attr->rq_psn; + } + + if (ib_attr_mask & IB_QP_DEST_QPN) { + to_modify_attrs |= ERDMA_QPA_ROCEV2_DST_QPN; + params->dst_qpn = attr->dest_qp_num; } + if (ib_attr_mask & IB_QP_AV) { + to_modify_attrs |= ERDMA_QPA_ROCEV2_AV; + udp_sport = rdma_get_udp_sport(attr->ah_attr.grh.flow_label, + QP_ID(qp), params->dst_qpn); + erdma_attr_to_av(&attr->ah_attr, ¶ms->av, udp_sport); + } + + *erdma_attr_mask = to_modify_attrs; +} + +int erdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, + struct ib_udata *udata) +{ + struct erdma_qp *qp = to_eqp(ibqp); + union erdma_mod_qp_params params; + int ret = 0, erdma_attr_mask = 0; + down_write(&qp->state_lock); - ret = erdma_modify_qp_internal(qp, &new_attrs, erdma_attr_mask); + ret = erdma_check_qp_attrs(qp, attr, attr_mask); + if (ret) + goto out; - up_write(&qp->state_lock); + if (erdma_device_iwarp(qp->dev)) { + if (attr_mask & IB_QP_STATE) { + erdma_attr_mask |= ERDMA_QPA_IWARP_STATE; + params.iwarp.state = ib_to_iwarp_qps(attr->qp_state); + } + + ret = erdma_modify_qp_state_iwarp(qp, ¶ms.iwarp, + erdma_attr_mask); + } else { + erdma_init_mod_qp_params_rocev2( + qp, ¶ms.rocev2, &erdma_attr_mask, attr, attr_mask); + + ret = erdma_modify_qp_state_rocev2(qp, ¶ms.rocev2, + erdma_attr_mask); + } +out: + up_write(&qp->state_lock); return ret; } static enum ib_qp_state query_qp_state(struct erdma_qp *qp) { - switch (qp->attrs.state) { - case ERDMA_QP_STATE_IDLE: - return IB_QPS_INIT; - case ERDMA_QP_STATE_RTR: - return IB_QPS_RTR; - case ERDMA_QP_STATE_RTS: - return IB_QPS_RTS; - case ERDMA_QP_STATE_CLOSING: - return 
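erdma_init_mod_qp_params_rocev2() above accumulates a driver-side attribute mask from the IB attr_mask, copying only the fields the caller flagged. The sketch below shows that pattern standalone; the ATTR_*/QPA_* bit values are simplified local stand-ins, not the real IB_QP_* or ERDMA_QPA_ROCEV2_* constants.

#include <stdint.h>
#include <stdio.h>

/* Caller-visible modify bits (stand-ins). */
#define ATTR_STATE	(1u << 0)
#define ATTR_QKEY	(1u << 1)
#define ATTR_SQ_PSN	(1u << 2)

/* Driver/hardware-side mask bits (stand-ins). */
#define QPA_STATE	(1u << 0)
#define QPA_QKEY	(1u << 1)
#define QPA_SQ_PSN	(1u << 3)

struct mod_params { uint32_t state, qkey, sq_psn; };
struct qp_attr    { uint32_t qp_state, qkey, sq_psn; };

/* Copy only the attributes the caller asked to modify and build the HW mask. */
static uint32_t build_mod_params(const struct qp_attr *attr, uint32_t attr_mask,
				 struct mod_params *p)
{
	uint32_t hw_mask = QPA_STATE;	/* state is always programmed, as in the patch */

	if (attr_mask & ATTR_STATE)
		p->state = attr->qp_state;
	if (attr_mask & ATTR_QKEY) {
		hw_mask |= QPA_QKEY;
		p->qkey = attr->qkey;
	}
	if (attr_mask & ATTR_SQ_PSN) {
		hw_mask |= QPA_SQ_PSN;
		p->sq_psn = attr->sq_psn;
	}
	return hw_mask;
}

int main(void)
{
	struct qp_attr attr = { .qp_state = 3, .qkey = 0x11111111, .sq_psn = 100 };
	struct mod_params p = { 0 };
	uint32_t mask = build_mod_params(&attr, ATTR_STATE | ATTR_SQ_PSN, &p);

	printf("hw mask 0x%x, state %u, sq_psn %u\n", mask, p.state, p.sq_psn);
	return 0;
}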
IB_QPS_ERR; - case ERDMA_QP_STATE_TERMINATE: - return IB_QPS_ERR; - case ERDMA_QP_STATE_ERROR: - return IB_QPS_ERR; - default: - return IB_QPS_ERR; - } + if (erdma_device_iwarp(qp->dev)) + return iwarp_to_ib_qps(qp->attrs.iwarp.state); + else + return rocev2_to_ib_qps(qp->attrs.rocev2.state); } int erdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr) { + struct erdma_cmdq_query_qp_req_rocev2 req; struct erdma_dev *dev; struct erdma_qp *qp; + u64 resp0, resp1; + int ret; if (ibqp && qp_attr && qp_init_attr) { qp = to_eqp(ibqp); @@ -1595,8 +1831,37 @@ int erdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, qp_init_attr->cap = qp_attr->cap; - qp_attr->qp_state = query_qp_state(qp); - qp_attr->cur_qp_state = query_qp_state(qp); + if (erdma_device_rocev2(dev)) { + /* Query hardware to get some attributes */ + erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA, + CMDQ_OPCODE_QUERY_QP); + req.qpn = QP_ID(qp); + + ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), &resp0, + &resp1, true); + if (ret) + return ret; + + qp_attr->sq_psn = + FIELD_GET(ERDMA_CMD_QUERY_QP_RESP_SQ_PSN_MASK, resp0); + qp_attr->rq_psn = + FIELD_GET(ERDMA_CMD_QUERY_QP_RESP_RQ_PSN_MASK, resp0); + qp_attr->qp_state = rocev2_to_ib_qps(FIELD_GET( + ERDMA_CMD_QUERY_QP_RESP_QP_STATE_MASK, resp0)); + qp_attr->cur_qp_state = qp_attr->qp_state; + qp_attr->sq_draining = FIELD_GET( + ERDMA_CMD_QUERY_QP_RESP_SQ_DRAINING_MASK, resp0); + + qp_attr->pkey_index = 0; + qp_attr->dest_qp_num = qp->attrs.rocev2.dst_qpn; + + if (qp->ibqp.qp_type == IB_QPT_RC) + erdma_av_to_attr(&qp->attrs.rocev2.av, + &qp_attr->ah_attr); + } else { + qp_attr->qp_state = query_qp_state(qp); + qp_attr->cur_qp_state = qp_attr->qp_state; + } return 0; } @@ -1736,7 +2001,7 @@ void erdma_set_mtu(struct erdma_dev *dev, u32 mtu) CMDQ_OPCODE_CONF_MTU); req.mtu = mtu; - erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); + erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL, true); } void erdma_port_event(struct erdma_dev *dev, enum ib_event_type reason) @@ -1806,7 +2071,8 @@ static int erdma_query_hw_stats(struct erdma_dev *dev, req.target_addr = dma_addr; req.target_length = ERDMA_HW_RESP_SIZE; - err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); + err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL, + true); if (err) goto out; @@ -1839,3 +2105,159 @@ int erdma_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, return stats->num_counters; } + +enum rdma_link_layer erdma_get_link_layer(struct ib_device *ibdev, u32 port_num) +{ + return IB_LINK_LAYER_ETHERNET; +} + +static int erdma_set_gid(struct erdma_dev *dev, u8 op, u32 idx, + const union ib_gid *gid) +{ + struct erdma_cmdq_set_gid_req req; + u8 ntype; + + req.cfg = FIELD_PREP(ERDMA_CMD_SET_GID_SGID_IDX_MASK, idx) | + FIELD_PREP(ERDMA_CMD_SET_GID_OP_MASK, op); + + if (op == ERDMA_SET_GID_OP_ADD) { + if (ipv6_addr_v4mapped((struct in6_addr *)gid)) + ntype = ERDMA_NETWORK_TYPE_IPV4; + else + ntype = ERDMA_NETWORK_TYPE_IPV6; + + req.cfg |= FIELD_PREP(ERDMA_CMD_SET_GID_NTYPE_MASK, ntype); + + memcpy(&req.gid, gid, ERDMA_ROCEV2_GID_SIZE); + } + + erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA, + CMDQ_OPCODE_SET_GID); + return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL, + true); +} + +int erdma_add_gid(const struct ib_gid_attr *attr, void **context) +{ + struct erdma_dev *dev = to_edev(attr->device); + int ret; + + ret = erdma_check_gid_attr(attr); 
+ if (ret) + return ret; + + return erdma_set_gid(dev, ERDMA_SET_GID_OP_ADD, attr->index, + &attr->gid); +} + +int erdma_del_gid(const struct ib_gid_attr *attr, void **context) +{ + return erdma_set_gid(to_edev(attr->device), ERDMA_SET_GID_OP_DEL, + attr->index, NULL); +} + +int erdma_query_pkey(struct ib_device *ibdev, u32 port, u16 index, u16 *pkey) +{ + if (index >= ERDMA_MAX_PKEYS) + return -EINVAL; + + *pkey = ERDMA_DEFAULT_PKEY; + return 0; +} + +void erdma_set_av_cfg(struct erdma_av_cfg *av_cfg, struct erdma_av *av) +{ + av_cfg->cfg0 = FIELD_PREP(ERDMA_CMD_CREATE_AV_FL_MASK, av->flow_label) | + FIELD_PREP(ERDMA_CMD_CREATE_AV_NTYPE_MASK, av->ntype); + + av_cfg->traffic_class = av->traffic_class; + av_cfg->hop_limit = av->hop_limit; + av_cfg->sl = av->sl; + + av_cfg->udp_sport = av->udp_sport; + av_cfg->sgid_index = av->sgid_index; + + ether_addr_copy(av_cfg->dmac, av->dmac); + memcpy(av_cfg->dgid, av->dgid, ERDMA_ROCEV2_GID_SIZE); +} + +int erdma_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, + struct ib_udata *udata) +{ + const struct ib_global_route *grh = + rdma_ah_read_grh(init_attr->ah_attr); + struct erdma_dev *dev = to_edev(ibah->device); + struct erdma_pd *pd = to_epd(ibah->pd); + struct erdma_ah *ah = to_eah(ibah); + struct erdma_cmdq_create_ah_req req; + u32 udp_sport; + int ret; + + ret = erdma_check_gid_attr(grh->sgid_attr); + if (ret) + return ret; + + ret = erdma_alloc_idx(&dev->res_cb[ERDMA_RES_TYPE_AH]); + if (ret < 0) + return ret; + + ah->ahn = ret; + + if (grh->flow_label) + udp_sport = rdma_flow_label_to_udp_sport(grh->flow_label); + else + udp_sport = + IB_ROCE_UDP_ENCAP_VALID_PORT_MIN + (ah->ahn & 0x3FFF); + + erdma_attr_to_av(init_attr->ah_attr, &ah->av, udp_sport); + + erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA, + CMDQ_OPCODE_CREATE_AH); + + req.pdn = pd->pdn; + req.ahn = ah->ahn; + erdma_set_av_cfg(&req.av_cfg, &ah->av); + + ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL, + init_attr->flags & RDMA_CREATE_AH_SLEEPABLE); + if (ret) { + erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_AH], ah->ahn); + return ret; + } + + return 0; +} + +int erdma_destroy_ah(struct ib_ah *ibah, u32 flags) +{ + struct erdma_dev *dev = to_edev(ibah->device); + struct erdma_pd *pd = to_epd(ibah->pd); + struct erdma_ah *ah = to_eah(ibah); + struct erdma_cmdq_destroy_ah_req req; + int ret; + + erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA, + CMDQ_OPCODE_DESTROY_AH); + + req.pdn = pd->pdn; + req.ahn = ah->ahn; + + ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL, + flags & RDMA_DESTROY_AH_SLEEPABLE); + if (ret) + return ret; + + erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_AH], ah->ahn); + + return 0; +} + +int erdma_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr) +{ + struct erdma_ah *ah = to_eah(ibah); + + memset(ah_attr, 0, sizeof(*ah_attr)); + erdma_av_to_attr(&ah->av, ah_attr); + + return 0; +} diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.h b/drivers/infiniband/hw/erdma/erdma_verbs.h index c998acd39a78..f9408ccc8bad 100644 --- a/drivers/infiniband/hw/erdma/erdma_verbs.h +++ b/drivers/infiniband/hw/erdma/erdma_verbs.h @@ -136,6 +136,25 @@ struct erdma_user_dbrecords_page { int refcnt; }; +struct erdma_av { + u8 port; + u8 hop_limit; + u8 traffic_class; + u8 sl; + u8 sgid_index; + u16 udp_sport; + u32 flow_label; + u8 dmac[ETH_ALEN]; + u8 dgid[ERDMA_ROCEV2_GID_SIZE]; + enum erdma_network_type ntype; +}; + +struct erdma_ah { + struct ib_ah ibah; + struct erdma_av av; + u32 ahn; +}; + struct erdma_uqp 
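erdma_create_ah() above derives the RoCEv2 UDP source port either from the flow label (when the caller supplied one) or from the AH number folded into the valid encap port range. A standalone sketch of that choice follows; the flow-label fold here is a simple illustrative hash, not necessarily the exact rdma_flow_label_to_udp_sport() algorithm.

#include <stdint.h>
#include <stdio.h>

#define ROCE_UDP_ENCAP_VALID_PORT_MIN 0xC000u	/* 49152, start of the dynamic range */

/* Illustrative only: fold a 20-bit flow label into the dynamic UDP port range. */
static uint16_t sport_from_flow_label(uint32_t flow_label)
{
	uint32_t fl = flow_label & 0xFFFFF;

	return ROCE_UDP_ENCAP_VALID_PORT_MIN + ((fl ^ (fl >> 14)) & 0x3FFF);
}

static uint16_t pick_udp_sport(uint32_t flow_label, uint32_t ahn)
{
	if (flow_label)
		return sport_from_flow_label(flow_label);
	/* No flow label: spread AHs over the same 14-bit range, as the hunk does. */
	return ROCE_UDP_ENCAP_VALID_PORT_MIN + (ahn & 0x3FFF);
}

int main(void)
{
	printf("flow-label based: %u\n", pick_udp_sport(0x12345, 7));
	printf("AHN based:        %u\n", pick_udp_sport(0, 7));
	return 0;
}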
{ struct erdma_mem sq_mem; struct erdma_mem rq_mem; @@ -176,33 +195,91 @@ struct erdma_kqp { u8 sig_all; }; -enum erdma_qp_state { - ERDMA_QP_STATE_IDLE = 0, - ERDMA_QP_STATE_RTR = 1, - ERDMA_QP_STATE_RTS = 2, - ERDMA_QP_STATE_CLOSING = 3, - ERDMA_QP_STATE_TERMINATE = 4, - ERDMA_QP_STATE_ERROR = 5, - ERDMA_QP_STATE_UNDEF = 7, - ERDMA_QP_STATE_COUNT = 8 +enum erdma_qps_iwarp { + ERDMA_QPS_IWARP_IDLE = 0, + ERDMA_QPS_IWARP_RTR = 1, + ERDMA_QPS_IWARP_RTS = 2, + ERDMA_QPS_IWARP_CLOSING = 3, + ERDMA_QPS_IWARP_TERMINATE = 4, + ERDMA_QPS_IWARP_ERROR = 5, + ERDMA_QPS_IWARP_UNDEF = 6, + ERDMA_QPS_IWARP_COUNT = 7, +}; + +enum erdma_qpa_mask_iwarp { + ERDMA_QPA_IWARP_STATE = (1 << 0), + ERDMA_QPA_IWARP_LLP_HANDLE = (1 << 2), + ERDMA_QPA_IWARP_ORD = (1 << 3), + ERDMA_QPA_IWARP_IRD = (1 << 4), + ERDMA_QPA_IWARP_SQ_SIZE = (1 << 5), + ERDMA_QPA_IWARP_RQ_SIZE = (1 << 6), + ERDMA_QPA_IWARP_MPA = (1 << 7), + ERDMA_QPA_IWARP_CC = (1 << 8), }; -enum erdma_qp_attr_mask { - ERDMA_QP_ATTR_STATE = (1 << 0), - ERDMA_QP_ATTR_LLP_HANDLE = (1 << 2), - ERDMA_QP_ATTR_ORD = (1 << 3), - ERDMA_QP_ATTR_IRD = (1 << 4), - ERDMA_QP_ATTR_SQ_SIZE = (1 << 5), - ERDMA_QP_ATTR_RQ_SIZE = (1 << 6), - ERDMA_QP_ATTR_MPA = (1 << 7) +enum erdma_qps_rocev2 { + ERDMA_QPS_ROCEV2_RESET = 0, + ERDMA_QPS_ROCEV2_INIT = 1, + ERDMA_QPS_ROCEV2_RTR = 2, + ERDMA_QPS_ROCEV2_RTS = 3, + ERDMA_QPS_ROCEV2_SQD = 4, + ERDMA_QPS_ROCEV2_SQE = 5, + ERDMA_QPS_ROCEV2_ERROR = 6, + ERDMA_QPS_ROCEV2_COUNT = 7, +}; + +enum erdma_qpa_mask_rocev2 { + ERDMA_QPA_ROCEV2_STATE = (1 << 0), + ERDMA_QPA_ROCEV2_QKEY = (1 << 1), + ERDMA_QPA_ROCEV2_AV = (1 << 2), + ERDMA_QPA_ROCEV2_SQ_PSN = (1 << 3), + ERDMA_QPA_ROCEV2_RQ_PSN = (1 << 4), + ERDMA_QPA_ROCEV2_DST_QPN = (1 << 5), }; enum erdma_qp_flags { ERDMA_QP_IN_FLUSHING = (1 << 0), }; +#define ERDMA_QP_ACTIVE 0 +#define ERDMA_QP_PASSIVE 1 + +struct erdma_mod_qp_params_iwarp { + enum erdma_qps_iwarp state; + enum erdma_cc_alg cc; + u8 qp_type; + u8 pd_len; + u32 irq_size; + u32 orq_size; +}; + +struct erdma_qp_attrs_iwarp { + enum erdma_qps_iwarp state; + u32 cookie; +}; + +struct erdma_mod_qp_params_rocev2 { + enum erdma_qps_rocev2 state; + u32 qkey; + u32 sq_psn; + u32 rq_psn; + u32 dst_qpn; + struct erdma_av av; +}; + +union erdma_mod_qp_params { + struct erdma_mod_qp_params_iwarp iwarp; + struct erdma_mod_qp_params_rocev2 rocev2; +}; + +struct erdma_qp_attrs_rocev2 { + enum erdma_qps_rocev2 state; + u32 qkey; + u32 dst_qpn; + struct erdma_av av; +}; + struct erdma_qp_attrs { - enum erdma_qp_state state; enum erdma_cc_alg cc; /* Congestion control algorithm */ u32 sq_size; u32 rq_size; @@ -210,11 +287,10 @@ struct erdma_qp_attrs { u32 irq_size; u32 max_send_sge; u32 max_recv_sge; - u32 cookie; -#define ERDMA_QP_ACTIVE 0 -#define ERDMA_QP_PASSIVE 1 - u8 qp_type; - u8 pd_len; + union { + struct erdma_qp_attrs_iwarp iwarp; + struct erdma_qp_attrs_rocev2 rocev2; + }; }; struct erdma_qp { @@ -286,11 +362,25 @@ static inline struct erdma_cq *find_cq_by_cqn(struct erdma_dev *dev, int id) void erdma_qp_get(struct erdma_qp *qp); void erdma_qp_put(struct erdma_qp *qp); -int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs, - enum erdma_qp_attr_mask mask); +int erdma_modify_qp_state_iwarp(struct erdma_qp *qp, + struct erdma_mod_qp_params_iwarp *params, + int mask); +int erdma_modify_qp_state_rocev2(struct erdma_qp *qp, + struct erdma_mod_qp_params_rocev2 *params, + int attr_mask); void erdma_qp_llp_close(struct erdma_qp *qp); void erdma_qp_cm_drop(struct erdma_qp *qp); +static inline bool 
erdma_device_iwarp(struct erdma_dev *dev) +{ + return dev->proto == ERDMA_PROTO_IWARP; +} + +static inline bool erdma_device_rocev2(struct erdma_dev *dev) +{ + return dev->proto == ERDMA_PROTO_ROCEV2; +} + static inline struct erdma_ucontext *to_ectx(struct ib_ucontext *ibctx) { return container_of(ibctx, struct erdma_ucontext, ibucontext); @@ -316,6 +406,21 @@ static inline struct erdma_cq *to_ecq(struct ib_cq *ibcq) return container_of(ibcq, struct erdma_cq, ibcq); } +static inline struct erdma_ah *to_eah(struct ib_ah *ibah) +{ + return container_of(ibah, struct erdma_ah, ibah); +} + +static inline int erdma_check_gid_attr(const struct ib_gid_attr *attr) +{ + u8 ntype = rdma_gid_attr_network_type(attr); + + if (ntype != RDMA_NETWORK_IPV4 && ntype != RDMA_NETWORK_IPV6) + return -EINVAL; + + return 0; +} + static inline struct erdma_user_mmap_entry * to_emmap(struct rdma_user_mmap_entry *ibmmap) { @@ -360,6 +465,7 @@ int erdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *send_wr, int erdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *recv_wr, const struct ib_recv_wr **bad_recv_wr); int erdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); +void erdma_remove_cqes_of_qp(struct ib_cq *ibcq, u32 qpn); struct ib_mr *erdma_ib_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type, u32 max_num_sg); int erdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, @@ -370,5 +476,15 @@ struct rdma_hw_stats *erdma_alloc_hw_port_stats(struct ib_device *device, u32 port_num); int erdma_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, u32 port, int index); +enum rdma_link_layer erdma_get_link_layer(struct ib_device *ibdev, + u32 port_num); +int erdma_add_gid(const struct ib_gid_attr *attr, void **context); +int erdma_del_gid(const struct ib_gid_attr *attr, void **context); +int erdma_query_pkey(struct ib_device *ibdev, u32 port, u16 index, u16 *pkey); +void erdma_set_av_cfg(struct erdma_av_cfg *av_cfg, struct erdma_av *av); +int erdma_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, + struct ib_udata *udata); +int erdma_destroy_ah(struct ib_ah *ibah, u32 flags); +int erdma_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); #endif diff --git a/drivers/infiniband/hw/hfi1/aspm.c b/drivers/infiniband/hw/hfi1/aspm.c index a3c53be4072c..9b508eaf441d 100644 --- a/drivers/infiniband/hw/hfi1/aspm.c +++ b/drivers/infiniband/hw/hfi1/aspm.c @@ -191,7 +191,7 @@ void aspm_disable_all(struct hfi1_devdata *dd) for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) { rcd = hfi1_rcd_get_by_index(dd, i); if (rcd) { - del_timer_sync(&rcd->aspm_timer); + timer_delete_sync(&rcd->aspm_timer); spin_lock_irqsave(&rcd->aspm_lock, flags); rcd->aspm_intr_enable = false; spin_unlock_irqrestore(&rcd->aspm_lock, flags); diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index a442eca498b8..e908f529335d 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -5576,7 +5576,7 @@ static int init_rcverr(struct hfi1_devdata *dd) static void free_rcverr(struct hfi1_devdata *dd) { if (dd->rcverr_timer.function) - del_timer_sync(&dd->rcverr_timer); + timer_delete_sync(&dd->rcverr_timer); } static void handle_rxe_err(struct hfi1_devdata *dd, u32 unused, u64 reg) @@ -12308,7 +12308,7 @@ static void free_cntrs(struct hfi1_devdata *dd) int i; if (dd->synth_stats_timer.function) - del_timer_sync(&dd->synth_stats_timer); + timer_delete_sync(&dd->synth_stats_timer); 
cancel_work_sync(&dd->update_cntr_work); ppd = (struct hfi1_pportdata *)(dd + 1); for (i = 0; i < dd->num_pports; i++, ppd++) { @@ -12882,22 +12882,6 @@ u32 chip_to_opa_pstate(struct hfi1_devdata *dd, u32 chip_pstate) } } -/* return the OPA port logical state name */ -const char *opa_lstate_name(u32 lstate) -{ - static const char * const port_logical_names[] = { - "PORT_NOP", - "PORT_DOWN", - "PORT_INIT", - "PORT_ARMED", - "PORT_ACTIVE", - "PORT_ACTIVE_DEFER", - }; - if (lstate < ARRAY_SIZE(port_logical_names)) - return port_logical_names[lstate]; - return "unknown"; -} - /* return the OPA port physical state name */ const char *opa_pstate_name(u32 pstate) { @@ -12956,8 +12940,6 @@ static void update_statusp(struct hfi1_pportdata *ppd, u32 state) break; } } - dd_dev_info(ppd->dd, "logical state changed to %s (0x%x)\n", - opa_lstate_name(state), state); } /** diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index 8841db16bde7..6992f6d40255 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -771,7 +771,6 @@ int is_bx(struct hfi1_devdata *dd); bool is_urg_masked(struct hfi1_ctxtdata *rcd); u32 read_physical_state(struct hfi1_devdata *dd); u32 chip_to_opa_pstate(struct hfi1_devdata *dd, u32 chip_pstate); -const char *opa_lstate_name(u32 lstate); const char *opa_pstate_name(u32 pstate); u32 driver_pstate(struct hfi1_pportdata *ppd); u32 driver_lstate(struct hfi1_pportdata *ppd); diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 37a6794885d3..3da90f2eb8e7 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -968,7 +968,7 @@ static bool __set_armed_to_active(struct hfi1_packet *packet) if (hwstate != IB_PORT_ACTIVE) { dd_dev_info(packet->rcd->dd, "Unexpected link state %s\n", - opa_lstate_name(hwstate)); + ib_port_state_to_str(hwstate)); return false; } @@ -1303,7 +1303,7 @@ void shutdown_led_override(struct hfi1_pportdata *ppd) */ smp_rmb(); if (atomic_read(&ppd->led_override_timer_active)) { - del_timer_sync(&ppd->led_override_timer); + timer_delete_sync(&ppd->led_override_timer); atomic_set(&ppd->led_override_timer_active, 0); /* Ensure the atomic_set is visible to all CPUs */ smp_wmb(); diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index eb38f81aeeb1..cb630551cf1a 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -2339,20 +2339,6 @@ static inline u64 hfi1_pkt_base_sdma_integrity(struct hfi1_devdata *dd) dev_err(&(dd)->pcidev->dev, "%s: port %u: " fmt, \ rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), (port), ##__VA_ARGS__) -/* - * this is used for formatting hw error messages... - */ -struct hfi1_hwerror_msgs { - u64 mask; - const char *msg; - size_t sz; -}; - -/* in intr.c... 
*/ -void hfi1_format_hwerrors(u64 hwerrs, - const struct hfi1_hwerror_msgs *hwerrmsgs, - size_t nhwerrmsgs, char *msg, size_t lmsg); - #define USER_OPCODE_CHECK_VAL 0xC0 #define USER_OPCODE_CHECK_MASK 0xC0 #define OPCODE_CHECK_VAL_DISABLED 0x0 diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index cbac4a442d9e..b35f92e7d865 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -635,12 +635,11 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd, spin_lock_init(&ppd->cca_timer_lock); for (i = 0; i < OPA_MAX_SLS; i++) { - hrtimer_init(&ppd->cca_timer[i].hrtimer, CLOCK_MONOTONIC, - HRTIMER_MODE_REL); ppd->cca_timer[i].ppd = ppd; ppd->cca_timer[i].sl = i; ppd->cca_timer[i].ccti = 0; - ppd->cca_timer[i].hrtimer.function = cca_timer_fn; + hrtimer_setup(&ppd->cca_timer[i].hrtimer, cca_timer_fn, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); } ppd->cc_max_table_entries = IB_CC_TABLE_CAP_DEFAULT; @@ -986,7 +985,7 @@ static void stop_timers(struct hfi1_devdata *dd) for (pidx = 0; pidx < dd->num_pports; ++pidx) { ppd = dd->pport + pidx; if (ppd->led_override_timer.function) { - del_timer_sync(&ppd->led_override_timer); + timer_delete_sync(&ppd->led_override_timer); atomic_set(&ppd->led_override_timer_active, 0); } } diff --git a/drivers/infiniband/hw/hfi1/intr.c b/drivers/infiniband/hw/hfi1/intr.c index 3737f632d62a..d8dd1a599631 100644 --- a/drivers/infiniband/hw/hfi1/intr.c +++ b/drivers/infiniband/hw/hfi1/intr.c @@ -47,37 +47,6 @@ static void add_full_mgmt_pkey(struct hfi1_pportdata *ppd) hfi1_event_pkey_change(ppd->dd, ppd->port); } -/** - * format_hwmsg - format a single hwerror message - * @msg: message buffer - * @msgl: length of message buffer - * @hwmsg: message to add to message buffer - */ -static void format_hwmsg(char *msg, size_t msgl, const char *hwmsg) -{ - strlcat(msg, "[", msgl); - strlcat(msg, hwmsg, msgl); - strlcat(msg, "]", msgl); -} - -/** - * hfi1_format_hwerrors - format hardware error messages for display - * @hwerrs: hardware errors bit vector - * @hwerrmsgs: hardware error descriptions - * @nhwerrmsgs: number of hwerrmsgs - * @msg: message buffer - * @msgl: message buffer length - */ -void hfi1_format_hwerrors(u64 hwerrs, const struct hfi1_hwerror_msgs *hwerrmsgs, - size_t nhwerrmsgs, char *msg, size_t msgl) -{ - int i; - - for (i = 0; i < nhwerrmsgs; i++) - if (hwerrs & hwerrmsgs[i].mask) - format_hwmsg(msg, msgl, hwerrmsgs[i].msg); -} - static void signal_ib_event(struct hfi1_pportdata *ppd, enum ib_event_type ev) { struct ib_event event; diff --git a/drivers/infiniband/hw/hfi1/iowait.h b/drivers/infiniband/hw/hfi1/iowait.h index 49805a24bb0a..7259f4f55700 100644 --- a/drivers/infiniband/hw/hfi1/iowait.h +++ b/drivers/infiniband/hw/hfi1/iowait.h @@ -92,7 +92,7 @@ struct iowait_work { * * The lock field is used by waiters to record * the seqlock_t that guards the list head. - * Waiters explicity know that, but the destroy + * Waiters explicitly know that, but the destroy * code that unwaits QPs does not. 
*/ struct iowait { diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index a9883295f4af..b39f63ce6dfc 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -1160,8 +1160,8 @@ static int port_states_transition_allowed(struct hfi1_pportdata *ppd, if (ret == HFI_TRANSITION_DISALLOWED || ret == HFI_TRANSITION_UNDEFINED) { pr_warn("invalid logical state transition %s -> %s\n", - opa_lstate_name(logical_old), - opa_lstate_name(logical_new)); + ib_port_state_to_str(logical_old), + ib_port_state_to_str(logical_new)); return ret; } diff --git a/drivers/infiniband/hw/hfi1/qsfp.c b/drivers/infiniband/hw/hfi1/qsfp.c index 52cce1c8b76a..3b7842a7f634 100644 --- a/drivers/infiniband/hw/hfi1/qsfp.c +++ b/drivers/infiniband/hw/hfi1/qsfp.c @@ -405,26 +405,6 @@ int qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, } /* - * Perform a stand-alone single QSFP write. Acquire the resource, do the - * write, then release the resource. - */ -int one_qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, - int len) -{ - struct hfi1_devdata *dd = ppd->dd; - u32 resource = qsfp_resource(dd); - int ret; - - ret = acquire_chip_resource(dd, resource, QSFP_WAIT); - if (ret) - return ret; - ret = qsfp_write(ppd, target, addr, bp, len); - release_chip_resource(dd, resource); - - return ret; -} - -/* * Access page n, offset m of QSFP memory as defined by SFF 8636 * by reading @addr = ((256 * n) + m) * diff --git a/drivers/infiniband/hw/hfi1/qsfp.h b/drivers/infiniband/hw/hfi1/qsfp.h index df1389bad86b..5c59d53fcb63 100644 --- a/drivers/infiniband/hw/hfi1/qsfp.h +++ b/drivers/infiniband/hw/hfi1/qsfp.h @@ -195,8 +195,6 @@ int qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, int len); int qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, int len); -int one_qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, - int len); int one_qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, int len); struct hfi1_asic_data; diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index b67d23b1f286..0d2b39b7c8b5 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -1575,7 +1575,7 @@ void sdma_exit(struct hfi1_devdata *dd) sde->this_idx); sdma_process_event(sde, sdma_event_e00_go_hw_down); - del_timer_sync(&sde->err_progress_check_timer); + timer_delete_sync(&sde->err_progress_check_timer); /* * This waits for the state machine to exit so it is not diff --git a/drivers/infiniband/hw/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c index d62ba5fdd80c..d94216c7d576 100644 --- a/drivers/infiniband/hw/hfi1/sysfs.c +++ b/drivers/infiniband/hw/hfi1/sysfs.c @@ -27,8 +27,8 @@ static struct hfi1_pportdata *hfi1_get_pportdata_kobj(struct kobject *kobj) * Congestion control table size followed by table entries */ static ssize_t cc_table_bin_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, char *buf, - loff_t pos, size_t count) + const struct bin_attribute *bin_attr, + char *buf, loff_t pos, size_t count) { int ret; struct hfi1_pportdata *ppd = hfi1_get_pportdata_kobj(kobj); @@ -57,7 +57,7 @@ static ssize_t cc_table_bin_read(struct file *filp, struct kobject *kobj, return count; } -static BIN_ATTR_RO(cc_table_bin, PAGE_SIZE); +static const BIN_ATTR_RO(cc_table_bin, PAGE_SIZE); /* * Congestion settings: port control, control map and an array of 16 @@ -65,7 +65,7 @@ static 
BIN_ATTR_RO(cc_table_bin, PAGE_SIZE); * trigger threshold and the minimum injection rate delay. */ static ssize_t cc_setting_bin_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, + const struct bin_attribute *bin_attr, char *buf, loff_t pos, size_t count) { struct hfi1_pportdata *ppd = hfi1_get_pportdata_kobj(kobj); @@ -93,9 +93,9 @@ static ssize_t cc_setting_bin_read(struct file *filp, struct kobject *kobj, return count; } -static BIN_ATTR_RO(cc_setting_bin, PAGE_SIZE); +static const BIN_ATTR_RO(cc_setting_bin, PAGE_SIZE); -static struct bin_attribute *port_cc_bin_attributes[] = { +static const struct bin_attribute *const port_cc_bin_attributes[] = { &bin_attr_cc_setting_bin, &bin_attr_cc_table_bin, NULL @@ -134,7 +134,7 @@ static struct attribute *port_cc_attributes[] = { static const struct attribute_group port_cc_group = { .name = "CCMgtA", .attrs = port_cc_attributes, - .bin_attrs = port_cc_bin_attributes, + .bin_attrs_new = port_cc_bin_attributes, }; /* Start sc2vl */ diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c index c465966a1d9c..78bf4a48c035 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.c +++ b/drivers/infiniband/hw/hfi1/tid_rdma.c @@ -3965,7 +3965,7 @@ static int hfi1_stop_tid_reap_timer(struct rvt_qp *qp) lockdep_assert_held(&qp->s_lock); if (qpriv->s_flags & HFI1_R_TID_RSC_TIMER) { - rval = del_timer(&qpriv->s_tid_timer); + rval = timer_delete(&qpriv->s_tid_timer); qpriv->s_flags &= ~HFI1_R_TID_RSC_TIMER; } return rval; @@ -3975,7 +3975,7 @@ void hfi1_del_tid_reap_timer(struct rvt_qp *qp) { struct hfi1_qp_priv *qpriv = qp->priv; - del_timer_sync(&qpriv->s_tid_timer); + timer_delete_sync(&qpriv->s_tid_timer); qpriv->s_flags &= ~HFI1_R_TID_RSC_TIMER; } @@ -4781,7 +4781,7 @@ static int hfi1_stop_tid_retry_timer(struct rvt_qp *qp) lockdep_assert_held(&qp->s_lock); if (priv->s_flags & HFI1_S_TID_RETRY_TIMER) { - rval = del_timer(&priv->s_tid_retry_timer); + rval = timer_delete(&priv->s_tid_retry_timer); priv->s_flags &= ~HFI1_S_TID_RETRY_TIMER; } return rval; @@ -4791,7 +4791,7 @@ void hfi1_del_tid_retry_timer(struct rvt_qp *qp) { struct hfi1_qp_priv *priv = qp->priv; - del_timer_sync(&priv->s_tid_retry_timer); + timer_delete_sync(&priv->s_tid_retry_timer); priv->s_flags &= ~HFI1_S_TID_RETRY_TIMER; } diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 33af2196ef31..49e0f79b950c 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1900,7 +1900,7 @@ void hfi1_unregister_ib_device(struct hfi1_devdata *dd) if (!list_empty(&dev->memwait)) dd_dev_err(dd, "memwait list not empty!\n"); - del_timer_sync(&dev->mem_timer); + timer_delete_sync(&dev->mem_timer); verbs_txreq_exit(dev); kfree(dev_cntr_descs); diff --git a/drivers/infiniband/hw/hns/Kconfig b/drivers/infiniband/hw/hns/Kconfig index ab3fbba70789..44cdb706fe27 100644 --- a/drivers/infiniband/hw/hns/Kconfig +++ b/drivers/infiniband/hw/hns/Kconfig @@ -1,21 +1,11 @@ # SPDX-License-Identifier: GPL-2.0-only -config INFINIBAND_HNS - tristate "HNS RoCE Driver" - depends on NET_VENDOR_HISILICON - depends on ARM64 || (COMPILE_TEST && 64BIT) - depends on (HNS_DSAF && HNS_ENET) || HNS3 - help - This is a RoCE/RDMA driver for the Hisilicon RoCE engine. - - To compile HIP08 driver as module, choose M here. 
- config INFINIBAND_HNS_HIP08 - bool "Hisilicon Hip08 Family RoCE support" - depends on INFINIBAND_HNS && PCI && HNS3 - depends on INFINIBAND_HNS=m || HNS3=y + tristate "Hisilicon Hip08 Family RoCE support" + depends on ARM64 || (COMPILE_TEST && 64BIT) + depends on PCI && HNS3 help RoCE driver support for Hisilicon RoCE engine in Hisilicon Hip08 SoC. The RoCE engine is a PCI device. - To compile this driver, choose Y here: if INFINIBAND_HNS is m, this - module will be called hns-roce-hw-v2. + To compile this driver, choose M here. This module will be called + hns-roce-hw-v2. diff --git a/drivers/infiniband/hw/hns/Makefile b/drivers/infiniband/hw/hns/Makefile index be1e1cdbcfa8..7917af8e6380 100644 --- a/drivers/infiniband/hw/hns/Makefile +++ b/drivers/infiniband/hw/hns/Makefile @@ -5,12 +5,9 @@ ccflags-y := -I $(srctree)/drivers/net/ethernet/hisilicon/hns3 -hns-roce-objs := hns_roce_main.o hns_roce_cmd.o hns_roce_pd.o \ +hns-roce-hw-v2-objs := hns_roce_main.o hns_roce_cmd.o hns_roce_pd.o \ hns_roce_ah.o hns_roce_hem.o hns_roce_mr.o hns_roce_qp.o \ hns_roce_cq.o hns_roce_alloc.o hns_roce_db.o hns_roce_srq.o hns_roce_restrack.o \ - hns_roce_debugfs.o + hns_roce_debugfs.o hns_roce_hw_v2.o -ifdef CONFIG_INFINIBAND_HNS_HIP08 -hns-roce-hw-v2-objs := hns_roce_hw_v2.o $(hns-roce-objs) -obj-$(CONFIG_INFINIBAND_HNS) += hns-roce-hw-v2.o -endif +obj-$(CONFIG_INFINIBAND_HNS_HIP08) += hns-roce-hw-v2.o diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c index 950c133d4220..6ee911f6885b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_alloc.c +++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c @@ -175,8 +175,10 @@ void hns_roce_cleanup_bitmap(struct hns_roce_dev *hr_dev) if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_XRC) ida_destroy(&hr_dev->xrcd_ida.ida); - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) { ida_destroy(&hr_dev->srq_table.srq_ida.ida); + xa_destroy(&hr_dev->srq_table.xa); + } hns_roce_cleanup_qp_table(hr_dev); hns_roce_cleanup_cq_table(hr_dev); ida_destroy(&hr_dev->mr_table.mtpt_ida.ida); diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 4106423a1b39..3a5c93c9fb3e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -537,5 +537,6 @@ void hns_roce_cleanup_cq_table(struct hns_roce_dev *hr_dev) for (i = 0; i < HNS_ROCE_CQ_BANK_NUM; i++) ida_destroy(&hr_dev->cq_table.bank[i].ida); + xa_destroy(&hr_dev->cq_table.array); mutex_destroy(&hr_dev->cq_table.bank_mutex); } diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index f84521be3bea..ca0798224e56 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -931,6 +931,7 @@ struct hns_roce_hem_item { size_t count; /* max ba numbers */ int start; /* start buf offset in this hem */ int end; /* end buf offset in this hem */ + bool exist_bt; }; /* All HEM items are linked in a tree structure */ @@ -959,6 +960,7 @@ hem_list_alloc_item(struct hns_roce_dev *hr_dev, int start, int end, int count, } } + hem->exist_bt = exist_bt; hem->count = count; hem->start = start; hem->end = end; @@ -969,22 +971,22 @@ hem_list_alloc_item(struct hns_roce_dev *hr_dev, int start, int end, int count, } static void hem_list_free_item(struct hns_roce_dev *hr_dev, - struct hns_roce_hem_item *hem, bool exist_bt) + struct hns_roce_hem_item *hem) { - if (exist_bt) + if (hem->exist_bt) 
dma_free_coherent(hr_dev->dev, hem->count * BA_BYTE_LEN, hem->addr, hem->dma_addr); kfree(hem); } static void hem_list_free_all(struct hns_roce_dev *hr_dev, - struct list_head *head, bool exist_bt) + struct list_head *head) { struct hns_roce_hem_item *hem, *temp_hem; list_for_each_entry_safe(hem, temp_hem, head, list) { list_del(&hem->list); - hem_list_free_item(hr_dev, hem, exist_bt); + hem_list_free_item(hr_dev, hem); } } @@ -1084,6 +1086,10 @@ int hns_roce_hem_list_calc_root_ba(const struct hns_roce_buf_region *regions, for (i = 0; i < region_cnt; i++) { r = (struct hns_roce_buf_region *)®ions[i]; + /* when r->hopnum = 0, the region should not occupy root_ba. */ + if (!r->hopnum) + continue; + if (r->hopnum > 1) { step = hem_list_calc_ba_range(r->hopnum, 1, unit); if (step > 0) @@ -1177,7 +1183,7 @@ static int hem_list_alloc_mid_bt(struct hns_roce_dev *hr_dev, err_exit: for (level = 1; level < hopnum; level++) - hem_list_free_all(hr_dev, &temp_list[level], true); + hem_list_free_all(hr_dev, &temp_list[level]); return ret; } @@ -1218,16 +1224,26 @@ static int alloc_fake_root_bt(struct hns_roce_dev *hr_dev, void *cpu_base, { struct hns_roce_hem_item *hem; + /* This is on the has_mtt branch, if r->hopnum + * is 0, there is no root_ba to reuse for the + * region's fake hem, so a dma_alloc request is + * necessary here. + */ hem = hem_list_alloc_item(hr_dev, r->offset, r->offset + r->count - 1, - r->count, false); + r->count, !r->hopnum); if (!hem) return -ENOMEM; - hem_list_assign_bt(hem, cpu_base, phy_base); + /* The root_ba can be reused only when r->hopnum > 0. */ + if (r->hopnum) + hem_list_assign_bt(hem, cpu_base, phy_base); list_add(&hem->list, branch_head); list_add(&hem->sibling, leaf_head); - return r->count; + /* If r->hopnum == 0, 0 is returned, + * so that the root_bt entry is not occupied. + */ + return r->hopnum ? 
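The hns_roce HEM change above moves the "does this item own a BT page" decision from the callers of the free path into the item itself (exist_bt is recorded at allocation time), so freeing no longer depends on every caller guessing correctly. A small standalone model of that ownership pattern:

#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>

struct hem_item {
	void *addr;
	bool owns_buf;	/* set once at allocation, consulted at free time */
};

static struct hem_item *item_alloc(size_t len, bool alloc_buf)
{
	struct hem_item *it = calloc(1, sizeof(*it));

	if (!it)
		return NULL;
	if (alloc_buf) {
		it->addr = malloc(len);
		if (!it->addr) {
			free(it);
			return NULL;
		}
	}
	it->owns_buf = alloc_buf;
	return it;
}

/* The free path no longer takes an "exist_bt" argument from the caller. */
static void item_free(struct hem_item *it)
{
	if (it->owns_buf)
		free(it->addr);
	free(it);
}

int main(void)
{
	struct hem_item *a = item_alloc(4096, true);   /* owns its buffer */
	struct hem_item *b = item_alloc(4096, false);  /* borrows one (e.g. root BA reuse) */

	item_free(a);
	item_free(b);
	printf("freed with per-item ownership flag\n");
	return 0;
}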
r->count : 0; } static int setup_middle_bt(struct hns_roce_dev *hr_dev, void *cpu_base, @@ -1271,7 +1287,7 @@ setup_root_hem(struct hns_roce_dev *hr_dev, struct hns_roce_hem_list *hem_list, return -ENOMEM; total = 0; - for (i = 0; i < region_cnt && total < max_ba_num; i++) { + for (i = 0; i < region_cnt && total <= max_ba_num; i++) { r = ®ions[i]; if (!r->count) continue; @@ -1337,14 +1353,19 @@ static int hem_list_alloc_root_bt(struct hns_roce_dev *hr_dev, region_cnt); if (ret) { for (i = 0; i < region_cnt; i++) - hem_list_free_all(hr_dev, &head.branch[i], false); + hem_list_free_all(hr_dev, &head.branch[i]); - hem_list_free_all(hr_dev, &head.root, true); + hem_list_free_all(hr_dev, &head.root); } return ret; } +/* This is the bottom bt pages number of a 100G MR on 4K OS, assuming + * the bt page size is not expanded by cal_best_bt_pg_sz() + */ +#define RESCHED_LOOP_CNT_THRESHOLD_ON_4K 12800 + /* construct the base address table and link them by address hop config */ int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev, struct hns_roce_hem_list *hem_list, @@ -1353,6 +1374,7 @@ int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev, { const struct hns_roce_buf_region *r; int ofs, end; + int loop; int unit; int ret; int i; @@ -1370,7 +1392,10 @@ int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev, continue; end = r->offset + r->count; - for (ofs = r->offset; ofs < end; ofs += unit) { + for (ofs = r->offset, loop = 1; ofs < end; ofs += unit, loop++) { + if (!(loop % RESCHED_LOOP_CNT_THRESHOLD_ON_4K)) + cond_resched(); + ret = hem_list_alloc_mid_bt(hr_dev, r, unit, ofs, hem_list->mid_bt[i], &hem_list->btm_bt); @@ -1402,10 +1427,9 @@ void hns_roce_hem_list_release(struct hns_roce_dev *hr_dev, for (i = 0; i < HNS_ROCE_MAX_BT_REGION; i++) for (j = 0; j < HNS_ROCE_MAX_BT_LEVEL; j++) - hem_list_free_all(hr_dev, &hem_list->mid_bt[i][j], - j != 0); + hem_list_free_all(hr_dev, &hem_list->mid_bt[i][j]); - hem_list_free_all(hr_dev, &hem_list->root_bt, true); + hem_list_free_all(hr_dev, &hem_list->root_bt); INIT_LIST_HEAD(&hem_list->btm_bt); hem_list->root_ba = 0; } @@ -1428,9 +1452,14 @@ void *hns_roce_hem_list_find_mtt(struct hns_roce_dev *hr_dev, struct list_head *head = &hem_list->btm_bt; struct hns_roce_hem_item *hem, *temp_hem; void *cpu_base = NULL; + int loop = 1; int nr = 0; list_for_each_entry_safe(hem, temp_hem, head, sibling) { + if (!(loop % RESCHED_LOOP_CNT_THRESHOLD_ON_4K)) + cond_resched(); + loop++; + if (hem_list_page_is_in_range(hem, offset)) { nr = offset - hem->start; cpu_base = hem->addr + nr * BA_BYTE_LEN; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 697b17cca02e..160e8927d364 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -468,7 +468,7 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp, valid_num_sge = calc_wr_sge_num(wr, &msg_len); ret = set_ud_opcode(ud_sq_wqe, wr); - if (WARN_ON(ret)) + if (WARN_ON_ONCE(ret)) return ret; ud_sq_wqe->msg_len = cpu_to_le32(msg_len); @@ -572,7 +572,7 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp, rc_sq_wqe->msg_len = cpu_to_le32(msg_len); ret = set_rc_opcode(hr_dev, rc_sq_wqe, wr); - if (WARN_ON(ret)) + if (WARN_ON_ONCE(ret)) return ret; hr_reg_write(rc_sq_wqe, RC_SEND_WQE_SO, @@ -670,6 +670,10 @@ static void write_dwqe(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp, #define HNS_ROCE_SL_SHIFT 2 struct hns_roce_v2_rc_send_wqe *rc_sq_wqe = wqe; + if (unlikely(qp->state == IB_QPS_ERR)) { + 
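The long BT construction and lookup loops above now call cond_resched() every RESCHED_LOOP_CNT_THRESHOLD_ON_4K iterations so that registering a very large MR does not monopolize the CPU. The same throttling idiom, shown with a user-space stand-in for cond_resched():

#include <stdio.h>
#include <sched.h>

#define RESCHED_THRESHOLD 12800	/* value chosen in the patch for 4K pages */

/* Stand-in for the kernel's cond_resched(): yield occasionally. */
static void maybe_yield(void)
{
	sched_yield();
}

static unsigned long walk_regions(unsigned long entries)
{
	unsigned long sum = 0;

	for (unsigned long i = 0, loop = 1; i < entries; i++, loop++) {
		if (!(loop % RESCHED_THRESHOLD))
			maybe_yield();	/* give other tasks a chance on long walks */
		sum += i;		/* placeholder for per-entry work */
	}
	return sum;
}

int main(void)
{
	printf("sum = %lu\n", walk_regions(100000));
	return 0;
}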
flush_cqe(hr_dev, qp); + return; + } /* All kinds of DirectWQE have the same header field layout */ hr_reg_enable(rc_sq_wqe, RC_SEND_WQE_FLAG); hr_reg_write(rc_sq_wqe, RC_SEND_WQE_DB_SL_L, qp->sl); @@ -1282,10 +1286,8 @@ static u32 hns_roce_cmdq_tx_timeout(u16 opcode, u32 tx_timeout) return tx_timeout; } -static void hns_roce_wait_csq_done(struct hns_roce_dev *hr_dev, u16 opcode) +static void hns_roce_wait_csq_done(struct hns_roce_dev *hr_dev, u32 tx_timeout) { - struct hns_roce_v2_priv *priv = hr_dev->priv; - u32 tx_timeout = hns_roce_cmdq_tx_timeout(opcode, priv->cmq.tx_timeout); u32 timeout = 0; do { @@ -1295,8 +1297,9 @@ static void hns_roce_wait_csq_done(struct hns_roce_dev *hr_dev, u16 opcode) } while (++timeout < tx_timeout); } -static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, - struct hns_roce_cmq_desc *desc, int num) +static int __hns_roce_cmq_send_one(struct hns_roce_dev *hr_dev, + struct hns_roce_cmq_desc *desc, + int num, u32 tx_timeout) { struct hns_roce_v2_priv *priv = hr_dev->priv; struct hns_roce_v2_cmq_ring *csq = &priv->cmq.csq; @@ -1305,8 +1308,6 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, int ret; int i; - spin_lock_bh(&csq->lock); - tail = csq->head; for (i = 0; i < num; i++) { @@ -1320,22 +1321,17 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CMDS_CNT]); - hns_roce_wait_csq_done(hr_dev, le16_to_cpu(desc->opcode)); + hns_roce_wait_csq_done(hr_dev, tx_timeout); if (hns_roce_cmq_csq_done(hr_dev)) { ret = 0; for (i = 0; i < num; i++) { /* check the result of hardware write back */ - desc[i] = csq->desc[tail++]; + desc_ret = le16_to_cpu(csq->desc[tail++].retval); if (tail == csq->desc_num) tail = 0; - - desc_ret = le16_to_cpu(desc[i].retval); if (likely(desc_ret == CMD_EXEC_SUCCESS)) continue; - dev_err_ratelimited(hr_dev->dev, - "Cmdq IO error, opcode = 0x%x, return = 0x%x.\n", - desc->opcode, desc_ret); ret = hns_roce_cmd_err_convert_errno(desc_ret); } } else { @@ -1350,14 +1346,54 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, ret = -EAGAIN; } - spin_unlock_bh(&csq->lock); - if (ret) atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CMDS_ERR_CNT]); return ret; } +static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, + struct hns_roce_cmq_desc *desc, int num) +{ + struct hns_roce_v2_priv *priv = hr_dev->priv; + struct hns_roce_v2_cmq_ring *csq = &priv->cmq.csq; + u16 opcode = le16_to_cpu(desc->opcode); + u32 tx_timeout = hns_roce_cmdq_tx_timeout(opcode, priv->cmq.tx_timeout); + u8 try_cnt = HNS_ROCE_OPC_POST_MB_TRY_CNT; + u32 rsv_tail; + int ret; + int i; + + while (try_cnt) { + try_cnt--; + + spin_lock_bh(&csq->lock); + rsv_tail = csq->head; + ret = __hns_roce_cmq_send_one(hr_dev, desc, num, tx_timeout); + if (opcode == HNS_ROCE_OPC_POST_MB && ret == -ETIME && + try_cnt) { + spin_unlock_bh(&csq->lock); + mdelay(HNS_ROCE_OPC_POST_MB_RETRY_GAP_MSEC); + continue; + } + + for (i = 0; i < num; i++) { + desc[i] = csq->desc[rsv_tail++]; + if (rsv_tail == csq->desc_num) + rsv_tail = 0; + } + spin_unlock_bh(&csq->lock); + break; + } + + if (ret) + dev_err_ratelimited(hr_dev->dev, + "Cmdq IO error, opcode = 0x%x, return = %d.\n", + opcode, ret); + + return ret; +} + static int hns_roce_cmq_send(struct hns_roce_dev *hr_dev, struct hns_roce_cmq_desc *desc, int num) { @@ -5619,6 +5655,9 @@ static void put_dip_ctx_idx(struct hns_roce_dev *hr_dev, { struct hns_roce_dip *hr_dip = hr_qp->dip; + if (!hr_dip) + return; + xa_lock(&hr_dev->qp_table.dip_xa); hr_dip->qp_cnt--; @@ -7178,9 
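__hns_roce_cmq_send() above now wraps a single submission in a bounded retry loop: a mailbox post (HNS_ROCE_OPC_POST_MB) that times out is retried up to HNS_ROCE_OPC_POST_MB_TRY_CNT times with a short gap, and the CSQ lock is dropped across the delay. A standalone model of the bounded-retry-with-gap pattern, with a dummy submit function:

#include <stdio.h>
#include <unistd.h>
#include <errno.h>

#define TRY_CNT        8	/* HNS_ROCE_OPC_POST_MB_TRY_CNT in the patch */
#define RETRY_GAP_USEC 5000	/* 5 ms, HNS_ROCE_OPC_POST_MB_RETRY_GAP_MSEC */

/* Dummy submit: times out twice, then succeeds. */
static int submit_once(void)
{
	static int calls;

	return (++calls < 3) ? -ETIME : 0;
}

static int submit_with_retry(void)
{
	int try_cnt = TRY_CNT;
	int ret;

	while (try_cnt--) {
		ret = submit_once();
		if (ret != -ETIME || !try_cnt)
			break;
		usleep(RETRY_GAP_USEC);	/* the driver drops its spinlock before this delay */
	}
	return ret;
}

int main(void)
{
	printf("submit_with_retry() = %d\n", submit_with_retry());
	return 0;
}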
+7217,22 @@ static int hns_roce_hw_v2_reset_notify(struct hnae3_handle *handle, return ret; } +static void hns_roce_hw_v2_link_status_change(struct hnae3_handle *handle, + bool linkup) +{ + struct hns_roce_dev *hr_dev = (struct hns_roce_dev *)handle->priv; + struct net_device *netdev = handle->rinfo.netdev; + + if (linkup || !hr_dev) + return; + + ib_dispatch_port_state_event(&hr_dev->ib_dev, netdev); +} + static const struct hnae3_client_ops hns_roce_hw_v2_ops = { .init_instance = hns_roce_hw_v2_init_instance, .uninit_instance = hns_roce_hw_v2_uninit_instance, + .link_status_change = hns_roce_hw_v2_link_status_change, .reset_notify = hns_roce_hw_v2_reset_notify, }; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index cbdbc9edbce6..91a5665465ff 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -230,6 +230,8 @@ enum hns_roce_opcode_type { }; #define HNS_ROCE_OPC_POST_MB_TIMEOUT 35000 +#define HNS_ROCE_OPC_POST_MB_TRY_CNT 8 +#define HNS_ROCE_OPC_POST_MB_RETRY_GAP_MSEC 5 struct hns_roce_cmdq_tx_timeout_map { u16 opcode; u32 tx_timeout; diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index ae24c81c9812..cf89a8db4f64 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -183,7 +183,7 @@ static int hns_roce_query_device(struct ib_device *ib_dev, IB_DEVICE_RC_RNR_NAK_GEN; props->max_send_sge = hr_dev->caps.max_sq_sg; props->max_recv_sge = hr_dev->caps.max_rq_sg; - props->max_sge_rd = 1; + props->max_sge_rd = hr_dev->caps.max_sq_sg; props->max_cq = hr_dev->caps.num_cqs; props->max_cqe = hr_dev->caps.max_cqes; props->max_mr = hr_dev->caps.num_mtpts; diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index bf30b3a65a9b..09da3496843b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -814,11 +814,6 @@ int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, for (i = 0, mapped_cnt = 0; i < mtr->hem_cfg.region_count && mapped_cnt < page_cnt; i++) { r = &mtr->hem_cfg.region[i]; - /* if hopnum is 0, no need to map pages in this region */ - if (!r->hopnum) { - mapped_cnt += r->count; - continue; - } if (r->offset + r->count > page_cnt) { ret = -EINVAL; @@ -1003,7 +998,7 @@ static bool is_buf_attr_valid(struct hns_roce_dev *hr_dev, if (attr->region_count > ARRAY_SIZE(attr->region) || attr->region_count < 1 || attr->page_shift < HNS_HW_PAGE_SHIFT) { ibdev_err(ibdev, - "invalid buf attr, region count %d, page shift %u.\n", + "invalid buf attr, region count %u, page shift %u.\n", attr->region_count, attr->page_shift); return false; } diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 9e2e76c59406..9f376a2232b0 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -868,12 +868,14 @@ static int alloc_user_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_ib_create_qp *ucmd, struct hns_roce_ib_create_qp_resp *resp) { + bool has_sdb = user_qp_has_sdb(hr_dev, init_attr, udata, resp, ucmd); struct hns_roce_ucontext *uctx = rdma_udata_to_drv_context(udata, struct hns_roce_ucontext, ibucontext); + bool has_rdb = user_qp_has_rdb(hr_dev, init_attr, udata, resp); struct ib_device *ibdev = &hr_dev->ib_dev; int ret; - if (user_qp_has_sdb(hr_dev, init_attr, udata, resp, ucmd)) { + if (has_sdb) { ret = 
hns_roce_db_map_user(uctx, ucmd->sdb_addr, &hr_qp->sdb); if (ret) { ibdev_err(ibdev, @@ -884,7 +886,7 @@ static int alloc_user_qp_db(struct hns_roce_dev *hr_dev, hr_qp->en_flags |= HNS_ROCE_QP_CAP_SQ_RECORD_DB; } - if (user_qp_has_rdb(hr_dev, init_attr, udata, resp)) { + if (has_rdb) { ret = hns_roce_db_map_user(uctx, ucmd->db_addr, &hr_qp->rdb); if (ret) { ibdev_err(ibdev, @@ -898,7 +900,7 @@ static int alloc_user_qp_db(struct hns_roce_dev *hr_dev, return 0; err_sdb: - if (hr_qp->en_flags & HNS_ROCE_QP_CAP_SQ_RECORD_DB) + if (has_sdb) hns_roce_db_unmap_user(uctx, &hr_qp->sdb); err_out: return ret; @@ -1119,24 +1121,23 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, ibucontext); hr_qp->config = uctx->config; ret = set_user_sq_size(hr_dev, &init_attr->cap, hr_qp, ucmd); - if (ret) + if (ret) { ibdev_err(ibdev, "failed to set user SQ size, ret = %d.\n", ret); + return ret; + } ret = set_congest_param(hr_dev, hr_qp, ucmd); - if (ret) - return ret; } else { if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) hr_qp->config = HNS_ROCE_EXSGE_FLAGS; + default_congest_type(hr_dev, hr_qp); ret = set_kernel_sq_size(hr_dev, &init_attr->cap, hr_qp); if (ret) ibdev_err(ibdev, "failed to set kernel SQ size, ret = %d.\n", ret); - - default_congest_type(hr_dev, hr_qp); } return ret; @@ -1219,7 +1220,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, min(udata->outlen, sizeof(resp))); if (ret) { ibdev_err(ibdev, "copy qp resp failed!\n"); - goto err_store; + goto err_flow_ctrl; } } @@ -1319,7 +1320,7 @@ int hns_roce_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *init_attr, ret = hns_roce_create_qp_common(hr_dev, init_attr, udata, hr_qp); if (ret) - ibdev_err(ibdev, "create QP type 0x%x failed(%d)\n", + ibdev_err(ibdev, "create QP type %d failed(%d)\n", init_attr->qp_type, ret); err_out: @@ -1602,6 +1603,7 @@ void hns_roce_cleanup_qp_table(struct hns_roce_dev *hr_dev) for (i = 0; i < HNS_ROCE_QP_BANK_NUM; i++) ida_destroy(&hr_dev->qp_table.bank[i].ida); xa_destroy(&hr_dev->qp_table.dip_xa); + xa_destroy(&hr_dev->qp_table_xa); mutex_destroy(&hr_dev->qp_table.bank_mutex); mutex_destroy(&hr_dev->qp_table.scc_mutex); } diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index 70c06ef65603..1090051f493b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -51,7 +51,7 @@ static void hns_roce_ib_srq_event(struct hns_roce_srq *srq, break; default: dev_err(hr_dev->dev, - "hns_roce:Unexpected event type 0x%x on SRQ %06lx\n", + "hns_roce:Unexpected event type %d on SRQ %06lx\n", event_type, srq->srqn); return; } diff --git a/drivers/infiniband/hw/irdma/Kconfig b/drivers/infiniband/hw/irdma/Kconfig index b6f9c41bca51..5f49a58590ed 100644 --- a/drivers/infiniband/hw/irdma/Kconfig +++ b/drivers/infiniband/hw/irdma/Kconfig @@ -7,6 +7,7 @@ config INFINIBAND_IRDMA depends on ICE && I40E select GENERIC_ALLOCATOR select AUXILIARY_BUS + select CRC32 help This is an Intel(R) Ethernet Protocol Driver for RDMA driver that support E810 (iWARP/RoCE) and X722 (iWARP) network devices. 
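The alloc_user_qp_db() fix above computes the "has send/receive doorbell" predicates once and reuses them in both the setup path and the error unwind, so cleanup can no longer disagree with what was actually mapped. The shape of that fix, standalone with dummy map/unmap helpers:

#include <stdbool.h>
#include <stdio.h>

static bool map_db(const char *which)	/* pretend mapping always succeeds */
{
	printf("map %s doorbell\n", which);
	return true;
}

static void unmap_db(const char *which)
{
	printf("unmap %s doorbell\n", which);
}

static int setup_doorbells(bool want_sdb, bool want_rdb, bool fail_rdb)
{
	/* Decide once; both setup and unwind consult the same booleans. */
	bool has_sdb = want_sdb;
	bool has_rdb = want_rdb;

	if (has_sdb && !map_db("send"))
		return -1;

	if (has_rdb && (fail_rdb || !map_db("recv")))
		goto err_sdb;

	return 0;

err_sdb:
	if (has_sdb)		/* unwind keyed off the same predicate as setup */
		unmap_db("send");
	return -1;
}

int main(void)
{
	setup_doorbells(true, true, true);	/* exercise the unwind path */
	return 0;
}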
diff --git a/drivers/infiniband/hw/irdma/cm.c b/drivers/infiniband/hw/irdma/cm.c index ce8d821bdad8..23207f13ac1b 100644 --- a/drivers/infiniband/hw/irdma/cm.c +++ b/drivers/infiniband/hw/irdma/cm.c @@ -3303,7 +3303,7 @@ void irdma_cleanup_cm_core(struct irdma_cm_core *cm_core) if (!cm_core) return; - del_timer_sync(&cm_core->tcp_timer); + timer_delete_sync(&cm_core->tcp_timer); destroy_workqueue(cm_core->event_wq); cm_core->dev->ws_reset(&cm_core->iwdev->vsi); diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c index ad50b77282f8..69ce1862eabe 100644 --- a/drivers/infiniband/hw/irdma/hw.c +++ b/drivers/infiniband/hw/irdma/hw.c @@ -498,8 +498,6 @@ static int irdma_save_msix_info(struct irdma_pci_f *rf) iw_qvlist->num_vectors = rf->msix_count; if (rf->msix_count <= num_online_cpus()) rf->msix_shared = true; - else if (rf->msix_count > num_online_cpus() + 1) - rf->msix_count = num_online_cpus() + 1; pmsix = rf->msix_entries; for (i = 0, ceq_idx = 0; i < rf->msix_count; i++, iw_qvinfo++) { diff --git a/drivers/infiniband/hw/irdma/main.c b/drivers/infiniband/hw/irdma/main.c index 3f13200ff71b..1ee8969595d3 100644 --- a/drivers/infiniband/hw/irdma/main.c +++ b/drivers/infiniband/hw/irdma/main.c @@ -206,6 +206,43 @@ static void irdma_lan_unregister_qset(struct irdma_sc_vsi *vsi, ibdev_dbg(&iwdev->ibdev, "WS: LAN free_res for rdma qset failed.\n"); } +static int irdma_init_interrupts(struct irdma_pci_f *rf, struct ice_pf *pf) +{ + int i; + + rf->msix_count = num_online_cpus() + IRDMA_NUM_AEQ_MSIX; + rf->msix_entries = kcalloc(rf->msix_count, sizeof(*rf->msix_entries), + GFP_KERNEL); + if (!rf->msix_entries) + return -ENOMEM; + + for (i = 0; i < rf->msix_count; i++) + if (ice_alloc_rdma_qvector(pf, &rf->msix_entries[i])) + break; + + if (i < IRDMA_MIN_MSIX) { + for (; i > 0; i--) + ice_free_rdma_qvector(pf, &rf->msix_entries[i]); + + kfree(rf->msix_entries); + return -ENOMEM; + } + + rf->msix_count = i; + + return 0; +} + +static void irdma_deinit_interrupts(struct irdma_pci_f *rf, struct ice_pf *pf) +{ + int i; + + for (i = 0; i < rf->msix_count; i++) + ice_free_rdma_qvector(pf, &rf->msix_entries[i]); + + kfree(rf->msix_entries); +} + static void irdma_remove(struct auxiliary_device *aux_dev) { struct iidc_auxiliary_dev *iidc_adev = container_of(aux_dev, @@ -216,6 +253,7 @@ static void irdma_remove(struct auxiliary_device *aux_dev) irdma_ib_unregister_device(iwdev); ice_rdma_update_vsi_filter(pf, iwdev->vsi_num, false); + irdma_deinit_interrupts(iwdev->rf, pf); pr_debug("INIT: Gen2 PF[%d] device remove success\n", PCI_FUNC(pf->pdev->devfn)); } @@ -230,9 +268,7 @@ static void irdma_fill_device_info(struct irdma_device *iwdev, struct ice_pf *pf rf->gen_ops.unregister_qset = irdma_lan_unregister_qset; rf->hw.hw_addr = pf->hw.hw_addr; rf->pcidev = pf->pdev; - rf->msix_count = pf->num_rdma_msix; rf->pf_id = pf->hw.pf_id; - rf->msix_entries = &pf->msix_entries[pf->rdma_base_vector]; rf->default_vsi.vsi_idx = vsi->vsi_num; rf->protocol_used = pf->rdma_mode & IIDC_RDMA_PROTOCOL_ROCEV2 ? 
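irdma_init_interrupts() above has the driver request its own MSI-X vectors from ice: it asks for num_online_cpus() plus one AEQ vector, accepts a partial grant as long as at least IRDMA_MIN_MSIX vectors came back, and otherwise releases whatever it got. A standalone model of best-effort allocation with a minimum threshold; the allocator here is a dummy that grants a fixed number of vectors.

#include <stdio.h>
#include <stdlib.h>

#define NUM_AEQ_VEC 1	/* IRDMA_NUM_AEQ_MSIX */
#define MIN_VEC     2	/* IRDMA_MIN_MSIX */

static int grant_limit = 3;	/* pretend the PF can only hand out 3 vectors */

static int alloc_vector(int idx)
{
	return idx < grant_limit ? 0 : -1;
}

static void free_vector(int idx) { (void)idx; }

/* Returns the number of vectors kept, or -1 if below the usable minimum. */
static int init_interrupts(int online_cpus, int **vectors)
{
	int want = online_cpus + NUM_AEQ_VEC;
	int *v = calloc(want, sizeof(*v));
	int i;

	if (!v)
		return -1;

	for (i = 0; i < want; i++)
		if (alloc_vector(i))
			break;

	if (i < MIN_VEC) {		/* not enough to run: give everything back */
		while (i-- > 0)
			free_vector(i);
		free(v);
		return -1;
	}

	*vectors = v;
	return i;			/* keep however many we actually got */
}

int main(void)
{
	int *vecs;
	int n = init_interrupts(8, &vecs);

	printf("kept %d vectors\n", n);
	if (n > 0)
		free(vecs);
	return 0;
}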
IRDMA_ROCE_PROTOCOL_ONLY : IRDMA_IWARP_PROTOCOL_ONLY; @@ -281,6 +317,10 @@ static int irdma_probe(struct auxiliary_device *aux_dev, const struct auxiliary_ irdma_fill_device_info(iwdev, pf, vsi); rf = iwdev->rf; + err = irdma_init_interrupts(rf, pf); + if (err) + goto err_init_interrupts; + err = irdma_ctrl_init_hw(rf); if (err) goto err_ctrl_init; @@ -311,6 +351,8 @@ err_ibreg: err_rt_init: irdma_ctrl_deinit_hw(rf); err_ctrl_init: + irdma_deinit_interrupts(rf, pf); +err_init_interrupts: kfree(iwdev->rf); ib_dealloc_device(&iwdev->ibdev); diff --git a/drivers/infiniband/hw/irdma/main.h b/drivers/infiniband/hw/irdma/main.h index 9f0ed6e84471..bb0b6494ccb2 100644 --- a/drivers/infiniband/hw/irdma/main.h +++ b/drivers/infiniband/hw/irdma/main.h @@ -30,7 +30,6 @@ #endif #include <linux/auxiliary_bus.h> #include <linux/net/intel/iidc.h> -#include <crypto/hash.h> #include <rdma/ib_smi.h> #include <rdma/ib_verbs.h> #include <rdma/ib_pack.h> @@ -117,6 +116,9 @@ extern struct auxiliary_driver i40iw_auxiliary_drv; #define IRDMA_IRQ_NAME_STR_LEN (64) +#define IRDMA_NUM_AEQ_MSIX 1 +#define IRDMA_MIN_MSIX 2 + enum init_completion_state { INVALID_STATE = 0, INITIAL_STATE, diff --git a/drivers/infiniband/hw/irdma/osdep.h b/drivers/infiniband/hw/irdma/osdep.h index e1e3d3ae72b7..4b4f78288d12 100644 --- a/drivers/infiniband/hw/irdma/osdep.h +++ b/drivers/infiniband/hw/irdma/osdep.h @@ -6,7 +6,6 @@ #include <linux/pci.h> #include <linux/bitfield.h> #include <linux/net/intel/iidc.h> -#include <crypto/hash.h> #include <rdma/ib_verbs.h> #define STATS_TIMER_DELAY 60000 @@ -43,15 +42,12 @@ enum irdma_status_code irdma_vf_wait_vchnl_resp(struct irdma_sc_dev *dev); bool irdma_vf_clear_to_send(struct irdma_sc_dev *dev); void irdma_add_dev_ref(struct irdma_sc_dev *dev); void irdma_put_dev_ref(struct irdma_sc_dev *dev); -int irdma_ieq_check_mpacrc(struct shash_desc *desc, void *addr, u32 len, - u32 val); +int irdma_ieq_check_mpacrc(const void *addr, u32 len, u32 val); struct irdma_sc_qp *irdma_ieq_get_qp(struct irdma_sc_dev *dev, struct irdma_puda_buf *buf); void irdma_send_ieq_ack(struct irdma_sc_qp *qp); void irdma_ieq_update_tcpip_info(struct irdma_puda_buf *buf, u16 len, u32 seqnum); -void irdma_free_hash_desc(struct shash_desc *hash_desc); -int irdma_init_hash_desc(struct shash_desc **hash_desc); int irdma_puda_get_tcpip_info(struct irdma_puda_cmpl_info *info, struct irdma_puda_buf *buf); int irdma_cqp_sds_cmd(struct irdma_sc_dev *dev, @@ -59,10 +55,6 @@ int irdma_cqp_sds_cmd(struct irdma_sc_dev *dev, int irdma_cqp_manage_hmc_fcn_cmd(struct irdma_sc_dev *dev, struct irdma_hmc_fcn_info *hmcfcninfo, u16 *pmf_idx); -int irdma_cqp_query_fpm_val_cmd(struct irdma_sc_dev *dev, - struct irdma_dma_mem *val_mem, u8 hmc_fn_id); -int irdma_cqp_commit_fpm_val_cmd(struct irdma_sc_dev *dev, - struct irdma_dma_mem *val_mem, u8 hmc_fn_id); int irdma_alloc_query_fpm_buf(struct irdma_sc_dev *dev, struct irdma_dma_mem *mem); void *irdma_remove_cqp_head(struct irdma_sc_dev *dev); diff --git a/drivers/infiniband/hw/irdma/protos.h b/drivers/infiniband/hw/irdma/protos.h index d7c8ea948bcd..c0c9441885d3 100644 --- a/drivers/infiniband/hw/irdma/protos.h +++ b/drivers/infiniband/hw/irdma/protos.h @@ -85,10 +85,6 @@ int irdma_process_cqp_cmd(struct irdma_sc_dev *dev, int irdma_process_bh(struct irdma_sc_dev *dev); int irdma_cqp_sds_cmd(struct irdma_sc_dev *dev, struct irdma_update_sds_info *info); -int irdma_cqp_query_fpm_val_cmd(struct irdma_sc_dev *dev, - struct irdma_dma_mem *val_mem, u8 hmc_fn_id); -int 
irdma_cqp_commit_fpm_val_cmd(struct irdma_sc_dev *dev, - struct irdma_dma_mem *val_mem, u8 hmc_fn_id); int irdma_alloc_query_fpm_buf(struct irdma_sc_dev *dev, struct irdma_dma_mem *mem); int irdma_cqp_manage_hmc_fcn_cmd(struct irdma_sc_dev *dev, diff --git a/drivers/infiniband/hw/irdma/puda.c b/drivers/infiniband/hw/irdma/puda.c index 7e3f9bca2c23..694e5a9ed15d 100644 --- a/drivers/infiniband/hw/irdma/puda.c +++ b/drivers/infiniband/hw/irdma/puda.c @@ -923,8 +923,6 @@ void irdma_puda_dele_rsrc(struct irdma_sc_vsi *vsi, enum puda_rsrc_type type, switch (rsrc->cmpl) { case PUDA_HASH_CRC_COMPLETE: - irdma_free_hash_desc(rsrc->hash_desc); - fallthrough; case PUDA_QP_CREATED: irdma_qp_rem_qos(&rsrc->qp); @@ -1095,15 +1093,12 @@ int irdma_puda_create_rsrc(struct irdma_sc_vsi *vsi, goto error; if (info->type == IRDMA_PUDA_RSRC_TYPE_IEQ) { - if (!irdma_init_hash_desc(&rsrc->hash_desc)) { - rsrc->check_crc = true; - rsrc->cmpl = PUDA_HASH_CRC_COMPLETE; - ret = 0; - } + rsrc->check_crc = true; + rsrc->cmpl = PUDA_HASH_CRC_COMPLETE; } irdma_sc_ccq_arm(&rsrc->cq); - return ret; + return 0; error: irdma_puda_dele_rsrc(vsi, info->type, false); @@ -1396,8 +1391,8 @@ static int irdma_ieq_handle_partial(struct irdma_puda_rsrc *ieq, crcptr = txbuf->data + fpdu_len - 4; mpacrc = *(u32 *)crcptr; if (ieq->check_crc) { - status = irdma_ieq_check_mpacrc(ieq->hash_desc, txbuf->data, - (fpdu_len - 4), mpacrc); + status = irdma_ieq_check_mpacrc(txbuf->data, fpdu_len - 4, + mpacrc); if (status) { ibdev_dbg(to_ibdev(ieq->dev), "IEQ: error bad crc\n"); goto error; @@ -1465,8 +1460,8 @@ static int irdma_ieq_process_buf(struct irdma_puda_rsrc *ieq, crcptr = datap + fpdu_len - 4; mpacrc = *(u32 *)crcptr; if (ieq->check_crc) - ret = irdma_ieq_check_mpacrc(ieq->hash_desc, datap, - fpdu_len - 4, mpacrc); + ret = irdma_ieq_check_mpacrc(datap, fpdu_len - 4, + mpacrc); if (ret) { list_add(&buf->list, rxlist); ibdev_dbg(to_ibdev(ieq->dev), diff --git a/drivers/infiniband/hw/irdma/puda.h b/drivers/infiniband/hw/irdma/puda.h index bc6d9514c9c1..2fc638f2b143 100644 --- a/drivers/infiniband/hw/irdma/puda.h +++ b/drivers/infiniband/hw/irdma/puda.h @@ -119,7 +119,6 @@ struct irdma_puda_rsrc { u32 rx_wqe_idx; u32 rxq_invalid_cnt; u32 tx_wqe_avail_cnt; - struct shash_desc *hash_desc; struct list_head txpend; struct list_head bufpool; /* free buffers pool list for recv and xmit */ u32 alloc_buf_count; @@ -163,10 +162,8 @@ struct irdma_sc_qp *irdma_ieq_get_qp(struct irdma_sc_dev *dev, struct irdma_puda_buf *buf); int irdma_puda_get_tcpip_info(struct irdma_puda_cmpl_info *info, struct irdma_puda_buf *buf); -int irdma_ieq_check_mpacrc(struct shash_desc *desc, void *addr, u32 len, u32 val); -int irdma_init_hash_desc(struct shash_desc **desc); +int irdma_ieq_check_mpacrc(const void *addr, u32 len, u32 val); void irdma_ieq_mpa_crc_ae(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp); -void irdma_free_hash_desc(struct shash_desc *desc); void irdma_ieq_update_tcpip_info(struct irdma_puda_buf *buf, u16 len, u32 seqnum); int irdma_cqp_qp_create_cmd(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp); int irdma_cqp_cq_create_cmd(struct irdma_sc_dev *dev, struct irdma_sc_cq *cq); diff --git a/drivers/infiniband/hw/irdma/utils.c b/drivers/infiniband/hw/irdma/utils.c index 0422787592d8..d66b4f7a84ec 100644 --- a/drivers/infiniband/hw/irdma/utils.c +++ b/drivers/infiniband/hw/irdma/utils.c @@ -320,9 +320,6 @@ int irdma_netdevice_event(struct notifier_block *notifier, unsigned long event, case NETDEV_DOWN: iwdev->iw_status = 0; fallthrough; - case 
NETDEV_UP: - irdma_port_ibevent(iwdev); - break; default: break; } @@ -966,80 +963,12 @@ void irdma_terminate_del_timer(struct irdma_sc_qp *qp) int ret; iwqp = qp->qp_uk.back_qp; - ret = del_timer(&iwqp->terminate_timer); + ret = timer_delete(&iwqp->terminate_timer); if (ret) irdma_qp_rem_ref(&iwqp->ibqp); } /** - * irdma_cqp_query_fpm_val_cmd - send cqp command for fpm - * @dev: function device struct - * @val_mem: buffer for fpm - * @hmc_fn_id: function id for fpm - */ -int irdma_cqp_query_fpm_val_cmd(struct irdma_sc_dev *dev, - struct irdma_dma_mem *val_mem, u8 hmc_fn_id) -{ - struct irdma_cqp_request *cqp_request; - struct cqp_cmds_info *cqp_info; - struct irdma_pci_f *rf = dev_to_rf(dev); - int status; - - cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true); - if (!cqp_request) - return -ENOMEM; - - cqp_info = &cqp_request->info; - cqp_request->param = NULL; - cqp_info->in.u.query_fpm_val.cqp = dev->cqp; - cqp_info->in.u.query_fpm_val.fpm_val_pa = val_mem->pa; - cqp_info->in.u.query_fpm_val.fpm_val_va = val_mem->va; - cqp_info->in.u.query_fpm_val.hmc_fn_id = hmc_fn_id; - cqp_info->cqp_cmd = IRDMA_OP_QUERY_FPM_VAL; - cqp_info->post_sq = 1; - cqp_info->in.u.query_fpm_val.scratch = (uintptr_t)cqp_request; - - status = irdma_handle_cqp_op(rf, cqp_request); - irdma_put_cqp_request(&rf->cqp, cqp_request); - - return status; -} - -/** - * irdma_cqp_commit_fpm_val_cmd - commit fpm values in hw - * @dev: hardware control device structure - * @val_mem: buffer with fpm values - * @hmc_fn_id: function id for fpm - */ -int irdma_cqp_commit_fpm_val_cmd(struct irdma_sc_dev *dev, - struct irdma_dma_mem *val_mem, u8 hmc_fn_id) -{ - struct irdma_cqp_request *cqp_request; - struct cqp_cmds_info *cqp_info; - struct irdma_pci_f *rf = dev_to_rf(dev); - int status; - - cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true); - if (!cqp_request) - return -ENOMEM; - - cqp_info = &cqp_request->info; - cqp_request->param = NULL; - cqp_info->in.u.commit_fpm_val.cqp = dev->cqp; - cqp_info->in.u.commit_fpm_val.fpm_val_pa = val_mem->pa; - cqp_info->in.u.commit_fpm_val.fpm_val_va = val_mem->va; - cqp_info->in.u.commit_fpm_val.hmc_fn_id = hmc_fn_id; - cqp_info->cqp_cmd = IRDMA_OP_COMMIT_FPM_VAL; - cqp_info->post_sq = 1; - cqp_info->in.u.commit_fpm_val.scratch = (uintptr_t)cqp_request; - - status = irdma_handle_cqp_op(rf, cqp_request); - irdma_put_cqp_request(&rf->cqp, cqp_request); - - return status; -} - -/** * irdma_cqp_cq_create_cmd - create a cq for the cqp * @dev: device pointer * @cq: pointer to created cq @@ -1345,57 +1274,14 @@ void irdma_ieq_mpa_crc_ae(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp) } /** - * irdma_init_hash_desc - initialize hash for crc calculation - * @desc: cryption type - */ -int irdma_init_hash_desc(struct shash_desc **desc) -{ - struct crypto_shash *tfm; - struct shash_desc *tdesc; - - tfm = crypto_alloc_shash("crc32c", 0, 0); - if (IS_ERR(tfm)) - return -EINVAL; - - tdesc = kzalloc(sizeof(*tdesc) + crypto_shash_descsize(tfm), - GFP_KERNEL); - if (!tdesc) { - crypto_free_shash(tfm); - return -EINVAL; - } - - tdesc->tfm = tfm; - *desc = tdesc; - - return 0; -} - -/** - * irdma_free_hash_desc - free hash desc - * @desc: to be freed - */ -void irdma_free_hash_desc(struct shash_desc *desc) -{ - if (desc) { - crypto_free_shash(desc->tfm); - kfree(desc); - } -} - -/** * irdma_ieq_check_mpacrc - check if mpa crc is OK - * @desc: desc for hash * @addr: address of buffer for crc * @len: length of buffer * @val: value to be compared */ -int irdma_ieq_check_mpacrc(struct 
shash_desc *desc, void *addr, u32 len, - u32 val) +int irdma_ieq_check_mpacrc(const void *addr, u32 len, u32 val) { - u32 crc = 0; - - crypto_shash_digest(desc, addr, len, (u8 *)&crc); - if (crc != val) + if ((__force u32)cpu_to_le32(~crc32c(~0, addr, len)) != val) return -EINVAL; return 0; @@ -1684,7 +1570,7 @@ void irdma_hw_stats_stop_timer(struct irdma_sc_vsi *vsi) { struct irdma_vsi_pestat *devstat = vsi->pestat; - del_timer_sync(&devstat->stats_timer); + timer_delete_sync(&devstat->stats_timer); } /** diff --git a/drivers/infiniband/hw/mana/Makefile b/drivers/infiniband/hw/mana/Makefile index 88655fe5e398..921c05e08b11 100644 --- a/drivers/infiniband/hw/mana/Makefile +++ b/drivers/infiniband/hw/mana/Makefile @@ -1,4 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_MANA_INFINIBAND) += mana_ib.o -mana_ib-y := device.o main.o wq.o qp.o cq.o mr.o +mana_ib-y := device.o main.o wq.o qp.o cq.o mr.o ah.o wr.o counters.o diff --git a/drivers/infiniband/hw/mana/ah.c b/drivers/infiniband/hw/mana/ah.c new file mode 100644 index 000000000000..f56952eebbaa --- /dev/null +++ b/drivers/infiniband/hw/mana/ah.c @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2024, Microsoft Corporation. All rights reserved. + */ + +#include "mana_ib.h" + +int mana_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *attr, + struct ib_udata *udata) +{ + struct mana_ib_dev *mdev = container_of(ibah->device, struct mana_ib_dev, ib_dev); + struct mana_ib_ah *ah = container_of(ibah, struct mana_ib_ah, ibah); + struct rdma_ah_attr *ah_attr = attr->ah_attr; + const struct ib_global_route *grh; + enum rdma_network_type ntype; + + if (ah_attr->type != RDMA_AH_ATTR_TYPE_ROCE || + !(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH)) + return -EINVAL; + + if (udata) + return -EINVAL; + + ah->av = dma_pool_zalloc(mdev->av_pool, GFP_ATOMIC, &ah->dma_handle); + if (!ah->av) + return -ENOMEM; + + grh = rdma_ah_read_grh(ah_attr); + ntype = rdma_gid_attr_network_type(grh->sgid_attr); + + copy_in_reverse(ah->av->dest_mac, ah_attr->roce.dmac, ETH_ALEN); + ah->av->udp_src_port = rdma_flow_label_to_udp_sport(grh->flow_label); + ah->av->hop_limit = grh->hop_limit; + ah->av->dscp = (grh->traffic_class >> 2) & 0x3f; + ah->av->is_ipv6 = (ntype == RDMA_NETWORK_IPV6); + + if (ah->av->is_ipv6) { + copy_in_reverse(ah->av->dest_ip, grh->dgid.raw, 16); + copy_in_reverse(ah->av->src_ip, grh->sgid_attr->gid.raw, 16); + } else { + ah->av->dest_ip[10] = 0xFF; + ah->av->dest_ip[11] = 0xFF; + copy_in_reverse(&ah->av->dest_ip[12], &grh->dgid.raw[12], 4); + copy_in_reverse(&ah->av->src_ip[12], &grh->sgid_attr->gid.raw[12], 4); + } + + return 0; +} + +int mana_ib_destroy_ah(struct ib_ah *ibah, u32 flags) +{ + struct mana_ib_dev *mdev = container_of(ibah->device, struct mana_ib_dev, ib_dev); + struct mana_ib_ah *ah = container_of(ibah, struct mana_ib_ah, ibah); + + dma_pool_free(mdev->av_pool, ah->av, ah->dma_handle); + + return 0; +} diff --git a/drivers/infiniband/hw/mana/counters.c b/drivers/infiniband/hw/mana/counters.c new file mode 100644 index 000000000000..e533ce21013d --- /dev/null +++ b/drivers/infiniband/hw/mana/counters.c @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2024, Microsoft Corporation. All rights reserved. 
+ */ + +#include "counters.h" + +static const struct rdma_stat_desc mana_ib_port_stats_desc[] = { + [MANA_IB_REQUESTER_TIMEOUT].name = "requester_timeout", + [MANA_IB_REQUESTER_OOS_NAK].name = "requester_oos_nak", + [MANA_IB_REQUESTER_RNR_NAK].name = "requester_rnr_nak", + [MANA_IB_RESPONDER_RNR_NAK].name = "responder_rnr_nak", + [MANA_IB_RESPONDER_OOS].name = "responder_oos", + [MANA_IB_RESPONDER_DUP_REQUEST].name = "responder_dup_request", + [MANA_IB_REQUESTER_IMPLICIT_NAK].name = "requester_implicit_nak", + [MANA_IB_REQUESTER_READRESP_PSN_MISMATCH].name = "requester_readresp_psn_mismatch", + [MANA_IB_NAK_INV_REQ].name = "nak_inv_req", + [MANA_IB_NAK_ACCESS_ERR].name = "nak_access_error", + [MANA_IB_NAK_OPP_ERR].name = "nak_opp_error", + [MANA_IB_NAK_INV_READ].name = "nak_inv_read", + [MANA_IB_RESPONDER_LOCAL_LEN_ERR].name = "responder_local_len_error", + [MANA_IB_REQUESTOR_LOCAL_PROT_ERR].name = "requestor_local_prot_error", + [MANA_IB_RESPONDER_REM_ACCESS_ERR].name = "responder_rem_access_error", + [MANA_IB_RESPONDER_LOCAL_QP_ERR].name = "responder_local_qp_error", + [MANA_IB_RESPONDER_MALFORMED_WQE].name = "responder_malformed_wqe", + [MANA_IB_GENERAL_HW_ERR].name = "general_hw_error", + [MANA_IB_REQUESTER_RNR_NAK_RETRIES_EXCEEDED].name = "requester_rnr_nak_retries_exceeded", + [MANA_IB_REQUESTER_RETRIES_EXCEEDED].name = "requester_retries_exceeded", + [MANA_IB_TOTAL_FATAL_ERR].name = "total_fatal_error", + [MANA_IB_RECEIVED_CNPS].name = "received_cnps", + [MANA_IB_NUM_QPS_CONGESTED].name = "num_qps_congested", + [MANA_IB_RATE_INC_EVENTS].name = "rate_inc_events", + [MANA_IB_NUM_QPS_RECOVERED].name = "num_qps_recovered", + [MANA_IB_CURRENT_RATE].name = "current_rate", +}; + +struct rdma_hw_stats *mana_ib_alloc_hw_port_stats(struct ib_device *ibdev, + u32 port_num) +{ + return rdma_alloc_hw_stats_struct(mana_ib_port_stats_desc, + ARRAY_SIZE(mana_ib_port_stats_desc), + RDMA_HW_STATS_DEFAULT_LIFESPAN); +} + +int mana_ib_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, + u32 port_num, int index) +{ + struct mana_ib_dev *mdev = container_of(ibdev, struct mana_ib_dev, + ib_dev); + struct mana_rnic_query_vf_cntrs_resp resp = {}; + struct mana_rnic_query_vf_cntrs_req req = {}; + int err; + + mana_gd_init_req_hdr(&req.hdr, MANA_IB_QUERY_VF_COUNTERS, + sizeof(req), sizeof(resp)); + req.hdr.dev_id = mdev->gdma_dev->dev_id; + req.adapter = mdev->adapter_handle; + + err = mana_gd_send_request(mdev_to_gc(mdev), sizeof(req), &req, + sizeof(resp), &resp); + if (err) { + ibdev_err(&mdev->ib_dev, "Failed to query vf counters err %d", + err); + return err; + } + + stats->value[MANA_IB_REQUESTER_TIMEOUT] = resp.requester_timeout; + stats->value[MANA_IB_REQUESTER_OOS_NAK] = resp.requester_oos_nak; + stats->value[MANA_IB_REQUESTER_RNR_NAK] = resp.requester_rnr_nak; + stats->value[MANA_IB_RESPONDER_RNR_NAK] = resp.responder_rnr_nak; + stats->value[MANA_IB_RESPONDER_OOS] = resp.responder_oos; + stats->value[MANA_IB_RESPONDER_DUP_REQUEST] = resp.responder_dup_request; + stats->value[MANA_IB_REQUESTER_IMPLICIT_NAK] = + resp.requester_implicit_nak; + stats->value[MANA_IB_REQUESTER_READRESP_PSN_MISMATCH] = + resp.requester_readresp_psn_mismatch; + stats->value[MANA_IB_NAK_INV_REQ] = resp.nak_inv_req; + stats->value[MANA_IB_NAK_ACCESS_ERR] = resp.nak_access_err; + stats->value[MANA_IB_NAK_OPP_ERR] = resp.nak_opp_err; + stats->value[MANA_IB_NAK_INV_READ] = resp.nak_inv_read; + stats->value[MANA_IB_RESPONDER_LOCAL_LEN_ERR] = + resp.responder_local_len_err; + 
stats->value[MANA_IB_REQUESTOR_LOCAL_PROT_ERR] = + resp.requestor_local_prot_err; + stats->value[MANA_IB_RESPONDER_REM_ACCESS_ERR] = + resp.responder_rem_access_err; + stats->value[MANA_IB_RESPONDER_LOCAL_QP_ERR] = + resp.responder_local_qp_err; + stats->value[MANA_IB_RESPONDER_MALFORMED_WQE] = + resp.responder_malformed_wqe; + stats->value[MANA_IB_GENERAL_HW_ERR] = resp.general_hw_err; + stats->value[MANA_IB_REQUESTER_RNR_NAK_RETRIES_EXCEEDED] = + resp.requester_rnr_nak_retries_exceeded; + stats->value[MANA_IB_REQUESTER_RETRIES_EXCEEDED] = + resp.requester_retries_exceeded; + stats->value[MANA_IB_TOTAL_FATAL_ERR] = resp.total_fatal_err; + + stats->value[MANA_IB_RECEIVED_CNPS] = resp.received_cnps; + stats->value[MANA_IB_NUM_QPS_CONGESTED] = resp.num_qps_congested; + stats->value[MANA_IB_RATE_INC_EVENTS] = resp.rate_inc_events; + stats->value[MANA_IB_NUM_QPS_RECOVERED] = resp.num_qps_recovered; + stats->value[MANA_IB_CURRENT_RATE] = resp.current_rate; + + return ARRAY_SIZE(mana_ib_port_stats_desc); +} diff --git a/drivers/infiniband/hw/mana/counters.h b/drivers/infiniband/hw/mana/counters.h new file mode 100644 index 000000000000..7ff92d27f6c3 --- /dev/null +++ b/drivers/infiniband/hw/mana/counters.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024 Microsoft Corporation. All rights reserved. + */ + +#ifndef _COUNTERS_H_ +#define _COUNTERS_H_ + +#include "mana_ib.h" + +enum mana_ib_port_counters { + MANA_IB_REQUESTER_TIMEOUT, + MANA_IB_REQUESTER_OOS_NAK, + MANA_IB_REQUESTER_RNR_NAK, + MANA_IB_RESPONDER_RNR_NAK, + MANA_IB_RESPONDER_OOS, + MANA_IB_RESPONDER_DUP_REQUEST, + MANA_IB_REQUESTER_IMPLICIT_NAK, + MANA_IB_REQUESTER_READRESP_PSN_MISMATCH, + MANA_IB_NAK_INV_REQ, + MANA_IB_NAK_ACCESS_ERR, + MANA_IB_NAK_OPP_ERR, + MANA_IB_NAK_INV_READ, + MANA_IB_RESPONDER_LOCAL_LEN_ERR, + MANA_IB_REQUESTOR_LOCAL_PROT_ERR, + MANA_IB_RESPONDER_REM_ACCESS_ERR, + MANA_IB_RESPONDER_LOCAL_QP_ERR, + MANA_IB_RESPONDER_MALFORMED_WQE, + MANA_IB_GENERAL_HW_ERR, + MANA_IB_REQUESTER_RNR_NAK_RETRIES_EXCEEDED, + MANA_IB_REQUESTER_RETRIES_EXCEEDED, + MANA_IB_TOTAL_FATAL_ERR, + MANA_IB_RECEIVED_CNPS, + MANA_IB_NUM_QPS_CONGESTED, + MANA_IB_RATE_INC_EVENTS, + MANA_IB_NUM_QPS_RECOVERED, + MANA_IB_CURRENT_RATE, +}; + +struct rdma_hw_stats *mana_ib_alloc_hw_port_stats(struct ib_device *ibdev, + u32 port_num); +int mana_ib_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, + u32 port_num, int index); +#endif /* _COUNTERS_H_ */ diff --git a/drivers/infiniband/hw/mana/cq.c b/drivers/infiniband/hw/mana/cq.c index f04a679d2871..0fc4e2679218 100644 --- a/drivers/infiniband/hw/mana/cq.c +++ b/drivers/infiniband/hw/mana/cq.c @@ -15,42 +15,58 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ib_device *ibdev = ibcq->device; struct mana_ib_create_cq ucmd = {}; struct mana_ib_dev *mdev; + struct gdma_context *gc; bool is_rnic_cq; u32 doorbell; + u32 buf_size; int err; mdev = container_of(ibdev, struct mana_ib_dev, ib_dev); + gc = mdev_to_gc(mdev); cq->comp_vector = attr->comp_vector % ibdev->num_comp_vectors; cq->cq_handle = INVALID_MANA_HANDLE; - if (udata->inlen < offsetof(struct mana_ib_create_cq, flags)) - return -EINVAL; + if (udata) { + if (udata->inlen < offsetof(struct mana_ib_create_cq, flags)) + return -EINVAL; - err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen)); - if (err) { - ibdev_dbg(ibdev, - "Failed to copy from udata for create cq, %d\n", err); - return err; - } + err = ib_copy_from_udata(&ucmd, udata, 
min(sizeof(ucmd), udata->inlen)); + if (err) { + ibdev_dbg(ibdev, "Failed to copy from udata for create cq, %d\n", err); + return err; + } - is_rnic_cq = !!(ucmd.flags & MANA_IB_CREATE_RNIC_CQ); + is_rnic_cq = !!(ucmd.flags & MANA_IB_CREATE_RNIC_CQ); - if (!is_rnic_cq && attr->cqe > mdev->adapter_caps.max_qp_wr) { - ibdev_dbg(ibdev, "CQE %d exceeding limit\n", attr->cqe); - return -EINVAL; - } + if ((!is_rnic_cq && attr->cqe > mdev->adapter_caps.max_qp_wr) || + attr->cqe > U32_MAX / COMP_ENTRY_SIZE) { + ibdev_dbg(ibdev, "CQE %d exceeding limit\n", attr->cqe); + return -EINVAL; + } - cq->cqe = attr->cqe; - err = mana_ib_create_queue(mdev, ucmd.buf_addr, cq->cqe * COMP_ENTRY_SIZE, &cq->queue); - if (err) { - ibdev_dbg(ibdev, "Failed to create queue for create cq, %d\n", err); - return err; - } + cq->cqe = attr->cqe; + err = mana_ib_create_queue(mdev, ucmd.buf_addr, cq->cqe * COMP_ENTRY_SIZE, + &cq->queue); + if (err) { + ibdev_dbg(ibdev, "Failed to create queue for create cq, %d\n", err); + return err; + } - mana_ucontext = rdma_udata_to_drv_context(udata, struct mana_ib_ucontext, - ibucontext); - doorbell = mana_ucontext->doorbell; + mana_ucontext = rdma_udata_to_drv_context(udata, struct mana_ib_ucontext, + ibucontext); + doorbell = mana_ucontext->doorbell; + } else { + is_rnic_cq = true; + buf_size = MANA_PAGE_ALIGN(roundup_pow_of_two(attr->cqe * COMP_ENTRY_SIZE)); + cq->cqe = buf_size / COMP_ENTRY_SIZE; + err = mana_ib_create_kernel_queue(mdev, buf_size, GDMA_CQ, &cq->queue); + if (err) { + ibdev_dbg(ibdev, "Failed to create kernel queue for create cq, %d\n", err); + return err; + } + doorbell = gc->mana_ib.doorbell; + } if (is_rnic_cq) { err = mana_ib_gd_create_cq(mdev, cq, doorbell); @@ -66,13 +82,19 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, } } - resp.cqid = cq->queue.id; - err = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen)); - if (err) { - ibdev_dbg(&mdev->ib_dev, "Failed to copy to udata, %d\n", err); - goto err_remove_cq_cb; + if (udata) { + resp.cqid = cq->queue.id; + err = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen)); + if (err) { + ibdev_dbg(&mdev->ib_dev, "Failed to copy to udata, %d\n", err); + goto err_remove_cq_cb; + } } + spin_lock_init(&cq->cq_lock); + INIT_LIST_HEAD(&cq->list_send_qp); + INIT_LIST_HEAD(&cq->list_recv_qp); + return 0; err_remove_cq_cb: @@ -122,7 +144,10 @@ int mana_ib_install_cq_cb(struct mana_ib_dev *mdev, struct mana_ib_cq *cq) return -EINVAL; /* Create CQ table entry */ WARN_ON(gc->cq_table[cq->queue.id]); - gdma_cq = kzalloc(sizeof(*gdma_cq), GFP_KERNEL); + if (cq->queue.kmem) + gdma_cq = cq->queue.kmem; + else + gdma_cq = kzalloc(sizeof(*gdma_cq), GFP_KERNEL); if (!gdma_cq) return -ENOMEM; @@ -141,6 +166,153 @@ void mana_ib_remove_cq_cb(struct mana_ib_dev *mdev, struct mana_ib_cq *cq) if (cq->queue.id >= gc->max_num_cqs || cq->queue.id == INVALID_QUEUE_ID) return; + if (cq->queue.kmem) + /* Then it will be cleaned and removed by the mana */ + return; + kfree(gc->cq_table[cq->queue.id]); gc->cq_table[cq->queue.id] = NULL; } + +int mana_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) +{ + struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq); + struct gdma_queue *gdma_cq = cq->queue.kmem; + + if (!gdma_cq) + return -EINVAL; + + mana_gd_ring_cq(gdma_cq, SET_ARM_BIT); + return 0; +} + +static inline void handle_ud_sq_cqe(struct mana_ib_qp *qp, struct gdma_comp *cqe) +{ + struct mana_rdma_cqe *rdma_cqe = (struct mana_rdma_cqe *)cqe->cqe_data; + struct 
gdma_queue *wq = qp->ud_qp.queues[MANA_UD_SEND_QUEUE].kmem; + struct ud_sq_shadow_wqe *shadow_wqe; + + shadow_wqe = shadow_queue_get_next_to_complete(&qp->shadow_sq); + if (!shadow_wqe) + return; + + shadow_wqe->header.error_code = rdma_cqe->ud_send.vendor_error; + + wq->tail += shadow_wqe->header.posted_wqe_size; + shadow_queue_advance_next_to_complete(&qp->shadow_sq); +} + +static inline void handle_ud_rq_cqe(struct mana_ib_qp *qp, struct gdma_comp *cqe) +{ + struct mana_rdma_cqe *rdma_cqe = (struct mana_rdma_cqe *)cqe->cqe_data; + struct gdma_queue *wq = qp->ud_qp.queues[MANA_UD_RECV_QUEUE].kmem; + struct ud_rq_shadow_wqe *shadow_wqe; + + shadow_wqe = shadow_queue_get_next_to_complete(&qp->shadow_rq); + if (!shadow_wqe) + return; + + shadow_wqe->byte_len = rdma_cqe->ud_recv.msg_len; + shadow_wqe->src_qpn = rdma_cqe->ud_recv.src_qpn; + shadow_wqe->header.error_code = IB_WC_SUCCESS; + + wq->tail += shadow_wqe->header.posted_wqe_size; + shadow_queue_advance_next_to_complete(&qp->shadow_rq); +} + +static void mana_handle_cqe(struct mana_ib_dev *mdev, struct gdma_comp *cqe) +{ + struct mana_ib_qp *qp = mana_get_qp_ref(mdev, cqe->wq_num, cqe->is_sq); + + if (!qp) + return; + + if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_UD) { + if (cqe->is_sq) + handle_ud_sq_cqe(qp, cqe); + else + handle_ud_rq_cqe(qp, cqe); + } + + mana_put_qp_ref(qp); +} + +static void fill_verbs_from_shadow_wqe(struct mana_ib_qp *qp, struct ib_wc *wc, + const struct shadow_wqe_header *shadow_wqe) +{ + const struct ud_rq_shadow_wqe *ud_wqe = (const struct ud_rq_shadow_wqe *)shadow_wqe; + + wc->wr_id = shadow_wqe->wr_id; + wc->status = shadow_wqe->error_code; + wc->opcode = shadow_wqe->opcode; + wc->vendor_err = shadow_wqe->error_code; + wc->wc_flags = 0; + wc->qp = &qp->ibqp; + wc->pkey_index = 0; + + if (shadow_wqe->opcode == IB_WC_RECV) { + wc->byte_len = ud_wqe->byte_len; + wc->src_qp = ud_wqe->src_qpn; + wc->wc_flags |= IB_WC_GRH; + } +} + +static int mana_process_completions(struct mana_ib_cq *cq, int nwc, struct ib_wc *wc) +{ + struct shadow_wqe_header *shadow_wqe; + struct mana_ib_qp *qp; + int wc_index = 0; + + /* process send shadow queue completions */ + list_for_each_entry(qp, &cq->list_send_qp, cq_send_list) { + while ((shadow_wqe = shadow_queue_get_next_to_consume(&qp->shadow_sq)) + != NULL) { + if (wc_index >= nwc) + goto out; + + fill_verbs_from_shadow_wqe(qp, &wc[wc_index], shadow_wqe); + shadow_queue_advance_consumer(&qp->shadow_sq); + wc_index++; + } + } + + /* process recv shadow queue completions */ + list_for_each_entry(qp, &cq->list_recv_qp, cq_recv_list) { + while ((shadow_wqe = shadow_queue_get_next_to_consume(&qp->shadow_rq)) + != NULL) { + if (wc_index >= nwc) + goto out; + + fill_verbs_from_shadow_wqe(qp, &wc[wc_index], shadow_wqe); + shadow_queue_advance_consumer(&qp->shadow_rq); + wc_index++; + } + } + +out: + return wc_index; +} + +int mana_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) +{ + struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq); + struct mana_ib_dev *mdev = container_of(ibcq->device, struct mana_ib_dev, ib_dev); + struct gdma_queue *queue = cq->queue.kmem; + struct gdma_comp gdma_cqe; + unsigned long flags; + int num_polled = 0; + int comp_read, i; + + spin_lock_irqsave(&cq->cq_lock, flags); + for (i = 0; i < num_entries; i++) { + comp_read = mana_gd_poll_cq(queue, &gdma_cqe, 1); + if (comp_read < 1) + break; + mana_handle_cqe(mdev, &gdma_cqe); + } + + num_polled = mana_process_completions(cq, num_entries, wc); + 
spin_unlock_irqrestore(&cq->cq_lock, flags); + + return num_polled; +} diff --git a/drivers/infiniband/hw/mana/device.c b/drivers/infiniband/hw/mana/device.c index 3416a85f8738..b31089320aa5 100644 --- a/drivers/infiniband/hw/mana/device.c +++ b/drivers/infiniband/hw/mana/device.c @@ -19,6 +19,7 @@ static const struct ib_device_ops mana_ib_dev_ops = { .add_gid = mana_ib_gd_add_gid, .alloc_pd = mana_ib_alloc_pd, .alloc_ucontext = mana_ib_alloc_ucontext, + .create_ah = mana_ib_create_ah, .create_cq = mana_ib_create_cq, .create_qp = mana_ib_create_qp, .create_rwq_ind_table = mana_ib_create_rwq_ind_table, @@ -27,22 +28,30 @@ static const struct ib_device_ops mana_ib_dev_ops = { .dealloc_ucontext = mana_ib_dealloc_ucontext, .del_gid = mana_ib_gd_del_gid, .dereg_mr = mana_ib_dereg_mr, + .destroy_ah = mana_ib_destroy_ah, .destroy_cq = mana_ib_destroy_cq, .destroy_qp = mana_ib_destroy_qp, .destroy_rwq_ind_table = mana_ib_destroy_rwq_ind_table, .destroy_wq = mana_ib_destroy_wq, .disassociate_ucontext = mana_ib_disassociate_ucontext, + .get_dma_mr = mana_ib_get_dma_mr, .get_link_layer = mana_ib_get_link_layer, .get_port_immutable = mana_ib_get_port_immutable, .mmap = mana_ib_mmap, .modify_qp = mana_ib_modify_qp, .modify_wq = mana_ib_modify_wq, + .poll_cq = mana_ib_poll_cq, + .post_recv = mana_ib_post_recv, + .post_send = mana_ib_post_send, .query_device = mana_ib_query_device, .query_gid = mana_ib_query_gid, .query_pkey = mana_ib_query_pkey, .query_port = mana_ib_query_port, .reg_user_mr = mana_ib_reg_user_mr, + .reg_user_mr_dmabuf = mana_ib_reg_user_mr_dmabuf, + .req_notify_cq = mana_ib_arm_cq, + INIT_RDMA_OBJ_SIZE(ib_ah, mana_ib_ah, ibah), INIT_RDMA_OBJ_SIZE(ib_cq, mana_ib_cq, ibcq), INIT_RDMA_OBJ_SIZE(ib_pd, mana_ib_pd, ibpd), INIT_RDMA_OBJ_SIZE(ib_qp, mana_ib_qp, ibqp), @@ -51,6 +60,43 @@ static const struct ib_device_ops mana_ib_dev_ops = { ib_ind_table), }; +static const struct ib_device_ops mana_ib_stats_ops = { + .alloc_hw_port_stats = mana_ib_alloc_hw_port_stats, + .get_hw_stats = mana_ib_get_hw_stats, +}; + +static int mana_ib_netdev_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct mana_ib_dev *dev = container_of(this, struct mana_ib_dev, nb); + struct net_device *event_dev = netdev_notifier_info_to_dev(ptr); + struct gdma_context *gc = dev->gdma_dev->gdma_context; + struct mana_context *mc = gc->mana.driver_data; + struct net_device *ndev; + + /* Only process events from our parent device */ + if (event_dev != mc->ports[0]) + return NOTIFY_DONE; + + switch (event) { + case NETDEV_CHANGEUPPER: + ndev = mana_get_primary_netdev(mc, 0, &dev->dev_tracker); + /* + * RDMA core will setup GID based on updated netdev. + * It's not possible to race with the core as rtnl lock is being + * held. 
+ */ + ib_device_set_netdev(&dev->ib_dev, ndev, 1); + + /* mana_get_primary_netdev() returns ndev with refcount held */ + netdev_put(ndev, &dev->dev_tracker); + + return NOTIFY_OK; + default: + return NOTIFY_DONE; + } +} + static int mana_ib_probe(struct auxiliary_device *adev, const struct auxiliary_device_id *id) { @@ -84,10 +130,8 @@ static int mana_ib_probe(struct auxiliary_device *adev, dev->ib_dev.num_comp_vectors = mdev->gdma_context->max_num_queues; dev->ib_dev.dev.parent = mdev->gdma_context->dev; - rcu_read_lock(); /* required to get primary netdev */ - ndev = mana_get_primary_netdev_rcu(mc, 0); + ndev = mana_get_primary_netdev(mc, 0, &dev->dev_tracker); if (!ndev) { - rcu_read_unlock(); ret = -ENODEV; ibdev_err(&dev->ib_dev, "Failed to get netdev for IB port 1"); goto free_ib_device; @@ -95,7 +139,8 @@ static int mana_ib_probe(struct auxiliary_device *adev, ether_addr_copy(mac_addr, ndev->dev_addr); addrconf_addr_eui48((u8 *)&dev->ib_dev.node_guid, ndev->dev_addr); ret = ib_device_set_netdev(&dev->ib_dev, ndev, 1); - rcu_read_unlock(); + /* mana_get_primary_netdev() returns ndev with refcount held */ + netdev_put(ndev, &dev->dev_tracker); if (ret) { ibdev_err(&dev->ib_dev, "Failed to set ib netdev, ret %d", ret); goto free_ib_device; @@ -109,17 +154,27 @@ static int mana_ib_probe(struct auxiliary_device *adev, } dev->gdma_dev = &mdev->gdma_context->mana_ib; + dev->nb.notifier_call = mana_ib_netdev_event; + ret = register_netdevice_notifier(&dev->nb); + if (ret) { + ibdev_err(&dev->ib_dev, "Failed to register net notifier, %d", + ret); + goto deregister_device; + } + ret = mana_ib_gd_query_adapter_caps(dev); if (ret) { ibdev_err(&dev->ib_dev, "Failed to query device caps, ret %d", ret); - goto deregister_device; + goto deregister_net_notifier; } + ib_set_device_ops(&dev->ib_dev, &mana_ib_stats_ops); + ret = mana_ib_create_eqs(dev); if (ret) { ibdev_err(&dev->ib_dev, "Failed to create EQs, ret %d", ret); - goto deregister_device; + goto deregister_net_notifier; } ret = mana_ib_gd_create_rnic_adapter(dev); @@ -134,20 +189,31 @@ static int mana_ib_probe(struct auxiliary_device *adev, goto destroy_rnic; } + dev->av_pool = dma_pool_create("mana_ib_av", mdev->gdma_context->dev, + MANA_AV_BUFFER_SIZE, MANA_AV_BUFFER_SIZE, 0); + if (!dev->av_pool) { + ret = -ENOMEM; + goto destroy_rnic; + } + ret = ib_register_device(&dev->ib_dev, "mana_%d", mdev->gdma_context->dev); if (ret) - goto destroy_rnic; + goto deallocate_pool; dev_set_drvdata(&adev->dev, dev); return 0; +deallocate_pool: + dma_pool_destroy(dev->av_pool); destroy_rnic: xa_destroy(&dev->qp_table_wq); mana_ib_gd_destroy_rnic_adapter(dev); destroy_eqs: mana_ib_destroy_eqs(dev); +deregister_net_notifier: + unregister_netdevice_notifier(&dev->nb); deregister_device: mana_gd_deregister_device(dev->gdma_dev); free_ib_device: @@ -160,9 +226,11 @@ static void mana_ib_remove(struct auxiliary_device *adev) struct mana_ib_dev *dev = dev_get_drvdata(&adev->dev); ib_unregister_device(&dev->ib_dev); + dma_pool_destroy(dev->av_pool); xa_destroy(&dev->qp_table_wq); mana_ib_gd_destroy_rnic_adapter(dev); mana_ib_destroy_eqs(dev); + unregister_netdevice_notifier(&dev->nb); mana_gd_deregister_device(dev->gdma_dev); ib_dealloc_device(&dev->ib_dev); } diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c index 67c2d43135a8..eda9c5b971de 100644 --- a/drivers/infiniband/hw/mana/main.c +++ b/drivers/infiniband/hw/mana/main.c @@ -82,6 +82,9 @@ int mana_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) 
mana_gd_init_req_hdr(&req.hdr, GDMA_CREATE_PD, sizeof(req), sizeof(resp)); + if (!udata) + flags |= GDMA_PD_FLAG_ALLOW_GPA_MR; + req.flags = flags; err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); @@ -174,7 +177,7 @@ static int mana_gd_allocate_doorbell_page(struct gdma_context *gc, req.resource_type = GDMA_RESOURCE_DOORBELL_PAGE; req.num_resources = 1; - req.alignment = 1; + req.alignment = PAGE_SIZE / MANA_PAGE_SIZE; /* Have GDMA start searching from 0 */ req.allocated_resources = 0; @@ -237,6 +240,27 @@ void mana_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) ibdev_dbg(ibdev, "Failed to destroy doorbell page %d\n", ret); } +int mana_ib_create_kernel_queue(struct mana_ib_dev *mdev, u32 size, enum gdma_queue_type type, + struct mana_ib_queue *queue) +{ + struct gdma_context *gc = mdev_to_gc(mdev); + struct gdma_queue_spec spec = {}; + int err; + + queue->id = INVALID_QUEUE_ID; + queue->gdma_region = GDMA_INVALID_DMA_REGION; + spec.type = type; + spec.monitor_avl_buf = false; + spec.queue_size = size; + err = mana_gd_create_mana_wq_cq(&gc->mana_ib, &spec, &queue->kmem); + if (err) + return err; + /* take ownership into mana_ib from mana */ + queue->gdma_region = queue->kmem->mem_info.dma_region_handle; + queue->kmem->mem_info.dma_region_handle = GDMA_INVALID_DMA_REGION; + return 0; +} + int mana_ib_create_queue(struct mana_ib_dev *mdev, u64 addr, u32 size, struct mana_ib_queue *queue) { @@ -276,6 +300,8 @@ void mana_ib_destroy_queue(struct mana_ib_dev *mdev, struct mana_ib_queue *queue */ mana_ib_gd_destroy_dma_region(mdev, queue->gdma_region); ib_umem_release(queue->umem); + if (queue->kmem) + mana_gd_destroy_queue(mdev_to_gc(mdev), queue->kmem); } static int @@ -358,7 +384,7 @@ static int mana_ib_gd_create_dma_region(struct mana_ib_dev *dev, struct ib_umem unsigned int tail = 0; u64 *page_addr_list; void *request_buf; - int err; + int err = 0; gc = mdev_to_gc(dev); hwc = gc->hwc.driver_data; @@ -535,8 +561,10 @@ int mana_ib_get_port_immutable(struct ib_device *ibdev, u32 port_num, immutable->pkey_tbl_len = attr.pkey_tbl_len; immutable->gid_tbl_len = attr.gid_tbl_len; immutable->core_cap_flags = RDMA_CORE_PORT_RAW_PACKET; - if (port_num == 1) + if (port_num == 1) { immutable->core_cap_flags |= RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP; + immutable->max_mad_size = IB_MGMT_MAD_SIZE; + } return 0; } @@ -595,8 +623,11 @@ int mana_ib_query_port(struct ib_device *ibdev, u32 port, props->active_width = IB_WIDTH_4X; props->active_speed = IB_SPEED_EDR; props->pkey_tbl_len = 1; - if (port == 1) + if (port == 1) { props->gid_tbl_len = 16; + props->port_cap_flags = IB_PORT_CM_SUP; + props->ip_gids = true; + } return 0; } @@ -634,7 +665,7 @@ int mana_ib_gd_query_adapter_caps(struct mana_ib_dev *dev) mana_gd_init_req_hdr(&req.hdr, MANA_IB_GET_ADAPTER_CAP, sizeof(req), sizeof(resp)); - req.hdr.resp.msg_version = GDMA_MESSAGE_V3; + req.hdr.resp.msg_version = GDMA_MESSAGE_V4; req.hdr.dev_id = dev->gdma_dev->dev_id; err = mana_gd_send_request(mdev_to_gc(dev), sizeof(req), @@ -663,6 +694,7 @@ int mana_ib_gd_query_adapter_caps(struct mana_ib_dev *dev) caps->max_inline_data_size = resp.max_inline_data_size; caps->max_send_sge_count = resp.max_send_sge_count; caps->max_recv_sge_count = resp.max_recv_sge_count; + caps->feature_flags = resp.feature_flags; return 0; } @@ -678,7 +710,7 @@ mana_ib_event_handler(void *ctx, struct gdma_queue *q, struct gdma_event *event) switch (event->type) { case GDMA_EQE_RNIC_QP_FATAL: qpn = event->details[0]; - qp = mana_get_qp_ref(mdev, qpn); + qp = 
mana_get_qp_ref(mdev, qpn, false); if (!qp) break; if (qp->ibqp.event_handler) { @@ -762,6 +794,9 @@ int mana_ib_gd_create_rnic_adapter(struct mana_ib_dev *mdev) req.hdr.dev_id = gc->mana_ib.dev_id; req.notify_eq_id = mdev->fatal_err_eq->id; + if (mdev->adapter_caps.feature_flags & MANA_IB_FEATURE_CLIENT_ERROR_CQE_SUPPORT) + req.feature_flags |= MANA_IB_FEATURE_CLIENT_ERROR_CQE_REQUEST; + err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); if (err) { ibdev_err(&mdev->ib_dev, "Failed to create RNIC adapter err %d", err); @@ -987,3 +1022,61 @@ int mana_ib_gd_destroy_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp) } return 0; } + +int mana_ib_gd_create_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp, + struct ib_qp_init_attr *attr, u32 doorbell, u32 type) +{ + struct mana_ib_cq *send_cq = container_of(qp->ibqp.send_cq, struct mana_ib_cq, ibcq); + struct mana_ib_cq *recv_cq = container_of(qp->ibqp.recv_cq, struct mana_ib_cq, ibcq); + struct mana_ib_pd *pd = container_of(qp->ibqp.pd, struct mana_ib_pd, ibpd); + struct gdma_context *gc = mdev_to_gc(mdev); + struct mana_rnic_create_udqp_resp resp = {}; + struct mana_rnic_create_udqp_req req = {}; + int err, i; + + mana_gd_init_req_hdr(&req.hdr, MANA_IB_CREATE_UD_QP, sizeof(req), sizeof(resp)); + req.hdr.dev_id = gc->mana_ib.dev_id; + req.adapter = mdev->adapter_handle; + req.pd_handle = pd->pd_handle; + req.send_cq_handle = send_cq->cq_handle; + req.recv_cq_handle = recv_cq->cq_handle; + for (i = 0; i < MANA_UD_QUEUE_TYPE_MAX; i++) + req.dma_region[i] = qp->ud_qp.queues[i].gdma_region; + req.doorbell_page = doorbell; + req.max_send_wr = attr->cap.max_send_wr; + req.max_recv_wr = attr->cap.max_recv_wr; + req.max_send_sge = attr->cap.max_send_sge; + req.max_recv_sge = attr->cap.max_recv_sge; + req.qp_type = type; + err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); + if (err) { + ibdev_err(&mdev->ib_dev, "Failed to create ud qp err %d", err); + return err; + } + qp->qp_handle = resp.qp_handle; + for (i = 0; i < MANA_UD_QUEUE_TYPE_MAX; i++) { + qp->ud_qp.queues[i].id = resp.queue_ids[i]; + /* The GDMA regions are now owned by the RNIC QP handle */ + qp->ud_qp.queues[i].gdma_region = GDMA_INVALID_DMA_REGION; + } + return 0; +} + +int mana_ib_gd_destroy_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp) +{ + struct mana_rnic_destroy_udqp_resp resp = {0}; + struct mana_rnic_destroy_udqp_req req = {0}; + struct gdma_context *gc = mdev_to_gc(mdev); + int err; + + mana_gd_init_req_hdr(&req.hdr, MANA_IB_DESTROY_UD_QP, sizeof(req), sizeof(resp)); + req.hdr.dev_id = gc->mana_ib.dev_id; + req.adapter = mdev->adapter_handle; + req.qp_handle = qp->qp_handle; + err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); + if (err) { + ibdev_err(&mdev->ib_dev, "Failed to destroy ud qp err %d", err); + return err; + } + return 0; +} diff --git a/drivers/infiniband/hw/mana/mana_ib.h b/drivers/infiniband/hw/mana/mana_ib.h index b53a5b4de908..6903946677e5 100644 --- a/drivers/infiniband/hw/mana/mana_ib.h +++ b/drivers/infiniband/hw/mana/mana_ib.h @@ -11,8 +11,11 @@ #include <rdma/ib_umem.h> #include <rdma/mana-abi.h> #include <rdma/uverbs_ioctl.h> +#include <linux/dmapool.h> #include <net/mana/mana.h> +#include "shadow_queue.h" +#include "counters.h" #define PAGE_SZ_BM \ (SZ_4K | SZ_8K | SZ_16K | SZ_32K | SZ_64K | SZ_128K | SZ_256K | \ @@ -21,6 +24,9 @@ /* MANA doesn't have any limit for MR size */ #define MANA_IB_MAX_MR_SIZE U64_MAX +/* Send queue ID mask */ +#define MANA_SENDQ_MASK BIT(31) + /* * 
The hardware limit of number of MRs is greater than maximum number of MRs * that can possibly represent in 24 bits @@ -32,6 +38,11 @@ */ #define MANA_CA_ACK_DELAY 16 +/* + * The buffer used for writing AV + */ +#define MANA_AV_BUFFER_SIZE 64 + struct mana_ib_adapter_caps { u32 max_sq_id; u32 max_rq_id; @@ -48,10 +59,12 @@ struct mana_ib_adapter_caps { u32 max_send_sge_count; u32 max_recv_sge_count; u32 max_inline_data_size; + u64 feature_flags; }; struct mana_ib_queue { struct ib_umem *umem; + struct gdma_queue *kmem; u64 gdma_region; u64 id; }; @@ -64,6 +77,9 @@ struct mana_ib_dev { struct gdma_queue **eqs; struct xarray qp_table_wq; struct mana_ib_adapter_caps adapter_caps; + struct dma_pool *av_pool; + netdevice_tracker dev_tracker; + struct notifier_block nb; }; struct mana_ib_wq { @@ -87,6 +103,25 @@ struct mana_ib_pd { u32 tx_vp_offset; }; +struct mana_ib_av { + u8 dest_ip[16]; + u8 dest_mac[ETH_ALEN]; + u16 udp_src_port; + u8 src_ip[16]; + u32 hop_limit : 8; + u32 reserved1 : 12; + u32 dscp : 6; + u32 reserved2 : 5; + u32 is_ipv6 : 1; + u32 reserved3 : 32; +}; + +struct mana_ib_ah { + struct ib_ah ibah; + struct mana_ib_av *av; + dma_addr_t dma_handle; +}; + struct mana_ib_mr { struct ib_mr ibmr; struct ib_umem *umem; @@ -96,6 +131,10 @@ struct mana_ib_mr { struct mana_ib_cq { struct ib_cq ibcq; struct mana_ib_queue queue; + /* protects CQ polling */ + spinlock_t cq_lock; + struct list_head list_send_qp; + struct list_head list_recv_qp; int cqe; u32 comp_vector; mana_handle_t cq_handle; @@ -114,6 +153,17 @@ struct mana_ib_rc_qp { struct mana_ib_queue queues[MANA_RC_QUEUE_TYPE_MAX]; }; +enum mana_ud_queue_type { + MANA_UD_SEND_QUEUE = 0, + MANA_UD_RECV_QUEUE, + MANA_UD_QUEUE_TYPE_MAX, +}; + +struct mana_ib_ud_qp { + struct mana_ib_queue queues[MANA_UD_QUEUE_TYPE_MAX]; + u32 sq_psn; +}; + struct mana_ib_qp { struct ib_qp ibqp; @@ -121,11 +171,17 @@ struct mana_ib_qp { union { struct mana_ib_queue raw_sq; struct mana_ib_rc_qp rc_qp; + struct mana_ib_ud_qp ud_qp; }; /* The port on the IB device, starting with 1 */ u32 port; + struct list_head cq_send_list; + struct list_head cq_recv_list; + struct shadow_queue shadow_rq; + struct shadow_queue shadow_sq; + refcount_t refcount; struct completion free; }; @@ -145,17 +201,24 @@ enum mana_ib_command_code { MANA_IB_DESTROY_ADAPTER = 0x30003, MANA_IB_CONFIG_IP_ADDR = 0x30004, MANA_IB_CONFIG_MAC_ADDR = 0x30005, + MANA_IB_CREATE_UD_QP = 0x30006, + MANA_IB_DESTROY_UD_QP = 0x30007, MANA_IB_CREATE_CQ = 0x30008, MANA_IB_DESTROY_CQ = 0x30009, MANA_IB_CREATE_RC_QP = 0x3000a, MANA_IB_DESTROY_RC_QP = 0x3000b, MANA_IB_SET_QP_STATE = 0x3000d, + MANA_IB_QUERY_VF_COUNTERS = 0x30022, }; struct mana_ib_query_adapter_caps_req { struct gdma_req_hdr hdr; }; /*HW Data */ +enum mana_ib_adapter_features { + MANA_IB_FEATURE_CLIENT_ERROR_CQE_SUPPORT = BIT(4), +}; + struct mana_ib_query_adapter_caps_resp { struct gdma_resp_hdr hdr; u32 max_sq_id; @@ -176,8 +239,13 @@ struct mana_ib_query_adapter_caps_resp { u32 max_send_sge_count; u32 max_recv_sge_count; u32 max_inline_data_size; + u64 feature_flags; }; /* HW Data */ +enum mana_ib_adapter_features_request { + MANA_IB_FEATURE_CLIENT_ERROR_CQE_REQUEST = BIT(1), +}; /*HW Data */ + struct mana_rnic_create_adapter_req { struct gdma_req_hdr hdr; u32 notify_eq_id; @@ -296,6 +364,37 @@ struct mana_rnic_destroy_rc_qp_resp { struct gdma_resp_hdr hdr; }; /* HW Data */ +struct mana_rnic_create_udqp_req { + struct gdma_req_hdr hdr; + mana_handle_t adapter; + mana_handle_t pd_handle; + mana_handle_t send_cq_handle; + mana_handle_t 
recv_cq_handle; + u64 dma_region[MANA_UD_QUEUE_TYPE_MAX]; + u32 qp_type; + u32 doorbell_page; + u32 max_send_wr; + u32 max_recv_wr; + u32 max_send_sge; + u32 max_recv_sge; +}; /* HW Data */ + +struct mana_rnic_create_udqp_resp { + struct gdma_resp_hdr hdr; + mana_handle_t qp_handle; + u32 queue_ids[MANA_UD_QUEUE_TYPE_MAX]; +}; /* HW Data*/ + +struct mana_rnic_destroy_udqp_req { + struct gdma_req_hdr hdr; + mana_handle_t adapter; + mana_handle_t qp_handle; +}; /* HW Data */ + +struct mana_rnic_destroy_udqp_resp { + struct gdma_resp_hdr hdr; +}; /* HW Data */ + struct mana_ib_ah_attr { u8 src_addr[16]; u8 dest_addr[16]; @@ -332,17 +431,104 @@ struct mana_rnic_set_qp_state_resp { struct gdma_resp_hdr hdr; }; /* HW Data */ +enum WQE_OPCODE_TYPES { + WQE_TYPE_UD_SEND = 0, + WQE_TYPE_UD_RECV = 8, +}; /* HW DATA */ + +struct rdma_send_oob { + u32 wqe_type : 5; + u32 fence : 1; + u32 signaled : 1; + u32 solicited : 1; + u32 psn : 24; + + u32 ssn_or_rqpn : 24; + u32 reserved1 : 8; + union { + struct { + u32 remote_qkey; + u32 immediate; + u32 reserved1; + u32 reserved2; + } ud_send; + }; +}; /* HW DATA */ + +struct mana_rdma_cqe { + union { + struct { + u8 cqe_type; + u8 data[GDMA_COMP_DATA_SIZE - 1]; + }; + struct { + u32 cqe_type : 8; + u32 vendor_error : 9; + u32 reserved1 : 15; + u32 sge_offset : 5; + u32 tx_wqe_offset : 27; + } ud_send; + struct { + u32 cqe_type : 8; + u32 reserved1 : 24; + u32 msg_len; + u32 src_qpn : 24; + u32 reserved2 : 8; + u32 imm_data; + u32 rx_wqe_offset; + } ud_recv; + }; +}; /* HW DATA */ + +struct mana_rnic_query_vf_cntrs_req { + struct gdma_req_hdr hdr; + mana_handle_t adapter; +}; /* HW Data */ + +struct mana_rnic_query_vf_cntrs_resp { + struct gdma_resp_hdr hdr; + u64 requester_timeout; + u64 requester_oos_nak; + u64 requester_rnr_nak; + u64 responder_rnr_nak; + u64 responder_oos; + u64 responder_dup_request; + u64 requester_implicit_nak; + u64 requester_readresp_psn_mismatch; + u64 nak_inv_req; + u64 nak_access_err; + u64 nak_opp_err; + u64 nak_inv_read; + u64 responder_local_len_err; + u64 requestor_local_prot_err; + u64 responder_rem_access_err; + u64 responder_local_qp_err; + u64 responder_malformed_wqe; + u64 general_hw_err; + u64 requester_rnr_nak_retries_exceeded; + u64 requester_retries_exceeded; + u64 total_fatal_err; + u64 received_cnps; + u64 num_qps_congested; + u64 rate_inc_events; + u64 num_qps_recovered; + u64 current_rate; +}; /* HW Data */ + static inline struct gdma_context *mdev_to_gc(struct mana_ib_dev *mdev) { return mdev->gdma_dev->gdma_context; } static inline struct mana_ib_qp *mana_get_qp_ref(struct mana_ib_dev *mdev, - uint32_t qid) + u32 qid, bool is_sq) { struct mana_ib_qp *qp; unsigned long flag; + if (is_sq) + qid |= MANA_SENDQ_MASK; + xa_lock_irqsave(&mdev->qp_table_wq, flag); qp = xa_load(&mdev->qp_table_wq, qid); if (qp) @@ -388,6 +574,8 @@ int mana_ib_create_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem, int mana_ib_gd_destroy_dma_region(struct mana_ib_dev *dev, mana_handle_t gdma_region); +int mana_ib_create_kernel_queue(struct mana_ib_dev *mdev, u32 size, enum gdma_queue_type type, + struct mana_ib_queue *queue); int mana_ib_create_queue(struct mana_ib_dev *mdev, u64 addr, u32 size, struct mana_ib_queue *queue); void mana_ib_destroy_queue(struct mana_ib_dev *mdev, struct mana_ib_queue *queue); @@ -480,4 +668,24 @@ int mana_ib_gd_destroy_cq(struct mana_ib_dev *mdev, struct mana_ib_cq *cq); int mana_ib_gd_create_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp, struct ib_qp_init_attr *attr, u32 doorbell, u64 
flags); int mana_ib_gd_destroy_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp); + +int mana_ib_gd_create_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp, + struct ib_qp_init_attr *attr, u32 doorbell, u32 type); +int mana_ib_gd_destroy_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp); + +int mana_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, + struct ib_udata *udata); +int mana_ib_destroy_ah(struct ib_ah *ah, u32 flags); + +int mana_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr); +int mana_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr); + +int mana_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); +int mana_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); + +struct ib_mr *mana_ib_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 start, u64 length, + u64 iova, int fd, int mr_access_flags, + struct uverbs_attr_bundle *attrs); #endif diff --git a/drivers/infiniband/hw/mana/mr.c b/drivers/infiniband/hw/mana/mr.c index 887b09dd86e7..f99557ec7767 100644 --- a/drivers/infiniband/hw/mana/mr.c +++ b/drivers/infiniband/hw/mana/mr.c @@ -8,6 +8,8 @@ #define VALID_MR_FLAGS \ (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ) +#define VALID_DMA_MR_FLAGS (IB_ACCESS_LOCAL_WRITE) + static enum gdma_mr_access_flags mana_ib_verbs_to_gdma_access_flags(int access_flags) { @@ -39,6 +41,8 @@ static int mana_ib_gd_create_mr(struct mana_ib_dev *dev, struct mana_ib_mr *mr, req.mr_type = mr_params->mr_type; switch (mr_params->mr_type) { + case GDMA_MR_TYPE_GPA: + break; case GDMA_MR_TYPE_GVA: req.gva.dma_region_handle = mr_params->gva.dma_region_handle; req.gva.virtual_address = mr_params->gva.virtual_address; @@ -169,6 +173,107 @@ err_free: return ERR_PTR(err); } +struct ib_mr *mana_ib_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 start, u64 length, + u64 iova, int fd, int access_flags, + struct uverbs_attr_bundle *attrs) +{ + struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd, ibpd); + struct gdma_create_mr_params mr_params = {}; + struct ib_device *ibdev = ibpd->device; + struct ib_umem_dmabuf *umem_dmabuf; + struct mana_ib_dev *dev; + struct mana_ib_mr *mr; + u64 dma_region_handle; + int err; + + dev = container_of(ibdev, struct mana_ib_dev, ib_dev); + + access_flags &= ~IB_ACCESS_OPTIONAL; + if (access_flags & ~VALID_MR_FLAGS) + return ERR_PTR(-EOPNOTSUPP); + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + umem_dmabuf = ib_umem_dmabuf_get_pinned(ibdev, start, length, fd, access_flags); + if (IS_ERR(umem_dmabuf)) { + err = PTR_ERR(umem_dmabuf); + ibdev_dbg(ibdev, "Failed to get dmabuf umem, %d\n", err); + goto err_free; + } + + mr->umem = &umem_dmabuf->umem; + + err = mana_ib_create_dma_region(dev, mr->umem, &dma_region_handle, iova); + if (err) { + ibdev_dbg(ibdev, "Failed create dma region for user-mr, %d\n", + err); + goto err_umem; + } + + mr_params.pd_handle = pd->pd_handle; + mr_params.mr_type = GDMA_MR_TYPE_GVA; + mr_params.gva.dma_region_handle = dma_region_handle; + mr_params.gva.virtual_address = iova; + mr_params.gva.access_flags = + mana_ib_verbs_to_gdma_access_flags(access_flags); + + err = mana_ib_gd_create_mr(dev, mr, &mr_params); + if (err) + goto err_dma_region; + + /* + * There is no need to keep track of dma_region_handle after MR is + * successfully created. The dma_region_handle is tracked in the PF + * as part of the lifecycle of this MR. 
+ */ + + return &mr->ibmr; + +err_dma_region: + mana_gd_destroy_dma_region(mdev_to_gc(dev), dma_region_handle); + +err_umem: + ib_umem_release(mr->umem); + +err_free: + kfree(mr); + return ERR_PTR(err); +} + +struct ib_mr *mana_ib_get_dma_mr(struct ib_pd *ibpd, int access_flags) +{ + struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd, ibpd); + struct gdma_create_mr_params mr_params = {}; + struct ib_device *ibdev = ibpd->device; + struct mana_ib_dev *dev; + struct mana_ib_mr *mr; + int err; + + dev = container_of(ibdev, struct mana_ib_dev, ib_dev); + + if (access_flags & ~VALID_DMA_MR_FLAGS) + return ERR_PTR(-EINVAL); + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + mr_params.pd_handle = pd->pd_handle; + mr_params.mr_type = GDMA_MR_TYPE_GPA; + + err = mana_ib_gd_create_mr(dev, mr, &mr_params); + if (err) + goto err_free; + + return &mr->ibmr; + +err_free: + kfree(mr); + return ERR_PTR(err); +} + int mana_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) { struct mana_ib_mr *mr = container_of(ibmr, struct mana_ib_mr, ibmr); diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c index 73d67c853b6f..c928af58f38b 100644 --- a/drivers/infiniband/hw/mana/qp.c +++ b/drivers/infiniband/hw/mana/qp.c @@ -398,18 +398,128 @@ err_free_vport: return err; } +static u32 mana_ib_wqe_size(u32 sge, u32 oob_size) +{ + u32 wqe_size = sge * sizeof(struct gdma_sge) + sizeof(struct gdma_wqe) + oob_size; + + return ALIGN(wqe_size, GDMA_WQE_BU_SIZE); +} + +static u32 mana_ib_queue_size(struct ib_qp_init_attr *attr, u32 queue_type) +{ + u32 queue_size; + + switch (attr->qp_type) { + case IB_QPT_UD: + case IB_QPT_GSI: + if (queue_type == MANA_UD_SEND_QUEUE) + queue_size = attr->cap.max_send_wr * + mana_ib_wqe_size(attr->cap.max_send_sge, INLINE_OOB_LARGE_SIZE); + else + queue_size = attr->cap.max_recv_wr * + mana_ib_wqe_size(attr->cap.max_recv_sge, INLINE_OOB_SMALL_SIZE); + break; + default: + return 0; + } + + return MANA_PAGE_ALIGN(roundup_pow_of_two(queue_size)); +} + +static enum gdma_queue_type mana_ib_queue_type(struct ib_qp_init_attr *attr, u32 queue_type) +{ + enum gdma_queue_type type; + + switch (attr->qp_type) { + case IB_QPT_UD: + case IB_QPT_GSI: + if (queue_type == MANA_UD_SEND_QUEUE) + type = GDMA_SQ; + else + type = GDMA_RQ; + break; + default: + type = GDMA_INVALID_QUEUE; + } + return type; +} + +static int mana_table_store_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp) +{ + return xa_insert_irq(&mdev->qp_table_wq, qp->ibqp.qp_num, qp, + GFP_KERNEL); +} + +static void mana_table_remove_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp) +{ + xa_erase_irq(&mdev->qp_table_wq, qp->ibqp.qp_num); +} + +static int mana_table_store_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp) +{ + u32 qids = qp->ud_qp.queues[MANA_UD_SEND_QUEUE].id | MANA_SENDQ_MASK; + u32 qidr = qp->ud_qp.queues[MANA_UD_RECV_QUEUE].id; + int err; + + err = xa_insert_irq(&mdev->qp_table_wq, qids, qp, GFP_KERNEL); + if (err) + return err; + + err = xa_insert_irq(&mdev->qp_table_wq, qidr, qp, GFP_KERNEL); + if (err) + goto remove_sq; + + return 0; + +remove_sq: + xa_erase_irq(&mdev->qp_table_wq, qids); + return err; +} + +static void mana_table_remove_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp) +{ + u32 qids = qp->ud_qp.queues[MANA_UD_SEND_QUEUE].id | MANA_SENDQ_MASK; + u32 qidr = qp->ud_qp.queues[MANA_UD_RECV_QUEUE].id; + + xa_erase_irq(&mdev->qp_table_wq, qids); + xa_erase_irq(&mdev->qp_table_wq, qidr); +} + static int 
mana_table_store_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp) { refcount_set(&qp->refcount, 1); init_completion(&qp->free); - return xa_insert_irq(&mdev->qp_table_wq, qp->ibqp.qp_num, qp, - GFP_KERNEL); + + switch (qp->ibqp.qp_type) { + case IB_QPT_RC: + return mana_table_store_rc_qp(mdev, qp); + case IB_QPT_UD: + case IB_QPT_GSI: + return mana_table_store_ud_qp(mdev, qp); + default: + ibdev_dbg(&mdev->ib_dev, "Unknown QP type for storing in mana table, %d\n", + qp->ibqp.qp_type); + } + + return -EINVAL; } static void mana_table_remove_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp) { - xa_erase_irq(&mdev->qp_table_wq, qp->ibqp.qp_num); + switch (qp->ibqp.qp_type) { + case IB_QPT_RC: + mana_table_remove_rc_qp(mdev, qp); + break; + case IB_QPT_UD: + case IB_QPT_GSI: + mana_table_remove_ud_qp(mdev, qp); + break; + default: + ibdev_dbg(&mdev->ib_dev, "Unknown QP type for removing from mana table, %d\n", + qp->ibqp.qp_type); + return; + } mana_put_qp_ref(qp); wait_for_completion(&qp->free); } @@ -490,6 +600,105 @@ destroy_queues: return err; } +static void mana_add_qp_to_cqs(struct mana_ib_qp *qp) +{ + struct mana_ib_cq *send_cq = container_of(qp->ibqp.send_cq, struct mana_ib_cq, ibcq); + struct mana_ib_cq *recv_cq = container_of(qp->ibqp.recv_cq, struct mana_ib_cq, ibcq); + unsigned long flags; + + spin_lock_irqsave(&send_cq->cq_lock, flags); + list_add_tail(&qp->cq_send_list, &send_cq->list_send_qp); + spin_unlock_irqrestore(&send_cq->cq_lock, flags); + + spin_lock_irqsave(&recv_cq->cq_lock, flags); + list_add_tail(&qp->cq_recv_list, &recv_cq->list_recv_qp); + spin_unlock_irqrestore(&recv_cq->cq_lock, flags); +} + +static void mana_remove_qp_from_cqs(struct mana_ib_qp *qp) +{ + struct mana_ib_cq *send_cq = container_of(qp->ibqp.send_cq, struct mana_ib_cq, ibcq); + struct mana_ib_cq *recv_cq = container_of(qp->ibqp.recv_cq, struct mana_ib_cq, ibcq); + unsigned long flags; + + spin_lock_irqsave(&send_cq->cq_lock, flags); + list_del(&qp->cq_send_list); + spin_unlock_irqrestore(&send_cq->cq_lock, flags); + + spin_lock_irqsave(&recv_cq->cq_lock, flags); + list_del(&qp->cq_recv_list); + spin_unlock_irqrestore(&recv_cq->cq_lock, flags); +} + +static int mana_ib_create_ud_qp(struct ib_qp *ibqp, struct ib_pd *ibpd, + struct ib_qp_init_attr *attr, struct ib_udata *udata) +{ + struct mana_ib_dev *mdev = container_of(ibpd->device, struct mana_ib_dev, ib_dev); + struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp); + struct gdma_context *gc = mdev_to_gc(mdev); + u32 doorbell, queue_size; + int i, err; + + if (udata) { + ibdev_dbg(&mdev->ib_dev, "User-level UD QPs are not supported\n"); + return -EOPNOTSUPP; + } + + for (i = 0; i < MANA_UD_QUEUE_TYPE_MAX; ++i) { + queue_size = mana_ib_queue_size(attr, i); + err = mana_ib_create_kernel_queue(mdev, queue_size, mana_ib_queue_type(attr, i), + &qp->ud_qp.queues[i]); + if (err) { + ibdev_err(&mdev->ib_dev, "Failed to create queue %d, err %d\n", + i, err); + goto destroy_queues; + } + } + doorbell = gc->mana_ib.doorbell; + + err = create_shadow_queue(&qp->shadow_rq, attr->cap.max_recv_wr, + sizeof(struct ud_rq_shadow_wqe)); + if (err) { + ibdev_err(&mdev->ib_dev, "Failed to create shadow rq err %d\n", err); + goto destroy_queues; + } + err = create_shadow_queue(&qp->shadow_sq, attr->cap.max_send_wr, + sizeof(struct ud_sq_shadow_wqe)); + if (err) { + ibdev_err(&mdev->ib_dev, "Failed to create shadow sq err %d\n", err); + goto destroy_shadow_queues; + } + + err = mana_ib_gd_create_ud_qp(mdev, qp, attr, doorbell, attr->qp_type); + if (err) 
{ + ibdev_err(&mdev->ib_dev, "Failed to create ud qp %d\n", err); + goto destroy_shadow_queues; + } + qp->ibqp.qp_num = qp->ud_qp.queues[MANA_UD_RECV_QUEUE].id; + qp->port = attr->port_num; + + for (i = 0; i < MANA_UD_QUEUE_TYPE_MAX; ++i) + qp->ud_qp.queues[i].kmem->id = qp->ud_qp.queues[i].id; + + err = mana_table_store_qp(mdev, qp); + if (err) + goto destroy_qp; + + mana_add_qp_to_cqs(qp); + + return 0; + +destroy_qp: + mana_ib_gd_destroy_ud_qp(mdev, qp); +destroy_shadow_queues: + destroy_shadow_queue(&qp->shadow_rq); + destroy_shadow_queue(&qp->shadow_sq); +destroy_queues: + while (i-- > 0) + mana_ib_destroy_queue(mdev, &qp->ud_qp.queues[i]); + return err; +} + int mana_ib_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attr, struct ib_udata *udata) { @@ -503,6 +712,9 @@ int mana_ib_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attr, return mana_ib_create_qp_raw(ibqp, ibqp->pd, attr, udata); case IB_QPT_RC: return mana_ib_create_rc_qp(ibqp, ibqp->pd, attr, udata); + case IB_QPT_UD: + case IB_QPT_GSI: + return mana_ib_create_ud_qp(ibqp, ibqp->pd, attr, udata); default: ibdev_dbg(ibqp->device, "Creating QP type %u not supported\n", attr->qp_type); @@ -579,6 +791,8 @@ int mana_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, { switch (ibqp->qp_type) { case IB_QPT_RC: + case IB_QPT_UD: + case IB_QPT_GSI: return mana_ib_gd_modify_qp(ibqp, attr, attr_mask, udata); default: ibdev_dbg(ibqp->device, "Modify QP type %u not supported", ibqp->qp_type); @@ -652,6 +866,28 @@ static int mana_ib_destroy_rc_qp(struct mana_ib_qp *qp, struct ib_udata *udata) return 0; } +static int mana_ib_destroy_ud_qp(struct mana_ib_qp *qp, struct ib_udata *udata) +{ + struct mana_ib_dev *mdev = + container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev); + int i; + + mana_remove_qp_from_cqs(qp); + mana_table_remove_qp(mdev, qp); + + destroy_shadow_queue(&qp->shadow_rq); + destroy_shadow_queue(&qp->shadow_sq); + + /* Ignore return code as there is not much we can do about it. + * The error message is printed inside. + */ + mana_ib_gd_destroy_ud_qp(mdev, qp); + for (i = 0; i < MANA_UD_QUEUE_TYPE_MAX; ++i) + mana_ib_destroy_queue(mdev, &qp->ud_qp.queues[i]); + + return 0; +} + int mana_ib_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) { struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp); @@ -665,6 +901,9 @@ int mana_ib_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) return mana_ib_destroy_qp_raw(qp, udata); case IB_QPT_RC: return mana_ib_destroy_rc_qp(qp, udata); + case IB_QPT_UD: + case IB_QPT_GSI: + return mana_ib_destroy_ud_qp(qp, udata); default: ibdev_dbg(ibqp->device, "Unexpected QP type %u\n", ibqp->qp_type); diff --git a/drivers/infiniband/hw/mana/shadow_queue.h b/drivers/infiniband/hw/mana/shadow_queue.h new file mode 100644 index 000000000000..a4b3818f9c39 --- /dev/null +++ b/drivers/infiniband/hw/mana/shadow_queue.h @@ -0,0 +1,115 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* + * Copyright (c) 2024, Microsoft Corporation. All rights reserved. 
+ */ + +#ifndef _MANA_SHADOW_QUEUE_H_ +#define _MANA_SHADOW_QUEUE_H_ + +struct shadow_wqe_header { + u16 opcode; + u16 error_code; + u32 posted_wqe_size; + u64 wr_id; +}; + +struct ud_rq_shadow_wqe { + struct shadow_wqe_header header; + u32 byte_len; + u32 src_qpn; +}; + +struct ud_sq_shadow_wqe { + struct shadow_wqe_header header; +}; + +struct shadow_queue { + /* Unmasked producer index, Incremented on wqe posting */ + u64 prod_idx; + /* Unmasked consumer index, Incremented on cq polling */ + u64 cons_idx; + /* Unmasked index of next-to-complete (from HW) shadow WQE */ + u64 next_to_complete_idx; + /* queue size in wqes */ + u32 length; + /* distance between elements in bytes */ + u32 stride; + /* ring buffer holding wqes */ + void *buffer; +}; + +static inline int create_shadow_queue(struct shadow_queue *queue, uint32_t length, uint32_t stride) +{ + queue->buffer = kvmalloc_array(length, stride, GFP_KERNEL); + if (!queue->buffer) + return -ENOMEM; + + queue->length = length; + queue->stride = stride; + + return 0; +} + +static inline void destroy_shadow_queue(struct shadow_queue *queue) +{ + kvfree(queue->buffer); +} + +static inline bool shadow_queue_full(struct shadow_queue *queue) +{ + return (queue->prod_idx - queue->cons_idx) >= queue->length; +} + +static inline bool shadow_queue_empty(struct shadow_queue *queue) +{ + return queue->prod_idx == queue->cons_idx; +} + +static inline void * +shadow_queue_get_element(const struct shadow_queue *queue, u64 unmasked_index) +{ + u32 index = unmasked_index % queue->length; + + return ((u8 *)queue->buffer + index * queue->stride); +} + +static inline void * +shadow_queue_producer_entry(struct shadow_queue *queue) +{ + return shadow_queue_get_element(queue, queue->prod_idx); +} + +static inline void * +shadow_queue_get_next_to_consume(const struct shadow_queue *queue) +{ + if (queue->cons_idx == queue->next_to_complete_idx) + return NULL; + + return shadow_queue_get_element(queue, queue->cons_idx); +} + +static inline void * +shadow_queue_get_next_to_complete(struct shadow_queue *queue) +{ + if (queue->next_to_complete_idx == queue->prod_idx) + return NULL; + + return shadow_queue_get_element(queue, queue->next_to_complete_idx); +} + +static inline void shadow_queue_advance_producer(struct shadow_queue *queue) +{ + queue->prod_idx++; +} + +static inline void shadow_queue_advance_consumer(struct shadow_queue *queue) +{ + queue->cons_idx++; +} + +static inline void shadow_queue_advance_next_to_complete(struct shadow_queue *queue) +{ + queue->next_to_complete_idx++; +} + +#endif diff --git a/drivers/infiniband/hw/mana/wr.c b/drivers/infiniband/hw/mana/wr.c new file mode 100644 index 000000000000..1813567d3b16 --- /dev/null +++ b/drivers/infiniband/hw/mana/wr.c @@ -0,0 +1,168 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2024, Microsoft Corporation. All rights reserved. 
+ */ + +#include "mana_ib.h" + +#define MAX_WR_SGL_NUM (2) + +static int mana_ib_post_recv_ud(struct mana_ib_qp *qp, const struct ib_recv_wr *wr) +{ + struct mana_ib_dev *mdev = container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev); + struct gdma_queue *queue = qp->ud_qp.queues[MANA_UD_RECV_QUEUE].kmem; + struct gdma_posted_wqe_info wqe_info = {0}; + struct gdma_sge gdma_sgl[MAX_WR_SGL_NUM]; + struct gdma_wqe_request wqe_req = {0}; + struct ud_rq_shadow_wqe *shadow_wqe; + int err, i; + + if (shadow_queue_full(&qp->shadow_rq)) + return -EINVAL; + + if (wr->num_sge > MAX_WR_SGL_NUM) + return -EINVAL; + + for (i = 0; i < wr->num_sge; ++i) { + gdma_sgl[i].address = wr->sg_list[i].addr; + gdma_sgl[i].mem_key = wr->sg_list[i].lkey; + gdma_sgl[i].size = wr->sg_list[i].length; + } + wqe_req.num_sge = wr->num_sge; + wqe_req.sgl = gdma_sgl; + + err = mana_gd_post_work_request(queue, &wqe_req, &wqe_info); + if (err) + return err; + + shadow_wqe = shadow_queue_producer_entry(&qp->shadow_rq); + memset(shadow_wqe, 0, sizeof(*shadow_wqe)); + shadow_wqe->header.opcode = IB_WC_RECV; + shadow_wqe->header.wr_id = wr->wr_id; + shadow_wqe->header.posted_wqe_size = wqe_info.wqe_size_in_bu; + shadow_queue_advance_producer(&qp->shadow_rq); + + mana_gd_wq_ring_doorbell(mdev_to_gc(mdev), queue); + return 0; +} + +int mana_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) +{ + struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp); + int err = 0; + + for (; wr; wr = wr->next) { + switch (ibqp->qp_type) { + case IB_QPT_UD: + case IB_QPT_GSI: + err = mana_ib_post_recv_ud(qp, wr); + if (unlikely(err)) { + *bad_wr = wr; + return err; + } + break; + default: + ibdev_dbg(ibqp->device, "Posting recv wr on qp type %u is not supported\n", + ibqp->qp_type); + return -EINVAL; + } + } + + return err; +} + +static int mana_ib_post_send_ud(struct mana_ib_qp *qp, const struct ib_ud_wr *wr) +{ + struct mana_ib_dev *mdev = container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev); + struct mana_ib_ah *ah = container_of(wr->ah, struct mana_ib_ah, ibah); + struct net_device *ndev = mana_ib_get_netdev(&mdev->ib_dev, qp->port); + struct gdma_queue *queue = qp->ud_qp.queues[MANA_UD_SEND_QUEUE].kmem; + struct gdma_sge gdma_sgl[MAX_WR_SGL_NUM + 1]; + struct gdma_posted_wqe_info wqe_info = {0}; + struct gdma_wqe_request wqe_req = {0}; + struct rdma_send_oob send_oob = {0}; + struct ud_sq_shadow_wqe *shadow_wqe; + int err, i; + + if (!ndev) { + ibdev_dbg(&mdev->ib_dev, "Invalid port %u in QP %u\n", + qp->port, qp->ibqp.qp_num); + return -EINVAL; + } + + if (wr->wr.opcode != IB_WR_SEND) + return -EINVAL; + + if (shadow_queue_full(&qp->shadow_sq)) + return -EINVAL; + + if (wr->wr.num_sge > MAX_WR_SGL_NUM) + return -EINVAL; + + gdma_sgl[0].address = ah->dma_handle; + gdma_sgl[0].mem_key = qp->ibqp.pd->local_dma_lkey; + gdma_sgl[0].size = sizeof(struct mana_ib_av); + for (i = 0; i < wr->wr.num_sge; ++i) { + gdma_sgl[i + 1].address = wr->wr.sg_list[i].addr; + gdma_sgl[i + 1].mem_key = wr->wr.sg_list[i].lkey; + gdma_sgl[i + 1].size = wr->wr.sg_list[i].length; + } + + wqe_req.num_sge = wr->wr.num_sge + 1; + wqe_req.sgl = gdma_sgl; + wqe_req.inline_oob_size = sizeof(struct rdma_send_oob); + wqe_req.inline_oob_data = &send_oob; + wqe_req.flags = GDMA_WR_OOB_IN_SGL; + wqe_req.client_data_unit = ib_mtu_enum_to_int(ib_mtu_int_to_enum(ndev->mtu)); + + send_oob.wqe_type = WQE_TYPE_UD_SEND; + send_oob.fence = !!(wr->wr.send_flags & IB_SEND_FENCE); + send_oob.signaled = !!(wr->wr.send_flags & 
IB_SEND_SIGNALED); + send_oob.solicited = !!(wr->wr.send_flags & IB_SEND_SOLICITED); + send_oob.psn = qp->ud_qp.sq_psn; + send_oob.ssn_or_rqpn = wr->remote_qpn; + send_oob.ud_send.remote_qkey = + qp->ibqp.qp_type == IB_QPT_GSI ? IB_QP1_QKEY : wr->remote_qkey; + + err = mana_gd_post_work_request(queue, &wqe_req, &wqe_info); + if (err) + return err; + + qp->ud_qp.sq_psn++; + shadow_wqe = shadow_queue_producer_entry(&qp->shadow_sq); + memset(shadow_wqe, 0, sizeof(*shadow_wqe)); + shadow_wqe->header.opcode = IB_WC_SEND; + shadow_wqe->header.wr_id = wr->wr.wr_id; + shadow_wqe->header.posted_wqe_size = wqe_info.wqe_size_in_bu; + shadow_queue_advance_producer(&qp->shadow_sq); + + mana_gd_wq_ring_doorbell(mdev_to_gc(mdev), queue); + return 0; +} + +int mana_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) +{ + int err; + struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp); + + for (; wr; wr = wr->next) { + switch (ibqp->qp_type) { + case IB_QPT_UD: + case IB_QPT_GSI: + err = mana_ib_post_send_ud(qp, ud_wr(wr)); + if (unlikely(err)) { + *bad_wr = wr; + return err; + } + break; + default: + ibdev_dbg(ibqp->device, "Posting send wr on qp type %u is not supported\n", + ibqp->qp_type); + return -EINVAL; + } + } + + return err; +} diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index aa9ea6ba26e5..c592374f4a58 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -150,8 +150,12 @@ static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, return PTR_ERR(*umem); shift = mlx4_ib_umem_calc_optimal_mtt_size(*umem, 0, &n); - err = mlx4_mtt_init(dev->dev, n, shift, &buf->mtt); + if (shift < 0) { + err = shift; + goto err_buf; + } + err = mlx4_mtt_init(dev->dev, n, shift, &buf->mtt); if (err) goto err_buf; diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 529db874d67c..dd35e03402ab 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -351,7 +351,7 @@ static int mlx4_ib_del_gid(const struct ib_gid_attr *attr, void **context) struct mlx4_port_gid_table *port_gid_table; int ret = 0; int hw_update = 0; - struct gid_entry *gids; + struct gid_entry *gids = NULL; if (!rdma_cap_roce_gid_table(attr->device, attr->port_num)) return -EINVAL; @@ -389,10 +389,10 @@ static int mlx4_ib_del_gid(const struct ib_gid_attr *attr, void **context) } spin_unlock_bh(&iboe->lock); - if (!ret && hw_update) { + if (gids) ret = mlx4_ib_update_gids(gids, ibdev, attr->port_num); - kfree(gids); - } + + kfree(gids); return ret; } @@ -2341,37 +2341,38 @@ static void mlx4_ib_scan_netdev(struct mlx4_ib_dev *ibdev, iboe->netdevs[dev->dev_port] = event != NETDEV_UNREGISTER ? 
dev : NULL; - if (event == NETDEV_UP || event == NETDEV_DOWN) { - enum ib_port_state port_state; - struct ib_event ibev = { }; + spin_unlock_bh(&iboe->lock); - if (ib_get_cached_port_state(&ibdev->ib_dev, dev->dev_port + 1, - &port_state)) - goto iboe_out; + if (event == NETDEV_CHANGEADDR || event == NETDEV_REGISTER) + mlx4_ib_update_qps(ibdev, dev, dev->dev_port + 1); +} - if (event == NETDEV_UP && - (port_state != IB_PORT_ACTIVE || - iboe->last_port_state[dev->dev_port] != IB_PORT_DOWN)) - goto iboe_out; - if (event == NETDEV_DOWN && - (port_state != IB_PORT_DOWN || - iboe->last_port_state[dev->dev_port] != IB_PORT_ACTIVE)) - goto iboe_out; - iboe->last_port_state[dev->dev_port] = port_state; +static void mlx4_ib_port_event(struct ib_device *ibdev, struct net_device *ndev, + unsigned long event) +{ + struct mlx4_ib_dev *mlx4_ibdev = + container_of(ibdev, struct mlx4_ib_dev, ib_dev); + struct mlx4_ib_iboe *iboe = &mlx4_ibdev->iboe; - ibev.device = &ibdev->ib_dev; - ibev.element.port_num = dev->dev_port + 1; - ibev.event = event == NETDEV_UP ? IB_EVENT_PORT_ACTIVE : - IB_EVENT_PORT_ERR; - ib_dispatch_event(&ibev); - } + if (!net_eq(dev_net(ndev), &init_net)) + return; + + ASSERT_RTNL(); + + if (ndev->dev.parent != mlx4_ibdev->ib_dev.dev.parent) + return; + + spin_lock_bh(&iboe->lock); + + iboe->netdevs[ndev->dev_port] = event != NETDEV_UNREGISTER ? ndev : NULL; + + if (event == NETDEV_UP || event == NETDEV_DOWN) + ib_dispatch_port_state_event(&mlx4_ibdev->ib_dev, ndev); -iboe_out: spin_unlock_bh(&iboe->lock); - if (event == NETDEV_CHANGEADDR || event == NETDEV_REGISTER || - event == NETDEV_UP || event == NETDEV_CHANGE) - mlx4_ib_update_qps(ibdev, dev, dev->dev_port + 1); + if (event == NETDEV_UP || event == NETDEV_CHANGE) + mlx4_ib_update_qps(mlx4_ibdev, ndev, ndev->dev_port + 1); } static int mlx4_ib_netdev_event(struct notifier_block *this, @@ -2569,6 +2570,7 @@ static const struct ib_device_ops mlx4_ib_dev_ops = { .req_notify_cq = mlx4_ib_arm_cq, .rereg_user_mr = mlx4_ib_rereg_user_mr, .resize_cq = mlx4_ib_resize_cq, + .report_port_event = mlx4_ib_port_event, INIT_RDMA_OBJ_SIZE(ib_ah, mlx4_ib_ah, ibah), INIT_RDMA_OBJ_SIZE(ib_cq, mlx4_ib_cq, ibcq), diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index b52bceff7d97..f53b1846594c 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -667,6 +667,9 @@ struct mlx4_uverbs_ex_query_device { __u32 reserved; }; +/* 4k - 4G */ +#define MLX4_PAGE_SIZE_SUPPORTED ((unsigned long)GENMASK_ULL(31, 12)) + static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev) { return container_of(ibdev, struct mlx4_ib_dev, ib_dev); @@ -936,8 +939,19 @@ mlx4_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table) { return 0; } -int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, u64 start_va, - int *num_of_mtts); +static inline int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, + u64 start, + int *num_of_mtts) +{ + unsigned long pg_sz; + + pg_sz = ib_umem_find_best_pgsz(umem, MLX4_PAGE_SIZE_SUPPORTED, start); + if (!pg_sz) + return -EOPNOTSUPP; + + *num_of_mtts = ib_umem_num_dma_blocks(umem, pg_sz); + return order_base_2(pg_sz); +} int mlx4_ib_cm_init(void); void mlx4_ib_cm_destroy(void); diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index a40bf58bcdd3..e77645a673fb 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -87,286 +87,20 @@ err_free: return ERR_PTR(err); } -enum { - 
MLX4_MAX_MTT_SHIFT = 31 -}; - -static int mlx4_ib_umem_write_mtt_block(struct mlx4_ib_dev *dev, - struct mlx4_mtt *mtt, - u64 mtt_size, u64 mtt_shift, u64 len, - u64 cur_start_addr, u64 *pages, - int *start_index, int *npages) -{ - u64 cur_end_addr = cur_start_addr + len; - u64 cur_end_addr_aligned = 0; - u64 mtt_entries; - int err = 0; - int k; - - len += (cur_start_addr & (mtt_size - 1ULL)); - cur_end_addr_aligned = round_up(cur_end_addr, mtt_size); - len += (cur_end_addr_aligned - cur_end_addr); - if (len & (mtt_size - 1ULL)) { - pr_warn("write_block: len %llx is not aligned to mtt_size %llx\n", - len, mtt_size); - return -EINVAL; - } - - mtt_entries = (len >> mtt_shift); - - /* - * Align the MTT start address to the mtt_size. - * Required to handle cases when the MR starts in the middle of an MTT - * record. Was not required in old code since the physical addresses - * provided by the dma subsystem were page aligned, which was also the - * MTT size. - */ - cur_start_addr = round_down(cur_start_addr, mtt_size); - /* A new block is started ... */ - for (k = 0; k < mtt_entries; ++k) { - pages[*npages] = cur_start_addr + (mtt_size * k); - (*npages)++; - /* - * Be friendly to mlx4_write_mtt() and pass it chunks of - * appropriate size. - */ - if (*npages == PAGE_SIZE / sizeof(u64)) { - err = mlx4_write_mtt(dev->dev, mtt, *start_index, - *npages, pages); - if (err) - return err; - - (*start_index) += *npages; - *npages = 0; - } - } - - return 0; -} - -static inline u64 alignment_of(u64 ptr) -{ - return ilog2(ptr & (~(ptr - 1))); -} - -static int mlx4_ib_umem_calc_block_mtt(u64 next_block_start, - u64 current_block_end, - u64 block_shift) -{ - /* Check whether the alignment of the new block is aligned as well as - * the previous block. - * Block address must start with zeros till size of entity_size. - */ - if ((next_block_start & ((1ULL << block_shift) - 1ULL)) != 0) - /* - * It is not as well aligned as the previous block-reduce the - * mtt size accordingly. Here we take the last right bit which - * is 1. - */ - block_shift = alignment_of(next_block_start); - - /* - * Check whether the alignment of the end of previous block - is it - * aligned as well as the start of the block - */ - if (((current_block_end) & ((1ULL << block_shift) - 1ULL)) != 0) - /* - * It is not as well aligned as the start of the block - - * reduce the mtt size accordingly. - */ - block_shift = alignment_of(current_block_end); - - return block_shift; -} - int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt, struct ib_umem *umem) { - u64 *pages; - u64 len = 0; - int err = 0; - u64 mtt_size; - u64 cur_start_addr = 0; - u64 mtt_shift; - int start_index = 0; - int npages = 0; - struct scatterlist *sg; - int i; - - pages = (u64 *) __get_free_page(GFP_KERNEL); - if (!pages) - return -ENOMEM; - - mtt_shift = mtt->page_shift; - mtt_size = 1ULL << mtt_shift; + struct ib_block_iter biter; + int err, i = 0; + u64 addr; - for_each_sgtable_dma_sg(&umem->sgt_append.sgt, sg, i) { - if (cur_start_addr + len == sg_dma_address(sg)) { - /* still the same block */ - len += sg_dma_len(sg); - continue; - } - /* - * A new block is started ... 
- * If len is malaligned, write an extra mtt entry to cover the - * misaligned area (round up the division) - */ - err = mlx4_ib_umem_write_mtt_block(dev, mtt, mtt_size, - mtt_shift, len, - cur_start_addr, - pages, &start_index, - &npages); - if (err) - goto out; - - cur_start_addr = sg_dma_address(sg); - len = sg_dma_len(sg); - } - - /* Handle the last block */ - if (len > 0) { - /* - * If len is malaligned, write an extra mtt entry to cover - * the misaligned area (round up the division) - */ - err = mlx4_ib_umem_write_mtt_block(dev, mtt, mtt_size, - mtt_shift, len, - cur_start_addr, pages, - &start_index, &npages); + rdma_umem_for_each_dma_block(umem, &biter, BIT(mtt->page_shift)) { + addr = rdma_block_iter_dma_address(&biter); + err = mlx4_write_mtt(dev->dev, mtt, i++, 1, &addr); if (err) - goto out; - } - - if (npages) - err = mlx4_write_mtt(dev->dev, mtt, start_index, npages, pages); - -out: - free_page((unsigned long) pages); - return err; -} - -/* - * Calculate optimal mtt size based on contiguous pages. - * Function will return also the number of pages that are not aligned to the - * calculated mtt_size to be added to total number of pages. For that we should - * check the first chunk length & last chunk length and if not aligned to - * mtt_size we should increment the non_aligned_pages number. All chunks in the - * middle already handled as part of mtt shift calculation for both their start - * & end addresses. - */ -int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, u64 start_va, - int *num_of_mtts) -{ - u64 block_shift = MLX4_MAX_MTT_SHIFT; - u64 min_shift = PAGE_SHIFT; - u64 last_block_aligned_end = 0; - u64 current_block_start = 0; - u64 first_block_start = 0; - u64 current_block_len = 0; - u64 last_block_end = 0; - struct scatterlist *sg; - u64 current_block_end; - u64 misalignment_bits; - u64 next_block_start; - u64 total_len = 0; - int i; - - *num_of_mtts = ib_umem_num_dma_blocks(umem, PAGE_SIZE); - - for_each_sgtable_dma_sg(&umem->sgt_append.sgt, sg, i) { - /* - * Initialization - save the first chunk start as the - * current_block_start - block means contiguous pages. - */ - if (current_block_len == 0 && current_block_start == 0) { - current_block_start = sg_dma_address(sg); - first_block_start = current_block_start; - /* - * Find the bits that are different between the physical - * address and the virtual address for the start of the - * MR. - * umem_get aligned the start_va to a page boundary. - * Therefore, we need to align the start va to the same - * boundary. - * misalignment_bits is needed to handle the case of a - * single memory region. In this case, the rest of the - * logic will not reduce the block size. If we use a - * block size which is bigger than the alignment of the - * misalignment bits, we might use the virtual page - * number instead of the physical page number, resulting - * in access to the wrong data. - */ - misalignment_bits = - (start_va & (~(((u64)(PAGE_SIZE)) - 1ULL))) ^ - current_block_start; - block_shift = min(alignment_of(misalignment_bits), - block_shift); - } - - /* - * Go over the scatter entries and check if they continue the - * previous scatter entry. - */ - next_block_start = sg_dma_address(sg); - current_block_end = current_block_start + current_block_len; - /* If we have a split (non-contig.) 
between two blocks */ - if (current_block_end != next_block_start) { - block_shift = mlx4_ib_umem_calc_block_mtt - (next_block_start, - current_block_end, - block_shift); - - /* - * If we reached the minimum shift for 4k page we stop - * the loop. - */ - if (block_shift <= min_shift) - goto end; - - /* - * If not saved yet we are in first block - we save the - * length of first block to calculate the - * non_aligned_pages number at the end. - */ - total_len += current_block_len; - - /* Start a new block */ - current_block_start = next_block_start; - current_block_len = sg_dma_len(sg); - continue; - } - /* The scatter entry is another part of the current block, - * increase the block size. - * An entry in the scatter can be larger than 4k (page) as of - * dma mapping which merge some blocks together. - */ - current_block_len += sg_dma_len(sg); + return err; } - - /* Account for the last block in the total len */ - total_len += current_block_len; - /* Add to the first block the misalignment that it suffers from. */ - total_len += (first_block_start & ((1ULL << block_shift) - 1ULL)); - last_block_end = current_block_start + current_block_len; - last_block_aligned_end = round_up(last_block_end, 1ULL << block_shift); - total_len += (last_block_aligned_end - last_block_end); - - if (total_len & ((1ULL << block_shift) - 1ULL)) - pr_warn("misaligned total length detected (%llu, %llu)!", - total_len, block_shift); - - *num_of_mtts = total_len >> block_shift; -end: - if (block_shift < min_shift) { - /* - * If shift is less than the min we set a warning and return the - * min shift. - */ - pr_warn("umem_calc_optimal_mtt_size - unexpected shift %lld\n", block_shift); - - block_shift = min_shift; - } - return block_shift; + return 0; } static struct ib_umem *mlx4_get_umem_mr(struct ib_device *device, u64 start, @@ -424,6 +158,10 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, } shift = mlx4_ib_umem_calc_optimal_mtt_size(mr->umem, start, &n); + if (shift < 0) { + err = shift; + goto err_umem; + } err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, length, convert_access(access_flags), n, shift, &mr->mmr); diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 9d08aa99f3cb..50fd407103c7 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -925,8 +925,12 @@ static int create_rq(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, } shift = mlx4_ib_umem_calc_optimal_mtt_size(qp->umem, 0, &n); - err = mlx4_mtt_init(dev->dev, n, shift, &qp->mtt); + if (shift < 0) { + err = shift; + goto err_buf; + } + err = mlx4_mtt_init(dev->dev, n, shift, &qp->mtt); if (err) goto err_buf; @@ -1108,8 +1112,12 @@ static int create_qp_common(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, } shift = mlx4_ib_umem_calc_optimal_mtt_size(qp->umem, 0, &n); - err = mlx4_mtt_init(dev->dev, n, shift, &qp->mtt); + if (shift < 0) { + err = shift; + goto err_buf; + } + err = mlx4_mtt_init(dev->dev, n, shift, &qp->mtt); if (err) goto err_buf; diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile index b38961f5058e..11878ddf7cc7 100644 --- a/drivers/infiniband/hw/mlx5/Makefile +++ b/drivers/infiniband/hw/mlx5/Makefile @@ -9,6 +9,7 @@ mlx5_ib-y := ah.o \ data_direct.o \ dm.o \ doorbell.o \ + fs.o \ gsi.o \ ib_virt.o \ mad.o \ @@ -26,7 +27,6 @@ mlx5_ib-y := ah.o \ mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o mlx5_ib-$(CONFIG_MLX5_ESWITCH) += ib_rep.o mlx5_ib-$(CONFIG_INFINIBAND_USER_ACCESS) += 
devx.o \ - fs.o \ qos.o \ std_types.o mlx5_ib-$(CONFIG_MLX5_MACSEC) += macsec.o diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c index 505bc47fd575..531a57f9ee7e 100644 --- a/drivers/infiniband/hw/mlx5/ah.c +++ b/drivers/infiniband/hw/mlx5/ah.c @@ -50,11 +50,12 @@ static __be16 mlx5_ah_get_udp_sport(const struct mlx5_ib_dev *dev, return sport; } -static void create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah, +static int create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah, struct rdma_ah_init_attr *init_attr) { struct rdma_ah_attr *ah_attr = init_attr->ah_attr; enum ib_gid_type gid_type; + int rate_val; if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) { const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr); @@ -67,7 +68,10 @@ static void create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah, ah->av.tclass = grh->traffic_class; } - ah->av.stat_rate_sl = (rdma_ah_get_static_rate(ah_attr) << 4); + rate_val = mlx5r_ib_rate(dev, rdma_ah_get_static_rate(ah_attr)); + if (rate_val < 0) + return rate_val; + ah->av.stat_rate_sl = rate_val << 4; if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) { if (init_attr->xmit_slave) @@ -88,6 +92,8 @@ static void create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah, ah->av.fl_mlid = rdma_ah_get_path_bits(ah_attr) & 0x7f; ah->av.stat_rate_sl |= (rdma_ah_get_sl(ah_attr) & 0xf); } + + return 0; } int mlx5_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, @@ -120,8 +126,7 @@ int mlx5_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, return err; } - create_ib_ah(dev, ah, init_attr); - return 0; + return create_ib_ah(dev, ah, init_attr); } int mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr) diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c index 4f6c1968a2ee..b847084dcd99 100644 --- a/drivers/infiniband/hw/mlx5/counters.c +++ b/drivers/infiniband/hw/mlx5/counters.c @@ -140,6 +140,13 @@ static const struct mlx5_ib_counter rdmatx_cnp_op_cnts[] = { INIT_OP_COUNTER(cc_tx_cnp_pkts, CC_TX_CNP_PKTS), }; +static const struct mlx5_ib_counter packets_op_cnts[] = { + INIT_OP_COUNTER(rdma_tx_packets, RDMA_TX_PACKETS), + INIT_OP_COUNTER(rdma_tx_bytes, RDMA_TX_BYTES), + INIT_OP_COUNTER(rdma_rx_packets, RDMA_RX_PACKETS), + INIT_OP_COUNTER(rdma_rx_bytes, RDMA_RX_BYTES), +}; + static int mlx5_ib_read_counters(struct ib_counters *counters, struct ib_counters_read_attr *read_attr, struct uverbs_attr_bundle *attrs) @@ -427,6 +434,52 @@ done: return num_counters; } +static bool is_rdma_bytes_counter(u32 type) +{ + if (type == MLX5_IB_OPCOUNTER_RDMA_TX_BYTES || + type == MLX5_IB_OPCOUNTER_RDMA_RX_BYTES || + type == MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP || + type == MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP) + return true; + + return false; +} + +static int do_per_qp_get_op_stat(struct rdma_counter *counter) +{ + struct mlx5_ib_dev *dev = to_mdev(counter->device); + const struct mlx5_ib_counters *cnts = get_counters(dev, counter->port); + struct mlx5_rdma_counter *mcounter = to_mcounter(counter); + int i, ret, index, num_hw_counters; + u64 packets = 0, bytes = 0; + + for (i = MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP; + i <= MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP; i++) { + if (!mcounter->fc[i]) + continue; + + ret = mlx5_fc_query(dev->mdev, mcounter->fc[i], + &packets, &bytes); + if (ret) + return ret; + + num_hw_counters = cnts->num_q_counters + + cnts->num_cong_counters + + cnts->num_ext_ppcnt_counters; + + index = i - 
MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP + + num_hw_counters; + + if (is_rdma_bytes_counter(i)) + counter->stats->value[index] = bytes; + else + counter->stats->value[index] = packets; + + clear_bit(index, counter->stats->is_disabled); + } + return 0; +} + static int do_get_op_stat(struct ib_device *ibdev, struct rdma_hw_stats *stats, u32 port_num, int index) @@ -434,7 +487,7 @@ static int do_get_op_stat(struct ib_device *ibdev, struct mlx5_ib_dev *dev = to_mdev(ibdev); const struct mlx5_ib_counters *cnts; const struct mlx5_ib_op_fc *opfcs; - u64 packets = 0, bytes; + u64 packets, bytes; u32 type; int ret; @@ -453,8 +506,11 @@ static int do_get_op_stat(struct ib_device *ibdev, if (ret) return ret; + if (is_rdma_bytes_counter(type)) + stats->value[index] = bytes; + else + stats->value[index] = packets; out: - stats->value[index] = packets; return index; } @@ -523,19 +579,30 @@ static int mlx5_ib_counter_update_stats(struct rdma_counter *counter) { struct mlx5_ib_dev *dev = to_mdev(counter->device); const struct mlx5_ib_counters *cnts = get_counters(dev, counter->port); + int ret; + + ret = mlx5_ib_query_q_counters(dev->mdev, cnts, counter->stats, + counter->id); + if (ret) + return ret; + + if (!counter->mode.bind_opcnt) + return 0; - return mlx5_ib_query_q_counters(dev->mdev, cnts, - counter->stats, counter->id); + return do_per_qp_get_op_stat(counter); } static int mlx5_ib_counter_dealloc(struct rdma_counter *counter) { + struct mlx5_rdma_counter *mcounter = to_mcounter(counter); struct mlx5_ib_dev *dev = to_mdev(counter->device); u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {}; if (!counter->id) return 0; + WARN_ON(!xa_empty(&mcounter->qpn_opfc_xa)); + mlx5r_fs_destroy_fcs(dev, counter); MLX5_SET(dealloc_q_counter_in, in, opcode, MLX5_CMD_OP_DEALLOC_Q_COUNTER); MLX5_SET(dealloc_q_counter_in, in, counter_set_id, counter->id); @@ -543,9 +610,10 @@ static int mlx5_ib_counter_dealloc(struct rdma_counter *counter) } static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter, - struct ib_qp *qp) + struct ib_qp *qp, u32 port) { struct mlx5_ib_dev *dev = to_mdev(qp->device); + bool new = false; int err; if (!counter->id) { @@ -560,24 +628,46 @@ static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter, return err; counter->id = MLX5_GET(alloc_q_counter_out, out, counter_set_id); + new = true; } err = mlx5_ib_qp_set_counter(qp, counter); if (err) goto fail_set_counter; + err = mlx5r_fs_bind_op_fc(qp, counter, port); + if (err) + goto fail_bind_op_fc; + return 0; +fail_bind_op_fc: + mlx5_ib_qp_set_counter(qp, NULL); fail_set_counter: - mlx5_ib_counter_dealloc(counter); - counter->id = 0; + if (new) { + mlx5_ib_counter_dealloc(counter); + counter->id = 0; + } return err; } -static int mlx5_ib_counter_unbind_qp(struct ib_qp *qp) +static int mlx5_ib_counter_unbind_qp(struct ib_qp *qp, u32 port) { - return mlx5_ib_qp_set_counter(qp, NULL); + struct rdma_counter *counter = qp->counter; + int err; + + mlx5r_fs_unbind_op_fc(qp, counter); + + err = mlx5_ib_qp_set_counter(qp, NULL); + if (err) + goto fail_set_counter; + + return 0; + +fail_set_counter: + mlx5r_fs_bind_op_fc(qp, counter, port); + return err; } static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev, @@ -677,6 +767,12 @@ static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev, descs[j].priv = &rdmatx_cnp_op_cnts[i].type; } } + + for (i = 0; i < ARRAY_SIZE(packets_op_cnts); i++, j++) { + descs[j].name = packets_op_cnts[i].name; + descs[j].flags |= IB_STAT_FLAG_OPTIONAL; + descs[j].priv = &packets_op_cnts[i].type; + } } @@ -727,6 
+823,8 @@ static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev, num_op_counters = ARRAY_SIZE(basic_op_cnts); + num_op_counters += ARRAY_SIZE(packets_op_cnts); + if (MLX5_CAP_FLOWTABLE(dev->mdev, ft_field_support_2_nic_receive_rdma.bth_opcode)) num_op_counters += ARRAY_SIZE(rdmarx_cnp_op_cnts); @@ -756,10 +854,58 @@ err: return -ENOMEM; } +/* + * Checks if the given flow counter type should be sharing the same flow counter + * with another type and if it should, checks if that other type flow counter + * was already created, if both conditions are met return true and the counter + * else return false. + */ +bool mlx5r_is_opfc_shared_and_in_use(struct mlx5_ib_op_fc *opfcs, u32 type, + struct mlx5_ib_op_fc **opfc) +{ + u32 shared_fc_type; + + switch (type) { + case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS: + shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_TX_BYTES; + break; + case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES: + shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS; + break; + case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS: + shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_RX_BYTES; + break; + case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES: + shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS; + break; + case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP: + shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP; + break; + case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP: + shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP; + break; + case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP: + shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP; + break; + case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP: + shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP; + break; + default: + return false; + } + + *opfc = &opfcs[shared_fc_type]; + if (!(*opfc)->fc) + return false; + + return true; +} + static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev) { u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {}; int num_cnt_ports = dev->num_ports; + struct mlx5_ib_op_fc *in_use_opfc; int i, j; if (is_mdev_switchdev_mode(dev->mdev)) @@ -781,11 +927,15 @@ static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev) if (!dev->port[i].cnts.opfcs[j].fc) continue; - if (IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)) - mlx5_ib_fs_remove_op_fc(dev, - &dev->port[i].cnts.opfcs[j], j); + if (mlx5r_is_opfc_shared_and_in_use( + dev->port[i].cnts.opfcs, j, &in_use_opfc)) + goto skip; + + mlx5_ib_fs_remove_op_fc(dev, + &dev->port[i].cnts.opfcs[j], j); mlx5_fc_destroy(dev->mdev, dev->port[i].cnts.opfcs[j].fc); +skip: dev->port[i].cnts.opfcs[j].fc = NULL; } } @@ -979,8 +1129,8 @@ static int mlx5_ib_modify_stat(struct ib_device *device, u32 port, unsigned int index, bool enable) { struct mlx5_ib_dev *dev = to_mdev(device); + struct mlx5_ib_op_fc *opfc, *in_use_opfc; struct mlx5_ib_counters *cnts; - struct mlx5_ib_op_fc *opfc; u32 num_hw_counters, type; int ret; @@ -1004,6 +1154,13 @@ static int mlx5_ib_modify_stat(struct ib_device *device, u32 port, if (opfc->fc) return -EEXIST; + if (mlx5r_is_opfc_shared_and_in_use(cnts->opfcs, type, + &in_use_opfc)) { + opfc->fc = in_use_opfc->fc; + opfc->rule[0] = in_use_opfc->rule[0]; + return 0; + } + opfc->fc = mlx5_fc_create(dev->mdev, false); if (IS_ERR(opfc->fc)) return PTR_ERR(opfc->fc); @@ -1019,12 +1176,23 @@ static int mlx5_ib_modify_stat(struct ib_device *device, u32 port, if (!opfc->fc) return -EINVAL; + if (mlx5r_is_opfc_shared_and_in_use(cnts->opfcs, type, &in_use_opfc)) + goto out; + mlx5_ib_fs_remove_op_fc(dev, opfc, type); mlx5_fc_destroy(dev->mdev, opfc->fc); +out: opfc->fc = NULL; return 0; } +static void 
mlx5_ib_counter_init(struct rdma_counter *counter) +{ + struct mlx5_rdma_counter *mcounter = to_mcounter(counter); + + xa_init(&mcounter->qpn_opfc_xa); +} + static const struct ib_device_ops hw_stats_ops = { .alloc_hw_port_stats = mlx5_ib_alloc_hw_port_stats, .get_hw_stats = mlx5_ib_get_hw_stats, @@ -1033,8 +1201,10 @@ static const struct ib_device_ops hw_stats_ops = { .counter_dealloc = mlx5_ib_counter_dealloc, .counter_alloc_stats = mlx5_ib_counter_alloc_stats, .counter_update_stats = mlx5_ib_counter_update_stats, - .modify_hw_stat = IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) ? - mlx5_ib_modify_stat : NULL, + .modify_hw_stat = mlx5_ib_modify_stat, + .counter_init = mlx5_ib_counter_init, + + INIT_RDMA_OBJ_SIZE(rdma_counter, mlx5_rdma_counter, rdma_counter), }; static const struct ib_device_ops hw_switchdev_vport_op = { @@ -1049,6 +1219,9 @@ static const struct ib_device_ops hw_switchdev_stats_ops = { .counter_dealloc = mlx5_ib_counter_dealloc, .counter_alloc_stats = mlx5_ib_counter_alloc_stats, .counter_update_stats = mlx5_ib_counter_update_stats, + .counter_init = mlx5_ib_counter_init, + + INIT_RDMA_OBJ_SIZE(rdma_counter, mlx5_rdma_counter, rdma_counter), }; static const struct ib_device_ops counters_ops = { diff --git a/drivers/infiniband/hw/mlx5/counters.h b/drivers/infiniband/hw/mlx5/counters.h index 6bcaaa52e2b2..bd03cee42014 100644 --- a/drivers/infiniband/hw/mlx5/counters.h +++ b/drivers/infiniband/hw/mlx5/counters.h @@ -8,10 +8,25 @@ #include "mlx5_ib.h" +struct mlx5_rdma_counter { + struct rdma_counter rdma_counter; + + struct mlx5_fc *fc[MLX5_IB_OPCOUNTER_MAX]; + struct xarray qpn_opfc_xa; +}; + +static inline struct mlx5_rdma_counter * +to_mcounter(struct rdma_counter *counter) +{ + return container_of(counter, struct mlx5_rdma_counter, rdma_counter); +} + int mlx5_ib_counters_init(struct mlx5_ib_dev *dev); void mlx5_ib_counters_cleanup(struct mlx5_ib_dev *dev); void mlx5_ib_counters_clear_description(struct ib_counters *counters); int mlx5_ib_flow_counters_set_data(struct ib_counters *ibcounters, struct mlx5_ib_create_flow *ucmd); u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u32 port_num); +bool mlx5r_is_opfc_shared_and_in_use(struct mlx5_ib_op_fc *opfcs, u32 type, + struct mlx5_ib_op_fc **opfc); #endif /* _MLX5_IB_COUNTERS_H */ diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 4c54dc578069..1aa5311b03e9 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -490,7 +490,7 @@ repoll: } qpn = ntohl(cqe64->sop_drop_qpn) & 0xffffff; - if (!*cur_qp || (qpn != (*cur_qp)->ibqp.qp_num)) { + if (!*cur_qp || (qpn != (*cur_qp)->trans_qp.base.mqp.qpn)) { /* We do not have to take the QP table lock here, * because CQs will be locked while QPs are removed * from the table. 
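
The shadow_queue header added above for the mana UD/GSI path is a plain ring buffer driven by three unmasked indices: prod_idx advances when a WQE is posted, next_to_complete_idx advances as hardware completions are processed, and cons_idx advances as polled completions are handed back to the caller, so full/empty checks reduce to index subtraction. The standalone sketch below only illustrates that index scheme; it is not part of the patch, it runs in user space, and it substitutes hypothetical names (demo_shadow_queue, demo_entry) and malloc/free for the kernel helpers.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct demo_shadow_queue {
	uint64_t prod_idx;             /* advanced when a WQE is posted */
	uint64_t next_to_complete_idx; /* advanced when HW reports completion */
	uint64_t cons_idx;             /* advanced when the poller consumes it */
	uint32_t length;               /* number of entries */
	uint32_t stride;               /* entry size in bytes */
	void *buffer;
};

static void *demo_entry(struct demo_shadow_queue *q, uint64_t unmasked_idx)
{
	/* Indices stay unmasked; only element lookup wraps around. */
	return (char *)q->buffer + (unmasked_idx % q->length) * q->stride;
}

int main(void)
{
	struct demo_shadow_queue q = { .length = 4, .stride = sizeof(uint64_t) };
	uint64_t wr_id;

	q.buffer = calloc(q.length, q.stride);
	if (!q.buffer)
		return 1;

	/* Post two WQEs: remember their wr_ids, advance the producer index. */
	for (wr_id = 100; wr_id < 102; wr_id++) {
		/* Queue is full when all slots hold posted-but-unconsumed WQEs. */
		if (q.prod_idx - q.cons_idx >= q.length)
			break;
		memcpy(demo_entry(&q, q.prod_idx), &wr_id, sizeof(wr_id));
		q.prod_idx++;
	}

	/* Hardware completes the first posted WQE. */
	q.next_to_complete_idx++;

	/* Polling consumes everything completed so far, oldest first. */
	while (q.cons_idx != q.next_to_complete_idx) {
		memcpy(&wr_id, demo_entry(&q, q.cons_idx), sizeof(wr_id));
		printf("completed wr_id %llu\n", (unsigned long long)wr_id);
		q.cons_idx++;
	}

	free(q.buffer);
	return 0;
}

Keeping the indices unmasked and wrapping only at element lookup is what lets full and empty be told apart without a separate element count, which is the same convention the shadow_queue.h helpers rely on.
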
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 4186884c66e1..2479da8620ca 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -13,6 +13,7 @@ #include <rdma/uverbs_std_types.h> #include <linux/mlx5/driver.h> #include <linux/mlx5/fs.h> +#include <rdma/ib_ucaps.h> #include "mlx5_ib.h" #include "devx.h" #include "qp.h" @@ -122,7 +123,27 @@ devx_ufile2uctx(const struct uverbs_attr_bundle *attrs) return to_mucontext(ib_uverbs_get_ucontext(attrs)); } -int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user) +static int set_uctx_ucaps(struct mlx5_ib_dev *dev, u64 req_ucaps, u32 *cap) +{ + if (UCAP_ENABLED(req_ucaps, RDMA_UCAP_MLX5_CTRL_LOCAL)) { + if (MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RDMA_CTRL) + *cap |= MLX5_UCTX_CAP_RDMA_CTRL; + else + return -EOPNOTSUPP; + } + + if (UCAP_ENABLED(req_ucaps, RDMA_UCAP_MLX5_CTRL_OTHER_VHCA)) { + if (MLX5_CAP_GEN(dev->mdev, uctx_cap) & + MLX5_UCTX_CAP_RDMA_CTRL_OTHER_VHCA) + *cap |= MLX5_UCTX_CAP_RDMA_CTRL_OTHER_VHCA; + else + return -EOPNOTSUPP; + } + + return 0; +} + +int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user, u64 req_ucaps) { u32 in[MLX5_ST_SZ_DW(create_uctx_in)] = {}; u32 out[MLX5_ST_SZ_DW(create_uctx_out)] = {}; @@ -136,14 +157,22 @@ int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user) return -EINVAL; uctx = MLX5_ADDR_OF(create_uctx_in, in, uctx); - if (is_user && capable(CAP_NET_RAW) && - (MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RAW_TX)) + if (is_user && + (MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RAW_TX) && + capable(CAP_NET_RAW)) cap |= MLX5_UCTX_CAP_RAW_TX; - if (is_user && capable(CAP_SYS_RAWIO) && + if (is_user && (MLX5_CAP_GEN(dev->mdev, uctx_cap) & - MLX5_UCTX_CAP_INTERNAL_DEV_RES)) + MLX5_UCTX_CAP_INTERNAL_DEV_RES) && + capable(CAP_SYS_RAWIO)) cap |= MLX5_UCTX_CAP_INTERNAL_DEV_RES; + if (req_ucaps) { + err = set_uctx_ucaps(dev, req_ucaps, &cap); + if (err) + return err; + } + MLX5_SET(create_uctx_in, in, opcode, MLX5_CMD_OP_CREATE_UCTX); MLX5_SET(uctx, uctx, cap, cap); @@ -2573,7 +2602,7 @@ int mlx5_ib_devx_init(struct mlx5_ib_dev *dev) struct mlx5_devx_event_table *table = &dev->devx_event_table; int uid; - uid = mlx5_ib_devx_create(dev, false); + uid = mlx5_ib_devx_create(dev, false, 0); if (uid > 0) { dev->devx_whitelist_uid = uid; xa_init(&table->event_xa); diff --git a/drivers/infiniband/hw/mlx5/devx.h b/drivers/infiniband/hw/mlx5/devx.h index 1344bf4c9d21..ee9e7d3af93f 100644 --- a/drivers/infiniband/hw/mlx5/devx.h +++ b/drivers/infiniband/hw/mlx5/devx.h @@ -24,13 +24,14 @@ struct devx_obj { struct list_head event_sub; /* holds devx_event_subscription entries */ }; #if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) -int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user); +int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user, u64 req_ucaps); void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid); int mlx5_ib_devx_init(struct mlx5_ib_dev *dev); void mlx5_ib_devx_cleanup(struct mlx5_ib_dev *dev); void mlx5_ib_ufile_hw_cleanup(struct ib_uverbs_file *ufile); #else -static inline int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user) +static inline int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user, + u64 req_ucaps) { return -EOPNOTSUPP; } diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c index 520034acf73a..251246c73b33 100644 --- a/drivers/infiniband/hw/mlx5/fs.c +++ b/drivers/infiniband/hw/mlx5/fs.c @@ -12,6 +12,7 @@ #include 
<rdma/mlx5_user_ioctl_verbs.h> #include <rdma/ib_hdrs.h> #include <rdma/ib_umem.h> +#include <rdma/ib_ucaps.h> #include <linux/mlx5/driver.h> #include <linux/mlx5/fs.h> #include <linux/mlx5/fs_helpers.h> @@ -32,6 +33,11 @@ enum { MATCH_CRITERIA_ENABLE_MISC2_BIT }; + +struct mlx5_per_qp_opfc { + struct mlx5_ib_op_fc opfcs[MLX5_IB_OPCOUNTER_MAX]; +}; + #define HEADER_IS_ZERO(match_criteria, headers) \ !(memchr_inv(MLX5_ADDR_OF(fte_match_param, match_criteria, headers), \ 0, MLX5_FLD_SZ_BYTES(fte_match_param, headers))) \ @@ -678,7 +684,7 @@ enum flow_table_type { #define MLX5_FS_MAX_TYPES 6 #define MLX5_FS_MAX_ENTRIES BIT(16) -static bool mlx5_ib_shared_ft_allowed(struct ib_device *device) +static bool __maybe_unused mlx5_ib_shared_ft_allowed(struct ib_device *device) { struct mlx5_ib_dev *dev = to_mdev(device); @@ -690,7 +696,7 @@ static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_ib_dev *dev, struct mlx5_ib_flow_prio *prio, int priority, int num_entries, int num_groups, - u32 flags) + u32 flags, u16 vport) { struct mlx5_flow_table_attr ft_attr = {}; struct mlx5_flow_table *ft; @@ -698,6 +704,7 @@ static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_ib_dev *dev, ft_attr.prio = priority; ft_attr.max_fte = num_entries; ft_attr.flags = flags; + ft_attr.vport = vport; ft_attr.autogroup.max_num_groups = num_groups; ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); if (IS_ERR(ft)) @@ -792,18 +799,25 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, ft = prio->flow_table; if (!ft) return _get_prio(dev, ns, prio, priority, max_table_size, - num_groups, flags); + num_groups, flags, 0); return prio; } enum { + RDMA_RX_ECN_OPCOUNTER_PER_QP_PRIO, + RDMA_RX_CNP_OPCOUNTER_PER_QP_PRIO, + RDMA_RX_PKTS_BYTES_OPCOUNTER_PER_QP_PRIO, RDMA_RX_ECN_OPCOUNTER_PRIO, RDMA_RX_CNP_OPCOUNTER_PRIO, + RDMA_RX_PKTS_BYTES_OPCOUNTER_PRIO, }; enum { + RDMA_TX_CNP_OPCOUNTER_PER_QP_PRIO, + RDMA_TX_PKTS_BYTES_OPCOUNTER_PER_QP_PRIO, RDMA_TX_CNP_OPCOUNTER_PRIO, + RDMA_TX_PKTS_BYTES_OPCOUNTER_PRIO, }; static int set_vhca_port_spec(struct mlx5_ib_dev *dev, u32 port_num, @@ -867,6 +881,344 @@ static int set_cnp_spec(struct mlx5_ib_dev *dev, u32 port_num, return 0; } +/* Returns the prio we should use for the given optional counter type, + * whereas for bytes type we use the packet type, since they share the same + * resources. 
+ */ +static struct mlx5_ib_flow_prio *get_opfc_prio(struct mlx5_ib_dev *dev, + u32 type) +{ + u32 prio_type; + + switch (type) { + case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES: + prio_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS; + break; + case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES: + prio_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS; + break; + case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP: + prio_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP; + break; + case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP: + prio_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP; + break; + default: + prio_type = type; + } + + return &dev->flow_db->opfcs[prio_type]; +} + +static void put_per_qp_prio(struct mlx5_ib_dev *dev, + enum mlx5_ib_optional_counter_type type) +{ + enum mlx5_ib_optional_counter_type per_qp_type; + struct mlx5_ib_flow_prio *prio; + + switch (type) { + case MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS: + per_qp_type = MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP; + break; + case MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS: + per_qp_type = MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS_PER_QP; + break; + case MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS: + per_qp_type = MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS_PER_QP; + break; + case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS: + per_qp_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP; + break; + case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES: + per_qp_type = MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP; + break; + case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS: + per_qp_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP; + break; + case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES: + per_qp_type = MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP; + break; + default: + return; + } + + prio = get_opfc_prio(dev, per_qp_type); + put_flow_table(dev, prio, true); +} + +static int get_per_qp_prio(struct mlx5_ib_dev *dev, + enum mlx5_ib_optional_counter_type type) +{ + enum mlx5_ib_optional_counter_type per_qp_type; + enum mlx5_flow_namespace_type fn_type; + struct mlx5_flow_namespace *ns; + struct mlx5_ib_flow_prio *prio; + int priority; + + switch (type) { + case MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS: + fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS; + priority = RDMA_RX_ECN_OPCOUNTER_PER_QP_PRIO; + per_qp_type = MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP; + break; + case MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS: + fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS; + priority = RDMA_RX_CNP_OPCOUNTER_PER_QP_PRIO; + per_qp_type = MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS_PER_QP; + break; + case MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS: + fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS; + priority = RDMA_TX_CNP_OPCOUNTER_PER_QP_PRIO; + per_qp_type = MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS_PER_QP; + break; + case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS: + fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS; + priority = RDMA_TX_PKTS_BYTES_OPCOUNTER_PER_QP_PRIO; + per_qp_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP; + break; + case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES: + fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS; + priority = RDMA_TX_PKTS_BYTES_OPCOUNTER_PER_QP_PRIO; + per_qp_type = MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP; + break; + case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS: + fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS; + priority = RDMA_RX_PKTS_BYTES_OPCOUNTER_PER_QP_PRIO; + per_qp_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP; + break; + case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES: + fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS; + priority = RDMA_RX_PKTS_BYTES_OPCOUNTER_PER_QP_PRIO; + per_qp_type = MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP; + break; + default: + return -EINVAL; + } + + ns = mlx5_get_flow_namespace(dev->mdev, fn_type); + if (!ns) + 
return -EOPNOTSUPP; + + prio = get_opfc_prio(dev, per_qp_type); + if (prio->flow_table) + return 0; + + prio = _get_prio(dev, ns, prio, priority, MLX5_FS_MAX_POOL_SIZE, 1, 0, 0); + if (IS_ERR(prio)) + return PTR_ERR(prio); + + prio->refcount = 1; + + return 0; +} + +static struct mlx5_per_qp_opfc * +get_per_qp_opfc(struct mlx5_rdma_counter *mcounter, u32 qp_num, bool *new) +{ + struct mlx5_per_qp_opfc *per_qp_opfc; + + *new = false; + + per_qp_opfc = xa_load(&mcounter->qpn_opfc_xa, qp_num); + if (per_qp_opfc) + return per_qp_opfc; + per_qp_opfc = kzalloc(sizeof(*per_qp_opfc), GFP_KERNEL); + + if (!per_qp_opfc) + return NULL; + + *new = true; + return per_qp_opfc; +} + +static int add_op_fc_rules(struct mlx5_ib_dev *dev, + struct mlx5_rdma_counter *mcounter, + struct mlx5_per_qp_opfc *per_qp_opfc, + struct mlx5_ib_flow_prio *prio, + enum mlx5_ib_optional_counter_type type, + u32 qp_num, u32 port_num) +{ + struct mlx5_ib_op_fc *opfc = &per_qp_opfc->opfcs[type], *in_use_opfc; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_destination dst; + struct mlx5_flow_spec *spec; + int i, err, spec_num; + bool is_tx; + + if (opfc->fc) + return -EEXIST; + + if (mlx5r_is_opfc_shared_and_in_use(per_qp_opfc->opfcs, type, + &in_use_opfc)) { + opfc->fc = in_use_opfc->fc; + opfc->rule[0] = in_use_opfc->rule[0]; + return 0; + } + + opfc->fc = mcounter->fc[type]; + + spec = kcalloc(MAX_OPFC_RULES, sizeof(*spec), GFP_KERNEL); + if (!spec) { + err = -ENOMEM; + goto null_fc; + } + + switch (type) { + case MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP: + if (set_ecn_ce_spec(dev, port_num, &spec[0], + MLX5_FS_IPV4_VERSION) || + set_ecn_ce_spec(dev, port_num, &spec[1], + MLX5_FS_IPV6_VERSION)) { + err = -EOPNOTSUPP; + goto free_spec; + } + spec_num = 2; + is_tx = false; + + MLX5_SET_TO_ONES(fte_match_param, spec[1].match_criteria, + misc_parameters.bth_dst_qp); + MLX5_SET(fte_match_param, spec[1].match_value, + misc_parameters.bth_dst_qp, qp_num); + spec[1].match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS; + break; + case MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS_PER_QP: + if (!MLX5_CAP_FLOWTABLE( + dev->mdev, + ft_field_support_2_nic_receive_rdma.bth_opcode) || + set_cnp_spec(dev, port_num, &spec[0])) { + err = -EOPNOTSUPP; + goto free_spec; + } + spec_num = 1; + is_tx = false; + break; + case MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS_PER_QP: + if (!MLX5_CAP_FLOWTABLE( + dev->mdev, + ft_field_support_2_nic_transmit_rdma.bth_opcode) || + set_cnp_spec(dev, port_num, &spec[0])) { + err = -EOPNOTSUPP; + goto free_spec; + } + spec_num = 1; + is_tx = true; + break; + case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP: + case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP: + spec_num = 1; + is_tx = true; + break; + case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP: + case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP: + spec_num = 1; + is_tx = false; + break; + default: + err = -EINVAL; + goto free_spec; + } + + if (is_tx) { + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + misc_parameters.source_sqn); + MLX5_SET(fte_match_param, spec->match_value, + misc_parameters.source_sqn, qp_num); + } else { + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + misc_parameters.bth_dst_qp); + MLX5_SET(fte_match_param, spec->match_value, + misc_parameters.bth_dst_qp, qp_num); + } + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS; + + dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dst.counter = opfc->fc; + + flow_act.action = + MLX5_FLOW_CONTEXT_ACTION_COUNT | MLX5_FLOW_CONTEXT_ACTION_ALLOW; + + for (i = 0; i < spec_num; i++) { + opfc->rule[i] 
= mlx5_add_flow_rules(prio->flow_table, &spec[i], + &flow_act, &dst, 1); + if (IS_ERR(opfc->rule[i])) { + err = PTR_ERR(opfc->rule[i]); + goto del_rules; + } + } + prio->refcount += spec_num; + + err = xa_err(xa_store(&mcounter->qpn_opfc_xa, qp_num, per_qp_opfc, + GFP_KERNEL)); + if (err) + goto del_rules; + + kfree(spec); + + return 0; + +del_rules: + while (i--) + mlx5_del_flow_rules(opfc->rule[i]); + put_flow_table(dev, prio, false); +free_spec: + kfree(spec); +null_fc: + opfc->fc = NULL; + return err; +} + +static bool is_fc_shared_and_in_use(struct mlx5_rdma_counter *mcounter, + u32 type, struct mlx5_fc **fc) +{ + u32 shared_fc_type; + + switch (type) { + case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP: + shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP; + break; + case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP: + shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP; + break; + case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP: + shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP; + break; + case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP: + shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP; + break; + default: + return false; + } + + *fc = mcounter->fc[shared_fc_type]; + if (!(*fc)) + return false; + + return true; +} + +void mlx5r_fs_destroy_fcs(struct mlx5_ib_dev *dev, + struct rdma_counter *counter) +{ + struct mlx5_rdma_counter *mcounter = to_mcounter(counter); + struct mlx5_fc *in_use_fc; + int i; + + for (i = MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP; + i <= MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP; i++) { + if (!mcounter->fc[i]) + continue; + + if (is_fc_shared_and_in_use(mcounter, i, &in_use_fc)) { + mcounter->fc[i] = NULL; + continue; + } + + mlx5_fc_destroy(dev->mdev, mcounter->fc[i]); + mcounter->fc[i] = NULL; + } +} + int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num, struct mlx5_ib_op_fc *opfc, enum mlx5_ib_optional_counter_type type) @@ -921,6 +1273,20 @@ int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num, priority = RDMA_TX_CNP_OPCOUNTER_PRIO; break; + case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS: + case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES: + spec_num = 1; + fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS; + priority = RDMA_TX_PKTS_BYTES_OPCOUNTER_PRIO; + break; + + case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS: + case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES: + spec_num = 1; + fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS; + priority = RDMA_RX_PKTS_BYTES_OPCOUNTER_PRIO; + break; + default: err = -EOPNOTSUPP; goto free; @@ -932,18 +1298,22 @@ int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num, goto free; } - prio = &dev->flow_db->opfcs[type]; + prio = get_opfc_prio(dev, type); if (!prio->flow_table) { + err = get_per_qp_prio(dev, type); + if (err) + goto free; + prio = _get_prio(dev, ns, prio, priority, - dev->num_ports * MAX_OPFC_RULES, 1, 0); + dev->num_ports * MAX_OPFC_RULES, 1, 0, 0); if (IS_ERR(prio)) { err = PTR_ERR(prio); - goto free; + goto put_prio; } } dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dst.counter_id = mlx5_fc_id(opfc->fc); + dst.counter = opfc->fc; flow_act.action = MLX5_FLOW_CONTEXT_ACTION_COUNT | MLX5_FLOW_CONTEXT_ACTION_ALLOW; @@ -965,6 +1335,8 @@ del_rules: for (i -= 1; i >= 0; i--) mlx5_del_flow_rules(opfc->rule[i]); put_flow_table(dev, prio, false); +put_prio: + put_per_qp_prio(dev, type); free: kfree(spec); return err; @@ -974,12 +1346,115 @@ void mlx5_ib_fs_remove_op_fc(struct mlx5_ib_dev *dev, struct mlx5_ib_op_fc *opfc, enum mlx5_ib_optional_counter_type type) { + struct mlx5_ib_flow_prio *prio; int i; + prio = 
get_opfc_prio(dev, type); + for (i = 0; i < MAX_OPFC_RULES && opfc->rule[i]; i++) { mlx5_del_flow_rules(opfc->rule[i]); - put_flow_table(dev, &dev->flow_db->opfcs[type], true); + put_flow_table(dev, prio, true); + } + + put_per_qp_prio(dev, type); +} + +void mlx5r_fs_unbind_op_fc(struct ib_qp *qp, struct rdma_counter *counter) +{ + struct mlx5_rdma_counter *mcounter = to_mcounter(counter); + struct mlx5_ib_dev *dev = to_mdev(counter->device); + struct mlx5_per_qp_opfc *per_qp_opfc; + struct mlx5_ib_op_fc *in_use_opfc; + struct mlx5_ib_flow_prio *prio; + int i, j; + + per_qp_opfc = xa_load(&mcounter->qpn_opfc_xa, qp->qp_num); + if (!per_qp_opfc) + return; + + for (i = MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP; + i <= MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP; i++) { + if (!per_qp_opfc->opfcs[i].fc) + continue; + + if (mlx5r_is_opfc_shared_and_in_use(per_qp_opfc->opfcs, i, + &in_use_opfc)) { + per_qp_opfc->opfcs[i].fc = NULL; + continue; + } + + for (j = 0; j < MAX_OPFC_RULES; j++) { + if (!per_qp_opfc->opfcs[i].rule[j]) + continue; + mlx5_del_flow_rules(per_qp_opfc->opfcs[i].rule[j]); + prio = get_opfc_prio(dev, i); + put_flow_table(dev, prio, true); + } + per_qp_opfc->opfcs[i].fc = NULL; + } + + kfree(per_qp_opfc); + xa_erase(&mcounter->qpn_opfc_xa, qp->qp_num); +} + +int mlx5r_fs_bind_op_fc(struct ib_qp *qp, struct rdma_counter *counter, + u32 port) +{ + struct mlx5_rdma_counter *mcounter = to_mcounter(counter); + struct mlx5_ib_dev *dev = to_mdev(qp->device); + struct mlx5_per_qp_opfc *per_qp_opfc; + struct mlx5_ib_flow_prio *prio; + struct mlx5_ib_counters *cnts; + struct mlx5_ib_op_fc *opfc; + struct mlx5_fc *in_use_fc; + int i, err, per_qp_type; + bool new; + + if (!counter->mode.bind_opcnt) + return 0; + + cnts = &dev->port[port - 1].cnts; + + for (i = 0; i <= MLX5_IB_OPCOUNTER_RDMA_RX_BYTES; i++) { + opfc = &cnts->opfcs[i]; + if (!opfc->fc) + continue; + + per_qp_type = i + MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP; + prio = get_opfc_prio(dev, per_qp_type); + WARN_ON(!prio->flow_table); + + if (is_fc_shared_and_in_use(mcounter, per_qp_type, &in_use_fc)) + mcounter->fc[per_qp_type] = in_use_fc; + + if (!mcounter->fc[per_qp_type]) { + mcounter->fc[per_qp_type] = mlx5_fc_create(dev->mdev, + false); + if (IS_ERR(mcounter->fc[per_qp_type])) + return PTR_ERR(mcounter->fc[per_qp_type]); + } + + per_qp_opfc = get_per_qp_opfc(mcounter, qp->qp_num, &new); + if (!per_qp_opfc) { + err = -ENOMEM; + goto free_fc; + } + err = add_op_fc_rules(dev, mcounter, per_qp_opfc, prio, + per_qp_type, qp->qp_num, port); + if (err) + goto del_rules; } + + return 0; + +del_rules: + mlx5r_fs_unbind_op_fc(qp, counter); + if (new) + kfree(per_qp_opfc); +free_fc: + if (xa_empty(&mcounter->qpn_opfc_xa)) + mlx5r_fs_destroy_fcs(dev, counter); + return err; } static void set_underlay_qp(struct mlx5_ib_dev *dev, @@ -1113,8 +1588,8 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, handler->ibcounters = flow_act.counters; dest_arr[dest_num].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dest_arr[dest_num].counter_id = - mlx5_fc_id(mcounters->hw_cntrs_hndl); + dest_arr[dest_num].counter = + mcounters->hw_cntrs_hndl; dest_num++; } @@ -1413,17 +1888,51 @@ free_ucmd: return ERR_PTR(err); } +static int mlx5_ib_fill_transport_ns_info(struct mlx5_ib_dev *dev, + enum mlx5_flow_namespace_type type, + u32 *flags, u16 *vport_idx, + u16 *vport, + struct mlx5_core_dev **ft_mdev, + u32 ib_port) +{ + struct mlx5_core_dev *esw_mdev; + + if (!is_mdev_switchdev_mode(dev->mdev)) + return 0; + + if 
(!MLX5_CAP_ADV_RDMA(dev->mdev, rdma_transport_manager)) + return -EOPNOTSUPP; + + if (!dev->port[ib_port - 1].rep) + return -EINVAL; + + esw_mdev = mlx5_eswitch_get_core_dev(dev->port[ib_port - 1].rep->esw); + if (esw_mdev != dev->mdev) + return -EOPNOTSUPP; + + *flags |= MLX5_FLOW_TABLE_OTHER_VPORT; + *ft_mdev = esw_mdev; + *vport = dev->port[ib_port - 1].rep->vport; + *vport_idx = dev->port[ib_port - 1].rep->vport_index; + + return 0; +} + static struct mlx5_ib_flow_prio * _get_flow_table(struct mlx5_ib_dev *dev, u16 user_priority, enum mlx5_flow_namespace_type ns_type, - bool mcast) + bool mcast, u32 ib_port) { + struct mlx5_core_dev *ft_mdev = dev->mdev; struct mlx5_flow_namespace *ns = NULL; struct mlx5_ib_flow_prio *prio = NULL; int max_table_size = 0; + u16 vport_idx = 0; bool esw_encap; u32 flags = 0; + u16 vport = 0; int priority; + int ret; if (mcast) priority = MLX5_IB_FLOW_MCAST_PRIO; @@ -1471,13 +1980,38 @@ _get_flow_table(struct mlx5_ib_dev *dev, u16 user_priority, MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev, log_max_ft_size)); priority = user_priority; break; + case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX: + case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX: + if (ib_port == 0 || user_priority > MLX5_RDMA_TRANSPORT_BYPASS_PRIO) + return ERR_PTR(-EINVAL); + ret = mlx5_ib_fill_transport_ns_info(dev, ns_type, &flags, + &vport_idx, &vport, + &ft_mdev, ib_port); + if (ret) + return ERR_PTR(ret); + + if (ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX) + max_table_size = + BIT(MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_RX( + ft_mdev, log_max_ft_size)); + else + max_table_size = + BIT(MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_TX( + ft_mdev, log_max_ft_size)); + priority = user_priority; + break; default: break; } max_table_size = min_t(int, max_table_size, MLX5_FS_MAX_ENTRIES); - ns = mlx5_get_flow_namespace(dev->mdev, ns_type); + if (ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX || + ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX) + ns = mlx5_get_flow_vport_namespace(ft_mdev, ns_type, vport_idx); + else + ns = mlx5_get_flow_namespace(ft_mdev, ns_type); + if (!ns) return ERR_PTR(-EOPNOTSUPP); @@ -1497,6 +2031,12 @@ _get_flow_table(struct mlx5_ib_dev *dev, u16 user_priority, case MLX5_FLOW_NAMESPACE_RDMA_TX: prio = &dev->flow_db->rdma_tx[priority]; break; + case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX: + prio = &dev->flow_db->rdma_transport_rx[ib_port - 1]; + break; + case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX: + prio = &dev->flow_db->rdma_transport_tx[ib_port - 1]; + break; default: return ERR_PTR(-EINVAL); } @@ -1507,7 +2047,7 @@ _get_flow_table(struct mlx5_ib_dev *dev, u16 user_priority, return prio; return _get_prio(dev, ns, prio, priority, max_table_size, - MLX5_FS_MAX_TYPES, flags); + MLX5_FS_MAX_TYPES, flags, vport); } static struct mlx5_ib_flow_handler * @@ -1603,7 +2143,7 @@ static bool raw_fs_is_multicast(struct mlx5_ib_flow_matcher *fs_matcher, static struct mlx5_ib_flow_handler *raw_fs_rule_add( struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher, struct mlx5_flow_context *flow_context, struct mlx5_flow_act *flow_act, - u32 counter_id, void *cmd_in, int inlen, int dest_id, int dest_type) + struct mlx5_fc *counter, void *cmd_in, int inlen, int dest_id, int dest_type) { struct mlx5_flow_destination *dst; struct mlx5_ib_flow_prio *ft_prio; @@ -1626,7 +2166,8 @@ static struct mlx5_ib_flow_handler *raw_fs_rule_add( mutex_lock(&dev->flow_db->lock); ft_prio = _get_flow_table(dev, fs_matcher->priority, - fs_matcher->ns_type, mcast); + fs_matcher->ns_type, mcast, + fs_matcher->ib_port); if 
(IS_ERR(ft_prio)) { err = PTR_ERR(ft_prio); goto unlock; @@ -1652,8 +2193,12 @@ static struct mlx5_ib_flow_handler *raw_fs_rule_add( } if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + if (WARN_ON(!counter)) { + err = -EINVAL; + goto unlock; + } dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dst[dst_num].counter_id = counter_id; + dst[dst_num].counter = counter; dst_num++; } @@ -1738,6 +2283,12 @@ mlx5_ib_ft_type_to_namespace(enum mlx5_ib_uapi_flow_table_type table_type, case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TX: *namespace = MLX5_FLOW_NAMESPACE_RDMA_TX; break; + case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TRANSPORT_RX: + *namespace = MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX; + break; + case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TRANSPORT_TX: + *namespace = MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX; + break; default: return -EINVAL; } @@ -1827,7 +2378,8 @@ static int get_dests(struct uverbs_attr_bundle *attrs, return -EINVAL; /* Allow only DEVX object or QP as dest when inserting to RDMA_RX */ - if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) && + if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX || + fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX) && ((!dest_devx && !dest_qp) || (dest_devx && dest_qp))) return -EINVAL; @@ -1844,7 +2396,8 @@ static int get_dests(struct uverbs_attr_bundle *attrs, return -EINVAL; /* Allow only flow table as dest when inserting to FDB or RDMA_RX */ if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB_BYPASS || - fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) && + fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX || + fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX) && *dest_type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) return -EINVAL; } else if (dest_qp) { @@ -1865,20 +2418,23 @@ static int get_dests(struct uverbs_attr_bundle *attrs, *dest_id = mqp->raw_packet_qp.rq.tirn; *dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR; } else if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS || - fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX) && + fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX || + fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX) && !(*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)) { *dest_type = MLX5_FLOW_DESTINATION_TYPE_PORT; } if (*dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR && (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS || - fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX)) + fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX || + fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX)) return -EINVAL; return 0; } -static bool is_flow_counter(void *obj, u32 offset, u32 *counter_id) +static bool +is_flow_counter(void *obj, u32 offset, u32 *counter_id, u32 *fc_bulk_size) { struct devx_obj *devx_obj = obj; u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode); @@ -1888,6 +2444,7 @@ static bool is_flow_counter(void *obj, u32 offset, u32 *counter_id) if (offset && offset >= devx_obj->flow_counter_bulk_size) return false; + *fc_bulk_size = devx_obj->flow_counter_bulk_size; *counter_id = MLX5_GET(dealloc_flow_counter_in, devx_obj->dinbox, flow_counter_id); @@ -1904,13 +2461,13 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( { struct mlx5_flow_context flow_context = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG}; - u32 *offset_attr, offset = 0, counter_id = 0; int dest_id, dest_type = -1, inlen, len, ret, i; struct mlx5_ib_flow_handler *flow_handler; struct mlx5_ib_flow_matcher *fs_matcher; struct ib_uobject **arr_flow_actions; struct ib_uflow_resources 
*uflow_res; struct mlx5_flow_act flow_act = {}; + struct mlx5_fc *counter = NULL; struct ib_qp *qp = NULL; void *devx_obj, *cmd_in; struct ib_uobject *uobj; @@ -1937,6 +2494,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( len = uverbs_attr_get_uobjs_arr(attrs, MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX, &arr_flow_actions); if (len) { + u32 *offset_attr, fc_bulk_size, offset = 0, counter_id = 0; devx_obj = arr_flow_actions[0]->object; if (uverbs_attr_is_valid(attrs, @@ -1956,8 +2514,11 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( offset = *offset_attr; } - if (!is_flow_counter(devx_obj, offset, &counter_id)) + if (!is_flow_counter(devx_obj, offset, &counter_id, &fc_bulk_size)) return -EINVAL; + counter = mlx5_fc_local_create(counter_id, offset, fc_bulk_size); + if (IS_ERR(counter)) + return PTR_ERR(counter); flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; } @@ -1968,8 +2529,10 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE); uflow_res = flow_resources_alloc(MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS); - if (!uflow_res) - return -ENOMEM; + if (!uflow_res) { + ret = -ENOMEM; + goto destroy_counter; + } len = uverbs_attr_get_uobjs_arr(attrs, MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS, &arr_flow_actions); @@ -1996,7 +2559,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( flow_handler = raw_fs_rule_add(dev, fs_matcher, &flow_context, &flow_act, - counter_id, cmd_in, inlen, dest_id, dest_type); + counter, cmd_in, inlen, dest_id, dest_type); if (IS_ERR(flow_handler)) { ret = PTR_ERR(flow_handler); goto err_out; @@ -2007,6 +2570,9 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( return 0; err_out: ib_uverbs_flow_resources_free(uflow_res); +destroy_counter: + if (counter) + mlx5_fc_local_destroy(counter); return ret; } @@ -2338,6 +2904,15 @@ static int mlx5_ib_matcher_ns(struct uverbs_attr_bundle *attrs, return 0; } +static bool verify_context_caps(struct mlx5_ib_dev *dev, u64 enabled_caps) +{ + if (is_mdev_switchdev_mode(dev->mdev)) + return UCAP_ENABLED(enabled_caps, + RDMA_UCAP_MLX5_CTRL_OTHER_VHCA); + + return UCAP_ENABLED(enabled_caps, RDMA_UCAP_MLX5_CTRL_LOCAL); +} + static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)( struct uverbs_attr_bundle *attrs) { @@ -2386,6 +2961,26 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)( goto end; } + if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_IB_PORT)) { + err = uverbs_copy_from(&obj->ib_port, attrs, + MLX5_IB_ATTR_FLOW_MATCHER_IB_PORT); + if (err) + goto end; + if (!rdma_is_port_valid(&dev->ib_dev, obj->ib_port)) { + err = -EINVAL; + goto end; + } + if (obj->ns_type != MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX && + obj->ns_type != MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX) { + err = -EINVAL; + goto end; + } + if (!verify_context_caps(dev, uobj->context->enabled_caps)) { + err = -EOPNOTSUPP; + goto end; + } + } + uobj->object = obj; obj->mdev = dev->mdev; atomic_set(&obj->usecnt, 0); @@ -2433,7 +3028,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_STEERING_ANCHOR_CREATE)( mutex_lock(&dev->flow_db->lock); - ft_prio = _get_flow_table(dev, priority, ns_type, 0); + ft_prio = _get_flow_table(dev, priority, ns_type, 0, 0); if (IS_ERR(ft_prio)) { err = PTR_ERR(ft_prio); goto free_obj; @@ -2819,7 +3414,10 @@ DECLARE_UVERBS_NAMED_METHOD( UA_OPTIONAL), UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE, enum mlx5_ib_uapi_flow_table_type, - UA_OPTIONAL)); + UA_OPTIONAL), + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_FLOW_MATCHER_IB_PORT, + 
UVERBS_ATTR_TYPE(u32), + UA_OPTIONAL)); DECLARE_UVERBS_NAMED_METHOD_DESTROY( MLX5_IB_METHOD_FLOW_MATCHER_DESTROY, @@ -2863,6 +3461,7 @@ DECLARE_UVERBS_NAMED_OBJECT( &UVERBS_METHOD(MLX5_IB_METHOD_STEERING_ANCHOR_DESTROY)); const struct uapi_definition mlx5_ib_flow_defs[] = { +#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) UAPI_DEF_CHAIN_OBJ_TREE_NAMED( MLX5_IB_OBJECT_FLOW_MATCHER), UAPI_DEF_CHAIN_OBJ_TREE( @@ -2873,6 +3472,7 @@ const struct uapi_definition mlx5_ib_flow_defs[] = { UAPI_DEF_CHAIN_OBJ_TREE_NAMED( MLX5_IB_OBJECT_STEERING_ANCHOR, UAPI_DEF_IS_OBJ_SUPPORTED(mlx5_ib_shared_ft_allowed)), +#endif {}, }; @@ -2889,8 +3489,26 @@ int mlx5_ib_fs_init(struct mlx5_ib_dev *dev) if (!dev->flow_db) return -ENOMEM; + dev->flow_db->rdma_transport_rx = kcalloc(dev->num_ports, + sizeof(struct mlx5_ib_flow_prio), + GFP_KERNEL); + if (!dev->flow_db->rdma_transport_rx) + goto free_flow_db; + + dev->flow_db->rdma_transport_tx = kcalloc(dev->num_ports, + sizeof(struct mlx5_ib_flow_prio), + GFP_KERNEL); + if (!dev->flow_db->rdma_transport_tx) + goto free_rdma_transport_rx; + mutex_init(&dev->flow_db->lock); ib_set_device_ops(&dev->ib_dev, &flow_ops); return 0; + +free_rdma_transport_rx: + kfree(dev->flow_db->rdma_transport_rx); +free_flow_db: + kfree(dev->flow_db); + return -ENOMEM; } diff --git a/drivers/infiniband/hw/mlx5/fs.h b/drivers/infiniband/hw/mlx5/fs.h index b9734904f5f0..2ebe86e5be10 100644 --- a/drivers/infiniband/hw/mlx5/fs.h +++ b/drivers/infiniband/hw/mlx5/fs.h @@ -8,23 +8,8 @@ #include "mlx5_ib.h" -#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) int mlx5_ib_fs_init(struct mlx5_ib_dev *dev); void mlx5_ib_fs_cleanup_anchor(struct mlx5_ib_dev *dev); -#else -static inline int mlx5_ib_fs_init(struct mlx5_ib_dev *dev) -{ - dev->flow_db = kzalloc(sizeof(*dev->flow_db), GFP_KERNEL); - - if (!dev->flow_db) - return -ENOMEM; - - mutex_init(&dev->flow_db->lock); - return 0; -} - -inline void mlx5_ib_fs_cleanup_anchor(struct mlx5_ib_dev *dev) {} -#endif static inline void mlx5_ib_fs_cleanup(struct mlx5_ib_dev *dev) { @@ -40,6 +25,8 @@ static inline void mlx5_ib_fs_cleanup(struct mlx5_ib_dev *dev) * is a safe assumption that all references are gone. 
*/ mlx5_ib_fs_cleanup_anchor(dev); + kfree(dev->flow_db->rdma_transport_tx); + kfree(dev->flow_db->rdma_transport_rx); kfree(dev->flow_db); } #endif /* _MLX5_IB_FS_H */ diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index bc7930d0c564..d07cacaa0abd 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -47,6 +47,7 @@ #include <rdma/uverbs_ioctl.h> #include <rdma/mlx5_user_ioctl_verbs.h> #include <rdma/mlx5_user_ioctl_cmds.h> +#include <rdma/ib_ucaps.h> #include "macsec.h" #include "data_direct.h" @@ -242,6 +243,10 @@ static int mlx5_netdev_event(struct notifier_block *this, case NETDEV_DOWN: { struct net_device *upper = NULL; + if (!netif_is_lag_master(ndev) && !netif_is_lag_port(ndev) && + !mlx5_core_mp_enabled(mdev)) + return NOTIFY_DONE; + if (mlx5_lag_is_roce(mdev) || mlx5_lag_is_sriov(mdev)) { struct net_device *lag_ndev; @@ -1930,6 +1935,12 @@ static int set_ucontext_resp(struct ib_ucontext *uctx, return 0; } +static bool uctx_rdma_ctrl_is_enabled(u64 enabled_caps) +{ + return UCAP_ENABLED(enabled_caps, RDMA_UCAP_MLX5_CTRL_LOCAL) || + UCAP_ENABLED(enabled_caps, RDMA_UCAP_MLX5_CTRL_OTHER_VHCA); +} + static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) { @@ -1972,10 +1983,17 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx, return -EINVAL; if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) { - err = mlx5_ib_devx_create(dev, true); + err = mlx5_ib_devx_create(dev, true, uctx->enabled_caps); if (err < 0) goto out_ctx; context->devx_uid = err; + + if (uctx_rdma_ctrl_is_enabled(uctx->enabled_caps)) { + err = mlx5_cmd_add_privileged_uid(dev->mdev, + context->devx_uid); + if (err) + goto out_devx; + } } lib_uar_4k = req.lib_caps & MLX5_LIB_CAP_4K_UAR; @@ -1990,7 +2008,7 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx, /* updates req->total_num_bfregs */ err = calc_total_bfregs(dev, lib_uar_4k, &req, bfregi); if (err) - goto out_devx; + goto out_ucap; mutex_init(&bfregi->lock); bfregi->lib_uar_4k = lib_uar_4k; @@ -1998,7 +2016,7 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx, GFP_KERNEL); if (!bfregi->count) { err = -ENOMEM; - goto out_devx; + goto out_ucap; } bfregi->sys_pages = kcalloc(bfregi->num_sys_pages, @@ -2062,6 +2080,11 @@ out_sys_pages: out_count: kfree(bfregi->count); +out_ucap: + if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX && + uctx_rdma_ctrl_is_enabled(uctx->enabled_caps)) + mlx5_cmd_remove_privileged_uid(dev->mdev, context->devx_uid); + out_devx: if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) mlx5_ib_devx_destroy(dev, context->devx_uid); @@ -2106,8 +2129,12 @@ static void mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) kfree(bfregi->sys_pages); kfree(bfregi->count); - if (context->devx_uid) + if (context->devx_uid) { + if (uctx_rdma_ctrl_is_enabled(ibcontext->enabled_caps)) + mlx5_cmd_remove_privileged_uid(dev->mdev, + context->devx_uid); mlx5_ib_devx_destroy(dev, context->devx_uid); + } } static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev, @@ -2839,7 +2866,7 @@ static int mlx5_ib_get_plane_num(struct mlx5_core_dev *mdev, u8 *num_plane) int err; *num_plane = 0; - if (!MLX5_CAP_GEN(mdev, ib_virt)) + if (!MLX5_CAP_GEN(mdev, ib_virt) || !MLX5_CAP_GEN_2(mdev, multiplane)) return 0; err = mlx5_query_hca_vport_context(mdev, 0, 1, 0, &vport_ctx); @@ -3639,7 +3666,8 @@ static int mlx5_ib_init_multiport_master(struct mlx5_ib_dev *dev) list_for_each_entry(mpi, &mlx5_ib_unaffiliated_port_list, list) { if (dev->sys_image_guid == mpi->sys_image_guid && - 
(mlx5_core_native_port_num(mpi->mdev) - 1) == i) { + (mlx5_core_native_port_num(mpi->mdev) - 1) == i && + mlx5_core_same_coredev_type(dev->mdev, mpi->mdev)) { bound = mlx5_ib_bind_slave_port(dev, mpi); } @@ -4196,8 +4224,47 @@ static int mlx5_ib_init_var_table(struct mlx5_ib_dev *dev) return (var_table->bitmap) ? 0 : -ENOMEM; } +static void mlx5_ib_cleanup_ucaps(struct mlx5_ib_dev *dev) +{ + if (MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RDMA_CTRL) + ib_remove_ucap(RDMA_UCAP_MLX5_CTRL_LOCAL); + + if (MLX5_CAP_GEN(dev->mdev, uctx_cap) & + MLX5_UCTX_CAP_RDMA_CTRL_OTHER_VHCA) + ib_remove_ucap(RDMA_UCAP_MLX5_CTRL_OTHER_VHCA); +} + +static int mlx5_ib_init_ucaps(struct mlx5_ib_dev *dev) +{ + int ret; + + if (MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RDMA_CTRL) { + ret = ib_create_ucap(RDMA_UCAP_MLX5_CTRL_LOCAL); + if (ret) + return ret; + } + + if (MLX5_CAP_GEN(dev->mdev, uctx_cap) & + MLX5_UCTX_CAP_RDMA_CTRL_OTHER_VHCA) { + ret = ib_create_ucap(RDMA_UCAP_MLX5_CTRL_OTHER_VHCA); + if (ret) + goto remove_local; + } + + return 0; + +remove_local: + if (MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RDMA_CTRL) + ib_remove_ucap(RDMA_UCAP_MLX5_CTRL_LOCAL); + return ret; +} + static void mlx5_ib_stage_caps_cleanup(struct mlx5_ib_dev *dev) { + if (MLX5_CAP_GEN_2_64(dev->mdev, general_obj_types_127_64) & + MLX5_HCA_CAP_2_GENERAL_OBJECT_TYPES_RDMA_CTRL) + mlx5_ib_cleanup_ucaps(dev); + bitmap_free(dev->var_table.bitmap); } @@ -4248,6 +4315,13 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) return err; } + if (MLX5_CAP_GEN_2_64(dev->mdev, general_obj_types_127_64) & + MLX5_HCA_CAP_2_GENERAL_OBJECT_TYPES_RDMA_CTRL) { + err = mlx5_ib_init_ucaps(dev); + if (err) + return err; + } + dev->ib_dev.use_cq_dim = true; return 0; @@ -4785,7 +4859,8 @@ static int mlx5r_mp_probe(struct auxiliary_device *adev, mutex_lock(&mlx5_ib_multiport_mutex); list_for_each_entry(dev, &mlx5_ib_dev_list, ib_dev_list) { - if (dev->sys_image_guid == mpi->sys_image_guid) + if (dev->sys_image_guid == mpi->sys_image_guid && + mlx5_core_same_coredev_type(dev->mdev, mpi->mdev)) bound = mlx5_ib_bind_slave_port(dev, mpi); if (bound) { diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index a01b592aa716..ace2df3e1d9f 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -276,6 +276,7 @@ struct mlx5_ib_flow_matcher { struct mlx5_core_dev *mdev; atomic_t usecnt; u8 match_criteria_enable; + u32 ib_port; }; struct mlx5_ib_steering_anchor { @@ -293,6 +294,18 @@ enum mlx5_ib_optional_counter_type { MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS, MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS, MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS, + MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS, + MLX5_IB_OPCOUNTER_RDMA_TX_BYTES, + MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS, + MLX5_IB_OPCOUNTER_RDMA_RX_BYTES, + + MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP, + MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS_PER_QP, + MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS_PER_QP, + MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP, + MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP, + MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP, + MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP, MLX5_IB_OPCOUNTER_MAX, }; @@ -307,6 +320,8 @@ struct mlx5_ib_flow_db { struct mlx5_ib_flow_prio rdma_tx[MLX5_IB_NUM_FLOW_FT]; struct mlx5_ib_flow_prio opfcs[MLX5_IB_OPCOUNTER_MAX]; struct mlx5_flow_table *lag_demux_ft; + struct mlx5_ib_flow_prio *rdma_transport_rx; + struct mlx5_ib_flow_prio *rdma_transport_tx; /* Protect flow steering bypass flow tables * when add/del flow rules. 
* only single add/removal of flow steering rule could be done @@ -669,6 +684,12 @@ struct mlx5_ib_mkey { #define mlx5_update_odp_stats(mr, counter_name, value) \ atomic64_add(value, &((mr)->odp_stats.counter_name)) +#define mlx5_update_odp_stats_with_handled(mr, counter_name, value) \ + do { \ + mlx5_update_odp_stats(mr, counter_name, value); \ + atomic64_add(1, &((mr)->odp_stats.counter_name##_handled)); \ + } while (0) + struct mlx5_ib_mr { struct ib_mr ibmr; struct mlx5_ib_mkey mmkey; @@ -877,6 +898,14 @@ void mlx5_ib_fs_remove_op_fc(struct mlx5_ib_dev *dev, struct mlx5_ib_op_fc *opfc, enum mlx5_ib_optional_counter_type type); +int mlx5r_fs_bind_op_fc(struct ib_qp *qp, struct rdma_counter *counter, + u32 port); + +void mlx5r_fs_unbind_op_fc(struct ib_qp *qp, struct rdma_counter *counter); + +void mlx5r_fs_destroy_fcs(struct mlx5_ib_dev *dev, + struct rdma_counter *counter); + struct mlx5_ib_multiport_info; struct mlx5_ib_multiport { diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 45d9dc9c6c8f..5fbebafc8774 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -56,7 +56,7 @@ static void create_mkey_callback(int status, struct mlx5_async_work *context); static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem, u64 iova, int access_flags, - unsigned int page_size, bool populate, + unsigned long page_size, bool populate, int access_mode); static int __mlx5_ib_dereg_mr(struct ib_mr *ibmr); @@ -718,8 +718,7 @@ mkey_cache_ent_from_rb_key(struct mlx5_ib_dev *dev, } static struct mlx5_ib_mr *_mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, - struct mlx5_cache_ent *ent, - int access_flags) + struct mlx5_cache_ent *ent) { struct mlx5_ib_mr *mr; int err; @@ -794,7 +793,7 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, if (!ent) return ERR_PTR(-EOPNOTSUPP); - return _mlx5_mr_cache_alloc(dev, ent, access_flags); + return _mlx5_mr_cache_alloc(dev, ent); } static void mlx5_mkey_cache_debugfs_cleanup(struct mlx5_ib_dev *dev) @@ -919,6 +918,25 @@ mkeys_err: return ERR_PTR(ret); } +static void mlx5r_destroy_cache_entries(struct mlx5_ib_dev *dev) +{ + struct rb_root *root = &dev->cache.rb_root; + struct mlx5_cache_ent *ent; + struct rb_node *node; + + mutex_lock(&dev->cache.rb_lock); + node = rb_first(root); + while (node) { + ent = rb_entry(node, struct mlx5_cache_ent, node); + node = rb_next(node); + clean_keys(dev, ent); + rb_erase(&ent->node, root); + mlx5r_mkeys_uninit(ent); + kfree(ent); + } + mutex_unlock(&dev->cache.rb_lock); +} + int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev) { struct mlx5_mkey_cache *cache = &dev->cache; @@ -970,6 +988,8 @@ int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev) err: mutex_unlock(&cache->rb_lock); mlx5_mkey_cache_debugfs_cleanup(dev); + mlx5r_destroy_cache_entries(dev); + destroy_workqueue(cache->wq); mlx5_ib_warn(dev, "failed to create mkey cache entry\n"); return ret; } @@ -1003,20 +1023,10 @@ void mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev) mlx5_cmd_cleanup_async_ctx(&dev->async_ctx); /* At this point all entries are disabled and have no concurrent work. 
*/ - mutex_lock(&dev->cache.rb_lock); - node = rb_first(root); - while (node) { - ent = rb_entry(node, struct mlx5_cache_ent, node); - node = rb_next(node); - clean_keys(dev, ent); - rb_erase(&ent->node, root); - mlx5r_mkeys_uninit(ent); - kfree(ent); - } - mutex_unlock(&dev->cache.rb_lock); + mlx5r_destroy_cache_entries(dev); destroy_workqueue(dev->cache.wq); - del_timer_sync(&dev->delay_timer); + timer_delete_sync(&dev->delay_timer); } struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc) @@ -1115,7 +1125,7 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd, struct mlx5r_cache_rb_key rb_key = {}; struct mlx5_cache_ent *ent; struct mlx5_ib_mr *mr; - unsigned int page_size; + unsigned long page_size; if (umem->is_dmabuf) page_size = mlx5_umem_dmabuf_default_pgsz(umem, iova); @@ -1144,7 +1154,7 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd, return mr; } - mr = _mlx5_mr_cache_alloc(dev, ent, access_flags); + mr = _mlx5_mr_cache_alloc(dev, ent); if (IS_ERR(mr)) return mr; @@ -1219,7 +1229,7 @@ err_1: */ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem, u64 iova, int access_flags, - unsigned int page_size, bool populate, + unsigned long page_size, bool populate, int access_mode) { struct mlx5_ib_dev *dev = to_mdev(pd->device); @@ -1425,7 +1435,7 @@ static struct ib_mr *create_real_mr(struct ib_pd *pd, struct ib_umem *umem, mr = alloc_cacheable_mr(pd, umem, iova, access_flags, MLX5_MKC_ACCESS_MODE_MTT); } else { - unsigned int page_size = + unsigned long page_size = mlx5_umem_mkc_find_best_pgsz(dev, umem, iova); mutex_lock(&dev->slow_path_mutex); @@ -1550,7 +1560,7 @@ static void mlx5_ib_dmabuf_invalidate_cb(struct dma_buf_attachment *attach) dma_resv_assert_held(umem_dmabuf->attach->dmabuf->resv); - if (!umem_dmabuf->sgt) + if (!umem_dmabuf->sgt || !mr) return; mlx5r_umr_update_mr_pas(mr, MLX5_IB_UPD_XLT_ZAP); @@ -1935,7 +1945,8 @@ err: static void mlx5_free_priv_descs(struct mlx5_ib_mr *mr) { - if (!mr->umem && !mr->data_direct && mr->descs) { + if (!mr->umem && !mr->data_direct && + mr->ibmr.type != IB_MR_TYPE_DM && mr->descs) { struct ib_device *device = mr->ibmr.device; int size = mr->max_descs * mr->desc_size; struct mlx5_ib_dev *dev = to_mdev(device); @@ -1956,7 +1967,6 @@ static int cache_ent_find_and_store(struct mlx5_ib_dev *dev, if (mr->mmkey.cache_ent) { spin_lock_irq(&mr->mmkey.cache_ent->mkeys_queue.lock); - mr->mmkey.cache_ent->in_use--; goto end; } @@ -2021,18 +2031,31 @@ static int mlx5_revoke_mr(struct mlx5_ib_mr *mr) { struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); struct mlx5_cache_ent *ent = mr->mmkey.cache_ent; + bool is_odp = is_odp_mr(mr); + bool is_odp_dma_buf = is_dmabuf_mr(mr) && + !to_ib_umem_dmabuf(mr->umem)->pinned; + bool from_cache = !!ent; + int ret = 0; + + if (is_odp) + mutex_lock(&to_ib_umem_odp(mr->umem)->umem_mutex); + + if (is_odp_dma_buf) + dma_resv_lock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv, NULL); if (mr->mmkey.cacheable && !mlx5r_umr_revoke_mr(mr) && !cache_ent_find_and_store(dev, mr)) { ent = mr->mmkey.cache_ent; /* upon storing to a clean temp entry - schedule its cleanup */ spin_lock_irq(&ent->mkeys_queue.lock); + if (from_cache) + ent->in_use--; if (ent->is_tmp && !ent->tmp_cleanup_scheduled) { mod_delayed_work(ent->dev->cache.wq, &ent->dwork, msecs_to_jiffies(30 * 1000)); ent->tmp_cleanup_scheduled = true; } spin_unlock_irq(&ent->mkeys_queue.lock); - return 0; + goto out; } if (ent) { @@ -2041,7 +2064,21 @@ static int mlx5_revoke_mr(struct mlx5_ib_mr *mr) 
mr->mmkey.cache_ent = NULL; spin_unlock_irq(&ent->mkeys_queue.lock); } - return destroy_mkey(dev, mr); + ret = destroy_mkey(dev, mr); +out: + if (is_odp) { + if (!ret) + to_ib_umem_odp(mr->umem)->private = NULL; + mutex_unlock(&to_ib_umem_odp(mr->umem)->umem_mutex); + } + + if (is_odp_dma_buf) { + if (!ret) + to_ib_umem_dmabuf(mr->umem)->private = NULL; + dma_resv_unlock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv); + } + + return ret; } static int __mlx5_ib_dereg_mr(struct ib_mr *ibmr) diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 4b37446758fd..86d8fa63bf69 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -228,13 +228,28 @@ static void destroy_unused_implicit_child_mr(struct mlx5_ib_mr *mr) unsigned long idx = ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT; struct mlx5_ib_mr *imr = mr->parent; + /* + * If userspace is racing freeing the parent implicit ODP MR then we can + * loose the race with parent destruction. In this case + * mlx5_ib_free_odp_mr() will free everything in the implicit_children + * xarray so NOP is fine. This child MR cannot be destroyed here because + * we are under its umem_mutex. + */ if (!refcount_inc_not_zero(&imr->mmkey.usecount)) return; - xa_erase(&imr->implicit_children, idx); + xa_lock(&imr->implicit_children); + if (__xa_cmpxchg(&imr->implicit_children, idx, mr, NULL, GFP_KERNEL) != + mr) { + xa_unlock(&imr->implicit_children); + mlx5r_deref_odp_mkey(&imr->mmkey); + return; + } + if (MLX5_CAP_ODP(mr_to_mdev(mr)->mdev, mem_page_fault)) - xa_erase(&mr_to_mdev(mr)->odp_mkeys, - mlx5_base_mkey(mr->mmkey.key)); + __xa_erase(&mr_to_mdev(mr)->odp_mkeys, + mlx5_base_mkey(mr->mmkey.key)); + xa_unlock(&imr->implicit_children); /* Freeing a MR is a sleeping operation, so bounce to a work queue */ INIT_WORK(&mr->odp_destroy.work, free_implicit_child_mr_work); @@ -268,6 +283,8 @@ static bool mlx5_ib_invalidate_range(struct mmu_interval_notifier *mni, if (!umem_odp->npages) goto out; mr = umem_odp->private; + if (!mr) + goto out; start = max_t(u64, ib_umem_start(umem_odp), range->start); end = min_t(u64, ib_umem_end(umem_odp), range->end); @@ -292,9 +309,6 @@ static bool mlx5_ib_invalidate_range(struct mmu_interval_notifier *mni, blk_start_idx = idx; in_block = 1; } - - /* Count page invalidations */ - invalidations += idx - blk_start_idx + 1; } else { u64 umr_offset = idx & umr_block_mask; @@ -304,16 +318,21 @@ static bool mlx5_ib_invalidate_range(struct mmu_interval_notifier *mni, MLX5_IB_UPD_XLT_ZAP | MLX5_IB_UPD_XLT_ATOMIC); in_block = 0; + /* Count page invalidations */ + invalidations += idx - blk_start_idx + 1; } } } - if (in_block) + if (in_block) { mlx5r_umr_update_xlt(mr, blk_start_idx, idx - blk_start_idx + 1, 0, MLX5_IB_UPD_XLT_ZAP | MLX5_IB_UPD_XLT_ATOMIC); + /* Count page invalidations */ + invalidations += idx - blk_start_idx + 1; + } - mlx5_update_odp_stats(mr, invalidations, invalidations); + mlx5_update_odp_stats_with_handled(mr, invalidations, invalidations); /* * We are now sure that the device will not access the @@ -500,18 +519,18 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, refcount_inc(&ret->mmkey.usecount); goto out_lock; } - xa_unlock(&imr->implicit_children); if (MLX5_CAP_ODP(dev->mdev, mem_page_fault)) { - ret = xa_store(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key), - &mr->mmkey, GFP_KERNEL); + ret = __xa_store(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key), + &mr->mmkey, GFP_KERNEL); if (xa_is_err(ret)) { ret = ERR_PTR(xa_err(ret)); 
- xa_erase(&imr->implicit_children, idx); - goto out_mr; + __xa_erase(&imr->implicit_children, idx); + goto out_lock; } mr->mmkey.type = MLX5_MKEY_IMPLICIT_CHILD; } + xa_unlock(&imr->implicit_children); mlx5_ib_dbg(mr_to_mdev(imr), "key %x mr %p\n", mr->mmkey.key, mr); return mr; @@ -944,8 +963,7 @@ out: /* * Handle a single data segment in a page-fault WQE or RDMA region. * - * Returns number of OS pages retrieved on success. The caller may continue to - * the next data segment. + * Returns zero on success. The caller may continue to the next data segment. * Can return the following error codes: * -EAGAIN to designate a temporary error. The caller will abort handling the * page fault and resolve it. @@ -958,7 +976,7 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, u32 *bytes_committed, u32 *bytes_mapped) { - int npages = 0, ret, i, outlen, cur_outlen = 0, depth = 0; + int ret, i, outlen, cur_outlen = 0, depth = 0, pages_in_range; struct pf_frame *head = NULL, *frame; struct mlx5_ib_mkey *mmkey; struct mlx5_ib_mr *mr; @@ -993,13 +1011,20 @@ next_mr: case MLX5_MKEY_MR: mr = container_of(mmkey, struct mlx5_ib_mr, mmkey); + pages_in_range = (ALIGN(io_virt + bcnt, PAGE_SIZE) - + (io_virt & PAGE_MASK)) >> + PAGE_SHIFT; ret = pagefault_mr(mr, io_virt, bcnt, bytes_mapped, 0, false); if (ret < 0) goto end; - mlx5_update_odp_stats(mr, faults, ret); + mlx5_update_odp_stats_with_handled(mr, faults, ret); + + if (ret < pages_in_range) { + ret = -EFAULT; + goto end; + } - npages += ret; ret = 0; break; @@ -1090,7 +1115,7 @@ end: kfree(out); *bytes_committed = 0; - return ret ? ret : npages; + return ret; } /* @@ -1109,8 +1134,7 @@ end: * the committed bytes). * @receive_queue: receive WQE end of sg list * - * Returns the number of pages loaded if positive, zero for an empty WQE, or a - * negative error code. + * Returns zero for success or a negative error code. */ static int pagefault_data_segments(struct mlx5_ib_dev *dev, struct mlx5_pagefault *pfault, @@ -1118,7 +1142,7 @@ static int pagefault_data_segments(struct mlx5_ib_dev *dev, void *wqe_end, u32 *bytes_mapped, u32 *total_wqe_bytes, bool receive_queue) { - int ret = 0, npages = 0; + int ret = 0; u64 io_virt; __be32 key; u32 byte_count; @@ -1175,10 +1199,9 @@ static int pagefault_data_segments(struct mlx5_ib_dev *dev, bytes_mapped); if (ret < 0) break; - npages += ret; } - return ret < 0 ? ret : npages; + return ret; } /* @@ -1414,12 +1437,6 @@ resolve_page_fault: free_page((unsigned long)wqe_start); } -static int pages_in_range(u64 address, u32 length) -{ - return (ALIGN(address + length, PAGE_SIZE) - - (address & PAGE_MASK)) >> PAGE_SHIFT; -} - static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev, struct mlx5_pagefault *pfault) { @@ -1458,7 +1475,7 @@ static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev, if (ret == -EAGAIN) { /* We're racing with an invalidation, don't prefetch */ prefetch_activated = 0; - } else if (ret < 0 || pages_in_range(address, length) > ret) { + } else if (ret < 0) { mlx5_ib_page_fault_resume(dev, pfault, 1); if (ret != -ENOENT) mlx5_ib_dbg(dev, "PAGE FAULT error %d. 
QP 0x%llx, type: 0x%x\n", @@ -1529,7 +1546,7 @@ static void mlx5_ib_mr_memory_pfault_handler(struct mlx5_ib_dev *dev, goto err; } - mlx5_update_odp_stats(mr, faults, ret); + mlx5_update_odp_stats_with_handled(mr, faults, ret); mlx5r_deref_odp_mkey(mmkey); if (pfault->memory.flags & MLX5_MEMORY_PAGE_FAULT_FLAGS_LAST) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index a43eba9d3572..88724d15705d 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -3447,11 +3447,11 @@ static int ib_to_mlx5_rate_map(u8 rate) return 0; } -static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate) +int mlx5r_ib_rate(struct mlx5_ib_dev *dev, u8 rate) { u32 stat_rate_support; - if (rate == IB_RATE_PORT_CURRENT) + if (rate == IB_RATE_PORT_CURRENT || rate == IB_RATE_800_GBPS) return 0; if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_800_GBPS) @@ -3596,7 +3596,7 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, sizeof(grh->dgid.raw)); } - err = ib_rate_to_mlx5(dev, rdma_ah_get_static_rate(ah)); + err = mlx5r_ib_rate(dev, rdma_ah_get_static_rate(ah)); if (err < 0) return err; MLX5_SET(ads, path, stat_rate, err); @@ -4579,6 +4579,8 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr, set_id = mlx5_ib_get_counters_id(dev, attr->port_num - 1); MLX5_SET(dctc, dctc, counter_set_id, set_id); + + qp->port = attr->port_num; } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) { struct mlx5_ib_modify_qp_resp resp = {}; u32 out[MLX5_ST_SZ_DW(create_dct_out)] = {}; @@ -5074,7 +5076,7 @@ static int mlx5_ib_dct_query_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *mqp, } if (qp_attr_mask & IB_QP_PORT) - qp_attr->port_num = MLX5_GET(dctc, dctc, port); + qp_attr->port_num = mqp->port; if (qp_attr_mask & IB_QP_MIN_RNR_TIMER) qp_attr->min_rnr_timer = MLX5_GET(dctc, dctc, min_rnr_nak); if (qp_attr_mask & IB_QP_AV) { diff --git a/drivers/infiniband/hw/mlx5/qp.h b/drivers/infiniband/hw/mlx5/qp.h index b6ee7c3ee1ca..2530e7730635 100644 --- a/drivers/infiniband/hw/mlx5/qp.h +++ b/drivers/infiniband/hw/mlx5/qp.h @@ -56,4 +56,5 @@ int mlx5_core_xrcd_dealloc(struct mlx5_ib_dev *dev, u32 xrcdn); int mlx5_ib_qp_set_counter(struct ib_qp *qp, struct rdma_counter *counter); int mlx5_ib_qp_event_init(void); void mlx5_ib_qp_event_cleanup(void); +int mlx5r_ib_rate(struct mlx5_ib_dev *dev, u8 rate); #endif /* _MLX5_IB_QP_H */ diff --git a/drivers/infiniband/hw/mlx5/restrack.c b/drivers/infiniband/hw/mlx5/restrack.c index affcf8fe943c..67841922c7b8 100644 --- a/drivers/infiniband/hw/mlx5/restrack.c +++ b/drivers/infiniband/hw/mlx5/restrack.c @@ -96,9 +96,18 @@ static int fill_stat_mr_entry(struct sk_buff *msg, struct ib_mr *ibmr) atomic64_read(&mr->odp_stats.faults))) goto err_table; if (rdma_nl_stat_hwcounter_entry( + msg, "page_faults_handled", + atomic64_read(&mr->odp_stats.faults_handled))) + goto err_table; + if (rdma_nl_stat_hwcounter_entry( msg, "page_invalidations", atomic64_read(&mr->odp_stats.invalidations))) goto err_table; + if (rdma_nl_stat_hwcounter_entry( + msg, "page_invalidations_handled", + atomic64_read(&mr->odp_stats.invalidations_handled))) + goto err_table; + if (rdma_nl_stat_hwcounter_entry(msg, "page_prefetch", atomic64_read(&mr->odp_stats.prefetch))) goto err_table; diff --git a/drivers/infiniband/hw/mlx5/umr.c b/drivers/infiniband/hw/mlx5/umr.c index 887fd6fa3ba9..793f3c5c4d01 100644 --- a/drivers/infiniband/hw/mlx5/umr.c +++ b/drivers/infiniband/hw/mlx5/umr.c @@ -231,30 +231,6 @@ void 
mlx5r_umr_cleanup(struct mlx5_ib_dev *dev) ib_dealloc_pd(dev->umrc.pd); } -static int mlx5r_umr_recover(struct mlx5_ib_dev *dev) -{ - struct umr_common *umrc = &dev->umrc; - struct ib_qp_attr attr; - int err; - - attr.qp_state = IB_QPS_RESET; - err = ib_modify_qp(umrc->qp, &attr, IB_QP_STATE); - if (err) { - mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n"); - goto err; - } - - err = mlx5r_umr_qp_rst2rts(dev, umrc->qp); - if (err) - goto err; - - umrc->state = MLX5_UMR_STATE_ACTIVE; - return 0; - -err: - umrc->state = MLX5_UMR_STATE_ERR; - return err; -} static int mlx5r_umr_post_send(struct ib_qp *ibqp, u32 mkey, struct ib_cqe *cqe, struct mlx5r_umr_wqe *wqe, bool with_data) @@ -302,6 +278,61 @@ out: return err; } +static int mlx5r_umr_recover(struct mlx5_ib_dev *dev, u32 mkey, + struct mlx5r_umr_context *umr_context, + struct mlx5r_umr_wqe *wqe, bool with_data) +{ + struct umr_common *umrc = &dev->umrc; + struct ib_qp_attr attr; + int err; + + mutex_lock(&umrc->lock); + /* Preventing any further WRs to be sent now */ + if (umrc->state != MLX5_UMR_STATE_RECOVER) { + mlx5_ib_warn(dev, "UMR recovery encountered an unexpected state=%d\n", + umrc->state); + umrc->state = MLX5_UMR_STATE_RECOVER; + } + mutex_unlock(&umrc->lock); + + /* Sending a final/barrier WR (the failed one) and wait for its completion. + * This will ensure that all the previous WRs got a completion before + * we set the QP state to RESET. + */ + err = mlx5r_umr_post_send(umrc->qp, mkey, &umr_context->cqe, wqe, + with_data); + if (err) { + mlx5_ib_warn(dev, "UMR recovery post send failed, err %d\n", err); + goto err; + } + + /* Since the QP is in an error state, it will only receive + * IB_WC_WR_FLUSH_ERR. However, as it serves only as a barrier + * we don't care about its status. + */ + wait_for_completion(&umr_context->done); + + attr.qp_state = IB_QPS_RESET; + err = ib_modify_qp(umrc->qp, &attr, IB_QP_STATE); + if (err) { + mlx5_ib_warn(dev, "Couldn't modify UMR QP to RESET, err=%d\n", err); + goto err; + } + + err = mlx5r_umr_qp_rst2rts(dev, umrc->qp); + if (err) { + mlx5_ib_warn(dev, "Couldn't modify UMR QP to RTS, err=%d\n", err); + goto err; + } + + umrc->state = MLX5_UMR_STATE_ACTIVE; + return 0; + +err: + umrc->state = MLX5_UMR_STATE_ERR; + return err; +} + static void mlx5r_umr_done(struct ib_cq *cq, struct ib_wc *wc) { struct mlx5_ib_umr_context *context = @@ -366,9 +397,7 @@ static int mlx5r_umr_post_send_wait(struct mlx5_ib_dev *dev, u32 mkey, mlx5_ib_warn(dev, "reg umr failed (%u). 
Trying to recover and resubmit the flushed WQEs, mkey = %u\n", umr_context.status, mkey); - mutex_lock(&umrc->lock); - err = mlx5r_umr_recover(dev); - mutex_unlock(&umrc->lock); + err = mlx5r_umr_recover(dev, mkey, &umr_context, wqe, with_data); if (err) mlx5_ib_warn(dev, "couldn't recover UMR, err %d\n", err); diff --git a/drivers/infiniband/hw/mthca/mthca_catas.c b/drivers/infiniband/hw/mthca/mthca_catas.c index ffb98eaaf1c2..6eabef9aa211 100644 --- a/drivers/infiniband/hw/mthca/mthca_catas.c +++ b/drivers/infiniband/hw/mthca/mthca_catas.c @@ -171,7 +171,7 @@ void mthca_start_catas_poll(struct mthca_dev *dev) void mthca_stop_catas_poll(struct mthca_dev *dev) { - del_timer_sync(&dev->catas_err.timer); + timer_delete_sync(&dev->catas_err.timer); if (dev->catas_err.map) iounmap(dev->catas_err.map); diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c index 4fcbef99e400..bdd724add147 100644 --- a/drivers/infiniband/hw/qib/qib_driver.c +++ b/drivers/infiniband/hw/qib/qib_driver.c @@ -768,7 +768,7 @@ int qib_reset_device(int unit) ppd = dd->pport + pidx; if (atomic_read(&ppd->led_override_timer_active)) { /* Need to stop LED timer, _then_ shut off LEDs */ - del_timer_sync(&ppd->led_override_timer); + timer_delete_sync(&ppd->led_override_timer); atomic_set(&ppd->led_override_timer_active, 0); } diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c index b27791029fa9..b9f4a2937c3a 100644 --- a/drivers/infiniband/hw/qib/qib_fs.c +++ b/drivers/infiniband/hw/qib/qib_fs.c @@ -55,6 +55,7 @@ static int qibfs_mknod(struct inode *dir, struct dentry *dentry, struct inode *inode = new_inode(dir->i_sb); if (!inode) { + dput(dentry); error = -EPERM; goto bail; } diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c index 78dfe98ebcf7..302c0d19f57d 100644 --- a/drivers/infiniband/hw/qib/qib_iba7220.c +++ b/drivers/infiniband/hw/qib/qib_iba7220.c @@ -1656,7 +1656,7 @@ static void qib_7220_quiet_serdes(struct qib_pportdata *ppd) ppd->cpspec->chase_end = 0; if (ppd->cpspec->chase_timer.function) /* if initted */ - del_timer_sync(&ppd->cpspec->chase_timer); + timer_delete_sync(&ppd->cpspec->chase_timer); if (ppd->cpspec->ibsymdelta || ppd->cpspec->iblnkerrdelta || ppd->cpspec->ibdeltainprog) { @@ -2605,7 +2605,7 @@ static int qib_7220_set_ib_cfg(struct qib_pportdata *ppd, int which, u32 val) * wait forpending timer, but don't clear .data (ppd)! */ if (ppd->cpspec->chase_timer.expires) { - del_timer_sync(&ppd->cpspec->chase_timer); + timer_delete_sync(&ppd->cpspec->chase_timer); ppd->cpspec->chase_timer.expires = 0; } break; diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index 9db29916e35a..7b4bf06c3b38 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -2512,7 +2512,7 @@ static void qib_7322_mini_quiet_serdes(struct qib_pportdata *ppd) ppd->cpspec->chase_end = 0; if (ppd->cpspec->chase_timer.function) /* if initted */ - del_timer_sync(&ppd->cpspec->chase_timer); + timer_delete_sync(&ppd->cpspec->chase_timer); /* * Despite the name, actually disables IBC as well. Do it when @@ -4239,7 +4239,7 @@ static int qib_7322_set_ib_cfg(struct qib_pportdata *ppd, int which, u32 val) * wait forpending timer, but don't clear .data (ppd)! 
*/ if (ppd->cpspec->chase_timer.expires) { - del_timer_sync(&ppd->cpspec->chase_timer); + timer_delete_sync(&ppd->cpspec->chase_timer); ppd->cpspec->chase_timer.expires = 0; } break; diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index 4100656fe9a3..33c23adec101 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -796,19 +796,19 @@ static void qib_stop_timers(struct qib_devdata *dd) int pidx; if (dd->stats_timer.function) - del_timer_sync(&dd->stats_timer); + timer_delete_sync(&dd->stats_timer); if (dd->intrchk_timer.function) - del_timer_sync(&dd->intrchk_timer); + timer_delete_sync(&dd->intrchk_timer); for (pidx = 0; pidx < dd->num_pports; ++pidx) { ppd = dd->pport + pidx; if (ppd->hol_timer.function) - del_timer_sync(&ppd->hol_timer); + timer_delete_sync(&ppd->hol_timer); if (ppd->led_override_timer.function) { - del_timer_sync(&ppd->led_override_timer); + timer_delete_sync(&ppd->led_override_timer); atomic_set(&ppd->led_override_timer_active, 0); } if (ppd->symerr_clear_timer.function) - del_timer_sync(&ppd->symerr_clear_timer); + timer_delete_sync(&ppd->symerr_clear_timer); } } diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index ef02f2bfddb2..568deb77ab4d 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -2441,7 +2441,7 @@ void qib_notify_free_mad_agent(struct rvt_dev_info *rdi, int port_idx) struct qib_devdata, verbs_dev); if (dd->pport[port_idx].cong_stats.timer.function) - del_timer_sync(&dd->pport[port_idx].cong_stats.timer); + timer_delete_sync(&dd->pport[port_idx].cong_stats.timer); if (dd->pport[port_idx].ibport_data.smi_ah) rdma_destroy_ah(&dd->pport[port_idx].ibport_data.smi_ah->ibah, diff --git a/drivers/infiniband/hw/qib/qib_sd7220.c b/drivers/infiniband/hw/qib/qib_sd7220.c index 1dc3ccf0cf1f..c4ee120ac7fb 100644 --- a/drivers/infiniband/hw/qib/qib_sd7220.c +++ b/drivers/infiniband/hw/qib/qib_sd7220.c @@ -1375,7 +1375,7 @@ void toggle_7220_rclkrls(struct qib_devdata *dd) void shutdown_7220_relock_poll(struct qib_devdata *dd) { if (dd->cspec->relock_timer_active) - del_timer_sync(&dd->cspec->relock_timer); + timer_delete_sync(&dd->cspec->relock_timer); } static unsigned qib_relock_by_timer = 1; diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c index ba2cd68b53e6..805e37dc7621 100644 --- a/drivers/infiniband/hw/qib/qib_sysfs.c +++ b/drivers/infiniband/hw/qib/qib_sysfs.c @@ -214,8 +214,8 @@ static const struct attribute_group port_linkcontrol_group = { * Congestion control table size followed by table entries */ static ssize_t cc_table_bin_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, char *buf, - loff_t pos, size_t count) + const struct bin_attribute *bin_attr, + char *buf, loff_t pos, size_t count) { struct qib_pportdata *ppd = qib_get_pportdata_kobj(kobj); int ret; @@ -241,7 +241,7 @@ static ssize_t cc_table_bin_read(struct file *filp, struct kobject *kobj, return count; } -static BIN_ATTR_RO(cc_table_bin, PAGE_SIZE); +static const BIN_ATTR_RO(cc_table_bin, PAGE_SIZE); /* * Congestion settings: port control, control map and an array of 16 @@ -249,8 +249,8 @@ static BIN_ATTR_RO(cc_table_bin, PAGE_SIZE); * trigger threshold and the minimum injection rate delay. 
*/ static ssize_t cc_setting_bin_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, char *buf, - loff_t pos, size_t count) + const struct bin_attribute *bin_attr, + char *buf, loff_t pos, size_t count) { struct qib_pportdata *ppd = qib_get_pportdata_kobj(kobj); int ret; @@ -274,9 +274,9 @@ static ssize_t cc_setting_bin_read(struct file *filp, struct kobject *kobj, return count; } -static BIN_ATTR_RO(cc_setting_bin, PAGE_SIZE); +static const BIN_ATTR_RO(cc_setting_bin, PAGE_SIZE); -static struct bin_attribute *port_ccmgta_attributes[] = { +static const struct bin_attribute *const port_ccmgta_attributes[] = { &bin_attr_cc_setting_bin, &bin_attr_cc_table_bin, NULL, @@ -295,7 +295,7 @@ static umode_t qib_ccmgta_is_bin_visible(struct kobject *kobj, static const struct attribute_group port_ccmgta_attribute_group = { .name = "CCMgtA", .is_bin_visible = qib_ccmgta_is_bin_visible, - .bin_attrs = port_ccmgta_attributes, + .bin_attrs_new = port_ccmgta_attributes, }; /* Start sl2vl */ diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 5fcb41970ad9..9832567a8bb8 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -1655,7 +1655,7 @@ void qib_unregister_ib_device(struct qib_devdata *dd) if (!list_empty(&dev->memwait)) qib_dev_err(dd, "memwait list not empty!\n"); - del_timer_sync(&dev->mem_timer); + timer_delete_sync(&dev->mem_timer); while (!list_empty(&dev->txreq_free)) { struct list_head *l = dev->txreq_free.next; struct qib_verbs_txreq *tx; diff --git a/drivers/infiniband/hw/usnic/usnic_abi.h b/drivers/infiniband/hw/usnic/usnic_abi.h index 7fe9502ce8d3..86a82a4da0aa 100644 --- a/drivers/infiniband/hw/usnic/usnic_abi.h +++ b/drivers/infiniband/hw/usnic/usnic_abi.h @@ -72,7 +72,7 @@ struct usnic_ib_create_qp_resp { u64 bar_bus_addr; u32 bar_len; /* - * WQ, RQ, CQ are explicity specified bc exposing a generic resources inteface + * WQ, RQ, CQ are explicitly specified bc exposing a generic resources inteface * expands the scope of ABI to many files. 
*/ u32 wq_cnt; diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c index 13b654ddd3cc..4ddcd5860e0f 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_main.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c @@ -151,34 +151,6 @@ static void usnic_ib_handle_usdev_event(struct usnic_ib_dev *us_ibdev, ib_event.element.port_num = 1; ib_dispatch_event(&ib_event); break; - case NETDEV_UP: - case NETDEV_DOWN: - case NETDEV_CHANGE: - if (!us_ibdev->ufdev->link_up && - netif_carrier_ok(netdev)) { - usnic_fwd_carrier_up(us_ibdev->ufdev); - usnic_info("Link UP on %s\n", - dev_name(&us_ibdev->ib_dev.dev)); - ib_event.event = IB_EVENT_PORT_ACTIVE; - ib_event.device = &us_ibdev->ib_dev; - ib_event.element.port_num = 1; - ib_dispatch_event(&ib_event); - } else if (us_ibdev->ufdev->link_up && - !netif_carrier_ok(netdev)) { - usnic_fwd_carrier_down(us_ibdev->ufdev); - usnic_info("Link DOWN on %s\n", - dev_name(&us_ibdev->ib_dev.dev)); - usnic_ib_qp_grp_modify_active_to_err(us_ibdev); - ib_event.event = IB_EVENT_PORT_ERR; - ib_event.device = &us_ibdev->ib_dev; - ib_event.element.port_num = 1; - ib_dispatch_event(&ib_event); - } else { - usnic_dbg("Ignoring %s on %s\n", - netdev_cmd_to_name(event), - dev_name(&us_ibdev->ib_dev.dev)); - } - break; case NETDEV_CHANGEADDR: if (!memcmp(us_ibdev->ufdev->mac, netdev->dev_addr, sizeof(us_ibdev->ufdev->mac))) { @@ -218,6 +190,50 @@ static void usnic_ib_handle_usdev_event(struct usnic_ib_dev *us_ibdev, mutex_unlock(&us_ibdev->usdev_lock); } +static void usnic_ib_handle_port_event(struct ib_device *ibdev, + struct net_device *netdev, + unsigned long event) +{ + struct usnic_ib_dev *us_ibdev = + container_of(ibdev, struct usnic_ib_dev, ib_dev); + struct ib_event ib_event; + + mutex_lock(&us_ibdev->usdev_lock); + switch (event) { + case NETDEV_UP: + case NETDEV_DOWN: + case NETDEV_CHANGE: + if (!us_ibdev->ufdev->link_up && + netif_carrier_ok(netdev)) { + usnic_fwd_carrier_up(us_ibdev->ufdev); + usnic_info("Link UP on %s\n", + dev_name(&us_ibdev->ib_dev.dev)); + ib_event.event = IB_EVENT_PORT_ACTIVE; + ib_event.device = &us_ibdev->ib_dev; + ib_event.element.port_num = 1; + ib_dispatch_event(&ib_event); + } else if (us_ibdev->ufdev->link_up && + !netif_carrier_ok(netdev)) { + usnic_fwd_carrier_down(us_ibdev->ufdev); + usnic_info("Link DOWN on %s\n", + dev_name(&us_ibdev->ib_dev.dev)); + usnic_ib_qp_grp_modify_active_to_err(us_ibdev); + ib_event.event = IB_EVENT_PORT_ERR; + ib_event.device = &us_ibdev->ib_dev; + ib_event.element.port_num = 1; + ib_dispatch_event(&ib_event); + } else { + usnic_dbg("Ignoring %s on %s\n", + netdev_cmd_to_name(event), + dev_name(&us_ibdev->ib_dev.dev)); + } + break; + default: + break; + } + mutex_unlock(&us_ibdev->usdev_lock); +} + static int usnic_ib_netdevice_event(struct notifier_block *notifier, unsigned long event, void *ptr) { @@ -358,6 +374,7 @@ static const struct ib_device_ops usnic_dev_ops = { .query_port = usnic_ib_query_port, .query_qp = usnic_ib_query_qp, .reg_user_mr = usnic_ib_reg_mr, + .report_port_event = usnic_ib_handle_port_event, INIT_RDMA_OBJ_SIZE(ib_pd, usnic_ib_pd, ibpd), INIT_RDMA_OBJ_SIZE(ib_cq, usnic_ib_cq, ibcq), INIT_RDMA_OBJ_SIZE(ib_qp, usnic_ib_qp_grp, ibqp), diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index 768aad364c89..1664d1d7d969 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -143,6 +143,46 @@ static int 
pvrdma_port_immutable(struct ib_device *ibdev, u32 port_num, return 0; } +static void pvrdma_dispatch_event(struct pvrdma_dev *dev, int port, + enum ib_event_type event) +{ + struct ib_event ib_event; + + memset(&ib_event, 0, sizeof(ib_event)); + ib_event.device = &dev->ib_dev; + ib_event.element.port_num = port; + ib_event.event = event; + ib_dispatch_event(&ib_event); +} + +static void pvrdma_report_event_handle(struct ib_device *ibdev, + struct net_device *ndev, + unsigned long event) +{ + struct pvrdma_dev *dev = container_of(ibdev, struct pvrdma_dev, ib_dev); + + switch (event) { + case NETDEV_DOWN: + pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ERR); + break; + case NETDEV_UP: + pvrdma_write_reg(dev, PVRDMA_REG_CTL, + PVRDMA_DEVICE_CTL_UNQUIESCE); + + mb(); + + if (pvrdma_read_reg(dev, PVRDMA_REG_ERR)) + dev_err(&dev->pdev->dev, + "failed to activate device during link up\n"); + else + pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE); + break; + + default: + break; + } +} + static const struct ib_device_ops pvrdma_dev_ops = { .owner = THIS_MODULE, .driver_id = RDMA_DRIVER_VMW_PVRDMA, @@ -181,6 +221,7 @@ static const struct ib_device_ops pvrdma_dev_ops = { .query_qp = pvrdma_query_qp, .reg_user_mr = pvrdma_reg_user_mr, .req_notify_cq = pvrdma_req_notify_cq, + .report_port_event = pvrdma_report_event_handle, INIT_RDMA_OBJ_SIZE(ib_ah, pvrdma_ah, ibah), INIT_RDMA_OBJ_SIZE(ib_cq, pvrdma_cq, ibcq), @@ -362,18 +403,6 @@ static void pvrdma_srq_event(struct pvrdma_dev *dev, u32 srqn, int type) } } -static void pvrdma_dispatch_event(struct pvrdma_dev *dev, int port, - enum ib_event_type event) -{ - struct ib_event ib_event; - - memset(&ib_event, 0, sizeof(ib_event)); - ib_event.device = &dev->ib_dev; - ib_event.element.port_num = port; - ib_event.event = event; - ib_dispatch_event(&ib_event); -} - static void pvrdma_dev_event(struct pvrdma_dev *dev, u8 port, int type) { if (port < 1 || port > dev->dsr->caps.phys_port_cnt) { @@ -666,21 +695,8 @@ static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev, switch (event) { case NETDEV_REBOOT: - case NETDEV_DOWN: pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ERR); break; - case NETDEV_UP: - pvrdma_write_reg(dev, PVRDMA_REG_CTL, - PVRDMA_DEVICE_CTL_UNQUIESCE); - - mb(); - - if (pvrdma_read_reg(dev, PVRDMA_REG_ERR)) - dev_err(&dev->pdev->dev, - "failed to activate device during link up\n"); - else - pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE); - break; case NETDEV_UNREGISTER: ib_device_set_netdev(&dev->ib_dev, NULL, 1); dev_put(dev->netdev); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c index 9f54aa90a35a..bcd43dc30e21 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c @@ -237,34 +237,6 @@ enum rdma_link_layer pvrdma_port_link_layer(struct ib_device *ibdev, return IB_LINK_LAYER_ETHERNET; } -int pvrdma_modify_device(struct ib_device *ibdev, int mask, - struct ib_device_modify *props) -{ - unsigned long flags; - - if (mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID | - IB_DEVICE_MODIFY_NODE_DESC)) { - dev_warn(&to_vdev(ibdev)->pdev->dev, - "unsupported device modify mask %#x\n", mask); - return -EOPNOTSUPP; - } - - if (mask & IB_DEVICE_MODIFY_NODE_DESC) { - spin_lock_irqsave(&to_vdev(ibdev)->desc_lock, flags); - memcpy(ibdev->node_desc, props->node_desc, 64); - spin_unlock_irqrestore(&to_vdev(ibdev)->desc_lock, flags); - } - - if (mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID) { - mutex_lock(&to_vdev(ibdev)->port_mutex); - 
to_vdev(ibdev)->sys_image_guid = - cpu_to_be64(props->sys_image_guid); - mutex_unlock(&to_vdev(ibdev)->port_mutex); - } - - return 0; -} - /** * pvrdma_modify_port - modify device port attributes * @ibdev: the device to modify diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h index 4b9edc03d73d..fd47b0b1df5c 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h @@ -356,8 +356,6 @@ int pvrdma_query_pkey(struct ib_device *ibdev, u32 port, u16 index, u16 *pkey); enum rdma_link_layer pvrdma_port_link_layer(struct ib_device *ibdev, u32 port); -int pvrdma_modify_device(struct ib_device *ibdev, int mask, - struct ib_device_modify *props); int pvrdma_modify_port(struct ib_device *ibdev, u32 port, int mask, struct ib_port_modify *props); int pvrdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); |
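
The mlx5 counters work earlier in this patch backs each per-QP packet/byte opcounter pair (TX and RX) with a single shared flow counter, so mlx5r_fs_destroy_fcs() releases the object only once and merely clears the sibling slot that still points at it. The following standalone userspace model illustrates that bookkeeping only; the enum, struct and sibling() helper are invented stand-ins for the driver's mlx5_fc handling, not driver code.

#include <stdio.h>
#include <stdlib.h>

enum { TX_PKTS, TX_BYTES, RX_PKTS, RX_BYTES, NUM_TYPES };

struct counter_set {
	void *fc[NUM_TYPES];	/* each TX/RX pair shares one object */
};

static int sibling(int type)
{
	switch (type) {
	case TX_PKTS:  return TX_BYTES;
	case TX_BYTES: return TX_PKTS;
	case RX_PKTS:  return RX_BYTES;
	default:       return RX_PKTS;
	}
}

static void destroy_counters(struct counter_set *s)
{
	for (int i = 0; i < NUM_TYPES; i++) {
		if (!s->fc[i])
			continue;
		if (s->fc[sibling(i)]) {
			/* shared object still referenced by the sibling slot:
			 * drop only our pointer, the sibling frees it later */
			s->fc[i] = NULL;
			continue;
		}
		free(s->fc[i]);
		s->fc[i] = NULL;
	}
}

int main(void)
{
	struct counter_set s = { { NULL } };
	void *tx = malloc(1), *rx = malloc(1);

	s.fc[TX_PKTS] = s.fc[TX_BYTES] = tx;	/* TX pair shares one counter */
	s.fc[RX_PKTS] = s.fc[RX_BYTES] = rx;	/* RX pair shares another */
	destroy_counters(&s);			/* frees tx and rx exactly once each */
	printf("all counters released\n");
	return 0;
}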
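
The reworked mlx5_ib_fs_init() above allocates the per-port rdma_transport_rx and rdma_transport_tx arrays and, on failure, unwinds them in reverse order through goto labels before freeing the flow_db itself. A minimal userspace sketch of that staged-allocation pattern follows; the structure name, field sizes and element size are stand-ins chosen for the example.

#include <stdlib.h>

struct flow_db {
	void *rdma_transport_rx;
	void *rdma_transport_tx;
};

static struct flow_db *flow_db_create(unsigned int num_ports)
{
	struct flow_db *db;

	db = calloc(1, sizeof(*db));
	if (!db)
		return NULL;

	db->rdma_transport_rx = calloc(num_ports, 64);
	if (!db->rdma_transport_rx)
		goto free_db;

	db->rdma_transport_tx = calloc(num_ports, 64);
	if (!db->rdma_transport_tx)
		goto free_rx;

	return db;

	/* unwind in reverse order of allocation */
free_rx:
	free(db->rdma_transport_rx);
free_db:
	free(db);
	return NULL;
}

int main(void)
{
	struct flow_db *db = flow_db_create(2);

	if (db) {
		free(db->rdma_transport_tx);
		free(db->rdma_transport_rx);
		free(db);
	}
	return 0;
}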
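
The odp.c hunks above fold the old pages_in_range() helper into the fault handler and fail a data segment with -EFAULT when fewer pages were mapped than the faulting range spans. Below is a standalone userspace sketch of that page-count arithmetic only; PAGE_SHIFT, the ALIGN macro and the sample values are assumptions made for the illustration, not the kernel's definitions.

#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  ((uint64_t)1 << PAGE_SHIFT)
#define PAGE_MASK  (~(PAGE_SIZE - 1))
#define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))

/* number of pages touched by the byte range [io_virt, io_virt + bcnt) */
static uint64_t pages_in_range(uint64_t io_virt, uint64_t bcnt)
{
	return (ALIGN(io_virt + bcnt, PAGE_SIZE) - (io_virt & PAGE_MASK)) >> PAGE_SHIFT;
}

int main(void)
{
	/* 100 bytes straddling a page boundary span two 4 KiB pages */
	printf("%" PRIu64 "\n", pages_in_range(0x1fe0, 100));
	/* one page-aligned page spans exactly one page */
	printf("%" PRIu64 "\n", pages_in_range(0x3000, PAGE_SIZE));
	return 0;
}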