From f786eebbbefa0c080d45533c5e0f66d500268961 Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Sun, 1 Sep 2024 22:52:30 -0700 Subject: RDMA/bnxt_re: Avoid an extra hwrm per MR creation Firmware now has a new MR registration command where both MR allocation and registration can be done in a single hwrm command. The driver has to issue this new hwrm command whenever the support flag is set. This reduces the number of hwrm commands issued per MR creation and speeds up MR creation. Signed-off-by: Kalesh AP Signed-off-by: Selvin Xavier Link: https://patch.msgid.link/1725256351-12751-4-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/qplib_res.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/infiniband/hw/bnxt_re/qplib_res.h') diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h index a0f78cde314f..b452b2f46ceb 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h @@ -565,4 +565,9 @@ static inline u8 bnxt_qplib_dbr_pacing_en(struct bnxt_qplib_chip_ctx *cctx) return cctx->modes.dbr_pacing; } +static inline bool _is_alloc_mr_unified(u16 dev_cap_flags) +{ + return dev_cap_flags & CREQ_QUERY_FUNC_RESP_SB_MR_REGISTER_ALLOC; +} + #endif /* __BNXT_QPLIB_RES_H__ */ -- cgit v1.2.3 From dc116b7fddbdad000b6f2a8ca41d1fe5371b403c Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Sun, 1 Sep 2024 22:52:31 -0700 Subject: RDMA/bnxt_re: Add support for MR Relaxed Ordering Some adapters support Relaxed Ordering for MRs. The driver queries the firmware for Memory Region relaxed ordering support and sets the relaxed ordering bit in the REGISTER_MR request if the user requests it. Also, this is supported only if the PCIe device has the relaxed ordering attribute enabled. 
Reviewed-by: Chandramohan Akula Reviewed-by: Selvin Xavier Reviewed-by: Vijay Kumar Mandadapu Signed-off-by: Kalesh AP Signed-off-by: Selvin Xavier Link: https://patch.msgid.link/1725256351-12751-5-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 14 ++++++++++++++ drivers/infiniband/hw/bnxt_re/qplib_res.h | 5 +++++ 2 files changed, 19 insertions(+) (limited to 'drivers/infiniband/hw/bnxt_re/qplib_res.h') diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 13e3d71d256a..82c1f3b2f825 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -115,6 +115,14 @@ static enum ib_access_flags __to_ib_access_flags(int qflags) return iflags; }; +static void bnxt_re_check_and_set_relaxed_ordering(struct bnxt_re_dev *rdev, + struct bnxt_qplib_mrw *qplib_mr) +{ + if (_is_relaxed_ordering_supported(rdev->dev_attr.dev_cap_flags2) && + pcie_relaxed_ordering_enabled(rdev->en_dev->pdev)) + qplib_mr->flags |= CMDQ_REGISTER_MR_FLAGS_ENABLE_RO; +} + static int bnxt_re_build_sgl(struct ib_sge *ib_sg_list, struct bnxt_qplib_sge *sg_list, int num) { @@ -3888,6 +3896,9 @@ struct ib_mr *bnxt_re_get_dma_mr(struct ib_pd *ib_pd, int mr_access_flags) mr->qplib_mr.access_flags = __from_ib_access_flags(mr_access_flags); mr->qplib_mr.type = CMDQ_ALLOCATE_MRW_MRW_FLAGS_PMR; + if (mr_access_flags & IB_ACCESS_RELAXED_ORDERING) + bnxt_re_check_and_set_relaxed_ordering(rdev, &mr->qplib_mr); + /* Allocate and register 0 as the address */ rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mr->qplib_mr); if (rc) @@ -4121,6 +4132,9 @@ static struct ib_mr *__bnxt_re_user_reg_mr(struct ib_pd *ib_pd, u64 length, u64 mr->qplib_mr.va = virt_addr; mr->qplib_mr.total_size = length; + if (mr_access_flags & IB_ACCESS_RELAXED_ORDERING) + bnxt_re_check_and_set_relaxed_ordering(rdev, &mr->qplib_mr); + umem_pgs = ib_umem_num_dma_blocks(umem, page_size); rc = 
bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, umem, umem_pgs, page_size); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h index b452b2f46ceb..049805ac95cf 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h @@ -570,4 +570,9 @@ static inline bool _is_alloc_mr_unified(u16 dev_cap_flags) return dev_cap_flags & CREQ_QUERY_FUNC_RESP_SB_MR_REGISTER_ALLOC; } +static inline bool _is_relaxed_ordering_supported(u16 dev_cap_ext_flags2) +{ + return dev_cap_ext_flags2 & CREQ_QUERY_FUNC_RESP_SB_MEMORY_REGION_RO_SUPPORTED; +} + #endif /* __BNXT_QPLIB_RES_H__ */ -- cgit v1.2.3 From cc5b9b48d44756a87170f3901c6c2fd99e6b89b2 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Tue, 10 Sep 2024 21:08:30 -0700 Subject: RDMA/bnxt_re: Recover the device when FW error is detected If the FW crashes, the L2 driver gets notified and it notifies the RoCE driver. Currently the driver doesn't re-initialize the device. Add support for re-initializing the RoCE device. The RoCE device is removed and re-attached in ulp_stop and ulp_start respectively. The recovery logic expects the RoCE driver to remain registered with the L2 driver while it is being removed, so the driver avoids unregistering with the L2 driver in the recovery path. 
Signed-off-by: Chandramohan Akula Signed-off-by: Kalesh AP Signed-off-by: Selvin Xavier Link: https://patch.msgid.link/1726027710-2292-5-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/bnxt_re.h | 15 +++++++ drivers/infiniband/hw/bnxt_re/main.c | 70 +++++++++++++++++-------------- drivers/infiniband/hw/bnxt_re/qplib_res.h | 1 + 3 files changed, 55 insertions(+), 31 deletions(-) (limited to 'drivers/infiniband/hw/bnxt_re/qplib_res.h') diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h index 5df3ce1284c7..e94518b12f86 100644 --- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h +++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h @@ -91,6 +91,15 @@ struct bnxt_re_ring_attr { u8 mode; }; +/* + * Data structure and defines to handle + * recovery + */ +#define BNXT_RE_PRE_RECOVERY_REMOVE 0x1 +#define BNXT_RE_COMPLETE_REMOVE 0x2 +#define BNXT_RE_POST_RECOVERY_INIT 0x4 +#define BNXT_RE_COMPLETE_INIT 0x8 + struct bnxt_re_sqp_entries { struct bnxt_qplib_sge sge; u64 wrid; @@ -224,4 +233,10 @@ static inline struct device *rdev_to_dev(struct bnxt_re_dev *rdev) } extern const struct uapi_definition bnxt_re_uapi_defs[]; + +static inline void bnxt_re_set_pacing_dev_state(struct bnxt_re_dev *rdev) +{ + rdev->qplib_res.pacing_data->dev_err_state = + test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags); +} #endif diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index dc63ad07b2c2..adff9e494c9d 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -83,7 +83,7 @@ static void bnxt_re_dev_stop(struct bnxt_re_dev *rdev); static int bnxt_re_netdev_event(struct notifier_block *notifier, unsigned long event, void *ptr); static struct bnxt_re_dev *bnxt_re_from_netdev(struct net_device *netdev); -static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev); +static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev, u8 
op_type); static int bnxt_re_hwrm_qcaps(struct bnxt_re_dev *rdev); static int bnxt_re_hwrm_qcfg(struct bnxt_re_dev *rdev, u32 *db_len, @@ -169,6 +169,7 @@ static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev) en_dev = rdev->en_dev; + rdev->qplib_res.pdev = en_dev->pdev; chip_ctx = kzalloc(sizeof(*chip_ctx), GFP_KERNEL); if (!chip_ctx) return -ENOMEM; @@ -301,7 +302,7 @@ static void bnxt_re_shutdown(struct auxiliary_device *adev) rdev = en_info->rdev; ib_unregister_device(&rdev->ibdev); - bnxt_re_dev_uninit(rdev); + bnxt_re_dev_uninit(rdev, BNXT_RE_COMPLETE_REMOVE); } static void bnxt_re_stop_irq(void *handle) @@ -385,14 +386,9 @@ static struct bnxt_ulp_ops bnxt_re_ulp_ops = { static int bnxt_re_register_netdev(struct bnxt_re_dev *rdev) { struct bnxt_en_dev *en_dev; - int rc; en_dev = rdev->en_dev; - - rc = bnxt_register_dev(en_dev, &bnxt_re_ulp_ops, rdev->adev); - if (!rc) - rdev->qplib_res.pdev = rdev->en_dev->pdev; - return rc; + return bnxt_register_dev(en_dev, &bnxt_re_ulp_ops, rdev->adev); } static void bnxt_re_init_hwrm_hdr(struct input *hdr, u16 opcd) @@ -1593,7 +1589,7 @@ static int bnxt_re_ib_init(struct bnxt_re_dev *rdev) return rc; } -static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev) +static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev, u8 op_type) { u8 type; int rc; @@ -1626,8 +1622,10 @@ static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev) bnxt_re_deinitialize_dbr_pacing(rdev); bnxt_re_destroy_chip_ctx(rdev); - if (test_and_clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags)) - bnxt_unregister_dev(rdev->en_dev); + if (op_type == BNXT_RE_COMPLETE_REMOVE) { + if (test_and_clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags)) + bnxt_unregister_dev(rdev->en_dev); + } } /* worker thread for polling periodic events. 
Now used for QoS programming*/ @@ -1640,7 +1638,7 @@ static void bnxt_re_worker(struct work_struct *work) schedule_delayed_work(&rdev->worker, msecs_to_jiffies(30000)); } -static int bnxt_re_dev_init(struct bnxt_re_dev *rdev) +static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 op_type) { struct bnxt_re_ring_attr rattr = {}; struct bnxt_qplib_creq_ctx *creq; @@ -1649,12 +1647,14 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev) u8 type; int rc; - /* Registered a new RoCE device instance to netdev */ - rc = bnxt_re_register_netdev(rdev); - if (rc) { - ibdev_err(&rdev->ibdev, - "Failed to register with netedev: %#x\n", rc); - return -EINVAL; + if (op_type == BNXT_RE_COMPLETE_INIT) { + /* Registered a new RoCE device instance to netdev */ + rc = bnxt_re_register_netdev(rdev); + if (rc) { + ibdev_err(&rdev->ibdev, + "Failed to register with netedev: %#x\n", rc); + return -EINVAL; + } } set_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags); @@ -1807,7 +1807,7 @@ free_ring: free_rcfw: bnxt_qplib_free_rcfw_channel(&rdev->rcfw); fail: - bnxt_re_dev_uninit(rdev); + bnxt_re_dev_uninit(rdev, BNXT_RE_COMPLETE_REMOVE); return rc; } @@ -1827,7 +1827,7 @@ static void bnxt_re_update_en_info_rdev(struct bnxt_re_dev *rdev, rtnl_unlock(); } -static int bnxt_re_add_device(struct auxiliary_device *adev) +static int bnxt_re_add_device(struct auxiliary_device *adev, u8 op_type) { struct bnxt_aux_priv *aux_priv = container_of(adev, struct bnxt_aux_priv, aux_dev); @@ -1839,8 +1839,6 @@ static int bnxt_re_add_device(struct auxiliary_device *adev) en_info = auxiliary_get_drvdata(adev); en_dev = en_info->en_dev; - /* en_dev should never be NULL as long as adev and aux_dev are valid. 
*/ - en_dev = aux_priv->edev; rdev = bnxt_re_dev_add(aux_priv, en_dev); if (!rdev || !rdev_to_dev(rdev)) { @@ -1850,7 +1848,7 @@ static int bnxt_re_add_device(struct auxiliary_device *adev) bnxt_re_update_en_info_rdev(rdev, en_info, adev); - rc = bnxt_re_dev_init(rdev); + rc = bnxt_re_dev_init(rdev, op_type); if (rc) goto re_dev_dealloc; @@ -1875,7 +1873,7 @@ static int bnxt_re_add_device(struct auxiliary_device *adev) re_dev_uninit: bnxt_re_update_en_info_rdev(NULL, en_info, adev); - bnxt_re_dev_uninit(rdev); + bnxt_re_dev_uninit(rdev, BNXT_RE_COMPLETE_REMOVE); re_dev_dealloc: ib_dealloc_device(&rdev->ibdev); exit: @@ -1958,7 +1956,7 @@ exit: #define BNXT_ADEV_NAME "bnxt_en" -static void bnxt_re_remove_device(struct bnxt_re_dev *rdev, +static void bnxt_re_remove_device(struct bnxt_re_dev *rdev, u8 op_type, struct auxiliary_device *aux_dev) { if (rdev->nb.notifier_call) { @@ -1972,7 +1970,7 @@ static void bnxt_re_remove_device(struct bnxt_re_dev *rdev, } bnxt_re_setup_cc(rdev, false); ib_unregister_device(&rdev->ibdev); - bnxt_re_dev_uninit(rdev); + bnxt_re_dev_uninit(rdev, op_type); ib_dealloc_device(&rdev->ibdev); } @@ -1991,7 +1989,7 @@ static void bnxt_re_remove(struct auxiliary_device *adev) rdev = en_info->rdev; if (rdev) - bnxt_re_remove_device(rdev, adev); + bnxt_re_remove_device(rdev, BNXT_RE_COMPLETE_REMOVE, adev); kfree(en_info); mutex_unlock(&bnxt_re_mutex); } @@ -2017,7 +2015,7 @@ static int bnxt_re_probe(struct auxiliary_device *adev, auxiliary_set_drvdata(adev, en_info); - rc = bnxt_re_add_device(adev); + rc = bnxt_re_add_device(adev, BNXT_RE_COMPLETE_INIT); if (rc) goto err; mutex_unlock(&bnxt_re_mutex); @@ -2033,12 +2031,14 @@ err: static int bnxt_re_suspend(struct auxiliary_device *adev, pm_message_t state) { struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(adev); + struct bnxt_en_dev *en_dev; struct bnxt_re_dev *rdev; if (!en_info) return 0; rdev = en_info->rdev; + en_dev = en_info->en_dev; mutex_lock(&bnxt_re_mutex); /* L2 driver may 
invoke this callback during device error/crash or device * reset. Current RoCE driver doesn't recover the device in case of @@ -2057,13 +2057,20 @@ static int bnxt_re_suspend(struct auxiliary_device *adev, pm_message_t state) set_bit(ERR_DEVICE_DETACHED, &rdev->rcfw.cmdq.flags); bnxt_re_dev_stop(rdev); - bnxt_re_stop_irq(rdev); + bnxt_re_stop_irq(adev); /* Move the device states to detached and avoid sending any more * commands to HW */ set_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags); set_bit(ERR_DEVICE_DETACHED, &rdev->rcfw.cmdq.flags); wake_up_all(&rdev->rcfw.cmdq.waitq); + + if (rdev->pacing.dbr_pacing) + bnxt_re_set_pacing_dev_state(rdev); + + ibdev_info(&rdev->ibdev, "%s: L2 driver notified to stop en_state 0x%lx", + __func__, en_dev->en_state); + bnxt_re_remove_device(rdev, BNXT_RE_PRE_RECOVERY_REMOVE, adev); mutex_unlock(&bnxt_re_mutex); return 0; @@ -2077,7 +2084,6 @@ static int bnxt_re_resume(struct auxiliary_device *adev) if (!en_info) return 0; - rdev = en_info->rdev; mutex_lock(&bnxt_re_mutex); /* L2 driver may invoke this callback during device recovery, resume. * reset. Current RoCE driver doesn't recover the device in case of @@ -2086,7 +2092,9 @@ static int bnxt_re_resume(struct auxiliary_device *adev) * L2 driver want to modify the MSIx table. */ - ibdev_info(&rdev->ibdev, "Handle device resume call"); + bnxt_re_add_device(adev, BNXT_RE_POST_RECOVERY_INIT); + rdev = en_info->rdev; + ibdev_info(&rdev->ibdev, "Device resume completed"); mutex_unlock(&bnxt_re_mutex); return 0; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h index 049805ac95cf..c2f710364e0f 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h @@ -82,6 +82,7 @@ struct bnxt_qplib_db_pacing_data { u32 fifo_room_mask; u32 fifo_room_shift; u32 grc_reg_offset; + u32 dev_err_state; }; #define BNXT_QPLIB_DBR_PF_DB_OFFSET 0x10000 -- cgit v1.2.3