author    | Linus Torvalds <torvalds@linux-foundation.org> | 2020-10-17 11:18:18 -0700
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-10-17 11:18:18 -0700
commit    | a1e16bc7d5f7ca3599d8a7f061841c93a563665e (patch)
tree      | 73e557536e5dc52894e0e24439406db3510c5434 /drivers/infiniband/hw
parent    | 2a934b38c066ff221b08a9c703314a2a1c885dbd (diff)
parent    | c7a198c700763ac89abbb166378f546aeb9afb33 (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma updates from Jason Gunthorpe:
"A usual cycle for RDMA with a typical mix of driver and core subsystem
updates:
- Driver minor changes and bug fixes for mlx5, efa, rxe, vmw_pvrdma,
hns, usnic, qib, qedr, cxgb4, bnxt_re
- Various rtrs fixes and updates
- Bug fix for mlx4 CM emulation for virtualization scenarios where
MRA wasn't working right
- Use tracepoints instead of pr_debug in the CM code
- Scrub the locking in ucma and cma to close more syzkaller bugs
- Use tasklet_setup() in the subsystem (conversion pattern sketched after this list)
- Revert the idea that 'destroy' operations are not allowed to fail
at the driver level; this proved unworkable from a hardware
perspective (see the destroy-callback sketch after this list)
- Revise how the umem API works so drivers make fewer mistakes using
it (DMA-block iteration sketched after this list)
- XRC support for qedr
- Convert uverbs objects RWQ and MW to the new allocation scheme
- Large queue entry sizes for hns
- Use hmm_range_fault() for mlx5 On Demand Paging
- uverbs APIs to inspect the GID table instead of sysfs
- Move some of the RDMA code for building large page SGLs into
lib/scatterlist"
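The tasklet_setup() item above follows the same mechanical pattern throughout the subsystem (the bnxt_re and hfi1 hunks below show it): the callback now receives the struct tasklet_struct and recovers its container with from_tasklet() instead of casting an unsigned long cookie. A minimal sketch with hypothetical foo_* names:

```c
#include <linux/interrupt.h>

/* Hypothetical driver context embedding its tasklet. */
struct foo_nq {
	struct tasklet_struct nq_tasklet;
	/* ... notification-queue state ... */
};

/* New-style callback: takes the tasklet, not an unsigned long cookie. */
static void foo_service_nq(struct tasklet_struct *t)
{
	struct foo_nq *nq = from_tasklet(nq, t, nq_tasklet);

	/* process completions on nq ... */
}

static void foo_init_nq(struct foo_nq *nq)
{
	/* Replaces tasklet_init(&nq->nq_tasklet, cb, (unsigned long)nq). */
	tasklet_setup(&nq->nq_tasklet, foo_service_nq);
}
```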
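For the destroy-may-fail revert above, the destroy callbacks in ib_device_ops now return int; drivers whose teardown cannot fail simply return 0 (as the bnxt_re and cxgb4 hunks below do), while a hardware refusal can be reported back to the core. A minimal sketch, with the foo_* names and the firmware call purely hypothetical:

```c
#include <linux/slab.h>
#include <rdma/ib_verbs.h>

struct foo_cq {				/* hypothetical driver CQ */
	struct ib_cq ibcq;
	void *cql;
};

static int foo_hw_destroy_cq(struct foo_cq *cq);	/* hypothetical FW command */

/* ib_device_ops.destroy_cq now returns int: 0 once the object is gone,
 * a negative errno if the hardware refused and the core must keep it. */
static int foo_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
{
	struct foo_cq *cq = container_of(ibcq, struct foo_cq, ibcq);
	int rc;

	rc = foo_hw_destroy_cq(cq);
	if (rc)
		return rc;

	kfree(cq->cql);
	return 0;
}
```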
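The umem rework above replaces driver walks over umem->sg_head.sgl / umem->nmap with DMA-block helpers: pick a hardware-supported page size with ib_umem_find_best_pgsz(), size arrays with ib_umem_num_dma_blocks(), and iterate with rdma_umem_for_each_dma_block(). A minimal sketch of building a page-buffer list, with the foo_* naming and the supported-size bitmap left as assumptions:

```c
#include <linux/slab.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_verbs.h>

/* Hypothetical: build a PBL for a user MR. 'supported_pg_sz' is whatever
 * page sizes the HW accepts, e.g. SZ_4K | SZ_2M. */
static int foo_build_pbl(struct ib_umem *umem, u64 virt_addr,
			 unsigned long supported_pg_sz,
			 u64 **pbl_out, u32 *nblocks_out)
{
	struct ib_block_iter biter;
	unsigned long pg_sz;
	u64 *pbl;
	u32 i = 0;

	/* Largest HW-supported page size that fits this umem's layout. */
	pg_sz = ib_umem_find_best_pgsz(umem, supported_pg_sz, virt_addr);
	if (!pg_sz)
		return -EINVAL;

	pbl = kcalloc(ib_umem_num_dma_blocks(umem, pg_sz), sizeof(*pbl),
		      GFP_KERNEL);
	if (!pbl)
		return -ENOMEM;

	/* Replaces open-coded scatterlist walks over umem->sg_head.sgl. */
	rdma_umem_for_each_dma_block(umem, &biter, pg_sz)
		pbl[i++] = rdma_block_iter_dma_address(&biter);

	*pbl_out = pbl;
	*nblocks_out = i;
	return 0;
}
```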
* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (191 commits)
RDMA/ucma: Fix use after free in destroy id flow
RDMA/rxe: Handle skb_clone() failure in rxe_recv.c
RDMA/rxe: Move the definitions for rxe_av.network_type to uAPI
RDMA: Explicitly pass in the dma_device to ib_register_device
lib/scatterlist: Do not limit max_segment to PAGE_ALIGNED values
IB/mlx4: Convert rej_tmout radix-tree to XArray
RDMA/rxe: Fix bug rejecting all multicast packets
RDMA/rxe: Fix skb lifetime in rxe_rcv_mcast_pkt()
RDMA/rxe: Remove duplicate entries in struct rxe_mr
IB/hfi,rdmavt,qib,opa_vnic: Update MAINTAINERS
IB/rdmavt: Fix sizeof mismatch
MAINTAINERS: CISCO VIC LOW LATENCY NIC DRIVER
RDMA/bnxt_re: Fix sizeof mismatch for allocation of pbl_tbl.
RDMA/bnxt_re: Use rdma_umem_for_each_dma_block()
RDMA/umem: Move to allocate SG table from pages
lib/scatterlist: Add support in dynamic allocation of SG table from pages
tools/testing/scatterlist: Show errors in human readable form
tools/testing/scatterlist: Rejuvenate bit-rotten test
RDMA/ipoib: Set rtnl_link_ops for ipoib interfaces
RDMA/uverbs: Expose the new GID query API to user space
...
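One of the commits listed above converts mlx4's rej_tmout radix tree to an XArray. The mlx4-specific code is not reproduced here; the sketch below only illustrates the generic XArray idiom (embedded locking, insert/erase by index) that such conversions rely on, with hypothetical foo_* names:

```c
#include <linux/slab.h>
#include <linux/xarray.h>

struct foo_entry {			/* hypothetical per-key state */
	int data;
};

static DEFINE_XARRAY(foo_xa);		/* replaces a radix tree plus its lock */

static int foo_store(unsigned long key, int data)
{
	struct foo_entry *e = kzalloc(sizeof(*e), GFP_KERNEL);

	if (!e)
		return -ENOMEM;
	e->data = data;
	/* xa_insert() fails with -EBUSY if the key is already present. */
	return xa_insert(&foo_xa, key, e, GFP_KERNEL);
}

static void foo_erase(unsigned long key)
{
	struct foo_entry *e = xa_erase(&foo_xa, key);

	kfree(e);	/* kfree(NULL) is a no-op if the key was absent */
}
```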
Diffstat (limited to 'drivers/infiniband/hw')
104 files changed, 2624 insertions, 1877 deletions
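Several hunks below (bnxt_re main.c, cxgb4 provider.c, efa_main.c) pick up the new ib_register_device() signature, which takes the DMA-capable struct device explicitly, and raise the DMA segment-size cap so the scatterlist code can coalesce large SGLs. A minimal sketch for a PCI-backed driver, with foo_* names assumed:

```c
#include <linux/dma-mapping.h>
#include <linux/pci.h>
#include <rdma/ib_verbs.h>

/* 'ibdev' is the driver's ib_device, 'pdev' its underlying PCI device. */
static int foo_register(struct ib_device *ibdev, struct pci_dev *pdev)
{
	/* Let umem/scatterlist merge SG entries without a segment cap. */
	dma_set_max_seg_size(&pdev->dev, UINT_MAX);

	/* Third argument is new this cycle: the device used for DMA mapping. */
	return ib_register_device(ibdev, "foo_%d", &pdev->dev);
}
```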
diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h index a300588634c5..b930ea3dab7a 100644 --- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h +++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h @@ -150,7 +150,7 @@ struct bnxt_re_dev { struct delayed_work worker; u8 cur_prio_map; - u8 active_speed; + u16 active_speed; u8 active_width; /* FP Notification Queue (CQ & SRQ) */ diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 1d7a9ca5240c..cf3db9628397 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -532,7 +532,7 @@ fail: } /* Protection Domains */ -void bnxt_re_dealloc_pd(struct ib_pd *ib_pd, struct ib_udata *udata) +int bnxt_re_dealloc_pd(struct ib_pd *ib_pd, struct ib_udata *udata) { struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd); struct bnxt_re_dev *rdev = pd->rdev; @@ -542,6 +542,7 @@ void bnxt_re_dealloc_pd(struct ib_pd *ib_pd, struct ib_udata *udata) if (pd->qplib_pd.id) bnxt_qplib_dealloc_pd(&rdev->qplib_res, &rdev->qplib_res.pd_tbl, &pd->qplib_pd); + return 0; } int bnxt_re_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) @@ -601,13 +602,14 @@ fail: } /* Address Handles */ -void bnxt_re_destroy_ah(struct ib_ah *ib_ah, u32 flags) +int bnxt_re_destroy_ah(struct ib_ah *ib_ah, u32 flags) { struct bnxt_re_ah *ah = container_of(ib_ah, struct bnxt_re_ah, ib_ah); struct bnxt_re_dev *rdev = ah->rdev; bnxt_qplib_destroy_ah(&rdev->qplib_res, &ah->qplib_ah, !(flags & RDMA_DESTROY_AH_SLEEPABLE)); + return 0; } static u8 bnxt_re_stack_to_dev_nw_type(enum rdma_network_type ntype) @@ -938,9 +940,7 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd, return PTR_ERR(umem); qp->sumem = umem; - qplib_qp->sq.sg_info.sghead = umem->sg_head.sgl; - qplib_qp->sq.sg_info.npages = ib_umem_num_pages(umem); - qplib_qp->sq.sg_info.nmap = umem->nmap; + qplib_qp->sq.sg_info.umem = umem; qplib_qp->sq.sg_info.pgsize = PAGE_SIZE; qplib_qp->sq.sg_info.pgshft = PAGE_SHIFT; qplib_qp->qp_handle = ureq.qp_handle; @@ -953,9 +953,7 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd, if (IS_ERR(umem)) goto rqfail; qp->rumem = umem; - qplib_qp->rq.sg_info.sghead = umem->sg_head.sgl; - qplib_qp->rq.sg_info.npages = ib_umem_num_pages(umem); - qplib_qp->rq.sg_info.nmap = umem->nmap; + qplib_qp->rq.sg_info.umem = umem; qplib_qp->rq.sg_info.pgsize = PAGE_SIZE; qplib_qp->rq.sg_info.pgshft = PAGE_SHIFT; } @@ -1568,7 +1566,7 @@ static enum ib_mtu __to_ib_mtu(u32 mtu) } /* Shared Receive Queues */ -void bnxt_re_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata) +int bnxt_re_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata) { struct bnxt_re_srq *srq = container_of(ib_srq, struct bnxt_re_srq, ib_srq); @@ -1583,6 +1581,7 @@ void bnxt_re_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata) atomic_dec(&rdev->srq_count); if (nq) nq->budget--; + return 0; } static int bnxt_re_init_user_srq(struct bnxt_re_dev *rdev, @@ -1608,9 +1607,7 @@ static int bnxt_re_init_user_srq(struct bnxt_re_dev *rdev, return PTR_ERR(umem); srq->umem = umem; - qplib_srq->sg_info.sghead = umem->sg_head.sgl; - qplib_srq->sg_info.npages = ib_umem_num_pages(umem); - qplib_srq->sg_info.nmap = umem->nmap; + qplib_srq->sg_info.umem = umem; qplib_srq->sg_info.pgsize = PAGE_SIZE; qplib_srq->sg_info.pgshft = PAGE_SHIFT; qplib_srq->srq_handle = ureq.srq_handle; @@ -2800,7 +2797,7 @@ int bnxt_re_post_recv(struct ib_qp *ib_qp, 
const struct ib_recv_wr *wr, } /* Completion Queues */ -void bnxt_re_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) +int bnxt_re_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) { struct bnxt_re_cq *cq; struct bnxt_qplib_nq *nq; @@ -2816,6 +2813,7 @@ void bnxt_re_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) atomic_dec(&rdev->cq_count); nq->budget--; kfree(cq->cql); + return 0; } int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, @@ -2860,9 +2858,7 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, rc = PTR_ERR(cq->umem); goto fail; } - cq->qplib_cq.sg_info.sghead = cq->umem->sg_head.sgl; - cq->qplib_cq.sg_info.npages = ib_umem_num_pages(cq->umem); - cq->qplib_cq.sg_info.nmap = cq->umem->nmap; + cq->qplib_cq.sg_info.umem = cq->umem; cq->qplib_cq.dpi = &uctx->dpi; } else { cq->max_cql = min_t(u32, entries, MAX_CQL_PER_POLL); @@ -3774,23 +3770,6 @@ int bnxt_re_dealloc_mw(struct ib_mw *ib_mw) return rc; } -static int bnxt_re_page_size_ok(int page_shift) -{ - switch (page_shift) { - case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_4K: - case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_8K: - case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_64K: - case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_2M: - case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_256K: - case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_1M: - case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_4M: - case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_1G: - return 1; - default: - return 0; - } -} - static int fill_umem_pbl_tbl(struct ib_umem *umem, u64 *pbl_tbl_orig, int page_shift) { @@ -3798,7 +3777,7 @@ static int fill_umem_pbl_tbl(struct ib_umem *umem, u64 *pbl_tbl_orig, u64 page_size = BIT_ULL(page_shift); struct ib_block_iter biter; - rdma_for_each_block(umem->sg_head.sgl, &biter, umem->nmap, page_size) + rdma_umem_for_each_dma_block(umem, &biter, page_size) *pbl_tbl++ = rdma_block_iter_dma_address(&biter); return pbl_tbl - pbl_tbl_orig; @@ -3814,7 +3793,8 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length, struct bnxt_re_mr *mr; struct ib_umem *umem; u64 *pbl_tbl = NULL; - int umem_pgs, page_shift, rc; + unsigned long page_size; + int umem_pgs, rc; if (length > BNXT_RE_MAX_MR_SIZE) { ibdev_err(&rdev->ibdev, "MR Size: %lld > Max supported:%lld\n", @@ -3848,42 +3828,34 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length, mr->ib_umem = umem; mr->qplib_mr.va = virt_addr; - umem_pgs = ib_umem_page_count(umem); - if (!umem_pgs) { - ibdev_err(&rdev->ibdev, "umem is invalid!"); - rc = -EINVAL; - goto free_umem; - } - mr->qplib_mr.total_size = length; - - pbl_tbl = kcalloc(umem_pgs, sizeof(u64 *), GFP_KERNEL); - if (!pbl_tbl) { - rc = -ENOMEM; - goto free_umem; - } - - page_shift = __ffs(ib_umem_find_best_pgsz(umem, - BNXT_RE_PAGE_SIZE_4K | BNXT_RE_PAGE_SIZE_2M, - virt_addr)); - - if (!bnxt_re_page_size_ok(page_shift)) { + page_size = ib_umem_find_best_pgsz( + umem, BNXT_RE_PAGE_SIZE_4K | BNXT_RE_PAGE_SIZE_2M, virt_addr); + if (!page_size) { ibdev_err(&rdev->ibdev, "umem page size unsupported!"); rc = -EFAULT; - goto fail; + goto free_umem; } + mr->qplib_mr.total_size = length; - if (page_shift == BNXT_RE_PAGE_SHIFT_4K && + if (page_size == BNXT_RE_PAGE_SIZE_4K && length > BNXT_RE_MAX_MR_SIZE_LOW) { ibdev_err(&rdev->ibdev, "Requested MR Sz:%llu Max sup:%llu", length, (u64)BNXT_RE_MAX_MR_SIZE_LOW); rc = -EINVAL; - goto fail; + goto free_umem; + } + + umem_pgs = ib_umem_num_dma_blocks(umem, page_size); + pbl_tbl = kcalloc(umem_pgs, sizeof(*pbl_tbl), 
GFP_KERNEL); + if (!pbl_tbl) { + rc = -ENOMEM; + goto free_umem; } /* Map umem buf ptrs to the PBL */ - umem_pgs = fill_umem_pbl_tbl(umem, pbl_tbl, page_shift); + umem_pgs = fill_umem_pbl_tbl(umem, pbl_tbl, order_base_2(page_size)); rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, pbl_tbl, - umem_pgs, false, 1 << page_shift); + umem_pgs, false, page_size); if (rc) { ibdev_err(&rdev->ibdev, "Failed to register user MR"); goto fail; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h index 1daeb30e06fd..9a8130b79256 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h @@ -163,12 +163,12 @@ int bnxt_re_query_gid(struct ib_device *ibdev, u8 port_num, enum rdma_link_layer bnxt_re_get_link_layer(struct ib_device *ibdev, u8 port_num); int bnxt_re_alloc_pd(struct ib_pd *pd, struct ib_udata *udata); -void bnxt_re_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata); +int bnxt_re_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata); int bnxt_re_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata); int bnxt_re_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); int bnxt_re_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); -void bnxt_re_destroy_ah(struct ib_ah *ah, u32 flags); +int bnxt_re_destroy_ah(struct ib_ah *ah, u32 flags); int bnxt_re_create_srq(struct ib_srq *srq, struct ib_srq_init_attr *srq_init_attr, struct ib_udata *udata); @@ -176,7 +176,7 @@ int bnxt_re_modify_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr, enum ib_srq_attr_mask srq_attr_mask, struct ib_udata *udata); int bnxt_re_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); -void bnxt_re_destroy_srq(struct ib_srq *srq, struct ib_udata *udata); +int bnxt_re_destroy_srq(struct ib_srq *srq, struct ib_udata *udata); int bnxt_re_post_srq_recv(struct ib_srq *srq, const struct ib_recv_wr *recv_wr, const struct ib_recv_wr **bad_recv_wr); struct ib_qp *bnxt_re_create_qp(struct ib_pd *pd, @@ -193,7 +193,7 @@ int bnxt_re_post_recv(struct ib_qp *qp, const struct ib_recv_wr *recv_wr, const struct ib_recv_wr **bad_recv_wr); int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ib_udata *udata); -void bnxt_re_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); +int bnxt_re_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); int bnxt_re_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc); int bnxt_re_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags); struct ib_mr *bnxt_re_get_dma_mr(struct ib_pd *pd, int mr_access_flags); diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 53aee5a42ab8..04621ba8fa76 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -736,7 +736,8 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev) if (ret) return ret; - return ib_register_device(ibdev, "bnxt_re%d"); + dma_set_max_seg_size(&rdev->en_dev->pdev->dev, UINT_MAX); + return ib_register_device(ibdev, "bnxt_re%d", &rdev->en_dev->pdev->dev); } static void bnxt_re_dev_remove(struct bnxt_re_dev *rdev) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index f78da54a0bc5..995d4633b0a1 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -295,9 +295,9 @@ static void __wait_for_all_nqes(struct bnxt_qplib_cq *cq, u16 cnq_events) } } -static void 
bnxt_qplib_service_nq(unsigned long data) +static void bnxt_qplib_service_nq(struct tasklet_struct *t) { - struct bnxt_qplib_nq *nq = (struct bnxt_qplib_nq *)data; + struct bnxt_qplib_nq *nq = from_tasklet(nq, t, nq_tasklet); struct bnxt_qplib_hwq *hwq = &nq->hwq; int num_srqne_processed = 0; int num_cqne_processed = 0; @@ -448,8 +448,7 @@ int bnxt_qplib_nq_start_irq(struct bnxt_qplib_nq *nq, int nq_indx, nq->msix_vec = msix_vector; if (need_init) - tasklet_init(&nq->nq_tasklet, bnxt_qplib_service_nq, - (unsigned long)nq); + tasklet_setup(&nq->nq_tasklet, bnxt_qplib_service_nq); else tasklet_enable(&nq->nq_tasklet); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index f7736e34ac64..441eb421e5e5 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -50,7 +50,7 @@ #include "qplib_sp.h" #include "qplib_fp.h" -static void bnxt_qplib_service_creq(unsigned long data); +static void bnxt_qplib_service_creq(struct tasklet_struct *t); /* Hardware communication channel */ static int __wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie) @@ -79,7 +79,7 @@ static int __block_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie) goto done; do { mdelay(1); /* 1m sec */ - bnxt_qplib_service_creq((unsigned long)rcfw); + bnxt_qplib_service_creq(&rcfw->creq.creq_tasklet); } while (test_bit(cbit, cmdq->cmdq_bitmap) && --count); done: return count ? 0 : -ETIMEDOUT; @@ -370,9 +370,9 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw, } /* SP - CREQ Completion handlers */ -static void bnxt_qplib_service_creq(unsigned long data) +static void bnxt_qplib_service_creq(struct tasklet_struct *t) { - struct bnxt_qplib_rcfw *rcfw = (struct bnxt_qplib_rcfw *)data; + struct bnxt_qplib_rcfw *rcfw = from_tasklet(rcfw, t, creq.creq_tasklet); struct bnxt_qplib_creq_ctx *creq = &rcfw->creq; u32 type, budget = CREQ_ENTRY_POLL_BUDGET; struct bnxt_qplib_hwq *hwq = &creq->hwq; @@ -687,8 +687,7 @@ int bnxt_qplib_rcfw_start_irq(struct bnxt_qplib_rcfw *rcfw, int msix_vector, creq->msix_vec = msix_vector; if (need_init) - tasklet_init(&creq->creq_tasklet, - bnxt_qplib_service_creq, (unsigned long)rcfw); + tasklet_setup(&creq->creq_tasklet, bnxt_qplib_service_creq); else tasklet_enable(&creq->creq_tasklet); rc = request_irq(creq->msix_vec, bnxt_qplib_creq_irq, 0, diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c index 7efa6e5dce62..fa7878336100 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c @@ -45,6 +45,9 @@ #include <linux/dma-mapping.h> #include <linux/if_vlan.h> #include <linux/vmalloc.h> +#include <rdma/ib_verbs.h> +#include <rdma/ib_umem.h> + #include "roce_hsi.h" #include "qplib_res.h" #include "qplib_sp.h" @@ -87,12 +90,11 @@ static void __free_pbl(struct bnxt_qplib_res *res, struct bnxt_qplib_pbl *pbl, static void bnxt_qplib_fill_user_dma_pages(struct bnxt_qplib_pbl *pbl, struct bnxt_qplib_sg_info *sginfo) { - struct scatterlist *sghead = sginfo->sghead; - struct sg_dma_page_iter sg_iter; + struct ib_block_iter biter; int i = 0; - for_each_sg_dma_page(sghead, &sg_iter, sginfo->nmap, 0) { - pbl->pg_map_arr[i] = sg_page_iter_dma_address(&sg_iter); + rdma_umem_for_each_dma_block(sginfo->umem, &biter, sginfo->pgsize) { + pbl->pg_map_arr[i] = rdma_block_iter_dma_address(&biter); pbl->pg_arr[i] = NULL; pbl->pg_count++; i++; @@ -104,15 +106,16 @@ static int __alloc_pbl(struct bnxt_qplib_res *res, struct 
bnxt_qplib_sg_info *sginfo) { struct pci_dev *pdev = res->pdev; - struct scatterlist *sghead; bool is_umem = false; u32 pages; int i; if (sginfo->nopte) return 0; - pages = sginfo->npages; - sghead = sginfo->sghead; + if (sginfo->umem) + pages = ib_umem_num_dma_blocks(sginfo->umem, sginfo->pgsize); + else + pages = sginfo->npages; /* page ptr arrays */ pbl->pg_arr = vmalloc(pages * sizeof(void *)); if (!pbl->pg_arr) @@ -127,7 +130,7 @@ static int __alloc_pbl(struct bnxt_qplib_res *res, pbl->pg_count = 0; pbl->pg_size = sginfo->pgsize; - if (!sghead) { + if (!sginfo->umem) { for (i = 0; i < pages; i++) { pbl->pg_arr[i] = dma_alloc_coherent(&pdev->dev, pbl->pg_size, @@ -183,14 +186,12 @@ int bnxt_qplib_alloc_init_hwq(struct bnxt_qplib_hwq *hwq, struct bnxt_qplib_sg_info sginfo = {}; u32 depth, stride, npbl, npde; dma_addr_t *src_phys_ptr, **dst_virt_ptr; - struct scatterlist *sghead = NULL; struct bnxt_qplib_res *res; struct pci_dev *pdev; int i, rc, lvl; res = hwq_attr->res; pdev = res->pdev; - sghead = hwq_attr->sginfo->sghead; pg_size = hwq_attr->sginfo->pgsize; hwq->level = PBL_LVL_MAX; @@ -204,7 +205,7 @@ int bnxt_qplib_alloc_init_hwq(struct bnxt_qplib_hwq *hwq, aux_pages++; } - if (!sghead) { + if (!hwq_attr->sginfo->umem) { hwq->is_user = false; npages = (depth * stride) / pg_size + aux_pages; if ((depth * stride) % pg_size) @@ -213,11 +214,14 @@ int bnxt_qplib_alloc_init_hwq(struct bnxt_qplib_hwq *hwq, return -EINVAL; hwq_attr->sginfo->npages = npages; } else { + unsigned long sginfo_num_pages = ib_umem_num_dma_blocks( + hwq_attr->sginfo->umem, hwq_attr->sginfo->pgsize); + hwq->is_user = true; - npages = hwq_attr->sginfo->npages; + npages = sginfo_num_pages; npages = (npages * PAGE_SIZE) / BIT_ULL(hwq_attr->sginfo->pgshft); - if ((hwq_attr->sginfo->npages * PAGE_SIZE) % + if ((sginfo_num_pages * PAGE_SIZE) % BIT_ULL(hwq_attr->sginfo->pgshft)) if (!npages) npages++; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h index 9da470d1e4a3..7a1ab38b95da 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h @@ -126,8 +126,7 @@ struct bnxt_qplib_pbl { }; struct bnxt_qplib_sg_info { - struct scatterlist *sghead; - u32 nmap; + struct ib_umem *umem; u32 npages; u32 pgshft; u32 pgsize; diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 1f288c73ccfc..8769e7aa097f 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -77,9 +77,9 @@ static int enable_ecn; module_param(enable_ecn, int, 0644); MODULE_PARM_DESC(enable_ecn, "Enable ECN (default=0/disabled)"); -static int dack_mode = 1; +static int dack_mode; module_param(dack_mode, int, 0644); -MODULE_PARM_DESC(dack_mode, "Delayed ack mode (default=1)"); +MODULE_PARM_DESC(dack_mode, "Delayed ack mode (default=0)"); uint c4iw_max_read_depth = 32; module_param(c4iw_max_read_depth, int, 0644); diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index 352b8af1998a..28349ed50885 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -967,7 +967,7 @@ int c4iw_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) return !err || err == -ENODATA ? 
npolled : err; } -void c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) +int c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) { struct c4iw_cq *chp; struct c4iw_ucontext *ucontext; @@ -985,6 +985,7 @@ void c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) ucontext ? &ucontext->uctx : &chp->cq.rdev->uctx, chp->destroy_skb, chp->wr_waitp); c4iw_put_wr_wait(chp->wr_waitp); + return 0; } int c4iw_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 2b2b009b371a..a27899402f59 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -985,21 +985,20 @@ int c4iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); int c4iw_dealloc_mw(struct ib_mw *mw); void c4iw_dealloc(struct uld_ctx *ctx); -struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, - struct ib_udata *udata); +int c4iw_alloc_mw(struct ib_mw *mw, struct ib_udata *udata); struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt, int acc, struct ib_udata *udata); struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc); int c4iw_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata); -void c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata); +int c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata); int c4iw_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ib_udata *udata); int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); int c4iw_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *attr, enum ib_srq_attr_mask srq_attr_mask, struct ib_udata *udata); -void c4iw_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata); +int c4iw_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata); int c4iw_create_srq(struct ib_srq *srq, struct ib_srq_init_attr *attrs, struct ib_udata *udata); int c4iw_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata); diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 73936c3341b7..42234df896fb 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -510,7 +510,7 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, __be64 *pages; int shift, n, i; int err = -ENOMEM; - struct sg_dma_page_iter sg_iter; + struct ib_block_iter biter; struct c4iw_dev *rhp; struct c4iw_pd *php; struct c4iw_mr *mhp; @@ -548,7 +548,7 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, shift = PAGE_SHIFT; - n = ib_umem_num_pages(mhp->umem); + n = ib_umem_num_dma_blocks(mhp->umem, 1 << shift); err = alloc_pbl(mhp, n); if (err) goto err_umem_release; @@ -561,8 +561,8 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, i = n = 0; - for_each_sg_dma_page(mhp->umem->sg_head.sgl, &sg_iter, mhp->umem->nmap, 0) { - pages[i++] = cpu_to_be64(sg_page_iter_dma_address(&sg_iter)); + rdma_umem_for_each_dma_block(mhp->umem, &biter, 1 << shift) { + pages[i++] = cpu_to_be64(rdma_block_iter_dma_address(&biter)); if (i == PAGE_SIZE / sizeof(*pages)) { err = write_pbl(&mhp->rhp->rdev, pages, mhp->attr.pbl_addr + (n << 3), i, @@ -611,30 +611,23 @@ err_free_mhp: return ERR_PTR(err); } -struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, - struct ib_udata *udata) +int c4iw_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata) { + struct c4iw_mw *mhp = to_c4iw_mw(ibmw); struct 
c4iw_dev *rhp; struct c4iw_pd *php; - struct c4iw_mw *mhp; u32 mmid; u32 stag = 0; int ret; - if (type != IB_MW_TYPE_1) - return ERR_PTR(-EINVAL); + if (ibmw->type != IB_MW_TYPE_1) + return -EINVAL; - php = to_c4iw_pd(pd); + php = to_c4iw_pd(ibmw->pd); rhp = php->rhp; - mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); - if (!mhp) - return ERR_PTR(-ENOMEM); - mhp->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL); - if (!mhp->wr_waitp) { - ret = -ENOMEM; - goto free_mhp; - } + if (!mhp->wr_waitp) + return -ENOMEM; mhp->dereg_skb = alloc_skb(SGE_MAX_WR_LEN, GFP_KERNEL); if (!mhp->dereg_skb) { @@ -645,18 +638,19 @@ struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, ret = allocate_window(&rhp->rdev, &stag, php->pdid, mhp->wr_waitp); if (ret) goto free_skb; + mhp->rhp = rhp; mhp->attr.pdid = php->pdid; mhp->attr.type = FW_RI_STAG_MW; mhp->attr.stag = stag; mmid = (stag) >> 8; - mhp->ibmw.rkey = stag; + ibmw->rkey = stag; if (xa_insert_irq(&rhp->mrs, mmid, mhp, GFP_KERNEL)) { ret = -ENOMEM; goto dealloc_win; } pr_debug("mmid 0x%x mhp %p stag 0x%x\n", mmid, mhp, stag); - return &(mhp->ibmw); + return 0; dealloc_win: deallocate_window(&rhp->rdev, mhp->attr.stag, mhp->dereg_skb, @@ -665,9 +659,7 @@ free_skb: kfree_skb(mhp->dereg_skb); free_wr_wait: c4iw_put_wr_wait(mhp->wr_waitp); -free_mhp: - kfree(mhp); - return ERR_PTR(ret); + return ret; } int c4iw_dealloc_mw(struct ib_mw *mw) @@ -684,8 +676,6 @@ int c4iw_dealloc_mw(struct ib_mw *mw) mhp->wr_waitp); kfree_skb(mhp->dereg_skb); c4iw_put_wr_wait(mhp->wr_waitp); - pr_debug("ib_mw %p mmid 0x%x ptr %p\n", mw, mmid, mhp); - kfree(mhp); return 0; } diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 6c579d2d3997..8138c57a1e43 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -190,7 +190,7 @@ static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) return ret; } -static void c4iw_deallocate_pd(struct ib_pd *pd, struct ib_udata *udata) +static int c4iw_deallocate_pd(struct ib_pd *pd, struct ib_udata *udata) { struct c4iw_dev *rhp; struct c4iw_pd *php; @@ -202,6 +202,7 @@ static void c4iw_deallocate_pd(struct ib_pd *pd, struct ib_udata *udata) mutex_lock(&rhp->rdev.stats.lock); rhp->rdev.stats.pd.cur--; mutex_unlock(&rhp->rdev.stats.lock); + return 0; } static int c4iw_allocate_pd(struct ib_pd *pd, struct ib_udata *udata) @@ -497,8 +498,10 @@ static const struct ib_device_ops c4iw_dev_ops = { .query_qp = c4iw_ib_query_qp, .reg_user_mr = c4iw_reg_user_mr, .req_notify_cq = c4iw_arm_cq, - INIT_RDMA_OBJ_SIZE(ib_pd, c4iw_pd, ibpd), + INIT_RDMA_OBJ_SIZE(ib_cq, c4iw_cq, ibcq), + INIT_RDMA_OBJ_SIZE(ib_mw, c4iw_mw, ibmw), + INIT_RDMA_OBJ_SIZE(ib_pd, c4iw_pd, ibpd), INIT_RDMA_OBJ_SIZE(ib_srq, c4iw_srq, ibsrq), INIT_RDMA_OBJ_SIZE(ib_ucontext, c4iw_ucontext, ibucontext), }; @@ -567,7 +570,9 @@ void c4iw_register_device(struct work_struct *work) ret = set_netdevs(&dev->ibdev, &dev->rdev); if (ret) goto err_dealloc_ctx; - ret = ib_register_device(&dev->ibdev, "cxgb4_%d"); + dma_set_max_seg_size(&dev->rdev.lldi.pdev->dev, UINT_MAX); + ret = ib_register_device(&dev->ibdev, "cxgb4_%d", + &dev->rdev.lldi.pdev->dev); if (ret) goto err_dealloc_ctx; return; diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index cbddb20c6121..f20379e4e2ec 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -2797,7 +2797,7 @@ err_free_wr_wait: return ret; } -void c4iw_destroy_srq(struct ib_srq *ibsrq, struct 
ib_udata *udata) +int c4iw_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) { struct c4iw_dev *rhp; struct c4iw_srq *srq; @@ -2813,4 +2813,5 @@ void c4iw_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) srq->wr_waitp); c4iw_free_srq_idx(&rhp->rdev, srq->idx); c4iw_put_wr_wait(srq->wr_waitp); + return 0; } diff --git a/drivers/infiniband/hw/efa/efa.h b/drivers/infiniband/hw/efa/efa.h index 1889dd172a25..e5d9712e98c4 100644 --- a/drivers/infiniband/hw/efa/efa.h +++ b/drivers/infiniband/hw/efa/efa.h @@ -33,7 +33,8 @@ struct efa_irq { char name[EFA_IRQNAME_SIZE]; }; -struct efa_sw_stats { +/* Don't use anything other than atomic64 */ +struct efa_stats { atomic64_t alloc_pd_err; atomic64_t create_qp_err; atomic64_t create_cq_err; @@ -41,11 +42,6 @@ struct efa_sw_stats { atomic64_t alloc_ucontext_err; atomic64_t create_ah_err; atomic64_t mmap_err; -}; - -/* Don't use anything other than atomic64 */ -struct efa_stats { - struct efa_sw_stats sw_stats; atomic64_t keep_alive_rcvd; }; @@ -134,12 +130,12 @@ int efa_query_gid(struct ib_device *ibdev, u8 port, int index, int efa_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey); int efa_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata); -void efa_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata); +int efa_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata); int efa_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata); struct ib_qp *efa_create_qp(struct ib_pd *ibpd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata); -void efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata); +int efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata); int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ib_udata *udata); struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, @@ -156,7 +152,7 @@ void efa_mmap_free(struct rdma_user_mmap_entry *rdma_entry); int efa_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata); -void efa_destroy_ah(struct ib_ah *ibah, u32 flags); +int efa_destroy_ah(struct ib_ah *ibah, u32 flags); int efa_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_udata *udata); enum rdma_link_layer efa_port_link_layer(struct ib_device *ibdev, diff --git a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h index 5484b08bbc5d..b199e4ac6cf9 100644 --- a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h +++ b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h @@ -61,6 +61,8 @@ enum efa_admin_qp_state { enum efa_admin_get_stats_type { EFA_ADMIN_GET_STATS_TYPE_BASIC = 0, + EFA_ADMIN_GET_STATS_TYPE_MESSAGES = 1, + EFA_ADMIN_GET_STATS_TYPE_RDMA_READ = 2, }; enum efa_admin_get_stats_scope { @@ -68,14 +70,6 @@ enum efa_admin_get_stats_scope { EFA_ADMIN_GET_STATS_SCOPE_QUEUE = 1, }; -enum efa_admin_modify_qp_mask_bits { - EFA_ADMIN_QP_STATE_BIT = 0, - EFA_ADMIN_CUR_QP_STATE_BIT = 1, - EFA_ADMIN_QKEY_BIT = 2, - EFA_ADMIN_SQ_PSN_BIT = 3, - EFA_ADMIN_SQ_DRAINED_ASYNC_NOTIFY_BIT = 4, -}; - /* * QP allocation sizes, converted by fabric QueuePair (QP) create command * from QP capabilities. 
@@ -199,8 +193,14 @@ struct efa_admin_modify_qp_cmd { struct efa_admin_aq_common_desc aq_common_desc; /* - * Mask indicating which fields should be updated see enum - * efa_admin_modify_qp_mask_bits + * Mask indicating which fields should be updated + * 0 : qp_state + * 1 : cur_qp_state + * 2 : qkey + * 3 : sq_psn + * 4 : sq_drained_async_notify + * 5 : rnr_retry + * 31:6 : reserved */ u32 modify_mask; @@ -222,8 +222,8 @@ struct efa_admin_modify_qp_cmd { /* Enable async notification when SQ is drained */ u8 sq_drained_async_notify; - /* MBZ */ - u8 reserved1; + /* Number of RNR retries (valid only for SRD QPs) */ + u8 rnr_retry; /* MBZ */ u16 reserved2; @@ -258,8 +258,8 @@ struct efa_admin_query_qp_resp { /* Indicates that draining is in progress */ u8 sq_draining; - /* MBZ */ - u8 reserved1; + /* Number of RNR retries (valid only for SRD QPs) */ + u8 rnr_retry; /* MBZ */ u16 reserved2; @@ -530,10 +530,36 @@ struct efa_admin_basic_stats { u64 rx_drops; }; +struct efa_admin_messages_stats { + u64 send_bytes; + + u64 send_wrs; + + u64 recv_bytes; + + u64 recv_wrs; +}; + +struct efa_admin_rdma_read_stats { + u64 read_wrs; + + u64 read_bytes; + + u64 read_wr_err; + + u64 read_resp_bytes; +}; + struct efa_admin_acq_get_stats_resp { struct efa_admin_acq_common_desc acq_common_desc; - struct efa_admin_basic_stats basic_stats; + union { + struct efa_admin_basic_stats basic_stats; + + struct efa_admin_messages_stats messages_stats; + + struct efa_admin_rdma_read_stats rdma_read_stats; + } u; }; struct efa_admin_get_set_feature_common_desc { @@ -576,7 +602,9 @@ struct efa_admin_feature_device_attr_desc { /* * 0 : rdma_read - If set, RDMA Read is supported on * TX queues - * 31:1 : reserved - MBZ + * 1 : rnr_retry - If set, RNR retry is supported on + * modify QP command + * 31:2 : reserved - MBZ */ u32 device_caps; @@ -862,6 +890,14 @@ struct efa_admin_host_info { #define EFA_ADMIN_CREATE_QP_CMD_SQ_VIRT_MASK BIT(0) #define EFA_ADMIN_CREATE_QP_CMD_RQ_VIRT_MASK BIT(1) +/* modify_qp_cmd */ +#define EFA_ADMIN_MODIFY_QP_CMD_QP_STATE_MASK BIT(0) +#define EFA_ADMIN_MODIFY_QP_CMD_CUR_QP_STATE_MASK BIT(1) +#define EFA_ADMIN_MODIFY_QP_CMD_QKEY_MASK BIT(2) +#define EFA_ADMIN_MODIFY_QP_CMD_SQ_PSN_MASK BIT(3) +#define EFA_ADMIN_MODIFY_QP_CMD_SQ_DRAINED_ASYNC_NOTIFY_MASK BIT(4) +#define EFA_ADMIN_MODIFY_QP_CMD_RNR_RETRY_MASK BIT(5) + /* reg_mr_cmd */ #define EFA_ADMIN_REG_MR_CMD_PHYS_PAGE_SIZE_SHIFT_MASK GENMASK(4, 0) #define EFA_ADMIN_REG_MR_CMD_MEM_ADDR_PHY_MODE_EN_MASK BIT(7) @@ -878,6 +914,7 @@ struct efa_admin_host_info { /* feature_device_attr_desc */ #define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RDMA_READ_MASK BIT(0) +#define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RNR_RETRY_MASK BIT(1) /* host_info */ #define EFA_ADMIN_HOST_INFO_DRIVER_MODULE_TYPE_MASK GENMASK(7, 0) diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.c b/drivers/infiniband/hw/efa/efa_com_cmd.c index 6ac23627f65a..f752ef64159c 100644 --- a/drivers/infiniband/hw/efa/efa_com_cmd.c +++ b/drivers/infiniband/hw/efa/efa_com_cmd.c @@ -76,6 +76,7 @@ int efa_com_modify_qp(struct efa_com_dev *edev, cmd.qkey = params->qkey; cmd.sq_psn = params->sq_psn; cmd.sq_drained_async_notify = params->sq_drained_async_notify; + cmd.rnr_retry = params->rnr_retry; err = efa_com_cmd_exec(aq, (struct efa_admin_aq_entry *)&cmd, @@ -121,6 +122,7 @@ int efa_com_query_qp(struct efa_com_dev *edev, result->qkey = resp.qkey; result->sq_draining = resp.sq_draining; result->sq_psn = resp.sq_psn; + result->rnr_retry = resp.rnr_retry; return 0; } @@ -750,11 +752,27 @@ int 
efa_com_get_stats(struct efa_com_dev *edev, return err; } - result->basic_stats.tx_bytes = resp.basic_stats.tx_bytes; - result->basic_stats.tx_pkts = resp.basic_stats.tx_pkts; - result->basic_stats.rx_bytes = resp.basic_stats.rx_bytes; - result->basic_stats.rx_pkts = resp.basic_stats.rx_pkts; - result->basic_stats.rx_drops = resp.basic_stats.rx_drops; + switch (cmd.type) { + case EFA_ADMIN_GET_STATS_TYPE_BASIC: + result->basic_stats.tx_bytes = resp.u.basic_stats.tx_bytes; + result->basic_stats.tx_pkts = resp.u.basic_stats.tx_pkts; + result->basic_stats.rx_bytes = resp.u.basic_stats.rx_bytes; + result->basic_stats.rx_pkts = resp.u.basic_stats.rx_pkts; + result->basic_stats.rx_drops = resp.u.basic_stats.rx_drops; + break; + case EFA_ADMIN_GET_STATS_TYPE_MESSAGES: + result->messages_stats.send_bytes = resp.u.messages_stats.send_bytes; + result->messages_stats.send_wrs = resp.u.messages_stats.send_wrs; + result->messages_stats.recv_bytes = resp.u.messages_stats.recv_bytes; + result->messages_stats.recv_wrs = resp.u.messages_stats.recv_wrs; + break; + case EFA_ADMIN_GET_STATS_TYPE_RDMA_READ: + result->rdma_read_stats.read_wrs = resp.u.rdma_read_stats.read_wrs; + result->rdma_read_stats.read_bytes = resp.u.rdma_read_stats.read_bytes; + result->rdma_read_stats.read_wr_err = resp.u.rdma_read_stats.read_wr_err; + result->rdma_read_stats.read_resp_bytes = resp.u.rdma_read_stats.read_resp_bytes; + break; + } return 0; } diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.h b/drivers/infiniband/hw/efa/efa_com_cmd.h index 190bac23f585..eea4ebfbe6ec 100644 --- a/drivers/infiniband/hw/efa/efa_com_cmd.h +++ b/drivers/infiniband/hw/efa/efa_com_cmd.h @@ -47,6 +47,7 @@ struct efa_com_modify_qp_params { u32 qkey; u32 sq_psn; u8 sq_drained_async_notify; + u8 rnr_retry; }; struct efa_com_query_qp_params { @@ -58,6 +59,7 @@ struct efa_com_query_qp_result { u32 qkey; u32 sq_draining; u32 sq_psn; + u8 rnr_retry; }; struct efa_com_destroy_qp_params { @@ -238,8 +240,24 @@ struct efa_com_basic_stats { u64 rx_drops; }; +struct efa_com_messages_stats { + u64 send_bytes; + u64 send_wrs; + u64 recv_bytes; + u64 recv_wrs; +}; + +struct efa_com_rdma_read_stats { + u64 read_wrs; + u64 read_bytes; + u64 read_wr_err; + u64 read_resp_bytes; +}; + union efa_com_get_stats_result { struct efa_com_basic_stats basic_stats; + struct efa_com_messages_stats messages_stats; + struct efa_com_rdma_read_stats rdma_read_stats; }; void efa_com_set_dma_addr(dma_addr_t addr, u32 *addr_high, u32 *addr_low); diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c index 92d701146320..6faed3a81e08 100644 --- a/drivers/infiniband/hw/efa/efa_main.c +++ b/drivers/infiniband/hw/efa/efa_main.c @@ -331,7 +331,7 @@ static int efa_ib_device_add(struct efa_dev *dev) ib_set_device_ops(&dev->ibdev, &efa_dev_ops); - err = ib_register_device(&dev->ibdev, "efa_%d"); + err = ib_register_device(&dev->ibdev, "efa_%d", &pdev->dev); if (err) goto err_release_doorbell_bar; @@ -418,7 +418,7 @@ static int efa_device_init(struct efa_com_dev *edev, struct pci_dev *pdev) err); return err; } - + dma_set_max_seg_size(&pdev->dev, UINT_MAX); return 0; } diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index 9e201f169289..191e0843f090 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -4,6 +4,7 @@ */ #include <linux/vmalloc.h> +#include <linux/log2.h> #include <rdma/ib_addr.h> #include <rdma/ib_umem.h> @@ -35,6 +36,14 @@ struct efa_user_mmap_entry { 
op(EFA_RX_BYTES, "rx_bytes") \ op(EFA_RX_PKTS, "rx_pkts") \ op(EFA_RX_DROPS, "rx_drops") \ + op(EFA_SEND_BYTES, "send_bytes") \ + op(EFA_SEND_WRS, "send_wrs") \ + op(EFA_RECV_BYTES, "recv_bytes") \ + op(EFA_RECV_WRS, "recv_wrs") \ + op(EFA_RDMA_READ_WRS, "rdma_read_wrs") \ + op(EFA_RDMA_READ_BYTES, "rdma_read_bytes") \ + op(EFA_RDMA_READ_WR_ERR, "rdma_read_wr_err") \ + op(EFA_RDMA_READ_RESP_BYTES, "rdma_read_resp_bytes") \ op(EFA_SUBMITTED_CMDS, "submitted_cmds") \ op(EFA_COMPLETED_CMDS, "completed_cmds") \ op(EFA_CMDS_ERR, "cmds_err") \ @@ -142,10 +151,9 @@ to_emmap(struct rdma_user_mmap_entry *rdma_entry) return container_of(rdma_entry, struct efa_user_mmap_entry, rdma_entry); } -static inline bool is_rdma_read_cap(struct efa_dev *dev) -{ - return dev->dev_attr.device_caps & EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RDMA_READ_MASK; -} +#define EFA_DEV_CAP(dev, cap) \ + ((dev)->dev_attr.device_caps & \ + EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_##cap##_MASK) #define is_reserved_cleared(reserved) \ !memchr_inv(reserved, 0, sizeof(reserved)) @@ -221,9 +229,12 @@ int efa_query_device(struct ib_device *ibdev, resp.max_rq_wr = dev_attr->max_rq_depth; resp.max_rdma_size = dev_attr->max_rdma_size; - if (is_rdma_read_cap(dev)) + if (EFA_DEV_CAP(dev, RDMA_READ)) resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RDMA_READ; + if (EFA_DEV_CAP(dev, RNR_RETRY)) + resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RNR_RETRY; + err = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen)); if (err) { @@ -269,7 +280,7 @@ int efa_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, #define EFA_QUERY_QP_SUPP_MASK \ (IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT | \ - IB_QP_QKEY | IB_QP_SQ_PSN | IB_QP_CAP) + IB_QP_QKEY | IB_QP_SQ_PSN | IB_QP_CAP | IB_QP_RNR_RETRY) if (qp_attr_mask & ~EFA_QUERY_QP_SUPP_MASK) { ibdev_dbg(&dev->ibdev, @@ -291,6 +302,7 @@ int efa_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, qp_attr->sq_psn = result.sq_psn; qp_attr->sq_draining = result.sq_draining; qp_attr->port_num = 1; + qp_attr->rnr_retry = result.rnr_retry; qp_attr->cap.max_send_wr = qp->max_send_wr; qp_attr->cap.max_recv_wr = qp->max_recv_wr; @@ -376,17 +388,18 @@ int efa_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) err_dealloc_pd: efa_pd_dealloc(dev, result.pdn); err_out: - atomic64_inc(&dev->stats.sw_stats.alloc_pd_err); + atomic64_inc(&dev->stats.alloc_pd_err); return err; } -void efa_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) +int efa_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct efa_dev *dev = to_edev(ibpd->device); struct efa_pd *pd = to_epd(ibpd); ibdev_dbg(&dev->ibdev, "Dealloc pd[%d]\n", pd->pdn); efa_pd_dealloc(dev, pd->pdn); + return 0; } static int efa_destroy_qp_handle(struct efa_dev *dev, u32 qp_handle) @@ -737,18 +750,130 @@ err_free_mapped: err_free_qp: kfree(qp); err_out: - atomic64_inc(&dev->stats.sw_stats.create_qp_err); + atomic64_inc(&dev->stats.create_qp_err); return ERR_PTR(err); } +static const struct { + int valid; + enum ib_qp_attr_mask req_param; + enum ib_qp_attr_mask opt_param; +} srd_qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = { + [IB_QPS_RESET] = { + [IB_QPS_RESET] = { .valid = 1 }, + [IB_QPS_INIT] = { + .valid = 1, + .req_param = IB_QP_PKEY_INDEX | + IB_QP_PORT | + IB_QP_QKEY, + }, + }, + [IB_QPS_INIT] = { + [IB_QPS_RESET] = { .valid = 1 }, + [IB_QPS_ERR] = { .valid = 1 }, + [IB_QPS_INIT] = { + .valid = 1, + .opt_param = IB_QP_PKEY_INDEX | + IB_QP_PORT | + IB_QP_QKEY, + }, + [IB_QPS_RTR] = { + .valid = 1, + .opt_param = IB_QP_PKEY_INDEX | + IB_QP_QKEY, + }, 
+ }, + [IB_QPS_RTR] = { + [IB_QPS_RESET] = { .valid = 1 }, + [IB_QPS_ERR] = { .valid = 1 }, + [IB_QPS_RTS] = { + .valid = 1, + .req_param = IB_QP_SQ_PSN, + .opt_param = IB_QP_CUR_STATE | + IB_QP_QKEY | + IB_QP_RNR_RETRY, + + } + }, + [IB_QPS_RTS] = { + [IB_QPS_RESET] = { .valid = 1 }, + [IB_QPS_ERR] = { .valid = 1 }, + [IB_QPS_RTS] = { + .valid = 1, + .opt_param = IB_QP_CUR_STATE | + IB_QP_QKEY, + }, + [IB_QPS_SQD] = { + .valid = 1, + .opt_param = IB_QP_EN_SQD_ASYNC_NOTIFY, + }, + }, + [IB_QPS_SQD] = { + [IB_QPS_RESET] = { .valid = 1 }, + [IB_QPS_ERR] = { .valid = 1 }, + [IB_QPS_RTS] = { + .valid = 1, + .opt_param = IB_QP_CUR_STATE | + IB_QP_QKEY, + }, + [IB_QPS_SQD] = { + .valid = 1, + .opt_param = IB_QP_PKEY_INDEX | + IB_QP_QKEY, + } + }, + [IB_QPS_SQE] = { + [IB_QPS_RESET] = { .valid = 1 }, + [IB_QPS_ERR] = { .valid = 1 }, + [IB_QPS_RTS] = { + .valid = 1, + .opt_param = IB_QP_CUR_STATE | + IB_QP_QKEY, + } + }, + [IB_QPS_ERR] = { + [IB_QPS_RESET] = { .valid = 1 }, + [IB_QPS_ERR] = { .valid = 1 }, + } +}; + +static bool efa_modify_srd_qp_is_ok(enum ib_qp_state cur_state, + enum ib_qp_state next_state, + enum ib_qp_attr_mask mask) +{ + enum ib_qp_attr_mask req_param, opt_param; + + if (mask & IB_QP_CUR_STATE && + cur_state != IB_QPS_RTR && cur_state != IB_QPS_RTS && + cur_state != IB_QPS_SQD && cur_state != IB_QPS_SQE) + return false; + + if (!srd_qp_state_table[cur_state][next_state].valid) + return false; + + req_param = srd_qp_state_table[cur_state][next_state].req_param; + opt_param = srd_qp_state_table[cur_state][next_state].opt_param; + + if ((mask & req_param) != req_param) + return false; + + if (mask & ~(req_param | opt_param | IB_QP_STATE)) + return false; + + return true; +} + static int efa_modify_qp_validate(struct efa_dev *dev, struct efa_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask, enum ib_qp_state cur_state, enum ib_qp_state new_state) { + int err; + #define EFA_MODIFY_QP_SUPP_MASK \ (IB_QP_STATE | IB_QP_CUR_STATE | IB_QP_EN_SQD_ASYNC_NOTIFY | \ - IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_QKEY | IB_QP_SQ_PSN) + IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_QKEY | IB_QP_SQ_PSN | \ + IB_QP_RNR_RETRY) if (qp_attr_mask & ~EFA_MODIFY_QP_SUPP_MASK) { ibdev_dbg(&dev->ibdev, @@ -757,8 +882,14 @@ static int efa_modify_qp_validate(struct efa_dev *dev, struct efa_qp *qp, return -EOPNOTSUPP; } - if (!ib_modify_qp_is_ok(cur_state, new_state, IB_QPT_UD, - qp_attr_mask)) { + if (qp->ibqp.qp_type == IB_QPT_DRIVER) + err = !efa_modify_srd_qp_is_ok(cur_state, new_state, + qp_attr_mask); + else + err = !ib_modify_qp_is_ok(cur_state, new_state, IB_QPT_UD, + qp_attr_mask); + + if (err) { ibdev_dbg(&dev->ibdev, "Invalid modify QP parameters\n"); return -EINVAL; } @@ -805,28 +936,36 @@ int efa_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, params.qp_handle = qp->qp_handle; if (qp_attr_mask & IB_QP_STATE) { - params.modify_mask |= BIT(EFA_ADMIN_QP_STATE_BIT) | - BIT(EFA_ADMIN_CUR_QP_STATE_BIT); + EFA_SET(¶ms.modify_mask, EFA_ADMIN_MODIFY_QP_CMD_QP_STATE, + 1); + EFA_SET(¶ms.modify_mask, + EFA_ADMIN_MODIFY_QP_CMD_CUR_QP_STATE, 1); params.cur_qp_state = qp_attr->cur_qp_state; params.qp_state = qp_attr->qp_state; } if (qp_attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) { - params.modify_mask |= - BIT(EFA_ADMIN_SQ_DRAINED_ASYNC_NOTIFY_BIT); + EFA_SET(¶ms.modify_mask, + EFA_ADMIN_MODIFY_QP_CMD_SQ_DRAINED_ASYNC_NOTIFY, 1); params.sq_drained_async_notify = qp_attr->en_sqd_async_notify; } if (qp_attr_mask & IB_QP_QKEY) { - params.modify_mask |= BIT(EFA_ADMIN_QKEY_BIT); + EFA_SET(¶ms.modify_mask, 
EFA_ADMIN_MODIFY_QP_CMD_QKEY, 1); params.qkey = qp_attr->qkey; } if (qp_attr_mask & IB_QP_SQ_PSN) { - params.modify_mask |= BIT(EFA_ADMIN_SQ_PSN_BIT); + EFA_SET(¶ms.modify_mask, EFA_ADMIN_MODIFY_QP_CMD_SQ_PSN, 1); params.sq_psn = qp_attr->sq_psn; } + if (qp_attr_mask & IB_QP_RNR_RETRY) { + EFA_SET(¶ms.modify_mask, EFA_ADMIN_MODIFY_QP_CMD_RNR_RETRY, + 1); + params.rnr_retry = qp_attr->rnr_retry; + } + err = efa_com_modify_qp(&dev->edev, ¶ms); if (err) return err; @@ -843,7 +982,7 @@ static int efa_destroy_cq_idx(struct efa_dev *dev, int cq_idx) return efa_com_destroy_cq(&dev->edev, ¶ms); } -void efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) +int efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) { struct efa_dev *dev = to_edev(ibcq->device); struct efa_cq *cq = to_ecq(ibcq); @@ -856,6 +995,7 @@ void efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) efa_destroy_cq_idx(dev, cq->cq_idx); efa_free_mapped(dev, cq->cpu_addr, cq->dma_addr, cq->size, DMA_FROM_DEVICE); + return 0; } static int cq_mmap_entries_setup(struct efa_dev *dev, struct efa_cq *cq, @@ -996,7 +1136,7 @@ err_free_mapped: DMA_FROM_DEVICE); err_out: - atomic64_inc(&dev->stats.sw_stats.create_cq_err); + atomic64_inc(&dev->stats.create_cq_err); return err; } @@ -1013,8 +1153,7 @@ static int umem_to_page_list(struct efa_dev *dev, ibdev_dbg(&dev->ibdev, "hp_cnt[%u], pages_in_hp[%u]\n", hp_cnt, pages_in_hp); - rdma_for_each_block(umem->sg_head.sgl, &biter, umem->nmap, - BIT(hp_shift)) + rdma_umem_for_each_dma_block(umem, &biter, BIT(hp_shift)) page_list[hp_idx++] = rdma_block_iter_dma_address(&biter); return 0; @@ -1026,7 +1165,7 @@ static struct scatterlist *efa_vmalloc_buf_to_sg(u64 *buf, int page_cnt) struct page *pg; int i; - sglist = kcalloc(page_cnt, sizeof(*sglist), GFP_KERNEL); + sglist = kmalloc_array(page_cnt, sizeof(*sglist), GFP_KERNEL); if (!sglist) return NULL; sg_init_table(sglist, page_cnt); @@ -1370,7 +1509,7 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, supp_access_flags = IB_ACCESS_LOCAL_WRITE | - (is_rdma_read_cap(dev) ? IB_ACCESS_REMOTE_READ : 0); + (EFA_DEV_CAP(dev, RDMA_READ) ? 
IB_ACCESS_REMOTE_READ : 0); access_flags &= ~IB_ACCESS_OPTIONAL; if (access_flags & ~supp_access_flags) { @@ -1410,9 +1549,8 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, goto err_unmap; } - params.page_shift = __ffs(pg_sz); - params.page_num = DIV_ROUND_UP(length + (start & (pg_sz - 1)), - pg_sz); + params.page_shift = order_base_2(pg_sz); + params.page_num = ib_umem_num_dma_blocks(mr->umem, pg_sz); ibdev_dbg(&dev->ibdev, "start %#llx length %#llx params.page_shift %u params.page_num %u\n", @@ -1451,7 +1589,7 @@ err_unmap: err_free: kfree(mr); err_out: - atomic64_inc(&dev->stats.sw_stats.reg_mr_err); + atomic64_inc(&dev->stats.reg_mr_err); return ERR_PTR(err); } @@ -1569,19 +1707,17 @@ int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata) resp.max_tx_batch = dev->dev_attr.max_tx_batch; resp.min_sq_wr = dev->dev_attr.min_sq_depth; - if (udata && udata->outlen) { - err = ib_copy_to_udata(udata, &resp, - min(sizeof(resp), udata->outlen)); - if (err) - goto err_dealloc_uar; - } + err = ib_copy_to_udata(udata, &resp, + min(sizeof(resp), udata->outlen)); + if (err) + goto err_dealloc_uar; return 0; err_dealloc_uar: efa_dealloc_uar(dev, result.uarn); err_out: - atomic64_inc(&dev->stats.sw_stats.alloc_ucontext_err); + atomic64_inc(&dev->stats.alloc_ucontext_err); return err; } @@ -1614,7 +1750,7 @@ static int __efa_mmap(struct efa_dev *dev, struct efa_ucontext *ucontext, ibdev_dbg(&dev->ibdev, "pgoff[%#lx] does not have valid entry\n", vma->vm_pgoff); - atomic64_inc(&dev->stats.sw_stats.mmap_err); + atomic64_inc(&dev->stats.mmap_err); return -EINVAL; } entry = to_emmap(rdma_entry); @@ -1656,7 +1792,7 @@ static int __efa_mmap(struct efa_dev *dev, struct efa_ucontext *ucontext, "Couldn't mmap address[%#llx] length[%#zx] mmap_flag[%d] err[%d]\n", entry->address, rdma_entry->npages * PAGE_SIZE, entry->mmap_flag, err); - atomic64_inc(&dev->stats.sw_stats.mmap_err); + atomic64_inc(&dev->stats.mmap_err); } rdma_user_mmap_entry_put(rdma_entry); @@ -1741,11 +1877,11 @@ int efa_create_ah(struct ib_ah *ibah, err_destroy_ah: efa_ah_destroy(dev, ah); err_out: - atomic64_inc(&dev->stats.sw_stats.create_ah_err); + atomic64_inc(&dev->stats.create_ah_err); return err; } -void efa_destroy_ah(struct ib_ah *ibah, u32 flags) +int efa_destroy_ah(struct ib_ah *ibah, u32 flags) { struct efa_dev *dev = to_edev(ibah->pd->device); struct efa_ah *ah = to_eah(ibah); @@ -1755,10 +1891,11 @@ void efa_destroy_ah(struct ib_ah *ibah, u32 flags) if (!(flags & RDMA_DESTROY_AH_SLEEPABLE)) { ibdev_dbg(&dev->ibdev, "Destroy address handle is not supported in atomic context\n"); - return; + return -EOPNOTSUPP; } efa_ah_destroy(dev, ah); + return 0; } struct rdma_hw_stats *efa_alloc_hw_stats(struct ib_device *ibdev, u8 port_num) @@ -1774,13 +1911,15 @@ int efa_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, struct efa_com_get_stats_params params = {}; union efa_com_get_stats_result result; struct efa_dev *dev = to_edev(ibdev); + struct efa_com_rdma_read_stats *rrs; + struct efa_com_messages_stats *ms; struct efa_com_basic_stats *bs; struct efa_com_stats_admin *as; struct efa_stats *s; int err; - params.type = EFA_ADMIN_GET_STATS_TYPE_BASIC; params.scope = EFA_ADMIN_GET_STATS_SCOPE_ALL; + params.type = EFA_ADMIN_GET_STATS_TYPE_BASIC; err = efa_com_get_stats(&dev->edev, ¶ms, &result); if (err) @@ -1793,6 +1932,28 @@ int efa_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, stats->value[EFA_RX_PKTS] = bs->rx_pkts; stats->value[EFA_RX_DROPS] = 
bs->rx_drops; + params.type = EFA_ADMIN_GET_STATS_TYPE_MESSAGES; + err = efa_com_get_stats(&dev->edev, ¶ms, &result); + if (err) + return err; + + ms = &result.messages_stats; + stats->value[EFA_SEND_BYTES] = ms->send_bytes; + stats->value[EFA_SEND_WRS] = ms->send_wrs; + stats->value[EFA_RECV_BYTES] = ms->recv_bytes; + stats->value[EFA_RECV_WRS] = ms->recv_wrs; + + params.type = EFA_ADMIN_GET_STATS_TYPE_RDMA_READ; + err = efa_com_get_stats(&dev->edev, ¶ms, &result); + if (err) + return err; + + rrs = &result.rdma_read_stats; + stats->value[EFA_RDMA_READ_WRS] = rrs->read_wrs; + stats->value[EFA_RDMA_READ_BYTES] = rrs->read_bytes; + stats->value[EFA_RDMA_READ_WR_ERR] = rrs->read_wr_err; + stats->value[EFA_RDMA_READ_RESP_BYTES] = rrs->read_resp_bytes; + as = &dev->edev.aq.stats; stats->value[EFA_SUBMITTED_CMDS] = atomic64_read(&as->submitted_cmd); stats->value[EFA_COMPLETED_CMDS] = atomic64_read(&as->completed_cmd); @@ -1801,13 +1962,14 @@ int efa_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, s = &dev->stats; stats->value[EFA_KEEP_ALIVE_RCVD] = atomic64_read(&s->keep_alive_rcvd); - stats->value[EFA_ALLOC_PD_ERR] = atomic64_read(&s->sw_stats.alloc_pd_err); - stats->value[EFA_CREATE_QP_ERR] = atomic64_read(&s->sw_stats.create_qp_err); - stats->value[EFA_CREATE_CQ_ERR] = atomic64_read(&s->sw_stats.create_cq_err); - stats->value[EFA_REG_MR_ERR] = atomic64_read(&s->sw_stats.reg_mr_err); - stats->value[EFA_ALLOC_UCONTEXT_ERR] = atomic64_read(&s->sw_stats.alloc_ucontext_err); - stats->value[EFA_CREATE_AH_ERR] = atomic64_read(&s->sw_stats.create_ah_err); - stats->value[EFA_MMAP_ERR] = atomic64_read(&s->sw_stats.mmap_err); + stats->value[EFA_ALLOC_PD_ERR] = atomic64_read(&s->alloc_pd_err); + stats->value[EFA_CREATE_QP_ERR] = atomic64_read(&s->create_qp_err); + stats->value[EFA_CREATE_CQ_ERR] = atomic64_read(&s->create_cq_err); + stats->value[EFA_REG_MR_ERR] = atomic64_read(&s->reg_mr_err); + stats->value[EFA_ALLOC_UCONTEXT_ERR] = + atomic64_read(&s->alloc_ucontext_err); + stats->value[EFA_CREATE_AH_ERR] = atomic64_read(&s->create_ah_err); + stats->value[EFA_MMAP_ERR] = atomic64_read(&s->mmap_err); return ARRAY_SIZE(efa_stats_names); } diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index 04575c9afd61..a307d4c8b15a 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -232,11 +232,11 @@ static const struct sdma_set_state_action sdma_action_table[] = { static void sdma_complete(struct kref *); static void sdma_finalput(struct sdma_state *); static void sdma_get(struct sdma_state *); -static void sdma_hw_clean_up_task(unsigned long); +static void sdma_hw_clean_up_task(struct tasklet_struct *); static void sdma_put(struct sdma_state *); static void sdma_set_state(struct sdma_engine *, enum sdma_states); static void sdma_start_hw_clean_up(struct sdma_engine *); -static void sdma_sw_clean_up_task(unsigned long); +static void sdma_sw_clean_up_task(struct tasklet_struct *); static void sdma_sendctrl(struct sdma_engine *, unsigned); static void init_sdma_regs(struct sdma_engine *, u32, uint); static void sdma_process_event( @@ -545,9 +545,10 @@ static void sdma_err_progress_check(struct timer_list *t) schedule_work(&sde->err_halt_worker); } -static void sdma_hw_clean_up_task(unsigned long opaque) +static void sdma_hw_clean_up_task(struct tasklet_struct *t) { - struct sdma_engine *sde = (struct sdma_engine *)opaque; + struct sdma_engine *sde = from_tasklet(sde, t, + sdma_hw_clean_up_task); u64 statuscsr; while (1) { @@ 
-604,9 +605,9 @@ static void sdma_flush_descq(struct sdma_engine *sde) sdma_desc_avail(sde, sdma_descq_freecnt(sde)); } -static void sdma_sw_clean_up_task(unsigned long opaque) +static void sdma_sw_clean_up_task(struct tasklet_struct *t) { - struct sdma_engine *sde = (struct sdma_engine *)opaque; + struct sdma_engine *sde = from_tasklet(sde, t, sdma_sw_clean_up_task); unsigned long flags; spin_lock_irqsave(&sde->tail_lock, flags); @@ -1454,11 +1455,10 @@ int sdma_init(struct hfi1_devdata *dd, u8 port) sde->tail_csr = get_kctxt_csr_addr(dd, this_idx, SD(TAIL)); - tasklet_init(&sde->sdma_hw_clean_up_task, sdma_hw_clean_up_task, - (unsigned long)sde); - - tasklet_init(&sde->sdma_sw_clean_up_task, sdma_sw_clean_up_task, - (unsigned long)sde); + tasklet_setup(&sde->sdma_hw_clean_up_task, + sdma_hw_clean_up_task); + tasklet_setup(&sde->sdma_sw_clean_up_task, + sdma_sw_clean_up_task); INIT_WORK(&sde->err_halt_worker, sdma_err_halt_wait); INIT_WORK(&sde->flush_worker, sdma_field_flush); diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 30865635b449..3591923abebb 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1424,7 +1424,7 @@ static int query_port(struct rvt_dev_info *rdi, u8 port_num, props->gid_tbl_len = HFI1_GUIDS_PER_PORT; props->active_width = (u8)opa_width_to_ib(ppd->link_width_active); /* see rate_show() in ib core/sysfs.c */ - props->active_speed = (u8)opa_speed_to_ib(ppd->link_speed_active); + props->active_speed = opa_speed_to_ib(ppd->link_speed_active); props->max_vl_num = ppd->vls_supported; /* Once we are a "first class" citizen and have added the OPA MTUs to diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c index 5b2f9314edd3..75b06db60f7c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ah.c +++ b/drivers/infiniband/hw/hns/hns_roce_ah.c @@ -39,6 +39,22 @@ #define HNS_ROCE_VLAN_SL_BIT_MASK 7 #define HNS_ROCE_VLAN_SL_SHIFT 13 +static inline u16 get_ah_udp_sport(const struct rdma_ah_attr *ah_attr) +{ + u32 fl = ah_attr->grh.flow_label; + u16 sport; + + if (!fl) + sport = get_random_u32() % + (IB_ROCE_UDP_ENCAP_VALID_PORT_MAX + 1 - + IB_ROCE_UDP_ENCAP_VALID_PORT_MIN) + + IB_ROCE_UDP_ENCAP_VALID_PORT_MIN; + else + sport = rdma_flow_label_to_udp_sport(fl); + + return sport; +} + int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata) { @@ -79,6 +95,8 @@ int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, memcpy(ah->av.dgid, grh->dgid.raw, HNS_ROCE_GID_SIZE); ah->av.sl = rdma_ah_get_sl(ah_attr); + ah->av.flowlabel = grh->flow_label; + ah->av.udp_sport = get_ah_udp_sport(ah_attr); return 0; } @@ -98,8 +116,3 @@ int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr) return 0; } - -void hns_roce_destroy_ah(struct ib_ah *ah, u32 flags) -{ - return; -} diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c index a522cb2d29ea..a6b23dec1adc 100644 --- a/drivers/infiniband/hw/hns/hns_roce_alloc.c +++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c @@ -268,8 +268,7 @@ int hns_roce_get_umem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, } /* convert system page cnt to hw page cnt */ - rdma_for_each_block(umem->sg_head.sgl, &biter, umem->nmap, - 1 << page_shift) { + rdma_umem_for_each_dma_block(umem, &biter, 1 << page_shift) { addr = rdma_block_iter_dma_address(&biter); if (idx >= start) { bufs[total++] = addr; diff --git 
a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index e87d616f7988..809b22aa5056 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -150,7 +150,7 @@ static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, int err; buf_attr.page_shift = hr_dev->caps.cqe_buf_pg_sz + HNS_HW_PAGE_SHIFT; - buf_attr.region[0].size = hr_cq->cq_depth * hr_dev->caps.cq_entry_sz; + buf_attr.region[0].size = hr_cq->cq_depth * hr_cq->cqe_size; buf_attr.region[0].hopnum = hr_dev->caps.cqe_hop_num; buf_attr.region_count = 1; buf_attr.fixed_page = true; @@ -224,6 +224,21 @@ static void free_cq_db(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, } } +static void set_cqe_size(struct hns_roce_cq *hr_cq, struct ib_udata *udata, + struct hns_roce_ib_create_cq *ucmd) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(hr_cq->ib_cq.device); + + if (udata) { + if (udata->inlen >= offsetofend(typeof(*ucmd), cqe_size)) + hr_cq->cqe_size = ucmd->cqe_size; + else + hr_cq->cqe_size = HNS_ROCE_V2_CQE_SIZE; + } else { + hr_cq->cqe_size = hr_dev->caps.cqe_sz; + } +} + int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, struct ib_udata *udata) { @@ -258,7 +273,8 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, INIT_LIST_HEAD(&hr_cq->rq_list); if (udata) { - ret = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)); + ret = ib_copy_from_udata(&ucmd, udata, + min(sizeof(ucmd), udata->inlen)); if (ret) { ibdev_err(ibdev, "Failed to copy CQ udata, err %d\n", ret); @@ -266,6 +282,8 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, } } + set_cqe_size(hr_cq, udata, &ucmd); + ret = alloc_cq_buf(hr_dev, hr_cq, udata, ucmd.buf_addr); if (ret) { ibdev_err(ibdev, "Failed to alloc CQ buf, err %d\n", ret); @@ -287,7 +305,7 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, /* * For the QP created by kernel space, tptr value should be initialized * to zero; For the QP created by user space, it will cause synchronous - * problems if tptr is set to zero here, so we initialze it in user + * problems if tptr is set to zero here, so we initialize it in user * space. 
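[Editor's note: set_cqe_size() above only trusts ucmd->cqe_size when udata->inlen shows the user actually passed that field, and otherwise falls back to the 32-byte CQE; the offsetofend() test plus the min(sizeof(ucmd), udata->inlen) copy is the usual pattern for growing a uABI struct. A small stand-alone illustration with invented struct and field names:]

#include <stdio.h>
#include <stddef.h>
#include <string.h>

#define offsetofend(TYPE, MEMBER) \
	(offsetof(TYPE, MEMBER) + sizeof(((TYPE *)0)->MEMBER))

struct create_cq_cmd {              /* v1 of the ABI ended after buf_addr */
	unsigned long long buf_addr;
	unsigned int cqe_size;      /* field appended later */
	unsigned int rsv;
};

static unsigned int choose_cqe_size(const void *ubuf, size_t inlen)
{
	struct create_cq_cmd cmd = { 0 };

	/* copy only what the caller provided, like ib_copy_from_udata(min(...)) */
	memcpy(&cmd, ubuf, inlen < sizeof(cmd) ? inlen : sizeof(cmd));

	if (inlen >= offsetofend(struct create_cq_cmd, cqe_size) && cmd.cqe_size)
		return cmd.cqe_size;    /* new user space chose a size */
	return 32;                      /* legacy default */
}

int main(void)
{
	struct create_cq_cmd newcmd = { .buf_addr = 0x1000, .cqe_size = 64 };
	unsigned long long oldcmd = 0x1000;     /* old user space: 8 bytes only */

	printf("old ABI -> %u\n", choose_cqe_size(&oldcmd, sizeof(oldcmd)));
	printf("new ABI -> %u\n", choose_cqe_size(&newcmd, sizeof(newcmd)));
	return 0;
}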
*/ if (!udata && hr_cq->tptr_addr) @@ -311,7 +329,7 @@ err_cq_buf: return ret; } -void hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) +int hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device); struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq); @@ -322,6 +340,7 @@ void hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) free_cq_buf(hr_dev, hr_cq); free_cq_db(hr_dev, hr_cq, udata); free_cqc(hr_dev, hr_cq); + return 0; } void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 6edcbdcd8f43..6d2acff69f98 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -37,8 +37,8 @@ #define DRV_NAME "hns_roce" -/* hip08 is a pci device */ #define PCI_REVISION_ID_HIP08 0x21 +#define PCI_REVISION_ID_HIP09 0x30 #define HNS_ROCE_HW_VER1 ('h' << 24 | 'i' << 16 | '0' << 8 | '6') @@ -57,7 +57,6 @@ /* Hardware specification only for v1 engine */ #define HNS_ROCE_MAX_INNER_MTPT_NUM 0x7 #define HNS_ROCE_MAX_MTPT_PBL_NUM 0x100000 -#define HNS_ROCE_MAX_SGE_NUM 2 #define HNS_ROCE_EACH_FREE_CQ_WAIT_MSECS 20 #define HNS_ROCE_MAX_FREE_CQ_WAIT_CNT \ @@ -76,15 +75,18 @@ #define HNS_ROCE_CEQ 0 #define HNS_ROCE_AEQ 1 -#define HNS_ROCE_CEQ_ENTRY_SIZE 0x4 -#define HNS_ROCE_AEQ_ENTRY_SIZE 0x10 +#define HNS_ROCE_CEQE_SIZE 0x4 +#define HNS_ROCE_AEQE_SIZE 0x10 -#define HNS_ROCE_SL_SHIFT 28 -#define HNS_ROCE_TCLASS_SHIFT 20 -#define HNS_ROCE_FLOW_LABEL_MASK 0xfffff +#define HNS_ROCE_V3_EQE_SIZE 0x40 + +#define HNS_ROCE_V2_CQE_SIZE 32 +#define HNS_ROCE_V3_CQE_SIZE 64 + +#define HNS_ROCE_V2_QPC_SZ 256 +#define HNS_ROCE_V3_QPC_SZ 512 #define HNS_ROCE_MAX_PORTS 6 -#define HNS_ROCE_MAX_GID_NUM 16 #define HNS_ROCE_GID_SIZE 16 #define HNS_ROCE_SGE_SIZE 16 @@ -112,8 +114,6 @@ #define PAGES_SHIFT_24 24 #define PAGES_SHIFT_32 32 -#define HNS_ROCE_PCI_BAR_NUM 2 - #define HNS_ROCE_IDX_QUE_ENTRY_SZ 4 #define SRQ_DB_REG 0x230 @@ -467,6 +467,7 @@ struct hns_roce_cq { void __iomem *cq_db_l; u16 *tptr_addr; int arm_sn; + int cqe_size; unsigned long cqn; u32 vector; atomic_t refcount; @@ -535,17 +536,18 @@ struct hns_roce_raq_table { }; struct hns_roce_av { - u8 port; - u8 gid_index; - u8 stat_rate; - u8 hop_limit; - u32 flowlabel; - u8 sl; - u8 tclass; - u8 dgid[HNS_ROCE_GID_SIZE]; - u8 mac[ETH_ALEN]; - u16 vlan_id; - bool vlan_en; + u8 port; + u8 gid_index; + u8 stat_rate; + u8 hop_limit; + u32 flowlabel; + u16 udp_sport; + u8 sl; + u8 tclass; + u8 dgid[HNS_ROCE_GID_SIZE]; + u8 mac[ETH_ALEN]; + u16 vlan_id; + bool vlan_en; }; struct hns_roce_ah { @@ -655,6 +657,8 @@ struct hns_roce_qp { struct hns_roce_sge sge; u32 next_sge; + enum ib_mtu path_mtu; + u32 max_inline_data; /* 0: flush needed, 1: unneeded */ unsigned long flush_flag; @@ -678,7 +682,8 @@ enum { }; struct hns_roce_ceqe { - __le32 comp; + __le32 comp; + __le32 rsv[15]; }; struct hns_roce_aeqe { @@ -715,6 +720,7 @@ struct hns_roce_aeqe { u8 rsv0; } __packed cmd; } event; + __le32 rsv[12]; }; struct hns_roce_eq { @@ -791,15 +797,15 @@ struct hns_roce_caps { int num_pds; int reserved_pds; u32 mtt_entry_sz; - u32 cq_entry_sz; + u32 cqe_sz; u32 page_size_cap; u32 reserved_lkey; int mtpt_entry_sz; - int qpc_entry_sz; + int qpc_sz; int irrl_entry_sz; int trrl_entry_sz; int cqc_entry_sz; - int sccc_entry_sz; + int sccc_sz; int qpc_timer_entry_sz; int cqc_timer_entry_sz; int srqc_entry_sz; @@ -809,6 +815,8 @@ struct hns_roce_caps { 
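[Editor's note: the rsv[] arrays added to hns_roce_ceqe and hns_roce_aeqe above pad the in-memory entries out to the 64-byte HIP09 size (HNS_ROCE_V3_EQE_SIZE) while keeping the old fields at the front. A compile-time check of that kind of layout might look like the sketch below; the structs are simplified stand-ins, only the totals match the constants in the hunk:]

#include <stdint.h>

struct ceqe {                           /* mirrors hns_roce_ceqe after the change */
	uint32_t comp;
	uint32_t rsv[15];
};

struct aeqe_event { uint32_t queue_num; uint32_t rsv1[3]; }; /* simplified union */

struct aeqe {                           /* simplified hns_roce_aeqe */
	uint32_t asyn;
	struct aeqe_event event;
	uint32_t rsv[11];
};

_Static_assert(sizeof(struct ceqe) == 64, "CEQE must match HNS_ROCE_V3_EQE_SIZE");
_Static_assert(sizeof(struct aeqe) == 64, "AEQE must match HNS_ROCE_V3_EQE_SIZE");

int main(void) { return 0; }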
u32 pbl_hop_num; int aeqe_depth; int ceqe_depth; + u32 aeqe_size; + u32 ceqe_size; enum ib_mtu max_mtu; u32 qpc_bt_num; u32 qpc_timer_bt_num; @@ -930,7 +938,7 @@ struct hns_roce_hw { int (*poll_cq)(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); int (*dereg_mr)(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr, struct ib_udata *udata); - void (*destroy_cq)(struct ib_cq *ibcq, struct ib_udata *udata); + int (*destroy_cq)(struct ib_cq *ibcq, struct ib_udata *udata); int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period); int (*init_eq)(struct hns_roce_dev *hr_dev); void (*cleanup_eq)(struct hns_roce_dev *hr_dev); @@ -1178,10 +1186,13 @@ void hns_roce_bitmap_free_range(struct hns_roce_bitmap *bitmap, int hns_roce_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata); int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); -void hns_roce_destroy_ah(struct ib_ah *ah, u32 flags); +static inline int hns_roce_destroy_ah(struct ib_ah *ah, u32 flags) +{ + return 0; +} int hns_roce_alloc_pd(struct ib_pd *pd, struct ib_udata *udata); -void hns_roce_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata); +int hns_roce_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata); struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc); struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, @@ -1200,8 +1211,7 @@ int hns_roce_hw_destroy_mpt(struct hns_roce_dev *hr_dev, unsigned long mpt_index); unsigned long key_to_hw_index(u32 key); -struct ib_mw *hns_roce_alloc_mw(struct ib_pd *pd, enum ib_mw_type, - struct ib_udata *udata); +int hns_roce_alloc_mw(struct ib_mw *mw, struct ib_udata *udata); int hns_roce_dealloc_mw(struct ib_mw *ibmw); void hns_roce_buf_free(struct hns_roce_dev *hr_dev, struct hns_roce_buf *buf); @@ -1220,7 +1230,7 @@ int hns_roce_create_srq(struct ib_srq *srq, int hns_roce_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr, enum ib_srq_attr_mask srq_attr_mask, struct ib_udata *udata); -void hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata); +int hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata); struct ib_qp *hns_roce_create_qp(struct ib_pd *ib_pd, struct ib_qp_init_attr *init_attr, @@ -1247,7 +1257,7 @@ int to_hr_qp_type(int qp_type); int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, struct ib_udata *udata); -void hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata); +int hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata); int hns_roce_db_map_user(struct hns_roce_ucontext *context, struct ib_udata *udata, unsigned long virt, struct hns_roce_db *db); diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index c8db6f8ae018..7487cf3d2c37 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -338,8 +338,8 @@ static int hns_roce_set_hem(struct hns_roce_dev *hr_dev, void __iomem *bt_cmd; __le32 bt_cmd_val[2]; __le32 bt_cmd_h = 0; - __le32 bt_cmd_l = 0; - u64 bt_ba = 0; + __le32 bt_cmd_l; + u64 bt_ba; int ret = 0; /* Find the HEM(Hardware Entry Memory) entry */ @@ -1027,7 +1027,7 @@ void hns_roce_cleanup_hem(struct hns_roce_dev *hr_dev) if (hr_dev->caps.cqc_timer_entry_sz) hns_roce_cleanup_hem_table(hr_dev, &hr_dev->cqc_timer_table); - if (hr_dev->caps.sccc_entry_sz) + if (hr_dev->caps.sccc_sz) hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.sccc_table); if (hr_dev->caps.trrl_entry_sz) @@ -1404,7 +1404,7 @@ int 
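[Editor's note: the prototype changes above (destroy_cq, destroy_srq, dealloc_pd, destroy_ah, ...) switch the destroy/dealloc verbs from void to int so a driver can report a teardown failure, and trivial cases like hns_roce_destroy_ah simply become inline stubs returning 0. A rough user-space model of what that means for an op table; the types are illustrative, not the real ib_device_ops layout:]

#include <stdio.h>
#include <errno.h>

struct cq { int hw_busy; };

struct dev_ops {
	int (*destroy_cq)(struct cq *cq);   /* was: void (*destroy_cq)(...) */
};

static int drv_destroy_cq(struct cq *cq)
{
	if (cq->hw_busy)
		return -EBUSY;              /* hardware refused the teardown */
	return 0;
}

static const struct dev_ops ops = { .destroy_cq = drv_destroy_cq };

int main(void)
{
	struct cq good = { 0 }, stuck = { .hw_busy = 1 };

	printf("good:  %d\n", ops.destroy_cq(&good));    /* 0 */
	printf("stuck: %d\n", ops.destroy_cq(&stuck));   /* -EBUSY: core keeps the object */
	return 0;
}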
hns_roce_hem_list_request(struct hns_roce_dev *hr_dev, { const struct hns_roce_buf_region *r; int ofs, end; - int ret = 0; + int ret; int unit; int i; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index aeb3a6fa7d47..5f4d8a32ed6d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -70,15 +70,15 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp, struct hns_roce_qp *qp = to_hr_qp(ibqp); struct device *dev = &hr_dev->pdev->dev; struct hns_roce_sq_db sq_db = {}; - int ps_opcode = 0, i = 0; + int ps_opcode, i; unsigned long flags = 0; void *wqe = NULL; __le32 doorbell[2]; - u32 wqe_idx = 0; - int nreq = 0; int ret = 0; - u8 *smac; int loopback; + u32 wqe_idx; + int nreq; + u8 *smac; if (unlikely(ibqp->qp_type != IB_QPT_GSI && ibqp->qp_type != IB_QPT_RC)) { @@ -271,7 +271,6 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp, ps_opcode = HNS_ROCE_WQE_OPCODE_SEND; break; case IB_WR_LOCAL_INV: - break; case IB_WR_ATOMIC_CMP_AND_SWP: case IB_WR_ATOMIC_FETCH_AND_ADD: case IB_WR_LSO: @@ -888,7 +887,7 @@ static int hns_roce_db_init(struct hns_roce_dev *hr_dev) u32 odb_ext_mod; u32 sdb_evt_mod; u32 odb_evt_mod; - int ret = 0; + int ret; memset(db, 0, sizeof(*db)); @@ -1148,8 +1147,8 @@ static int hns_roce_raq_init(struct hns_roce_dev *hr_dev) struct hns_roce_v1_priv *priv = hr_dev->priv; struct hns_roce_raq_table *raq = &priv->raq_table; struct device *dev = &hr_dev->pdev->dev; - int raq_shift = 0; dma_addr_t addr; + int raq_shift; __le32 tmp; u32 val; int ret; @@ -1360,7 +1359,7 @@ static int hns_roce_free_mr_init(struct hns_roce_dev *hr_dev) struct hns_roce_v1_priv *priv = hr_dev->priv; struct hns_roce_free_mr *free_mr = &priv->free_mr; struct device *dev = &hr_dev->pdev->dev; - int ret = 0; + int ret; free_mr->free_mr_wq = create_singlethread_workqueue("hns_roce_free_mr"); if (!free_mr->free_mr_wq) { @@ -1440,8 +1439,8 @@ static int hns_roce_v1_reset(struct hns_roce_dev *hr_dev, bool dereset) static int hns_roce_v1_profile(struct hns_roce_dev *hr_dev) { - int i = 0; struct hns_roce_caps *caps = &hr_dev->caps; + int i; hr_dev->vendor_id = roce_read(hr_dev, ROCEE_VENDOR_ID_REG); hr_dev->vendor_part_id = roce_read(hr_dev, ROCEE_VENDOR_PART_ID_REG); @@ -1471,12 +1470,12 @@ static int hns_roce_v1_profile(struct hns_roce_dev *hr_dev) caps->max_qp_dest_rdma = HNS_ROCE_V1_MAX_QP_DEST_RDMA; caps->max_sq_desc_sz = HNS_ROCE_V1_MAX_SQ_DESC_SZ; caps->max_rq_desc_sz = HNS_ROCE_V1_MAX_RQ_DESC_SZ; - caps->qpc_entry_sz = HNS_ROCE_V1_QPC_ENTRY_SIZE; + caps->qpc_sz = HNS_ROCE_V1_QPC_SIZE; caps->irrl_entry_sz = HNS_ROCE_V1_IRRL_ENTRY_SIZE; caps->cqc_entry_sz = HNS_ROCE_V1_CQC_ENTRY_SIZE; caps->mtpt_entry_sz = HNS_ROCE_V1_MTPT_ENTRY_SIZE; caps->mtt_entry_sz = HNS_ROCE_V1_MTT_ENTRY_SIZE; - caps->cq_entry_sz = HNS_ROCE_V1_CQE_ENTRY_SIZE; + caps->cqe_sz = HNS_ROCE_V1_CQE_SIZE; caps->page_size_cap = HNS_ROCE_V1_PAGE_SIZE_SUPPORT; caps->reserved_lkey = 0; caps->reserved_pds = 0; @@ -1643,7 +1642,7 @@ static int hns_roce_v1_chk_mbox(struct hns_roce_dev *hr_dev, unsigned long timeout) { u8 __iomem *hcr = hr_dev->reg_base + ROCEE_MB1_REG; - unsigned long end = 0; + unsigned long end; u32 status = 0; end = msecs_to_jiffies(timeout) + jiffies; @@ -1671,7 +1670,7 @@ static int hns_roce_v1_set_gid(struct hns_roce_dev *hr_dev, u8 port, { unsigned long flags; u32 *p = NULL; - u8 gid_idx = 0; + u8 gid_idx; gid_idx = hns_get_gid_index(hr_dev, port, gid_index); @@ -1897,8 +1896,7 @@ static int 
hns_roce_v1_write_mtpt(struct hns_roce_dev *hr_dev, void *mb_buf, static void *get_cqe(struct hns_roce_cq *hr_cq, int n) { - return hns_roce_buf_offset(hr_cq->mtr.kmem, - n * HNS_ROCE_V1_CQE_ENTRY_SIZE); + return hns_roce_buf_offset(hr_cq->mtr.kmem, n * HNS_ROCE_V1_CQE_SIZE); } static void *get_sw_cqe(struct hns_roce_cq *hr_cq, int n) @@ -2445,7 +2443,7 @@ static int hns_roce_v1_qp_modify(struct hns_roce_dev *hr_dev, struct hns_roce_cmd_mailbox *mailbox; struct device *dev = &hr_dev->pdev->dev; - int ret = 0; + int ret; if (cur_state >= HNS_ROCE_QP_NUM_STATE || new_state >= HNS_ROCE_QP_NUM_STATE || @@ -3394,7 +3392,7 @@ static int hns_roce_v1_q_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); struct device *dev = &hr_dev->pdev->dev; struct hns_roce_qp_context *context; - int tmp_qp_state = 0; + int tmp_qp_state; int ret = 0; int state; @@ -3572,7 +3570,7 @@ int hns_roce_v1_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) return 0; } -static void hns_roce_v1_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) +static int hns_roce_v1_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ibcq->device); struct hns_roce_cq *hr_cq = to_hr_cq(ibcq); @@ -3603,6 +3601,7 @@ static void hns_roce_v1_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) } wait_time++; } + return 0; } static void set_eq_cons_index_v1(struct hns_roce_eq *eq, int req_not) @@ -3775,8 +3774,7 @@ static void hns_roce_v1_db_overflow_handle(struct hns_roce_dev *hr_dev, static struct hns_roce_aeqe *get_aeqe_v1(struct hns_roce_eq *eq, u32 entry) { - unsigned long off = (entry & (eq->entries - 1)) * - HNS_ROCE_AEQ_ENTRY_SIZE; + unsigned long off = (entry & (eq->entries - 1)) * HNS_ROCE_AEQE_SIZE; return (struct hns_roce_aeqe *)((u8 *) (eq->buf_list[off / HNS_ROCE_BA_SIZE].buf) + @@ -3881,8 +3879,7 @@ static int hns_roce_v1_aeq_int(struct hns_roce_dev *hr_dev, static struct hns_roce_ceqe *get_ceqe_v1(struct hns_roce_eq *eq, u32 entry) { - unsigned long off = (entry & (eq->entries - 1)) * - HNS_ROCE_CEQ_ENTRY_SIZE; + unsigned long off = (entry & (eq->entries - 1)) * HNS_ROCE_CEQE_SIZE; return (struct hns_roce_ceqe *)((u8 *) (eq->buf_list[off / HNS_ROCE_BA_SIZE].buf) + @@ -3934,7 +3931,7 @@ static irqreturn_t hns_roce_v1_msix_interrupt_eq(int irq, void *eq_ptr) { struct hns_roce_eq *eq = eq_ptr; struct hns_roce_dev *hr_dev = eq->hr_dev; - int int_work = 0; + int int_work; if (eq->type_flag == HNS_ROCE_CEQ) /* CEQ irq routine, CEQ is pulse irq, not clear */ @@ -4132,9 +4129,9 @@ static int hns_roce_v1_create_eq(struct hns_roce_dev *hr_dev, void __iomem *eqc = hr_dev->eq_table.eqc_base[eq->eqn]; struct device *dev = &hr_dev->pdev->dev; dma_addr_t tmp_dma_addr; - u32 eqconsindx_val = 0; u32 eqcuridx_val = 0; - u32 eqshift_val = 0; + u32 eqconsindx_val; + u32 eqshift_val; __le32 tmp2 = 0; __le32 tmp1 = 0; __le32 tmp = 0; @@ -4253,7 +4250,7 @@ static int hns_roce_v1_init_eq_table(struct hns_roce_dev *hr_dev) CEQ_REG_OFFSET * i; eq->entries = hr_dev->caps.ceqe_depth; eq->log_entries = ilog2(eq->entries); - eq->eqe_size = HNS_ROCE_CEQ_ENTRY_SIZE; + eq->eqe_size = HNS_ROCE_CEQE_SIZE; } else { /* AEQ */ eq_table->eqc_base[i] = hr_dev->reg_base + @@ -4263,7 +4260,7 @@ static int hns_roce_v1_init_eq_table(struct hns_roce_dev *hr_dev) ROCEE_CAEP_AEQE_CONS_IDX_REG; eq->entries = hr_dev->caps.aeqe_depth; eq->log_entries = ilog2(eq->entries); - eq->eqe_size = HNS_ROCE_AEQ_ENTRY_SIZE; + eq->eqe_size = HNS_ROCE_AEQE_SIZE; } } diff --git 
a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h index 52307b2c7100..ffd0156080f5 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h @@ -68,13 +68,13 @@ #define HNS_ROCE_V1_COMP_EQE_NUM 0x8000 #define HNS_ROCE_V1_ASYNC_EQE_NUM 0x400 -#define HNS_ROCE_V1_QPC_ENTRY_SIZE 256 +#define HNS_ROCE_V1_QPC_SIZE 256 #define HNS_ROCE_V1_IRRL_ENTRY_SIZE 8 #define HNS_ROCE_V1_CQC_ENTRY_SIZE 64 #define HNS_ROCE_V1_MTPT_ENTRY_SIZE 64 #define HNS_ROCE_V1_MTT_ENTRY_SIZE 64 -#define HNS_ROCE_V1_CQE_ENTRY_SIZE 32 +#define HNS_ROCE_V1_CQE_SIZE 32 #define HNS_ROCE_V1_PAGE_SIZE_SUPPORT 0xFFFFF000 #define HNS_ROCE_V1_TABLE_CHUNK_SIZE (1 << 17) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 4cda95ed1fbe..6d30850696c5 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -153,6 +153,67 @@ static void set_atomic_seg(const struct ib_send_wr *wr, V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, valid_num_sge); } +static int fill_ext_sge_inl_data(struct hns_roce_qp *qp, + const struct ib_send_wr *wr, + unsigned int *sge_idx, u32 msg_len) +{ + struct ib_device *ibdev = &(to_hr_dev(qp->ibqp.device))->ib_dev; + unsigned int dseg_len = sizeof(struct hns_roce_v2_wqe_data_seg); + unsigned int ext_sge_sz = qp->sq.max_gs * dseg_len; + unsigned int left_len_in_pg; + unsigned int idx = *sge_idx; + unsigned int i = 0; + unsigned int len; + void *addr; + void *dseg; + + if (msg_len > ext_sge_sz) { + ibdev_err(ibdev, + "no enough extended sge space for inline data.\n"); + return -EINVAL; + } + + dseg = hns_roce_get_extend_sge(qp, idx & (qp->sge.sge_cnt - 1)); + left_len_in_pg = hr_hw_page_align((uintptr_t)dseg) - (uintptr_t)dseg; + len = wr->sg_list[0].length; + addr = (void *)(unsigned long)(wr->sg_list[0].addr); + + /* When copying data to extended sge space, the left length in page may + * not long enough for current user's sge. So the data should be + * splited into several parts, one in the first page, and the others in + * the subsequent pages. 
+ */ + while (1) { + if (len <= left_len_in_pg) { + memcpy(dseg, addr, len); + + idx += len / dseg_len; + + i++; + if (i >= wr->num_sge) + break; + + left_len_in_pg -= len; + len = wr->sg_list[i].length; + addr = (void *)(unsigned long)(wr->sg_list[i].addr); + dseg += len; + } else { + memcpy(dseg, addr, left_len_in_pg); + + len -= left_len_in_pg; + addr += left_len_in_pg; + idx += left_len_in_pg / dseg_len; + dseg = hns_roce_get_extend_sge(qp, + idx & (qp->sge.sge_cnt - 1)); + left_len_in_pg = 1 << HNS_HW_PAGE_SHIFT; + } + } + + *sge_idx = idx; + + return 0; +} + static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr, unsigned int *sge_ind, unsigned int valid_num_sge) { @@ -177,73 +238,115 @@ static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr, *sge_ind = idx; } +static bool check_inl_data_len(struct hns_roce_qp *qp, unsigned int len) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(qp->ibqp.device); + int mtu = ib_mtu_enum_to_int(qp->path_mtu); + + if (len > qp->max_inline_data || len > mtu) { + ibdev_err(&hr_dev->ib_dev, + "invalid length of data, data len = %u, max inline len = %u, path mtu = %d.\n", + len, qp->max_inline_data, mtu); + return false; + } + + return true; +} + +static int set_rc_inl(struct hns_roce_qp *qp, const struct ib_send_wr *wr, + struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, + unsigned int *sge_idx) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(qp->ibqp.device); + u32 msg_len = le32_to_cpu(rc_sq_wqe->msg_len); + struct ib_device *ibdev = &hr_dev->ib_dev; + unsigned int curr_idx = *sge_idx; + void *dseg = rc_sq_wqe; + unsigned int i; + int ret; + + if (unlikely(wr->opcode == IB_WR_RDMA_READ)) { + ibdev_err(ibdev, "invalid inline parameters!\n"); + return -EINVAL; + } + + if (!check_inl_data_len(qp, msg_len)) + return -EINVAL; + + dseg += sizeof(struct hns_roce_v2_rc_send_wqe); + + roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_INLINE_S, 1); + + if (msg_len <= HNS_ROCE_V2_MAX_RC_INL_INN_SZ) { + roce_set_bit(rc_sq_wqe->byte_20, + V2_RC_SEND_WQE_BYTE_20_INL_TYPE_S, 0); + + for (i = 0; i < wr->num_sge; i++) { + memcpy(dseg, ((void *)wr->sg_list[i].addr), + wr->sg_list[i].length); + dseg += wr->sg_list[i].length; + } + } else { + roce_set_bit(rc_sq_wqe->byte_20, + V2_RC_SEND_WQE_BYTE_20_INL_TYPE_S, 1); + + ret = fill_ext_sge_inl_data(qp, wr, &curr_idx, msg_len); + if (ret) + return ret; + + roce_set_field(rc_sq_wqe->byte_16, + V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M, + V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, + curr_idx - *sge_idx); + } + + *sge_idx = curr_idx; + + return 0; +} + static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr, struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, unsigned int *sge_ind, unsigned int valid_num_sge) { - struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_v2_wqe_data_seg *dseg = (void *)rc_sq_wqe + sizeof(struct hns_roce_v2_rc_send_wqe); - struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_qp *qp = to_hr_qp(ibqp); - void *wqe = dseg; int j = 0; int i; - if (wr->send_flags & IB_SEND_INLINE && valid_num_sge) { - if (unlikely(le32_to_cpu(rc_sq_wqe->msg_len) > - hr_dev->caps.max_sq_inline)) { - ibdev_err(ibdev, "inline len(1-%d)=%d, illegal", - rc_sq_wqe->msg_len, - hr_dev->caps.max_sq_inline); - return -EINVAL; - } + roce_set_field(rc_sq_wqe->byte_20, + V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M, + V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S, + (*sge_ind) & (qp->sge.sge_cnt - 1)); - if (unlikely(wr->opcode == IB_WR_RDMA_READ)) { - ibdev_err(ibdev, "Not support 
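[Editor's note: fill_ext_sge_inl_data() above copies the inline payload into the extended-SGE area but must restart the memcpy at every hardware page boundary. The splitting logic, reduced to a stand-alone form; the 16-byte "page" and the linear destination buffer are simplifications (the real code wraps around the SGE ring and uses 4K pages):]

#include <stdio.h>
#include <string.h>
#include <stdint.h>

#define PG_SZ 16u   /* stand-in for the hardware page size */

/* Copy len bytes from src into dst, restarting at every PG_SZ boundary of dst. */
static void copy_split_by_page(uint8_t *dst, size_t dst_off,
			       const uint8_t *src, size_t len)
{
	size_t left_in_pg = PG_SZ - (dst_off % PG_SZ);

	while (len) {
		size_t chunk = len < left_in_pg ? len : left_in_pg;

		memcpy(dst + dst_off, src, chunk);
		dst_off += chunk;
		src += chunk;
		len -= chunk;
		left_in_pg = PG_SZ;     /* later copies start page-aligned */
	}
}

int main(void)
{
	uint8_t dst[64] = { 0 };
	const char *msg = "0123456789abcdefghijklmnopqrstuv";  /* 32 bytes */

	copy_split_by_page(dst, 10, (const uint8_t *)msg, strlen(msg));
	printf("%.32s\n", (const char *)dst + 10);
	return 0;
}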
inline data!\n"); - return -EINVAL; - } + if (wr->send_flags & IB_SEND_INLINE) + return set_rc_inl(qp, wr, rc_sq_wqe, sge_ind); + if (valid_num_sge <= HNS_ROCE_SGE_IN_WQE) { for (i = 0; i < wr->num_sge; i++) { - memcpy(wqe, ((void *)wr->sg_list[i].addr), - wr->sg_list[i].length); - wqe += wr->sg_list[i].length; + if (likely(wr->sg_list[i].length)) { + set_data_seg_v2(dseg, wr->sg_list + i); + dseg++; + } } - - roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_INLINE_S, - 1); } else { - if (valid_num_sge <= HNS_ROCE_SGE_IN_WQE) { - for (i = 0; i < wr->num_sge; i++) { - if (likely(wr->sg_list[i].length)) { - set_data_seg_v2(dseg, wr->sg_list + i); - dseg++; - } + for (i = 0; i < wr->num_sge && j < HNS_ROCE_SGE_IN_WQE; i++) { + if (likely(wr->sg_list[i].length)) { + set_data_seg_v2(dseg, wr->sg_list + i); + dseg++; + j++; } - } else { - roce_set_field(rc_sq_wqe->byte_20, - V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M, - V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S, - (*sge_ind) & (qp->sge.sge_cnt - 1)); - - for (i = 0; i < wr->num_sge && j < HNS_ROCE_SGE_IN_WQE; - i++) { - if (likely(wr->sg_list[i].length)) { - set_data_seg_v2(dseg, wr->sg_list + i); - dseg++; - j++; - } - } - - set_extend_sge(qp, wr, sge_ind, valid_num_sge); } - roce_set_field(rc_sq_wqe->byte_16, - V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M, - V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, valid_num_sge); + set_extend_sge(qp, wr, sge_ind, valid_num_sge); } + roce_set_field(rc_sq_wqe->byte_16, + V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M, + V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, valid_num_sge); + return 0; } @@ -292,6 +395,33 @@ static unsigned int calc_wr_sge_num(const struct ib_send_wr *wr, return valid_num; } +static __le32 get_immtdata(const struct ib_send_wr *wr) +{ + switch (wr->opcode) { + case IB_WR_SEND_WITH_IMM: + case IB_WR_RDMA_WRITE_WITH_IMM: + return cpu_to_le32(be32_to_cpu(wr->ex.imm_data)); + default: + return 0; + } +} + +static int set_ud_opcode(struct hns_roce_v2_ud_send_wqe *ud_sq_wqe, + const struct ib_send_wr *wr) +{ + u32 ib_op = wr->opcode; + + if (ib_op != IB_WR_SEND && ib_op != IB_WR_SEND_WITH_IMM) + return -EINVAL; + + ud_sq_wqe->immtdata = get_immtdata(wr); + + roce_set_field(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_OPCODE_M, + V2_UD_SEND_WQE_BYTE_4_OPCODE_S, to_hr_opcode(ib_op)); + + return 0; +} + static inline int set_ud_wqe(struct hns_roce_qp *qp, const struct ib_send_wr *wr, void *wqe, unsigned int *sge_idx, @@ -305,10 +435,15 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp, u32 msg_len = 0; bool loopback; u8 *smac; + int ret; valid_num_sge = calc_wr_sge_num(wr, &msg_len); memset(ud_sq_wqe, 0, sizeof(*ud_sq_wqe)); + ret = set_ud_opcode(ud_sq_wqe, wr); + if (WARN_ON(ret)) + return ret; + roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_0_M, V2_UD_SEND_WQE_DMAC_0_S, ah->av.mac[0]); roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_1_M, @@ -329,23 +464,8 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp, roce_set_bit(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_LBI_S, loopback); - roce_set_field(ud_sq_wqe->byte_4, - V2_UD_SEND_WQE_BYTE_4_OPCODE_M, - V2_UD_SEND_WQE_BYTE_4_OPCODE_S, - HNS_ROCE_V2_WQE_OP_SEND); - ud_sq_wqe->msg_len = cpu_to_le32(msg_len); - switch (wr->opcode) { - case IB_WR_SEND_WITH_IMM: - case IB_WR_RDMA_WRITE_WITH_IMM: - ud_sq_wqe->immtdata = cpu_to_le32(be32_to_cpu(wr->ex.imm_data)); - break; - default: - ud_sq_wqe->immtdata = 0; - break; - } - /* Set sig attr */ roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_CQE_S, (wr->send_flags & IB_SEND_SIGNALED) ? 
1 : 0); @@ -369,7 +489,7 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp, curr_idx & (qp->sge.sge_cnt - 1)); roce_set_field(ud_sq_wqe->byte_24, V2_UD_SEND_WQE_BYTE_24_UDPSPN_M, - V2_UD_SEND_WQE_BYTE_24_UDPSPN_S, 0); + V2_UD_SEND_WQE_BYTE_24_UDPSPN_S, ah->av.udp_sport); ud_sq_wqe->qkey = cpu_to_le32(ud_wr(wr)->remote_qkey & 0x80000000 ? qp->qkey : ud_wr(wr)->remote_qkey); roce_set_field(ud_sq_wqe->byte_32, V2_UD_SEND_WQE_BYTE_32_DQPN_M, @@ -402,6 +522,46 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp, return 0; } +static int set_rc_opcode(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, + const struct ib_send_wr *wr) +{ + u32 ib_op = wr->opcode; + + rc_sq_wqe->immtdata = get_immtdata(wr); + + switch (ib_op) { + case IB_WR_RDMA_READ: + case IB_WR_RDMA_WRITE: + case IB_WR_RDMA_WRITE_WITH_IMM: + rc_sq_wqe->rkey = cpu_to_le32(rdma_wr(wr)->rkey); + rc_sq_wqe->va = cpu_to_le64(rdma_wr(wr)->remote_addr); + break; + case IB_WR_SEND: + case IB_WR_SEND_WITH_IMM: + break; + case IB_WR_ATOMIC_CMP_AND_SWP: + case IB_WR_ATOMIC_FETCH_AND_ADD: + rc_sq_wqe->rkey = cpu_to_le32(atomic_wr(wr)->rkey); + rc_sq_wqe->va = cpu_to_le64(atomic_wr(wr)->remote_addr); + break; + case IB_WR_REG_MR: + set_frmr_seg(rc_sq_wqe, reg_wr(wr)); + break; + case IB_WR_LOCAL_INV: + roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_SO_S, 1); + fallthrough; + case IB_WR_SEND_WITH_INV: + rc_sq_wqe->inv_key = cpu_to_le32(wr->ex.invalidate_rkey); + break; + default: + return -EINVAL; + } + + roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OPCODE_M, + V2_RC_SEND_WQE_BYTE_4_OPCODE_S, to_hr_opcode(ib_op)); + + return 0; +} static inline int set_rc_wqe(struct hns_roce_qp *qp, const struct ib_send_wr *wr, void *wqe, unsigned int *sge_idx, @@ -411,25 +571,16 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp, unsigned int curr_idx = *sge_idx; unsigned int valid_num_sge; u32 msg_len = 0; - int ret = 0; + int ret; valid_num_sge = calc_wr_sge_num(wr, &msg_len); memset(rc_sq_wqe, 0, sizeof(*rc_sq_wqe)); rc_sq_wqe->msg_len = cpu_to_le32(msg_len); - switch (wr->opcode) { - case IB_WR_SEND_WITH_IMM: - case IB_WR_RDMA_WRITE_WITH_IMM: - rc_sq_wqe->immtdata = cpu_to_le32(be32_to_cpu(wr->ex.imm_data)); - break; - case IB_WR_SEND_WITH_INV: - rc_sq_wqe->inv_key = cpu_to_le32(wr->ex.invalidate_rkey); - break; - default: - rc_sq_wqe->immtdata = 0; - break; - } + ret = set_rc_opcode(rc_sq_wqe, wr); + if (WARN_ON(ret)) + return ret; roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_FENCE_S, (wr->send_flags & IB_SEND_FENCE) ? 
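[Editor's note: set_rc_opcode() above gathers all per-opcode WQE fields into a single switch; IB_WR_LOCAL_INV sets the SO bit and then falls through to share the inv_key assignment with IB_WR_SEND_WITH_IMM's sibling IB_WR_SEND_WITH_INV, and unknown opcodes are now rejected with -EINVAL instead of being silently ignored. A compact stand-alone model of that control flow with invented opcode values and wqe fields:]

#include <stdio.h>

enum op { OP_SEND, OP_SEND_WITH_INV, OP_LOCAL_INV, OP_RDMA_WRITE };

struct wqe { unsigned so_bit; unsigned inv_key; unsigned rkey; };

static int set_opcode_fields(struct wqe *w, enum op op, unsigned key)
{
	switch (op) {
	case OP_RDMA_WRITE:
		w->rkey = key;
		break;
	case OP_SEND:
		break;
	case OP_LOCAL_INV:
		w->so_bit = 1;          /* like V2_RC_SEND_WQE_BYTE_4_SO_S */
		/* fall through */
	case OP_SEND_WITH_INV:
		w->inv_key = key;
		break;
	default:
		return -1;              /* unsupported opcode is rejected, not ignored */
	}
	return 0;
}

int main(void)
{
	struct wqe w = { 0 };

	set_opcode_fields(&w, OP_LOCAL_INV, 0x1234);
	printf("so=%u inv_key=0x%x\n", w.so_bit, w.inv_key);   /* so=1 inv_key=0x1234 */
	return 0;
}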
1 : 0); @@ -443,33 +594,6 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp, roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OWNER_S, owner_bit); - switch (wr->opcode) { - case IB_WR_RDMA_READ: - case IB_WR_RDMA_WRITE: - case IB_WR_RDMA_WRITE_WITH_IMM: - rc_sq_wqe->rkey = cpu_to_le32(rdma_wr(wr)->rkey); - rc_sq_wqe->va = cpu_to_le64(rdma_wr(wr)->remote_addr); - break; - case IB_WR_LOCAL_INV: - roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_SO_S, 1); - rc_sq_wqe->inv_key = cpu_to_le32(wr->ex.invalidate_rkey); - break; - case IB_WR_REG_MR: - set_frmr_seg(rc_sq_wqe, reg_wr(wr)); - break; - case IB_WR_ATOMIC_CMP_AND_SWP: - case IB_WR_ATOMIC_FETCH_AND_ADD: - rc_sq_wqe->rkey = cpu_to_le32(atomic_wr(wr)->rkey); - rc_sq_wqe->va = cpu_to_le64(atomic_wr(wr)->remote_addr); - break; - default: - break; - } - - roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OPCODE_M, - V2_RC_SEND_WQE_BYTE_4_OPCODE_S, - to_hr_opcode(wr->opcode)); - if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP || wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) set_atomic_seg(wr, rc_sq_wqe, valid_num_sge); @@ -1682,7 +1806,7 @@ static void set_default_caps(struct hns_roce_dev *hr_dev) caps->max_sq_desc_sz = HNS_ROCE_V2_MAX_SQ_DESC_SZ; caps->max_rq_desc_sz = HNS_ROCE_V2_MAX_RQ_DESC_SZ; caps->max_srq_desc_sz = HNS_ROCE_V2_MAX_SRQ_DESC_SZ; - caps->qpc_entry_sz = HNS_ROCE_V2_QPC_ENTRY_SZ; + caps->qpc_sz = HNS_ROCE_V2_QPC_SZ; caps->irrl_entry_sz = HNS_ROCE_V2_IRRL_ENTRY_SZ; caps->trrl_entry_sz = HNS_ROCE_V2_EXT_ATOMIC_TRRL_ENTRY_SZ; caps->cqc_entry_sz = HNS_ROCE_V2_CQC_ENTRY_SZ; @@ -1690,7 +1814,7 @@ static void set_default_caps(struct hns_roce_dev *hr_dev) caps->mtpt_entry_sz = HNS_ROCE_V2_MTPT_ENTRY_SZ; caps->mtt_entry_sz = HNS_ROCE_V2_MTT_ENTRY_SZ; caps->idx_entry_sz = HNS_ROCE_V2_IDX_ENTRY_SZ; - caps->cq_entry_sz = HNS_ROCE_V2_CQE_ENTRY_SIZE; + caps->cqe_sz = HNS_ROCE_V2_CQE_SIZE; caps->page_size_cap = HNS_ROCE_V2_PAGE_SIZE_SUPPORTED; caps->reserved_lkey = 0; caps->reserved_pds = 0; @@ -1739,6 +1863,8 @@ static void set_default_caps(struct hns_roce_dev *hr_dev) caps->gid_table_len[0] = HNS_ROCE_V2_GID_INDEX_NUM; caps->ceqe_depth = HNS_ROCE_V2_COMP_EQE_NUM; caps->aeqe_depth = HNS_ROCE_V2_ASYNC_EQE_NUM; + caps->aeqe_size = HNS_ROCE_AEQE_SIZE; + caps->ceqe_size = HNS_ROCE_CEQE_SIZE; caps->local_ca_ack_delay = 0; caps->max_mtu = IB_MTU_4096; @@ -1760,19 +1886,26 @@ static void set_default_caps(struct hns_roce_dev *hr_dev) caps->cqc_timer_buf_pg_sz = 0; caps->cqc_timer_hop_num = HNS_ROCE_HOP_NUM_0; - caps->sccc_entry_sz = HNS_ROCE_V2_SCCC_ENTRY_SZ; + caps->sccc_sz = HNS_ROCE_V2_SCCC_SZ; caps->sccc_ba_pg_sz = 0; caps->sccc_buf_pg_sz = 0; caps->sccc_hop_num = HNS_ROCE_SCCC_HOP_NUM; + + if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) { + caps->aeqe_size = HNS_ROCE_V3_EQE_SIZE; + caps->ceqe_size = HNS_ROCE_V3_EQE_SIZE; + caps->cqe_sz = HNS_ROCE_V3_CQE_SIZE; + caps->qpc_sz = HNS_ROCE_V3_QPC_SZ; + } } static void calc_pg_sz(int obj_num, int obj_size, int hop_num, int ctx_bt_num, int *buf_page_size, int *bt_page_size, u32 hem_type) { u64 obj_per_chunk; - int bt_chunk_size = 1 << PAGE_SHIFT; - int buf_chunk_size = 1 << PAGE_SHIFT; - int obj_per_chunk_default = buf_chunk_size / obj_size; + u64 bt_chunk_size = PAGE_SIZE; + u64 buf_chunk_size = PAGE_SIZE; + u64 obj_per_chunk_default = buf_chunk_size / obj_size; *buf_page_size = 0; *bt_page_size = 0; @@ -1855,7 +1988,7 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev) caps->max_sq_desc_sz = resp_a->max_sq_desc_sz; caps->max_rq_desc_sz = resp_a->max_rq_desc_sz; 
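[Editor's note: set_default_caps() above ends by overriding the entry sizes when the PCI revision indicates HIP09, so one driver binary serves both generations; the same override appears again in hns_roce_query_pf_caps(). The pattern, boiled down to a stand-alone sketch using the revision and size constants shown in the header hunks (0x21/0x30, 32/64-byte CQE, 256/512-byte QPC):]

#include <stdio.h>

#define REV_HIP08 0x21
#define REV_HIP09 0x30

struct caps { unsigned cqe_sz, qpc_sz; };

static void set_caps(struct caps *c, unsigned char pci_revision)
{
	/* HIP08 baseline */
	c->cqe_sz = 32;
	c->qpc_sz = 256;

	/* newer silicon widens the hardware entries */
	if (pci_revision >= REV_HIP09) {
		c->cqe_sz = 64;
		c->qpc_sz = 512;
	}
}

int main(void)
{
	struct caps c;

	set_caps(&c, REV_HIP08);
	printf("hip08: cqe=%u qpc=%u\n", c.cqe_sz, c.qpc_sz);
	set_caps(&c, REV_HIP09);
	printf("hip09: cqe=%u qpc=%u\n", c.cqe_sz, c.qpc_sz);
	return 0;
}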
caps->max_srq_desc_sz = resp_a->max_srq_desc_sz; - caps->cq_entry_sz = resp_a->cq_entry_sz; + caps->cqe_sz = HNS_ROCE_V2_CQE_SIZE; caps->mtpt_entry_sz = resp_b->mtpt_entry_sz; caps->irrl_entry_sz = resp_b->irrl_entry_sz; @@ -1863,9 +1996,9 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev) caps->cqc_entry_sz = resp_b->cqc_entry_sz; caps->srqc_entry_sz = resp_b->srqc_entry_sz; caps->idx_entry_sz = resp_b->idx_entry_sz; - caps->sccc_entry_sz = resp_b->scc_ctx_entry_sz; + caps->sccc_sz = resp_b->sccc_sz; caps->max_mtu = resp_b->max_mtu; - caps->qpc_entry_sz = le16_to_cpu(resp_b->qpc_entry_sz); + caps->qpc_sz = HNS_ROCE_V2_QPC_SZ; caps->min_cqes = resp_b->min_cqes; caps->min_wqes = resp_b->min_wqes; caps->page_size_cap = le32_to_cpu(resp_b->page_size_cap); @@ -1958,6 +2091,8 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev) caps->cqc_timer_entry_sz = HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ; caps->mtt_entry_sz = HNS_ROCE_V2_MTT_ENTRY_SZ; caps->num_mtt_segs = HNS_ROCE_V2_MAX_MTT_SEGS; + caps->ceqe_size = HNS_ROCE_CEQE_SIZE; + caps->aeqe_size = HNS_ROCE_AEQE_SIZE; caps->mtt_ba_pg_sz = 0; caps->num_cqe_segs = HNS_ROCE_V2_MAX_CQE_SEGS; caps->num_srqwqe_segs = HNS_ROCE_V2_MAX_SRQWQE_SEGS; @@ -1981,7 +2116,15 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev) V2_QUERY_PF_CAPS_D_RQWQE_HOP_NUM_M, V2_QUERY_PF_CAPS_D_RQWQE_HOP_NUM_S); - calc_pg_sz(caps->num_qps, caps->qpc_entry_sz, caps->qpc_hop_num, + if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) { + caps->ceqe_size = HNS_ROCE_V3_EQE_SIZE; + caps->aeqe_size = HNS_ROCE_V3_EQE_SIZE; + caps->cqe_sz = HNS_ROCE_V3_CQE_SIZE; + caps->qpc_sz = HNS_ROCE_V3_QPC_SZ; + caps->sccc_sz = HNS_ROCE_V3_SCCC_SZ; + } + + calc_pg_sz(caps->num_qps, caps->qpc_sz, caps->qpc_hop_num, caps->qpc_bt_num, &caps->qpc_buf_pg_sz, &caps->qpc_ba_pg_sz, HEM_TYPE_QPC); calc_pg_sz(caps->num_mtpts, caps->mtpt_entry_sz, caps->mpt_hop_num, @@ -1998,7 +2141,7 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev) caps->qpc_timer_hop_num = HNS_ROCE_HOP_NUM_0; caps->cqc_timer_hop_num = HNS_ROCE_HOP_NUM_0; - calc_pg_sz(caps->num_qps, caps->sccc_entry_sz, + calc_pg_sz(caps->num_qps, caps->sccc_sz, caps->sccc_hop_num, caps->sccc_bt_num, &caps->sccc_buf_pg_sz, &caps->sccc_ba_pg_sz, HEM_TYPE_SCCC); @@ -2018,6 +2161,56 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev) return 0; } +static int hns_roce_config_qpc_size(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_cmq_desc desc; + struct hns_roce_cfg_entry_size *cfg_size = + (struct hns_roce_cfg_entry_size *)desc.data; + + hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_ENTRY_SIZE, + false); + + cfg_size->type = cpu_to_le32(HNS_ROCE_CFG_QPC_SIZE); + cfg_size->size = cpu_to_le32(hr_dev->caps.qpc_sz); + + return hns_roce_cmq_send(hr_dev, &desc, 1); +} + +static int hns_roce_config_sccc_size(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_cmq_desc desc; + struct hns_roce_cfg_entry_size *cfg_size = + (struct hns_roce_cfg_entry_size *)desc.data; + + hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_ENTRY_SIZE, + false); + + cfg_size->type = cpu_to_le32(HNS_ROCE_CFG_SCCC_SIZE); + cfg_size->size = cpu_to_le32(hr_dev->caps.sccc_sz); + + return hns_roce_cmq_send(hr_dev, &desc, 1); +} + +static int hns_roce_config_entry_size(struct hns_roce_dev *hr_dev) +{ + int ret; + + if (hr_dev->pci_dev->revision < PCI_REVISION_ID_HIP09) + return 0; + + ret = hns_roce_config_qpc_size(hr_dev); + if (ret) { + dev_err(hr_dev->dev, "failed to cfg qpc sz, ret = %d.\n", ret); + return ret; + } 
+ + ret = hns_roce_config_sccc_size(hr_dev); + if (ret) + dev_err(hr_dev->dev, "failed to cfg sccc sz, ret = %d.\n", ret); + + return ret; +} + static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) { struct hns_roce_caps *caps = &hr_dev->caps; @@ -2090,9 +2283,14 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) } ret = hns_roce_v2_set_bt(hr_dev); - if (ret) - dev_err(hr_dev->dev, "Configure bt attribute fail, ret = %d.\n", - ret); + if (ret) { + dev_err(hr_dev->dev, + "Configure bt attribute fail, ret = %d.\n", ret); + return ret; + } + + /* Configure the size of QPC, SCCC, etc. */ + ret = hns_roce_config_entry_size(hr_dev); return ret; } @@ -2757,8 +2955,7 @@ static int hns_roce_v2_mw_write_mtpt(void *mb_buf, struct hns_roce_mw *mw) static void *get_cqe_v2(struct hns_roce_cq *hr_cq, int n) { - return hns_roce_buf_offset(hr_cq->mtr.kmem, - n * HNS_ROCE_V2_CQE_ENTRY_SIZE); + return hns_roce_buf_offset(hr_cq->mtr.kmem, n * hr_cq->cqe_size); } static void *get_sw_cqe_v2(struct hns_roce_cq *hr_cq, int n) @@ -2858,6 +3055,10 @@ static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev, roce_set_field(cq_context->byte_8_cqn, V2_CQC_BYTE_8_CQN_M, V2_CQC_BYTE_8_CQN_S, hr_cq->cqn); + roce_set_field(cq_context->byte_8_cqn, V2_CQC_BYTE_8_CQE_SIZE_M, + V2_CQC_BYTE_8_CQE_SIZE_S, hr_cq->cqe_size == + HNS_ROCE_V3_CQE_SIZE ? 1 : 0); + cq_context->cqe_cur_blk_addr = cpu_to_le32(to_hr_hw_page_addr(mtts[0])); roce_set_field(cq_context->byte_16_hop_addr, @@ -3025,7 +3226,8 @@ out: } static void get_cqe_status(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp, - struct hns_roce_v2_cqe *cqe, struct ib_wc *wc) + struct hns_roce_cq *cq, struct hns_roce_v2_cqe *cqe, + struct ib_wc *wc) { static const struct { u32 cqe_status; @@ -3066,7 +3268,7 @@ static void get_cqe_status(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp, ibdev_err(&hr_dev->ib_dev, "error cqe status 0x%x:\n", cqe_status); print_hex_dump(KERN_ERR, "", DUMP_PREFIX_NONE, 16, 4, cqe, - sizeof(*cqe), false); + cq->cqe_size, false); /* * For hns ROCEE, GENERAL_ERR is an error type that is not defined in @@ -3163,7 +3365,7 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq, ++wq->tail; } - get_cqe_status(hr_dev, *cur_qp, cqe, wc); + get_cqe_status(hr_dev, *cur_qp, hr_cq, cqe, wc); if (unlikely(wc->status != IB_WC_SUCCESS)) return 0; @@ -3514,16 +3716,21 @@ static int hns_roce_v2_clear_hem(struct hns_roce_dev *hr_dev, static int hns_roce_v2_qp_modify(struct hns_roce_dev *hr_dev, struct hns_roce_v2_qp_context *context, + struct hns_roce_v2_qp_context *qpc_mask, struct hns_roce_qp *hr_qp) { struct hns_roce_cmd_mailbox *mailbox; + int qpc_size; int ret; mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); - memcpy(mailbox->buf, context, sizeof(*context) * 2); + /* The qpc size of HIP08 is only 256B, which is half of HIP09 */ + qpc_size = hr_dev->caps.qpc_sz; + memcpy(mailbox->buf, context, qpc_size); + memcpy(mailbox->buf + qpc_size, qpc_mask, qpc_size); ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, hr_qp->qpn, 0, HNS_ROCE_CMD_MODIFY_QPC, @@ -3641,9 +3848,6 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp, V2_QPC_BYTE_76_SRQ_EN_S, 1); } - roce_set_field(context->byte_172_sq_psn, V2_QPC_BYTE_172_ACK_REQ_FREQ_M, - V2_QPC_BYTE_172_ACK_REQ_FREQ_S, 4); - roce_set_bit(context->byte_172_sq_psn, V2_QPC_BYTE_172_FRE_S, 1); hr_qp->access_flags = attr->qp_access_flags; @@ -3954,6 +4158,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, dma_addr_t trrl_ba; dma_addr_t 
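[Editor's note: hns_roce_v2_qp_modify() above now packs the QP context and its mask into the mailbox at hr_dev->caps.qpc_sz offsets rather than a fixed sizeof(), since HIP08 uses 256-byte and HIP09 512-byte contexts. The buffer layout it builds is simply [context | mask]; a stand-alone sketch of that packing:]

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

/* Pack [context | mask] back to back, each qpc_sz bytes, as the mailbox expects.
 * qpc_sz would be 256 on HIP08 and 512 on HIP09. */
static void *pack_qpc_mailbox(const void *ctx, const void *mask, size_t qpc_sz)
{
	unsigned char *buf = malloc(2 * qpc_sz);

	if (!buf)
		return NULL;
	memcpy(buf, ctx, qpc_sz);
	memcpy(buf + qpc_sz, mask, qpc_sz);
	return buf;
}

int main(void)
{
	unsigned char ctx[512] = { [0] = 0xaa }, mask[512];
	size_t qpc_sz = 256;            /* pretend HIP08 */
	unsigned char *mbox;

	memset(mask, 0xff, sizeof(mask));
	mbox = pack_qpc_mailbox(ctx, mask, qpc_sz);
	if (!mbox)
		return 1;
	printf("mbox[0]=0x%02x mbox[qpc_sz]=0x%02x\n", mbox[0], mbox[qpc_sz]);
	free(mbox);
	return 0;
}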
irrl_ba; enum ib_mtu mtu; + u8 lp_pktn_ini; u8 port_num; u64 *mtts; u8 *dmac; @@ -4052,6 +4257,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, V2_QPC_BYTE_52_DMAC_S, 0); mtu = get_mtu(ibqp, attr); + hr_qp->path_mtu = mtu; if (attr_mask & IB_QP_PATH_MTU) { roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_MTU_M, @@ -4061,13 +4267,21 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, } #define MAX_LP_MSG_LEN 65536 - /* MTU*(2^LP_PKTN_INI) shouldn't be bigger than 64kb */ + /* MTU * (2 ^ LP_PKTN_INI) shouldn't be bigger than 64KB */ + lp_pktn_ini = ilog2(MAX_LP_MSG_LEN / ib_mtu_enum_to_int(mtu)); + roce_set_field(context->byte_56_dqpn_err, V2_QPC_BYTE_56_LP_PKTN_INI_M, - V2_QPC_BYTE_56_LP_PKTN_INI_S, - ilog2(MAX_LP_MSG_LEN / ib_mtu_enum_to_int(mtu))); + V2_QPC_BYTE_56_LP_PKTN_INI_S, lp_pktn_ini); roce_set_field(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_LP_PKTN_INI_M, V2_QPC_BYTE_56_LP_PKTN_INI_S, 0); + /* ACK_REQ_FREQ should be larger than or equal to LP_PKTN_INI */ + roce_set_field(context->byte_172_sq_psn, V2_QPC_BYTE_172_ACK_REQ_FREQ_M, + V2_QPC_BYTE_172_ACK_REQ_FREQ_S, lp_pktn_ini); + roce_set_field(qpc_mask->byte_172_sq_psn, + V2_QPC_BYTE_172_ACK_REQ_FREQ_M, + V2_QPC_BYTE_172_ACK_REQ_FREQ_S, 0); + roce_set_bit(qpc_mask->byte_108_rx_reqepsn, V2_QPC_BYTE_108_RX_REQ_PSN_ERR_S, 0); roce_set_field(qpc_mask->byte_96_rx_reqmsn, V2_QPC_BYTE_96_RX_REQ_MSN_M, @@ -4164,6 +4378,14 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, return 0; } +static inline u16 get_udp_sport(u32 fl, u32 lqpn, u32 rqpn) +{ + if (!fl) + fl = rdma_calc_flow_label(lqpn, rqpn); + + return rdma_flow_label_to_udp_sport(fl); +} + static int hns_roce_v2_set_path(struct ib_qp *ibqp, const struct ib_qp_attr *attr, int attr_mask, @@ -4227,7 +4449,8 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp, roce_set_field(context->byte_52_udpspn_dmac, V2_QPC_BYTE_52_UDPSPN_M, V2_QPC_BYTE_52_UDPSPN_S, - is_udp ? 0x12b7 : 0); + is_udp ? get_udp_sport(grh->flow_label, ibqp->qp_num, + attr->dest_qp_num) : 0); roce_set_field(qpc_mask->byte_52_udpspn_dmac, V2_QPC_BYTE_52_UDPSPN_M, V2_QPC_BYTE_52_UDPSPN_S, 0); @@ -4259,11 +4482,19 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp, V2_QPC_BYTE_28_FL_S, 0); memcpy(context->dgid, grh->dgid.raw, sizeof(grh->dgid.raw)); memset(qpc_mask->dgid, 0, sizeof(grh->dgid.raw)); + + hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr); + if (unlikely(hr_qp->sl > MAX_SERVICE_LEVEL)) { + ibdev_err(ibdev, + "failed to fill QPC, sl (%d) shouldn't be larger than %d.\n", + hr_qp->sl, MAX_SERVICE_LEVEL); + return -EINVAL; + } + roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_SL_M, - V2_QPC_BYTE_28_SL_S, rdma_ah_get_sl(&attr->ah_attr)); + V2_QPC_BYTE_28_SL_S, hr_qp->sl); roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_SL_M, V2_QPC_BYTE_28_SL_S, 0); - hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr); return 0; } @@ -4309,7 +4540,7 @@ static int hns_roce_v2_set_abs_fields(struct ib_qp *ibqp, } if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { - memset(qpc_mask, 0, sizeof(*qpc_mask)); + memset(qpc_mask, 0, hr_dev->caps.qpc_sz); modify_qp_reset_to_init(ibqp, attr, attr_mask, context, qpc_mask); } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) { @@ -4532,8 +4763,9 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, * we should set all bits of the relevant fields in context mask to * 0 at the same time, else set them to 0x1. 
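[Editor's note: the RTR path above derives lp_pktn_ini so that MTU * 2^LP_PKTN_INI stays within the 64 KB limit, and then programs ACK_REQ_FREQ to the same value so an ACK is requested at least once per that many packets. The arithmetic on its own, with a local ilog2 helper standing in for the kernel macro:]

#include <stdio.h>

#define MAX_LP_MSG_LEN 65536u

static unsigned int ilog2_u32(unsigned int v)
{
	unsigned int r = 0;

	while (v >>= 1)
		r++;
	return r;
}

int main(void)
{
	unsigned int mtus[] = { 256, 1024, 4096 };

	for (int i = 0; i < 3; i++) {
		unsigned int lp_pktn_ini = ilog2_u32(MAX_LP_MSG_LEN / mtus[i]);

		/* ACK_REQ_FREQ must be >= LP_PKTN_INI, so it is set equal to it */
		printf("mtu %4u -> lp_pktn_ini %u (ack_req_freq %u)\n",
		       mtus[i], lp_pktn_ini, lp_pktn_ini);
	}
	return 0;
}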
*/ - memset(context, 0, sizeof(*context)); - memset(qpc_mask, 0xff, sizeof(*qpc_mask)); + memset(context, 0, hr_dev->caps.qpc_sz); + memset(qpc_mask, 0xff, hr_dev->caps.qpc_sz); + ret = hns_roce_v2_set_abs_fields(ibqp, attr, attr_mask, cur_state, new_state, context, qpc_mask); if (ret) @@ -4583,7 +4815,7 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, V2_QPC_BYTE_60_QP_ST_S, 0); /* SW pass context to HW */ - ret = hns_roce_v2_qp_modify(hr_dev, ctx, hr_qp); + ret = hns_roce_v2_qp_modify(hr_dev, context, qpc_mask, hr_qp); if (ret) { ibdev_err(ibdev, "failed to modify QP, ret = %d\n", ret); goto out; @@ -4646,7 +4878,7 @@ static int hns_roce_v2_query_qpc(struct hns_roce_dev *hr_dev, if (ret) goto out; - memcpy(hr_context, mailbox->buf, sizeof(*hr_context)); + memcpy(hr_context, mailbox->buf, hr_dev->caps.qpc_sz); out: hns_roce_free_cmd_mailbox(hr_dev, mailbox); @@ -4759,7 +4991,9 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, qp_attr->retry_cnt = roce_get_field(context.byte_212_lsn, V2_QPC_BYTE_212_RETRY_CNT_M, V2_QPC_BYTE_212_RETRY_CNT_S); - qp_attr->rnr_retry = le32_to_cpu(context.rq_rnr_timer); + qp_attr->rnr_retry = roce_get_field(context.byte_244_rnr_rxack, + V2_QPC_BYTE_244_RNR_CNT_M, + V2_QPC_BYTE_244_RNR_CNT_S); done: qp_attr->cur_qp_state = qp_attr->qp_state; @@ -4775,6 +5009,7 @@ done: } qp_init_attr->cap = qp_attr->cap; + qp_init_attr->sq_sig_type = hr_qp->sq_signal_bits; out: mutex_unlock(&hr_qp->mutex); @@ -5004,6 +5239,10 @@ static int hns_roce_v2_modify_srq(struct ib_srq *ibsrq, struct hns_roce_cmd_mailbox *mailbox; int ret; + /* Resizing SRQs is not supported yet */ + if (srq_attr_mask & IB_SRQ_MAX_WR) + return -EINVAL; + if (srq_attr_mask & IB_SRQ_LIMIT) { if (srq_attr->srq_limit >= srq->wqe_cnt) return -EINVAL; @@ -5233,7 +5472,7 @@ static struct hns_roce_aeqe *next_aeqe_sw_v2(struct hns_roce_eq *eq) aeqe = hns_roce_buf_offset(eq->mtr.kmem, (eq->cons_index & (eq->entries - 1)) * - HNS_ROCE_AEQ_ENTRY_SIZE); + eq->eqe_size); return (roce_get_bit(aeqe->asyn, HNS_ROCE_V2_AEQ_AEQE_OWNER_S) ^ !!(eq->cons_index & eq->entries)) ? aeqe : NULL; @@ -5333,7 +5572,8 @@ static struct hns_roce_ceqe *next_ceqe_sw_v2(struct hns_roce_eq *eq) ceqe = hns_roce_buf_offset(eq->mtr.kmem, (eq->cons_index & (eq->entries - 1)) * - HNS_ROCE_CEQ_ENTRY_SIZE); + eq->eqe_size); + return (!!(roce_get_bit(ceqe->comp, HNS_ROCE_V2_CEQ_CEQE_OWNER_S))) ^ (!!(eq->cons_index & eq->entries)) ? ceqe : NULL; } @@ -5374,7 +5614,7 @@ static irqreturn_t hns_roce_v2_msix_interrupt_eq(int irq, void *eq_ptr) { struct hns_roce_eq *eq = eq_ptr; struct hns_roce_dev *hr_dev = eq->hr_dev; - int int_work = 0; + int int_work; if (eq->type_flag == HNS_ROCE_CEQ) /* Completion event interrupt */ @@ -5609,14 +5849,16 @@ static int config_eqc(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq, roce_set_field(eqc->byte_36, HNS_ROCE_EQC_CONS_INDX_M, HNS_ROCE_EQC_CONS_INDX_S, HNS_ROCE_EQ_INIT_CONS_IDX); - /* set nex_eqe_ba[43:12] */ - roce_set_field(eqc->nxt_eqe_ba0, HNS_ROCE_EQC_NXT_EQE_BA_L_M, + roce_set_field(eqc->byte_40, HNS_ROCE_EQC_NXT_EQE_BA_L_M, HNS_ROCE_EQC_NXT_EQE_BA_L_S, eqe_ba[1] >> 12); - /* set nex_eqe_ba[63:44] */ - roce_set_field(eqc->nxt_eqe_ba1, HNS_ROCE_EQC_NXT_EQE_BA_H_M, + roce_set_field(eqc->byte_44, HNS_ROCE_EQC_NXT_EQE_BA_H_M, HNS_ROCE_EQC_NXT_EQE_BA_H_S, eqe_ba[1] >> 44); + roce_set_field(eqc->byte_44, HNS_ROCE_EQC_EQE_SIZE_M, + HNS_ROCE_EQC_EQE_SIZE_S, + eq->eqe_size == HNS_ROCE_V3_EQE_SIZE ? 
1 : 0); + return 0; } @@ -5807,7 +6049,7 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev) eq_cmd = HNS_ROCE_CMD_CREATE_CEQC; eq->type_flag = HNS_ROCE_CEQ; eq->entries = hr_dev->caps.ceqe_depth; - eq->eqe_size = HNS_ROCE_CEQ_ENTRY_SIZE; + eq->eqe_size = hr_dev->caps.ceqe_size; eq->irq = hr_dev->irq[i + other_num + aeq_num]; eq->eq_max_cnt = HNS_ROCE_CEQ_DEFAULT_BURST_NUM; eq->eq_period = HNS_ROCE_CEQ_DEFAULT_INTERVAL; @@ -5816,7 +6058,7 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev) eq_cmd = HNS_ROCE_CMD_CREATE_AEQC; eq->type_flag = HNS_ROCE_AEQ; eq->entries = hr_dev->caps.aeqe_depth; - eq->eqe_size = HNS_ROCE_AEQ_ENTRY_SIZE; + eq->eqe_size = hr_dev->caps.aeqe_size; eq->irq = hr_dev->irq[i - comp_num + other_num]; eq->eq_max_cnt = HNS_ROCE_AEQ_DEFAULT_BURST_NUM; eq->eq_period = HNS_ROCE_AEQ_DEFAULT_INTERVAL; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index ac29be43b6bd..29c9dd4bcbc6 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -60,6 +60,7 @@ #define HNS_ROCE_V2_MAX_SQ_SGE_NUM 64 #define HNS_ROCE_V2_MAX_EXTEND_SGE_NUM 0x200000 #define HNS_ROCE_V2_MAX_SQ_INLINE 0x20 +#define HNS_ROCE_V2_MAX_RC_INL_INN_SZ 32 #define HNS_ROCE_V2_UAR_NUM 256 #define HNS_ROCE_V2_PHY_UAR_NUM 1 #define HNS_ROCE_V2_MAX_IRQ_NUM 65 @@ -77,7 +78,6 @@ #define HNS_ROCE_V2_MAX_SQ_DESC_SZ 64 #define HNS_ROCE_V2_MAX_RQ_DESC_SZ 16 #define HNS_ROCE_V2_MAX_SRQ_DESC_SZ 64 -#define HNS_ROCE_V2_QPC_ENTRY_SZ 256 #define HNS_ROCE_V2_IRRL_ENTRY_SZ 64 #define HNS_ROCE_V2_TRRL_ENTRY_SZ 48 #define HNS_ROCE_V2_EXT_ATOMIC_TRRL_ENTRY_SZ 100 @@ -86,8 +86,10 @@ #define HNS_ROCE_V2_MTPT_ENTRY_SZ 64 #define HNS_ROCE_V2_MTT_ENTRY_SZ 64 #define HNS_ROCE_V2_IDX_ENTRY_SZ 4 -#define HNS_ROCE_V2_CQE_ENTRY_SIZE 32 -#define HNS_ROCE_V2_SCCC_ENTRY_SZ 32 + +#define HNS_ROCE_V2_SCCC_SZ 32 +#define HNS_ROCE_V3_SCCC_SZ 64 + #define HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ PAGE_SIZE #define HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ PAGE_SIZE #define HNS_ROCE_V2_PAGE_SIZE_SUPPORTED 0xFFFFF000 @@ -229,6 +231,7 @@ enum hns_roce_opcode_type { HNS_ROCE_OPC_CFG_TMOUT_LLM = 0x8404, HNS_ROCE_OPC_QUERY_PF_TIMER_RES = 0x8406, HNS_ROCE_OPC_QUERY_PF_CAPS_NUM = 0x8408, + HNS_ROCE_OPC_CFG_ENTRY_SIZE = 0x8409, HNS_ROCE_OPC_CFG_SGID_TB = 0x8500, HNS_ROCE_OPC_CFG_SMAC_TB = 0x8501, HNS_ROCE_OPC_POST_MB = 0x8504, @@ -309,6 +312,9 @@ struct hns_roce_v2_cq_context { #define V2_CQC_BYTE_8_CQN_S 0 #define V2_CQC_BYTE_8_CQN_M GENMASK(23, 0) +#define V2_CQC_BYTE_8_CQE_SIZE_S 27 +#define V2_CQC_BYTE_8_CQE_SIZE_M GENMASK(28, 27) + #define V2_CQC_BYTE_16_CQE_CUR_BLK_ADDR_S 0 #define V2_CQC_BYTE_16_CQE_CUR_BLK_ADDR_M GENMASK(19, 0) @@ -512,6 +518,7 @@ struct hns_roce_v2_qp_context { __le32 byte_248_ack_psn; __le32 byte_252_err_txcqn; __le32 byte_256_sqflush_rqcqe; + __le32 ext[64]; }; #define V2_QPC_BYTE_4_TST_S 0 @@ -896,6 +903,7 @@ struct hns_roce_v2_cqe { u8 smac[4]; __le32 byte_28; __le32 byte_32; + __le32 rsv[8]; }; #define V2_CQE_BYTE_4_OPCODE_S 0 @@ -1187,6 +1195,8 @@ struct hns_roce_v2_rc_send_wqe { #define V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S 0 #define V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M GENMASK(23, 0) +#define V2_RC_SEND_WQE_BYTE_20_INL_TYPE_S 31 + struct hns_roce_wqe_frmr_seg { __le32 pbl_size; __le32 mode_buf_pg_sz; @@ -1537,6 +1547,18 @@ struct hns_roce_cfg_sgid_tb { __le32 vf_sgid_h; __le32 vf_sgid_type_rsv; }; + +enum { + HNS_ROCE_CFG_QPC_SIZE = BIT(0), + HNS_ROCE_CFG_SCCC_SIZE = BIT(1), +}; + +struct 
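[Editor's note: the new V2_CQC_BYTE_8_CQE_SIZE and HNS_ROCE_EQC_EQE_SIZE definitions above are two-bit fields packed into existing context words with GENMASK() masks, and roce_set_field() masks and shifts the value in; writing 1 selects the 64-byte HIP09 CQE. A stand-alone equivalent of that helper using the CQE_SIZE shift/mask from the header hunk:]

#include <stdio.h>
#include <stdint.h>

#define GENMASK(h, l)  (((~0u) << (l)) & (~0u >> (31 - (h))))

#define CQE_SIZE_S  27
#define CQE_SIZE_M  GENMASK(28, 27)

static void set_field(uint32_t *word, uint32_t mask, unsigned int shift, uint32_t val)
{
	*word &= ~mask;                     /* clear the field first */
	*word |= (val << shift) & mask;     /* then write the new value */
}

int main(void)
{
	uint32_t byte_8 = 0x18000003;       /* both CQE_SIZE bits set plus other CQC bits */

	set_field(&byte_8, CQE_SIZE_M, CQE_SIZE_S, 1);  /* 1 => 64-byte CQE */
	printf("byte_8 = 0x%08x\n", byte_8);            /* 0x08000003 */
	return 0;
}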
hns_roce_cfg_entry_size { + __le32 type; + __le32 rsv[4]; + __le32 size; +}; + #define CFG_SGID_TB_TABLE_IDX_S 0 #define CFG_SGID_TB_TABLE_IDX_M GENMASK(7, 0) @@ -1571,7 +1593,7 @@ struct hns_roce_query_pf_caps_a { u8 max_sq_desc_sz; u8 max_rq_desc_sz; u8 max_srq_desc_sz; - u8 cq_entry_sz; + u8 cqe_sz; }; struct hns_roce_query_pf_caps_b { @@ -1581,9 +1603,9 @@ struct hns_roce_query_pf_caps_b { u8 cqc_entry_sz; u8 srqc_entry_sz; u8 idx_entry_sz; - u8 scc_ctx_entry_sz; + u8 sccc_sz; u8 max_mtu; - __le16 qpc_entry_sz; + __le16 qpc_sz; __le16 qpc_timer_entry_sz; __le16 cqc_timer_entry_sz; u8 min_cqes; @@ -1777,8 +1799,8 @@ struct hns_roce_eq_context { __le32 byte_28; __le32 byte_32; __le32 byte_36; - __le32 nxt_eqe_ba0; - __le32 nxt_eqe_ba1; + __le32 byte_40; + __le32 byte_44; __le32 rsv[5]; }; @@ -1920,6 +1942,9 @@ struct hns_roce_eq_context { #define HNS_ROCE_EQC_NXT_EQE_BA_H_S 0 #define HNS_ROCE_EQC_NXT_EQE_BA_H_M GENMASK(19, 0) +#define HNS_ROCE_EQC_EQE_SIZE_S 20 +#define HNS_ROCE_EQC_EQE_SIZE_M GENMASK(21, 20) + #define HNS_ROCE_V2_CEQE_COMP_CQN_S 0 #define HNS_ROCE_V2_CEQE_COMP_CQN_M GENMASK(23, 0) @@ -1941,6 +1966,8 @@ struct hns_roce_eq_context { #define HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S 0 #define HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M GENMASK(23, 0) +#define MAX_SERVICE_LEVEL 0x7 + struct hns_roce_wqe_atomic_seg { __le64 fetchadd_swap_data; __le64 cmp_data; diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 5907cfd878a6..afeffafc59f9 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -141,8 +141,8 @@ static int hns_roce_netdev_event(struct notifier_block *self, struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct hns_roce_ib_iboe *iboe = NULL; struct hns_roce_dev *hr_dev = NULL; - u8 port = 0; - int ret = 0; + int ret; + u8 port; hr_dev = container_of(self, struct hns_roce_dev, iboe.nb); iboe = &hr_dev->iboe; @@ -323,6 +323,8 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx, mutex_init(&context->page_mutex); } + resp.cqe_size = hr_dev->caps.cqe_sz; + ret = ib_copy_to_udata(udata, &resp, sizeof(resp)); if (ret) goto error_fail_copy_to_udata; @@ -454,6 +456,8 @@ static const struct ib_device_ops hns_roce_dev_mr_ops = { static const struct ib_device_ops hns_roce_dev_mw_ops = { .alloc_mw = hns_roce_alloc_mw, .dealloc_mw = hns_roce_dealloc_mw, + + INIT_RDMA_OBJ_SIZE(ib_mw, hns_roce_mw, ibmw), }; static const struct ib_device_ops hns_roce_dev_frmr_ops = { @@ -545,7 +549,8 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) if (ret) return ret; } - ret = ib_register_device(ib_dev, "hns_%d"); + dma_set_max_seg_size(dev, UINT_MAX); + ret = ib_register_device(ib_dev, "hns_%d", dev); if (ret) { dev_err(dev, "ib_register_device failed!\n"); return ret; @@ -587,7 +592,7 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) } ret = hns_roce_init_hem_table(hr_dev, &hr_dev->qp_table.qp_table, - HEM_TYPE_QPC, hr_dev->caps.qpc_entry_sz, + HEM_TYPE_QPC, hr_dev->caps.qpc_sz, hr_dev->caps.num_qps, 1); if (ret) { dev_err(dev, "Failed to init QP context memory, aborting.\n"); @@ -638,11 +643,11 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) } } - if (hr_dev->caps.sccc_entry_sz) { + if (hr_dev->caps.sccc_sz) { ret = hns_roce_init_hem_table(hr_dev, &hr_dev->qp_table.sccc_table, HEM_TYPE_SCCC, - hr_dev->caps.sccc_entry_sz, + hr_dev->caps.sccc_sz, hr_dev->caps.num_qps, 1); if (ret) { dev_err(dev, @@ -682,7 +687,7 @@ err_unmap_qpc_timer: 
hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qpc_timer_table); err_unmap_ctx: - if (hr_dev->caps.sccc_entry_sz) + if (hr_dev->caps.sccc_sz) hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.sccc_table); err_unmap_srq: diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index e5df3884b41d..7f81a695e9af 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -589,28 +589,22 @@ err_table: return ret; } -struct ib_mw *hns_roce_alloc_mw(struct ib_pd *ib_pd, enum ib_mw_type type, - struct ib_udata *udata) +int hns_roce_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata) { - struct hns_roce_dev *hr_dev = to_hr_dev(ib_pd->device); - struct hns_roce_mw *mw; + struct hns_roce_dev *hr_dev = to_hr_dev(ibmw->device); + struct hns_roce_mw *mw = to_hr_mw(ibmw); unsigned long index = 0; int ret; - mw = kmalloc(sizeof(*mw), GFP_KERNEL); - if (!mw) - return ERR_PTR(-ENOMEM); - /* Allocate a key for mw from bitmap */ ret = hns_roce_bitmap_alloc(&hr_dev->mr_table.mtpt_bitmap, &index); if (ret) - goto err_bitmap; + return ret; mw->rkey = hw_index_to_key(index); - mw->ibmw.rkey = mw->rkey; - mw->ibmw.type = type; - mw->pdn = to_hr_pd(ib_pd)->pdn; + ibmw->rkey = mw->rkey; + mw->pdn = to_hr_pd(ibmw->pd)->pdn; mw->pbl_hop_num = hr_dev->caps.pbl_hop_num; mw->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz; mw->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz; @@ -619,15 +613,11 @@ struct ib_mw *hns_roce_alloc_mw(struct ib_pd *ib_pd, enum ib_mw_type type, if (ret) goto err_mw; - return &mw->ibmw; + return 0; err_mw: hns_roce_mw_free(hr_dev, mw); - -err_bitmap: - kfree(mw); - - return ERR_PTR(ret); + return ret; } int hns_roce_dealloc_mw(struct ib_mw *ibmw) @@ -636,8 +626,6 @@ int hns_roce_dealloc_mw(struct ib_mw *ibmw) struct hns_roce_mw *mw = to_hr_mw(ibmw); hns_roce_mw_free(hr_dev, mw); - kfree(mw); - return 0; } @@ -707,19 +695,6 @@ static inline size_t mtr_bufs_size(struct hns_roce_buf_attr *attr) return size; } -static inline int mtr_umem_page_count(struct ib_umem *umem, - unsigned int page_shift) -{ - int count = ib_umem_page_count(umem); - - if (page_shift >= PAGE_SHIFT) - count >>= page_shift - PAGE_SHIFT; - else - count <<= PAGE_SHIFT - page_shift; - - return count; -} - static inline size_t mtr_kmem_direct_size(bool is_direct, size_t alloc_size, unsigned int page_shift) { @@ -767,13 +742,11 @@ static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, struct ib_udata *udata, unsigned long user_addr) { struct ib_device *ibdev = &hr_dev->ib_dev; - unsigned int max_pg_shift = buf_attr->page_shift; - unsigned int best_pg_shift = 0; + unsigned int best_pg_shift; int all_pg_count = 0; size_t direct_size; size_t total_size; - unsigned long tmp; - int ret = 0; + int ret; total_size = mtr_bufs_size(buf_attr); if (total_size < 1) { @@ -782,6 +755,9 @@ static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, } if (udata) { + unsigned long pgsz_bitmap; + unsigned long page_size; + mtr->kmem = NULL; mtr->umem = ib_umem_get(ibdev, user_addr, total_size, buf_attr->user_access); @@ -790,15 +766,17 @@ static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, PTR_ERR(mtr->umem)); return -ENOMEM; } - if (buf_attr->fixed_page) { - best_pg_shift = max_pg_shift; - } else { - tmp = GENMASK(max_pg_shift, 0); - ret = ib_umem_find_best_pgsz(mtr->umem, tmp, user_addr); - best_pg_shift = (ret <= PAGE_SIZE) ? 
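[Editor's note: the hns_roce_alloc_mw() rewrite above follows the new core allocation scheme seen throughout this series: the core allocates the ib_mw (sized by the INIT_RDMA_OBJ_SIZE entry added in hns_roce_main.c, with the driver struct embedding it), and the driver op only initializes the object and returns an errno instead of an ERR_PTR. A user-space model of that ownership split; the names mirror the pattern, not the real core code:]

#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct ib_mw { unsigned int rkey; };

struct drv_mw {                 /* driver struct embeds the core object */
	struct ib_mw ibmw;
	unsigned int pdn;
};

/* driver op: the object already exists, just fill it in; return 0 or -errno */
static int drv_alloc_mw(struct ib_mw *ibmw)
{
	struct drv_mw *mw = container_of(ibmw, struct drv_mw, ibmw);

	mw->pdn = 7;
	ibmw->rkey = 0x1000;
	return 0;
}

int main(void)
{
	/* the "core" allocates using the driver-declared size, then calls the op */
	struct drv_mw *obj = calloc(1, sizeof(*obj));
	int ret = drv_alloc_mw(&obj->ibmw);

	printf("ret=%d rkey=0x%x pdn=%u\n", ret, obj->ibmw.rkey, obj->pdn);
	free(obj);
	return 0;
}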
- PAGE_SHIFT : ilog2(ret); - } - all_pg_count = mtr_umem_page_count(mtr->umem, best_pg_shift); + if (buf_attr->fixed_page) + pgsz_bitmap = 1 << buf_attr->page_shift; + else + pgsz_bitmap = GENMASK(buf_attr->page_shift, PAGE_SHIFT); + + page_size = ib_umem_find_best_pgsz(mtr->umem, pgsz_bitmap, + user_addr); + if (!page_size) + return -EINVAL; + best_pg_shift = order_base_2(page_size); + all_pg_count = ib_umem_num_dma_blocks(mtr->umem, page_size); ret = 0; } else { mtr->umem = NULL; @@ -808,16 +786,15 @@ static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, return -ENOMEM; } direct_size = mtr_kmem_direct_size(is_direct, total_size, - max_pg_shift); + buf_attr->page_shift); ret = hns_roce_buf_alloc(hr_dev, total_size, direct_size, - mtr->kmem, max_pg_shift); + mtr->kmem, buf_attr->page_shift); if (ret) { ibdev_err(ibdev, "Failed to alloc kmem, ret %d\n", ret); goto err_alloc_mem; - } else { - best_pg_shift = max_pg_shift; - all_pg_count = mtr->kmem->npages; } + best_pg_shift = buf_attr->page_shift; + all_pg_count = mtr->kmem->npages; } /* must bigger than minimum hardware page shift */ @@ -967,7 +944,7 @@ static int mtr_init_buf_cfg(struct hns_roce_dev *hr_dev, unsigned int *buf_page_shift) { struct hns_roce_buf_region *r; - unsigned int page_shift = 0; + unsigned int page_shift; int page_cnt = 0; size_t buf_size; int region_cnt; diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c index b10c50b8736e..98f69496adb4 100644 --- a/drivers/infiniband/hw/hns/hns_roce_pd.c +++ b/drivers/infiniband/hw/hns/hns_roce_pd.c @@ -82,9 +82,10 @@ int hns_roce_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) return 0; } -void hns_roce_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) +int hns_roce_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) { hns_roce_pd_free(to_hr_dev(pd->device), to_hr_pd(pd)->pdn); + return 0; } int hns_roce_uar_alloc(struct hns_roce_dev *hr_dev, struct hns_roce_uar *uar) diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index c063c450c715..6c081dd985fc 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -41,8 +41,6 @@ #include "hns_roce_hem.h" #include <rdma/hns-abi.h> -#define SQP_NUM (2 * HNS_ROCE_MAX_PORTS) - static void flush_work_handle(struct work_struct *work) { struct hns_roce_work *flush_work = container_of(work, @@ -288,7 +286,7 @@ static int alloc_qpc(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) } } - if (hr_dev->caps.sccc_entry_sz) { + if (hr_dev->caps.sccc_sz) { /* Alloc memory for SCC CTX */ ret = hns_roce_table_get(hr_dev, &qp_table->sccc_table, hr_qp->qpn); @@ -551,10 +549,9 @@ static int set_kernel_sq_size(struct hns_roce_dev *hr_dev, int ret; if (!cap->max_send_wr || cap->max_send_wr > hr_dev->caps.max_wqes || - cap->max_send_sge > hr_dev->caps.max_sq_sg || - cap->max_inline_data > hr_dev->caps.max_sq_inline) { + cap->max_send_sge > hr_dev->caps.max_sq_sg) { ibdev_err(ibdev, - "failed to check SQ WR, SGE or inline num, ret = %d.\n", + "failed to check SQ WR or SGE num, ret = %d.\n", -EINVAL); return -EINVAL; } @@ -577,9 +574,6 @@ static int set_kernel_sq_size(struct hns_roce_dev *hr_dev, cap->max_send_wr = cnt; cap->max_send_sge = hr_qp->sq.max_gs; - /* We don't support inline sends for kernel QPs (yet) */ - cap->max_inline_data = 0; - return 0; } @@ -847,6 +841,11 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, hr_qp->ibqp.qp_type = 
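[Editor's note: mtr_alloc_bufs() above now lets ib_umem_find_best_pgsz() pick the largest page size allowed by pgsz_bitmap and counts blocks with ib_umem_num_dma_blocks() instead of hand-converting PAGE_SIZE counts. A simplified stand-alone version of the two calculations; it ignores the IOVA-alignment constraints the real helper also applies:]

#include <stdio.h>
#include <stdint.h>

/* Pick the highest set bit of pgsz_bitmap that is <= the buffer length. */
static uint64_t find_best_pgsz(uint64_t length, uint64_t pgsz_bitmap)
{
	uint64_t best = 0;

	for (uint64_t sz = 1; sz && sz <= length; sz <<= 1)
		if (pgsz_bitmap & sz)
			best = sz;
	return best;
}

static uint64_t num_dma_blocks(uint64_t length, uint64_t pgsz)
{
	return (length + pgsz - 1) / pgsz;      /* round up */
}

int main(void)
{
	uint64_t len = 6 << 20;                                 /* 6 MiB umem */
	uint64_t bitmap = (1u << 12) | (1u << 16) | (1u << 21); /* 4K, 64K, 2M allowed */
	uint64_t pgsz = find_best_pgsz(len, bitmap);

	printf("page size %llu, %llu dma blocks\n",
	       (unsigned long long)pgsz,
	       (unsigned long long)num_dma_blocks(len, pgsz));  /* 2097152, 3 */
	return 0;
}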
init_attr->qp_type; + if (init_attr->cap.max_inline_data > hr_dev->caps.max_sq_inline) + init_attr->cap.max_inline_data = hr_dev->caps.max_sq_inline; + + hr_qp->max_inline_data = init_attr->cap.max_inline_data; + if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) hr_qp->sq_signal_bits = IB_SIGNAL_ALL_WR; else @@ -1014,53 +1013,32 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd, int ret; switch (init_attr->qp_type) { - case IB_QPT_RC: { - hr_qp = kzalloc(sizeof(*hr_qp), GFP_KERNEL); - if (!hr_qp) - return ERR_PTR(-ENOMEM); - - ret = hns_roce_create_qp_common(hr_dev, pd, init_attr, udata, - hr_qp); - if (ret) { - ibdev_err(ibdev, "Create QP 0x%06lx failed(%d)\n", - hr_qp->qpn, ret); - kfree(hr_qp); - return ERR_PTR(ret); - } - + case IB_QPT_RC: + case IB_QPT_GSI: break; + default: + ibdev_err(ibdev, "not support QP type %d\n", + init_attr->qp_type); + return ERR_PTR(-EOPNOTSUPP); } - case IB_QPT_GSI: { - /* Userspace is not allowed to create special QPs: */ - if (udata) { - ibdev_err(ibdev, "not support usr space GSI\n"); - return ERR_PTR(-EINVAL); - } - hr_qp = kzalloc(sizeof(*hr_qp), GFP_KERNEL); - if (!hr_qp) - return ERR_PTR(-ENOMEM); + hr_qp = kzalloc(sizeof(*hr_qp), GFP_KERNEL); + if (!hr_qp) + return ERR_PTR(-ENOMEM); + if (init_attr->qp_type == IB_QPT_GSI) { hr_qp->port = init_attr->port_num - 1; hr_qp->phy_port = hr_dev->iboe.phy_port[hr_qp->port]; - - ret = hns_roce_create_qp_common(hr_dev, pd, init_attr, udata, - hr_qp); - if (ret) { - ibdev_err(ibdev, "Create GSI QP failed!\n"); - kfree(hr_qp); - return ERR_PTR(ret); - } - - break; - } - default:{ - ibdev_err(ibdev, "not support QP type %d\n", - init_attr->qp_type); - return ERR_PTR(-EOPNOTSUPP); - } } + ret = hns_roce_create_qp_common(hr_dev, pd, init_attr, udata, hr_qp); + if (ret) { + ibdev_err(ibdev, "Create QP type 0x%x failed(%d)\n", + init_attr->qp_type, ret); + ibdev_err(ibdev, "Create GSI QP failed!\n"); + kfree(hr_qp); + return ERR_PTR(ret); + } return &hr_qp->ibqp; } @@ -1161,8 +1139,10 @@ int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, mutex_lock(&hr_qp->mutex); - cur_state = attr_mask & IB_QP_CUR_STATE ? - attr->cur_qp_state : (enum ib_qp_state)hr_qp->state; + if (attr_mask & IB_QP_CUR_STATE && attr->cur_qp_state != hr_qp->state) + goto out; + + cur_state = hr_qp->state; new_state = attr_mask & IB_QP_STATE ? 
attr->qp_state : cur_state; if (ibqp->uobject && diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index b9e2dbd372b6..8caf74e44efd 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -285,7 +285,7 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, struct hns_roce_srq *srq = to_hr_srq(ib_srq); struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_ib_create_srq ucmd = {}; - int ret = 0; + int ret; u32 cqn; /* Check the actual SRQ wqe and SRQ sge num */ @@ -363,7 +363,7 @@ err_buf_alloc: return ret; } -void hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) +int hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device); struct hns_roce_srq *srq = to_hr_srq(ibsrq); @@ -372,6 +372,7 @@ void hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) free_srq_idx(hr_dev, srq); free_srq_wrid(srq); free_srq_buf(hr_dev, srq); + return 0; } int hns_roce_init_srq_table(struct hns_roce_dev *hr_dev) diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h index 25747b85a79c..832b80de004f 100644 --- a/drivers/infiniband/hw/i40iw/i40iw.h +++ b/drivers/infiniband/hw/i40iw/i40iw.h @@ -409,8 +409,8 @@ static inline struct i40iw_qp *to_iwqp(struct ib_qp *ibqp) } /* i40iw.c */ -void i40iw_add_ref(struct ib_qp *); -void i40iw_rem_ref(struct ib_qp *); +void i40iw_qp_add_ref(struct ib_qp *ibqp); +void i40iw_qp_rem_ref(struct ib_qp *ibqp); struct ib_qp *i40iw_get_qp(struct ib_device *, int); void i40iw_flush_wqes(struct i40iw_device *iwdev, @@ -554,9 +554,8 @@ enum i40iw_status_code i40iw_manage_qhash(struct i40iw_device *iwdev, bool wait); void i40iw_receive_ilq(struct i40iw_sc_vsi *vsi, struct i40iw_puda_buf *rbuf); void i40iw_free_sqbuf(struct i40iw_sc_vsi *vsi, void *bufp); -void i40iw_free_qp_resources(struct i40iw_device *iwdev, - struct i40iw_qp *iwqp, - u32 qp_num); +void i40iw_free_qp_resources(struct i40iw_qp *iwqp); + enum i40iw_status_code i40iw_obj_aligned_mem(struct i40iw_device *iwdev, struct i40iw_dma_mem *memptr, u32 size, u32 mask); diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index a3b95805c154..3053c345a5a3 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -2322,7 +2322,7 @@ static void i40iw_rem_ref_cm_node(struct i40iw_cm_node *cm_node) iwqp = cm_node->iwqp; if (iwqp) { iwqp->cm_node = NULL; - i40iw_rem_ref(&iwqp->ibqp); + i40iw_qp_rem_ref(&iwqp->ibqp); cm_node->iwqp = NULL; } else if (cm_node->qhash_set) { i40iw_get_addr_info(cm_node, &nfo); @@ -3452,7 +3452,7 @@ void i40iw_cm_disconn(struct i40iw_qp *iwqp) kfree(work); return; } - i40iw_add_ref(&iwqp->ibqp); + i40iw_qp_add_ref(&iwqp->ibqp); spin_unlock_irqrestore(&iwdev->qptable_lock, flags); work->iwqp = iwqp; @@ -3623,7 +3623,7 @@ static void i40iw_disconnect_worker(struct work_struct *work) kfree(dwork); i40iw_cm_disconn_true(iwqp); - i40iw_rem_ref(&iwqp->ibqp); + i40iw_qp_rem_ref(&iwqp->ibqp); } /** @@ -3745,7 +3745,7 @@ int i40iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) cm_node->lsmm_size = accept.size + conn_param->private_data_len; i40iw_cm_init_tsa_conn(iwqp, cm_node); cm_id->add_ref(cm_id); - i40iw_add_ref(&iwqp->ibqp); + i40iw_qp_add_ref(&iwqp->ibqp); attr.qp_state = IB_QPS_RTS; cm_node->qhash_set = false; @@ -3908,7 +3908,7 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct 
iw_cm_conn_param *conn_param) iwqp->cm_node = cm_node; cm_node->iwqp = iwqp; iwqp->cm_id = cm_id; - i40iw_add_ref(&iwqp->ibqp); + i40iw_qp_add_ref(&iwqp->ibqp); if (cm_node->state != I40IW_CM_STATE_OFFLOADED) { cm_node->state = I40IW_CM_STATE_SYN_SENT; diff --git a/drivers/infiniband/hw/i40iw/i40iw_hw.c b/drivers/infiniband/hw/i40iw/i40iw_hw.c index e1085634b8d9..56fdc161f6f8 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_hw.c +++ b/drivers/infiniband/hw/i40iw/i40iw_hw.c @@ -313,7 +313,7 @@ void i40iw_process_aeq(struct i40iw_device *iwdev) __func__, info->qp_cq_id); continue; } - i40iw_add_ref(&iwqp->ibqp); + i40iw_qp_add_ref(&iwqp->ibqp); spin_unlock_irqrestore(&iwdev->qptable_lock, flags); qp = &iwqp->sc_qp; spin_lock_irqsave(&iwqp->lock, flags); @@ -426,7 +426,7 @@ void i40iw_process_aeq(struct i40iw_device *iwdev) break; } if (info->qp) - i40iw_rem_ref(&iwqp->ibqp); + i40iw_qp_rem_ref(&iwqp->ibqp); } while (1); if (aeqcnt) diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c index 58a433135a03..2408b279e4c2 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_main.c +++ b/drivers/infiniband/hw/i40iw/i40iw_main.c @@ -192,9 +192,9 @@ static void i40iw_enable_intr(struct i40iw_sc_dev *dev, u32 msix_id) * i40iw_dpc - tasklet for aeq and ceq 0 * @data: iwarp device */ -static void i40iw_dpc(unsigned long data) +static void i40iw_dpc(struct tasklet_struct *t) { - struct i40iw_device *iwdev = (struct i40iw_device *)data; + struct i40iw_device *iwdev = from_tasklet(iwdev, t, dpc_tasklet); if (iwdev->msix_shared) i40iw_process_ceq(iwdev, iwdev->ceqlist); @@ -206,9 +206,9 @@ static void i40iw_dpc(unsigned long data) * i40iw_ceq_dpc - dpc handler for CEQ * @data: data points to CEQ */ -static void i40iw_ceq_dpc(unsigned long data) +static void i40iw_ceq_dpc(struct tasklet_struct *t) { - struct i40iw_ceq *iwceq = (struct i40iw_ceq *)data; + struct i40iw_ceq *iwceq = from_tasklet(iwceq, t, dpc_tasklet); struct i40iw_device *iwdev = iwceq->iwdev; i40iw_process_ceq(iwdev, iwceq); @@ -689,10 +689,10 @@ static enum i40iw_status_code i40iw_configure_ceq_vector(struct i40iw_device *iw enum i40iw_status_code status; if (iwdev->msix_shared && !ceq_id) { - tasklet_init(&iwdev->dpc_tasklet, i40iw_dpc, (unsigned long)iwdev); + tasklet_setup(&iwdev->dpc_tasklet, i40iw_dpc); status = request_irq(msix_vec->irq, i40iw_irq_handler, 0, "AEQCEQ", iwdev); } else { - tasklet_init(&iwceq->dpc_tasklet, i40iw_ceq_dpc, (unsigned long)iwceq); + tasklet_setup(&iwceq->dpc_tasklet, i40iw_ceq_dpc); status = request_irq(msix_vec->irq, i40iw_ceq_handler, 0, "CEQ", iwceq); } @@ -841,7 +841,7 @@ static enum i40iw_status_code i40iw_configure_aeq_vector(struct i40iw_device *iw u32 ret = 0; if (!iwdev->msix_shared) { - tasklet_init(&iwdev->dpc_tasklet, i40iw_dpc, (unsigned long)iwdev); + tasklet_setup(&iwdev->dpc_tasklet, i40iw_dpc); ret = request_irq(msix_vec->irq, i40iw_irq_handler, 0, "i40iw", iwdev); } if (ret) { @@ -1573,7 +1573,7 @@ static enum i40iw_status_code i40iw_setup_init_state(struct i40iw_handler *hdl, status = i40iw_save_msix_info(iwdev, ldev); if (status) return status; - iwdev->hw.dev_context = (void *)ldev->pcidev; + iwdev->hw.pcidev = ldev->pcidev; iwdev->hw.hw_addr = ldev->hw_addr; status = i40iw_allocate_dma_mem(&iwdev->hw, &iwdev->obj_mem, 8192, 4096); diff --git a/drivers/infiniband/hw/i40iw/i40iw_pble.c b/drivers/infiniband/hw/i40iw/i40iw_pble.c index 540aab5e502d..5f97643e22e5 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_pble.c +++ 
b/drivers/infiniband/hw/i40iw/i40iw_pble.c @@ -167,7 +167,7 @@ static enum i40iw_status_code add_sd_direct(struct i40iw_sc_dev *dev, */ static void i40iw_free_vmalloc_mem(struct i40iw_hw *hw, struct i40iw_chunk *chunk) { - struct pci_dev *pcidev = (struct pci_dev *)hw->dev_context; + struct pci_dev *pcidev = hw->pcidev; int i; if (!chunk->pg_cnt) @@ -193,7 +193,7 @@ static enum i40iw_status_code i40iw_get_vmalloc_mem(struct i40iw_hw *hw, struct i40iw_chunk *chunk, int pg_cnt) { - struct pci_dev *pcidev = (struct pci_dev *)hw->dev_context; + struct pci_dev *pcidev = hw->pcidev; struct page *page; u8 *addr; u32 size; diff --git a/drivers/infiniband/hw/i40iw/i40iw_type.h b/drivers/infiniband/hw/i40iw/i40iw_type.h index 54c323c40d96..c3babf3cbb8e 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_type.h +++ b/drivers/infiniband/hw/i40iw/i40iw_type.h @@ -73,6 +73,7 @@ struct i40iw_pd_ops; struct i40iw_priv_qp_ops; struct i40iw_priv_cq_ops; struct i40iw_hmc_ops; +struct pci_dev; enum i40iw_page_size { I40IW_PAGE_SIZE_4K, @@ -261,7 +262,7 @@ struct i40iw_vsi_pestat { struct i40iw_hw { u8 __iomem *hw_addr; - void *dev_context; + struct pci_dev *pcidev; struct i40iw_hmc_info hmc; }; diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c index e07fb37af086..644f8c641aa0 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_utils.c +++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c @@ -478,25 +478,6 @@ void i40iw_cleanup_pending_cqp_op(struct i40iw_device *iwdev) } /** - * i40iw_free_qp - callback after destroy cqp completes - * @cqp_request: cqp request for destroy qp - * @num: not used - */ -static void i40iw_free_qp(struct i40iw_cqp_request *cqp_request, u32 num) -{ - struct i40iw_sc_qp *qp = (struct i40iw_sc_qp *)cqp_request->param; - struct i40iw_qp *iwqp = (struct i40iw_qp *)qp->back_qp; - struct i40iw_device *iwdev; - u32 qp_num = iwqp->ibqp.qp_num; - - iwdev = iwqp->iwdev; - - i40iw_rem_pdusecount(iwqp->iwpd, iwdev); - i40iw_free_qp_resources(iwdev, iwqp, qp_num); - i40iw_rem_devusecount(iwdev); -} - -/** * i40iw_wait_event - wait for completion * @iwdev: iwarp device * @cqp_request: cqp request to wait @@ -616,26 +597,23 @@ void i40iw_rem_pdusecount(struct i40iw_pd *iwpd, struct i40iw_device *iwdev) } /** - * i40iw_add_ref - add refcount for qp + * i40iw_qp_add_ref - add refcount for qp * @ibqp: iqarp qp */ -void i40iw_add_ref(struct ib_qp *ibqp) +void i40iw_qp_add_ref(struct ib_qp *ibqp) { struct i40iw_qp *iwqp = (struct i40iw_qp *)ibqp; - atomic_inc(&iwqp->refcount); + refcount_inc(&iwqp->refcount); } /** - * i40iw_rem_ref - rem refcount for qp and free if 0 + * i40iw_qp_rem_ref - rem refcount for qp and free if 0 * @ibqp: iqarp qp */ -void i40iw_rem_ref(struct ib_qp *ibqp) +void i40iw_qp_rem_ref(struct ib_qp *ibqp) { struct i40iw_qp *iwqp; - enum i40iw_status_code status; - struct i40iw_cqp_request *cqp_request; - struct cqp_commands_info *cqp_info; struct i40iw_device *iwdev; u32 qp_num; unsigned long flags; @@ -643,7 +621,7 @@ void i40iw_rem_ref(struct ib_qp *ibqp) iwqp = to_iwqp(ibqp); iwdev = iwqp->iwdev; spin_lock_irqsave(&iwdev->qptable_lock, flags); - if (!atomic_dec_and_test(&iwqp->refcount)) { + if (!refcount_dec_and_test(&iwqp->refcount)) { spin_unlock_irqrestore(&iwdev->qptable_lock, flags); return; } @@ -651,25 +629,8 @@ void i40iw_rem_ref(struct ib_qp *ibqp) qp_num = iwqp->ibqp.qp_num; iwdev->qp_table[qp_num] = NULL; spin_unlock_irqrestore(&iwdev->qptable_lock, flags); - cqp_request = i40iw_get_cqp_request(&iwdev->cqp, false); - if (!cqp_request) 
- return; - - cqp_request->callback_fcn = i40iw_free_qp; - cqp_request->param = (void *)&iwqp->sc_qp; - cqp_info = &cqp_request->info; - cqp_info->cqp_cmd = OP_QP_DESTROY; - cqp_info->post_sq = 1; - cqp_info->in.u.qp_destroy.qp = &iwqp->sc_qp; - cqp_info->in.u.qp_destroy.scratch = (uintptr_t)cqp_request; - cqp_info->in.u.qp_destroy.remove_hash_idx = true; - status = i40iw_handle_cqp_op(iwdev, cqp_request); - if (!status) - return; + complete(&iwqp->free_qp); - i40iw_rem_pdusecount(iwqp->iwpd, iwdev); - i40iw_free_qp_resources(iwdev, iwqp, qp_num); - i40iw_rem_devusecount(iwdev); } /** @@ -751,7 +712,7 @@ enum i40iw_status_code i40iw_allocate_dma_mem(struct i40iw_hw *hw, u64 size, u32 alignment) { - struct pci_dev *pcidev = (struct pci_dev *)hw->dev_context; + struct pci_dev *pcidev = hw->pcidev; if (!mem) return I40IW_ERR_PARAM; @@ -770,7 +731,7 @@ enum i40iw_status_code i40iw_allocate_dma_mem(struct i40iw_hw *hw, */ void i40iw_free_dma_mem(struct i40iw_hw *hw, struct i40iw_dma_mem *mem) { - struct pci_dev *pcidev = (struct pci_dev *)hw->dev_context; + struct pci_dev *pcidev = hw->pcidev; if (!mem || !mem->va) return; @@ -936,7 +897,7 @@ static void i40iw_terminate_timeout(struct timer_list *t) struct i40iw_sc_qp *qp = (struct i40iw_sc_qp *)&iwqp->sc_qp; i40iw_terminate_done(qp, 1); - i40iw_rem_ref(&iwqp->ibqp); + i40iw_qp_rem_ref(&iwqp->ibqp); } /** @@ -948,7 +909,7 @@ void i40iw_terminate_start_timer(struct i40iw_sc_qp *qp) struct i40iw_qp *iwqp; iwqp = (struct i40iw_qp *)qp->back_qp; - i40iw_add_ref(&iwqp->ibqp); + i40iw_qp_add_ref(&iwqp->ibqp); timer_setup(&iwqp->terminate_timer, i40iw_terminate_timeout, 0); iwqp->terminate_timer.expires = jiffies + HZ; add_timer(&iwqp->terminate_timer); @@ -964,7 +925,7 @@ void i40iw_terminate_del_timer(struct i40iw_sc_qp *qp) iwqp = (struct i40iw_qp *)qp->back_qp; if (del_timer(&iwqp->terminate_timer)) - i40iw_rem_ref(&iwqp->ibqp); + i40iw_qp_rem_ref(&iwqp->ibqp); } /** diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index b51339328a51..581ecbadf586 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -328,12 +328,13 @@ error: * @ibpd: ptr of pd to be deallocated * @udata: user data or null for kernel object */ -static void i40iw_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) +static int i40iw_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct i40iw_pd *iwpd = to_iwpd(ibpd); struct i40iw_device *iwdev = to_iwdev(ibpd->device); i40iw_rem_pdusecount(iwpd, iwdev); + return 0; } /** @@ -363,11 +364,11 @@ static struct i40iw_pbl *i40iw_get_pbl(unsigned long va, * @iwqp: qp ptr (user or kernel) * @qp_num: qp number assigned */ -void i40iw_free_qp_resources(struct i40iw_device *iwdev, - struct i40iw_qp *iwqp, - u32 qp_num) +void i40iw_free_qp_resources(struct i40iw_qp *iwqp) { struct i40iw_pbl *iwpbl = &iwqp->iwpbl; + struct i40iw_device *iwdev = iwqp->iwdev; + u32 qp_num = iwqp->ibqp.qp_num; i40iw_ieq_cleanup_qp(iwdev->vsi.ieq, &iwqp->sc_qp); i40iw_dealloc_push_page(iwdev, &iwqp->sc_qp); @@ -379,7 +380,7 @@ void i40iw_free_qp_resources(struct i40iw_device *iwdev, i40iw_free_dma_mem(iwdev->sc_dev.hw, &iwqp->kqp.dma_mem); kfree(iwqp->kqp.wrid_mem); iwqp->kqp.wrid_mem = NULL; - kfree(iwqp->allocated_buffer); + kfree(iwqp); } /** @@ -401,6 +402,10 @@ static void i40iw_clean_cqes(struct i40iw_qp *iwqp, struct i40iw_cq *iwcq) static int i40iw_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) { struct i40iw_qp *iwqp = to_iwqp(ibqp); + 
struct ib_qp_attr attr; + struct i40iw_device *iwdev = iwqp->iwdev; + + memset(&attr, 0, sizeof(attr)); iwqp->destroyed = 1; @@ -415,7 +420,15 @@ static int i40iw_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) } } - i40iw_rem_ref(&iwqp->ibqp); + attr.qp_state = IB_QPS_ERR; + i40iw_modify_qp(&iwqp->ibqp, &attr, IB_QP_STATE, NULL); + i40iw_qp_rem_ref(&iwqp->ibqp); + wait_for_completion(&iwqp->free_qp); + i40iw_cqp_qp_destroy_cmd(&iwdev->sc_dev, &iwqp->sc_qp); + i40iw_rem_pdusecount(iwqp->iwpd, iwdev); + i40iw_free_qp_resources(iwqp); + i40iw_rem_devusecount(iwdev); + return 0; } @@ -524,7 +537,6 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, struct i40iw_create_qp_req req; struct i40iw_create_qp_resp uresp; u32 qp_num = 0; - void *mem; enum i40iw_status_code ret; int err_code; int sq_size; @@ -566,16 +578,15 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, init_info.qp_uk_init_info.max_rq_frag_cnt = init_attr->cap.max_recv_sge; init_info.qp_uk_init_info.max_inline_data = init_attr->cap.max_inline_data; - mem = kzalloc(sizeof(*iwqp), GFP_KERNEL); - if (!mem) + iwqp = kzalloc(sizeof(*iwqp), GFP_KERNEL); + if (!iwqp) return ERR_PTR(-ENOMEM); - iwqp = (struct i40iw_qp *)mem; - iwqp->allocated_buffer = mem; qp = &iwqp->sc_qp; qp->back_qp = (void *)iwqp; qp->push_idx = I40IW_INVALID_PUSH_PAGE_INDEX; + iwqp->iwdev = iwdev; iwqp->ctx_info.iwarp_info = &iwqp->iwarp_info; if (i40iw_allocate_dma_mem(dev->hw, @@ -600,7 +611,6 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, goto error; } - iwqp->iwdev = iwdev; iwqp->iwpd = iwpd; iwqp->ibqp.qp_num = qp_num; qp = &iwqp->sc_qp; @@ -714,7 +724,7 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, goto error; } - i40iw_add_ref(&iwqp->ibqp); + refcount_set(&iwqp->refcount, 1); spin_lock_init(&iwqp->lock); iwqp->sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) ? 
1 : 0; iwdev->qp_table[qp_num] = iwqp; @@ -736,10 +746,11 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, } init_completion(&iwqp->sq_drained); init_completion(&iwqp->rq_drained); + init_completion(&iwqp->free_qp); return &iwqp->ibqp; error: - i40iw_free_qp_resources(iwdev, iwqp, qp_num); + i40iw_free_qp_resources(iwqp); return ERR_PTR(err_code); } @@ -1052,7 +1063,7 @@ void i40iw_cq_wq_destroy(struct i40iw_device *iwdev, struct i40iw_sc_cq *cq) * @ib_cq: cq pointer * @udata: user data or NULL for kernel object */ -static void i40iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) +static int i40iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) { struct i40iw_cq *iwcq; struct i40iw_device *iwdev; @@ -1064,6 +1075,7 @@ static void i40iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) i40iw_cq_wq_destroy(iwdev, cq); cq_free_resources(iwdev, iwcq); i40iw_rem_devusecount(iwdev); + return 0; } /** @@ -1320,8 +1332,7 @@ static void i40iw_copy_user_pgaddrs(struct i40iw_mr *iwmr, if (iwmr->type == IW_MEMREG_TYPE_QP) iwpbl->qp_mr.sq_page = sg_page(region->sg_head.sgl); - rdma_for_each_block(region->sg_head.sgl, &biter, region->nmap, - iwmr->page_size) { + rdma_umem_for_each_dma_block(region, &biter, iwmr->page_size) { *pbl = rdma_block_iter_dma_address(&biter); pbl = i40iw_next_pbl_addr(pbl, &pinfo, &idx); } @@ -1744,15 +1755,12 @@ static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd, struct i40iw_mr *iwmr; struct ib_umem *region; struct i40iw_mem_reg_req req; - u64 pbl_depth = 0; u32 stag = 0; u16 access; - u64 region_length; bool use_pbles = false; unsigned long flags; int err = -ENOSYS; int ret; - int pg_shift; if (!udata) return ERR_PTR(-EOPNOTSUPP); @@ -1787,18 +1795,13 @@ static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd, if (req.reg_type == IW_MEMREG_TYPE_MEM) iwmr->page_size = ib_umem_find_best_pgsz(region, SZ_4K | SZ_2M, virt); - - region_length = region->length + (start & (iwmr->page_size - 1)); - pg_shift = ffs(iwmr->page_size) - 1; - pbl_depth = region_length >> pg_shift; - pbl_depth += (region_length & (iwmr->page_size - 1)) ? 
1 : 0; iwmr->length = region->length; iwpbl->user_base = virt; palloc = &iwpbl->pble_alloc; iwmr->type = req.reg_type; - iwmr->page_cnt = (u32)pbl_depth; + iwmr->page_cnt = ib_umem_num_dma_blocks(region, iwmr->page_size); switch (req.reg_type) { case IW_MEMREG_TYPE_QP: @@ -2636,13 +2639,13 @@ static const struct ib_device_ops i40iw_dev_ops = { .get_hw_stats = i40iw_get_hw_stats, .get_port_immutable = i40iw_port_immutable, .iw_accept = i40iw_accept, - .iw_add_ref = i40iw_add_ref, + .iw_add_ref = i40iw_qp_add_ref, .iw_connect = i40iw_connect, .iw_create_listen = i40iw_create_listen, .iw_destroy_listen = i40iw_destroy_listen, .iw_get_qp = i40iw_get_qp, .iw_reject = i40iw_reject, - .iw_rem_ref = i40iw_rem_ref, + .iw_rem_ref = i40iw_qp_rem_ref, .map_mr_sg = i40iw_map_mr_sg, .mmap = i40iw_mmap, .modify_qp = i40iw_modify_qp, @@ -2668,7 +2671,7 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev { struct i40iw_ib_device *iwibdev; struct net_device *netdev = iwdev->netdev; - struct pci_dev *pcidev = (struct pci_dev *)iwdev->hw.dev_context; + struct pci_dev *pcidev = iwdev->hw.pcidev; iwibdev = ib_alloc_device(i40iw_ib_device, ibdev); if (!iwibdev) { @@ -2758,7 +2761,8 @@ int i40iw_register_rdma_device(struct i40iw_device *iwdev) if (ret) goto error; - ret = ib_register_device(&iwibdev->ibdev, "i40iw%d"); + dma_set_max_seg_size(&iwdev->hw.pcidev->dev, UINT_MAX); + ret = ib_register_device(&iwibdev->ibdev, "i40iw%d", &iwdev->hw.pcidev->dev); if (ret) goto error; diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.h b/drivers/infiniband/hw/i40iw/i40iw_verbs.h index 331bc21cbcc7..bab71f3e5637 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.h +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.h @@ -139,7 +139,7 @@ struct i40iw_qp { struct i40iw_qp_host_ctx_info ctx_info; struct i40iwarp_offload_info iwarp_info; void *allocated_buffer; - atomic_t refcount; + refcount_t refcount; struct iw_cm_id *cm_id; void *cm_node; struct ib_mr *lsmm_mr; @@ -174,5 +174,6 @@ struct i40iw_qp { struct i40iw_dma_mem ietf_mem; struct completion sq_drained; struct completion rq_drained; + struct completion free_qp; }; #endif diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c index 5f8f8d5c0ce0..7321d6ab5fe1 100644 --- a/drivers/infiniband/hw/mlx4/ah.c +++ b/drivers/infiniband/hw/mlx4/ah.c @@ -232,8 +232,3 @@ int mlx4_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr) return 0; } - -void mlx4_ib_destroy_ah(struct ib_ah *ah, u32 flags) -{ - return; -} diff --git a/drivers/infiniband/hw/mlx4/cm.c b/drivers/infiniband/hw/mlx4/cm.c index b591861934b3..4aff1c8298b1 100644 --- a/drivers/infiniband/hw/mlx4/cm.c +++ b/drivers/infiniband/hw/mlx4/cm.c @@ -54,11 +54,20 @@ struct id_map_entry { struct delayed_work timeout; }; +struct rej_tmout_entry { + int slave; + u32 rem_pv_cm_id; + struct delayed_work timeout; + struct xarray *xa_rej_tmout; +}; + struct cm_generic_msg { struct ib_mad_hdr hdr; __be32 local_comm_id; __be32 remote_comm_id; + unsigned char unused[2]; + __be16 rej_reason; }; struct cm_sidr_generic_msg { @@ -280,11 +289,15 @@ static void schedule_delayed(struct ib_device *ibdev, struct id_map_entry *id) if (!sriov->is_going_down && !id->scheduled_delete) { id->scheduled_delete = 1; schedule_delayed_work(&id->timeout, CM_CLEANUP_CACHE_TIMEOUT); + } else if (id->scheduled_delete) { + /* Adjust timeout if already scheduled */ + mod_delayed_work(system_wq, &id->timeout, CM_CLEANUP_CACHE_TIMEOUT); } spin_unlock_irqrestore(&sriov->going_down_lock, flags); 
spin_unlock(&sriov->id_map_lock); } +#define REJ_REASON(m) be16_to_cpu(((struct cm_generic_msg *)(m))->rej_reason) int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id, struct ib_mad *mad) { @@ -293,8 +306,10 @@ int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id int pv_cm_id = -1; if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID || - mad->mad_hdr.attr_id == CM_REP_ATTR_ID || - mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) { + mad->mad_hdr.attr_id == CM_REP_ATTR_ID || + mad->mad_hdr.attr_id == CM_MRA_ATTR_ID || + mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID || + (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID && REJ_REASON(mad) == IB_CM_REJ_TIMEOUT)) { sl_cm_id = get_local_comm_id(mad); id = id_map_get(ibdev, &pv_cm_id, slave_id, sl_cm_id); if (id) @@ -314,8 +329,8 @@ int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id } if (!id) { - pr_debug("id{slave: %d, sl_cm_id: 0x%x} is NULL!\n", - slave_id, sl_cm_id); + pr_debug("id{slave: %d, sl_cm_id: 0x%x} is NULL! attr_id: 0x%x\n", + slave_id, sl_cm_id, be16_to_cpu(mad->mad_hdr.attr_id)); return -EINVAL; } @@ -327,11 +342,94 @@ cont: return 0; } +static void rej_tmout_timeout(struct work_struct *work) +{ + struct delayed_work *delay = to_delayed_work(work); + struct rej_tmout_entry *item = container_of(delay, struct rej_tmout_entry, timeout); + struct rej_tmout_entry *deleted; + + deleted = xa_cmpxchg(item->xa_rej_tmout, item->rem_pv_cm_id, item, NULL, 0); + + if (deleted != item) + pr_debug("deleted(%p) != item(%p)\n", deleted, item); + + kfree(item); +} + +static int alloc_rej_tmout(struct mlx4_ib_sriov *sriov, u32 rem_pv_cm_id, int slave) +{ + struct rej_tmout_entry *item; + struct rej_tmout_entry *old; + int ret = 0; + + xa_lock(&sriov->xa_rej_tmout); + item = xa_load(&sriov->xa_rej_tmout, (unsigned long)rem_pv_cm_id); + + if (item) { + if (xa_err(item)) + ret = xa_err(item); + else + /* If a retry, adjust delayed work */ + mod_delayed_work(system_wq, &item->timeout, CM_CLEANUP_CACHE_TIMEOUT); + goto err_or_exists; + } + xa_unlock(&sriov->xa_rej_tmout); + + item = kmalloc(sizeof(*item), GFP_KERNEL); + if (!item) + return -ENOMEM; + + INIT_DELAYED_WORK(&item->timeout, rej_tmout_timeout); + item->slave = slave; + item->rem_pv_cm_id = rem_pv_cm_id; + item->xa_rej_tmout = &sriov->xa_rej_tmout; + + old = xa_cmpxchg(&sriov->xa_rej_tmout, (unsigned long)rem_pv_cm_id, NULL, item, GFP_KERNEL); + if (old) { + pr_debug( + "Non-null old entry (%p) or error (%d) when inserting\n", + old, xa_err(old)); + kfree(item); + return xa_err(old); + } + + schedule_delayed_work(&item->timeout, CM_CLEANUP_CACHE_TIMEOUT); + + return 0; + +err_or_exists: + xa_unlock(&sriov->xa_rej_tmout); + return ret; +} + +static int lookup_rej_tmout_slave(struct mlx4_ib_sriov *sriov, u32 rem_pv_cm_id) +{ + struct rej_tmout_entry *item; + int slave; + + xa_lock(&sriov->xa_rej_tmout); + item = xa_load(&sriov->xa_rej_tmout, (unsigned long)rem_pv_cm_id); + + if (!item || xa_err(item)) { + pr_debug("Could not find slave. rem_pv_cm_id 0x%x error: %d\n", + rem_pv_cm_id, xa_err(item)); + slave = !item ? 
-ENOENT : xa_err(item); + } else { + slave = item->slave; + } + xa_unlock(&sriov->xa_rej_tmout); + + return slave; +} + int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave, struct ib_mad *mad) { + struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov; + u32 rem_pv_cm_id = get_local_comm_id(mad); u32 pv_cm_id; struct id_map_entry *id; + int sts; if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID || mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) { @@ -347,6 +445,13 @@ int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave, be64_to_cpu(gid.global.interface_id)); return -ENOENT; } + + sts = alloc_rej_tmout(sriov, rem_pv_cm_id, *slave); + if (sts) + /* Even if this fails, we pass on the REQ to the slave */ + pr_debug("Could not allocate rej_tmout entry. rem_pv_cm_id 0x%x slave %d status %d\n", + rem_pv_cm_id, *slave, sts); + return 0; } @@ -354,7 +459,14 @@ int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave, id = id_map_get(ibdev, (int *)&pv_cm_id, -1, -1); if (!id) { - pr_debug("Couldn't find an entry for pv_cm_id 0x%x\n", pv_cm_id); + if (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID && + REJ_REASON(mad) == IB_CM_REJ_TIMEOUT && slave) { + *slave = lookup_rej_tmout_slave(sriov, rem_pv_cm_id); + + return (*slave < 0) ? *slave : 0; + } + pr_debug("Couldn't find an entry for pv_cm_id 0x%x, attr_id 0x%x\n", + pv_cm_id, be16_to_cpu(mad->mad_hdr.attr_id)); return -ENOENT; } @@ -375,6 +487,34 @@ void mlx4_ib_cm_paravirt_init(struct mlx4_ib_dev *dev) INIT_LIST_HEAD(&dev->sriov.cm_list); dev->sriov.sl_id_map = RB_ROOT; xa_init_flags(&dev->sriov.pv_id_table, XA_FLAGS_ALLOC); + xa_init(&dev->sriov.xa_rej_tmout); +} + +static void rej_tmout_xa_cleanup(struct mlx4_ib_sriov *sriov, int slave) +{ + struct rej_tmout_entry *item; + bool flush_needed = false; + unsigned long id; + int cnt = 0; + + xa_lock(&sriov->xa_rej_tmout); + xa_for_each(&sriov->xa_rej_tmout, id, item) { + if (slave < 0 || slave == item->slave) { + mod_delayed_work(system_wq, &item->timeout, 0); + flush_needed = true; + ++cnt; + } + } + xa_unlock(&sriov->xa_rej_tmout); + + if (flush_needed) { + flush_scheduled_work(); + pr_debug("Deleted %d entries in xarray for slave %d during cleanup\n", + cnt, slave); + } + + if (slave < 0) + WARN_ON(!xa_empty(&sriov->xa_rej_tmout)); } /* slave = -1 ==> all slaves */ @@ -444,4 +584,6 @@ void mlx4_ib_cm_paravirt_clean(struct mlx4_ib_dev *dev, int slave) list_del(&map->list); kfree(map); } + + rej_tmout_xa_cleanup(sriov, slave); } diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index 8a3436994f80..e9b5a4d57fb1 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -149,7 +149,6 @@ static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_udata *udata, if (IS_ERR(*umem)) return PTR_ERR(*umem); - n = ib_umem_page_count(*umem); shift = mlx4_ib_umem_calc_optimal_mtt_size(*umem, 0, &n); err = mlx4_mtt_init(dev->dev, n, shift, &buf->mtt); @@ -475,7 +474,7 @@ out: return err; } -void mlx4_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) +int mlx4_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) { struct mlx4_ib_dev *dev = to_mdev(cq->device); struct mlx4_ib_cq *mcq = to_mcq(cq); @@ -495,6 +494,7 @@ void mlx4_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) mlx4_db_free(dev->dev, &mcq->db); } ib_umem_release(mcq->umem); + return 0; } static void dump_cqe(void *cqe) diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index abe68708d6d6..8bd16474708f 
100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -500,6 +500,13 @@ static int get_gids_from_l3_hdr(struct ib_grh *grh, union ib_gid *sgid, sgid, dgid); } +static int is_proxy_qp0(struct mlx4_ib_dev *dev, int qpn, int slave) +{ + int proxy_start = dev->dev->phys_caps.base_proxy_sqpn + 8 * slave; + + return (qpn >= proxy_start && qpn <= proxy_start + 1); +} + int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, enum ib_qp_type dest_qpt, struct ib_wc *wc, struct ib_grh *grh, struct ib_mad *mad) @@ -520,8 +527,10 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, u16 cached_pkey; u8 is_eth = dev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH; - if (dest_qpt > IB_QPT_GSI) + if (dest_qpt > IB_QPT_GSI) { + pr_debug("dest_qpt (%d) > IB_QPT_GSI\n", dest_qpt); return -EINVAL; + } tun_ctx = dev->sriov.demux[port-1].tun[slave]; @@ -538,12 +547,20 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, if (dest_qpt) { u16 pkey_ix; ret = ib_get_cached_pkey(&dev->ib_dev, port, wc->pkey_index, &cached_pkey); - if (ret) + if (ret) { + pr_debug("unable to get %s cached pkey for index %d, ret %d\n", + is_proxy_qp0(dev, wc->src_qp, slave) ? "SMI" : "GSI", + wc->pkey_index, ret); return -EINVAL; + } ret = find_slave_port_pkey_ix(dev, slave, port, cached_pkey, &pkey_ix); - if (ret) + if (ret) { + pr_debug("unable to get %s pkey ix for pkey 0x%x, ret %d\n", + is_proxy_qp0(dev, wc->src_qp, slave) ? "SMI" : "GSI", + cached_pkey, ret); return -EINVAL; + } tun_pkey_ix = pkey_ix; } else tun_pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][0]; @@ -715,7 +732,8 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port, err = mlx4_ib_send_to_slave(dev, slave, port, wc->qp->qp_type, wc, grh, mad); if (err) - pr_debug("failed sending to slave %d via tunnel qp (%d)\n", + pr_debug("failed sending %s to slave %d via tunnel qp (%d)\n", + is_proxy_qp0(dev, wc->src_qp, slave) ? "SMI" : "GSI", slave, err); return 0; } @@ -794,7 +812,8 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port, err = mlx4_ib_send_to_slave(dev, slave, port, wc->qp->qp_type, wc, grh, mad); if (err) - pr_debug("failed sending to slave %d via tunnel qp (%d)\n", + pr_debug("failed sending %s to slave %d via tunnel qp (%d)\n", + is_proxy_qp0(dev, wc->src_qp, slave) ? "SMI" : "GSI", slave, err); return 0; } @@ -807,27 +826,6 @@ static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, int err; struct ib_port_attr pattr; - if (in_wc && in_wc->qp) { - pr_debug("received MAD: port:%d slid:%d sqpn:%d " - "dlid_bits:%d dqpn:%d wc_flags:0x%x tid:%016llx cls:%x mtd:%x atr:%x\n", - port_num, - in_wc->slid, in_wc->src_qp, - in_wc->dlid_path_bits, - in_wc->qp->qp_num, - in_wc->wc_flags, - be64_to_cpu(in_mad->mad_hdr.tid), - in_mad->mad_hdr.mgmt_class, in_mad->mad_hdr.method, - be16_to_cpu(in_mad->mad_hdr.attr_id)); - if (in_wc->wc_flags & IB_WC_GRH) { - pr_debug("sgid_hi:0x%016llx sgid_lo:0x%016llx\n", - be64_to_cpu(in_grh->sgid.global.subnet_prefix), - be64_to_cpu(in_grh->sgid.global.interface_id)); - pr_debug("dgid_hi:0x%016llx dgid_lo:0x%016llx\n", - be64_to_cpu(in_grh->dgid.global.subnet_prefix), - be64_to_cpu(in_grh->dgid.global.interface_id)); - } - } - slid = in_wc ? 
ib_lid_cpu16(in_wc->slid) : be16_to_cpu(IB_LID_PERMISSIVE); if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && slid == 0) { @@ -1299,6 +1297,18 @@ static void mlx4_ib_tunnel_comp_handler(struct ib_cq *cq, void *arg) spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags); } +static void mlx4_ib_wire_comp_handler(struct ib_cq *cq, void *arg) +{ + unsigned long flags; + struct mlx4_ib_demux_pv_ctx *ctx = cq->cq_context; + struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev); + + spin_lock_irqsave(&dev->sriov.going_down_lock, flags); + if (!dev->sriov.is_going_down && ctx->state == DEMUX_PV_STATE_ACTIVE) + queue_work(ctx->wi_wq, &ctx->work); + spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags); +} + static int mlx4_ib_post_pv_qp_buf(struct mlx4_ib_demux_pv_ctx *ctx, struct mlx4_ib_demux_pv_qp *tun_qp, int index) @@ -1341,14 +1351,6 @@ static int mlx4_ib_multiplex_sa_handler(struct ib_device *ibdev, int port, return ret; } -static int is_proxy_qp0(struct mlx4_ib_dev *dev, int qpn, int slave) -{ - int proxy_start = dev->dev->phys_caps.base_proxy_sqpn + 8 * slave; - - return (qpn >= proxy_start && qpn <= proxy_start + 1); -} - - int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, enum ib_qp_type dest_qpt, u16 pkey_index, u32 remote_qpn, u32 qkey, struct rdma_ah_attr *attr, @@ -1401,10 +1403,10 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, spin_lock(&sqp->tx_lock); if (sqp->tx_ix_head - sqp->tx_ix_tail >= - (MLX4_NUM_TUNNEL_BUFS - 1)) + (MLX4_NUM_WIRE_BUFS - 1)) ret = -EAGAIN; else - wire_tx_ix = (++sqp->tx_ix_head) & (MLX4_NUM_TUNNEL_BUFS - 1); + wire_tx_ix = (++sqp->tx_ix_head) & (MLX4_NUM_WIRE_BUFS - 1); spin_unlock(&sqp->tx_lock); if (ret) goto out; @@ -1484,6 +1486,7 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc u16 vlan_id; u8 qos; u8 *dmac; + int sts; /* Get slave that sent this packet */ if (wc->src_qp < dev->dev->phys_caps.base_proxy_sqpn || @@ -1580,13 +1583,17 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc &vlan_id, &qos)) rdma_ah_set_sl(&ah_attr, qos); - mlx4_ib_send_to_wire(dev, slave, ctx->port, - is_proxy_qp0(dev, wc->src_qp, slave) ? - IB_QPT_SMI : IB_QPT_GSI, - be16_to_cpu(tunnel->hdr.pkey_index), - be32_to_cpu(tunnel->hdr.remote_qpn), - be32_to_cpu(tunnel->hdr.qkey), - &ah_attr, wc->smac, vlan_id, &tunnel->mad); + sts = mlx4_ib_send_to_wire(dev, slave, ctx->port, + is_proxy_qp0(dev, wc->src_qp, slave) ? + IB_QPT_SMI : IB_QPT_GSI, + be16_to_cpu(tunnel->hdr.pkey_index), + be32_to_cpu(tunnel->hdr.remote_qpn), + be32_to_cpu(tunnel->hdr.qkey), + &ah_attr, wc->smac, vlan_id, &tunnel->mad); + if (sts) + pr_debug("failed sending %s to wire on behalf of slave %d (%d)\n", + is_proxy_qp0(dev, wc->src_qp, slave) ? "SMI" : "GSI", + slave, sts); } static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx, @@ -1595,19 +1602,20 @@ static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx, int i; struct mlx4_ib_demux_pv_qp *tun_qp; int rx_buf_size, tx_buf_size; + const int nmbr_bufs = is_tun ? 
MLX4_NUM_TUNNEL_BUFS : MLX4_NUM_WIRE_BUFS; if (qp_type > IB_QPT_GSI) return -EINVAL; tun_qp = &ctx->qp[qp_type]; - tun_qp->ring = kcalloc(MLX4_NUM_TUNNEL_BUFS, + tun_qp->ring = kcalloc(nmbr_bufs, sizeof(struct mlx4_ib_buf), GFP_KERNEL); if (!tun_qp->ring) return -ENOMEM; - tun_qp->tx_ring = kcalloc(MLX4_NUM_TUNNEL_BUFS, + tun_qp->tx_ring = kcalloc(nmbr_bufs, sizeof (struct mlx4_ib_tun_tx_buf), GFP_KERNEL); if (!tun_qp->tx_ring) { @@ -1624,7 +1632,7 @@ static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx, tx_buf_size = sizeof (struct mlx4_mad_snd_buf); } - for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) { + for (i = 0; i < nmbr_bufs; i++) { tun_qp->ring[i].addr = kmalloc(rx_buf_size, GFP_KERNEL); if (!tun_qp->ring[i].addr) goto err; @@ -1638,7 +1646,7 @@ static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx, } } - for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) { + for (i = 0; i < nmbr_bufs; i++) { tun_qp->tx_ring[i].buf.addr = kmalloc(tx_buf_size, GFP_KERNEL); if (!tun_qp->tx_ring[i].buf.addr) @@ -1669,7 +1677,7 @@ tx_err: tx_buf_size, DMA_TO_DEVICE); kfree(tun_qp->tx_ring[i].buf.addr); } - i = MLX4_NUM_TUNNEL_BUFS; + i = nmbr_bufs; err: while (i > 0) { --i; @@ -1690,6 +1698,7 @@ static void mlx4_ib_free_pv_qp_bufs(struct mlx4_ib_demux_pv_ctx *ctx, int i; struct mlx4_ib_demux_pv_qp *tun_qp; int rx_buf_size, tx_buf_size; + const int nmbr_bufs = is_tun ? MLX4_NUM_TUNNEL_BUFS : MLX4_NUM_WIRE_BUFS; if (qp_type > IB_QPT_GSI) return; @@ -1704,13 +1713,13 @@ static void mlx4_ib_free_pv_qp_bufs(struct mlx4_ib_demux_pv_ctx *ctx, } - for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) { + for (i = 0; i < nmbr_bufs; i++) { ib_dma_unmap_single(ctx->ib_dev, tun_qp->ring[i].map, rx_buf_size, DMA_FROM_DEVICE); kfree(tun_qp->ring[i].addr); } - for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) { + for (i = 0; i < nmbr_bufs; i++) { ib_dma_unmap_single(ctx->ib_dev, tun_qp->tx_ring[i].buf.map, tx_buf_size, DMA_TO_DEVICE); kfree(tun_qp->tx_ring[i].buf.addr); @@ -1744,9 +1753,6 @@ static void mlx4_ib_tunnel_comp_worker(struct work_struct *work) "buf:%lld\n", wc.wr_id); break; case IB_WC_SEND: - pr_debug("received tunnel send completion:" - "wrid=0x%llx, status=0x%x\n", - wc.wr_id, wc.status); rdma_destroy_ah(tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah, 0); tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah @@ -1793,6 +1799,7 @@ static int create_pv_sqp(struct mlx4_ib_demux_pv_ctx *ctx, struct mlx4_ib_qp_tunnel_init_attr qp_init_attr; struct ib_qp_attr attr; int qp_attr_mask_INIT; + const int nmbr_bufs = create_tun ? 
MLX4_NUM_TUNNEL_BUFS : MLX4_NUM_WIRE_BUFS; if (qp_type > IB_QPT_GSI) return -EINVAL; @@ -1803,8 +1810,8 @@ static int create_pv_sqp(struct mlx4_ib_demux_pv_ctx *ctx, qp_init_attr.init_attr.send_cq = ctx->cq; qp_init_attr.init_attr.recv_cq = ctx->cq; qp_init_attr.init_attr.sq_sig_type = IB_SIGNAL_ALL_WR; - qp_init_attr.init_attr.cap.max_send_wr = MLX4_NUM_TUNNEL_BUFS; - qp_init_attr.init_attr.cap.max_recv_wr = MLX4_NUM_TUNNEL_BUFS; + qp_init_attr.init_attr.cap.max_send_wr = nmbr_bufs; + qp_init_attr.init_attr.cap.max_recv_wr = nmbr_bufs; qp_init_attr.init_attr.cap.max_send_sge = 1; qp_init_attr.init_attr.cap.max_recv_sge = 1; if (create_tun) { @@ -1866,7 +1873,7 @@ static int create_pv_sqp(struct mlx4_ib_demux_pv_ctx *ctx, goto err_qp; } - for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) { + for (i = 0; i < nmbr_bufs; i++) { ret = mlx4_ib_post_pv_qp_buf(ctx, tun_qp, i); if (ret) { pr_err(" mlx4_ib_post_pv_buf error" @@ -1902,8 +1909,8 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work) switch (wc.opcode) { case IB_WC_SEND: kfree(sqp->tx_ring[wc.wr_id & - (MLX4_NUM_TUNNEL_BUFS - 1)].ah); - sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah + (MLX4_NUM_WIRE_BUFS - 1)].ah); + sqp->tx_ring[wc.wr_id & (MLX4_NUM_WIRE_BUFS - 1)].ah = NULL; spin_lock(&sqp->tx_lock); sqp->tx_ix_tail++; @@ -1912,13 +1919,13 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work) case IB_WC_RECV: mad = (struct ib_mad *) &(((struct mlx4_mad_rcv_buf *) (sqp->ring[wc.wr_id & - (MLX4_NUM_TUNNEL_BUFS - 1)].addr))->payload); + (MLX4_NUM_WIRE_BUFS - 1)].addr))->payload); grh = &(((struct mlx4_mad_rcv_buf *) (sqp->ring[wc.wr_id & - (MLX4_NUM_TUNNEL_BUFS - 1)].addr))->grh); + (MLX4_NUM_WIRE_BUFS - 1)].addr))->grh); mlx4_ib_demux_mad(ctx->ib_dev, ctx->port, &wc, grh, mad); if (mlx4_ib_post_pv_qp_buf(ctx, sqp, wc.wr_id & - (MLX4_NUM_TUNNEL_BUFS - 1))) + (MLX4_NUM_WIRE_BUFS - 1))) pr_err("Failed reposting SQP " "buf:%lld\n", wc.wr_id); break; @@ -1931,8 +1938,8 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work) ctx->slave, wc.status, wc.wr_id); if (!MLX4_TUN_IS_RECV(wc.wr_id)) { kfree(sqp->tx_ring[wc.wr_id & - (MLX4_NUM_TUNNEL_BUFS - 1)].ah); - sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah + (MLX4_NUM_WIRE_BUFS - 1)].ah); + sqp->tx_ring[wc.wr_id & (MLX4_NUM_WIRE_BUFS - 1)].ah = NULL; spin_lock(&sqp->tx_lock); sqp->tx_ix_tail++; @@ -1972,6 +1979,7 @@ static int create_pv_resources(struct ib_device *ibdev, int slave, int port, { int ret, cq_size; struct ib_cq_init_attr cq_attr = {}; + const int nmbr_bufs = create_tun ? MLX4_NUM_TUNNEL_BUFS : MLX4_NUM_WIRE_BUFS; if (ctx->state != DEMUX_PV_STATE_DOWN) return -EEXIST; @@ -1996,12 +2004,13 @@ static int create_pv_resources(struct ib_device *ibdev, int slave, int port, goto err_out_qp0; } - cq_size = 2 * MLX4_NUM_TUNNEL_BUFS; + cq_size = 2 * nmbr_bufs; if (ctx->has_smi) cq_size *= 2; cq_attr.cqe = cq_size; - ctx->cq = ib_create_cq(ctx->ib_dev, mlx4_ib_tunnel_comp_handler, + ctx->cq = ib_create_cq(ctx->ib_dev, + create_tun ? 
mlx4_ib_tunnel_comp_handler : mlx4_ib_wire_comp_handler, NULL, ctx, &cq_attr); if (IS_ERR(ctx->cq)) { ret = PTR_ERR(ctx->cq); @@ -2038,6 +2047,7 @@ static int create_pv_resources(struct ib_device *ibdev, int slave, int port, INIT_WORK(&ctx->work, mlx4_ib_sqp_comp_worker); ctx->wq = to_mdev(ibdev)->sriov.demux[port - 1].wq; + ctx->wi_wq = to_mdev(ibdev)->sriov.demux[port - 1].wi_wq; ret = ib_req_notify_cq(ctx->cq, IB_CQ_NEXT_COMP); if (ret) { @@ -2181,7 +2191,7 @@ static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev, goto err_mcg; } - snprintf(name, sizeof name, "mlx4_ibt%d", port); + snprintf(name, sizeof(name), "mlx4_ibt%d", port); ctx->wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM); if (!ctx->wq) { pr_err("Failed to create tunnelling WQ for port %d\n", port); @@ -2189,7 +2199,15 @@ static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev, goto err_wq; } - snprintf(name, sizeof name, "mlx4_ibud%d", port); + snprintf(name, sizeof(name), "mlx4_ibwi%d", port); + ctx->wi_wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM); + if (!ctx->wi_wq) { + pr_err("Failed to create wire WQ for port %d\n", port); + ret = -ENOMEM; + goto err_wiwq; + } + + snprintf(name, sizeof(name), "mlx4_ibud%d", port); ctx->ud_wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM); if (!ctx->ud_wq) { pr_err("Failed to create up/down WQ for port %d\n", port); @@ -2200,6 +2218,10 @@ static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev, return 0; err_udwq: + destroy_workqueue(ctx->wi_wq); + ctx->wi_wq = NULL; + +err_wiwq: destroy_workqueue(ctx->wq); ctx->wq = NULL; @@ -2247,12 +2269,14 @@ static void mlx4_ib_free_demux_ctx(struct mlx4_ib_demux_ctx *ctx) ctx->tun[i]->state = DEMUX_PV_STATE_DOWNING; } flush_workqueue(ctx->wq); + flush_workqueue(ctx->wi_wq); for (i = 0; i < dev->dev->caps.sqp_demux; i++) { destroy_pv_resources(dev, i, ctx->port, ctx->tun[i], 0); free_pv_object(dev, i, ctx->port); } kfree(ctx->tun); destroy_workqueue(ctx->ud_wq); + destroy_workqueue(ctx->wi_wq); destroy_workqueue(ctx->wq); } } diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index bd4f975e7f9a..cd0fba6b0964 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1215,9 +1215,10 @@ static int mlx4_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) return 0; } -static void mlx4_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) +static int mlx4_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) { mlx4_pd_free(to_mdev(pd->device)->dev, to_mpd(pd)->pdn); + return 0; } static int mlx4_ib_alloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata) @@ -1256,11 +1257,12 @@ err2: return err; } -static void mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata) +static int mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata) { ib_destroy_cq(to_mxrcd(xrcd)->cq); ib_dealloc_pd(to_mxrcd(xrcd)->pd); mlx4_xrcd_free(to_mdev(xrcd->device)->dev, to_mxrcd(xrcd)->xrcdn); + return 0; } static int add_gid_entry(struct ib_qp *ibqp, union ib_gid *gid) @@ -1533,23 +1535,11 @@ static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_att struct mlx4_net_trans_rule_hw_ctrl *ctrl; int default_flow; - static const u16 __mlx4_domain[] = { - [IB_FLOW_DOMAIN_USER] = MLX4_DOMAIN_UVERBS, - [IB_FLOW_DOMAIN_ETHTOOL] = MLX4_DOMAIN_ETHTOOL, - [IB_FLOW_DOMAIN_RFS] = MLX4_DOMAIN_RFS, - [IB_FLOW_DOMAIN_NIC] = MLX4_DOMAIN_NIC, - }; - if (flow_attr->priority > MLX4_IB_FLOW_MAX_PRIO) { pr_err("Invalid priority value %d\n", flow_attr->priority); 
return -EINVAL; } - if (domain >= IB_FLOW_DOMAIN_NUM) { - pr_err("Invalid domain value %d\n", domain); - return -EINVAL; - } - if (mlx4_map_sw_to_hw_steering_mode(mdev->dev, flow_type) < 0) return -EINVAL; @@ -1558,8 +1548,7 @@ static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_att return PTR_ERR(mailbox); ctrl = mailbox->buf; - ctrl->prio = cpu_to_be16(__mlx4_domain[domain] | - flow_attr->priority); + ctrl->prio = cpu_to_be16(domain | flow_attr->priority); ctrl->type = mlx4_map_sw_to_hw_steering_mode(mdev->dev, flow_type); ctrl->port = flow_attr->port; ctrl->qpn = cpu_to_be32(qp->qp_num); @@ -1701,8 +1690,8 @@ static int mlx4_ib_add_dont_trap_rule(struct mlx4_dev *dev, } static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp, - struct ib_flow_attr *flow_attr, - int domain, struct ib_udata *udata) + struct ib_flow_attr *flow_attr, + struct ib_udata *udata) { int err = 0, i = 0, j = 0; struct mlx4_ib_flow *mflow; @@ -1768,8 +1757,8 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp, } while (i < ARRAY_SIZE(type) && type[i]) { - err = __mlx4_ib_create_flow(qp, flow_attr, domain, type[i], - &mflow->reg_id[i].id); + err = __mlx4_ib_create_flow(qp, flow_attr, MLX4_DOMAIN_UVERBS, + type[i], &mflow->reg_id[i].id); if (err) goto err_create_flow; if (is_bonded) { @@ -1778,7 +1767,7 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp, */ flow_attr->port = 2; err = __mlx4_ib_create_flow(qp, flow_attr, - domain, type[j], + MLX4_DOMAIN_UVERBS, type[j], &mflow->reg_id[j].mirror); flow_attr->port = 1; if (err) @@ -2589,11 +2578,16 @@ static const struct ib_device_ops mlx4_ib_dev_wq_ops = { .destroy_rwq_ind_table = mlx4_ib_destroy_rwq_ind_table, .destroy_wq = mlx4_ib_destroy_wq, .modify_wq = mlx4_ib_modify_wq, + + INIT_RDMA_OBJ_SIZE(ib_rwq_ind_table, mlx4_ib_rwq_ind_table, + ib_rwq_ind_tbl), }; static const struct ib_device_ops mlx4_ib_dev_mw_ops = { .alloc_mw = mlx4_ib_alloc_mw, .dealloc_mw = mlx4_ib_dealloc_mw, + + INIT_RDMA_OBJ_SIZE(ib_mw, mlx4_ib_mw, ibmw), }; static const struct ib_device_ops mlx4_ib_dev_xrc_ops = { @@ -2847,7 +2841,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) goto err_steer_free_bitmap; rdma_set_device_sysfs_group(&ibdev->ib_dev, &mlx4_attr_group); - if (ib_register_device(&ibdev->ib_dev, "mlx4_%d")) + if (ib_register_device(&ibdev->ib_dev, "mlx4_%d", + &dev->persist->pdev->dev)) goto err_diag_counters; if (mlx4_ib_mad_init(ibdev)) @@ -2989,10 +2984,8 @@ int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, /* Add an empty rule for IB L2 */ memset(&ib_spec->mask, 0, sizeof(ib_spec->mask)); - err = __mlx4_ib_create_flow(&mqp->ibqp, flow, - IB_FLOW_DOMAIN_NIC, - MLX4_FS_REGULAR, - &mqp->reg_id); + err = __mlx4_ib_create_flow(&mqp->ibqp, flow, MLX4_DOMAIN_NIC, + MLX4_FS_REGULAR, &mqp->reg_id); } else { err = __mlx4_ib_destroy_flow(mdev->dev, mqp->reg_id); } diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 38e87a700a2a..58df06492d69 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -233,7 +233,8 @@ enum mlx4_ib_mad_ifc_flags { }; enum { - MLX4_NUM_TUNNEL_BUFS = 256, + MLX4_NUM_TUNNEL_BUFS = 512, + MLX4_NUM_WIRE_BUFS = 2048, }; struct mlx4_ib_tunnel_header { @@ -298,6 +299,26 @@ struct mlx4_ib_rss { u8 rss_key[MLX4_EN_RSS_KEY_SIZE]; }; +enum { + /* + * Largest possible UD header: send with GRH and immediate + * data plus 18 bytes for an Ethernet header with VLAN/802.1Q + * tag. 
(LRH would only use 8 bytes, so Ethernet is the + * biggest case) + */ + MLX4_IB_UD_HEADER_SIZE = 82, + MLX4_IB_LSO_HEADER_SPARE = 128, +}; + +struct mlx4_ib_sqp { + int pkey_index; + u32 qkey; + u32 send_psn; + struct ib_ud_header ud_header; + u8 header_buf[MLX4_IB_UD_HEADER_SIZE]; + struct ib_qp *roce_v2_gsi; +}; + struct mlx4_ib_qp { union { struct ib_qp ibqp; @@ -343,7 +364,10 @@ struct mlx4_ib_qp { struct mlx4_wqn_range *wqn_range; /* Number of RSS QP parents that uses this WQ */ u32 rss_usecnt; - struct mlx4_ib_rss *rss_ctx; + union { + struct mlx4_ib_rss *rss_ctx; + struct mlx4_ib_sqp *sqp; + }; }; struct mlx4_ib_srq { @@ -366,6 +390,10 @@ struct mlx4_ib_ah { union mlx4_ext_av av; }; +struct mlx4_ib_rwq_ind_table { + struct ib_rwq_ind_table ib_rwq_ind_tbl; +}; + /****************************************/ /* alias guid support */ /****************************************/ @@ -454,6 +482,7 @@ struct mlx4_ib_demux_pv_ctx { struct ib_pd *pd; struct work_struct work; struct workqueue_struct *wq; + struct workqueue_struct *wi_wq; struct mlx4_ib_demux_pv_qp qp[2]; }; @@ -461,6 +490,7 @@ struct mlx4_ib_demux_ctx { struct ib_device *ib_dev; int port; struct workqueue_struct *wq; + struct workqueue_struct *wi_wq; struct workqueue_struct *ud_wq; spinlock_t ud_lock; atomic64_t subnet_prefix; @@ -492,6 +522,7 @@ struct mlx4_ib_sriov { spinlock_t id_map_lock; struct rb_root sl_id_map; struct list_head cm_list; + struct xarray xa_rej_tmout; }; struct gid_cache_context { @@ -725,8 +756,7 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata); int mlx4_ib_dereg_mr(struct ib_mr *mr, struct ib_udata *udata); -struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, - struct ib_udata *udata); +int mlx4_ib_alloc_mw(struct ib_mw *mw, struct ib_udata *udata); int mlx4_ib_dealloc_mw(struct ib_mw *mw); struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg); @@ -736,7 +766,7 @@ int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period); int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata); int mlx4_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ib_udata *udata); -void mlx4_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); +int mlx4_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); int mlx4_ib_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags); void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq); @@ -747,14 +777,17 @@ int mlx4_ib_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr, int mlx4_ib_create_ah_slave(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, int slave_sgid_index, u8 *s_mac, u16 vlan_tag); int mlx4_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); -void mlx4_ib_destroy_ah(struct ib_ah *ah, u32 flags); +static inline int mlx4_ib_destroy_ah(struct ib_ah *ah, u32 flags) +{ + return 0; +} int mlx4_ib_create_srq(struct ib_srq *srq, struct ib_srq_init_attr *init_attr, struct ib_udata *udata); int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); int mlx4_ib_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); -void mlx4_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata); +int mlx4_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata); void mlx4_ib_free_srq_wqe(struct 
mlx4_ib_srq *srq, int wqe_index); int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr); @@ -890,15 +923,18 @@ void mlx4_ib_sl2vl_update(struct mlx4_ib_dev *mdev, int port); struct ib_wq *mlx4_ib_create_wq(struct ib_pd *pd, struct ib_wq_init_attr *init_attr, struct ib_udata *udata); -void mlx4_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata); +int mlx4_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata); int mlx4_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, u32 wq_attr_mask, struct ib_udata *udata); -struct ib_rwq_ind_table -*mlx4_ib_create_rwq_ind_table(struct ib_device *device, - struct ib_rwq_ind_table_init_attr *init_attr, - struct ib_udata *udata); -int mlx4_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table); +int mlx4_ib_create_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl, + struct ib_rwq_ind_table_init_attr *init_attr, + struct ib_udata *udata); +static inline int +mlx4_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table) +{ + return 0; +} int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, u64 start_va, int *num_of_mtts); diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index 1d5ef0de12c9..426fed005d53 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -271,6 +271,8 @@ int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, u64 start_va, u64 total_len = 0; int i; + *num_of_mtts = ib_umem_num_dma_blocks(umem, PAGE_SIZE); + for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) { /* * Initialization - save the first chunk start as the @@ -421,7 +423,6 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, goto err_free; } - n = ib_umem_page_count(mr->umem); shift = mlx4_ib_umem_calc_optimal_mtt_size(mr->umem, start, &n); err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, length, @@ -511,7 +512,7 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, mmr->umem = NULL; goto release_mpt_entry; } - n = ib_umem_page_count(mmr->umem); + n = ib_umem_num_dma_blocks(mmr->umem, PAGE_SIZE); shift = PAGE_SHIFT; err = mlx4_mr_rereg_mem_write(dev->dev, &mmr->mmr, @@ -610,37 +611,27 @@ int mlx4_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) return 0; } -struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, - struct ib_udata *udata) +int mlx4_ib_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata) { - struct mlx4_ib_dev *dev = to_mdev(pd->device); - struct mlx4_ib_mw *mw; + struct mlx4_ib_dev *dev = to_mdev(ibmw->device); + struct mlx4_ib_mw *mw = to_mmw(ibmw); int err; - mw = kmalloc(sizeof(*mw), GFP_KERNEL); - if (!mw) - return ERR_PTR(-ENOMEM); - - err = mlx4_mw_alloc(dev->dev, to_mpd(pd)->pdn, - to_mlx4_type(type), &mw->mmw); + err = mlx4_mw_alloc(dev->dev, to_mpd(ibmw->pd)->pdn, + to_mlx4_type(ibmw->type), &mw->mmw); if (err) - goto err_free; + return err; err = mlx4_mw_enable(dev->dev, &mw->mmw); if (err) goto err_mw; - mw->ibmw.rkey = mw->mmw.key; - - return &mw->ibmw; + ibmw->rkey = mw->mmw.key; + return 0; err_mw: mlx4_mw_free(dev->dev, &mw->mmw); - -err_free: - kfree(mw); - - return ERR_PTR(err); + return err; } int mlx4_ib_dealloc_mw(struct ib_mw *ibmw) @@ -648,8 +639,6 @@ int mlx4_ib_dealloc_mw(struct ib_mw *ibmw) struct mlx4_ib_mw *mw = to_mmw(ibmw); mlx4_mw_free(to_mdev(ibmw->device)->dev, &mw->mmw); - kfree(mw); - return 0; } diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 2975f350b9fd..5cb8e602294c 
100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -68,27 +68,6 @@ enum { }; enum { - /* - * Largest possible UD header: send with GRH and immediate - * data plus 18 bytes for an Ethernet header with VLAN/802.1Q - * tag. (LRH would only use 8 bytes, so Ethernet is the - * biggest case) - */ - MLX4_IB_UD_HEADER_SIZE = 82, - MLX4_IB_LSO_HEADER_SPARE = 128, -}; - -struct mlx4_ib_sqp { - struct mlx4_ib_qp qp; - int pkey_index; - u32 qkey; - u32 send_psn; - struct ib_ud_header ud_header; - u8 header_buf[MLX4_IB_UD_HEADER_SIZE]; - struct ib_qp *roce_v2_gsi; -}; - -enum { MLX4_IB_MIN_SQ_STRIDE = 6, MLX4_IB_CACHE_LINE_SIZE = 64, }; @@ -123,11 +102,6 @@ enum mlx4_ib_source_type { MLX4_IB_RWQ_SRC = 1, }; -static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp) -{ - return container_of(mqp, struct mlx4_ib_sqp, qp); -} - static int is_tunnel_qp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) { if (!mlx4_is_master(dev->dev)) @@ -656,8 +630,6 @@ static int create_qp_rss(struct mlx4_ib_dev *dev, if (err) goto err_qpn; - mutex_init(&qp->mutex); - INIT_LIST_HEAD(&qp->gid_list); INIT_LIST_HEAD(&qp->steering_rules); @@ -696,80 +668,72 @@ err_qpn: return err; } -static struct ib_qp *_mlx4_ib_create_qp_rss(struct ib_pd *pd, - struct ib_qp_init_attr *init_attr, - struct ib_udata *udata) +static int _mlx4_ib_create_qp_rss(struct ib_pd *pd, struct mlx4_ib_qp *qp, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata) { - struct mlx4_ib_qp *qp; struct mlx4_ib_create_qp_rss ucmd = {}; size_t required_cmd_sz; int err; if (!udata) { pr_debug("RSS QP with NULL udata\n"); - return ERR_PTR(-EINVAL); + return -EINVAL; } if (udata->outlen) - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; required_cmd_sz = offsetof(typeof(ucmd), reserved1) + sizeof(ucmd.reserved1); if (udata->inlen < required_cmd_sz) { pr_debug("invalid inlen\n"); - return ERR_PTR(-EINVAL); + return -EINVAL; } if (ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen))) { pr_debug("copy failed\n"); - return ERR_PTR(-EFAULT); + return -EFAULT; } if (memchr_inv(ucmd.reserved, 0, sizeof(ucmd.reserved))) - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; if (ucmd.comp_mask || ucmd.reserved1) - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; if (udata->inlen > sizeof(ucmd) && !ib_is_udata_cleared(udata, sizeof(ucmd), udata->inlen - sizeof(ucmd))) { pr_debug("inlen is not supported\n"); - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; } if (init_attr->qp_type != IB_QPT_RAW_PACKET) { pr_debug("RSS QP with unsupported QP type %d\n", init_attr->qp_type); - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; } if (init_attr->create_flags) { pr_debug("RSS QP doesn't support create flags\n"); - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; } if (init_attr->send_cq || init_attr->cap.max_send_wr) { pr_debug("RSS QP with unsupported send attributes\n"); - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; } - qp = kzalloc(sizeof(*qp), GFP_KERNEL); - if (!qp) - return ERR_PTR(-ENOMEM); - qp->pri.vid = 0xFFFF; qp->alt.vid = 0xFFFF; err = create_qp_rss(to_mdev(pd->device), init_attr, &ucmd, qp); - if (err) { - kfree(qp); - return ERR_PTR(err); - } + if (err) + return err; qp->ibqp.qp_num = qp->mqp.qpn; - - return &qp->ibqp; + return 0; } /* @@ -873,7 +837,6 @@ static int create_rq(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, qp->mlx4_ib_qp_type = MLX4_IB_QPT_RAW_PACKET; - mutex_init(&qp->mutex); spin_lock_init(&qp->sq.lock); spin_lock_init(&qp->rq.lock); INIT_LIST_HEAD(&qp->gid_list); @@ -922,7 
+885,6 @@ static int create_rq(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, goto err; } - n = ib_umem_page_count(qp->umem); shift = mlx4_ib_umem_calc_optimal_mtt_size(qp->umem, 0, &n); err = mlx4_mtt_init(dev->dev, n, shift, &qp->mtt); @@ -989,13 +951,11 @@ err: static int create_qp_common(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata, int sqpn, - struct mlx4_ib_qp **caller_qp) + struct mlx4_ib_qp *qp) { struct mlx4_ib_dev *dev = to_mdev(pd->device); int qpn; int err; - struct mlx4_ib_sqp *sqp = NULL; - struct mlx4_ib_qp *qp; struct mlx4_ib_ucontext *context = rdma_udata_to_drv_context( udata, struct mlx4_ib_ucontext, ibucontext); enum mlx4_ib_qp_type qp_type = (enum mlx4_ib_qp_type) init_attr->qp_type; @@ -1043,27 +1003,18 @@ static int create_qp_common(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, sqpn = qpn; } - if (!*caller_qp) { - if (qp_type == MLX4_IB_QPT_SMI || qp_type == MLX4_IB_QPT_GSI || - (qp_type & (MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_SMI_OWNER | - MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER))) { - sqp = kzalloc(sizeof(struct mlx4_ib_sqp), GFP_KERNEL); - if (!sqp) - return -ENOMEM; - qp = &sqp->qp; - } else { - qp = kzalloc(sizeof(struct mlx4_ib_qp), GFP_KERNEL); - if (!qp) - return -ENOMEM; - } - qp->pri.vid = 0xFFFF; - qp->alt.vid = 0xFFFF; - } else - qp = *caller_qp; + if (init_attr->qp_type == IB_QPT_SMI || + init_attr->qp_type == IB_QPT_GSI || qp_type == MLX4_IB_QPT_SMI || + qp_type == MLX4_IB_QPT_GSI || + (qp_type & (MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_SMI_OWNER | + MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER))) { + qp->sqp = kzalloc(sizeof(struct mlx4_ib_sqp), GFP_KERNEL); + if (!qp->sqp) + return -ENOMEM; + } qp->mlx4_ib_qp_type = qp_type; - mutex_init(&qp->mutex); spin_lock_init(&qp->sq.lock); spin_lock_init(&qp->rq.lock); INIT_LIST_HEAD(&qp->gid_list); @@ -1117,7 +1068,6 @@ static int create_qp_common(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, goto err; } - n = ib_umem_page_count(qp->umem); shift = mlx4_ib_umem_calc_optimal_mtt_size(qp->umem, 0, &n); err = mlx4_mtt_init(dev->dev, n, shift, &qp->mtt); @@ -1239,9 +1189,6 @@ static int create_qp_common(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, qp->mqp.event = mlx4_ib_qp_event; - if (!*caller_qp) - *caller_qp = qp; - spin_lock_irqsave(&dev->reset_flow_resource_lock, flags); mlx4_ib_lock_cqs(to_mcq(init_attr->send_cq), to_mcq(init_attr->recv_cq)); @@ -1293,10 +1240,7 @@ err_db: mlx4_db_free(dev->dev, &qp->db); err: - if (!sqp && !*caller_qp) - kfree(qp); - kfree(sqp); - + kfree(qp->sqp); return err; } @@ -1410,7 +1354,6 @@ static void destroy_qp_rss(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) mlx4_qp_free(dev->dev, &qp->mqp); mlx4_qp_release_range(dev->dev, qp->mqp.qpn, 1); del_gid_entries(qp); - kfree(qp->rss_ctx); } static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, @@ -1529,17 +1472,16 @@ static u32 get_sqp_num(struct mlx4_ib_dev *dev, struct ib_qp_init_attr *attr) return dev->dev->caps.spec_qps[attr->port_num - 1].qp1_proxy; } -static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd, - struct ib_qp_init_attr *init_attr, - struct ib_udata *udata) +static int _mlx4_ib_create_qp(struct ib_pd *pd, struct mlx4_ib_qp *qp, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata) { - struct mlx4_ib_qp *qp = NULL; int err; int sup_u_create_flags = MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK; u16 xrcdn = 0; if (init_attr->rwq_ind_tbl) - return _mlx4_ib_create_qp_rss(pd, init_attr, udata); + return 
_mlx4_ib_create_qp_rss(pd, qp, init_attr, udata); /* * We only support LSO, vendor flag1, and multicast loopback blocking, @@ -1551,16 +1493,16 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd, MLX4_IB_SRIOV_SQP | MLX4_IB_QP_NETIF | MLX4_IB_QP_CREATE_ROCE_V2_GSI)) - return ERR_PTR(-EINVAL); + return -EINVAL; if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP) { if (init_attr->qp_type != IB_QPT_UD) - return ERR_PTR(-EINVAL); + return -EINVAL; } if (init_attr->create_flags) { if (udata && init_attr->create_flags & ~(sup_u_create_flags)) - return ERR_PTR(-EINVAL); + return -EINVAL; if ((init_attr->create_flags & ~(MLX4_IB_SRIOV_SQP | MLX4_IB_QP_CREATE_ROCE_V2_GSI | @@ -1570,7 +1512,7 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd, init_attr->qp_type > IB_QPT_GSI) || (init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI && init_attr->qp_type != IB_QPT_GSI)) - return ERR_PTR(-EINVAL); + return -EINVAL; } switch (init_attr->qp_type) { @@ -1581,53 +1523,43 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd, fallthrough; case IB_QPT_XRC_INI: if (!(to_mdev(pd->device)->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC)) - return ERR_PTR(-ENOSYS); + return -ENOSYS; init_attr->recv_cq = init_attr->send_cq; fallthrough; case IB_QPT_RC: case IB_QPT_UC: case IB_QPT_RAW_PACKET: - qp = kzalloc(sizeof(*qp), GFP_KERNEL); - if (!qp) - return ERR_PTR(-ENOMEM); + case IB_QPT_UD: qp->pri.vid = 0xFFFF; qp->alt.vid = 0xFFFF; - fallthrough; - case IB_QPT_UD: - { - err = create_qp_common(pd, init_attr, udata, 0, &qp); - if (err) { - kfree(qp); - return ERR_PTR(err); - } + err = create_qp_common(pd, init_attr, udata, 0, qp); + if (err) + return err; qp->ibqp.qp_num = qp->mqp.qpn; qp->xrcdn = xrcdn; - break; - } case IB_QPT_SMI: case IB_QPT_GSI: { int sqpn; - /* Userspace is not allowed to create special QPs: */ - if (udata) - return ERR_PTR(-EINVAL); if (init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI) { int res = mlx4_qp_reserve_range(to_mdev(pd->device)->dev, 1, 1, &sqpn, 0, MLX4_RES_USAGE_DRIVER); if (res) - return ERR_PTR(res); + return res; } else { sqpn = get_sqp_num(to_mdev(pd->device), init_attr); } - err = create_qp_common(pd, init_attr, udata, sqpn, &qp); + qp->pri.vid = 0xFFFF; + qp->alt.vid = 0xFFFF; + err = create_qp_common(pd, init_attr, udata, sqpn, qp); if (err) - return ERR_PTR(err); + return err; qp->port = init_attr->port_num; qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : @@ -1636,25 +1568,33 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd, } default: /* Don't support raw QPs */ - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; } - - return &qp->ibqp; + return 0; } struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata) { struct ib_device *device = pd ? 
pd->device : init_attr->xrcd->device; - struct ib_qp *ibqp; struct mlx4_ib_dev *dev = to_mdev(device); + struct mlx4_ib_qp *qp; + int ret; - ibqp = _mlx4_ib_create_qp(pd, init_attr, udata); + qp = kzalloc(sizeof(*qp), GFP_KERNEL); + if (!qp) + return ERR_PTR(-ENOMEM); - if (!IS_ERR(ibqp) && - (init_attr->qp_type == IB_QPT_GSI) && + mutex_init(&qp->mutex); + ret = _mlx4_ib_create_qp(pd, qp, init_attr, udata); + if (ret) { + kfree(qp); + return ERR_PTR(ret); + } + + if (init_attr->qp_type == IB_QPT_GSI && !(init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI)) { - struct mlx4_ib_sqp *sqp = to_msqp((to_mqp(ibqp))); + struct mlx4_ib_sqp *sqp = qp->sqp; int is_eth = rdma_cap_eth_ah(&dev->ib_dev, init_attr->port_num); if (is_eth && @@ -1666,14 +1606,14 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, pr_err("Failed to create GSI QP for RoCEv2 (%ld)\n", PTR_ERR(sqp->roce_v2_gsi)); sqp->roce_v2_gsi = NULL; } else { - sqp = to_msqp(to_mqp(sqp->roce_v2_gsi)); - sqp->qp.flags |= MLX4_IB_ROCE_V2_GSI_QP; + to_mqp(sqp->roce_v2_gsi)->flags |= + MLX4_IB_ROCE_V2_GSI_QP; } init_attr->create_flags &= ~MLX4_IB_QP_CREATE_ROCE_V2_GSI; } } - return ibqp; + return &qp->ibqp; } static int _mlx4_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata) @@ -1700,10 +1640,8 @@ static int _mlx4_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata) destroy_qp_common(dev, mqp, MLX4_IB_QP_SRC, udata); } - if (is_sqp(dev, mqp)) - kfree(to_msqp(mqp)); - else - kfree(mqp); + kfree(mqp->sqp); + kfree(mqp); return 0; } @@ -1713,7 +1651,7 @@ int mlx4_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata) struct mlx4_ib_qp *mqp = to_mqp(qp); if (mqp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) { - struct mlx4_ib_sqp *sqp = to_msqp(mqp); + struct mlx4_ib_sqp *sqp = mqp->sqp; if (sqp->roce_v2_gsi) ib_destroy_qp(sqp->roce_v2_gsi); @@ -2575,7 +2513,7 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type, qp->alt_port = attr->alt_port_num; if (is_sqp(dev, qp)) - store_sqp_attrs(to_msqp(qp), attr, attr_mask); + store_sqp_attrs(qp->sqp, attr, attr_mask); /* * If we moved QP0 to RTR, bring the IB link up; if we moved @@ -2852,7 +2790,7 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, ret = _mlx4_ib_modify_qp(ibqp, attr, attr_mask, udata); if (mqp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) { - struct mlx4_ib_sqp *sqp = to_msqp(mqp); + struct mlx4_ib_sqp *sqp = mqp->sqp; int err = 0; if (sqp->roce_v2_gsi) @@ -2877,12 +2815,13 @@ static int vf_get_qp0_qkey(struct mlx4_dev *dev, int qpn, u32 *qkey) return -EINVAL; } -static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp, +static int build_sriov_qp0_header(struct mlx4_ib_qp *qp, const struct ib_ud_wr *wr, void *wqe, unsigned *mlx_seg_len) { - struct mlx4_ib_dev *mdev = to_mdev(sqp->qp.ibqp.device); - struct ib_device *ib_dev = &mdev->ib_dev; + struct mlx4_ib_dev *mdev = to_mdev(qp->ibqp.device); + struct mlx4_ib_sqp *sqp = qp->sqp; + struct ib_device *ib_dev = qp->ibqp.device; struct mlx4_wqe_mlx_seg *mlx = wqe; struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx; struct mlx4_ib_ah *ah = to_mah(wr->ah); @@ -2904,12 +2843,12 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp, /* for proxy-qp0 sends, need to add in size of tunnel header */ /* for tunnel-qp0 sends, tunnel header is already in s/g list */ - if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER) + if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER) send_size += sizeof (struct mlx4_ib_tunnel_header); ib_ud_header_init(send_size, 1, 0, 0, 0, 0, 0, 0, &sqp->ud_header); 
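The qp.c rework above (together with the mlx4_ib.h hunk at the top of this section) drops the old layout in which struct mlx4_ib_sqp embedded the struct mlx4_ib_qp and was reached via to_msqp()/container_of(); instead every QP is a plain struct mlx4_ib_qp allocated once in mlx4_ib_create_qp(), and only the special QP types get a small qp->sqp side allocation. A schematic of the before/after, using placeholder names rather than the real mlx4 structures:

    #include <linux/slab.h>

    /* Before: the special-QP state was the containing object.
     *
     *      struct old_sqp {
     *              struct my_qp qp;        // to_msqp() == container_of(qp, ...)
     *              ... special fields ...
     *      };
     *
     * After: one allocation per QP, plus an optional side allocation that the
     * common create/destroy paths handle uniformly.
     */
    struct my_sqp_state {           /* placeholder for pkey_index, qkey, ud_header, ... */
            int pkey_index;
            u32 qkey;
            u32 send_psn;
    };

    struct my_qp {
            /* ... fields common to all QP types ... */
            struct my_sqp_state *sqp;       /* NULL unless QP0/QP1 or a proxy/tunnel QP */
    };

    static int my_create_qp_common(struct my_qp *qp, bool is_special)
    {
            if (is_special) {
                    qp->sqp = kzalloc(sizeof(*qp->sqp), GFP_KERNEL);
                    if (!qp->sqp)
                            return -ENOMEM;
            }
            /* ... setup shared by all QP types; helpers now take struct my_qp *
             * and look at qp->sqp instead of using container_of() ... */
            return 0;
    }

    static void my_destroy_qp_common(struct my_qp *qp)
    {
            kfree(qp->sqp);         /* kfree(NULL) is a no-op */
    }
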
- if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER) { + if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER) { sqp->ud_header.lrh.service_level = be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28; sqp->ud_header.lrh.destination_lid = @@ -2926,26 +2865,26 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp, sqp->ud_header.lrh.virtual_lane = 0; sqp->ud_header.bth.solicited_event = !!(wr->wr.send_flags & IB_SEND_SOLICITED); - err = ib_get_cached_pkey(ib_dev, sqp->qp.port, 0, &pkey); + err = ib_get_cached_pkey(ib_dev, qp->port, 0, &pkey); if (err) return err; sqp->ud_header.bth.pkey = cpu_to_be16(pkey); - if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_TUN_SMI_OWNER) + if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_TUN_SMI_OWNER) sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->remote_qpn); else sqp->ud_header.bth.destination_qpn = - cpu_to_be32(mdev->dev->caps.spec_qps[sqp->qp.port - 1].qp0_tunnel); + cpu_to_be32(mdev->dev->caps.spec_qps[qp->port - 1].qp0_tunnel); sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1)); if (mlx4_is_master(mdev->dev)) { - if (mlx4_get_parav_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey)) + if (mlx4_get_parav_qkey(mdev->dev, qp->mqp.qpn, &qkey)) return -EINVAL; } else { - if (vf_get_qp0_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey)) + if (vf_get_qp0_qkey(mdev->dev, qp->mqp.qpn, &qkey)) return -EINVAL; } sqp->ud_header.deth.qkey = cpu_to_be32(qkey); - sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.mqp.qpn); + sqp->ud_header.deth.source_qpn = cpu_to_be32(qp->mqp.qpn); sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY; sqp->ud_header.immediate_present = 0; @@ -3029,10 +2968,11 @@ static int fill_gid_by_hw_index(struct mlx4_ib_dev *ibdev, u8 port_num, } #define MLX4_ROCEV2_QP1_SPORT 0xC000 -static int build_mlx_header(struct mlx4_ib_sqp *sqp, const struct ib_ud_wr *wr, +static int build_mlx_header(struct mlx4_ib_qp *qp, const struct ib_ud_wr *wr, void *wqe, unsigned *mlx_seg_len) { - struct ib_device *ib_dev = sqp->qp.ibqp.device; + struct mlx4_ib_sqp *sqp = qp->sqp; + struct ib_device *ib_dev = qp->ibqp.device; struct mlx4_ib_dev *ibdev = to_mdev(ib_dev); struct mlx4_wqe_mlx_seg *mlx = wqe; struct mlx4_wqe_ctrl_seg *ctrl = wqe; @@ -3056,7 +2996,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, const struct ib_ud_wr *wr, for (i = 0; i < wr->wr.num_sge; ++i) send_size += wr->wr.sg_list[i].length; - is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET; + is_eth = rdma_port_get_link_layer(qp->ibqp.device, qp->port) == IB_LINK_LAYER_ETHERNET; is_grh = mlx4_ib_ah_grh_present(ah); if (is_eth) { enum ib_gid_type gid_type; @@ -3070,9 +3010,9 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, const struct ib_ud_wr *wr, if (err) return err; } else { - err = fill_gid_by_hw_index(ibdev, sqp->qp.port, - ah->av.ib.gid_index, - &sgid, &gid_type); + err = fill_gid_by_hw_index(ibdev, qp->port, + ah->av.ib.gid_index, &sgid, + &gid_type); if (!err) { is_udp = gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP; if (is_udp) { @@ -3117,13 +3057,18 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, const struct ib_ud_wr *wr, * indexes don't necessarily match the hw ones, so * we must use our own cache */ - sqp->ud_header.grh.source_gid.global.subnet_prefix = - cpu_to_be64(atomic64_read(&(to_mdev(ib_dev)->sriov. - demux[sqp->qp.port - 1]. - subnet_prefix))); - sqp->ud_header.grh.source_gid.global.interface_id = - to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1]. 
- guid_cache[ah->av.ib.gid_index]; + sqp->ud_header.grh.source_gid.global + .subnet_prefix = + cpu_to_be64(atomic64_read( + &(to_mdev(ib_dev) + ->sriov + .demux[qp->port - 1] + .subnet_prefix))); + sqp->ud_header.grh.source_gid.global + .interface_id = + to_mdev(ib_dev) + ->sriov.demux[qp->port - 1] + .guid_cache[ah->av.ib.gid_index]; } else { sqp->ud_header.grh.source_gid = ah->ibah.sgid_attr->gid; @@ -3155,10 +3100,13 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, const struct ib_ud_wr *wr, mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE); if (!is_eth) { - mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MLX4_WQE_MLX_VL15 : 0) | - (sqp->ud_header.lrh.destination_lid == - IB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) | - (sqp->ud_header.lrh.service_level << 8)); + mlx->flags |= + cpu_to_be32((!qp->ibqp.qp_num ? MLX4_WQE_MLX_VL15 : 0) | + (sqp->ud_header.lrh.destination_lid == + IB_LID_PERMISSIVE ? + MLX4_WQE_MLX_SLR : + 0) | + (sqp->ud_header.lrh.service_level << 8)); if (ah->av.ib.port_pd & cpu_to_be32(0x80000000)) mlx->flags |= cpu_to_be32(0x1); /* force loopback */ mlx->rlid = sqp->ud_header.lrh.destination_lid; @@ -3204,21 +3152,23 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, const struct ib_ud_wr *wr, sqp->ud_header.vlan.tag = cpu_to_be16(vlan | pcp); } } else { - sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : - sl_to_vl(to_mdev(ib_dev), - sqp->ud_header.lrh.service_level, - sqp->qp.port); - if (sqp->qp.ibqp.qp_num && sqp->ud_header.lrh.virtual_lane == 15) + sqp->ud_header.lrh.virtual_lane = + !qp->ibqp.qp_num ? + 15 : + sl_to_vl(to_mdev(ib_dev), + sqp->ud_header.lrh.service_level, + qp->port); + if (qp->ibqp.qp_num && sqp->ud_header.lrh.virtual_lane == 15) return -EINVAL; if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE) sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE; } sqp->ud_header.bth.solicited_event = !!(wr->wr.send_flags & IB_SEND_SOLICITED); - if (!sqp->qp.ibqp.qp_num) - err = ib_get_cached_pkey(ib_dev, sqp->qp.port, sqp->pkey_index, + if (!qp->ibqp.qp_num) + err = ib_get_cached_pkey(ib_dev, qp->port, sqp->pkey_index, &pkey); else - err = ib_get_cached_pkey(ib_dev, sqp->qp.port, wr->pkey_index, + err = ib_get_cached_pkey(ib_dev, qp->port, wr->pkey_index, &pkey); if (err) return err; @@ -3228,7 +3178,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, const struct ib_ud_wr *wr, sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1)); sqp->ud_header.deth.qkey = cpu_to_be32(wr->remote_qkey & 0x80000000 ? sqp->qkey : wr->remote_qkey); - sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num); + sqp->ud_header.deth.source_qpn = cpu_to_be32(qp->ibqp.qp_num); header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf); @@ -3551,14 +3501,14 @@ static int _mlx4_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, struct mlx4_ib_dev *mdev = to_mdev(ibqp->device); if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) { - struct mlx4_ib_sqp *sqp = to_msqp(qp); + struct mlx4_ib_sqp *sqp = qp->sqp; if (sqp->roce_v2_gsi) { struct mlx4_ib_ah *ah = to_mah(ud_wr(wr)->ah); enum ib_gid_type gid_type; union ib_gid gid; - if (!fill_gid_by_hw_index(mdev, sqp->qp.port, + if (!fill_gid_by_hw_index(mdev, qp->port, ah->av.ib.gid_index, &gid, &gid_type)) qp = (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ? 
@@ -3678,8 +3628,8 @@ static int _mlx4_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, break; case MLX4_IB_QPT_TUN_SMI_OWNER: - err = build_sriov_qp0_header(to_msqp(qp), ud_wr(wr), - ctrl, &seglen); + err = build_sriov_qp0_header(qp, ud_wr(wr), ctrl, + &seglen); if (unlikely(err)) { *bad_wr = wr; goto out; @@ -3715,8 +3665,8 @@ static int _mlx4_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, break; case MLX4_IB_QPT_PROXY_SMI_OWNER: - err = build_sriov_qp0_header(to_msqp(qp), ud_wr(wr), - ctrl, &seglen); + err = build_sriov_qp0_header(qp, ud_wr(wr), ctrl, + &seglen); if (unlikely(err)) { *bad_wr = wr; goto out; @@ -3749,8 +3699,7 @@ static int _mlx4_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, case MLX4_IB_QPT_SMI: case MLX4_IB_QPT_GSI: - err = build_mlx_header(to_msqp(qp), ud_wr(wr), ctrl, - &seglen); + err = build_mlx_header(qp, ud_wr(wr), ctrl, &seglen); if (unlikely(err)) { *bad_wr = wr; goto out; @@ -4172,6 +4121,7 @@ struct ib_wq *mlx4_ib_create_wq(struct ib_pd *pd, if (!qp) return ERR_PTR(-ENOMEM); + mutex_init(&qp->mutex); qp->pri.vid = 0xFFFF; qp->alt.vid = 0xFFFF; @@ -4327,7 +4277,7 @@ int mlx4_ib_modify_wq(struct ib_wq *ibwq, struct ib_wq_attr *wq_attr, return err; } -void mlx4_ib_destroy_wq(struct ib_wq *ibwq, struct ib_udata *udata) +int mlx4_ib_destroy_wq(struct ib_wq *ibwq, struct ib_udata *udata) { struct mlx4_ib_dev *dev = to_mdev(ibwq->device); struct mlx4_ib_qp *qp = to_mqp((struct ib_qp *)ibwq); @@ -4338,36 +4288,35 @@ void mlx4_ib_destroy_wq(struct ib_wq *ibwq, struct ib_udata *udata) destroy_qp_common(dev, qp, MLX4_IB_RWQ_SRC, udata); kfree(qp); + return 0; } -struct ib_rwq_ind_table -*mlx4_ib_create_rwq_ind_table(struct ib_device *device, - struct ib_rwq_ind_table_init_attr *init_attr, - struct ib_udata *udata) +int mlx4_ib_create_rwq_ind_table(struct ib_rwq_ind_table *rwq_ind_table, + struct ib_rwq_ind_table_init_attr *init_attr, + struct ib_udata *udata) { - struct ib_rwq_ind_table *rwq_ind_table; struct mlx4_ib_create_rwq_ind_tbl_resp resp = {}; unsigned int ind_tbl_size = 1 << init_attr->log_ind_tbl_size; + struct ib_device *device = rwq_ind_table->device; unsigned int base_wqn; size_t min_resp_len; - int i; - int err; + int i, err = 0; if (udata->inlen > 0 && !ib_is_udata_cleared(udata, 0, udata->inlen)) - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; min_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved); if (udata->outlen && udata->outlen < min_resp_len) - return ERR_PTR(-EINVAL); + return -EINVAL; if (ind_tbl_size > device->attrs.rss_caps.max_rwq_indirection_table_size) { pr_debug("log_ind_tbl_size = %d is bigger than supported = %d\n", ind_tbl_size, device->attrs.rss_caps.max_rwq_indirection_table_size); - return ERR_PTR(-EINVAL); + return -EINVAL; } base_wqn = init_attr->ind_tbl[0]->wq_num; @@ -4375,39 +4324,23 @@ struct ib_rwq_ind_table if (base_wqn % ind_tbl_size) { pr_debug("WQN=0x%x isn't aligned with indirection table size\n", base_wqn); - return ERR_PTR(-EINVAL); + return -EINVAL; } for (i = 1; i < ind_tbl_size; i++) { if (++base_wqn != init_attr->ind_tbl[i]->wq_num) { pr_debug("indirection table's WQNs aren't consecutive\n"); - return ERR_PTR(-EINVAL); + return -EINVAL; } } - rwq_ind_table = kzalloc(sizeof(*rwq_ind_table), GFP_KERNEL); - if (!rwq_ind_table) - return ERR_PTR(-ENOMEM); - if (udata->outlen) { resp.response_length = offsetof(typeof(resp), response_length) + sizeof(resp.response_length); err = ib_copy_to_udata(udata, &resp, resp.response_length); - if (err) - goto err; } - 
return rwq_ind_table; - -err: - kfree(rwq_ind_table); - return ERR_PTR(err); -} - -int mlx4_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl) -{ - kfree(ib_rwq_ind_tbl); - return 0; + return err; } struct mlx4_ib_drain_cqe { diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c index 8f9d5035142d..bf618529e734 100644 --- a/drivers/infiniband/hw/mlx4/srq.c +++ b/drivers/infiniband/hw/mlx4/srq.c @@ -115,8 +115,9 @@ int mlx4_ib_create_srq(struct ib_srq *ib_srq, if (IS_ERR(srq->umem)) return PTR_ERR(srq->umem); - err = mlx4_mtt_init(dev->dev, ib_umem_page_count(srq->umem), - PAGE_SHIFT, &srq->mtt); + err = mlx4_mtt_init( + dev->dev, ib_umem_num_dma_blocks(srq->umem, PAGE_SIZE), + PAGE_SHIFT, &srq->mtt); if (err) goto err_buf; @@ -260,7 +261,7 @@ int mlx4_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr) return 0; } -void mlx4_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) +int mlx4_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) { struct mlx4_ib_dev *dev = to_mdev(srq->device); struct mlx4_ib_srq *msrq = to_msrq(srq); @@ -282,6 +283,7 @@ void mlx4_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) mlx4_db_free(dev->dev, &msrq->db); } ib_umem_release(msrq->umem); + return 0; } void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index) diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c index 59e5ec39b447..505bc47fd575 100644 --- a/drivers/infiniband/hw/mlx5/ah.c +++ b/drivers/infiniband/hw/mlx5/ah.c @@ -106,8 +106,8 @@ int mlx5_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, if (ah_type == RDMA_AH_ATTR_TYPE_ROCE && udata) { int err; struct mlx5_ib_create_ah_resp resp = {}; - u32 min_resp_len = offsetof(typeof(resp), dmac) + - sizeof(resp.dmac); + u32 min_resp_len = + offsetofend(struct mlx5_ib_create_ah_resp, dmac); if (udata->outlen < min_resp_len) return -EINVAL; @@ -147,8 +147,3 @@ int mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr) return 0; } - -void mlx5_ib_destroy_ah(struct ib_ah *ah, u32 flags) -{ - return; -} diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c index ebb2f108b64f..234f29912ba9 100644 --- a/drivers/infiniband/hw/mlx5/cmd.c +++ b/drivers/infiniband/hw/mlx5/cmd.c @@ -168,14 +168,14 @@ void mlx5_cmd_destroy_tis(struct mlx5_core_dev *dev, u32 tisn, u16 uid) mlx5_cmd_exec_in(dev, destroy_tis, in); } -void mlx5_cmd_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn, u16 uid) +int mlx5_cmd_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn, u16 uid) { u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)] = {}; MLX5_SET(destroy_rqt_in, in, opcode, MLX5_CMD_OP_DESTROY_RQT); MLX5_SET(destroy_rqt_in, in, rqtn, rqtn); MLX5_SET(destroy_rqt_in, in, uid, uid); - mlx5_cmd_exec_in(dev, destroy_rqt, in); + return mlx5_cmd_exec_in(dev, destroy_rqt, in); } int mlx5_cmd_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn, @@ -209,14 +209,14 @@ void mlx5_cmd_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn, mlx5_cmd_exec_in(dev, dealloc_transport_domain, in); } -void mlx5_cmd_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn, u16 uid) +int mlx5_cmd_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn, u16 uid) { u32 in[MLX5_ST_SZ_DW(dealloc_pd_in)] = {}; MLX5_SET(dealloc_pd_in, in, opcode, MLX5_CMD_OP_DEALLOC_PD); MLX5_SET(dealloc_pd_in, in, pd, pdn); MLX5_SET(dealloc_pd_in, in, uid, uid); - mlx5_cmd_exec_in(dev, dealloc_pd, in); + return mlx5_cmd_exec_in(dev, dealloc_pd, in); } int mlx5_cmd_attach_mcg(struct 
mlx5_core_dev *dev, union ib_gid *mgid, diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h index 1d192a8ca87d..88ea6ef8f2cb 100644 --- a/drivers/infiniband/hw/mlx5/cmd.h +++ b/drivers/infiniband/hw/mlx5/cmd.h @@ -44,10 +44,10 @@ int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point, int mlx5_cmd_alloc_memic(struct mlx5_dm *dm, phys_addr_t *addr, u64 length, u32 alignment); void mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length); -void mlx5_cmd_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn, u16 uid); +int mlx5_cmd_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn, u16 uid); void mlx5_cmd_destroy_tir(struct mlx5_core_dev *dev, u32 tirn, u16 uid); void mlx5_cmd_destroy_tis(struct mlx5_core_dev *dev, u32 tisn, u16 uid); -void mlx5_cmd_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn, u16 uid); +int mlx5_cmd_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn, u16 uid); int mlx5_cmd_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn, u16 uid); void mlx5_cmd_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn, diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c index 145f3cb40ccb..70c8fd67ee2f 100644 --- a/drivers/infiniband/hw/mlx5/counters.c +++ b/drivers/infiniband/hw/mlx5/counters.c @@ -117,7 +117,7 @@ err_bound: return ret; } -static void mlx5_ib_destroy_counters(struct ib_counters *counters) +static int mlx5_ib_destroy_counters(struct ib_counters *counters) { struct mlx5_ib_mcounters *mcounters = to_mcounters(counters); @@ -125,6 +125,7 @@ static void mlx5_ib_destroy_counters(struct ib_counters *counters) if (mcounters->hw_cntrs_hndl) mlx5_fc_destroy(to_mdev(counters->device)->mdev, mcounters->hw_cntrs_hndl); + return 0; } static int mlx5_ib_create_counters(struct ib_counters *counters, @@ -456,12 +457,12 @@ static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev, cnts->num_ext_ppcnt_counters = ARRAY_SIZE(ext_ppcnt_cnts); num_counters += ARRAY_SIZE(ext_ppcnt_cnts); } - cnts->names = kcalloc(num_counters, sizeof(cnts->names), GFP_KERNEL); + cnts->names = kcalloc(num_counters, sizeof(*cnts->names), GFP_KERNEL); if (!cnts->names) return -ENOMEM; cnts->offsets = kcalloc(num_counters, - sizeof(cnts->offsets), GFP_KERNEL); + sizeof(*cnts->offsets), GFP_KERNEL); if (!cnts->offsets) goto err_names; diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index dceb0eb2bed1..fb62f1d04afa 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -168,7 +168,7 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe, { enum rdma_link_layer ll = rdma_port_get_link_layer(qp->ibqp.device, 1); struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device); - struct mlx5_ib_srq *srq; + struct mlx5_ib_srq *srq = NULL; struct mlx5_ib_wq *wq; u16 wqe_ctr; u8 roce_packet_type; @@ -180,7 +180,8 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe, if (qp->ibqp.xrcd) { msrq = mlx5_cmd_get_srq(dev, be32_to_cpu(cqe->srqn)); - srq = to_mibsrq(msrq); + if (msrq) + srq = to_mibsrq(msrq); } else { srq = to_msrq(qp->ibqp.srq); } @@ -254,7 +255,7 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe, switch (roce_packet_type) { case MLX5_CQE_ROCE_L3_HEADER_TYPE_GRH: - wc->network_hdr_type = RDMA_NETWORK_IB; + wc->network_hdr_type = RDMA_NETWORK_ROCE_V1; break; case MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV6: wc->network_hdr_type = RDMA_NETWORK_IPV6; @@ -1023,16 +1024,21 @@ err_cqb: return err; } -void 
mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) +int mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(cq->device); struct mlx5_ib_cq *mcq = to_mcq(cq); + int ret; + + ret = mlx5_core_destroy_cq(dev->mdev, &mcq->mcq); + if (ret) + return ret; - mlx5_core_destroy_cq(dev->mdev, &mcq->mcq); if (udata) destroy_cq_user(mcq, udata); else destroy_cq_kernel(dev, mcq); + return 0; } static int is_equal_rsn(struct mlx5_cqe64 *cqe64, u32 rsn) diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c index e9cfb9a2ef41..492cfe063bca 100644 --- a/drivers/infiniband/hw/mlx5/fs.c +++ b/drivers/infiniband/hw/mlx5/fs.c @@ -136,12 +136,9 @@ static int check_mpls_supp_fields(u32 field_support, const __be32 *set_mask) #define LAST_COUNTERS_FIELD counters /* Field is the last supported field */ -#define FIELDS_NOT_SUPPORTED(filter, field)\ - memchr_inv((void *)&filter.field +\ - sizeof(filter.field), 0,\ - sizeof(filter) -\ - offsetof(typeof(filter), field) -\ - sizeof(filter.field)) +#define FIELDS_NOT_SUPPORTED(filter, field) \ + memchr_inv((void *)&filter.field + sizeof(filter.field), 0, \ + sizeof(filter) - offsetofend(typeof(filter), field)) int parse_flow_flow_action(struct mlx5_ib_flow_action *maction, bool is_egress, @@ -767,6 +764,7 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, { bool dont_trap = flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP; struct mlx5_flow_namespace *ns = NULL; + enum mlx5_flow_namespace_type fn_type; struct mlx5_ib_flow_prio *prio; struct mlx5_flow_table *ft; int max_table_size; @@ -780,11 +778,9 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, log_max_ft_size)); esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) != DEVLINK_ESWITCH_ENCAP_MODE_NONE; - if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { - enum mlx5_flow_namespace_type fn_type; - - if (flow_is_multicast_only(flow_attr) && - !dont_trap) + switch (flow_attr->type) { + case IB_FLOW_ATTR_NORMAL: + if (flow_is_multicast_only(flow_attr) && !dont_trap) priority = MLX5_IB_FLOW_MCAST_PRIO; else priority = ib_prio_to_core_prio(flow_attr->priority, @@ -797,12 +793,11 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP; if (!dev->is_rep && !esw_encap && MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, - reformat_l3_tunnel_to_l2)) + reformat_l3_tunnel_to_l2)) flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; } else { - max_table_size = - BIT(MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, - log_max_ft_size)); + max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_TX( + dev->mdev, log_max_ft_size)); fn_type = MLX5_FLOW_NAMESPACE_EGRESS; prio = &dev->flow_db->egress_prios[priority]; if (!dev->is_rep && !esw_encap && @@ -812,27 +807,31 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, ns = mlx5_get_flow_namespace(dev->mdev, fn_type); num_entries = MLX5_FS_MAX_ENTRIES; num_groups = MLX5_FS_MAX_TYPES; - } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || - flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) { + break; + case IB_FLOW_ATTR_ALL_DEFAULT: + case IB_FLOW_ATTR_MC_DEFAULT: ns = mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_LEFTOVERS); - build_leftovers_ft_param(&priority, - &num_entries, - &num_groups); + build_leftovers_ft_param(&priority, &num_entries, &num_groups); prio = &dev->flow_db->prios[MLX5_IB_FLOW_LEFTOVERS_PRIO]; - } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) { + break; + case IB_FLOW_ATTR_SNIFFER: if 
(!MLX5_CAP_FLOWTABLE(dev->mdev, allow_sniffer_and_nic_rx_shared_tir)) return ERR_PTR(-EOPNOTSUPP); - ns = mlx5_get_flow_namespace(dev->mdev, ft_type == MLX5_IB_FT_RX ? - MLX5_FLOW_NAMESPACE_SNIFFER_RX : - MLX5_FLOW_NAMESPACE_SNIFFER_TX); + ns = mlx5_get_flow_namespace( + dev->mdev, ft_type == MLX5_IB_FT_RX ? + MLX5_FLOW_NAMESPACE_SNIFFER_RX : + MLX5_FLOW_NAMESPACE_SNIFFER_TX); prio = &dev->flow_db->sniffer[ft_type]; priority = 0; num_entries = 1; num_groups = 1; + break; + default: + break; } if (!ns) @@ -954,7 +953,7 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, if (!flow_is_multicast_only(flow_attr)) set_underlay_qp(dev, spec, underlay_qpn); - if (dev->is_rep) { + if (dev->is_rep && flow_attr->type != IB_FLOW_ATTR_SNIFFER) { struct mlx5_eswitch_rep *rep; rep = dev->port[flow_attr->port - 1].rep; @@ -1116,6 +1115,7 @@ static struct mlx5_ib_flow_handler *create_sniffer_rule(struct mlx5_ib_dev *dev, int err; static const struct ib_flow_attr flow_attr = { .num_of_specs = 0, + .type = IB_FLOW_ATTR_SNIFFER, .size = sizeof(flow_attr) }; @@ -1143,10 +1143,8 @@ err: return ERR_PTR(err); } - static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_attr, - int domain, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(qp->device); @@ -1162,8 +1160,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, int underlay_qpn; if (udata && udata->inlen) { - min_ucmd_sz = offsetof(typeof(ucmd_hdr), reserved) + - sizeof(ucmd_hdr.reserved); + min_ucmd_sz = offsetofend(struct mlx5_ib_create_flow, reserved); if (udata->inlen < min_ucmd_sz) return ERR_PTR(-EOPNOTSUPP); @@ -1197,10 +1194,9 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, goto free_ucmd; } - if (domain != IB_FLOW_DOMAIN_USER || - flow_attr->port > dev->num_ports || - (flow_attr->flags & ~(IB_FLOW_ATTR_FLAGS_DONT_TRAP | - IB_FLOW_ATTR_FLAGS_EGRESS))) { + if (flow_attr->port > dev->num_ports || + (flow_attr->flags & + ~(IB_FLOW_ATTR_FLAGS_DONT_TRAP | IB_FLOW_ATTR_FLAGS_EGRESS))) { err = -EINVAL; goto free_ucmd; } @@ -1245,19 +1241,22 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, dst->tir_num = mqp->raw_packet_qp.rq.tirn; } - if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { + switch (flow_attr->type) { + case IB_FLOW_ATTR_NORMAL: underlay_qpn = (mqp->flags & IB_QP_CREATE_SOURCE_QPN) ? 
mqp->underlay_qpn : 0; handler = _create_flow_rule(dev, ft_prio, flow_attr, dst, underlay_qpn, ucmd); - } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || - flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) { - handler = create_leftovers_rule(dev, ft_prio, flow_attr, - dst); - } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) { + break; + case IB_FLOW_ATTR_ALL_DEFAULT: + case IB_FLOW_ATTR_MC_DEFAULT: + handler = create_leftovers_rule(dev, ft_prio, flow_attr, dst); + break; + case IB_FLOW_ATTR_SNIFFER: handler = create_sniffer_rule(dev, ft_prio, ft_prio_tx, dst); - } else { + break; + default: err = -EINVAL; goto destroy_ft; } @@ -1305,39 +1304,47 @@ _get_flow_table(struct mlx5_ib_dev *dev, esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) != DEVLINK_ESWITCH_ENCAP_MODE_NONE; - if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) { - max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, - log_max_ft_size)); + switch (fs_matcher->ns_type) { + case MLX5_FLOW_NAMESPACE_BYPASS: + max_table_size = BIT( + MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, log_max_ft_size)); if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap) && !esw_encap) flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP; if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, reformat_l3_tunnel_to_l2) && !esw_encap) flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; - } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) { + break; + case MLX5_FLOW_NAMESPACE_EGRESS: max_table_size = BIT( MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, log_max_ft_size)); - if (MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat) && !esw_encap) + if (MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat) && + !esw_encap) flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; - } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB) { + break; + case MLX5_FLOW_NAMESPACE_FDB: max_table_size = BIT( MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, log_max_ft_size)); if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, decap) && esw_encap) flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP; - if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, reformat_l3_tunnel_to_l2) && + if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, + reformat_l3_tunnel_to_l2) && esw_encap) flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; priority = FDB_BYPASS_PATH; - } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) { - max_table_size = - BIT(MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev, - log_max_ft_size)); + break; + case MLX5_FLOW_NAMESPACE_RDMA_RX: + max_table_size = BIT( + MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev, log_max_ft_size)); priority = fs_matcher->priority; - } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX) { - max_table_size = - BIT(MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev, - log_max_ft_size)); + break; + case MLX5_FLOW_NAMESPACE_RDMA_TX: + max_table_size = BIT( + MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev, log_max_ft_size)); priority = fs_matcher->priority; + break; + default: + break; } max_table_size = min_t(int, max_table_size, MLX5_FS_MAX_ENTRIES); @@ -1346,16 +1353,24 @@ _get_flow_table(struct mlx5_ib_dev *dev, if (!ns) return ERR_PTR(-EOPNOTSUPP); - if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) + switch (fs_matcher->ns_type) { + case MLX5_FLOW_NAMESPACE_BYPASS: prio = &dev->flow_db->prios[priority]; - else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) + break; + case MLX5_FLOW_NAMESPACE_EGRESS: prio = &dev->flow_db->egress_prios[priority]; - else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB) + break; + case MLX5_FLOW_NAMESPACE_FDB: prio = &dev->flow_db->fdb; - else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) + break; + case 
MLX5_FLOW_NAMESPACE_RDMA_RX: prio = &dev->flow_db->rdma_rx[priority]; - else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX) + break; + case MLX5_FLOW_NAMESPACE_RDMA_TX: prio = &dev->flow_db->rdma_tx[priority]; + break; + default: return ERR_PTR(-EINVAL); + } if (!prio) return ERR_PTR(-EINVAL); @@ -1488,20 +1503,25 @@ static struct mlx5_ib_flow_handler *raw_fs_rule_add( goto unlock; } - if (dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR) { + switch (dest_type) { + case MLX5_FLOW_DESTINATION_TYPE_TIR: dst[dst_num].type = dest_type; dst[dst_num++].tir_num = dest_id; flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - } else if (dest_type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) { + break; + case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE: dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM; dst[dst_num++].ft_num = dest_id; flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - } else if (dest_type == MLX5_FLOW_DESTINATION_TYPE_PORT) { + break; + case MLX5_FLOW_DESTINATION_TYPE_PORT: dst[dst_num++].type = MLX5_FLOW_DESTINATION_TYPE_PORT; flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW; + break; + default: + break; } - if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; dst[dst_num].counter_id = counter_id; diff --git a/drivers/infiniband/hw/mlx5/gsi.c b/drivers/infiniband/hw/mlx5/gsi.c index 40d418153891..7fcad9135276 100644 --- a/drivers/infiniband/hw/mlx5/gsi.c +++ b/drivers/infiniband/hw/mlx5/gsi.c @@ -35,44 +35,19 @@ struct mlx5_ib_gsi_wr { struct ib_cqe cqe; struct ib_wc wc; - int send_flags; bool completed:1; }; -struct mlx5_ib_gsi_qp { - struct ib_qp ibqp; - struct ib_qp *rx_qp; - u8 port_num; - struct ib_qp_cap cap; - enum ib_sig_type sq_sig_type; - /* Serialize qp state modifications */ - struct mutex mutex; - struct ib_cq *cq; - struct mlx5_ib_gsi_wr *outstanding_wrs; - u32 outstanding_pi, outstanding_ci; - int num_qps; - /* Protects access to the tx_qps. Post send operations synchronize - * with tx_qp creation in setup_qp(). Also protects the - * outstanding_wrs array and indices. 
- */ - spinlock_t lock; - struct ib_qp **tx_qps; -}; - -static struct mlx5_ib_gsi_qp *gsi_qp(struct ib_qp *qp) -{ - return container_of(qp, struct mlx5_ib_gsi_qp, ibqp); -} - static bool mlx5_ib_deth_sqpn_cap(struct mlx5_ib_dev *dev) { return MLX5_CAP_GEN(dev->mdev, set_deth_sqpn); } /* Call with gsi->lock locked */ -static void generate_completions(struct mlx5_ib_gsi_qp *gsi) +static void generate_completions(struct mlx5_ib_qp *mqp) { - struct ib_cq *gsi_cq = gsi->ibqp.send_cq; + struct mlx5_ib_gsi_qp *gsi = &mqp->gsi; + struct ib_cq *gsi_cq = mqp->ibqp.send_cq; struct mlx5_ib_gsi_wr *wr; u32 index; @@ -83,10 +58,7 @@ static void generate_completions(struct mlx5_ib_gsi_qp *gsi) if (!wr->completed) break; - if (gsi->sq_sig_type == IB_SIGNAL_ALL_WR || - wr->send_flags & IB_SEND_SIGNALED) - WARN_ON_ONCE(mlx5_ib_generate_wc(gsi_cq, &wr->wc)); - + WARN_ON_ONCE(mlx5_ib_generate_wc(gsi_cq, &wr->wc)); wr->completed = false; } @@ -98,6 +70,7 @@ static void handle_single_completion(struct ib_cq *cq, struct ib_wc *wc) struct mlx5_ib_gsi_qp *gsi = cq->cq_context; struct mlx5_ib_gsi_wr *wr = container_of(wc->wr_cqe, struct mlx5_ib_gsi_wr, cqe); + struct mlx5_ib_qp *mqp = container_of(gsi, struct mlx5_ib_qp, gsi); u64 wr_id; unsigned long flags; @@ -106,19 +79,19 @@ static void handle_single_completion(struct ib_cq *cq, struct ib_wc *wc) wr_id = wr->wc.wr_id; wr->wc = *wc; wr->wc.wr_id = wr_id; - wr->wc.qp = &gsi->ibqp; + wr->wc.qp = &mqp->ibqp; - generate_completions(gsi); + generate_completions(mqp); spin_unlock_irqrestore(&gsi->lock, flags); } -struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd, - struct ib_qp_init_attr *init_attr) +int mlx5_ib_create_gsi(struct ib_pd *pd, struct mlx5_ib_qp *mqp, + struct ib_qp_init_attr *attr) { struct mlx5_ib_dev *dev = to_mdev(pd->device); struct mlx5_ib_gsi_qp *gsi; - struct ib_qp_init_attr hw_init_attr = *init_attr; - const u8 port_num = init_attr->port_num; + struct ib_qp_init_attr hw_init_attr = *attr; + const u8 port_num = attr->port_num; int num_qps = 0; int ret; @@ -130,26 +103,19 @@ struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd, num_qps = MLX5_MAX_PORTS; } - gsi = kzalloc(sizeof(*gsi), GFP_KERNEL); - if (!gsi) - return ERR_PTR(-ENOMEM); - + gsi = &mqp->gsi; gsi->tx_qps = kcalloc(num_qps, sizeof(*gsi->tx_qps), GFP_KERNEL); - if (!gsi->tx_qps) { - ret = -ENOMEM; - goto err_free; - } + if (!gsi->tx_qps) + return -ENOMEM; - gsi->outstanding_wrs = kcalloc(init_attr->cap.max_send_wr, - sizeof(*gsi->outstanding_wrs), - GFP_KERNEL); + gsi->outstanding_wrs = + kcalloc(attr->cap.max_send_wr, sizeof(*gsi->outstanding_wrs), + GFP_KERNEL); if (!gsi->outstanding_wrs) { ret = -ENOMEM; goto err_free_tx; } - mutex_init(&gsi->mutex); - mutex_lock(&dev->devr.mutex); if (dev->devr.ports[port_num - 1].gsi) { @@ -161,12 +127,10 @@ struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd, gsi->num_qps = num_qps; spin_lock_init(&gsi->lock); - gsi->cap = init_attr->cap; - gsi->sq_sig_type = init_attr->sq_sig_type; - gsi->ibqp.qp_num = 1; + gsi->cap = attr->cap; gsi->port_num = port_num; - gsi->cq = ib_alloc_cq(pd->device, gsi, init_attr->cap.max_send_wr, 0, + gsi->cq = ib_alloc_cq(pd->device, gsi, attr->cap.max_send_wr, 0, IB_POLL_SOFTIRQ); if (IS_ERR(gsi->cq)) { mlx5_ib_warn(dev, "unable to create send CQ for GSI QP. 
error %ld\n", @@ -182,19 +146,31 @@ struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd, hw_init_attr.cap.max_send_sge = 0; hw_init_attr.cap.max_inline_data = 0; } - gsi->rx_qp = ib_create_qp(pd, &hw_init_attr); + + gsi->rx_qp = mlx5_ib_create_qp(pd, &hw_init_attr, NULL); if (IS_ERR(gsi->rx_qp)) { mlx5_ib_warn(dev, "unable to create hardware GSI QP. error %ld\n", PTR_ERR(gsi->rx_qp)); ret = PTR_ERR(gsi->rx_qp); goto err_destroy_cq; } + gsi->rx_qp->device = pd->device; + gsi->rx_qp->pd = pd; + gsi->rx_qp->real_qp = gsi->rx_qp; + + gsi->rx_qp->qp_type = hw_init_attr.qp_type; + gsi->rx_qp->send_cq = hw_init_attr.send_cq; + gsi->rx_qp->recv_cq = hw_init_attr.recv_cq; + gsi->rx_qp->event_handler = hw_init_attr.event_handler; + spin_lock_init(&gsi->rx_qp->mr_lock); + INIT_LIST_HEAD(&gsi->rx_qp->rdma_mrs); + INIT_LIST_HEAD(&gsi->rx_qp->sig_mrs); - dev->devr.ports[init_attr->port_num - 1].gsi = gsi; + dev->devr.ports[attr->port_num - 1].gsi = gsi; mutex_unlock(&dev->devr.mutex); - return &gsi->ibqp; + return 0; err_destroy_cq: ib_free_cq(gsi->cq); @@ -203,23 +179,19 @@ err_free_wrs: kfree(gsi->outstanding_wrs); err_free_tx: kfree(gsi->tx_qps); -err_free: - kfree(gsi); - return ERR_PTR(ret); + return ret; } -int mlx5_ib_gsi_destroy_qp(struct ib_qp *qp) +int mlx5_ib_destroy_gsi(struct mlx5_ib_qp *mqp) { - struct mlx5_ib_dev *dev = to_mdev(qp->device); - struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp); + struct mlx5_ib_dev *dev = to_mdev(mqp->ibqp.device); + struct mlx5_ib_gsi_qp *gsi = &mqp->gsi; const int port_num = gsi->port_num; int qp_index; int ret; - mlx5_ib_dbg(dev, "destroying GSI QP\n"); - mutex_lock(&dev->devr.mutex); - ret = ib_destroy_qp(gsi->rx_qp); + ret = mlx5_ib_destroy_qp(gsi->rx_qp, NULL); if (ret) { mlx5_ib_warn(dev, "unable to destroy hardware GSI QP. 
error %d\n", ret); @@ -241,7 +213,7 @@ int mlx5_ib_gsi_destroy_qp(struct ib_qp *qp) kfree(gsi->outstanding_wrs); kfree(gsi->tx_qps); - kfree(gsi); + kfree(mqp); return 0; } @@ -259,7 +231,6 @@ static struct ib_qp *create_gsi_ud_qp(struct mlx5_ib_gsi_qp *gsi) .max_send_sge = gsi->cap.max_send_sge, .max_inline_data = gsi->cap.max_inline_data, }, - .sq_sig_type = gsi->sq_sig_type, .qp_type = IB_QPT_UD, .create_flags = MLX5_IB_QP_CREATE_SQPN_QP1, }; @@ -370,56 +341,54 @@ err_destroy_qp: static void setup_qps(struct mlx5_ib_gsi_qp *gsi) { + struct mlx5_ib_dev *dev = to_mdev(gsi->rx_qp->device); u16 qp_index; + mutex_lock(&dev->devr.mutex); for (qp_index = 0; qp_index < gsi->num_qps; ++qp_index) setup_qp(gsi, qp_index); + mutex_unlock(&dev->devr.mutex); } int mlx5_ib_gsi_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, int attr_mask) { struct mlx5_ib_dev *dev = to_mdev(qp->device); - struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp); + struct mlx5_ib_qp *mqp = to_mqp(qp); + struct mlx5_ib_gsi_qp *gsi = &mqp->gsi; int ret; mlx5_ib_dbg(dev, "modifying GSI QP to state %d\n", attr->qp_state); - mutex_lock(&gsi->mutex); ret = ib_modify_qp(gsi->rx_qp, attr, attr_mask); if (ret) { mlx5_ib_warn(dev, "unable to modify GSI rx QP: %d\n", ret); - goto unlock; + return ret; } if (to_mqp(gsi->rx_qp)->state == IB_QPS_RTS) setup_qps(gsi); - -unlock: - mutex_unlock(&gsi->mutex); - - return ret; + return 0; } int mlx5_ib_gsi_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr) { - struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp); + struct mlx5_ib_qp *mqp = to_mqp(qp); + struct mlx5_ib_gsi_qp *gsi = &mqp->gsi; int ret; - mutex_lock(&gsi->mutex); ret = ib_query_qp(gsi->rx_qp, qp_attr, qp_attr_mask, qp_init_attr); qp_init_attr->cap = gsi->cap; - mutex_unlock(&gsi->mutex); - return ret; } /* Call with gsi->lock locked */ -static int mlx5_ib_add_outstanding_wr(struct mlx5_ib_gsi_qp *gsi, +static int mlx5_ib_add_outstanding_wr(struct mlx5_ib_qp *mqp, struct ib_ud_wr *wr, struct ib_wc *wc) { + struct mlx5_ib_gsi_qp *gsi = &mqp->gsi; struct mlx5_ib_dev *dev = to_mdev(gsi->rx_qp->device); struct mlx5_ib_gsi_wr *gsi_wr; @@ -448,22 +417,21 @@ static int mlx5_ib_add_outstanding_wr(struct mlx5_ib_gsi_qp *gsi, } /* Call with gsi->lock locked */ -static int mlx5_ib_gsi_silent_drop(struct mlx5_ib_gsi_qp *gsi, - struct ib_ud_wr *wr) +static int mlx5_ib_gsi_silent_drop(struct mlx5_ib_qp *mqp, struct ib_ud_wr *wr) { struct ib_wc wc = { { .wr_id = wr->wr.wr_id }, .status = IB_WC_SUCCESS, .opcode = IB_WC_SEND, - .qp = &gsi->ibqp, + .qp = &mqp->ibqp, }; int ret; - ret = mlx5_ib_add_outstanding_wr(gsi, wr, &wc); + ret = mlx5_ib_add_outstanding_wr(mqp, wr, &wc); if (ret) return ret; - generate_completions(gsi); + generate_completions(mqp); return 0; } @@ -490,7 +458,8 @@ static struct ib_qp *get_tx_qp(struct mlx5_ib_gsi_qp *gsi, struct ib_ud_wr *wr) int mlx5_ib_gsi_post_send(struct ib_qp *qp, const struct ib_send_wr *wr, const struct ib_send_wr **bad_wr) { - struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp); + struct mlx5_ib_qp *mqp = to_mqp(qp); + struct mlx5_ib_gsi_qp *gsi = &mqp->gsi; struct ib_qp *tx_qp; unsigned long flags; int ret; @@ -503,14 +472,14 @@ int mlx5_ib_gsi_post_send(struct ib_qp *qp, const struct ib_send_wr *wr, spin_lock_irqsave(&gsi->lock, flags); tx_qp = get_tx_qp(gsi, &cur_wr); if (!tx_qp) { - ret = mlx5_ib_gsi_silent_drop(gsi, &cur_wr); + ret = mlx5_ib_gsi_silent_drop(mqp, &cur_wr); if (ret) goto err; spin_unlock_irqrestore(&gsi->lock, flags); continue; } - ret = 
mlx5_ib_add_outstanding_wr(gsi, &cur_wr, NULL); + ret = mlx5_ib_add_outstanding_wr(mqp, &cur_wr, NULL); if (ret) goto err; @@ -534,7 +503,8 @@ err: int mlx5_ib_gsi_post_recv(struct ib_qp *qp, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr) { - struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp); + struct mlx5_ib_qp *mqp = to_mqp(qp); + struct mlx5_ib_gsi_qp *gsi = &mqp->gsi; return ib_post_recv(gsi->rx_qp, wr, bad_wr); } @@ -544,7 +514,5 @@ void mlx5_ib_gsi_pkey_change(struct mlx5_ib_gsi_qp *gsi) if (!gsi) return; - mutex_lock(&gsi->mutex); setup_qps(gsi); - mutex_unlock(&gsi->mutex); } diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index d60d63221b14..89e04ca62ae0 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -326,8 +326,8 @@ out: spin_unlock(&port->mp.mpi_lock); } -static int translate_eth_legacy_proto_oper(u32 eth_proto_oper, u8 *active_speed, - u8 *active_width) +static int translate_eth_legacy_proto_oper(u32 eth_proto_oper, + u16 *active_speed, u8 *active_width) { switch (eth_proto_oper) { case MLX5E_PROT_MASK(MLX5E_1000BASE_CX_SGMII): @@ -384,7 +384,7 @@ static int translate_eth_legacy_proto_oper(u32 eth_proto_oper, u8 *active_speed, return 0; } -static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u8 *active_speed, +static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u16 *active_speed, u8 *active_width) { switch (eth_proto_oper) { @@ -436,7 +436,7 @@ static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u8 *active_speed, return 0; } -static int translate_eth_proto_oper(u32 eth_proto_oper, u8 *active_speed, +static int translate_eth_proto_oper(u32 eth_proto_oper, u16 *active_speed, u8 *active_width, bool ext) { return ext ? @@ -546,7 +546,7 @@ static int set_roce_addr(struct mlx5_ib_dev *dev, u8 port_num, unsigned int index, const union ib_gid *gid, const struct ib_gid_attr *attr) { - enum ib_gid_type gid_type = IB_GID_TYPE_IB; + enum ib_gid_type gid_type = IB_GID_TYPE_ROCE; u16 vlan_id = 0xffff; u8 roce_version = 0; u8 roce_l3_type = 0; @@ -561,7 +561,7 @@ static int set_roce_addr(struct mlx5_ib_dev *dev, u8 port_num, } switch (gid_type) { - case IB_GID_TYPE_IB: + case IB_GID_TYPE_ROCE: roce_version = MLX5_ROCE_VERSION_1; break; case IB_GID_TYPE_ROCE_UDP_ENCAP: @@ -840,7 +840,9 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, /* We support 'Gappy' memory registration too */ props->device_cap_flags |= IB_DEVICE_SG_GAPS_REG; } - props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; + /* IB_WR_REG_MR always requires changing the entity size with UMR */ + if (!MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled)) + props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; if (MLX5_CAP_GEN(mdev, sho)) { props->device_cap_flags |= IB_DEVICE_INTEGRITY_HANDOVER; /* At this stage no support for signature handover */ @@ -1175,32 +1177,24 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, return 0; } -enum mlx5_ib_width { - MLX5_IB_WIDTH_1X = 1 << 0, - MLX5_IB_WIDTH_2X = 1 << 1, - MLX5_IB_WIDTH_4X = 1 << 2, - MLX5_IB_WIDTH_8X = 1 << 3, - MLX5_IB_WIDTH_12X = 1 << 4 -}; - -static void translate_active_width(struct ib_device *ibdev, u8 active_width, - u8 *ib_width) +static void translate_active_width(struct ib_device *ibdev, u16 active_width, + u8 *ib_width) { struct mlx5_ib_dev *dev = to_mdev(ibdev); - if (active_width & MLX5_IB_WIDTH_1X) + if (active_width & MLX5_PTYS_WIDTH_1X) *ib_width = IB_WIDTH_1X; - else if (active_width & MLX5_IB_WIDTH_2X) + else if 
(active_width & MLX5_PTYS_WIDTH_2X) *ib_width = IB_WIDTH_2X; - else if (active_width & MLX5_IB_WIDTH_4X) + else if (active_width & MLX5_PTYS_WIDTH_4X) *ib_width = IB_WIDTH_4X; - else if (active_width & MLX5_IB_WIDTH_8X) + else if (active_width & MLX5_PTYS_WIDTH_8X) *ib_width = IB_WIDTH_8X; - else if (active_width & MLX5_IB_WIDTH_12X) + else if (active_width & MLX5_PTYS_WIDTH_12X) *ib_width = IB_WIDTH_12X; else { mlx5_ib_dbg(dev, "Invalid active_width %d, setting width to default value: 4x\n", - (int)active_width); + active_width); *ib_width = IB_WIDTH_4X; } @@ -1277,7 +1271,7 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port, u16 max_mtu; u16 oper_mtu; int err; - u8 ib_link_width_oper; + u16 ib_link_width_oper; u8 vl_hw_cap; rep = kzalloc(sizeof(*rep), GFP_KERNEL); @@ -1310,16 +1304,13 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port, if (props->port_cap_flags & IB_PORT_CAP_MASK2_SUP) props->port_cap_flags2 = rep->cap_mask2; - err = mlx5_query_port_link_width_oper(mdev, &ib_link_width_oper, port); + err = mlx5_query_ib_port_oper(mdev, &ib_link_width_oper, + &props->active_speed, port); if (err) goto out; translate_active_width(ibdev, ib_link_width_oper, &props->active_width); - err = mlx5_query_port_ib_proto_oper(mdev, &props->active_speed, port); - if (err) - goto out; - mlx5_query_port_max_mtu(mdev, &max_mtu, port); props->max_mtu = mlx5_mtu_to_ib_mtu(max_mtu); @@ -2354,7 +2345,9 @@ static inline int check_dm_type_support(struct mlx5_ib_dev *dev, return -EPERM; if (!(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner) || - MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, sw_owner))) + MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, sw_owner) || + MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner_v2) || + MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, sw_owner_v2))) return -EOPNOTSUPP; break; } @@ -2569,12 +2562,12 @@ static int mlx5_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) return 0; } -static void mlx5_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) +static int mlx5_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) { struct mlx5_ib_dev *mdev = to_mdev(pd->device); struct mlx5_ib_pd *mpd = to_mpd(pd); - mlx5_cmd_dealloc_pd(mdev->mdev, mpd->pdn, mpd->uid); + return mlx5_cmd_dealloc_pd(mdev->mdev, mpd->pdn, mpd->uid); } static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) @@ -2699,9 +2692,7 @@ static void pkey_change_handler(struct work_struct *work) container_of(work, struct mlx5_ib_port_resources, pkey_change_work); - mutex_lock(&ports->devr->mutex); mlx5_ib_gsi_pkey_change(ports->gsi); - mutex_unlock(&ports->devr->mutex); } static void mlx5_ib_handle_internal_error(struct mlx5_ib_dev *ibdev) @@ -3127,11 +3118,9 @@ static int mlx5_ib_dev_res_init(struct mlx5_ib_dev *dev) atomic_inc(&devr->p0->usecnt); atomic_set(&devr->s1->usecnt, 0); - for (port = 0; port < ARRAY_SIZE(devr->ports); ++port) { + for (port = 0; port < ARRAY_SIZE(devr->ports); ++port) INIT_WORK(&devr->ports[port].pkey_change_work, pkey_change_handler); - devr->ports[port].devr = devr; - } return 0; @@ -4098,6 +4087,8 @@ static const struct ib_device_ops mlx5_ib_dev_sriov_ops = { static const struct ib_device_ops mlx5_ib_dev_mw_ops = { .alloc_mw = mlx5_ib_alloc_mw, .dealloc_mw = mlx5_ib_dealloc_mw, + + INIT_RDMA_OBJ_SIZE(ib_mw, mlx5_ib_mw, ibmw), }; static const struct ib_device_ops mlx5_ib_dev_xrc_ops = { @@ -4268,6 +4259,9 @@ static const struct ib_device_ops mlx5_ib_dev_common_roce_ops = { .destroy_wq = mlx5_ib_destroy_wq, .get_netdev = mlx5_ib_get_netdev, .modify_wq = 
mlx5_ib_modify_wq, + + INIT_RDMA_OBJ_SIZE(ib_rwq_ind_table, mlx5_ib_rwq_ind_table, + ib_rwq_ind_tbl), }; static int mlx5_ib_roce_init(struct mlx5_ib_dev *dev) @@ -4386,7 +4380,7 @@ static int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev) name = "mlx5_%d"; else name = "mlx5_bond_%d"; - return ib_register_device(&dev->ib_dev, name); + return ib_register_device(&dev->ib_dev, name, &dev->mdev->pdev->dev); } static void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev) diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c index c19ec9fd8a63..13de3d2edd34 100644 --- a/drivers/infiniband/hw/mlx5/mem.c +++ b/drivers/infiniband/hw/mlx5/mem.c @@ -169,8 +169,8 @@ void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, int page_shift, __be64 *pas, int access_flags) { return __mlx5_ib_populate_pas(dev, umem, page_shift, 0, - ib_umem_num_pages(umem), pas, - access_flags); + ib_umem_num_dma_blocks(umem, PAGE_SIZE), + pas, access_flags); } int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset) { diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 5287fc868662..b1f2b34e5955 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -384,6 +384,22 @@ struct mlx5_ib_dct { u32 *in; }; +struct mlx5_ib_gsi_qp { + struct ib_qp *rx_qp; + u8 port_num; + struct ib_qp_cap cap; + struct ib_cq *cq; + struct mlx5_ib_gsi_wr *outstanding_wrs; + u32 outstanding_pi, outstanding_ci; + int num_qps; + /* Protects access to the tx_qps. Post send operations synchronize + * with tx_qp creation in setup_qp(). Also protects the + * outstanding_wrs array and indices. + */ + spinlock_t lock; + struct ib_qp **tx_qps; +}; + struct mlx5_ib_qp { struct ib_qp ibqp; union { @@ -391,6 +407,7 @@ struct mlx5_ib_qp { struct mlx5_ib_raw_packet_qp raw_packet_qp; struct mlx5_ib_rss_qp rss_qp; struct mlx5_ib_dct dct; + struct mlx5_ib_gsi_qp gsi; }; struct mlx5_frag_buf buf; @@ -693,10 +710,7 @@ struct mlx5_mr_cache { unsigned long last_add; }; -struct mlx5_ib_gsi_qp; - struct mlx5_ib_port_resources { - struct mlx5_ib_resources *devr; struct mlx5_ib_gsi_qp *gsi; struct work_struct pkey_change_work; }; @@ -1119,13 +1133,16 @@ void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index); int mlx5_ib_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata); int mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); -void mlx5_ib_destroy_ah(struct ib_ah *ah, u32 flags); +static inline int mlx5_ib_destroy_ah(struct ib_ah *ah, u32 flags) +{ + return 0; +} int mlx5_ib_create_srq(struct ib_srq *srq, struct ib_srq_init_attr *init_attr, struct ib_udata *udata); int mlx5_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr); -void mlx5_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata); +int mlx5_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata); int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr); int mlx5_ib_enable_lb(struct mlx5_ib_dev *dev, bool td, bool qp); @@ -1148,7 +1165,7 @@ int mlx5_ib_read_wqe_srq(struct mlx5_ib_srq *srq, int wqe_index, void *buffer, size_t buflen, size_t *bc); int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ib_udata *udata); -void mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata 
*udata); +int mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period); @@ -1163,8 +1180,7 @@ int mlx5_ib_advise_mr(struct ib_pd *pd, struct ib_sge *sg_list, u32 num_sge, struct uverbs_attr_bundle *attrs); -struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, - struct ib_udata *udata); +int mlx5_ib_alloc_mw(struct ib_mw *mw, struct ib_udata *udata); int mlx5_ib_dealloc_mw(struct ib_mw *mw); int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, int page_shift, int flags); @@ -1193,7 +1209,7 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, const struct ib_mad *in, struct ib_mad *out, size_t *out_mad_size, u16 *out_mad_pkey_index); int mlx5_ib_alloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata); -void mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata); +int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata); int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset); int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port); int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev, @@ -1229,7 +1245,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev); int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev); struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, - unsigned int entry); + unsigned int entry, int access_flags); void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); int mlx5_mr_cache_invalidate(struct mlx5_ib_mr *mr); @@ -1238,12 +1254,12 @@ int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd, struct ib_wq_init_attr *init_attr, struct ib_udata *udata); -void mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata); +int mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata); int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, u32 wq_attr_mask, struct ib_udata *udata); -struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device, - struct ib_rwq_ind_table_init_attr *init_attr, - struct ib_udata *udata); +int mlx5_ib_create_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_table, + struct ib_rwq_ind_table_init_attr *init_attr, + struct ib_udata *udata); int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table); struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev, struct ib_ucontext *context, @@ -1267,6 +1283,7 @@ void mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries, int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice, u32 flags, struct ib_sge *sg_list, u32 num_sge); +int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr, bool enable); #else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) { @@ -1288,6 +1305,10 @@ mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, { return -EOPNOTSUPP; } +static inline int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr, bool enable) +{ + return -EOPNOTSUPP; +} #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ extern const struct mmu_interval_notifier_ops mlx5_mn_ops; @@ -1318,9 +1339,9 @@ void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num); void mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num); /* GSI QP helper functions */ -struct ib_qp 
*mlx5_ib_gsi_create_qp(struct ib_pd *pd, - struct ib_qp_init_attr *init_attr); -int mlx5_ib_gsi_destroy_qp(struct ib_qp *qp); +int mlx5_ib_create_gsi(struct ib_pd *pd, struct mlx5_ib_qp *mqp, + struct ib_qp_init_attr *attr); +int mlx5_ib_destroy_gsi(struct mlx5_ib_qp *mqp); int mlx5_ib_gsi_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, int attr_mask); int mlx5_ib_gsi_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, @@ -1358,7 +1379,7 @@ static inline void init_query_mad(struct ib_smp *mad) static inline int is_qp1(enum ib_qp_type qp_type) { - return qp_type == MLX5_IB_QPT_HW_GSI; + return qp_type == MLX5_IB_QPT_HW_GSI || qp_type == IB_QPT_GSI; } #define MLX5_MAX_UMR_SHIFT 16 @@ -1442,25 +1463,54 @@ int bfregn_to_uar_index(struct mlx5_ib_dev *dev, struct mlx5_bfreg_info *bfregi, u32 bfregn, bool dyn_bfreg); -static inline bool mlx5_ib_can_use_umr(struct mlx5_ib_dev *dev, - bool do_modify_atomic, int access_flags) +static inline bool mlx5_ib_can_load_pas_with_umr(struct mlx5_ib_dev *dev, + size_t length) { + /* + * umr_check_mkey_mask() rejects MLX5_MKEY_MASK_PAGE_SIZE which is + * always set if MLX5_IB_SEND_UMR_UPDATE_TRANSLATION (aka + * MLX5_IB_UPD_XLT_ADDR and MLX5_IB_UPD_XLT_ENABLE) is set. Thus, a mkey + * can never be enabled without this capability. Simplify this weird + * quirky hardware by just saying it can't use PAS lists with UMR at + * all. + */ if (MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled)) return false; - if (do_modify_atomic && + /* + * length is the size of the MR in bytes when mlx5_ib_update_xlt() is + * used. + */ + if (!MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset) && + length >= MLX5_MAX_UMR_PAGES * PAGE_SIZE) + return false; + return true; +} + +/* + * true if an existing MR can be reconfigured to new access_flags using UMR. + * Older HW cannot use UMR to update certain elements of the MKC. 
See + * umr_check_mkey_mask(), get_umr_update_access_mask() and umr_check_mkey_mask() + */ +static inline bool mlx5_ib_can_reconfig_with_umr(struct mlx5_ib_dev *dev, + unsigned int current_access_flags, + unsigned int target_access_flags) +{ + unsigned int diffs = current_access_flags ^ target_access_flags; + + if ((diffs & IB_ACCESS_REMOTE_ATOMIC) && MLX5_CAP_GEN(dev->mdev, atomic) && MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled)) return false; - if (access_flags & IB_ACCESS_RELAXED_ORDERING && + if ((diffs & IB_ACCESS_RELAXED_ORDERING) && MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write) && !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr)) return false; - if (access_flags & IB_ACCESS_RELAXED_ORDERING && - MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) && - !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr)) + if ((diffs & IB_ACCESS_RELAXED_ORDERING) && + MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) && + !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr)) return false; return true; diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 3e6f2f9c6655..b261797b258f 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -50,6 +50,29 @@ enum { static void create_mkey_callback(int status, struct mlx5_async_work *context); +static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr, + struct ib_pd *pd) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + + MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC)); + MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE)); + MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ)); + MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE)); + MLX5_SET(mkc, mkc, lr, 1); + + if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write)) + MLX5_SET(mkc, mkc, relaxed_ordering_write, + !!(acc & IB_ACCESS_RELAXED_ORDERING)); + if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read)) + MLX5_SET(mkc, mkc, relaxed_ordering_read, + !!(acc & IB_ACCESS_RELAXED_ORDERING)); + + MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + MLX5_SET64(mkc, mkc, start_addr, start_addr); +} + static void assign_mkey_variant(struct mlx5_ib_dev *dev, struct mlx5_core_mkey *mkey, u32 *in) @@ -100,7 +123,8 @@ static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) return mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey); } -static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length) +static inline bool mlx5_ib_pas_fits_in_mr(struct mlx5_ib_mr *mr, u64 start, + u64 length) { return ((u64)1 << mr->order) * MLX5_ADAPTER_PAGE_SIZE >= length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1)); @@ -152,12 +176,12 @@ static struct mlx5_ib_mr *alloc_cache_mr(struct mlx5_cache_ent *ent, void *mkc) mr->cache_ent = ent; mr->dev = ent->dev; + set_mkc_access_pd_addr_fields(mkc, 0, 0, ent->dev->umrc.pd); MLX5_SET(mkc, mkc, free, 1); MLX5_SET(mkc, mkc, umr_en, 1); MLX5_SET(mkc, mkc, access_mode_1_0, ent->access_mode & 0x3); MLX5_SET(mkc, mkc, access_mode_4_2, (ent->access_mode >> 2) & 0x7); - MLX5_SET(mkc, mkc, qpn, 0xffffff); MLX5_SET(mkc, mkc, translations_octword_size, ent->xlt); MLX5_SET(mkc, mkc, log_page_size, ent->page); return mr; @@ -534,7 +558,7 @@ static void cache_work_func(struct work_struct *work) /* Allocate a special entry from the cache */ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, - unsigned int entry) + unsigned int entry, int access_flags) { struct mlx5_mr_cache *cache = &dev->cache; struct mlx5_cache_ent *ent; @@ -544,6 +568,10 @@ 
struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, entry >= ARRAY_SIZE(cache->ent))) return ERR_PTR(-EINVAL); + /* Matches access in alloc_cache_mr() */ + if (!mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags)) + return ERR_PTR(-EOPNOTSUPP); + ent = &cache->ent[entry]; spin_lock_irq(&ent->lock); if (list_empty(&ent->head)) { @@ -558,6 +586,7 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, queue_adjust_cache_locked(ent); spin_unlock_irq(&ent->lock); } + mr->access_flags = access_flags; return mr; } @@ -730,8 +759,8 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) MLX5_IB_UMR_OCTOWORD; ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT; if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) && - !dev->is_rep && - mlx5_core_is_pf(dev->mdev)) + !dev->is_rep && mlx5_core_is_pf(dev->mdev) && + mlx5_ib_can_load_pas_with_umr(dev, 0)) ent->limit = dev->mdev->profile->mr_cache[i].limit; else ent->limit = 0; @@ -774,29 +803,6 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev) return 0; } -static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr, - struct ib_pd *pd) -{ - struct mlx5_ib_dev *dev = to_mdev(pd->device); - - MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC)); - MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE)); - MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ)); - MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE)); - MLX5_SET(mkc, mkc, lr, 1); - - if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write)) - MLX5_SET(mkc, mkc, relaxed_ordering_write, - !!(acc & IB_ACCESS_RELAXED_ORDERING)); - if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read)) - MLX5_SET(mkc, mkc, relaxed_ordering_read, - !!(acc & IB_ACCESS_RELAXED_ORDERING)); - - MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); - MLX5_SET(mkc, mkc, qpn, 0xffffff); - MLX5_SET64(mkc, mkc, start_addr, start_addr); -} - struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc) { struct mlx5_ib_dev *dev = to_mdev(pd->device); @@ -979,6 +985,11 @@ alloc_mr_from_cache(struct ib_pd *pd, struct ib_umem *umem, u64 virt_addr, if (!ent) return ERR_PTR(-E2BIG); + + /* Matches access in alloc_cache_mr() */ + if (!mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags)) + return ERR_PTR(-EOPNOTSUPP); + mr = get_cache_mr(ent); if (!mr) { mr = create_cache_mr(ent); @@ -1181,38 +1192,31 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, goto err_1; } pas = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); - if (populate && !(access_flags & IB_ACCESS_ON_DEMAND)) + if (populate) { + if (WARN_ON(access_flags & IB_ACCESS_ON_DEMAND)) { + err = -EINVAL; + goto err_2; + } mlx5_ib_populate_pas(dev, umem, page_shift, pas, pg_cap ? MLX5_IB_MTT_PRESENT : 0); + } /* The pg_access bit allows setting the access flags * in the page list submitted with the command. */ MLX5_SET(create_mkey_in, in, pg_access, !!(pg_cap)); mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + set_mkc_access_pd_addr_fields(mkc, access_flags, virt_addr, + populate ? 
pd : dev->umrc.pd); MLX5_SET(mkc, mkc, free, !populate); MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT); - if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write)) - MLX5_SET(mkc, mkc, relaxed_ordering_write, - !!(access_flags & IB_ACCESS_RELAXED_ORDERING)); - if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read)) - MLX5_SET(mkc, mkc, relaxed_ordering_read, - !!(access_flags & IB_ACCESS_RELAXED_ORDERING)); - MLX5_SET(mkc, mkc, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC)); - MLX5_SET(mkc, mkc, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE)); - MLX5_SET(mkc, mkc, rr, !!(access_flags & IB_ACCESS_REMOTE_READ)); - MLX5_SET(mkc, mkc, lw, !!(access_flags & IB_ACCESS_LOCAL_WRITE)); - MLX5_SET(mkc, mkc, lr, 1); MLX5_SET(mkc, mkc, umr_en, 1); - MLX5_SET64(mkc, mkc, start_addr, virt_addr); MLX5_SET64(mkc, mkc, len, length); - MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); MLX5_SET(mkc, mkc, bsf_octword_size, 0); MLX5_SET(mkc, mkc, translations_octword_size, get_octo_len(virt_addr, length, page_shift)); MLX5_SET(mkc, mkc, log_page_size, page_shift); - MLX5_SET(mkc, mkc, qpn, 0xffffff); if (populate) { MLX5_SET(create_mkey_in, in, translations_octword_actual_size, get_octo_len(virt_addr, length, page_shift)); @@ -1308,7 +1312,8 @@ int mlx5_ib_advise_mr(struct ib_pd *pd, struct uverbs_attr_bundle *attrs) { if (advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH && - advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE) + advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE && + advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_NO_FAULT) return -EOPNOTSUPP; return mlx5_ib_advise_mr_prefetch(pd, advice, flags, @@ -1353,7 +1358,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, { struct mlx5_ib_dev *dev = to_mdev(pd->device); struct mlx5_ib_mr *mr = NULL; - bool use_umr; + bool xlt_with_umr; struct ib_umem *umem; int page_shift; int npages; @@ -1367,6 +1372,11 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n", start, virt_addr, length, access_flags); + xlt_with_umr = mlx5_ib_can_load_pas_with_umr(dev, length); + /* ODP requires xlt update via umr to work. 
*/ + if (!xlt_with_umr && (access_flags & IB_ACCESS_ON_DEMAND)) + return ERR_PTR(-EINVAL); + if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && !start && length == U64_MAX) { if (virt_addr != start) @@ -1387,28 +1397,17 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (err < 0) return ERR_PTR(err); - use_umr = mlx5_ib_can_use_umr(dev, true, access_flags); - - if (order <= mr_cache_max_order(dev) && use_umr) { + if (xlt_with_umr) { mr = alloc_mr_from_cache(pd, umem, virt_addr, length, ncont, page_shift, order, access_flags); - if (PTR_ERR(mr) == -EAGAIN) { - mlx5_ib_dbg(dev, "cache empty for order %d\n", order); + if (IS_ERR(mr)) mr = NULL; - } - } else if (!MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) { - if (access_flags & IB_ACCESS_ON_DEMAND) { - err = -EINVAL; - pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB\n"); - goto error; - } - use_umr = false; } if (!mr) { mutex_lock(&dev->slow_path_mutex); mr = reg_create(NULL, pd, virt_addr, length, umem, ncont, - page_shift, access_flags, !use_umr); + page_shift, access_flags, !xlt_with_umr); mutex_unlock(&dev->slow_path_mutex); } @@ -1422,15 +1421,16 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mr->umem = umem; set_mr_fields(dev, mr, npages, length, access_flags); - if (use_umr) { + if (xlt_with_umr && !(access_flags & IB_ACCESS_ON_DEMAND)) { + /* + * If the MR was created with reg_create then it will be + * configured properly but left disabled. It is safe to go ahead + * and configure it again via UMR while enabling it. + */ int update_xlt_flags = MLX5_IB_UPD_XLT_ENABLE; - if (access_flags & IB_ACCESS_ON_DEMAND) - update_xlt_flags |= MLX5_IB_UPD_XLT_ZAP; - err = mlx5_ib_update_xlt(mr, 0, ncont, page_shift, update_xlt_flags); - if (err) { dereg_mr(dev, mr); return ERR_PTR(err); @@ -1448,6 +1448,12 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, dereg_mr(dev, mr); return ERR_PTR(err); } + + err = mlx5_ib_init_odp_mr(mr, xlt_with_umr); + if (err) { + dereg_mr(dev, mr); + return ERR_PTR(err); + } } return &mr->ibmr; @@ -1555,8 +1561,11 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, goto err; } - if (!mlx5_ib_can_use_umr(dev, true, access_flags) || - (flags & IB_MR_REREG_TRANS && !use_umr_mtt_update(mr, addr, len))) { + if (!mlx5_ib_can_reconfig_with_umr(dev, mr->access_flags, + access_flags) || + !mlx5_ib_can_load_pas_with_umr(dev, len) || + (flags & IB_MR_REREG_TRANS && + !mlx5_ib_pas_fits_in_mr(mr, addr, len))) { /* * UMR can't be used - MKey needs to be replaced. */ @@ -1727,9 +1736,9 @@ static void mlx5_set_umr_free_mkey(struct ib_pd *pd, u32 *in, int ndescs, mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + /* This is only used from the kernel, so setting the PD is OK. 
*/ + set_mkc_access_pd_addr_fields(mkc, 0, 0, pd); MLX5_SET(mkc, mkc, free, 1); - MLX5_SET(mkc, mkc, qpn, 0xffffff); - MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); MLX5_SET(mkc, mkc, translations_octword_size, ndescs); MLX5_SET(mkc, mkc, access_mode_1_0, access_mode & 0x3); MLX5_SET(mkc, mkc, access_mode_4_2, (access_mode >> 2) & 0x7); @@ -1973,12 +1982,11 @@ struct ib_mr *mlx5_ib_alloc_mr_integrity(struct ib_pd *pd, max_num_meta_sg); } -struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, - struct ib_udata *udata) +int mlx5_ib_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata) { - struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct mlx5_ib_dev *dev = to_mdev(ibmw->device); int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); - struct mlx5_ib_mw *mw = NULL; + struct mlx5_ib_mw *mw = to_mmw(ibmw); u32 *in = NULL; void *mkc; int ndescs; @@ -1991,21 +1999,20 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, err = ib_copy_from_udata(&req, udata, min(udata->inlen, sizeof(req))); if (err) - return ERR_PTR(err); + return err; if (req.comp_mask || req.reserved1 || req.reserved2) - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; if (udata->inlen > sizeof(req) && !ib_is_udata_cleared(udata, sizeof(req), udata->inlen - sizeof(req))) - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; ndescs = req.num_klms ? roundup(req.num_klms, 4) : roundup(1, 4); - mw = kzalloc(sizeof(*mw), GFP_KERNEL); in = kzalloc(inlen, GFP_KERNEL); - if (!mw || !in) { + if (!in) { err = -ENOMEM; goto free; } @@ -2014,11 +2021,11 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, MLX5_SET(mkc, mkc, free, 1); MLX5_SET(mkc, mkc, translations_octword_size, ndescs); - MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); + MLX5_SET(mkc, mkc, pd, to_mpd(ibmw->pd)->pdn); MLX5_SET(mkc, mkc, umr_en, 1); MLX5_SET(mkc, mkc, lr, 1); MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_KLMS); - MLX5_SET(mkc, mkc, en_rinval, !!((type == IB_MW_TYPE_2))); + MLX5_SET(mkc, mkc, en_rinval, !!((ibmw->type == IB_MW_TYPE_2))); MLX5_SET(mkc, mkc, qpn, 0xffffff); err = mlx5_ib_create_mkey(dev, &mw->mmkey, in, inlen); @@ -2026,17 +2033,15 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, goto free; mw->mmkey.type = MLX5_MKEY_MW; - mw->ibmw.rkey = mw->mmkey.key; + ibmw->rkey = mw->mmkey.key; mw->ndescs = ndescs; - resp.response_length = min(offsetof(typeof(resp), response_length) + - sizeof(resp.response_length), udata->outlen); + resp.response_length = + min(offsetofend(typeof(resp), response_length), udata->outlen); if (resp.response_length) { err = ib_copy_to_udata(udata, &resp, resp.response_length); - if (err) { - mlx5_core_destroy_mkey(dev->mdev, &mw->mmkey); - goto free; - } + if (err) + goto free_mkey; } if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { @@ -2048,21 +2053,19 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, } kfree(in); - return &mw->ibmw; + return 0; free_mkey: mlx5_core_destroy_mkey(dev->mdev, &mw->mmkey); free: - kfree(mw); kfree(in); - return ERR_PTR(err); + return err; } int mlx5_ib_dealloc_mw(struct ib_mw *mw) { struct mlx5_ib_dev *dev = to_mdev(mw->device); struct mlx5_ib_mw *mmw = to_mmw(mw); - int err; if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { xa_erase(&dev->odp_mkeys, mlx5_base_mkey(mmw->mmkey.key)); @@ -2073,11 +2076,7 @@ int mlx5_ib_dealloc_mw(struct ib_mw *mw) synchronize_srcu(&dev->odp_srcu); } - err = mlx5_core_destroy_mkey(dev->mdev, &mmw->mmkey); - if (err) - return err; - kfree(mmw); - return 0; 
+ return mlx5_core_destroy_mkey(dev->mdev, &mmw->mmkey); } int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index cfd7efab114e..5c853ec1b0d8 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -382,7 +382,7 @@ void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) memset(caps, 0, sizeof(*caps)); if (!MLX5_CAP_GEN(dev->mdev, pg) || - !mlx5_ib_can_use_umr(dev, true, 0)) + !mlx5_ib_can_load_pas_with_umr(dev, 0)) return; caps->general_caps = IB_ODP_SUPPORT; @@ -476,12 +476,12 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, if (IS_ERR(odp)) return ERR_CAST(odp); - ret = mr = mlx5_mr_cache_alloc(imr->dev, MLX5_IMR_MTT_CACHE_ENTRY); + ret = mr = mlx5_mr_cache_alloc(imr->dev, MLX5_IMR_MTT_CACHE_ENTRY, + imr->access_flags); if (IS_ERR(mr)) goto out_umem; mr->ibmr.pd = imr->ibmr.pd; - mr->access_flags = imr->access_flags; mr->umem = &odp->umem; mr->ibmr.lkey = mr->mmkey.key; mr->ibmr.rkey = mr->mmkey.key; @@ -540,14 +540,13 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, if (IS_ERR(umem_odp)) return ERR_CAST(umem_odp); - imr = mlx5_mr_cache_alloc(dev, MLX5_IMR_KSM_CACHE_ENTRY); + imr = mlx5_mr_cache_alloc(dev, MLX5_IMR_KSM_CACHE_ENTRY, access_flags); if (IS_ERR(imr)) { err = PTR_ERR(imr); goto out_umem; } imr->ibmr.pd = &pd->ibpd; - imr->access_flags = access_flags; imr->mmkey.iova = 0; imr->umem = &umem_odp->umem; imr->ibmr.lkey = imr->mmkey.key; @@ -666,15 +665,21 @@ void mlx5_ib_fence_odp_mr(struct mlx5_ib_mr *mr) } #define MLX5_PF_FLAGS_DOWNGRADE BIT(1) +#define MLX5_PF_FLAGS_SNAPSHOT BIT(2) +#define MLX5_PF_FLAGS_ENABLE BIT(3) static int pagefault_real_mr(struct mlx5_ib_mr *mr, struct ib_umem_odp *odp, u64 user_va, size_t bcnt, u32 *bytes_mapped, u32 flags) { int page_shift, ret, np; bool downgrade = flags & MLX5_PF_FLAGS_DOWNGRADE; - unsigned long current_seq; u64 access_mask; u64 start_idx; + bool fault = !(flags & MLX5_PF_FLAGS_SNAPSHOT); + u32 xlt_flags = MLX5_IB_UPD_XLT_ATOMIC; + + if (flags & MLX5_PF_FLAGS_ENABLE) + xlt_flags |= MLX5_IB_UPD_XLT_ENABLE; page_shift = odp->page_shift; start_idx = (user_va - ib_umem_start(odp)) >> page_shift; @@ -683,25 +688,15 @@ static int pagefault_real_mr(struct mlx5_ib_mr *mr, struct ib_umem_odp *odp, if (odp->umem.writable && !downgrade) access_mask |= ODP_WRITE_ALLOWED_BIT; - current_seq = mmu_interval_read_begin(&odp->notifier); - - np = ib_umem_odp_map_dma_pages(odp, user_va, bcnt, access_mask, - current_seq); + np = ib_umem_odp_map_dma_and_lock(odp, user_va, bcnt, access_mask, fault); if (np < 0) return np; - mutex_lock(&odp->umem_mutex); - if (!mmu_interval_read_retry(&odp->notifier, current_seq)) { - /* - * No need to check whether the MTTs really belong to - * this MR, since ib_umem_odp_map_dma_pages already - * checks this. - */ - ret = mlx5_ib_update_xlt(mr, start_idx, np, - page_shift, MLX5_IB_UPD_XLT_ATOMIC); - } else { - ret = -EAGAIN; - } + /* + * No need to check whether the MTTs really belong to this MR, since + * ib_umem_odp_map_dma_and_lock already checks this. 
+ */ + ret = mlx5_ib_update_xlt(mr, start_idx, np, page_shift, xlt_flags); mutex_unlock(&odp->umem_mutex); if (ret < 0) { @@ -836,6 +831,20 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt, flags); } +int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr, bool enable) +{ + u32 flags = MLX5_PF_FLAGS_SNAPSHOT; + int ret; + + if (enable) + flags |= MLX5_PF_FLAGS_ENABLE; + + ret = pagefault_real_mr(mr, to_ib_umem_odp(mr->umem), + mr->umem->address, mr->umem->length, NULL, + flags); + return ret >= 0 ? 0 : ret; +} + struct pf_frame { struct pf_frame *next; u32 key; @@ -1862,6 +1871,9 @@ int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH) pf_flags |= MLX5_PF_FLAGS_DOWNGRADE; + if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_NO_FAULT) + pf_flags |= MLX5_PF_FLAGS_SNAPSHOT; + if (flags & IB_UVERBS_ADVISE_MR_FLAG_FLUSH) return mlx5_ib_prefetch_sg_list(pd, advice, pf_flags, sg_list, num_sge); diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 5758dbe64045..600e056798c0 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1477,7 +1477,8 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, resp->comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_RQN; resp->tirn = rq->tirn; resp->comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_TIRN; - if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner)) { + if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner) || + MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner_v2)) { resp->tir_icm_addr = MLX5_GET( create_tir_out, out, icm_address_31_0); resp->tir_icm_addr |= @@ -1739,7 +1740,8 @@ create_tir: if (mucontext->devx_uid) { params->resp.comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_TIRN; params->resp.tirn = qp->rss_qp.tirn; - if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner)) { + if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner) || + MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner_v2)) { params->resp.tir_icm_addr = MLX5_GET(create_tir_out, out, icm_address_31_0); params->resp.tir_icm_addr |= @@ -2409,6 +2411,9 @@ static int create_dct(struct mlx5_ib_dev *dev, struct ib_pd *pd, u32 uidx = params->uidx; void *dctc; + if (mlx5_lag_is_active(dev->mdev) && !MLX5_CAP_GEN(dev->mdev, lag_dct)) + return -EOPNOTSUPP; + qp->dct.in = kzalloc(MLX5_ST_SZ_BYTES(create_dct_in), GFP_KERNEL); if (!qp->dct.in) return -ENOMEM; @@ -2506,18 +2511,6 @@ static int check_valid_flow(struct mlx5_ib_dev *dev, struct ib_pd *pd, return -EINVAL; } - switch (attr->qp_type) { - case IB_QPT_SMI: - case MLX5_IB_QPT_HW_GSI: - case MLX5_IB_QPT_REG_UMR: - case IB_QPT_GSI: - mlx5_ib_dbg(dev, "Kernel doesn't support QP type %d\n", - attr->qp_type); - return -EINVAL; - default: - break; - } - /* * We don't need to see this warning, it means that kernel code * missing ib_pd. Placed here to catch developer's mistakes. 
@@ -2780,21 +2773,23 @@ static int create_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, goto out; } - if (qp->type == MLX5_IB_QPT_DCT) { + switch (qp->type) { + case MLX5_IB_QPT_DCT: err = create_dct(dev, pd, qp, params); - goto out; - } - - if (qp->type == IB_QPT_XRC_TGT) { + break; + case IB_QPT_XRC_TGT: err = create_xrc_tgt_qp(dev, qp, params); - goto out; + break; + case IB_QPT_GSI: + err = mlx5_ib_create_gsi(pd, qp, params->attr); + break; + default: + if (params->udata) + err = create_user_qp(dev, pd, qp, params); + else + err = create_kernel_qp(dev, pd, qp, params); } - if (params->udata) - err = create_user_qp(dev, pd, qp, params); - else - err = create_kernel_qp(dev, pd, qp, params); - out: if (err) { mlx5_ib_err(dev, "Create QP type %d failed\n", qp->type); @@ -2934,9 +2929,6 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attr, if (err) return ERR_PTR(err); - if (attr->qp_type == IB_QPT_GSI) - return mlx5_ib_gsi_create_qp(pd, attr); - params.udata = udata; params.uidx = MLX5_IB_DEFAULT_UIDX; params.attr = attr; @@ -3005,9 +2997,14 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attr, return &qp->ibqp; destroy_qp: - if (qp->type == MLX5_IB_QPT_DCT) { + switch (qp->type) { + case MLX5_IB_QPT_DCT: mlx5_ib_destroy_dct(qp); - } else { + break; + case IB_QPT_GSI: + mlx5_ib_destroy_gsi(qp); + break; + default: /* * These lines below are temp solution till QP allocation * will be moved to be under IB/core responsiblity. @@ -3032,7 +3029,7 @@ int mlx5_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata) struct mlx5_ib_qp *mqp = to_mqp(qp); if (unlikely(qp->qp_type == IB_QPT_GSI)) - return mlx5_ib_gsi_destroy_qp(qp); + return mlx5_ib_destroy_gsi(mqp); if (mqp->type == MLX5_IB_QPT_DCT) return mlx5_ib_destroy_dct(mqp); @@ -3088,20 +3085,44 @@ enum { MLX5_PATH_FLAG_COUNTER = 1 << 2, }; +static int ib_to_mlx5_rate_map(u8 rate) +{ + switch (rate) { + case IB_RATE_PORT_CURRENT: + return 0; + case IB_RATE_56_GBPS: + return 1; + case IB_RATE_25_GBPS: + return 2; + case IB_RATE_100_GBPS: + return 3; + case IB_RATE_200_GBPS: + return 4; + case IB_RATE_50_GBPS: + return 5; + default: + return rate + MLX5_STAT_RATE_OFFSET; + }; + + return 0; +} + static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate) { + u32 stat_rate_support; + if (rate == IB_RATE_PORT_CURRENT) return 0; if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_600_GBPS) return -EINVAL; + stat_rate_support = MLX5_CAP_GEN(dev->mdev, stat_rate_support); while (rate != IB_RATE_PORT_CURRENT && - !(1 << (rate + MLX5_STAT_RATE_OFFSET) & - MLX5_CAP_GEN(dev->mdev, stat_rate_support))) + !(1 << ib_to_mlx5_rate_map(rate) & stat_rate_support)) --rate; - return rate ? 
rate + MLX5_STAT_RATE_OFFSET : rate; + return ib_to_mlx5_rate_map(rate); } static int modify_raw_packet_eth_prio(struct mlx5_core_dev *dev, @@ -3643,14 +3664,12 @@ static unsigned int get_tx_affinity_rr(struct mlx5_ib_dev *dev, MLX5_MAX_PORTS + 1; } -static bool qp_supports_affinity(struct ib_qp *qp) +static bool qp_supports_affinity(struct mlx5_ib_qp *qp) { - if ((qp->qp_type == IB_QPT_RC) || - (qp->qp_type == IB_QPT_UD) || - (qp->qp_type == IB_QPT_UC) || - (qp->qp_type == IB_QPT_RAW_PACKET) || - (qp->qp_type == IB_QPT_XRC_INI) || - (qp->qp_type == IB_QPT_XRC_TGT)) + if ((qp->type == IB_QPT_RC) || (qp->type == IB_QPT_UD) || + (qp->type == IB_QPT_UC) || (qp->type == IB_QPT_RAW_PACKET) || + (qp->type == IB_QPT_XRC_INI) || (qp->type == IB_QPT_XRC_TGT) || + (qp->type == MLX5_IB_QPT_DCI)) return true; return false; } @@ -3668,7 +3687,7 @@ static unsigned int get_tx_affinity(struct ib_qp *qp, unsigned int tx_affinity; if (!(mlx5_ib_lag_should_assign_affinity(dev) && - qp_supports_affinity(qp))) + qp_supports_affinity(mqp))) return 0; if (mqp->flags & MLX5_IB_QP_CREATE_SQPN_QP1) @@ -4161,7 +4180,11 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr, MLX5_SET(dctc, dctc, rae, 1); } MLX5_SET(dctc, dctc, pkey_index, attr->pkey_index); - MLX5_SET(dctc, dctc, port, attr->port_num); + if (mlx5_lag_is_active(dev->mdev)) + MLX5_SET(dctc, dctc, port, + get_tx_affinity_rr(dev, udata)); + else + MLX5_SET(dctc, dctc, port, attr->port_num); set_id = mlx5_ib_get_counters_id(dev, attr->port_num - 1); MLX5_SET(dctc, dctc, counter_set_id, set_id); @@ -4716,12 +4739,12 @@ int mlx5_ib_alloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata) return mlx5_cmd_xrcd_alloc(dev->mdev, &xrcd->xrcdn, 0); } -void mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata) +int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(xrcd->device); u32 xrcdn = to_mxrcd(xrcd)->xrcdn; - mlx5_cmd_xrcd_dealloc(dev->mdev, xrcdn, 0); + return mlx5_cmd_xrcd_dealloc(dev->mdev, xrcdn, 0); } static void mlx5_ib_wq_event(struct mlx5_core_qp *core_qp, int type) @@ -4921,8 +4944,8 @@ static int prepare_user_rq(struct ib_pd *pd, int err; size_t required_cmd_sz; - required_cmd_sz = offsetof(typeof(ucmd), single_stride_log_num_of_bytes) - + sizeof(ucmd.single_stride_log_num_of_bytes); + required_cmd_sz = offsetofend(struct mlx5_ib_create_wq, + single_stride_log_num_of_bytes); if (udata->inlen < required_cmd_sz) { mlx5_ib_dbg(dev, "invalid inlen\n"); return -EINVAL; @@ -5006,7 +5029,7 @@ struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd, if (!udata) return ERR_PTR(-ENOSYS); - min_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved); + min_resp_len = offsetofend(struct mlx5_ib_create_wq_resp, reserved); if (udata->outlen && udata->outlen < min_resp_len) return ERR_PTR(-EINVAL); @@ -5036,8 +5059,8 @@ struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd, rwq->ibwq.wq_num = rwq->core_qp.qpn; rwq->ibwq.state = IB_WQS_RESET; if (udata->outlen) { - resp.response_length = offsetof(typeof(resp), response_length) + - sizeof(resp.response_length); + resp.response_length = offsetofend( + struct mlx5_ib_create_wq_resp, response_length); err = ib_copy_to_udata(udata, &resp, resp.response_length); if (err) goto err_copy; @@ -5056,22 +5079,27 @@ err: return ERR_PTR(err); } -void mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata) +int mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(wq->device); struct 
mlx5_ib_rwq *rwq = to_mrwq(wq); + int ret; - mlx5_core_destroy_rq_tracked(dev, &rwq->core_qp); + ret = mlx5_core_destroy_rq_tracked(dev, &rwq->core_qp); + if (ret) + return ret; destroy_user_rq(dev, wq->pd, rwq, udata); kfree(rwq); + return 0; } -struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device, - struct ib_rwq_ind_table_init_attr *init_attr, - struct ib_udata *udata) +int mlx5_ib_create_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_table, + struct ib_rwq_ind_table_init_attr *init_attr, + struct ib_udata *udata) { - struct mlx5_ib_dev *dev = to_mdev(device); - struct mlx5_ib_rwq_ind_table *rwq_ind_tbl; + struct mlx5_ib_rwq_ind_table *rwq_ind_tbl = + to_mrwq_ind_table(ib_rwq_ind_table); + struct mlx5_ib_dev *dev = to_mdev(ib_rwq_ind_table->device); int sz = 1 << init_attr->log_ind_tbl_size; struct mlx5_ib_create_rwq_ind_tbl_resp resp = {}; size_t min_resp_len; @@ -5084,30 +5112,25 @@ struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device, if (udata->inlen > 0 && !ib_is_udata_cleared(udata, 0, udata->inlen)) - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; if (init_attr->log_ind_tbl_size > MLX5_CAP_GEN(dev->mdev, log_max_rqt_size)) { mlx5_ib_dbg(dev, "log_ind_tbl_size = %d is bigger than supported = %d\n", init_attr->log_ind_tbl_size, MLX5_CAP_GEN(dev->mdev, log_max_rqt_size)); - return ERR_PTR(-EINVAL); + return -EINVAL; } - min_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved); + min_resp_len = + offsetofend(struct mlx5_ib_create_rwq_ind_tbl_resp, reserved); if (udata->outlen && udata->outlen < min_resp_len) - return ERR_PTR(-EINVAL); - - rwq_ind_tbl = kzalloc(sizeof(*rwq_ind_tbl), GFP_KERNEL); - if (!rwq_ind_tbl) - return ERR_PTR(-ENOMEM); + return -EINVAL; inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz; in = kvzalloc(inlen, GFP_KERNEL); - if (!in) { - err = -ENOMEM; - goto err; - } + if (!in) + return -ENOMEM; rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context); @@ -5122,26 +5145,24 @@ struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device, err = mlx5_core_create_rqt(dev->mdev, in, inlen, &rwq_ind_tbl->rqtn); kvfree(in); - if (err) - goto err; + return err; rwq_ind_tbl->ib_rwq_ind_tbl.ind_tbl_num = rwq_ind_tbl->rqtn; if (udata->outlen) { - resp.response_length = offsetof(typeof(resp), response_length) + - sizeof(resp.response_length); + resp.response_length = + offsetofend(struct mlx5_ib_create_rwq_ind_tbl_resp, + response_length); err = ib_copy_to_udata(udata, &resp, resp.response_length); if (err) goto err_copy; } - return &rwq_ind_tbl->ib_rwq_ind_tbl; + return 0; err_copy: mlx5_cmd_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn, rwq_ind_tbl->uid); -err: - kfree(rwq_ind_tbl); - return ERR_PTR(err); + return err; } int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl) @@ -5149,10 +5170,7 @@ int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl) struct mlx5_ib_rwq_ind_table *rwq_ind_tbl = to_mrwq_ind_table(ib_rwq_ind_tbl); struct mlx5_ib_dev *dev = to_mdev(ib_rwq_ind_tbl->device); - mlx5_cmd_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn, rwq_ind_tbl->uid); - - kfree(rwq_ind_tbl); - return 0; + return mlx5_cmd_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn, rwq_ind_tbl->uid); } int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, @@ -5169,7 +5187,7 @@ int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, void *rqc; void *in; - required_cmd_sz = offsetof(typeof(ucmd), reserved) + sizeof(ucmd.reserved); + required_cmd_sz = 
offsetofend(struct mlx5_ib_modify_wq, reserved); if (udata->inlen < required_cmd_sz) return -EINVAL; diff --git a/drivers/infiniband/hw/mlx5/qp.h b/drivers/infiniband/hw/mlx5/qp.h index ba899df44c5b..5d4e140db99c 100644 --- a/drivers/infiniband/hw/mlx5/qp.h +++ b/drivers/infiniband/hw/mlx5/qp.h @@ -26,8 +26,8 @@ int mlx5_core_dct_query(struct mlx5_ib_dev *dev, struct mlx5_core_dct *dct, int mlx5_core_set_delay_drop(struct mlx5_ib_dev *dev, u32 timeout_usec); -void mlx5_core_destroy_rq_tracked(struct mlx5_ib_dev *dev, - struct mlx5_core_qp *rq); +int mlx5_core_destroy_rq_tracked(struct mlx5_ib_dev *dev, + struct mlx5_core_qp *rq); int mlx5_core_create_sq_tracked(struct mlx5_ib_dev *dev, u32 *in, int inlen, struct mlx5_core_qp *sq); void mlx5_core_destroy_sq_tracked(struct mlx5_ib_dev *dev, diff --git a/drivers/infiniband/hw/mlx5/qpc.c b/drivers/infiniband/hw/mlx5/qpc.c index 7c3968ef9cd1..c683d7000168 100644 --- a/drivers/infiniband/hw/mlx5/qpc.c +++ b/drivers/infiniband/hw/mlx5/qpc.c @@ -576,11 +576,12 @@ err_destroy_rq: return err; } -void mlx5_core_destroy_rq_tracked(struct mlx5_ib_dev *dev, - struct mlx5_core_qp *rq) +int mlx5_core_destroy_rq_tracked(struct mlx5_ib_dev *dev, + struct mlx5_core_qp *rq) { destroy_resource_common(dev, rq); destroy_rq_tracked(dev, rq->qpn, rq->uid); + return 0; } static void destroy_sq_tracked(struct mlx5_ib_dev *dev, u32 sqn, u16 uid) diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 7e10cbcb6d5c..e2f720eec1e1 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -389,24 +389,21 @@ out_box: return ret; } -void mlx5_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) +int mlx5_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(srq->device); struct mlx5_ib_srq *msrq = to_msrq(srq); + int ret; + + ret = mlx5_cmd_destroy_srq(dev, &msrq->msrq); + if (ret) + return ret; - mlx5_cmd_destroy_srq(dev, &msrq->msrq); - - if (srq->uobject) { - mlx5_ib_db_unmap_user( - rdma_udata_to_drv_context( - udata, - struct mlx5_ib_ucontext, - ibucontext), - &msrq->db); - ib_umem_release(msrq->umem); - } else { + if (udata) + destroy_srq_user(srq->pd, msrq, udata); + else destroy_srq_kernel(dev, msrq); - } + return 0; } void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index) diff --git a/drivers/infiniband/hw/mlx5/srq.h b/drivers/infiniband/hw/mlx5/srq.h index af197c36d757..2c3627b2509d 100644 --- a/drivers/infiniband/hw/mlx5/srq.h +++ b/drivers/infiniband/hw/mlx5/srq.h @@ -56,7 +56,7 @@ struct mlx5_srq_table { int mlx5_cmd_create_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, struct mlx5_srq_attr *in); -void mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq); +int mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq); int mlx5_cmd_query_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, struct mlx5_srq_attr *out); int mlx5_cmd_arm_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, diff --git a/drivers/infiniband/hw/mlx5/srq_cmd.c b/drivers/infiniband/hw/mlx5/srq_cmd.c index 37aaacebd3f2..db889ec3fd48 100644 --- a/drivers/infiniband/hw/mlx5/srq_cmd.c +++ b/drivers/infiniband/hw/mlx5/srq_cmd.c @@ -590,22 +590,32 @@ err_destroy_srq_split: return err; } -void mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq) +int mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq) { struct mlx5_srq_table *table = &dev->srq_table; struct mlx5_core_srq *tmp; int err; - 
tmp = xa_erase_irq(&table->array, srq->srqn); - if (!tmp || tmp != srq) - return; + /* Delete entry, but leave index occupied */ + tmp = xa_cmpxchg_irq(&table->array, srq->srqn, srq, XA_ZERO_ENTRY, 0); + if (WARN_ON(tmp != srq)) + return xa_err(tmp) ?: -EINVAL; err = destroy_srq_split(dev, srq); - if (err) - return; + if (err) { + /* + * We don't need to check returned result for an error, + * because we are storing in pre-allocated space xarray + * entry and it can't fail at this stage. + */ + xa_cmpxchg_irq(&table->array, srq->srqn, XA_ZERO_ENTRY, srq, 0); + return err; + } + xa_erase_irq(&table->array, srq->srqn); mlx5_core_res_put(&srq->common); wait_for_completion(&srq->common.free); + return 0; } int mlx5_cmd_query_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, diff --git a/drivers/infiniband/hw/mlx5/wr.c b/drivers/infiniband/hw/mlx5/wr.c index 43880973a512..d6038fb6c50c 100644 --- a/drivers/infiniband/hw/mlx5/wr.c +++ b/drivers/infiniband/hw/mlx5/wr.c @@ -398,7 +398,8 @@ static void set_linv_mkey_seg(struct mlx5_mkey_seg *seg) seg->status = MLX5_MKEY_STATUS_FREE; } -static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, +static void set_reg_mkey_segment(struct mlx5_ib_dev *dev, + struct mlx5_mkey_seg *seg, const struct ib_send_wr *wr) { const struct mlx5_umr_wr *umrwr = umr_wr(wr); @@ -414,10 +415,12 @@ static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, MLX5_SET(mkc, seg, rr, !!(umrwr->access_flags & IB_ACCESS_REMOTE_READ)); MLX5_SET(mkc, seg, lw, !!(umrwr->access_flags & IB_ACCESS_LOCAL_WRITE)); MLX5_SET(mkc, seg, lr, 1); - MLX5_SET(mkc, seg, relaxed_ordering_write, - !!(umrwr->access_flags & IB_ACCESS_RELAXED_ORDERING)); - MLX5_SET(mkc, seg, relaxed_ordering_read, - !!(umrwr->access_flags & IB_ACCESS_RELAXED_ORDERING)); + if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr)) + MLX5_SET(mkc, seg, relaxed_ordering_write, + !!(umrwr->access_flags & IB_ACCESS_RELAXED_ORDERING)); + if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr)) + MLX5_SET(mkc, seg, relaxed_ordering_read, + !!(umrwr->access_flags & IB_ACCESS_RELAXED_ORDERING)); if (umrwr->pd) MLX5_SET(mkc, seg, pd, to_mpd(umrwr->pd)->pdn); @@ -863,13 +866,11 @@ static int set_reg_wr(struct mlx5_ib_qp *qp, bool atomic = wr->access & IB_ACCESS_REMOTE_ATOMIC; u8 flags = 0; - if (!mlx5_ib_can_use_umr(dev, atomic, wr->access)) { - mlx5_ib_warn(to_mdev(qp->ibqp.device), - "Fast update of %s for MR is disabled\n", - (MLX5_CAP_GEN(dev->mdev, - umr_modify_entity_size_disabled)) ? 
- "entity size" : - "atomic access"); + /* Matches access in mlx5_set_umr_free_mkey() */ + if (!mlx5_ib_can_reconfig_with_umr(dev, 0, wr->access)) { + mlx5_ib_warn( + to_mdev(qp->ibqp.device), + "Fast update for MR access flags is not possible\n"); return -EINVAL; } @@ -1263,7 +1264,7 @@ static int handle_qpt_reg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; handle_post_send_edge(&qp->sq, seg, *size, cur_edge); - set_reg_mkey_segment(*seg, wr); + set_reg_mkey_segment(dev, *seg, wr); *seg += sizeof(struct mlx5_mkey_seg); *size += sizeof(struct mlx5_mkey_seg) / 16; handle_post_send_edge(&qp->sq, seg, *size, cur_edge); diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index 7550e9d03dec..9dbbf4d16796 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -548,7 +548,7 @@ int mthca_alloc_sqp(struct mthca_dev *dev, struct ib_qp_cap *cap, int qpn, int port, - struct mthca_sqp *sqp, + struct mthca_qp *qp, struct ib_udata *udata); void mthca_free_qp(struct mthca_dev *dev, struct mthca_qp *qp); int mthca_create_ah(struct mthca_dev *dev, diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 9fa2f9164a47..c4d9cdc4ee97 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -373,9 +373,10 @@ static int mthca_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) return 0; } -static void mthca_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) +static int mthca_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) { mthca_pd_free(to_mdev(pd->device), to_mpd(pd)); + return 0; } static int mthca_ah_create(struct ib_ah *ibah, @@ -389,9 +390,10 @@ static int mthca_ah_create(struct ib_ah *ibah, init_attr->ah_attr, ah); } -static void mthca_ah_destroy(struct ib_ah *ah, u32 flags) +static int mthca_ah_destroy(struct ib_ah *ah, u32 flags) { mthca_destroy_ah(to_mdev(ah->device), to_mah(ah)); + return 0; } static int mthca_create_srq(struct ib_srq *ibsrq, @@ -440,7 +442,7 @@ static int mthca_create_srq(struct ib_srq *ibsrq, return 0; } -static void mthca_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) +static int mthca_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) { if (udata) { struct mthca_ucontext *context = @@ -454,6 +456,7 @@ static void mthca_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) } mthca_free_srq(to_mdev(srq->device), to_msrq(srq)); + return 0; } static struct ib_qp *mthca_create_qp(struct ib_pd *pd, @@ -532,13 +535,14 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd, case IB_QPT_SMI: case IB_QPT_GSI: { - /* Don't allow userspace to create special QPs */ - if (udata) - return ERR_PTR(-EINVAL); - - qp = kzalloc(sizeof(struct mthca_sqp), GFP_KERNEL); + qp = kzalloc(sizeof(*qp), GFP_KERNEL); if (!qp) return ERR_PTR(-ENOMEM); + qp->sqp = kzalloc(sizeof(struct mthca_sqp), GFP_KERNEL); + if (!qp->sqp) { + kfree(qp); + return ERR_PTR(-ENOMEM); + } qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 
0 : 1; @@ -547,7 +551,7 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd, to_mcq(init_attr->recv_cq), init_attr->sq_sig_type, &init_attr->cap, qp->ibqp.qp_num, init_attr->port_num, - to_msqp(qp), udata); + qp, udata); break; } default: @@ -556,6 +560,7 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd, } if (err) { + kfree(qp->sqp); kfree(qp); return ERR_PTR(err); } @@ -588,7 +593,8 @@ static int mthca_destroy_qp(struct ib_qp *qp, struct ib_udata *udata) to_mqp(qp)->rq.db_index); } mthca_free_qp(to_mdev(qp->device), to_mqp(qp)); - kfree(qp); + kfree(to_mqp(qp)->sqp); + kfree(to_mqp(qp)); return 0; } @@ -789,7 +795,7 @@ out: return ret; } -static void mthca_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) +static int mthca_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) { if (udata) { struct mthca_ucontext *context = @@ -808,6 +814,7 @@ static void mthca_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) to_mcq(cq)->set_ci_db_index); } mthca_free_cq(to_mdev(cq->device), to_mcq(cq)); + return 0; } static inline u32 convert_access(int acc) @@ -846,7 +853,7 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt, int acc, struct ib_udata *udata) { struct mthca_dev *dev = to_mdev(pd->device); - struct sg_dma_page_iter sg_iter; + struct ib_block_iter biter; struct mthca_ucontext *context = rdma_udata_to_drv_context( udata, struct mthca_ucontext, ibucontext); struct mthca_mr *mr; @@ -877,7 +884,7 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, goto err; } - n = ib_umem_num_pages(mr->umem); + n = ib_umem_num_dma_blocks(mr->umem, PAGE_SIZE); mr->mtt = mthca_alloc_mtt(dev, n); if (IS_ERR(mr->mtt)) { @@ -895,8 +902,8 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, write_mtt_size = min(mthca_write_mtt_size(dev), (int) (PAGE_SIZE / sizeof *pages)); - for_each_sg_dma_page(mr->umem->sg_head.sgl, &sg_iter, mr->umem->nmap, 0) { - pages[i++] = sg_page_iter_dma_address(&sg_iter); + rdma_umem_for_each_dma_block(mr->umem, &biter, PAGE_SIZE) { + pages[i++] = rdma_block_iter_dma_address(&biter); /* * Be friendly to write_mtt and pass it chunks @@ -1199,7 +1206,7 @@ int mthca_register_device(struct mthca_dev *dev) mutex_init(&dev->cap_mask_mutex); rdma_set_device_sysfs_group(&dev->ib_dev, &mthca_attr_group); - ret = ib_register_device(&dev->ib_dev, "mthca%d"); + ret = ib_register_device(&dev->ib_dev, "mthca%d", &dev->pdev->dev); if (ret) return ret; diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h index 84c64bff0d92..8a77483bb33c 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.h +++ b/drivers/infiniband/hw/mthca/mthca_provider.h @@ -240,6 +240,16 @@ struct mthca_wq { __be32 *db; }; +struct mthca_sqp { + int pkey_index; + u32 qkey; + u32 send_psn; + struct ib_ud_header ud_header; + int header_buf_size; + void *header_buf; + dma_addr_t header_dma; +}; + struct mthca_qp { struct ib_qp ibqp; int refcount; @@ -265,17 +275,7 @@ struct mthca_qp { wait_queue_head_t wait; struct mutex mutex; -}; - -struct mthca_sqp { - struct mthca_qp qp; - int pkey_index; - u32 qkey; - u32 send_psn; - struct ib_ud_header ud_header; - int header_buf_size; - void *header_buf; - dma_addr_t header_dma; + struct mthca_sqp *sqp; }; static inline struct mthca_ucontext *to_mucontext(struct ib_ucontext *ibucontext) @@ -313,9 +313,4 @@ static inline struct mthca_qp *to_mqp(struct ib_qp *ibqp) return container_of(ibqp, struct mthca_qp, ibqp); } -static 
inline struct mthca_sqp *to_msqp(struct mthca_qp *qp) -{ - return container_of(qp, struct mthca_sqp, qp); -} - #endif /* MTHCA_PROVIDER_H */ diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index c6e95d0d760a..08a2a7afafd3 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -809,7 +809,7 @@ static int __mthca_modify_qp(struct ib_qp *ibqp, qp->alt_port = attr->alt_port_num; if (is_sqp(dev, qp)) - store_attrs(to_msqp(qp), attr, attr_mask); + store_attrs(qp->sqp, attr, attr_mask); /* * If we moved QP0 to RTR, bring the IB link up; if we moved @@ -1368,39 +1368,40 @@ int mthca_alloc_sqp(struct mthca_dev *dev, struct ib_qp_cap *cap, int qpn, int port, - struct mthca_sqp *sqp, + struct mthca_qp *qp, struct ib_udata *udata) { u32 mqpn = qpn * 2 + dev->qp_table.sqp_start + port - 1; int err; - sqp->qp.transport = MLX; - err = mthca_set_qp_size(dev, cap, pd, &sqp->qp); + qp->transport = MLX; + err = mthca_set_qp_size(dev, cap, pd, qp); if (err) return err; - sqp->header_buf_size = sqp->qp.sq.max * MTHCA_UD_HEADER_SIZE; - sqp->header_buf = dma_alloc_coherent(&dev->pdev->dev, sqp->header_buf_size, - &sqp->header_dma, GFP_KERNEL); - if (!sqp->header_buf) + qp->sqp->header_buf_size = qp->sq.max * MTHCA_UD_HEADER_SIZE; + qp->sqp->header_buf = + dma_alloc_coherent(&dev->pdev->dev, qp->sqp->header_buf_size, + &qp->sqp->header_dma, GFP_KERNEL); + if (!qp->sqp->header_buf) return -ENOMEM; spin_lock_irq(&dev->qp_table.lock); if (mthca_array_get(&dev->qp_table.qp, mqpn)) err = -EBUSY; else - mthca_array_set(&dev->qp_table.qp, mqpn, sqp); + mthca_array_set(&dev->qp_table.qp, mqpn, qp->sqp); spin_unlock_irq(&dev->qp_table.lock); if (err) goto err_out; - sqp->qp.port = port; - sqp->qp.qpn = mqpn; - sqp->qp.transport = MLX; + qp->port = port; + qp->qpn = mqpn; + qp->transport = MLX; err = mthca_alloc_qp_common(dev, pd, send_cq, recv_cq, - send_policy, &sqp->qp, udata); + send_policy, qp, udata); if (err) goto err_out_free; @@ -1421,10 +1422,9 @@ int mthca_alloc_sqp(struct mthca_dev *dev, mthca_unlock_cqs(send_cq, recv_cq); - err_out: - dma_free_coherent(&dev->pdev->dev, sqp->header_buf_size, - sqp->header_buf, sqp->header_dma); - +err_out: + dma_free_coherent(&dev->pdev->dev, qp->sqp->header_buf_size, + qp->sqp->header_buf, qp->sqp->header_dma); return err; } @@ -1487,20 +1487,19 @@ void mthca_free_qp(struct mthca_dev *dev, if (is_sqp(dev, qp)) { atomic_dec(&(to_mpd(qp->ibqp.pd)->sqp_count)); - dma_free_coherent(&dev->pdev->dev, - to_msqp(qp)->header_buf_size, - to_msqp(qp)->header_buf, - to_msqp(qp)->header_dma); + dma_free_coherent(&dev->pdev->dev, qp->sqp->header_buf_size, + qp->sqp->header_buf, qp->sqp->header_dma); } else mthca_free(&dev->qp_table.alloc, qp->qpn); } /* Create UD header for an MLX send and build a data segment for it */ -static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp, - int ind, const struct ib_ud_wr *wr, +static int build_mlx_header(struct mthca_dev *dev, struct mthca_qp *qp, int ind, + const struct ib_ud_wr *wr, struct mthca_mlx_seg *mlx, struct mthca_data_seg *data) { + struct mthca_sqp *sqp = qp->sqp; int header_size; int err; u16 pkey; @@ -1513,7 +1512,7 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp, if (err) return err; mlx->flags &= ~cpu_to_be32(MTHCA_NEXT_SOLICIT | 1); - mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MTHCA_MLX_VL15 : 0) | + mlx->flags |= cpu_to_be32((!qp->ibqp.qp_num ? 
MTHCA_MLX_VL15 : 0) | (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE ? MTHCA_MLX_SLR : 0) | (sqp->ud_header.lrh.service_level << 8)); @@ -1534,29 +1533,29 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp, return -EINVAL; } - sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0; + sqp->ud_header.lrh.virtual_lane = !qp->ibqp.qp_num ? 15 : 0; if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE) sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE; sqp->ud_header.bth.solicited_event = !!(wr->wr.send_flags & IB_SEND_SOLICITED); - if (!sqp->qp.ibqp.qp_num) - ib_get_cached_pkey(&dev->ib_dev, sqp->qp.port, - sqp->pkey_index, &pkey); + if (!qp->ibqp.qp_num) + ib_get_cached_pkey(&dev->ib_dev, qp->port, sqp->pkey_index, + &pkey); else - ib_get_cached_pkey(&dev->ib_dev, sqp->qp.port, - wr->pkey_index, &pkey); + ib_get_cached_pkey(&dev->ib_dev, qp->port, wr->pkey_index, + &pkey); sqp->ud_header.bth.pkey = cpu_to_be16(pkey); sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->remote_qpn); sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1)); sqp->ud_header.deth.qkey = cpu_to_be32(wr->remote_qkey & 0x80000000 ? sqp->qkey : wr->remote_qkey); - sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num); + sqp->ud_header.deth.source_qpn = cpu_to_be32(qp->ibqp.qp_num); header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf + ind * MTHCA_UD_HEADER_SIZE); data->byte_count = cpu_to_be32(header_size); - data->lkey = cpu_to_be32(to_mpd(sqp->qp.ibqp.pd)->ntmr.ibmr.lkey); + data->lkey = cpu_to_be32(to_mpd(qp->ibqp.pd)->ntmr.ibmr.lkey); data->addr = cpu_to_be64(sqp->header_dma + ind * MTHCA_UD_HEADER_SIZE); @@ -1735,9 +1734,9 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, break; case MLX: - err = build_mlx_header(dev, to_msqp(qp), ind, ud_wr(wr), - wqe - sizeof (struct mthca_next_seg), - wqe); + err = build_mlx_header( + dev, qp, ind, ud_wr(wr), + wqe - sizeof(struct mthca_next_seg), wqe); if (err) { *bad_wr = wr; goto out; @@ -2065,9 +2064,9 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, break; case MLX: - err = build_mlx_header(dev, to_msqp(qp), ind, ud_wr(wr), - wqe - sizeof (struct mthca_next_seg), - wqe); + err = build_mlx_header( + dev, qp, ind, ud_wr(wr), + wqe - sizeof(struct mthca_next_seg), wqe); if (err) { *bad_wr = wr; goto out; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h index fcfe0e82197a..5eb61c110090 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma.h @@ -185,7 +185,6 @@ struct ocrdma_hw_mr { u32 num_pbes; u32 pbl_size; u32 pbe_size; - u64 fbo; u64 va; }; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index 6eea02b18968..699a8b719ed6 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -215,12 +215,13 @@ av_err: return status; } -void ocrdma_destroy_ah(struct ib_ah *ibah, u32 flags) +int ocrdma_destroy_ah(struct ib_ah *ibah, u32 flags) { struct ocrdma_ah *ah = get_ocrdma_ah(ibah); struct ocrdma_dev *dev = get_ocrdma_dev(ibah->device); ocrdma_free_av(dev, ah); + return 0; } int ocrdma_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h index 8b73b3489f3a..35cf2e2ff391 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h +++ 
b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h @@ -53,7 +53,7 @@ enum { int ocrdma_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata); -void ocrdma_destroy_ah(struct ib_ah *ah, u32 flags); +int ocrdma_destroy_ah(struct ib_ah *ah, u32 flags); int ocrdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); int ocrdma_process_mad(struct ib_device *dev, int process_mad_flags, diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index e07bf0b2209a..c51c3f40700e 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -1962,6 +1962,7 @@ static int ocrdma_mbx_reg_mr(struct ocrdma_dev *dev, struct ocrdma_hw_mr *hwmr, int i; struct ocrdma_reg_nsmr *cmd; struct ocrdma_reg_nsmr_rsp *rsp; + u64 fbo = hwmr->va & (hwmr->pbe_size - 1); cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_REGISTER_NSMR, sizeof(*cmd)); if (!cmd) @@ -1987,8 +1988,8 @@ static int ocrdma_mbx_reg_mr(struct ocrdma_dev *dev, struct ocrdma_hw_mr *hwmr, OCRDMA_REG_NSMR_HPAGE_SIZE_SHIFT; cmd->totlen_low = hwmr->len; cmd->totlen_high = upper_32_bits(hwmr->len); - cmd->fbo_low = (u32) (hwmr->fbo & 0xffffffff); - cmd->fbo_high = (u32) upper_32_bits(hwmr->fbo); + cmd->fbo_low = lower_32_bits(fbo); + cmd->fbo_high = upper_32_bits(fbo); cmd->va_loaddr = (u32) hwmr->va; cmd->va_hiaddr = (u32) upper_32_bits(hwmr->va); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index d8c47d24d6d6..9b96661a7143 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -255,7 +255,9 @@ static int ocrdma_register_device(struct ocrdma_dev *dev) if (ret) return ret; - return ib_register_device(&dev->ibdev, "ocrdma%d"); + dma_set_max_seg_size(&dev->nic_info.pdev->dev, UINT_MAX); + return ib_register_device(&dev->ibdev, "ocrdma%d", + &dev->nic_info.pdev->dev); } static int ocrdma_alloc_resources(struct ocrdma_dev *dev) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index c1751c9a0f62..7350fe16f164 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -112,7 +112,7 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr, } static inline void get_link_speed_and_width(struct ocrdma_dev *dev, - u8 *ib_speed, u8 *ib_width) + u16 *ib_speed, u8 *ib_width) { int status; u8 speed; @@ -664,7 +664,7 @@ exit: return status; } -void ocrdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) +int ocrdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct ocrdma_pd *pd = get_ocrdma_pd(ibpd); struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device); @@ -682,10 +682,11 @@ void ocrdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) if (is_ucontext_pd(uctx, pd)) { ocrdma_release_ucontext_pd(uctx); - return; + return 0; } } _ocrdma_dealloc_pd(dev, pd); + return 0; } static int ocrdma_alloc_lkey(struct ocrdma_dev *dev, struct ocrdma_mr *mr, @@ -810,14 +811,12 @@ static int ocrdma_build_pbl_tbl(struct ocrdma_dev *dev, struct ocrdma_hw_mr *mr) return status; } -static void build_user_pbes(struct ocrdma_dev *dev, struct ocrdma_mr *mr, - u32 num_pbes) +static void build_user_pbes(struct ocrdma_dev *dev, struct ocrdma_mr *mr) { struct ocrdma_pbe *pbe; - struct sg_dma_page_iter sg_iter; + struct ib_block_iter biter; struct ocrdma_pbl *pbl_tbl = mr->hwmr.pbl_table; - struct ib_umem *umem = mr->umem; - int pbe_cnt, 
total_num_pbes = 0; + int pbe_cnt; u64 pg_addr; if (!mr->hwmr.num_pbes) @@ -826,19 +825,14 @@ static void build_user_pbes(struct ocrdma_dev *dev, struct ocrdma_mr *mr, pbe = (struct ocrdma_pbe *)pbl_tbl->va; pbe_cnt = 0; - for_each_sg_dma_page (umem->sg_head.sgl, &sg_iter, umem->nmap, 0) { + rdma_umem_for_each_dma_block (mr->umem, &biter, PAGE_SIZE) { /* store the page address in pbe */ - pg_addr = sg_page_iter_dma_address(&sg_iter); + pg_addr = rdma_block_iter_dma_address(&biter); pbe->pa_lo = cpu_to_le32(pg_addr); pbe->pa_hi = cpu_to_le32(upper_32_bits(pg_addr)); pbe_cnt += 1; - total_num_pbes += 1; pbe++; - /* if done building pbes, issue the mbx cmd. */ - if (total_num_pbes == num_pbes) - return; - /* if the given pbl is full storing the pbes, * move to next pbl. */ @@ -857,7 +851,6 @@ struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len, struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device); struct ocrdma_mr *mr; struct ocrdma_pd *pd; - u32 num_pbes; pd = get_ocrdma_pd(ibpd); @@ -872,13 +865,12 @@ struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len, status = -EFAULT; goto umem_err; } - num_pbes = ib_umem_page_count(mr->umem); - status = ocrdma_get_pbl_info(dev, mr, num_pbes); + status = ocrdma_get_pbl_info( + dev, mr, ib_umem_num_dma_blocks(mr->umem, PAGE_SIZE)); if (status) goto umem_err; mr->hwmr.pbe_size = PAGE_SIZE; - mr->hwmr.fbo = ib_umem_offset(mr->umem); mr->hwmr.va = usr_addr; mr->hwmr.len = len; mr->hwmr.remote_wr = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0; @@ -889,7 +881,7 @@ struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len, status = ocrdma_build_pbl_tbl(dev, &mr->hwmr); if (status) goto umem_err; - build_user_pbes(dev, mr, num_pbes); + build_user_pbes(dev, mr); status = ocrdma_reg_mr(dev, &mr->hwmr, pd->id, acc); if (status) goto mbx_err; @@ -1056,7 +1048,7 @@ static void ocrdma_flush_cq(struct ocrdma_cq *cq) spin_unlock_irqrestore(&cq->cq_lock, flags); } -void ocrdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) +int ocrdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) { struct ocrdma_cq *cq = get_ocrdma_cq(ibcq); struct ocrdma_eq *eq = NULL; @@ -1081,6 +1073,7 @@ void ocrdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) ocrdma_get_db_addr(dev, pdid), dev->nic_info.db_page_size); } + return 0; } static int ocrdma_add_qpn_map(struct ocrdma_dev *dev, struct ocrdma_qp *qp) @@ -1857,7 +1850,7 @@ int ocrdma_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr) return status; } -void ocrdma_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) +int ocrdma_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) { struct ocrdma_srq *srq; struct ocrdma_dev *dev = get_ocrdma_dev(ibsrq->device); @@ -1872,6 +1865,7 @@ void ocrdma_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) kfree(srq->idx_bit_fields); kfree(srq->rqe_wr_id_tbl); + return 0; } /* unprivileged verbs and their support functions. 
*/ diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h index df8e3b923a44..425d554e7f3f 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h @@ -67,12 +67,12 @@ void ocrdma_dealloc_ucontext(struct ib_ucontext *uctx); int ocrdma_mmap(struct ib_ucontext *, struct vm_area_struct *vma); int ocrdma_alloc_pd(struct ib_pd *pd, struct ib_udata *udata); -void ocrdma_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata); +int ocrdma_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata); int ocrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ib_udata *udata); int ocrdma_resize_cq(struct ib_cq *, int cqe, struct ib_udata *); -void ocrdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata); +int ocrdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata); struct ib_qp *ocrdma_create_qp(struct ib_pd *, struct ib_qp_init_attr *attrs, @@ -92,7 +92,7 @@ int ocrdma_create_srq(struct ib_srq *srq, struct ib_srq_init_attr *attr, int ocrdma_modify_srq(struct ib_srq *, struct ib_srq_attr *, enum ib_srq_attr_mask, struct ib_udata *); int ocrdma_query_srq(struct ib_srq *, struct ib_srq_attr *); -void ocrdma_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata); +int ocrdma_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata); int ocrdma_post_srq_recv(struct ib_srq *, const struct ib_recv_wr *, const struct ib_recv_wr **bad_recv_wr); diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index d85f992bac29..967641662b24 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -177,6 +177,8 @@ static int qedr_iw_register_device(struct qedr_dev *dev) } static const struct ib_device_ops qedr_roce_dev_ops = { + .alloc_xrcd = qedr_alloc_xrcd, + .dealloc_xrcd = qedr_dealloc_xrcd, .get_port_immutable = qedr_roce_port_immutable, .query_pkey = qedr_query_pkey, }; @@ -186,6 +188,10 @@ static void qedr_roce_register_device(struct qedr_dev *dev) dev->ibdev.node_type = RDMA_NODE_IB_CA; ib_set_device_ops(&dev->ibdev, &qedr_roce_dev_ops); + + dev->ibdev.uverbs_cmd_mask |= QEDR_UVERBS(OPEN_XRCD) | + QEDR_UVERBS(CLOSE_XRCD) | + QEDR_UVERBS(CREATE_XSRQ); } static const struct ib_device_ops qedr_dev_ops = { @@ -232,6 +238,7 @@ static const struct ib_device_ops qedr_dev_ops = { INIT_RDMA_OBJ_SIZE(ib_cq, qedr_cq, ibcq), INIT_RDMA_OBJ_SIZE(ib_pd, qedr_pd, ibpd), INIT_RDMA_OBJ_SIZE(ib_srq, qedr_srq, ibsrq), + INIT_RDMA_OBJ_SIZE(ib_xrcd, qedr_xrcd, ibxrcd), INIT_RDMA_OBJ_SIZE(ib_ucontext, qedr_ucontext, ibucontext), }; @@ -286,7 +293,8 @@ static int qedr_register_device(struct qedr_dev *dev) if (rc) return rc; - return ib_register_device(&dev->ibdev, "qedr%d"); + dma_set_max_seg_size(&dev->pdev->dev, UINT_MAX); + return ib_register_device(&dev->ibdev, "qedr%d", &dev->pdev->dev); } /* This function allocates fast-path status block memory */ @@ -602,7 +610,7 @@ static int qedr_set_device_attr(struct qedr_dev *dev) qed_attr = dev->ops->rdma_query_device(dev->rdma_ctx); /* Part 2 - check capabilities */ - page_size = ~dev->attr.page_size_caps + 1; + page_size = ~qed_attr->page_size_caps + 1; if (page_size > PAGE_SIZE) { DP_ERR(dev, "Kernel PAGE_SIZE is %ld which is smaller than minimum page size (%d) required by qedr\n", @@ -705,6 +713,18 @@ static void qedr_affiliated_event(void *context, u8 e_code, void *fw_handle) event.event = IB_EVENT_SRQ_ERR; event_type = EVENT_TYPE_SRQ; break; + case ROCE_ASYNC_EVENT_XRC_DOMAIN_ERR: + event.event = 
IB_EVENT_QP_ACCESS_ERR; + event_type = EVENT_TYPE_QP; + break; + case ROCE_ASYNC_EVENT_INVALID_XRCETH_ERR: + event.event = IB_EVENT_QP_ACCESS_ERR; + event_type = EVENT_TYPE_QP; + break; + case ROCE_ASYNC_EVENT_XRC_SRQ_CATASTROPHIC_ERR: + event.event = IB_EVENT_CQ_ERR; + event_type = EVENT_TYPE_CQ; + break; default: DP_ERR(dev, "unsupported event %d on handle=%llx\n", e_code, roce_handle64); @@ -1026,6 +1046,13 @@ static void qedr_notify(struct qedr_dev *dev, enum qede_rdma_event event) case QEDE_CHANGE_ADDR: qedr_mac_address_change(dev); break; + case QEDE_CHANGE_MTU: + if (rdma_protocol_iwarp(&dev->ibdev, 1)) + if (dev->ndev->mtu != dev->iwarp_max_mtu) + DP_NOTICE(dev, + "Mtu was changed from %d to %d. This will not take affect for iWARP until qedr is reloaded\n", + dev->iwarp_max_mtu, dev->ndev->mtu); + break; default: pr_err("Event not supported\n"); } diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h index 460292179b32..9dde70373a55 100644 --- a/drivers/infiniband/hw/qedr/qedr.h +++ b/drivers/infiniband/hw/qedr/qedr.h @@ -310,6 +310,11 @@ struct qedr_pd { struct qedr_ucontext *uctx; }; +struct qedr_xrcd { + struct ib_xrcd ibxrcd; + u16 xrcd_id; +}; + struct qedr_qp_hwq_info { /* WQE Elements */ struct qed_chain pbl; @@ -361,6 +366,7 @@ struct qedr_srq { struct ib_umem *prod_umem; u16 srq_id; u32 srq_limit; + bool is_xrc; /* lock to protect srq recv post */ spinlock_t lock; }; @@ -573,6 +579,11 @@ static inline struct qedr_pd *get_qedr_pd(struct ib_pd *ibpd) return container_of(ibpd, struct qedr_pd, ibpd); } +static inline struct qedr_xrcd *get_qedr_xrcd(struct ib_xrcd *ibxrcd) +{ + return container_of(ibxrcd, struct qedr_xrcd, ibxrcd); +} + static inline struct qedr_cq *get_qedr_cq(struct ib_cq *ibcq) { return container_of(ibcq, struct qedr_cq, ibcq); @@ -598,6 +609,28 @@ static inline struct qedr_srq *get_qedr_srq(struct ib_srq *ibsrq) return container_of(ibsrq, struct qedr_srq, ibsrq); } +static inline bool qedr_qp_has_srq(struct qedr_qp *qp) +{ + return qp->srq; +} + +static inline bool qedr_qp_has_sq(struct qedr_qp *qp) +{ + if (qp->qp_type == IB_QPT_GSI || qp->qp_type == IB_QPT_XRC_TGT) + return 0; + + return 1; +} + +static inline bool qedr_qp_has_rq(struct qedr_qp *qp) +{ + if (qp->qp_type == IB_QPT_GSI || qp->qp_type == IB_QPT_XRC_INI || + qp->qp_type == IB_QPT_XRC_TGT || qedr_qp_has_srq(qp)) + return 0; + + return 1; +} + static inline struct qedr_user_mmap_entry * get_qedr_mmap_entry(struct rdma_user_mmap_entry *rdma_entry) { diff --git a/drivers/infiniband/hw/qedr/qedr_iw_cm.c b/drivers/infiniband/hw/qedr/qedr_iw_cm.c index 97fc7dd353b0..c7169d2c69e5 100644 --- a/drivers/infiniband/hw/qedr/qedr_iw_cm.c +++ b/drivers/infiniband/hw/qedr/qedr_iw_cm.c @@ -736,7 +736,7 @@ int qedr_iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) struct qedr_dev *dev = ep->dev; struct qedr_qp *qp; struct qed_iwarp_accept_in params; - int rc = 0; + int rc; DP_DEBUG(dev, QEDR_MSG_IWARP, "Accept on qpid=%d\n", conn_param->qpn); @@ -759,8 +759,10 @@ int qedr_iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) params.ord = conn_param->ord; if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_CONNECT, - &qp->iwarp_cm_flags)) + &qp->iwarp_cm_flags)) { + rc = -EINVAL; goto err; /* QP already destroyed */ + } rc = dev->ops->iwarp_accept(dev->rdma_ctx, &params); if (rc) { diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index b49bef94637e..019642ff24a7 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ 
b/drivers/infiniband/hw/qedr/verbs.c @@ -136,6 +136,8 @@ int qedr_query_device(struct ib_device *ibdev, IB_DEVICE_RC_RNR_NAK_GEN | IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS; + if (!rdma_protocol_iwarp(&dev->ibdev, 1)) + attr->device_cap_flags |= IB_DEVICE_XRC; attr->max_send_sge = qattr->max_sge; attr->max_recv_sge = qattr->max_sge; attr->max_sge_rd = qattr->max_sge; @@ -157,13 +159,13 @@ int qedr_query_device(struct ib_device *ibdev, attr->local_ca_ack_delay = qattr->dev_ack_delay; attr->max_fast_reg_page_list_len = qattr->max_mr / 8; - attr->max_pkeys = QEDR_ROCE_PKEY_MAX; + attr->max_pkeys = qattr->max_pkey; attr->max_ah = qattr->max_ah; return 0; } -static inline void get_link_speed_and_width(int speed, u8 *ib_speed, +static inline void get_link_speed_and_width(int speed, u16 *ib_speed, u8 *ib_width) { switch (speed) { @@ -231,15 +233,16 @@ int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr) attr->phys_state = IB_PORT_PHYS_STATE_DISABLED; } attr->max_mtu = IB_MTU_4096; - attr->active_mtu = iboe_get_mtu(dev->ndev->mtu); attr->lid = 0; attr->lmc = 0; attr->sm_lid = 0; attr->sm_sl = 0; attr->ip_gids = true; if (rdma_protocol_iwarp(&dev->ibdev, 1)) { + attr->active_mtu = iboe_get_mtu(dev->iwarp_max_mtu); attr->gid_tbl_len = 1; } else { + attr->active_mtu = iboe_get_mtu(dev->ndev->mtu); attr->gid_tbl_len = QEDR_MAX_SGID; attr->pkey_tbl_len = QEDR_ROCE_PKEY_TABLE_LEN; } @@ -471,15 +474,33 @@ int qedr_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) return 0; } -void qedr_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) +int qedr_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct qedr_dev *dev = get_qedr_dev(ibpd->device); struct qedr_pd *pd = get_qedr_pd(ibpd); DP_DEBUG(dev, QEDR_MSG_INIT, "Deallocating PD %d\n", pd->pd_id); dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd->pd_id); + return 0; +} + + +int qedr_alloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata) +{ + struct qedr_dev *dev = get_qedr_dev(ibxrcd->device); + struct qedr_xrcd *xrcd = get_qedr_xrcd(ibxrcd); + + return dev->ops->rdma_alloc_xrcd(dev->rdma_ctx, &xrcd->xrcd_id); } +int qedr_dealloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata) +{ + struct qedr_dev *dev = get_qedr_dev(ibxrcd->device); + u16 xrcd_id = get_qedr_xrcd(ibxrcd)->xrcd_id; + + dev->ops->rdma_dealloc_xrcd(dev->rdma_ctx, xrcd_id); + return 0; +} static void qedr_free_pbl(struct qedr_dev *dev, struct qedr_pbl_info *pbl_info, struct qedr_pbl *pbl) { @@ -600,11 +621,9 @@ static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem, struct qedr_pbl_info *pbl_info, u32 pg_shift) { int pbe_cnt, total_num_pbes = 0; - u32 fw_pg_cnt, fw_pg_per_umem_pg; struct qedr_pbl *pbl_tbl; - struct sg_dma_page_iter sg_iter; + struct ib_block_iter biter; struct regpair *pbe; - u64 pg_addr; if (!pbl_info->num_pbes) return; @@ -625,32 +644,25 @@ static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem, pbe_cnt = 0; - fw_pg_per_umem_pg = BIT(PAGE_SHIFT - pg_shift); + rdma_umem_for_each_dma_block (umem, &biter, BIT(pg_shift)) { + u64 pg_addr = rdma_block_iter_dma_address(&biter); - for_each_sg_dma_page (umem->sg_head.sgl, &sg_iter, umem->nmap, 0) { - pg_addr = sg_page_iter_dma_address(&sg_iter); - for (fw_pg_cnt = 0; fw_pg_cnt < fw_pg_per_umem_pg;) { - pbe->lo = cpu_to_le32(pg_addr); - pbe->hi = cpu_to_le32(upper_32_bits(pg_addr)); + pbe->lo = cpu_to_le32(pg_addr); + pbe->hi = cpu_to_le32(upper_32_bits(pg_addr)); - pg_addr += BIT(pg_shift); - pbe_cnt++; - total_num_pbes++; - pbe++; + 
pbe_cnt++; + total_num_pbes++; + pbe++; - if (total_num_pbes == pbl_info->num_pbes) - return; + if (total_num_pbes == pbl_info->num_pbes) + return; - /* If the given pbl is full storing the pbes, - * move to next pbl. - */ - if (pbe_cnt == (pbl_info->pbl_size / sizeof(u64))) { - pbl_tbl++; - pbe = (struct regpair *)pbl_tbl->va; - pbe_cnt = 0; - } - - fw_pg_cnt++; + /* If the given pbl is full storing the pbes, move to next pbl. + */ + if (pbe_cnt == (pbl_info->pbl_size / sizeof(u64))) { + pbl_tbl++; + pbe = (struct regpair *)pbl_tbl->va; + pbe_cnt = 0; } } } @@ -792,9 +804,7 @@ static inline int qedr_init_user_queue(struct ib_udata *udata, return PTR_ERR(q->umem); } - fw_pages = ib_umem_page_count(q->umem) << - (PAGE_SHIFT - FW_PAGE_SHIFT); - + fw_pages = ib_umem_num_dma_blocks(q->umem, 1 << FW_PAGE_SHIFT); rc = qedr_prepare_pbl_tbl(dev, &q->pbl_info, fw_pages, 0); if (rc) goto err0; @@ -999,7 +1009,7 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, /* Generate doorbell address. */ cq->db.data.icid = cq->icid; cq->db_addr = dev->db_addr + db_offset; - cq->db.data.params = DB_AGG_CMD_SET << + cq->db.data.params = DB_AGG_CMD_MAX << RDMA_PWM_VAL32_DATA_AGG_CMD_SHIFT; /* point to the very last element, passing it we will toggle */ @@ -1051,7 +1061,7 @@ int qedr_resize_cq(struct ib_cq *ibcq, int new_cnt, struct ib_udata *udata) #define QEDR_DESTROY_CQ_MAX_ITERATIONS (10) #define QEDR_DESTROY_CQ_ITER_DURATION (10) -void qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) +int qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) { struct qedr_dev *dev = get_qedr_dev(ibcq->device); struct qed_rdma_destroy_cq_out_params oparams; @@ -1066,7 +1076,7 @@ void qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) /* GSIs CQs are handled by driver, so they don't exist in the FW */ if (cq->cq_type == QEDR_CQ_TYPE_GSI) { qedr_db_recovery_del(dev, cq->db_addr, &cq->db.data); - return; + return 0; } iparams.icid = cq->icid; @@ -1114,6 +1124,7 @@ void qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) * Since the destroy CQ ramrod has also been received on the EQ we can * be certain that there's no event handler in process. */ + return 0; } static inline int get_gid_info_from_table(struct ib_qp *ibqp, @@ -1146,7 +1157,7 @@ static inline int get_gid_info_from_table(struct ib_qp *ibqp, SET_FIELD(qp_params->modify_flags, QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1); break; - case RDMA_NETWORK_IB: + case RDMA_NETWORK_ROCE_V1: memcpy(&qp_params->sgid.bytes[0], &gid_attr->gid.raw[0], sizeof(qp_params->sgid)); memcpy(&qp_params->dgid.bytes[0], @@ -1166,6 +1177,8 @@ static inline int get_gid_info_from_table(struct ib_qp *ibqp, QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1); qp_params->roce_mode = ROCE_V2_IPV4; break; + default: + return -EINVAL; } for (i = 0; i < 4; i++) { @@ -1186,7 +1199,10 @@ static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev, struct qedr_device_attr *qattr = &dev->attr; /* QP0... 
attrs->qp_type == IB_QPT_GSI */ - if (attrs->qp_type != IB_QPT_RC && attrs->qp_type != IB_QPT_GSI) { + if (attrs->qp_type != IB_QPT_RC && + attrs->qp_type != IB_QPT_GSI && + attrs->qp_type != IB_QPT_XRC_INI && + attrs->qp_type != IB_QPT_XRC_TGT) { DP_DEBUG(dev, QEDR_MSG_QP, "create qp: unsupported qp type=0x%x requested\n", attrs->qp_type); @@ -1221,12 +1237,20 @@ static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev, return -EINVAL; } - /* Unprivileged user space cannot create special QP */ - if (udata && attrs->qp_type == IB_QPT_GSI) { - DP_ERR(dev, - "create qp: userspace can't create special QPs of type=0x%x\n", - attrs->qp_type); - return -EINVAL; + /* verify consumer QPs are not trying to use GSI QP's CQ. + * TGT QP isn't associated with RQ/SQ + */ + if ((attrs->qp_type != IB_QPT_GSI) && (dev->gsi_qp_created) && + (attrs->qp_type != IB_QPT_XRC_TGT)) { + struct qedr_cq *send_cq = get_qedr_cq(attrs->send_cq); + struct qedr_cq *recv_cq = get_qedr_cq(attrs->recv_cq); + + if ((send_cq->cq_type == QEDR_CQ_TYPE_GSI) || + (recv_cq->cq_type == QEDR_CQ_TYPE_GSI)) { + DP_ERR(dev, + "create qp: consumer QP cannot use GSI CQs.\n"); + return -EINVAL; + } } return 0; @@ -1248,8 +1272,8 @@ static int qedr_copy_srq_uresp(struct qedr_dev *dev, } static void qedr_copy_rq_uresp(struct qedr_dev *dev, - struct qedr_create_qp_uresp *uresp, - struct qedr_qp *qp) + struct qedr_create_qp_uresp *uresp, + struct qedr_qp *qp) { /* iWARP requires two doorbells per RQ. */ if (rdma_protocol_iwarp(&dev->ibdev, 1)) { @@ -1291,8 +1315,12 @@ static int qedr_copy_qp_uresp(struct qedr_dev *dev, int rc; memset(uresp, 0, sizeof(*uresp)); - qedr_copy_sq_uresp(dev, uresp, qp); - qedr_copy_rq_uresp(dev, uresp, qp); + + if (qedr_qp_has_sq(qp)) + qedr_copy_sq_uresp(dev, uresp, qp); + + if (qedr_qp_has_rq(qp)) + qedr_copy_rq_uresp(dev, uresp, qp); uresp->atomic_supported = dev->atomic_cap != IB_ATOMIC_NONE; uresp->qp_id = qp->qp_id; @@ -1316,18 +1344,25 @@ static void qedr_set_common_qp_params(struct qedr_dev *dev, kref_init(&qp->refcnt); init_completion(&qp->iwarp_cm_comp); } + qp->pd = pd; qp->qp_type = attrs->qp_type; qp->max_inline_data = attrs->cap.max_inline_data; - qp->sq.max_sges = attrs->cap.max_send_sge; qp->state = QED_ROCE_QP_STATE_RESET; qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? 
true : false; - qp->sq_cq = get_qedr_cq(attrs->send_cq); qp->dev = dev; + if (qedr_qp_has_sq(qp)) { + qp->sq.max_sges = attrs->cap.max_send_sge; + qp->sq_cq = get_qedr_cq(attrs->send_cq); + DP_DEBUG(dev, QEDR_MSG_QP, + "SQ params:\tsq_max_sges = %d, sq_cq_id = %d\n", + qp->sq.max_sges, qp->sq_cq->icid); + } - if (attrs->srq) { + if (attrs->srq) qp->srq = get_qedr_srq(attrs->srq); - } else { + + if (qedr_qp_has_rq(qp)) { qp->rq_cq = get_qedr_cq(attrs->recv_cq); qp->rq.max_sges = attrs->cap.max_recv_sge; DP_DEBUG(dev, QEDR_MSG_QP, @@ -1346,30 +1381,26 @@ static void qedr_set_common_qp_params(struct qedr_dev *dev, static int qedr_set_roce_db_info(struct qedr_dev *dev, struct qedr_qp *qp) { - int rc; + int rc = 0; - qp->sq.db = dev->db_addr + - DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD); - qp->sq.db_data.data.icid = qp->icid + 1; - rc = qedr_db_recovery_add(dev, qp->sq.db, - &qp->sq.db_data, - DB_REC_WIDTH_32B, - DB_REC_KERNEL); - if (rc) - return rc; + if (qedr_qp_has_sq(qp)) { + qp->sq.db = dev->db_addr + + DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD); + qp->sq.db_data.data.icid = qp->icid + 1; + rc = qedr_db_recovery_add(dev, qp->sq.db, &qp->sq.db_data, + DB_REC_WIDTH_32B, DB_REC_KERNEL); + if (rc) + return rc; + } - if (!qp->srq) { + if (qedr_qp_has_rq(qp)) { qp->rq.db = dev->db_addr + DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD); qp->rq.db_data.data.icid = qp->icid; - - rc = qedr_db_recovery_add(dev, qp->rq.db, - &qp->rq.db_data, - DB_REC_WIDTH_32B, - DB_REC_KERNEL); - if (rc) - qedr_db_recovery_del(dev, qp->sq.db, - &qp->sq.db_data); + rc = qedr_db_recovery_add(dev, qp->rq.db, &qp->rq.db_data, + DB_REC_WIDTH_32B, DB_REC_KERNEL); + if (rc && qedr_qp_has_sq(qp)) + qedr_db_recovery_del(dev, qp->sq.db, &qp->sq.db_data); } return rc; @@ -1392,6 +1423,10 @@ static int qedr_check_srq_params(struct qedr_dev *dev, DP_ERR(dev, "create srq: unsupported sge=0x%x requested (max_srq_sge=0x%x)\n", attrs->attr.max_sge, qattr->max_sge); + } + + if (!udata && attrs->srq_type == IB_SRQT_XRC) { + DP_ERR(dev, "XRC SRQs are not supported in kernel-space\n"); return -EINVAL; } @@ -1516,6 +1551,7 @@ int qedr_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr, return -EINVAL; srq->dev = dev; + srq->is_xrc = (init_attr->srq_type == IB_SRQT_XRC); hw_srq = &srq->hw_srq; spin_lock_init(&srq->lock); @@ -1557,6 +1593,14 @@ int qedr_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr, in_params.prod_pair_addr = phy_prod_pair_addr; in_params.num_pages = page_cnt; in_params.page_size = page_size; + if (srq->is_xrc) { + struct qedr_xrcd *xrcd = get_qedr_xrcd(init_attr->ext.xrc.xrcd); + struct qedr_cq *cq = get_qedr_cq(init_attr->ext.cq); + + in_params.is_xrc = 1; + in_params.xrcd_id = xrcd->xrcd_id; + in_params.cq_cid = cq->icid; + } rc = dev->ops->rdma_create_srq(dev->rdma_ctx, &in_params, &out_params); if (rc) @@ -1591,7 +1635,7 @@ err0: return -EFAULT; } -void qedr_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) +int qedr_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) { struct qed_rdma_destroy_srq_in_params in_params = {}; struct qedr_dev *dev = get_qedr_dev(ibsrq->device); @@ -1599,6 +1643,7 @@ void qedr_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) xa_erase_irq(&dev->srqs, srq->srq_id); in_params.srq_id = srq->srq_id; + in_params.is_xrc = srq->is_xrc; dev->ops->rdma_destroy_srq(dev->rdma_ctx, &in_params); if (ibsrq->uobject) @@ -1609,6 +1654,7 @@ void qedr_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) DP_DEBUG(dev, QEDR_MSG_SRQ, 
"destroy srq: destroyed srq with srq_id=0x%0x\n", srq->srq_id); + return 0; } int qedr_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, @@ -1649,6 +1695,20 @@ int qedr_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, return 0; } +static enum qed_rdma_qp_type qedr_ib_to_qed_qp_type(enum ib_qp_type ib_qp_type) +{ + switch (ib_qp_type) { + case IB_QPT_RC: + return QED_RDMA_QP_TYPE_RC; + case IB_QPT_XRC_INI: + return QED_RDMA_QP_TYPE_XRC_INI; + case IB_QPT_XRC_TGT: + return QED_RDMA_QP_TYPE_XRC_TGT; + default: + return QED_RDMA_QP_TYPE_INVAL; + } +} + static inline void qedr_init_common_qp_in_params(struct qedr_dev *dev, struct qedr_pd *pd, @@ -1663,20 +1723,27 @@ qedr_init_common_qp_in_params(struct qedr_dev *dev, params->signal_all = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR); params->fmr_and_reserved_lkey = fmr_and_reserved_lkey; - params->pd = pd->pd_id; - params->dpi = pd->uctx ? pd->uctx->dpi : dev->dpi; - params->sq_cq_id = get_qedr_cq(attrs->send_cq)->icid; + params->qp_type = qedr_ib_to_qed_qp_type(attrs->qp_type); params->stats_queue = 0; - params->srq_id = 0; - params->use_srq = false; - if (!qp->srq) { + if (pd) { + params->pd = pd->pd_id; + params->dpi = pd->uctx ? pd->uctx->dpi : dev->dpi; + } + + if (qedr_qp_has_sq(qp)) + params->sq_cq_id = get_qedr_cq(attrs->send_cq)->icid; + + if (qedr_qp_has_rq(qp)) params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid; - } else { + if (qedr_qp_has_srq(qp)) { params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid; params->srq_id = qp->srq->srq_id; params->use_srq = true; + } else { + params->srq_id = 0; + params->use_srq = false; } } @@ -1690,8 +1757,10 @@ static inline void qedr_qp_user_print(struct qedr_dev *dev, struct qedr_qp *qp) "rq_len=%zd" "\n", qp, - qp->usq.buf_addr, - qp->usq.buf_len, qp->urq.buf_addr, qp->urq.buf_len); + qedr_qp_has_sq(qp) ? qp->usq.buf_addr : 0x0, + qedr_qp_has_sq(qp) ? qp->usq.buf_len : 0, + qedr_qp_has_rq(qp) ? qp->urq.buf_addr : 0x0, + qedr_qp_has_sq(qp) ? qp->urq.buf_len : 0); } static inline void @@ -1717,11 +1786,15 @@ static void qedr_cleanup_user(struct qedr_dev *dev, struct qedr_ucontext *ctx, struct qedr_qp *qp) { - ib_umem_release(qp->usq.umem); - qp->usq.umem = NULL; + if (qedr_qp_has_sq(qp)) { + ib_umem_release(qp->usq.umem); + qp->usq.umem = NULL; + } - ib_umem_release(qp->urq.umem); - qp->urq.umem = NULL; + if (qedr_qp_has_rq(qp)) { + ib_umem_release(qp->urq.umem); + qp->urq.umem = NULL; + } if (rdma_protocol_roce(&dev->ibdev, 1)) { qedr_free_pbl(dev, &qp->usq.pbl_info, qp->usq.pbl_tbl); @@ -1756,28 +1829,38 @@ static int qedr_create_user_qp(struct qedr_dev *dev, { struct qed_rdma_create_qp_in_params in_params; struct qed_rdma_create_qp_out_params out_params; - struct qedr_pd *pd = get_qedr_pd(ibpd); - struct qedr_create_qp_uresp uresp; - struct qedr_ucontext *ctx = pd ? 
pd->uctx : NULL; - struct qedr_create_qp_ureq ureq; + struct qedr_create_qp_uresp uresp = {}; + struct qedr_create_qp_ureq ureq = {}; int alloc_and_init = rdma_protocol_roce(&dev->ibdev, 1); - int rc = -EINVAL; + struct qedr_ucontext *ctx = NULL; + struct qedr_pd *pd = NULL; + int rc = 0; qp->create_type = QEDR_QP_CREATE_USER; - memset(&ureq, 0, sizeof(ureq)); - rc = ib_copy_from_udata(&ureq, udata, min(sizeof(ureq), udata->inlen)); - if (rc) { - DP_ERR(dev, "Problem copying data from user space\n"); - return rc; + + if (ibpd) { + pd = get_qedr_pd(ibpd); + ctx = pd->uctx; } - /* SQ - read access only (0) */ - rc = qedr_init_user_queue(udata, dev, &qp->usq, ureq.sq_addr, - ureq.sq_len, true, 0, alloc_and_init); - if (rc) - return rc; + if (udata) { + rc = ib_copy_from_udata(&ureq, udata, min(sizeof(ureq), + udata->inlen)); + if (rc) { + DP_ERR(dev, "Problem copying data from user space\n"); + return rc; + } + } - if (!qp->srq) { + if (qedr_qp_has_sq(qp)) { + /* SQ - read access only (0) */ + rc = qedr_init_user_queue(udata, dev, &qp->usq, ureq.sq_addr, + ureq.sq_len, true, 0, alloc_and_init); + if (rc) + return rc; + } + + if (qedr_qp_has_rq(qp)) { /* RQ - read access only (0) */ rc = qedr_init_user_queue(udata, dev, &qp->urq, ureq.rq_addr, ureq.rq_len, true, 0, alloc_and_init); @@ -1789,9 +1872,21 @@ static int qedr_create_user_qp(struct qedr_dev *dev, qedr_init_common_qp_in_params(dev, pd, qp, attrs, false, &in_params); in_params.qp_handle_lo = ureq.qp_handle_lo; in_params.qp_handle_hi = ureq.qp_handle_hi; - in_params.sq_num_pages = qp->usq.pbl_info.num_pbes; - in_params.sq_pbl_ptr = qp->usq.pbl_tbl->pa; - if (!qp->srq) { + + if (qp->qp_type == IB_QPT_XRC_TGT) { + struct qedr_xrcd *xrcd = get_qedr_xrcd(attrs->xrcd); + + in_params.xrcd_id = xrcd->xrcd_id; + in_params.qp_handle_lo = qp->qp_id; + in_params.use_srq = 1; + } + + if (qedr_qp_has_sq(qp)) { + in_params.sq_num_pages = qp->usq.pbl_info.num_pbes; + in_params.sq_pbl_ptr = qp->usq.pbl_tbl->pa; + } + + if (qedr_qp_has_rq(qp)) { in_params.rq_num_pages = qp->urq.pbl_info.num_pbes; in_params.rq_pbl_ptr = qp->urq.pbl_tbl->pa; } @@ -1813,39 +1908,32 @@ static int qedr_create_user_qp(struct qedr_dev *dev, qp->qp_id = out_params.qp_id; qp->icid = out_params.icid; - rc = qedr_copy_qp_uresp(dev, qp, udata, &uresp); - if (rc) - goto err; + if (udata) { + rc = qedr_copy_qp_uresp(dev, qp, udata, &uresp); + if (rc) + goto err; + } /* db offset was calculated in copy_qp_uresp, now set in the user q */ - ctx = pd->uctx; - qp->usq.db_addr = ctx->dpi_addr + uresp.sq_db_offset; - qp->urq.db_addr = ctx->dpi_addr + uresp.rq_db_offset; - - if (rdma_protocol_iwarp(&dev->ibdev, 1)) { - qp->urq.db_rec_db2_addr = ctx->dpi_addr + uresp.rq_db2_offset; - - /* calculate the db_rec_db2 data since it is constant so no - * need to reflect from user - */ - qp->urq.db_rec_db2_data.data.icid = cpu_to_le16(qp->icid); - qp->urq.db_rec_db2_data.data.value = - cpu_to_le16(DQ_TCM_IWARP_POST_RQ_CF_CMD); + if (qedr_qp_has_sq(qp)) { + qp->usq.db_addr = ctx->dpi_addr + uresp.sq_db_offset; + rc = qedr_db_recovery_add(dev, qp->usq.db_addr, + &qp->usq.db_rec_data->db_data, + DB_REC_WIDTH_32B, + DB_REC_USER); + if (rc) + goto err; } - rc = qedr_db_recovery_add(dev, qp->usq.db_addr, - &qp->usq.db_rec_data->db_data, - DB_REC_WIDTH_32B, - DB_REC_USER); - if (rc) - goto err; - - rc = qedr_db_recovery_add(dev, qp->urq.db_addr, - &qp->urq.db_rec_data->db_data, - DB_REC_WIDTH_32B, - DB_REC_USER); - if (rc) - goto err; + if (qedr_qp_has_rq(qp)) { + qp->urq.db_addr = ctx->dpi_addr + 
uresp.rq_db_offset; + rc = qedr_db_recovery_add(dev, qp->urq.db_addr, + &qp->urq.db_rec_data->db_data, + DB_REC_WIDTH_32B, + DB_REC_USER); + if (rc) + goto err; + } if (rdma_protocol_iwarp(&dev->ibdev, 1)) { rc = qedr_db_recovery_add(dev, qp->urq.db_rec_db2_addr, @@ -1856,7 +1944,6 @@ static int qedr_create_user_qp(struct qedr_dev *dev, goto err; } qedr_qp_user_print(dev, qp); - return rc; err: rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp); @@ -2112,16 +2199,47 @@ static int qedr_create_kernel_qp(struct qedr_dev *dev, return rc; } +static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp, + struct ib_udata *udata) +{ + struct qedr_ucontext *ctx = + rdma_udata_to_drv_context(udata, struct qedr_ucontext, + ibucontext); + int rc; + + if (qp->qp_type != IB_QPT_GSI) { + rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp); + if (rc) + return rc; + } + + if (qp->create_type == QEDR_QP_CREATE_USER) + qedr_cleanup_user(dev, ctx, qp); + else + qedr_cleanup_kernel(dev, qp); + + return 0; +} + struct ib_qp *qedr_create_qp(struct ib_pd *ibpd, struct ib_qp_init_attr *attrs, struct ib_udata *udata) { - struct qedr_dev *dev = get_qedr_dev(ibpd->device); - struct qedr_pd *pd = get_qedr_pd(ibpd); + struct qedr_xrcd *xrcd = NULL; + struct qedr_pd *pd = NULL; + struct qedr_dev *dev; struct qedr_qp *qp; struct ib_qp *ibqp; int rc = 0; + if (attrs->qp_type == IB_QPT_XRC_TGT) { + xrcd = get_qedr_xrcd(attrs->xrcd); + dev = get_qedr_dev(xrcd->ibxrcd.device); + } else { + pd = get_qedr_pd(ibpd); + dev = get_qedr_dev(ibpd->device); + } + DP_DEBUG(dev, QEDR_MSG_QP, "create qp: called from %s, pd=%p\n", udata ? "user library" : "kernel", pd); @@ -2152,25 +2270,27 @@ struct ib_qp *qedr_create_qp(struct ib_pd *ibpd, return ibqp; } - if (udata) + if (udata || xrcd) rc = qedr_create_user_qp(dev, qp, ibpd, udata, attrs); else rc = qedr_create_kernel_qp(dev, qp, ibpd, attrs); if (rc) - goto err; + goto out_free_qp; qp->ibqp.qp_num = qp->qp_id; if (rdma_protocol_iwarp(&dev->ibdev, 1)) { rc = xa_insert(&dev->qps, qp->qp_id, qp, GFP_KERNEL); if (rc) - goto err; + goto out_free_qp_resources; } return &qp->ibqp; -err: +out_free_qp_resources: + qedr_free_qp_resources(dev, qp, udata); +out_free_qp: kfree(qp); return ERR_PTR(-EFAULT); @@ -2636,7 +2756,7 @@ int qedr_query_qp(struct ib_qp *ibqp, qp_attr->cap.max_recv_wr = qp->rq.max_wr; qp_attr->cap.max_send_sge = qp->sq.max_sges; qp_attr->cap.max_recv_sge = qp->rq.max_sges; - qp_attr->cap.max_inline_data = ROCE_REQ_MAX_INLINE_DATA_SIZE; + qp_attr->cap.max_inline_data = dev->attr.max_inline; qp_init_attr->cap = qp_attr->cap; qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE; @@ -2671,28 +2791,6 @@ err: return rc; } -static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp, - struct ib_udata *udata) -{ - struct qedr_ucontext *ctx = - rdma_udata_to_drv_context(udata, struct qedr_ucontext, - ibucontext); - int rc; - - if (qp->qp_type != IB_QPT_GSI) { - rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp); - if (rc) - return rc; - } - - if (qp->create_type == QEDR_QP_CREATE_USER) - qedr_cleanup_user(dev, ctx, qp); - else - qedr_cleanup_kernel(dev, qp); - - return 0; -} - int qedr_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) { struct qedr_qp *qp = get_qedr_qp(ibqp); @@ -2752,6 +2850,8 @@ int qedr_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) if (rdma_protocol_iwarp(&dev->ibdev, 1)) qedr_iw_qp_rem_ref(&qp->ibqp); + else + kfree(qp); return 0; } @@ -2766,11 +2866,12 @@ int qedr_create_ah(struct ib_ah *ibah, struct 
rdma_ah_init_attr *init_attr, return 0; } -void qedr_destroy_ah(struct ib_ah *ibah, u32 flags) +int qedr_destroy_ah(struct ib_ah *ibah, u32 flags) { struct qedr_ah *ah = get_qedr_ah(ibah); rdma_destroy_ah_attr(&ah->attr); + return 0; } static void free_mr_info(struct qedr_dev *dev, struct mr_info *info) @@ -2861,7 +2962,8 @@ struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len, goto err0; } - rc = init_mr_info(dev, &mr->info, ib_umem_page_count(mr->umem), 1); + rc = init_mr_info(dev, &mr->info, + ib_umem_num_dma_blocks(mr->umem, PAGE_SIZE), 1); if (rc) goto err1; @@ -2888,10 +2990,8 @@ struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len, mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered; mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size); mr->hw_mr.page_size_log = PAGE_SHIFT; - mr->hw_mr.fbo = ib_umem_offset(mr->umem); mr->hw_mr.length = len; mr->hw_mr.vaddr = usr_addr; - mr->hw_mr.zbva = false; mr->hw_mr.phy_mr = false; mr->hw_mr.dma_mr = false; @@ -2984,10 +3084,8 @@ static struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd, mr->hw_mr.pbl_ptr = 0; mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered; mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size); - mr->hw_mr.fbo = 0; mr->hw_mr.length = 0; mr->hw_mr.vaddr = 0; - mr->hw_mr.zbva = false; mr->hw_mr.phy_mr = true; mr->hw_mr.dma_mr = false; @@ -3765,10 +3863,10 @@ int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, * in first 4 bytes and need to update WQE producer in * next 4 bytes. */ - srq->hw_srq.virt_prod_pair_addr->sge_prod = hw_srq->sge_prod; + srq->hw_srq.virt_prod_pair_addr->sge_prod = cpu_to_le32(hw_srq->sge_prod); /* Make sure sge producer is updated first */ dma_wmb(); - srq->hw_srq.virt_prod_pair_addr->wqe_prod = hw_srq->wqe_prod; + srq->hw_srq.virt_prod_pair_addr->wqe_prod = cpu_to_le32(hw_srq->wqe_prod); wr = wr->next; } diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h index 39dd6286ba39..2672c32bc2f7 100644 --- a/drivers/infiniband/hw/qedr/verbs.h +++ b/drivers/infiniband/hw/qedr/verbs.h @@ -47,12 +47,13 @@ void qedr_dealloc_ucontext(struct ib_ucontext *uctx); int qedr_mmap(struct ib_ucontext *ucontext, struct vm_area_struct *vma); void qedr_mmap_free(struct rdma_user_mmap_entry *rdma_entry); int qedr_alloc_pd(struct ib_pd *pd, struct ib_udata *udata); -void qedr_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata); - +int qedr_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata); +int qedr_alloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata); +int qedr_dealloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata); int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ib_udata *udata); int qedr_resize_cq(struct ib_cq *, int cqe, struct ib_udata *); -void qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata); +int qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata); int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); struct ib_qp *qedr_create_qp(struct ib_pd *, struct ib_qp_init_attr *attrs, struct ib_udata *); @@ -67,12 +68,12 @@ int qedr_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *attr, int qedr_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); int qedr_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr); -void qedr_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata); +int qedr_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata); int 
qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_recv_wr); int qedr_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata); -void qedr_destroy_ah(struct ib_ah *ibah, u32 flags); +int qedr_destroy_ah(struct ib_ah *ibah, u32 flags); int qedr_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata); struct ib_mr *qedr_get_dma_mr(struct ib_pd *, int acc); diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h index 432d6d0fd7f4..ee211423058a 100644 --- a/drivers/infiniband/hw/qib/qib.h +++ b/drivers/infiniband/hw/qib/qib.h @@ -619,11 +619,11 @@ struct qib_pportdata { /* LID mask control */ u8 lmc; u8 link_width_supported; - u8 link_speed_supported; + u16 link_speed_supported; u8 link_width_enabled; - u8 link_speed_enabled; + u16 link_speed_enabled; u8 link_width_active; - u8 link_speed_active; + u16 link_speed_active; u8 vls_supported; u8 vls_operational; /* Rx Polarity inversion (compensate for ~tx on partner) */ diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index a10eab89aee4..189a0ce6056a 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -1733,9 +1733,9 @@ done: return; } -static void qib_error_tasklet(unsigned long data) +static void qib_error_tasklet(struct tasklet_struct *t) { - struct qib_devdata *dd = (struct qib_devdata *)data; + struct qib_devdata *dd = from_tasklet(dd, t, error_tasklet); handle_7322_errors(dd); qib_write_kreg(dd, kr_errmask, dd->cspec->errormask); @@ -3537,8 +3537,7 @@ try_intx: for (i = 0; i < ARRAY_SIZE(redirect); i++) qib_write_kreg(dd, kr_intredirect + i, redirect[i]); dd->cspec->main_int_mask = mask; - tasklet_init(&dd->error_tasklet, qib_error_tasklet, - (unsigned long)dd); + tasklet_setup(&dd->error_tasklet, qib_error_tasklet); } /** diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index e7789e724f56..f83e331977f8 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -2293,76 +2293,50 @@ static int process_cc(struct ib_device *ibdev, int mad_flags, struct ib_mad *out_mad) { struct ib_cc_mad *ccp = (struct ib_cc_mad *)out_mad; - int ret; - *out_mad = *in_mad; if (ccp->class_version != 2) { ccp->status |= IB_SMP_UNSUP_VERSION; - ret = reply((struct ib_smp *)ccp); - goto bail; + return reply((struct ib_smp *)ccp); } switch (ccp->method) { case IB_MGMT_METHOD_GET: switch (ccp->attr_id) { case IB_CC_ATTR_CLASSPORTINFO: - ret = cc_get_classportinfo(ccp, ibdev); - goto bail; - + return cc_get_classportinfo(ccp, ibdev); case IB_CC_ATTR_CONGESTION_INFO: - ret = cc_get_congestion_info(ccp, ibdev, port); - goto bail; - + return cc_get_congestion_info(ccp, ibdev, port); case IB_CC_ATTR_CA_CONGESTION_SETTING: - ret = cc_get_congestion_setting(ccp, ibdev, port); - goto bail; - + return cc_get_congestion_setting(ccp, ibdev, port); case IB_CC_ATTR_CONGESTION_CONTROL_TABLE: - ret = cc_get_congestion_control_table(ccp, ibdev, port); - goto bail; - - fallthrough; + return cc_get_congestion_control_table(ccp, ibdev, port); default: ccp->status |= IB_SMP_UNSUP_METH_ATTR; - ret = reply((struct ib_smp *) ccp); - goto bail; + return reply((struct ib_smp *) ccp); } - case IB_MGMT_METHOD_SET: switch (ccp->attr_id) { case IB_CC_ATTR_CA_CONGESTION_SETTING: - ret = cc_set_congestion_setting(ccp, ibdev, port); - goto bail; - + return cc_set_congestion_setting(ccp, ibdev, port); case 
IB_CC_ATTR_CONGESTION_CONTROL_TABLE: - ret = cc_set_congestion_control_table(ccp, ibdev, port); - goto bail; - - fallthrough; + return cc_set_congestion_control_table(ccp, ibdev, port); default: ccp->status |= IB_SMP_UNSUP_METH_ATTR; - ret = reply((struct ib_smp *) ccp); - goto bail; + return reply((struct ib_smp *) ccp); } - case IB_MGMT_METHOD_GET_RESP: /* * The ib_mad module will call us to process responses * before checking for other consumers. * Just tell the caller to process it normally. */ - ret = IB_MAD_RESULT_SUCCESS; - goto bail; - - case IB_MGMT_METHOD_TRAP: - default: - ccp->status |= IB_SMP_UNSUP_METHOD; - ret = reply((struct ib_smp *) ccp); + return IB_MAD_RESULT_SUCCESS; } -bail: - return ret; + /* method is unsupported */ + ccp->status |= IB_SMP_UNSUP_METHOD; + return reply((struct ib_smp *) ccp); } /** diff --git a/drivers/infiniband/hw/qib/qib_sdma.c b/drivers/infiniband/hw/qib/qib_sdma.c index 8f8d61736656..5e86cbf7d70e 100644 --- a/drivers/infiniband/hw/qib/qib_sdma.c +++ b/drivers/infiniband/hw/qib/qib_sdma.c @@ -62,7 +62,7 @@ static void sdma_get(struct qib_sdma_state *); static void sdma_put(struct qib_sdma_state *); static void sdma_set_state(struct qib_pportdata *, enum qib_sdma_states); static void sdma_start_sw_clean_up(struct qib_pportdata *); -static void sdma_sw_clean_up_task(unsigned long); +static void sdma_sw_clean_up_task(struct tasklet_struct *); static void unmap_desc(struct qib_pportdata *, unsigned); static void sdma_get(struct qib_sdma_state *ss) @@ -119,9 +119,10 @@ static void clear_sdma_activelist(struct qib_pportdata *ppd) } } -static void sdma_sw_clean_up_task(unsigned long opaque) +static void sdma_sw_clean_up_task(struct tasklet_struct *t) { - struct qib_pportdata *ppd = (struct qib_pportdata *) opaque; + struct qib_pportdata *ppd = from_tasklet(ppd, t, + sdma_sw_clean_up_task); unsigned long flags; spin_lock_irqsave(&ppd->sdma_lock, flags); @@ -436,8 +437,7 @@ int qib_setup_sdma(struct qib_pportdata *ppd) INIT_LIST_HEAD(&ppd->sdma_activelist); - tasklet_init(&ppd->sdma_sw_clean_up_task, sdma_sw_clean_up_task, - (unsigned long)ppd); + tasklet_setup(&ppd->sdma_sw_clean_up_task, sdma_sw_clean_up_task); ret = dd->f_init_sdma_regs(ppd); if (ret) diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c index 662e7fc7f628..aa2e65fc5cd6 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_main.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c @@ -315,7 +315,6 @@ static int usnic_port_immutable(struct ib_device *ibdev, u8 port_num, if (err) return err; - immutable->pkey_tbl_len = attr.pkey_tbl_len; immutable->gid_tbl_len = attr.gid_tbl_len; return 0; @@ -355,7 +354,6 @@ static const struct ib_device_ops usnic_dev_ops = { .modify_qp = usnic_ib_modify_qp, .query_device = usnic_ib_query_device, .query_gid = usnic_ib_query_gid, - .query_pkey = usnic_ib_query_pkey, .query_port = usnic_ib_query_port, .query_qp = usnic_ib_query_qp, .reg_user_mr = usnic_ib_reg_mr, @@ -427,7 +425,8 @@ static void *usnic_ib_device_add(struct pci_dev *dev) if (ret) goto err_fwd_dealloc; - if (ib_register_device(&us_ibdev->ib_dev, "usnic_%d")) + dma_set_max_seg_size(&dev->dev, SZ_2G); + if (ib_register_device(&us_ibdev->ib_dev, "usnic_%d", &dev->dev)) goto err_fwd_dealloc; usnic_fwd_set_mtu(us_ibdev->ufdev, us_ibdev->netdev->mtu); diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index b8a77ce11590..9e961f8ffa10 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ 
b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -367,7 +367,6 @@ int usnic_ib_query_port(struct ib_device *ibdev, u8 port, props->port_cap_flags = 0; props->gid_tbl_len = 1; - props->pkey_tbl_len = 1; props->bad_pkey_cntr = 0; props->qkey_viol_cntr = 0; props->max_mtu = IB_MTU_4096; @@ -437,16 +436,6 @@ int usnic_ib_query_gid(struct ib_device *ibdev, u8 port, int index, return 0; } -int usnic_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, - u16 *pkey) -{ - if (index > 0) - return -EINVAL; - - *pkey = 0xffff; - return 0; -} - int usnic_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct usnic_ib_pd *pd = to_upd(ibpd); @@ -460,9 +449,10 @@ int usnic_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) return 0; } -void usnic_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) +int usnic_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) { usnic_uiom_dealloc_pd((to_upd(pd))->umem_pd); + return 0; } struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd, @@ -596,9 +586,9 @@ int usnic_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, return 0; } -void usnic_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) +int usnic_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) { - return; + return 0; } struct ib_mr *usnic_ib_reg_mr(struct ib_pd *pd, u64 start, u64 length, diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h index 2aedf78c13cf..11fe1ba6bbc9 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h @@ -48,10 +48,8 @@ int usnic_ib_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, struct ib_qp_init_attr *qp_init_attr); int usnic_ib_query_gid(struct ib_device *ibdev, u8 port, int index, union ib_gid *gid); -int usnic_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, - u16 *pkey); int usnic_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata); -void usnic_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata); +int usnic_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata); struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata); @@ -60,7 +58,7 @@ int usnic_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); int usnic_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ib_udata *udata); -void usnic_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); +int usnic_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); struct ib_mr *usnic_ib_reg_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c index 4f6cc0de7ef9..319546a39a0d 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c @@ -142,7 +142,7 @@ int pvrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, goto err_cq; } - npages = ib_umem_page_count(cq->umem); + npages = ib_umem_num_dma_blocks(cq->umem, PAGE_SIZE); } else { /* One extra page for shared ring state */ npages = 1 + (entries * sizeof(struct pvrdma_cqe) + @@ -235,7 +235,7 @@ static void pvrdma_free_cq(struct pvrdma_dev *dev, struct pvrdma_cq *cq) * @cq: the completion queue to destroy. 
* @udata: user data or null for kernel object */ -void pvrdma_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) +int pvrdma_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) { struct pvrdma_cq *vcq = to_vcq(cq); union pvrdma_cmd_req req; @@ -261,6 +261,7 @@ void pvrdma_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) pvrdma_free_cq(dev, vcq); atomic_dec(&dev->num_cqs); + return 0; } static inline struct pvrdma_cqe *get_cqe(struct pvrdma_cq *cq, int i) @@ -375,7 +376,7 @@ retry: * pvrdma_poll_cq - poll for work completion queue entries * @ibcq: completion queue * @num_entries: the maximum number of entries - * @entry: pointer to work completion array + * @wc: pointer to work completion array * * @return: number of polled completion entries */ diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index 780fd2dfc07e..fa2a3fa0c3e4 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -270,7 +270,7 @@ static int pvrdma_register_device(struct pvrdma_dev *dev) spin_lock_init(&dev->srq_tbl_lock); rdma_set_device_sysfs_group(&dev->ib_dev, &pvrdma_attr_group); - ret = ib_register_device(&dev->ib_dev, "vmw_pvrdma%d"); + ret = ib_register_device(&dev->ib_dev, "vmw_pvrdma%d", &dev->pdev->dev); if (ret) goto err_srq_free; @@ -854,7 +854,7 @@ static int pvrdma_pci_probe(struct pci_dev *pdev, goto err_free_resource; } } - + dma_set_max_seg_size(&pdev->dev, UINT_MAX); pci_set_master(pdev); /* Map register space */ diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c index 7944c58ded0e..ba43ad07898c 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c @@ -182,17 +182,16 @@ int pvrdma_page_dir_insert_dma(struct pvrdma_page_dir *pdir, u64 idx, int pvrdma_page_dir_insert_umem(struct pvrdma_page_dir *pdir, struct ib_umem *umem, u64 offset) { + struct ib_block_iter biter; u64 i = offset; int ret = 0; - struct sg_dma_page_iter sg_iter; if (offset >= pdir->npages) return -EINVAL; - for_each_sg_dma_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) { - dma_addr_t addr = sg_page_iter_dma_address(&sg_iter); - - ret = pvrdma_page_dir_insert_dma(pdir, i, addr); + rdma_umem_for_each_dma_block (umem, &biter, PAGE_SIZE) { + ret = pvrdma_page_dir_insert_dma( + pdir, i, rdma_block_iter_dma_address(&biter)); if (ret) goto exit; diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c index 77a010e68208..e80848bfb3bd 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c @@ -133,7 +133,7 @@ struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, return ERR_CAST(umem); } - npages = ib_umem_num_pages(umem); + npages = ib_umem_num_dma_blocks(umem, PAGE_SIZE); if (npages < 0 || npages > PVRDMA_PAGE_DIR_MAX_PAGES) { dev_warn(&dev->pdev->dev, "overflow %d pages in mem region\n", npages); @@ -270,6 +270,7 @@ freemr: /** * pvrdma_dereg_mr - deregister a memory region * @ibmr: memory region + * @udata: pointer to user data * * @return: 0 on success. 
*/ diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c index 9a8f2a9507be..428256c55065 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c @@ -232,8 +232,7 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, switch (init_attr->qp_type) { case IB_QPT_GSI: if (init_attr->port_num == 0 || - init_attr->port_num > pd->device->phys_port_cnt || - udata) { + init_attr->port_num > pd->device->phys_port_cnt) { dev_warn(&dev->pdev->dev, "invalid queuepair attrs\n"); ret = -EINVAL; goto err_qp; @@ -298,9 +297,11 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, goto err_qp; } - qp->npages_send = ib_umem_page_count(qp->sumem); + qp->npages_send = + ib_umem_num_dma_blocks(qp->sumem, PAGE_SIZE); if (!is_srq) - qp->npages_recv = ib_umem_page_count(qp->rumem); + qp->npages_recv = ib_umem_num_dma_blocks( + qp->rumem, PAGE_SIZE); else qp->npages_recv = 0; qp->npages = qp->npages_send + qp->npages_recv; diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c index d330decfb80a..082208f9aa90 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c @@ -90,7 +90,7 @@ int pvrdma_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr) /** * pvrdma_create_srq - create shared receive queue - * @pd: protection domain + * @ibsrq: the IB shared receive queue * @init_attr: shared receive queue attributes * @udata: user data * @@ -152,7 +152,7 @@ int pvrdma_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr, goto err_srq; } - srq->npages = ib_umem_page_count(srq->umem); + srq->npages = ib_umem_num_dma_blocks(srq->umem, PAGE_SIZE); if (srq->npages < 0 || srq->npages > PVRDMA_PAGE_DIR_MAX_PAGES) { dev_warn(&dev->pdev->dev, @@ -240,7 +240,7 @@ static void pvrdma_free_srq(struct pvrdma_dev *dev, struct pvrdma_srq *srq) * * @return: 0 for success. */ -void pvrdma_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) +int pvrdma_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) { struct pvrdma_srq *vsrq = to_vsrq(srq); union pvrdma_cmd_req req; @@ -259,6 +259,7 @@ void pvrdma_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) ret); pvrdma_free_srq(dev, vsrq); + return 0; } /** diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c index ccbded2d26ce..fc412cbfd042 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c @@ -479,9 +479,9 @@ err: * @pd: the protection domain to be released * @udata: user data or null for kernel object * - * @return: 0 on success, otherwise errno. + * @return: Always 0 */ -void pvrdma_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) +int pvrdma_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) { struct pvrdma_dev *dev = to_vdev(pd->device); union pvrdma_cmd_req req = {}; @@ -498,14 +498,14 @@ void pvrdma_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) ret); atomic_dec(&dev->num_pds); + return 0; } /** * pvrdma_create_ah - create an address handle - * @pd: the protection domain - * @ah_attr: the attributes of the AH - * @udata: user data blob - * @flags: create address handle flags (see enum rdma_create_ah_flags) + * @ibah: the IB address handle + * @init_attr: the attributes of the AH + * @udata: pointer to user data * * @return: 0 on success, otherwise errno. 
*/ @@ -548,9 +548,10 @@ int pvrdma_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, * @flags: destroy address handle flags (see enum rdma_destroy_ah_flags) * */ -void pvrdma_destroy_ah(struct ib_ah *ah, u32 flags) +int pvrdma_destroy_ah(struct ib_ah *ah, u32 flags) { struct pvrdma_dev *dev = to_vdev(ah->device); atomic_dec(&dev->num_ahs); + return 0; } diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h index 699b20849a7e..f0e5ffba2d51 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h @@ -176,7 +176,7 @@ struct pvrdma_port_attr { u8 subnet_timeout; u8 init_type_reply; u8 active_width; - u8 active_speed; + u16 active_speed; u8 phys_state; u8 reserved[2]; }; @@ -399,7 +399,7 @@ int pvrdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); int pvrdma_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata); void pvrdma_dealloc_ucontext(struct ib_ucontext *context); int pvrdma_alloc_pd(struct ib_pd *pd, struct ib_udata *udata); -void pvrdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata); +int pvrdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata); struct ib_mr *pvrdma_get_dma_mr(struct ib_pd *pd, int acc); struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, @@ -411,19 +411,19 @@ int pvrdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); int pvrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ib_udata *udata); -void pvrdma_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); +int pvrdma_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); int pvrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); int pvrdma_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags); int pvrdma_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata); -void pvrdma_destroy_ah(struct ib_ah *ah, u32 flags); +int pvrdma_destroy_ah(struct ib_ah *ah, u32 flags); int pvrdma_create_srq(struct ib_srq *srq, struct ib_srq_init_attr *init_attr, struct ib_udata *udata); int pvrdma_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); int pvrdma_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); -void pvrdma_destroy_srq(struct ib_srq *srq, struct ib_udata *udata); +int pvrdma_destroy_srq(struct ib_srq *srq, struct ib_udata *udata); struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, |