From a2f409713e44ff59f704e00b3ec91fed1c19168b Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 20 Feb 2019 16:20:47 -0800 Subject: cxgb3: Convert cqidr to XArray It would make sense to convert this to an allocating XArray and remove the kfifo that is currently used to allocate the CQID, but that work is better done by someone who has the hardware to test with. Signed-off-by: Matthew Wilcox Acked-by: Steve Wise Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/cxgb3/iwch.c | 4 ++-- drivers/infiniband/hw/cxgb3/iwch.h | 4 ++-- drivers/infiniband/hw/cxgb3/iwch_provider.c | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/cxgb3/iwch.c b/drivers/infiniband/hw/cxgb3/iwch.c index fb03bc492ef7..15c1495755ac 100644 --- a/drivers/infiniband/hw/cxgb3/iwch.c +++ b/drivers/infiniband/hw/cxgb3/iwch.c @@ -105,7 +105,7 @@ static void iwch_db_drop_task(struct work_struct *work) static void rnic_init(struct iwch_dev *rnicp) { pr_debug("%s iwch_dev %p\n", __func__, rnicp); - idr_init(&rnicp->cqidr); + xa_init_flags(&rnicp->cqs, XA_FLAGS_LOCK_IRQ); idr_init(&rnicp->qpidr); idr_init(&rnicp->mmidr); spin_lock_init(&rnicp->lock); @@ -190,7 +190,7 @@ static void close_rnic_dev(struct t3cdev *tdev) list_del(&dev->entry); iwch_unregister_device(dev); cxio_rdev_close(&dev->rdev); - idr_destroy(&dev->cqidr); + WARN_ON(!xa_empty(&dev->cqs)); idr_destroy(&dev->qpidr); idr_destroy(&dev->mmidr); ib_dealloc_device(&dev->ibdev); diff --git a/drivers/infiniband/hw/cxgb3/iwch.h b/drivers/infiniband/hw/cxgb3/iwch.h index c69bc4f52049..d45de53392e7 100644 --- a/drivers/infiniband/hw/cxgb3/iwch.h +++ b/drivers/infiniband/hw/cxgb3/iwch.h @@ -106,7 +106,7 @@ struct iwch_dev { struct cxio_rdev rdev; u32 device_cap_flags; struct iwch_rnic_attributes attr; - struct idr cqidr; + struct xarray cqs; struct idr qpidr; struct idr mmidr; spinlock_t lock; @@ -136,7 +136,7 @@ static inline int t3a_device(const struct iwch_dev *rhp) static inline struct iwch_cq 
*get_chp(struct iwch_dev *rhp, u32 cqid) { - return idr_find(&rhp->cqidr, cqid); + return xa_load(&rhp->cqs, cqid); } static inline struct iwch_qp *get_qhp(struct iwch_dev *rhp, u32 qpid) diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index 4accf7b3dcf2..08e5ccf96394 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -95,7 +95,7 @@ static int iwch_destroy_cq(struct ib_cq *ib_cq) pr_debug("%s ib_cq %p\n", __func__, ib_cq); chp = to_iwch_cq(ib_cq); - remove_handle(chp->rhp, &chp->rhp->cqidr, chp->cq.cqid); + xa_erase_irq(&chp->rhp->cqs, chp->cq.cqid); atomic_dec(&chp->refcnt); wait_event(chp->wait, !atomic_read(&chp->refcnt)); @@ -164,7 +164,7 @@ static struct ib_cq *iwch_create_cq(struct ib_device *ibdev, spin_lock_init(&chp->comp_handler_lock); atomic_set(&chp->refcnt, 1); init_waitqueue_head(&chp->wait); - if (insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid)) { + if (xa_store_irq(&rhp->cqs, chp->cq.cqid, chp, GFP_KERNEL)) { cxio_destroy_cq(&chp->rhp->rdev, &chp->cq); kfree(chp); return ERR_PTR(-ENOMEM); -- cgit v1.2.3 From 27114876ceaa66064472f1f0957068b8cdc51a29 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 20 Feb 2019 16:20:48 -0800 Subject: cxgb3: Convert qpidr to XArray Signed-off-by: Matthew Wilcox Acked-by: Steve Wise Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/cxgb3/iwch.c | 47 ++++++++++++----------------- drivers/infiniband/hw/cxgb3/iwch.h | 4 +-- drivers/infiniband/hw/cxgb3/iwch_ev.c | 18 +++++------ drivers/infiniband/hw/cxgb3/iwch_provider.c | 4 +-- 4 files changed, 33 insertions(+), 40 deletions(-) diff --git a/drivers/infiniband/hw/cxgb3/iwch.c b/drivers/infiniband/hw/cxgb3/iwch.c index 15c1495755ac..578bc9ed90b7 100644 --- a/drivers/infiniband/hw/cxgb3/iwch.c +++ b/drivers/infiniband/hw/cxgb3/iwch.c @@ -62,37 +62,30 @@ struct cxgb3_client t3c_client = { static LIST_HEAD(dev_list); static 
DEFINE_MUTEX(dev_mutex); -static int disable_qp_db(int id, void *p, void *data) -{ - struct iwch_qp *qhp = p; - - cxio_disable_wq_db(&qhp->wq); - return 0; -} - -static int enable_qp_db(int id, void *p, void *data) -{ - struct iwch_qp *qhp = p; - - if (data) - ring_doorbell(qhp->rhp->rdev.ctrl_qp.doorbell, qhp->wq.qpid); - cxio_enable_wq_db(&qhp->wq); - return 0; -} - static void disable_dbs(struct iwch_dev *rnicp) { - spin_lock_irq(&rnicp->lock); - idr_for_each(&rnicp->qpidr, disable_qp_db, NULL); - spin_unlock_irq(&rnicp->lock); + unsigned long index; + struct iwch_qp *qhp; + + xa_lock_irq(&rnicp->qps); + xa_for_each(&rnicp->qps, index, qhp) + cxio_disable_wq_db(&qhp->wq); + xa_unlock_irq(&rnicp->qps); } static void enable_dbs(struct iwch_dev *rnicp, int ring_db) { - spin_lock_irq(&rnicp->lock); - idr_for_each(&rnicp->qpidr, enable_qp_db, - (void *)(unsigned long)ring_db); - spin_unlock_irq(&rnicp->lock); + unsigned long index; + struct iwch_qp *qhp; + + xa_lock_irq(&rnicp->qps); + xa_for_each(&rnicp->qps, index, qhp) { + if (ring_db) + ring_doorbell(qhp->rhp->rdev.ctrl_qp.doorbell, + qhp->wq.qpid); + cxio_enable_wq_db(&qhp->wq); + } + xa_unlock_irq(&rnicp->qps); } static void iwch_db_drop_task(struct work_struct *work) @@ -106,7 +99,7 @@ static void rnic_init(struct iwch_dev *rnicp) { pr_debug("%s iwch_dev %p\n", __func__, rnicp); xa_init_flags(&rnicp->cqs, XA_FLAGS_LOCK_IRQ); - idr_init(&rnicp->qpidr); + xa_init_flags(&rnicp->qps, XA_FLAGS_LOCK_IRQ); idr_init(&rnicp->mmidr); spin_lock_init(&rnicp->lock); INIT_DELAYED_WORK(&rnicp->db_drop_task, iwch_db_drop_task); @@ -191,7 +184,7 @@ static void close_rnic_dev(struct t3cdev *tdev) iwch_unregister_device(dev); cxio_rdev_close(&dev->rdev); WARN_ON(!xa_empty(&dev->cqs)); - idr_destroy(&dev->qpidr); + WARN_ON(!xa_empty(&dev->qps)); idr_destroy(&dev->mmidr); ib_dealloc_device(&dev->ibdev); break; diff --git a/drivers/infiniband/hw/cxgb3/iwch.h b/drivers/infiniband/hw/cxgb3/iwch.h index d45de53392e7..70e086946d30 
100644 --- a/drivers/infiniband/hw/cxgb3/iwch.h +++ b/drivers/infiniband/hw/cxgb3/iwch.h @@ -107,7 +107,7 @@ struct iwch_dev { u32 device_cap_flags; struct iwch_rnic_attributes attr; struct xarray cqs; - struct idr qpidr; + struct xarray qps; struct idr mmidr; spinlock_t lock; struct list_head entry; @@ -141,7 +141,7 @@ static inline struct iwch_cq *get_chp(struct iwch_dev *rhp, u32 cqid) static inline struct iwch_qp *get_qhp(struct iwch_dev *rhp, u32 qpid) { - return idr_find(&rhp->qpidr, qpid); + return xa_load(&rhp->qps, qpid); } static inline struct iwch_mr *get_mhp(struct iwch_dev *rhp, u32 mmid) diff --git a/drivers/infiniband/hw/cxgb3/iwch_ev.c b/drivers/infiniband/hw/cxgb3/iwch_ev.c index 4a0c82a8fb60..9d356c1301c7 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_ev.c +++ b/drivers/infiniband/hw/cxgb3/iwch_ev.c @@ -48,14 +48,14 @@ static void post_qp_event(struct iwch_dev *rnicp, struct iwch_cq *chp, struct iwch_qp *qhp; unsigned long flag; - spin_lock(&rnicp->lock); - qhp = get_qhp(rnicp, CQE_QPID(rsp_msg->cqe)); + xa_lock(&rnicp->qps); + qhp = xa_load(&rnicp->qps, CQE_QPID(rsp_msg->cqe)); if (!qhp) { pr_err("%s unaffiliated error 0x%x qpid 0x%x\n", __func__, CQE_STATUS(rsp_msg->cqe), CQE_QPID(rsp_msg->cqe)); - spin_unlock(&rnicp->lock); + xa_unlock(&rnicp->qps); return; } @@ -65,7 +65,7 @@ static void post_qp_event(struct iwch_dev *rnicp, struct iwch_cq *chp, __func__, qhp->attr.state, qhp->wq.qpid, CQE_STATUS(rsp_msg->cqe)); - spin_unlock(&rnicp->lock); + xa_unlock(&rnicp->qps); return; } @@ -76,7 +76,7 @@ static void post_qp_event(struct iwch_dev *rnicp, struct iwch_cq *chp, CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe)); atomic_inc(&qhp->refcnt); - spin_unlock(&rnicp->lock); + xa_unlock(&rnicp->qps); if (qhp->attr.state == IWCH_QP_STATE_RTS) { attrs.next_state = IWCH_QP_STATE_TERMINATE; @@ -114,21 +114,21 @@ void iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct sk_buff *skb) unsigned long flag; rnicp = (struct iwch_dev *) rdev_p->ulp; - 
spin_lock(&rnicp->lock); + xa_lock(&rnicp->qps); chp = get_chp(rnicp, cqid); - qhp = get_qhp(rnicp, CQE_QPID(rsp_msg->cqe)); + qhp = xa_load(&rnicp->qps, CQE_QPID(rsp_msg->cqe)); if (!chp || !qhp) { pr_err("BAD AE cqid 0x%x qpid 0x%x opcode %d status 0x%x type %d wrid.hi 0x%x wrid.lo 0x%x\n", cqid, CQE_QPID(rsp_msg->cqe), CQE_OPCODE(rsp_msg->cqe), CQE_STATUS(rsp_msg->cqe), CQE_TYPE(rsp_msg->cqe), CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe)); - spin_unlock(&rnicp->lock); + xa_unlock(&rnicp->qps); goto out; } iwch_qp_add_ref(&qhp->ibqp); atomic_inc(&chp->refcnt); - spin_unlock(&rnicp->lock); + xa_unlock(&rnicp->qps); /* * 1) completion of our sending a TERMINATE. diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index 08e5ccf96394..7dcac117d4aa 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -756,7 +756,7 @@ static int iwch_destroy_qp(struct ib_qp *ib_qp) iwch_modify_qp(rhp, qhp, IWCH_QP_ATTR_NEXT_STATE, &attrs, 0); wait_event(qhp->wait, !qhp->ep); - remove_handle(rhp, &rhp->qpidr, qhp->wq.qpid); + xa_erase_irq(&rhp->qps, qhp->wq.qpid); atomic_dec(&qhp->refcnt); wait_event(qhp->wait, !atomic_read(&qhp->refcnt)); @@ -872,7 +872,7 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd, init_waitqueue_head(&qhp->wait); atomic_set(&qhp->refcnt, 1); - if (insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.qpid)) { + if (xa_store_irq(&rhp->qps, qhp->wq.qpid, qhp, GFP_KERNEL)) { cxio_destroy_qp(&rhp->rdev, &qhp->wq, ucontext ? 
&ucontext->uctx : &rhp->rdev.uctx); kfree(qhp); -- cgit v1.2.3 From e64a7c02f101b91840f6a5ccde6937d67ffbd825 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 20 Feb 2019 16:20:49 -0800 Subject: cxgb3: Convert mmidr to XArray Signed-off-by: Matthew Wilcox Acked-by: Steve Wise Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/cxgb3/iwch.c | 5 ++--- drivers/infiniband/hw/cxgb3/iwch.h | 30 +++-------------------------- drivers/infiniband/hw/cxgb3/iwch_mem.c | 2 +- drivers/infiniband/hw/cxgb3/iwch_provider.c | 8 ++++---- 4 files changed, 10 insertions(+), 35 deletions(-) diff --git a/drivers/infiniband/hw/cxgb3/iwch.c b/drivers/infiniband/hw/cxgb3/iwch.c index 578bc9ed90b7..56a8ab6210cf 100644 --- a/drivers/infiniband/hw/cxgb3/iwch.c +++ b/drivers/infiniband/hw/cxgb3/iwch.c @@ -100,8 +100,7 @@ static void rnic_init(struct iwch_dev *rnicp) pr_debug("%s iwch_dev %p\n", __func__, rnicp); xa_init_flags(&rnicp->cqs, XA_FLAGS_LOCK_IRQ); xa_init_flags(&rnicp->qps, XA_FLAGS_LOCK_IRQ); - idr_init(&rnicp->mmidr); - spin_lock_init(&rnicp->lock); + xa_init_flags(&rnicp->mrs, XA_FLAGS_LOCK_IRQ); INIT_DELAYED_WORK(&rnicp->db_drop_task, iwch_db_drop_task); rnicp->attr.max_qps = T3_MAX_NUM_QP - 32; @@ -185,7 +184,7 @@ static void close_rnic_dev(struct t3cdev *tdev) cxio_rdev_close(&dev->rdev); WARN_ON(!xa_empty(&dev->cqs)); WARN_ON(!xa_empty(&dev->qps)); - idr_destroy(&dev->mmidr); + WARN_ON(!xa_empty(&dev->mrs)); ib_dealloc_device(&dev->ibdev); break; } diff --git a/drivers/infiniband/hw/cxgb3/iwch.h b/drivers/infiniband/hw/cxgb3/iwch.h index 70e086946d30..310a937bffcf 100644 --- a/drivers/infiniband/hw/cxgb3/iwch.h +++ b/drivers/infiniband/hw/cxgb3/iwch.h @@ -35,7 +35,7 @@ #include <linux/mutex.h> #include <linux/list.h> #include <linux/spinlock.h> -#include <linux/idr.h> +#include <linux/xarray.h> #include <linux/workqueue.h> #include <rdma/ib_verbs.h> @@ -108,8 +108,7 @@ struct iwch_dev { struct iwch_rnic_attributes attr; struct xarray cqs; struct xarray qps; - struct idr mmidr; - spinlock_t lock; + struct xarray mrs; struct list_head entry; struct delayed_work db_drop_task; }; @@ 
-146,30 +145,7 @@ static inline struct iwch_qp *get_qhp(struct iwch_dev *rhp, u32 qpid) static inline struct iwch_mr *get_mhp(struct iwch_dev *rhp, u32 mmid) { - return idr_find(&rhp->mmidr, mmid); -} - -static inline int insert_handle(struct iwch_dev *rhp, struct idr *idr, - void *handle, u32 id) -{ - int ret; - - idr_preload(GFP_KERNEL); - spin_lock_irq(&rhp->lock); - - ret = idr_alloc(idr, handle, id, id + 1, GFP_NOWAIT); - - spin_unlock_irq(&rhp->lock); - idr_preload_end(); - - return ret < 0 ? ret : 0; -} - -static inline void remove_handle(struct iwch_dev *rhp, struct idr *idr, u32 id) -{ - spin_lock_irq(&rhp->lock); - idr_remove(idr, id); - spin_unlock_irq(&rhp->lock); + return xa_load(&rhp->mrs, mmid); } extern struct cxgb3_client t3c_client; diff --git a/drivers/infiniband/hw/cxgb3/iwch_mem.c b/drivers/infiniband/hw/cxgb3/iwch_mem.c index 12886b1b4b10..ce0f2741821d 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_mem.c +++ b/drivers/infiniband/hw/cxgb3/iwch_mem.c @@ -49,7 +49,7 @@ static int iwch_finish_mem_reg(struct iwch_mr *mhp, u32 stag) mmid = stag >> 8; mhp->ibmr.rkey = mhp->ibmr.lkey = stag; pr_debug("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp); - return insert_handle(mhp->rhp, &mhp->rhp->mmidr, mhp, mmid); + return xa_insert_irq(&mhp->rhp->mrs, mmid, mhp, GFP_KERNEL); } int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php, diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index 7dcac117d4aa..c9a1fb323b5c 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -421,7 +421,7 @@ static int iwch_dereg_mr(struct ib_mr *ib_mr) cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, mhp->attr.pbl_addr); iwch_free_pbl(mhp); - remove_handle(rhp, &rhp->mmidr, mmid); + xa_erase_irq(&rhp->mrs, mmid); if (mhp->kva) kfree((void *) (unsigned long) mhp->kva); if (mhp->umem) @@ -636,7 +636,7 @@ static struct ib_mw *iwch_alloc_mw(struct ib_pd 
*pd, enum ib_mw_type type, mhp->attr.stag = stag; mmid = (stag) >> 8; mhp->ibmw.rkey = stag; - if (insert_handle(rhp, &rhp->mmidr, mhp, mmid)) { + if (xa_insert_irq(&rhp->mrs, mmid, mhp, GFP_KERNEL)) { cxio_deallocate_window(&rhp->rdev, mhp->attr.stag); kfree(mhp); return ERR_PTR(-ENOMEM); @@ -655,7 +655,7 @@ static int iwch_dealloc_mw(struct ib_mw *mw) rhp = mhp->rhp; mmid = (mw->rkey) >> 8; cxio_deallocate_window(&rhp->rdev, mhp->attr.stag); - remove_handle(rhp, &rhp->mmidr, mmid); + xa_erase_irq(&rhp->mrs, mmid); pr_debug("%s ib_mw %p mmid 0x%x ptr %p\n", __func__, mw, mmid, mhp); kfree(mhp); return 0; @@ -701,7 +701,7 @@ static struct ib_mr *iwch_alloc_mr(struct ib_pd *pd, mhp->attr.state = 1; mmid = (stag) >> 8; mhp->ibmr.rkey = mhp->ibmr.lkey = stag; - ret = insert_handle(rhp, &rhp->mmidr, mhp, mmid); + ret = xa_insert_irq(&rhp->mrs, mmid, mhp, GFP_KERNEL); if (ret) goto err3; -- cgit v1.2.3 From 52e124c27e7bfb78980189bdfec049594d7612be Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 20 Feb 2019 16:20:50 -0800 Subject: cxgb4: Convert cqidr to XArray Signed-off-by: Matthew Wilcox Acked-by: Steve Wise Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/cxgb4/cq.c | 6 +++--- drivers/infiniband/hw/cxgb4/device.c | 5 ++--- drivers/infiniband/hw/cxgb4/ev.c | 8 ++++---- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 4 ++-- 4 files changed, 11 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index 1fd8798d91a7..1fa5f6445be3 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -976,7 +976,7 @@ int c4iw_destroy_cq(struct ib_cq *ib_cq) pr_debug("ib_cq %p\n", ib_cq); chp = to_c4iw_cq(ib_cq); - remove_handle(chp->rhp, &chp->rhp->cqidr, chp->cq.cqid); + xa_erase_irq(&chp->rhp->cqs, chp->cq.cqid); atomic_dec(&chp->refcnt); wait_event(chp->wait, !atomic_read(&chp->refcnt)); @@ -1088,7 +1088,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, 
spin_lock_init(&chp->comp_handler_lock); atomic_set(&chp->refcnt, 1); init_waitqueue_head(&chp->wait); - ret = insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid); + ret = xa_insert_irq(&rhp->cqs, chp->cq.cqid, chp, GFP_KERNEL); if (ret) goto err_destroy_cq; @@ -1143,7 +1143,7 @@ err_free_mm2: err_free_mm: kfree(mm); err_remove_handle: - remove_handle(rhp, &rhp->cqidr, chp->cq.cqid); + xa_erase_irq(&rhp->cqs, chp->cq.cqid); err_destroy_cq: destroy_cq(&chp->rhp->rdev, &chp->cq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx, diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index c79cf63fb0bb..16eee5380fe8 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -931,8 +931,7 @@ static void c4iw_rdev_close(struct c4iw_rdev *rdev) void c4iw_dealloc(struct uld_ctx *ctx) { c4iw_rdev_close(&ctx->dev->rdev); - WARN_ON_ONCE(!idr_is_empty(&ctx->dev->cqidr)); - idr_destroy(&ctx->dev->cqidr); + WARN_ON(!xa_empty(&ctx->dev->cqs)); WARN_ON_ONCE(!idr_is_empty(&ctx->dev->qpidr)); idr_destroy(&ctx->dev->qpidr); WARN_ON_ONCE(!idr_is_empty(&ctx->dev->mmidr)); @@ -1044,7 +1043,7 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop) return ERR_PTR(ret); } - idr_init(&devp->cqidr); + xa_init_flags(&devp->cqs, XA_FLAGS_LOCK_IRQ); idr_init(&devp->qpidr); idr_init(&devp->mmidr); idr_init(&devp->hwtid_idr); diff --git a/drivers/infiniband/hw/cxgb4/ev.c b/drivers/infiniband/hw/cxgb4/ev.c index 8741d23168f3..670c2c802e45 100644 --- a/drivers/infiniband/hw/cxgb4/ev.c +++ b/drivers/infiniband/hw/cxgb4/ev.c @@ -225,11 +225,11 @@ int c4iw_ev_handler(struct c4iw_dev *dev, u32 qid) struct c4iw_cq *chp; unsigned long flag; - spin_lock_irqsave(&dev->lock, flag); - chp = get_chp(dev, qid); + xa_lock_irqsave(&dev->cqs, flag); + chp = xa_load(&dev->cqs, qid); if (chp) { atomic_inc(&chp->refcnt); - spin_unlock_irqrestore(&dev->lock, flag); + xa_unlock_irqrestore(&dev->cqs, flag); t4_clear_cq_armed(&chp->cq); 
spin_lock_irqsave(&chp->comp_handler_lock, flag); (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context); @@ -238,7 +238,7 @@ int c4iw_ev_handler(struct c4iw_dev *dev, u32 qid) wake_up(&chp->wait); } else { pr_debug("unknown cqid 0x%x\n", qid); - spin_unlock_irqrestore(&dev->lock, flag); + xa_unlock_irqrestore(&dev->cqs, flag); } return 0; } diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 5a5da41faef6..4ca5800023c1 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -315,7 +315,7 @@ struct c4iw_dev { struct ib_device ibdev; struct c4iw_rdev rdev; u32 device_cap_flags; - struct idr cqidr; + struct xarray cqs; struct idr qpidr; struct idr mmidr; spinlock_t lock; @@ -349,7 +349,7 @@ static inline struct c4iw_dev *rdev_to_c4iw_dev(struct c4iw_rdev *rdev) static inline struct c4iw_cq *get_chp(struct c4iw_dev *rhp, u32 cqid) { - return idr_find(&rhp->cqidr, cqid); + return xa_load(&rhp->cqs, cqid); } static inline struct c4iw_qp *get_qhp(struct c4iw_dev *rhp, u32 qpid) -- cgit v1.2.3 From 2f43129127e62b25f56ff82a37c40b42c0e6f883 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 20 Feb 2019 16:20:51 -0800 Subject: cxgb4: Convert qpidr to XArray Signed-off-by: Matthew Wilcox Acked-by: Steve Wise Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/cxgb4/device.c | 121 ++++++++++++++------------------- drivers/infiniband/hw/cxgb4/ev.c | 10 +-- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 4 +- drivers/infiniband/hw/cxgb4/qp.c | 33 +++++---- 4 files changed, 73 insertions(+), 95 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index 16eee5380fe8..834803ec7e15 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -250,16 +250,11 @@ static void set_ep_sin6_addrs(struct c4iw_ep *ep, } } -static int dump_qp(int id, void *p, void *data) +static int dump_qp(struct c4iw_qp *qp, 
struct c4iw_debugfs_data *qpd) { - struct c4iw_qp *qp = p; - struct c4iw_debugfs_data *qpd = data; int space; int cc; - if (id != qp->wq.sq.qid) - return 0; - space = qpd->bufsize - qpd->pos - 1; if (space == 0) return 1; @@ -335,7 +330,9 @@ static int qp_release(struct inode *inode, struct file *file) static int qp_open(struct inode *inode, struct file *file) { + struct c4iw_qp *qp; struct c4iw_debugfs_data *qpd; + unsigned long index; int count = 1; qpd = kmalloc(sizeof *qpd, GFP_KERNEL); @@ -345,9 +342,12 @@ static int qp_open(struct inode *inode, struct file *file) qpd->devp = inode->i_private; qpd->pos = 0; - spin_lock_irq(&qpd->devp->lock); - idr_for_each(&qpd->devp->qpidr, count_idrs, &count); - spin_unlock_irq(&qpd->devp->lock); + /* + * No need to lock; we drop the lock to call vmalloc so it's racy + * anyway. Someone who cares should switch this over to seq_file + */ + xa_for_each(&qpd->devp->qps, index, qp) + count++; qpd->bufsize = count * 180; qpd->buf = vmalloc(qpd->bufsize); @@ -356,9 +356,10 @@ static int qp_open(struct inode *inode, struct file *file) return -ENOMEM; } - spin_lock_irq(&qpd->devp->lock); - idr_for_each(&qpd->devp->qpidr, dump_qp, qpd); - spin_unlock_irq(&qpd->devp->lock); + xa_lock_irq(&qpd->devp->qps); + xa_for_each(&qpd->devp->qps, index, qp) + dump_qp(qp, qpd); + xa_unlock_irq(&qpd->devp->qps); qpd->buf[qpd->pos++] = 0; file->private_data = qpd; @@ -932,8 +933,7 @@ void c4iw_dealloc(struct uld_ctx *ctx) { c4iw_rdev_close(&ctx->dev->rdev); WARN_ON(!xa_empty(&ctx->dev->cqs)); - WARN_ON_ONCE(!idr_is_empty(&ctx->dev->qpidr)); - idr_destroy(&ctx->dev->qpidr); + WARN_ON(!xa_empty(&ctx->dev->qps)); WARN_ON_ONCE(!idr_is_empty(&ctx->dev->mmidr)); idr_destroy(&ctx->dev->mmidr); wait_event(ctx->dev->wait, idr_is_empty(&ctx->dev->hwtid_idr)); @@ -1044,7 +1044,7 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop) } xa_init_flags(&devp->cqs, XA_FLAGS_LOCK_IRQ); - idr_init(&devp->qpidr); + xa_init_flags(&devp->qps, 
XA_FLAGS_LOCK_IRQ); idr_init(&devp->mmidr); idr_init(&devp->hwtid_idr); idr_init(&devp->stid_idr); @@ -1264,34 +1264,21 @@ static int c4iw_uld_state_change(void *handle, enum cxgb4_state new_state) return 0; } -static int disable_qp_db(int id, void *p, void *data) -{ - struct c4iw_qp *qp = p; - - t4_disable_wq_db(&qp->wq); - return 0; -} - static void stop_queues(struct uld_ctx *ctx) { - unsigned long flags; + struct c4iw_qp *qp; + unsigned long index, flags; - spin_lock_irqsave(&ctx->dev->lock, flags); + xa_lock_irqsave(&ctx->dev->qps, flags); ctx->dev->rdev.stats.db_state_transitions++; ctx->dev->db_state = STOPPED; - if (ctx->dev->rdev.flags & T4_STATUS_PAGE_DISABLED) - idr_for_each(&ctx->dev->qpidr, disable_qp_db, NULL); - else + if (ctx->dev->rdev.flags & T4_STATUS_PAGE_DISABLED) { + xa_for_each(&ctx->dev->qps, index, qp) + t4_disable_wq_db(&qp->wq); + } else { ctx->dev->rdev.status_page->db_off = 1; - spin_unlock_irqrestore(&ctx->dev->lock, flags); -} - -static int enable_qp_db(int id, void *p, void *data) -{ - struct c4iw_qp *qp = p; - - t4_enable_wq_db(&qp->wq); - return 0; + } + xa_unlock_irqrestore(&ctx->dev->qps, flags); } static void resume_rc_qp(struct c4iw_qp *qp) @@ -1321,18 +1308,21 @@ static void resume_a_chunk(struct uld_ctx *ctx) static void resume_queues(struct uld_ctx *ctx) { - spin_lock_irq(&ctx->dev->lock); + xa_lock_irq(&ctx->dev->qps); if (ctx->dev->db_state != STOPPED) goto out; ctx->dev->db_state = FLOW_CONTROL; while (1) { if (list_empty(&ctx->dev->db_fc_list)) { + struct c4iw_qp *qp; + unsigned long index; + WARN_ON(ctx->dev->db_state != FLOW_CONTROL); ctx->dev->db_state = NORMAL; ctx->dev->rdev.stats.db_state_transitions++; if (ctx->dev->rdev.flags & T4_STATUS_PAGE_DISABLED) { - idr_for_each(&ctx->dev->qpidr, enable_qp_db, - NULL); + xa_for_each(&ctx->dev->qps, index, qp) + t4_enable_wq_db(&qp->wq); } else { ctx->dev->rdev.status_page->db_off = 0; } @@ -1344,12 +1334,12 @@ static void resume_queues(struct uld_ctx *ctx) 
resume_a_chunk(ctx); } if (!list_empty(&ctx->dev->db_fc_list)) { - spin_unlock_irq(&ctx->dev->lock); + xa_unlock_irq(&ctx->dev->qps); if (DB_FC_RESUME_DELAY) { set_current_state(TASK_UNINTERRUPTIBLE); schedule_timeout(DB_FC_RESUME_DELAY); } - spin_lock_irq(&ctx->dev->lock); + xa_lock_irq(&ctx->dev->qps); if (ctx->dev->db_state != FLOW_CONTROL) break; } @@ -1358,7 +1348,7 @@ static void resume_queues(struct uld_ctx *ctx) out: if (ctx->dev->db_state != NORMAL) ctx->dev->rdev.stats.db_fc_interruptions++; - spin_unlock_irq(&ctx->dev->lock); + xa_unlock_irq(&ctx->dev->qps); } struct qp_list { @@ -1366,23 +1356,6 @@ struct qp_list { struct c4iw_qp **qps; }; -static int add_and_ref_qp(int id, void *p, void *data) -{ - struct qp_list *qp_listp = data; - struct c4iw_qp *qp = p; - - c4iw_qp_add_ref(&qp->ibqp); - qp_listp->qps[qp_listp->idx++] = qp; - return 0; -} - -static int count_qps(int id, void *p, void *data) -{ - unsigned *countp = data; - (*countp)++; - return 0; -} - static void deref_qps(struct qp_list *qp_list) { int idx; @@ -1399,7 +1372,7 @@ static void recover_lost_dbs(struct uld_ctx *ctx, struct qp_list *qp_list) for (idx = 0; idx < qp_list->idx; idx++) { struct c4iw_qp *qp = qp_list->qps[idx]; - spin_lock_irq(&qp->rhp->lock); + xa_lock_irq(&qp->rhp->qps); spin_lock(&qp->lock); ret = cxgb4_sync_txq_pidx(qp->rhp->rdev.lldi.ports[0], qp->wq.sq.qid, @@ -1409,7 +1382,7 @@ static void recover_lost_dbs(struct uld_ctx *ctx, struct qp_list *qp_list) pr_err("%s: Fatal error - DB overflow recovery failed - error syncing SQ qid %u\n", pci_name(ctx->lldi.pdev), qp->wq.sq.qid); spin_unlock(&qp->lock); - spin_unlock_irq(&qp->rhp->lock); + xa_unlock_irq(&qp->rhp->qps); return; } qp->wq.sq.wq_pidx_inc = 0; @@ -1423,12 +1396,12 @@ static void recover_lost_dbs(struct uld_ctx *ctx, struct qp_list *qp_list) pr_err("%s: Fatal error - DB overflow recovery failed - error syncing RQ qid %u\n", pci_name(ctx->lldi.pdev), qp->wq.rq.qid); spin_unlock(&qp->lock); - 
spin_unlock_irq(&qp->rhp->lock); + xa_unlock_irq(&qp->rhp->qps); return; } qp->wq.rq.wq_pidx_inc = 0; spin_unlock(&qp->lock); - spin_unlock_irq(&qp->rhp->lock); + xa_unlock_irq(&qp->rhp->qps); /* Wait for the dbfifo to drain */ while (cxgb4_dbfifo_count(qp->rhp->rdev.lldi.ports[0], 1) > 0) { @@ -1440,6 +1413,8 @@ static void recover_lost_dbs(struct uld_ctx *ctx, struct qp_list *qp_list) static void recover_queues(struct uld_ctx *ctx) { + struct c4iw_qp *qp; + unsigned long index; int count = 0; struct qp_list qp_list; int ret; @@ -1457,22 +1432,26 @@ static void recover_queues(struct uld_ctx *ctx) } /* Count active queues so we can build a list of queues to recover */ - spin_lock_irq(&ctx->dev->lock); + xa_lock_irq(&ctx->dev->qps); WARN_ON(ctx->dev->db_state != STOPPED); ctx->dev->db_state = RECOVERY; - idr_for_each(&ctx->dev->qpidr, count_qps, &count); + xa_for_each(&ctx->dev->qps, index, qp) + count++; qp_list.qps = kcalloc(count, sizeof(*qp_list.qps), GFP_ATOMIC); if (!qp_list.qps) { - spin_unlock_irq(&ctx->dev->lock); + xa_unlock_irq(&ctx->dev->qps); return; } qp_list.idx = 0; /* add and ref each qp so it doesn't get freed */ - idr_for_each(&ctx->dev->qpidr, add_and_ref_qp, &qp_list); + xa_for_each(&ctx->dev->qps, index, qp) { + c4iw_qp_add_ref(&qp->ibqp); + qp_list.qps[qp_list.idx++] = qp; + } - spin_unlock_irq(&ctx->dev->lock); + xa_unlock_irq(&ctx->dev->qps); /* now traverse the list in a safe context to recover the db state*/ recover_lost_dbs(ctx, &qp_list); @@ -1481,10 +1460,10 @@ static void recover_queues(struct uld_ctx *ctx) deref_qps(&qp_list); kfree(qp_list.qps); - spin_lock_irq(&ctx->dev->lock); + xa_lock_irq(&ctx->dev->qps); WARN_ON(ctx->dev->db_state != RECOVERY); ctx->dev->db_state = STOPPED; - spin_unlock_irq(&ctx->dev->lock); + xa_unlock_irq(&ctx->dev->qps); } static int c4iw_uld_control(void *handle, enum cxgb4_control control, ...) 
diff --git a/drivers/infiniband/hw/cxgb4/ev.c b/drivers/infiniband/hw/cxgb4/ev.c index 670c2c802e45..4cd877bd2f56 100644 --- a/drivers/infiniband/hw/cxgb4/ev.c +++ b/drivers/infiniband/hw/cxgb4/ev.c @@ -123,15 +123,15 @@ void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe) struct c4iw_qp *qhp; u32 cqid; - spin_lock_irq(&dev->lock); - qhp = get_qhp(dev, CQE_QPID(err_cqe)); + xa_lock_irq(&dev->qps); + qhp = xa_load(&dev->qps, CQE_QPID(err_cqe)); if (!qhp) { pr_err("BAD AE qpid 0x%x opcode %d status 0x%x type %d wrid.hi 0x%x wrid.lo 0x%x\n", CQE_QPID(err_cqe), CQE_OPCODE(err_cqe), CQE_STATUS(err_cqe), CQE_TYPE(err_cqe), CQE_WRID_HI(err_cqe), CQE_WRID_LOW(err_cqe)); - spin_unlock_irq(&dev->lock); + xa_unlock_irq(&dev->qps); goto out; } @@ -146,13 +146,13 @@ void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe) CQE_OPCODE(err_cqe), CQE_STATUS(err_cqe), CQE_TYPE(err_cqe), CQE_WRID_HI(err_cqe), CQE_WRID_LOW(err_cqe)); - spin_unlock_irq(&dev->lock); + xa_unlock_irq(&dev->qps); goto out; } c4iw_qp_add_ref(&qhp->ibqp); atomic_inc(&chp->refcnt); - spin_unlock_irq(&dev->lock); + xa_unlock_irq(&dev->qps); /* Bad incoming write */ if (RQ_TYPE(err_cqe) && diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 4ca5800023c1..e24bd1f8955e 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -316,7 +316,7 @@ struct c4iw_dev { struct c4iw_rdev rdev; u32 device_cap_flags; struct xarray cqs; - struct idr qpidr; + struct xarray qps; struct idr mmidr; spinlock_t lock; struct mutex db_mutex; @@ -354,7 +354,7 @@ static inline struct c4iw_cq *get_chp(struct c4iw_dev *rhp, u32 cqid) static inline struct c4iw_qp *get_qhp(struct c4iw_dev *rhp, u32 qpid) { - return idr_find(&rhp->qpidr, qpid); + return xa_load(&rhp->qps, qpid); } static inline struct c4iw_mr *get_mhp(struct c4iw_dev *rhp, u32 mmid) diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c 
index d3a82839f5ea..b2ae5b40cc3e 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -63,12 +63,12 @@ static int alloc_ird(struct c4iw_dev *dev, u32 ird) { int ret = 0; - spin_lock_irq(&dev->lock); + xa_lock_irq(&dev->qps); if (ird <= dev->avail_ird) dev->avail_ird -= ird; else ret = -ENOMEM; - spin_unlock_irq(&dev->lock); + xa_unlock_irq(&dev->qps); if (ret) dev_warn(&dev->rdev.lldi.pdev->dev, @@ -79,9 +79,9 @@ static int alloc_ird(struct c4iw_dev *dev, u32 ird) static void free_ird(struct c4iw_dev *dev, int ird) { - spin_lock_irq(&dev->lock); + xa_lock_irq(&dev->qps); dev->avail_ird += ird; - spin_unlock_irq(&dev->lock); + xa_unlock_irq(&dev->qps); } static void set_state(struct c4iw_qp *qhp, enum c4iw_qp_state state) @@ -939,7 +939,7 @@ static int ring_kernel_sq_db(struct c4iw_qp *qhp, u16 inc) { unsigned long flags; - spin_lock_irqsave(&qhp->rhp->lock, flags); + xa_lock_irqsave(&qhp->rhp->qps, flags); spin_lock(&qhp->lock); if (qhp->rhp->db_state == NORMAL) t4_ring_sq_db(&qhp->wq, inc, NULL); @@ -948,7 +948,7 @@ static int ring_kernel_sq_db(struct c4iw_qp *qhp, u16 inc) qhp->wq.sq.wq_pidx_inc += inc; } spin_unlock(&qhp->lock); - spin_unlock_irqrestore(&qhp->rhp->lock, flags); + xa_unlock_irqrestore(&qhp->rhp->qps, flags); return 0; } @@ -956,7 +956,7 @@ static int ring_kernel_rq_db(struct c4iw_qp *qhp, u16 inc) { unsigned long flags; - spin_lock_irqsave(&qhp->rhp->lock, flags); + xa_lock_irqsave(&qhp->rhp->qps, flags); spin_lock(&qhp->lock); if (qhp->rhp->db_state == NORMAL) t4_ring_rq_db(&qhp->wq, inc, NULL); @@ -965,7 +965,7 @@ static int ring_kernel_rq_db(struct c4iw_qp *qhp, u16 inc) qhp->wq.rq.wq_pidx_inc += inc; } spin_unlock(&qhp->lock); - spin_unlock_irqrestore(&qhp->rhp->lock, flags); + xa_unlock_irqrestore(&qhp->rhp->qps, flags); return 0; } @@ -2111,12 +2111,11 @@ int c4iw_destroy_qp(struct ib_qp *ib_qp) c4iw_modify_qp(rhp, qhp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 0); wait_event(qhp->wait, !qhp->ep); - 
remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid); - - spin_lock_irq(&rhp->lock); + xa_lock_irq(&rhp->qps); + __xa_erase(&rhp->qps, qhp->wq.sq.qid); if (!list_empty(&qhp->db_fc_entry)) list_del_init(&qhp->db_fc_entry); - spin_unlock_irq(&rhp->lock); + xa_unlock_irq(&rhp->qps); free_ird(rhp, qhp->attr.max_ird); c4iw_qp_rem_ref(ib_qp); @@ -2234,7 +2233,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, kref_init(&qhp->kref); INIT_WORK(&qhp->free_work, free_qp_work); - ret = insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.sq.qid); + ret = xa_insert_irq(&rhp->qps, qhp->wq.sq.qid, qhp, GFP_KERNEL); if (ret) goto err_destroy_qp; @@ -2370,7 +2369,7 @@ err_free_rq_key: err_free_sq_key: kfree(sq_key_mm); err_remove_handle: - remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid); + xa_erase_irq(&rhp->qps, qhp->wq.sq.qid); err_destroy_qp: destroy_qp(&rhp->rdev, &qhp->wq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx, !attrs->srq); @@ -2760,7 +2759,7 @@ struct ib_srq *c4iw_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *attrs, if (CHELSIO_CHIP_VERSION(rhp->rdev.lldi.adapter_type) > CHELSIO_T6) srq->flags = T4_SRQ_LIMIT_SUPPORT; - ret = insert_handle(rhp, &rhp->qpidr, srq, srq->wq.qid); + ret = xa_insert_irq(&rhp->qps, srq->wq.qid, srq, GFP_KERNEL); if (ret) goto err_free_queue; @@ -2812,7 +2811,7 @@ err_free_srq_db_key_mm: err_free_srq_key_mm: kfree(srq_key_mm); err_remove_handle: - remove_handle(rhp, &rhp->qpidr, srq->wq.qid); + xa_erase_irq(&rhp->qps, srq->wq.qid); err_free_queue: free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx, srq->wr_waitp); @@ -2838,7 +2837,7 @@ int c4iw_destroy_srq(struct ib_srq *ibsrq) pr_debug("%s id %d\n", __func__, srq->wq.qid); - remove_handle(rhp, &rhp->qpidr, srq->wq.qid); + xa_erase_irq(&rhp->qps, srq->wq.qid); ucontext = ibsrq->uobject ? to_c4iw_ucontext(ibsrq->uobject->context) : NULL; free_srq_queue(srq, ucontext ? 
&ucontext->uctx : &rhp->rdev.uctx, -- cgit v1.2.3 From 7a268a93973c07f93e952d96c2faa88df8ed38d8 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 20 Feb 2019 16:20:52 -0800 Subject: cxgb4: Convert mmidr to XArray Signed-off-by: Matthew Wilcox Acked-by: Steve Wise Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/cxgb4/device.c | 22 +++++++++++----------- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 6 +----- drivers/infiniband/hw/cxgb4/mem.c | 16 ++++++++-------- 3 files changed, 20 insertions(+), 24 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index 834803ec7e15..78577b3191fa 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -374,9 +374,8 @@ static const struct file_operations qp_debugfs_fops = { .llseek = default_llseek, }; -static int dump_stag(int id, void *p, void *data) +static int dump_stag(unsigned long id, struct c4iw_debugfs_data *stagd) { - struct c4iw_debugfs_data *stagd = data; int space; int cc; struct fw_ri_tpte tpte; @@ -425,6 +424,8 @@ static int stag_release(struct inode *inode, struct file *file) static int stag_open(struct inode *inode, struct file *file) { struct c4iw_debugfs_data *stagd; + void *p; + unsigned long index; int ret = 0; int count = 1; @@ -436,9 +437,8 @@ static int stag_open(struct inode *inode, struct file *file) stagd->devp = inode->i_private; stagd->pos = 0; - spin_lock_irq(&stagd->devp->lock); - idr_for_each(&stagd->devp->mmidr, count_idrs, &count); - spin_unlock_irq(&stagd->devp->lock); + xa_for_each(&stagd->devp->mrs, index, p) + count++; stagd->bufsize = count * 256; stagd->buf = vmalloc(stagd->bufsize); @@ -447,9 +447,10 @@ static int stag_open(struct inode *inode, struct file *file) goto err1; } - spin_lock_irq(&stagd->devp->lock); - idr_for_each(&stagd->devp->mmidr, dump_stag, stagd); - spin_unlock_irq(&stagd->devp->lock); + xa_lock_irq(&stagd->devp->mrs); + xa_for_each(&stagd->devp->mrs, index, p) + 
dump_stag(index, stagd); + xa_unlock_irq(&stagd->devp->mrs); stagd->buf[stagd->pos++] = 0; file->private_data = stagd; @@ -934,8 +935,7 @@ void c4iw_dealloc(struct uld_ctx *ctx) c4iw_rdev_close(&ctx->dev->rdev); WARN_ON(!xa_empty(&ctx->dev->cqs)); WARN_ON(!xa_empty(&ctx->dev->qps)); - WARN_ON_ONCE(!idr_is_empty(&ctx->dev->mmidr)); - idr_destroy(&ctx->dev->mmidr); + WARN_ON(!xa_empty(&ctx->dev->mrs)); wait_event(ctx->dev->wait, idr_is_empty(&ctx->dev->hwtid_idr)); idr_destroy(&ctx->dev->hwtid_idr); idr_destroy(&ctx->dev->stid_idr); @@ -1045,7 +1045,7 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop) xa_init_flags(&devp->cqs, XA_FLAGS_LOCK_IRQ); xa_init_flags(&devp->qps, XA_FLAGS_LOCK_IRQ); - idr_init(&devp->mmidr); + xa_init_flags(&devp->mrs, XA_FLAGS_LOCK_IRQ); idr_init(&devp->hwtid_idr); idr_init(&devp->stid_idr); idr_init(&devp->atid_idr); diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index e24bd1f8955e..7dffb68092fc 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -317,7 +317,7 @@ struct c4iw_dev { u32 device_cap_flags; struct xarray cqs; struct xarray qps; - struct idr mmidr; + struct xarray mrs; spinlock_t lock; struct mutex db_mutex; struct dentry *debugfs_root; @@ -357,10 +357,6 @@ static inline struct c4iw_qp *get_qhp(struct c4iw_dev *rhp, u32 qpid) return xa_load(&rhp->qps, qpid); } -static inline struct c4iw_mr *get_mhp(struct c4iw_dev *rhp, u32 mmid) -{ - return idr_find(&rhp->mmidr, mmid); -} static inline int _insert_handle(struct c4iw_dev *rhp, struct idr *idr, void *handle, u32 id, int lock) diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 5baa31ab6366..de6697fdffa7 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -395,7 +395,7 @@ static int finish_mem_reg(struct c4iw_mr *mhp, u32 stag) mhp->ibmr.iova = mhp->attr.va_fbo; mhp->ibmr.page_size = 1U << 
(mhp->attr.page_size + 12); pr_debug("mmid 0x%x mhp %p\n", mmid, mhp); - return insert_handle(mhp->rhp, &mhp->rhp->mmidr, mhp, mmid); + return xa_insert_irq(&mhp->rhp->mrs, mmid, mhp, GFP_KERNEL); } static int register_mem(struct c4iw_dev *rhp, struct c4iw_pd *php, @@ -645,7 +645,7 @@ struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, mhp->attr.stag = stag; mmid = (stag) >> 8; mhp->ibmw.rkey = stag; - if (insert_handle(rhp, &rhp->mmidr, mhp, mmid)) { + if (xa_insert_irq(&rhp->mrs, mmid, mhp, GFP_KERNEL)) { ret = -ENOMEM; goto dealloc_win; } @@ -673,7 +673,7 @@ int c4iw_dealloc_mw(struct ib_mw *mw) mhp = to_c4iw_mw(mw); rhp = mhp->rhp; mmid = (mw->rkey) >> 8; - remove_handle(rhp, &rhp->mmidr, mmid); + xa_erase_irq(&rhp->mrs, mmid); deallocate_window(&rhp->rdev, mhp->attr.stag, mhp->dereg_skb, mhp->wr_waitp); kfree_skb(mhp->dereg_skb); @@ -740,7 +740,7 @@ struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, mhp->attr.state = 0; mmid = (stag) >> 8; mhp->ibmr.rkey = mhp->ibmr.lkey = stag; - if (insert_handle(rhp, &rhp->mmidr, mhp, mmid)) { + if (xa_insert_irq(&rhp->mrs, mmid, mhp, GFP_KERNEL)) { ret = -ENOMEM; goto err_dereg; } @@ -797,7 +797,7 @@ int c4iw_dereg_mr(struct ib_mr *ib_mr) mhp = to_c4iw_mr(ib_mr); rhp = mhp->rhp; mmid = mhp->attr.stag >> 8; - remove_handle(rhp, &rhp->mmidr, mmid); + xa_erase_irq(&rhp->mrs, mmid); if (mhp->mpl) dma_free_coherent(&mhp->rhp->rdev.lldi.pdev->dev, mhp->max_mpl_len, mhp->mpl, mhp->mpl_addr); @@ -821,9 +821,9 @@ void c4iw_invalidate_mr(struct c4iw_dev *rhp, u32 rkey) struct c4iw_mr *mhp; unsigned long flags; - spin_lock_irqsave(&rhp->lock, flags); - mhp = get_mhp(rhp, rkey >> 8); + xa_lock_irqsave(&rhp->mrs, flags); + mhp = xa_load(&rhp->mrs, rkey >> 8); if (mhp) mhp->attr.state = 0; - spin_unlock_irqrestore(&rhp->lock, flags); + xa_unlock_irqrestore(&rhp->mrs, flags); } -- cgit v1.2.3 From f254ba6ae5474cb3f1c1863b102368483963e858 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 20 Feb 2019 16:20:53 -0800 
Subject: cxgb4: Convert hwtid_idr to XArray Signed-off-by: Matthew Wilcox Acked-by: Steve Wise Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/cxgb4/cm.c | 27 +++++++++++++++------------ drivers/infiniband/hw/cxgb4/device.c | 26 +++++++++++++++++--------- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 2 +- 3 files changed, 33 insertions(+), 22 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 4d232bdf9e97..a922895b31e1 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -331,20 +331,23 @@ static void remove_ep_tid(struct c4iw_ep *ep) { unsigned long flags; - spin_lock_irqsave(&ep->com.dev->lock, flags); - _remove_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep->hwtid, 0); - if (idr_is_empty(&ep->com.dev->hwtid_idr)) + xa_lock_irqsave(&ep->com.dev->hwtids, flags); + __xa_erase(&ep->com.dev->hwtids, ep->hwtid); + if (xa_empty(&ep->com.dev->hwtids)) wake_up(&ep->com.dev->wait); - spin_unlock_irqrestore(&ep->com.dev->lock, flags); + xa_unlock_irqrestore(&ep->com.dev->hwtids, flags); } -static void insert_ep_tid(struct c4iw_ep *ep) +static int insert_ep_tid(struct c4iw_ep *ep) { unsigned long flags; + int err; + + xa_lock_irqsave(&ep->com.dev->hwtids, flags); + err = __xa_insert(&ep->com.dev->hwtids, ep->hwtid, ep, GFP_KERNEL); + xa_unlock_irqrestore(&ep->com.dev->hwtids, flags); - spin_lock_irqsave(&ep->com.dev->lock, flags); - _insert_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep, ep->hwtid, 0); - spin_unlock_irqrestore(&ep->com.dev->lock, flags); + return err; } /* @@ -355,11 +358,11 @@ static struct c4iw_ep *get_ep_from_tid(struct c4iw_dev *dev, unsigned int tid) struct c4iw_ep *ep; unsigned long flags; - spin_lock_irqsave(&dev->lock, flags); - ep = idr_find(&dev->hwtid_idr, tid); + xa_lock_irqsave(&dev->hwtids, flags); + ep = xa_load(&dev->hwtids, tid); if (ep) c4iw_get_ep(&ep->com); - spin_unlock_irqrestore(&dev->lock, flags); + xa_unlock_irqrestore(&dev->hwtids, flags); 
return ep; } @@ -2947,7 +2950,7 @@ out: (const u32 *)&sin6->sin6_addr.s6_addr, 1); } - remove_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep->hwtid); + xa_erase_irq(&ep->com.dev->hwtids, ep->hwtid); cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid, ep->com.local_addr.ss_family); dst_release(ep->dst); diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index 78577b3191fa..87fe82e69a8b 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -560,10 +560,8 @@ static const struct file_operations stats_debugfs_fops = { .write = stats_clear, }; -static int dump_ep(int id, void *p, void *data) +static int dump_ep(struct c4iw_ep *ep, struct c4iw_debugfs_data *epd) { - struct c4iw_ep *ep = p; - struct c4iw_debugfs_data *epd = data; int space; int cc; @@ -619,6 +617,11 @@ static int dump_ep(int id, void *p, void *data) return 0; } +static int _dump_ep(int id, void *p, void *data) +{ + return dump_ep(p, data); +} + static int dump_listen_ep(int id, void *p, void *data) { struct c4iw_listen_ep *ep = p; @@ -676,6 +679,8 @@ static int ep_release(struct inode *inode, struct file *file) static int ep_open(struct inode *inode, struct file *file) { + struct c4iw_ep *ep; + unsigned long index; struct c4iw_debugfs_data *epd; int ret = 0; int count = 1; @@ -688,8 +693,9 @@ static int ep_open(struct inode *inode, struct file *file) epd->devp = inode->i_private; epd->pos = 0; + xa_for_each(&epd->devp->hwtids, index, ep) + count++; spin_lock_irq(&epd->devp->lock); - idr_for_each(&epd->devp->hwtid_idr, count_idrs, &count); idr_for_each(&epd->devp->atid_idr, count_idrs, &count); idr_for_each(&epd->devp->stid_idr, count_idrs, &count); spin_unlock_irq(&epd->devp->lock); @@ -701,9 +707,12 @@ static int ep_open(struct inode *inode, struct file *file) goto err1; } + xa_lock_irq(&epd->devp->hwtids); + xa_for_each(&epd->devp->hwtids, index, ep) + dump_ep(ep, epd); + xa_unlock_irq(&epd->devp->hwtids); 
spin_lock_irq(&epd->devp->lock); - idr_for_each(&epd->devp->hwtid_idr, dump_ep, epd); - idr_for_each(&epd->devp->atid_idr, dump_ep, epd); + idr_for_each(&epd->devp->atid_idr, _dump_ep, epd); idr_for_each(&epd->devp->stid_idr, dump_listen_ep, epd); spin_unlock_irq(&epd->devp->lock); @@ -936,8 +945,7 @@ void c4iw_dealloc(struct uld_ctx *ctx) WARN_ON(!xa_empty(&ctx->dev->cqs)); WARN_ON(!xa_empty(&ctx->dev->qps)); WARN_ON(!xa_empty(&ctx->dev->mrs)); - wait_event(ctx->dev->wait, idr_is_empty(&ctx->dev->hwtid_idr)); - idr_destroy(&ctx->dev->hwtid_idr); + wait_event(ctx->dev->wait, xa_empty(&ctx->dev->hwtids)); idr_destroy(&ctx->dev->stid_idr); idr_destroy(&ctx->dev->atid_idr); if (ctx->dev->rdev.bar2_kva) @@ -1046,7 +1054,7 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop) xa_init_flags(&devp->cqs, XA_FLAGS_LOCK_IRQ); xa_init_flags(&devp->qps, XA_FLAGS_LOCK_IRQ); xa_init_flags(&devp->mrs, XA_FLAGS_LOCK_IRQ); - idr_init(&devp->hwtid_idr); + xa_init_flags(&devp->hwtids, XA_FLAGS_LOCK_IRQ); idr_init(&devp->stid_idr); idr_init(&devp->atid_idr); spin_lock_init(&devp->lock); diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 7dffb68092fc..bba03fadb86b 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -322,7 +322,7 @@ struct c4iw_dev { struct mutex db_mutex; struct dentry *debugfs_root; enum db_state db_state; - struct idr hwtid_idr; + struct xarray hwtids; struct idr atid_idr; struct idr stid_idr; struct list_head db_fc_list; -- cgit v1.2.3 From 9f5a9632e412ac35f34de1d8bc8b7e99b72414cf Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 20 Feb 2019 16:20:54 -0800 Subject: cxgb4: Convert atid_idr to XArray Signed-off-by: Matthew Wilcox Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/cxgb4/cm.c | 25 +++++++++++++++---------- drivers/infiniband/hw/cxgb4/device.c | 17 ++++++++--------- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 2 +- 3 files changed, 
24 insertions(+), 20 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index a922895b31e1..2dfa49f5ea84 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -558,7 +558,7 @@ static void act_open_req_arp_failure(void *handle, struct sk_buff *skb) cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0], (const u32 *)&sin6->sin6_addr.s6_addr, 1); } - remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid); + xa_erase_irq(&ep->com.dev->atids, ep->atid); cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid); queue_arp_failure_cpl(ep, skb, FAKE_CPL_PUT_EP_SAFE); } @@ -1238,7 +1238,7 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb) set_emss(ep, tcp_opt); /* dealloc the atid */ - remove_handle(ep->com.dev, &ep->com.dev->atid_idr, atid); + xa_erase_irq(&ep->com.dev->atids, atid); cxgb4_free_atid(t, atid); set_bit(ACT_ESTAB, &ep->com.history); @@ -2187,7 +2187,9 @@ static int c4iw_reconnect(struct c4iw_ep *ep) err = -ENOMEM; goto fail2; } - insert_handle(ep->com.dev, &ep->com.dev->atid_idr, ep, ep->atid); + err = xa_insert_irq(&ep->com.dev->atids, ep->atid, ep, GFP_KERNEL); + if (err) + goto fail2a; /* find a route */ if (ep->com.cm_id->m_local_addr.ss_family == AF_INET) { @@ -2239,7 +2241,8 @@ static int c4iw_reconnect(struct c4iw_ep *ep) fail4: dst_release(ep->dst); fail3: - remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid); + xa_erase_irq(&ep->com.dev->atids, ep->atid); +fail2a: cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid); fail2: /* @@ -2322,8 +2325,7 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) (const u32 *) &sin6->sin6_addr.s6_addr, 1); } - remove_handle(ep->com.dev, &ep->com.dev->atid_idr, - atid); + xa_erase_irq(&ep->com.dev->atids, atid); cxgb4_free_atid(t, atid); dst_release(ep->dst); cxgb4_l2t_release(ep->l2t); @@ -2360,7 +2362,7 @@ fail: cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, GET_TID(rpl), 
ep->com.local_addr.ss_family); - remove_handle(ep->com.dev, &ep->com.dev->atid_idr, atid); + xa_erase_irq(&ep->com.dev->atids, atid); cxgb4_free_atid(t, atid); dst_release(ep->dst); cxgb4_l2t_release(ep->l2t); @@ -3345,7 +3347,9 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) err = -ENOMEM; goto fail2; } - insert_handle(dev, &dev->atid_idr, ep, ep->atid); + err = xa_insert_irq(&dev->atids, ep->atid, ep, GFP_KERNEL); + if (err) + goto fail5; memcpy(&ep->com.local_addr, &cm_id->m_local_addr, sizeof(ep->com.local_addr)); @@ -3433,7 +3437,8 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) fail4: dst_release(ep->dst); fail3: - remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid); + xa_erase_irq(&ep->com.dev->atids, ep->atid); +fail5: cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid); fail2: skb_queue_purge(&ep->com.ep_skb_list); @@ -3766,7 +3771,7 @@ static void active_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb, cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0], (const u32 *)&sin6->sin6_addr.s6_addr, 1); } - remove_handle(dev, &dev->atid_idr, atid); + xa_erase_irq(&dev->atids, atid); cxgb4_free_atid(dev->rdev.lldi.tids, atid); dst_release(ep->dst); cxgb4_l2t_release(ep->l2t); diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index 87fe82e69a8b..ad874872fa88 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -617,11 +617,6 @@ static int dump_ep(struct c4iw_ep *ep, struct c4iw_debugfs_data *epd) return 0; } -static int _dump_ep(int id, void *p, void *data) -{ - return dump_ep(p, data); -} - static int dump_listen_ep(int id, void *p, void *data) { struct c4iw_listen_ep *ep = p; @@ -695,8 +690,9 @@ static int ep_open(struct inode *inode, struct file *file) xa_for_each(&epd->devp->hwtids, index, ep) count++; + xa_for_each(&epd->devp->atids, index, ep) + count++; spin_lock_irq(&epd->devp->lock); - 
idr_for_each(&epd->devp->atid_idr, count_idrs, &count); idr_for_each(&epd->devp->stid_idr, count_idrs, &count); spin_unlock_irq(&epd->devp->lock); @@ -711,8 +707,11 @@ static int ep_open(struct inode *inode, struct file *file) xa_for_each(&epd->devp->hwtids, index, ep) dump_ep(ep, epd); xa_unlock_irq(&epd->devp->hwtids); + xa_lock_irq(&epd->devp->atids); + xa_for_each(&epd->devp->atids, index, ep) + dump_ep(ep, epd); + xa_unlock_irq(&epd->devp->atids); spin_lock_irq(&epd->devp->lock); - idr_for_each(&epd->devp->atid_idr, _dump_ep, epd); idr_for_each(&epd->devp->stid_idr, dump_listen_ep, epd); spin_unlock_irq(&epd->devp->lock); @@ -947,7 +946,7 @@ void c4iw_dealloc(struct uld_ctx *ctx) WARN_ON(!xa_empty(&ctx->dev->mrs)); wait_event(ctx->dev->wait, xa_empty(&ctx->dev->hwtids)); idr_destroy(&ctx->dev->stid_idr); - idr_destroy(&ctx->dev->atid_idr); + WARN_ON(!xa_empty(&ctx->dev->atids)); if (ctx->dev->rdev.bar2_kva) iounmap(ctx->dev->rdev.bar2_kva); if (ctx->dev->rdev.oc_mw_kva) @@ -1055,8 +1054,8 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop) xa_init_flags(&devp->qps, XA_FLAGS_LOCK_IRQ); xa_init_flags(&devp->mrs, XA_FLAGS_LOCK_IRQ); xa_init_flags(&devp->hwtids, XA_FLAGS_LOCK_IRQ); + xa_init_flags(&devp->atids, XA_FLAGS_LOCK_IRQ); idr_init(&devp->stid_idr); - idr_init(&devp->atid_idr); spin_lock_init(&devp->lock); mutex_init(&devp->rdev.stats.lock); mutex_init(&devp->db_mutex); diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index bba03fadb86b..dec93157b311 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -323,7 +323,7 @@ struct c4iw_dev { struct dentry *debugfs_root; enum db_state db_state; struct xarray hwtids; - struct idr atid_idr; + struct xarray atids; struct idr stid_idr; struct list_head db_fc_list; u32 avail_ird; -- cgit v1.2.3 From 401b44804c2ae7ea1121235fa9c9827d12c2a573 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 20 Feb 
2019 16:20:55 -0800 Subject: cxgb4: Convert stid_idr to XArray Signed-off-by: Matthew Wilcox Acked-by: Steve Wise Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/cxgb4/cm.c | 15 +++++---- drivers/infiniband/hw/cxgb4/device.c | 31 +++++++----------- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 59 ++-------------------------------- 3 files changed, 22 insertions(+), 83 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 2dfa49f5ea84..1e68d87b663d 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -375,11 +375,11 @@ static struct c4iw_listen_ep *get_ep_from_stid(struct c4iw_dev *dev, struct c4iw_listen_ep *ep; unsigned long flags; - spin_lock_irqsave(&dev->lock, flags); - ep = idr_find(&dev->stid_idr, stid); + xa_lock_irqsave(&dev->stids, flags); + ep = xa_load(&dev->stids, stid); if (ep) c4iw_get_ep(&ep->com); - spin_unlock_irqrestore(&dev->lock, flags); + xa_unlock_irqrestore(&dev->stids, flags); return ep; } @@ -3561,7 +3561,9 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog) err = -ENOMEM; goto fail2; } - insert_handle(dev, &dev->stid_idr, ep, ep->stid); + err = xa_insert_irq(&dev->stids, ep->stid, ep, GFP_KERNEL); + if (err) + goto fail3; state_set(&ep->com, LISTEN); if (ep->com.local_addr.ss_family == AF_INET) @@ -3572,7 +3574,8 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog) cm_id->provider_data = ep; goto out; } - remove_handle(ep->com.dev, &ep->com.dev->stid_idr, ep->stid); + xa_erase_irq(&ep->com.dev->stids, ep->stid); +fail3: cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid, ep->com.local_addr.ss_family); fail2: @@ -3611,7 +3614,7 @@ int c4iw_destroy_listen(struct iw_cm_id *cm_id) cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0], (const u32 *)&sin6->sin6_addr.s6_addr, 1); } - remove_handle(ep->com.dev, &ep->com.dev->stid_idr, ep->stid); + xa_erase_irq(&ep->com.dev->stids, ep->stid); cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, 
ep->stid, ep->com.local_addr.ss_family); done: diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index ad874872fa88..4c0d925c5ff5 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -81,14 +81,6 @@ struct c4iw_debugfs_data { int pos; }; -static int count_idrs(int id, void *p, void *data) -{ - int *countp = data; - - *countp = *countp + 1; - return 0; -} - static ssize_t debugfs_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { @@ -617,10 +609,9 @@ static int dump_ep(struct c4iw_ep *ep, struct c4iw_debugfs_data *epd) return 0; } -static int dump_listen_ep(int id, void *p, void *data) +static +int dump_listen_ep(struct c4iw_listen_ep *ep, struct c4iw_debugfs_data *epd) { - struct c4iw_listen_ep *ep = p; - struct c4iw_debugfs_data *epd = data; int space; int cc; @@ -675,6 +666,7 @@ static int ep_release(struct inode *inode, struct file *file) static int ep_open(struct inode *inode, struct file *file) { struct c4iw_ep *ep; + struct c4iw_listen_ep *lep; unsigned long index; struct c4iw_debugfs_data *epd; int ret = 0; @@ -692,9 +684,8 @@ static int ep_open(struct inode *inode, struct file *file) count++; xa_for_each(&epd->devp->atids, index, ep) count++; - spin_lock_irq(&epd->devp->lock); - idr_for_each(&epd->devp->stid_idr, count_idrs, &count); - spin_unlock_irq(&epd->devp->lock); + xa_for_each(&epd->devp->stids, index, lep) + count++; epd->bufsize = count * 240; epd->buf = vmalloc(epd->bufsize); @@ -711,9 +702,10 @@ static int ep_open(struct inode *inode, struct file *file) xa_for_each(&epd->devp->atids, index, ep) dump_ep(ep, epd); xa_unlock_irq(&epd->devp->atids); - spin_lock_irq(&epd->devp->lock); - idr_for_each(&epd->devp->stid_idr, dump_listen_ep, epd); - spin_unlock_irq(&epd->devp->lock); + xa_lock_irq(&epd->devp->stids); + xa_for_each(&epd->devp->stids, index, lep) + dump_listen_ep(lep, epd); + xa_unlock_irq(&epd->devp->stids); file->private_data = epd; goto 
out; @@ -945,7 +937,7 @@ void c4iw_dealloc(struct uld_ctx *ctx) WARN_ON(!xa_empty(&ctx->dev->qps)); WARN_ON(!xa_empty(&ctx->dev->mrs)); wait_event(ctx->dev->wait, xa_empty(&ctx->dev->hwtids)); - idr_destroy(&ctx->dev->stid_idr); + WARN_ON(!xa_empty(&ctx->dev->stids)); WARN_ON(!xa_empty(&ctx->dev->atids)); if (ctx->dev->rdev.bar2_kva) iounmap(ctx->dev->rdev.bar2_kva); @@ -1055,8 +1047,7 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop) xa_init_flags(&devp->mrs, XA_FLAGS_LOCK_IRQ); xa_init_flags(&devp->hwtids, XA_FLAGS_LOCK_IRQ); xa_init_flags(&devp->atids, XA_FLAGS_LOCK_IRQ); - idr_init(&devp->stid_idr); - spin_lock_init(&devp->lock); + xa_init_flags(&devp->stids, XA_FLAGS_LOCK_IRQ); mutex_init(&devp->rdev.stats.lock); mutex_init(&devp->db_mutex); INIT_LIST_HEAD(&devp->db_fc_list); diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index dec93157b311..4c918fe2430e 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -34,7 +34,7 @@ #include #include #include -#include +#include #include #include #include @@ -318,13 +318,12 @@ struct c4iw_dev { struct xarray cqs; struct xarray qps; struct xarray mrs; - spinlock_t lock; struct mutex db_mutex; struct dentry *debugfs_root; enum db_state db_state; struct xarray hwtids; struct xarray atids; - struct idr stid_idr; + struct xarray stids; struct list_head db_fc_list; u32 avail_ird; wait_queue_head_t wait; @@ -357,60 +356,6 @@ static inline struct c4iw_qp *get_qhp(struct c4iw_dev *rhp, u32 qpid) return xa_load(&rhp->qps, qpid); } - -static inline int _insert_handle(struct c4iw_dev *rhp, struct idr *idr, - void *handle, u32 id, int lock) -{ - int ret; - - if (lock) { - idr_preload(GFP_KERNEL); - spin_lock_irq(&rhp->lock); - } - - ret = idr_alloc(idr, handle, id, id + 1, GFP_ATOMIC); - - if (lock) { - spin_unlock_irq(&rhp->lock); - idr_preload_end(); - } - - return ret < 0 ? 
ret : 0; -} - -static inline int insert_handle(struct c4iw_dev *rhp, struct idr *idr, - void *handle, u32 id) -{ - return _insert_handle(rhp, idr, handle, id, 1); -} - -static inline int insert_handle_nolock(struct c4iw_dev *rhp, struct idr *idr, - void *handle, u32 id) -{ - return _insert_handle(rhp, idr, handle, id, 0); -} - -static inline void _remove_handle(struct c4iw_dev *rhp, struct idr *idr, - u32 id, int lock) -{ - if (lock) - spin_lock_irq(&rhp->lock); - idr_remove(idr, id); - if (lock) - spin_unlock_irq(&rhp->lock); -} - -static inline void remove_handle(struct c4iw_dev *rhp, struct idr *idr, u32 id) -{ - _remove_handle(rhp, idr, id, 1); -} - -static inline void remove_handle_nolock(struct c4iw_dev *rhp, - struct idr *idr, u32 id) -{ - _remove_handle(rhp, idr, id, 0); -} - extern uint c4iw_max_read_depth; static inline int cur_max_read_depth(struct c4iw_dev *dev) -- cgit v1.2.3 From f04cc17878b47bfa47af2e50f481d7f6eaaf3ca7 Mon Sep 17 00:00:00 2001 From: Lijun Ou Date: Sat, 23 Feb 2019 20:01:21 +0800 Subject: RDMA/hns: Only assign the relatived fields of psn if IB_QP_SQ_PSN is set Only when the IB_QP_SQ_PSN flag of attr_mask is set is it valid to assign the related psn fields into the qp context when modifying the qp. 
Signed-off-by: Lijun Ou Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 83 +++++++++++++++++------------- 1 file changed, 46 insertions(+), 37 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 1c54390e1c85..4a9bc75b2279 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -3886,13 +3886,6 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, V2_QPC_BYTE_240_RX_ACK_MSN_M, V2_QPC_BYTE_240_RX_ACK_MSN_S, 0); - roce_set_field(context->byte_244_rnr_rxack, - V2_QPC_BYTE_244_RX_ACK_EPSN_M, - V2_QPC_BYTE_244_RX_ACK_EPSN_S, attr->sq_psn); - roce_set_field(qpc_mask->byte_244_rnr_rxack, - V2_QPC_BYTE_244_RX_ACK_EPSN_M, - V2_QPC_BYTE_244_RX_ACK_EPSN_S, 0); - roce_set_field(qpc_mask->byte_248_ack_psn, V2_QPC_BYTE_248_ACK_LAST_OPTYPE_M, V2_QPC_BYTE_248_ACK_LAST_OPTYPE_S, 0); @@ -3906,27 +3899,6 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, V2_QPC_BYTE_240_IRRL_TAIL_REAL_M, V2_QPC_BYTE_240_IRRL_TAIL_REAL_S, 0); - roce_set_field(context->byte_220_retry_psn_msn, - V2_QPC_BYTE_220_RETRY_MSG_PSN_M, - V2_QPC_BYTE_220_RETRY_MSG_PSN_S, attr->sq_psn); - roce_set_field(qpc_mask->byte_220_retry_psn_msn, - V2_QPC_BYTE_220_RETRY_MSG_PSN_M, - V2_QPC_BYTE_220_RETRY_MSG_PSN_S, 0); - - roce_set_field(context->byte_224_retry_msg, - V2_QPC_BYTE_224_RETRY_MSG_PSN_M, - V2_QPC_BYTE_224_RETRY_MSG_PSN_S, attr->sq_psn >> 16); - roce_set_field(qpc_mask->byte_224_retry_msg, - V2_QPC_BYTE_224_RETRY_MSG_PSN_M, - V2_QPC_BYTE_224_RETRY_MSG_PSN_S, 0); - - roce_set_field(context->byte_224_retry_msg, - V2_QPC_BYTE_224_RETRY_MSG_FPKT_PSN_M, - V2_QPC_BYTE_224_RETRY_MSG_FPKT_PSN_S, attr->sq_psn); - roce_set_field(qpc_mask->byte_224_retry_msg, - V2_QPC_BYTE_224_RETRY_MSG_FPKT_PSN_M, - V2_QPC_BYTE_224_RETRY_MSG_FPKT_PSN_S, 0); - roce_set_field(qpc_mask->byte_220_retry_psn_msn, V2_QPC_BYTE_220_RETRY_MSG_MSN_M, V2_QPC_BYTE_220_RETRY_MSG_MSN_S, 0); @@ 
-3977,17 +3949,8 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, } } - roce_set_field(context->byte_172_sq_psn, V2_QPC_BYTE_172_SQ_CUR_PSN_M, - V2_QPC_BYTE_172_SQ_CUR_PSN_S, attr->sq_psn); - roce_set_field(qpc_mask->byte_172_sq_psn, V2_QPC_BYTE_172_SQ_CUR_PSN_M, - V2_QPC_BYTE_172_SQ_CUR_PSN_S, 0); - roce_set_field(qpc_mask->byte_196_sq_psn, V2_QPC_BYTE_196_IRRL_HEAD_M, V2_QPC_BYTE_196_IRRL_HEAD_S, 0); - roce_set_field(context->byte_196_sq_psn, V2_QPC_BYTE_196_SQ_MAX_PSN_M, - V2_QPC_BYTE_196_SQ_MAX_PSN_S, attr->sq_psn); - roce_set_field(qpc_mask->byte_196_sq_psn, V2_QPC_BYTE_196_SQ_MAX_PSN_M, - V2_QPC_BYTE_196_SQ_MAX_PSN_S, 0); if ((attr_mask & IB_QP_MAX_QP_RD_ATOMIC) && attr->max_rd_atomic) { roce_set_field(context->byte_208_irrl, V2_QPC_BYTE_208_SR_MAX_M, @@ -4190,6 +4153,52 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr); } + if (attr_mask & IB_QP_SQ_PSN) { + roce_set_field(context->byte_172_sq_psn, + V2_QPC_BYTE_172_SQ_CUR_PSN_M, + V2_QPC_BYTE_172_SQ_CUR_PSN_S, attr->sq_psn); + roce_set_field(qpc_mask->byte_172_sq_psn, + V2_QPC_BYTE_172_SQ_CUR_PSN_M, + V2_QPC_BYTE_172_SQ_CUR_PSN_S, 0); + + roce_set_field(context->byte_196_sq_psn, + V2_QPC_BYTE_196_SQ_MAX_PSN_M, + V2_QPC_BYTE_196_SQ_MAX_PSN_S, attr->sq_psn); + roce_set_field(qpc_mask->byte_196_sq_psn, + V2_QPC_BYTE_196_SQ_MAX_PSN_M, + V2_QPC_BYTE_196_SQ_MAX_PSN_S, 0); + + roce_set_field(context->byte_220_retry_psn_msn, + V2_QPC_BYTE_220_RETRY_MSG_PSN_M, + V2_QPC_BYTE_220_RETRY_MSG_PSN_S, attr->sq_psn); + roce_set_field(qpc_mask->byte_220_retry_psn_msn, + V2_QPC_BYTE_220_RETRY_MSG_PSN_M, + V2_QPC_BYTE_220_RETRY_MSG_PSN_S, 0); + + roce_set_field(context->byte_224_retry_msg, + V2_QPC_BYTE_224_RETRY_MSG_PSN_M, + V2_QPC_BYTE_224_RETRY_MSG_PSN_S, + attr->sq_psn >> 16); + roce_set_field(qpc_mask->byte_224_retry_msg, + V2_QPC_BYTE_224_RETRY_MSG_PSN_M, + V2_QPC_BYTE_224_RETRY_MSG_PSN_S, 0); + + roce_set_field(context->byte_224_retry_msg, + 
V2_QPC_BYTE_224_RETRY_MSG_FPKT_PSN_M, + V2_QPC_BYTE_224_RETRY_MSG_FPKT_PSN_S, + attr->sq_psn); + roce_set_field(qpc_mask->byte_224_retry_msg, + V2_QPC_BYTE_224_RETRY_MSG_FPKT_PSN_M, + V2_QPC_BYTE_224_RETRY_MSG_FPKT_PSN_S, 0); + + roce_set_field(context->byte_244_rnr_rxack, + V2_QPC_BYTE_244_RX_ACK_EPSN_M, + V2_QPC_BYTE_244_RX_ACK_EPSN_S, attr->sq_psn); + roce_set_field(qpc_mask->byte_244_rnr_rxack, + V2_QPC_BYTE_244_RX_ACK_EPSN_M, + V2_QPC_BYTE_244_RX_ACK_EPSN_S, 0); + } + if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) set_access_flags(hr_qp, context, qpc_mask, attr, attr_mask); -- cgit v1.2.3 From 601f3e6d067c4399953dc7ede8f4c5448f91b02a Mon Sep 17 00:00:00 2001 From: Lijun Ou Date: Sat, 23 Feb 2019 20:01:22 +0800 Subject: RDMA/hns: Only assign the fields of the rq psn if IB_QP_RQ_PSN is set Only when the IB_QP_RQ_PSN flag of attr_mask is set is it valid to assign the related rq psn fields into the qp context when modifying the qp. Signed-off-by: Lijun Ou Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 4a9bc75b2279..707442391e8b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -3670,13 +3670,6 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, V2_QPC_BYTE_104_RQ_NXT_BLK_ADDR_M, V2_QPC_BYTE_104_RQ_NXT_BLK_ADDR_S, 0); - roce_set_field(context->byte_108_rx_reqepsn, - V2_QPC_BYTE_108_RX_REQ_EPSN_M, - V2_QPC_BYTE_108_RX_REQ_EPSN_S, attr->rq_psn); - roce_set_field(qpc_mask->byte_108_rx_reqepsn, - V2_QPC_BYTE_108_RX_REQ_EPSN_M, - V2_QPC_BYTE_108_RX_REQ_EPSN_S, 0); - roce_set_field(context->byte_132_trrl, V2_QPC_BYTE_132_TRRL_BA_M, V2_QPC_BYTE_132_TRRL_BA_S, dma_handle_3 >> 4); roce_set_field(qpc_mask->byte_132_trrl, V2_QPC_BYTE_132_TRRL_BA_M, @@ -3784,11 +3777,6 @@ static 
int modify_qp_init_to_rtr(struct ib_qp *ibqp, context->rq_rnr_timer = 0; qpc_mask->rq_rnr_timer = 0; - roce_set_field(context->byte_152_raq, V2_QPC_BYTE_152_RAQ_PSN_M, - V2_QPC_BYTE_152_RAQ_PSN_S, attr->rq_psn - 1); - roce_set_field(qpc_mask->byte_152_raq, V2_QPC_BYTE_152_RAQ_PSN_M, - V2_QPC_BYTE_152_RAQ_PSN_S, 0); - roce_set_field(qpc_mask->byte_132_trrl, V2_QPC_BYTE_132_TRRL_HEAD_MAX_M, V2_QPC_BYTE_132_TRRL_HEAD_MAX_S, 0); roce_set_field(qpc_mask->byte_132_trrl, V2_QPC_BYTE_132_TRRL_TAIL_MAX_M, @@ -4202,6 +4190,22 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) set_access_flags(hr_qp, context, qpc_mask, attr, attr_mask); + /* RC&UC required attr */ + if (attr_mask & IB_QP_RQ_PSN) { + roce_set_field(context->byte_108_rx_reqepsn, + V2_QPC_BYTE_108_RX_REQ_EPSN_M, + V2_QPC_BYTE_108_RX_REQ_EPSN_S, attr->rq_psn); + roce_set_field(qpc_mask->byte_108_rx_reqepsn, + V2_QPC_BYTE_108_RX_REQ_EPSN_M, + V2_QPC_BYTE_108_RX_REQ_EPSN_S, 0); + + roce_set_field(context->byte_152_raq, V2_QPC_BYTE_152_RAQ_PSN_M, + V2_QPC_BYTE_152_RAQ_PSN_S, attr->rq_psn - 1); + roce_set_field(qpc_mask->byte_152_raq, + V2_QPC_BYTE_152_RAQ_PSN_M, + V2_QPC_BYTE_152_RAQ_PSN_S, 0); + } + roce_set_bit(context->byte_108_rx_reqepsn, V2_QPC_BYTE_108_INV_CREDIT_S, ibqp->srq ? 1 : 0); roce_set_bit(qpc_mask->byte_108_rx_reqepsn, -- cgit v1.2.3 From 834fa8cf6f7002706b02873fc0d16f9b06ef4819 Mon Sep 17 00:00:00 2001 From: Lijun Ou Date: Sat, 23 Feb 2019 20:01:23 +0800 Subject: RDMA/hns: Update the range of raq_psn field of qp context According to hip08 UM(User Manual), the raq_psn field size is [23:0]. 
Signed-off-by: Lijun Ou Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index f1f1b75812f9..1136763fa12f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -719,8 +719,8 @@ struct hns_roce_v2_qp_context { #define V2_QPC_BYTE_148_RAQ_SYNDROME_S 24 #define V2_QPC_BYTE_148_RAQ_SYNDROME_M GENMASK(31, 24) -#define V2_QPC_BYTE_152_RAQ_PSN_S 8 -#define V2_QPC_BYTE_152_RAQ_PSN_M GENMASK(31, 8) +#define V2_QPC_BYTE_152_RAQ_PSN_S 0 +#define V2_QPC_BYTE_152_RAQ_PSN_M GENMASK(23, 0) #define V2_QPC_BYTE_152_RAQ_TRRL_RTY_HEAD_S 24 #define V2_QPC_BYTE_152_RAQ_TRRL_RTY_HEAD_M GENMASK(31, 24) -- cgit v1.2.3 From 5b01b243b0b3725b4460e8924e1f105bb4038969 Mon Sep 17 00:00:00 2001 From: Lijun Ou Date: Sat, 23 Feb 2019 20:01:24 +0800 Subject: RDMA/hns: Only assgin some fields if the relatived attr_mask is set According to the IB protocol, some fields of the qp context are optional and are filled in only when the related attr_mask bits are set. The related attr_mask bits include IB_QP_TIMEOUT, IB_QP_RETRY_CNT, IB_QP_RNR_RETRY and IB_QP_MIN_RNR_TIMER. Besides, we move some assignments of the qp context fields out of the specific qp state transition functions. 
Signed-off-by: Lijun Ou Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 152 +++++++++++++++-------------- 1 file changed, 81 insertions(+), 71 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 707442391e8b..7237dce41aa0 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -3171,12 +3171,6 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp, roce_set_bit(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_CNP_TX_FLAG_S, 0); roce_set_bit(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_CE_FLAG_S, 0); - if (attr_mask & IB_QP_QKEY) { - context->qkey_xrcd = attr->qkey; - qpc_mask->qkey_xrcd = 0; - hr_qp->qkey = attr->qkey; - } - if (hr_qp->rdb_en) { roce_set_bit(context->byte_68_rq_db, V2_QPC_BYTE_68_RQ_RECORD_EN_S, 1); @@ -3388,7 +3382,6 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp, 0); hr_qp->access_flags = attr->qp_access_flags; - hr_qp->pkey_index = attr->pkey_index; roce_set_field(context->byte_252_err_txcqn, V2_QPC_BYTE_252_TX_CQN_M, V2_QPC_BYTE_252_TX_CQN_S, to_hr_cq(ibqp->send_cq)->cqn); roce_set_field(qpc_mask->byte_252_err_txcqn, V2_QPC_BYTE_252_TX_CQN_M, @@ -3512,11 +3505,6 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp, V2_QPC_BYTE_76_SRQN_M, V2_QPC_BYTE_76_SRQN_S, 0); } - if (attr_mask & IB_QP_QKEY) { - context->qkey_xrcd = attr->qkey; - qpc_mask->qkey_xrcd = 0; - } - roce_set_field(context->byte_4_sqpn_tst, V2_QPC_BYTE_4_SQPN_M, V2_QPC_BYTE_4_SQPN_S, hr_qp->qpn); roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_SQPN_M, @@ -3636,13 +3624,6 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, V2_QPC_BYTE_16_WQE_SGE_BUF_PG_SZ_M, V2_QPC_BYTE_16_WQE_SGE_BUF_PG_SZ_S, 0); - roce_set_field(context->byte_80_rnr_rx_cqn, - V2_QPC_BYTE_80_MIN_RNR_TIME_M, - V2_QPC_BYTE_80_MIN_RNR_TIME_S, attr->min_rnr_timer); - roce_set_field(qpc_mask->byte_80_rnr_rx_cqn, - V2_QPC_BYTE_80_MIN_RNR_TIME_M, - 
V2_QPC_BYTE_80_MIN_RNR_TIME_S, 0); - page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT); context->rq_cur_blk_addr = (u32)(mtts[hr_qp->rq.offset / page_size] >> PAGE_ADDR_SHIFT); @@ -3708,15 +3689,6 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, roce_set_bit(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_LBI_S, 0); } - if ((attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) && - attr->max_dest_rd_atomic) { - roce_set_field(context->byte_140_raq, V2_QPC_BYTE_140_RR_MAX_M, - V2_QPC_BYTE_140_RR_MAX_S, - fls(attr->max_dest_rd_atomic - 1)); - roce_set_field(qpc_mask->byte_140_raq, V2_QPC_BYTE_140_RR_MAX_M, - V2_QPC_BYTE_140_RR_MAX_S, 0); - } - if (attr_mask & IB_QP_DEST_QPN) { roce_set_field(context->byte_56_dqpn_err, V2_QPC_BYTE_56_DQPN_M, V2_QPC_BYTE_56_DQPN_S, attr->dest_qp_num); @@ -3897,57 +3869,14 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, roce_set_field(qpc_mask->byte_212_lsn, V2_QPC_BYTE_212_CHECK_FLG_M, V2_QPC_BYTE_212_CHECK_FLG_S, 0); - roce_set_field(context->byte_212_lsn, V2_QPC_BYTE_212_RETRY_CNT_M, - V2_QPC_BYTE_212_RETRY_CNT_S, attr->retry_cnt); - roce_set_field(qpc_mask->byte_212_lsn, V2_QPC_BYTE_212_RETRY_CNT_M, - V2_QPC_BYTE_212_RETRY_CNT_S, 0); - - roce_set_field(context->byte_212_lsn, V2_QPC_BYTE_212_RETRY_NUM_INIT_M, - V2_QPC_BYTE_212_RETRY_NUM_INIT_S, attr->retry_cnt); - roce_set_field(qpc_mask->byte_212_lsn, V2_QPC_BYTE_212_RETRY_NUM_INIT_M, - V2_QPC_BYTE_212_RETRY_NUM_INIT_S, 0); - - roce_set_field(context->byte_244_rnr_rxack, - V2_QPC_BYTE_244_RNR_NUM_INIT_M, - V2_QPC_BYTE_244_RNR_NUM_INIT_S, attr->rnr_retry); - roce_set_field(qpc_mask->byte_244_rnr_rxack, - V2_QPC_BYTE_244_RNR_NUM_INIT_M, - V2_QPC_BYTE_244_RNR_NUM_INIT_S, 0); - - roce_set_field(context->byte_244_rnr_rxack, V2_QPC_BYTE_244_RNR_CNT_M, - V2_QPC_BYTE_244_RNR_CNT_S, attr->rnr_retry); - roce_set_field(qpc_mask->byte_244_rnr_rxack, V2_QPC_BYTE_244_RNR_CNT_M, - V2_QPC_BYTE_244_RNR_CNT_S, 0); - roce_set_field(context->byte_212_lsn, V2_QPC_BYTE_212_LSN_M, V2_QPC_BYTE_212_LSN_S, 
0x100); roce_set_field(qpc_mask->byte_212_lsn, V2_QPC_BYTE_212_LSN_M, V2_QPC_BYTE_212_LSN_S, 0); - if (attr_mask & IB_QP_TIMEOUT) { - if (attr->timeout < 31) { - roce_set_field(context->byte_28_at_fl, - V2_QPC_BYTE_28_AT_M, V2_QPC_BYTE_28_AT_S, - attr->timeout); - roce_set_field(qpc_mask->byte_28_at_fl, - V2_QPC_BYTE_28_AT_M, V2_QPC_BYTE_28_AT_S, - 0); - } else { - dev_warn(dev, "Local ACK timeout shall be 0 to 30.\n"); - } - } - roce_set_field(qpc_mask->byte_196_sq_psn, V2_QPC_BYTE_196_IRRL_HEAD_M, V2_QPC_BYTE_196_IRRL_HEAD_S, 0); - if ((attr_mask & IB_QP_MAX_QP_RD_ATOMIC) && attr->max_rd_atomic) { - roce_set_field(context->byte_208_irrl, V2_QPC_BYTE_208_SR_MAX_M, - V2_QPC_BYTE_208_SR_MAX_S, - fls(attr->max_rd_atomic - 1)); - roce_set_field(qpc_mask->byte_208_irrl, - V2_QPC_BYTE_208_SR_MAX_M, - V2_QPC_BYTE_208_SR_MAX_S, 0); - } return 0; } @@ -4141,6 +4070,53 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr); } + if (attr_mask & IB_QP_TIMEOUT) { + if (attr->timeout < 31) { + roce_set_field(context->byte_28_at_fl, + V2_QPC_BYTE_28_AT_M, V2_QPC_BYTE_28_AT_S, + attr->timeout); + roce_set_field(qpc_mask->byte_28_at_fl, + V2_QPC_BYTE_28_AT_M, V2_QPC_BYTE_28_AT_S, + 0); + } else { + dev_warn(dev, "Local ACK timeout shall be 0 to 30.\n"); + } + } + + if (attr_mask & IB_QP_RETRY_CNT) { + roce_set_field(context->byte_212_lsn, + V2_QPC_BYTE_212_RETRY_NUM_INIT_M, + V2_QPC_BYTE_212_RETRY_NUM_INIT_S, + attr->retry_cnt); + roce_set_field(qpc_mask->byte_212_lsn, + V2_QPC_BYTE_212_RETRY_NUM_INIT_M, + V2_QPC_BYTE_212_RETRY_NUM_INIT_S, 0); + + roce_set_field(context->byte_212_lsn, + V2_QPC_BYTE_212_RETRY_CNT_M, + V2_QPC_BYTE_212_RETRY_CNT_S, + attr->retry_cnt); + roce_set_field(qpc_mask->byte_212_lsn, + V2_QPC_BYTE_212_RETRY_CNT_M, + V2_QPC_BYTE_212_RETRY_CNT_S, 0); + } + + if (attr_mask & IB_QP_RNR_RETRY) { + roce_set_field(context->byte_244_rnr_rxack, + V2_QPC_BYTE_244_RNR_NUM_INIT_M, + V2_QPC_BYTE_244_RNR_NUM_INIT_S, 
attr->rnr_retry); + roce_set_field(qpc_mask->byte_244_rnr_rxack, + V2_QPC_BYTE_244_RNR_NUM_INIT_M, + V2_QPC_BYTE_244_RNR_NUM_INIT_S, 0); + + roce_set_field(context->byte_244_rnr_rxack, + V2_QPC_BYTE_244_RNR_CNT_M, + V2_QPC_BYTE_244_RNR_CNT_S, attr->rnr_retry); + roce_set_field(qpc_mask->byte_244_rnr_rxack, + V2_QPC_BYTE_244_RNR_CNT_M, + V2_QPC_BYTE_244_RNR_CNT_S, 0); + } + if (attr_mask & IB_QP_SQ_PSN) { roce_set_field(context->byte_172_sq_psn, V2_QPC_BYTE_172_SQ_CUR_PSN_M, @@ -4187,9 +4163,37 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, V2_QPC_BYTE_244_RX_ACK_EPSN_S, 0); } + if ((attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) && + attr->max_dest_rd_atomic) { + roce_set_field(context->byte_140_raq, V2_QPC_BYTE_140_RR_MAX_M, + V2_QPC_BYTE_140_RR_MAX_S, + fls(attr->max_dest_rd_atomic - 1)); + roce_set_field(qpc_mask->byte_140_raq, V2_QPC_BYTE_140_RR_MAX_M, + V2_QPC_BYTE_140_RR_MAX_S, 0); + } + + if ((attr_mask & IB_QP_MAX_QP_RD_ATOMIC) && attr->max_rd_atomic) { + roce_set_field(context->byte_208_irrl, V2_QPC_BYTE_208_SR_MAX_M, + V2_QPC_BYTE_208_SR_MAX_S, + fls(attr->max_rd_atomic - 1)); + roce_set_field(qpc_mask->byte_208_irrl, + V2_QPC_BYTE_208_SR_MAX_M, + V2_QPC_BYTE_208_SR_MAX_S, 0); + } + if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) set_access_flags(hr_qp, context, qpc_mask, attr, attr_mask); + if (attr_mask & IB_QP_MIN_RNR_TIMER) { + roce_set_field(context->byte_80_rnr_rx_cqn, + V2_QPC_BYTE_80_MIN_RNR_TIME_M, + V2_QPC_BYTE_80_MIN_RNR_TIME_S, + attr->min_rnr_timer); + roce_set_field(qpc_mask->byte_80_rnr_rx_cqn, + V2_QPC_BYTE_80_MIN_RNR_TIME_M, + V2_QPC_BYTE_80_MIN_RNR_TIME_S, 0); + } + /* RC&UC required attr */ if (attr_mask & IB_QP_RQ_PSN) { roce_set_field(context->byte_108_rx_reqepsn, @@ -4206,6 +4210,12 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, V2_QPC_BYTE_152_RAQ_PSN_S, 0); } + if (attr_mask & IB_QP_QKEY) { + context->qkey_xrcd = attr->qkey; + qpc_mask->qkey_xrcd = 0; + hr_qp->qkey = attr->qkey; + } + 
roce_set_bit(context->byte_108_rx_reqepsn, V2_QPC_BYTE_108_INV_CREDIT_S, ibqp->srq ? 1 : 0); roce_set_bit(qpc_mask->byte_108_rx_reqepsn, -- cgit v1.2.3 From 07c2339a91c1ec3a8b8ada00361eced7b153ec0c Mon Sep 17 00:00:00 2001 From: Lijun Ou Date: Sat, 23 Feb 2019 20:01:25 +0800 Subject: RDMA/hns: Hide error print information with roce vf device The driver should not print the error information when the hip08 driver does not support the virtual function. Signed-off-by: Lijun Ou Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 7237dce41aa0..a798cbd6a819 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -6136,15 +6136,8 @@ static int hns_roce_hw_v2_get_cfg(struct hns_roce_dev *hr_dev, struct hnae3_handle *handle) { struct hns_roce_v2_priv *priv = hr_dev->priv; - const struct pci_device_id *id; int i; - id = pci_match_id(hns_roce_hw_v2_pci_tbl, hr_dev->pci_dev); - if (!id) { - dev_err(hr_dev->dev, "device is not compatible!\n"); - return -ENXIO; - } - hr_dev->hw = &hns_roce_hw_v2; hr_dev->sdb_offset = ROCEE_DB_SQ_L_0_REG; hr_dev->odb_offset = hr_dev->sdb_offset; @@ -6232,6 +6225,7 @@ static void __hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle, static int hns_roce_hw_v2_init_instance(struct hnae3_handle *handle) { const struct hnae3_ae_ops *ops = handle->ae_algo->ops; + const struct pci_device_id *id; struct device *dev = &handle->pdev->dev; int ret; @@ -6242,6 +6236,10 @@ static int hns_roce_hw_v2_init_instance(struct hnae3_handle *handle) goto reset_chk_err; } + id = pci_match_id(hns_roce_hw_v2_pci_tbl, handle->pdev); + if (!id) + return 0; + ret = __hns_roce_hw_v2_init_instance(handle); if (ret) { handle->rinfo.instance_state = HNS_ROCE_STATE_NON_INIT; -- cgit v1.2.3 From 
82342e493b7e53f5e0d0698a48190f05e84d6690 Mon Sep 17 00:00:00 2001 From: Lijun Ou Date: Sat, 23 Feb 2019 20:01:26 +0800 Subject: RDMA/hns: Bugfix for sending with invalidate According to IB protocol, the send with invalidate operation will not invalidate mr that was created through a register mr or reregister mr. Fixes: e93df0108579 ("RDMA/hns: Support local invalidate for hip08 in kernel space") Signed-off-by: Lijun Ou Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index a798cbd6a819..23ea16c6402c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -2150,7 +2150,7 @@ static int hns_roce_v2_write_mtpt(void *mb_buf, struct hns_roce_mr *mr, V2_MPT_BYTE_4_PD_S, mr->pd); roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RA_EN_S, 0); - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_R_INV_EN_S, 1); + roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_R_INV_EN_S, 0); roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 1); roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_BIND_EN_S, (mr->access & IB_ACCESS_MW_BIND ? 1 : 0)); -- cgit v1.2.3 From d0a935563bc0f447abed7799388fa3f13099cc0d Mon Sep 17 00:00:00 2001 From: Lijun Ou Date: Sat, 23 Feb 2019 20:01:28 +0800 Subject: RDMA/hns: Delete unused variable in hns_roce_v2_modify_qp function The src_mac array is not used in hns_roce_v2_modify_qp function. 
Signed-off-by: Lijun Ou Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 23ea16c6402c..14e89454e269 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -3970,7 +3970,6 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr); const struct ib_gid_attr *gid_attr = NULL; - u8 src_mac[ETH_ALEN]; int is_roce_protocol; u16 vlan = 0xffff; u8 ib_port; @@ -3985,7 +3984,6 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, if (is_roce_protocol) { gid_attr = attr->ah_attr.grh.sgid_attr; vlan = rdma_vlan_dev_vlan_id(gid_attr->ndev); - memcpy(src_mac, gid_attr->ndev->dev_addr, ETH_ALEN); } if (is_vlan_dev(gid_attr->ndev)) { -- cgit v1.2.3 From 19b1a294b0b3f4e8080584cd560fc058f12123fb Mon Sep 17 00:00:00 2001 From: Erez Alfasi Date: Mon, 25 Feb 2019 08:52:30 +0200 Subject: RDMA: Use __packed annotation instead of __attribute__ ((packed)) The "__attribute__" set of shortcut macros was standardized back in v2.6.21 by commit 82ddcb040 ("[PATCH] extend the set of "__attribute__" shortcut macros"), making code potentially more portable and consistent. Moreover, nowadays checkpatch.pl warns about using __attribute__((packed)) instead of __packed. This patch converts all the "__attribute__ ((packed))" annotations to "__packed" within the RDMA subsystem. 
Signed-off-by: Erez Alfasi Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cm_msgs.h | 22 +++++++++++----------- drivers/infiniband/core/mad_priv.h | 4 ++-- drivers/infiniband/hw/cxgb3/cxio_wr.h | 10 +++++----- drivers/infiniband/hw/mthca/mthca_cq.c | 2 +- drivers/infiniband/hw/mthca/mthca_eq.c | 16 ++++++++-------- drivers/infiniband/hw/mthca/mthca_mr.c | 2 +- drivers/infiniband/hw/mthca/mthca_qp.c | 6 +++--- drivers/infiniband/sw/rxe/rxe_hdr.h | 2 +- drivers/infiniband/ulp/iser/iscsi_iser.h | 2 +- include/rdma/ib_mad.h | 4 ++-- include/rdma/ib_smi.h | 2 +- include/rdma/opa_port_info.h | 2 +- include/rdma/opa_smi.h | 4 ++-- 13 files changed, 39 insertions(+), 39 deletions(-) diff --git a/drivers/infiniband/core/cm_msgs.h b/drivers/infiniband/core/cm_msgs.h index 476d4309576d..3d16d614aff6 100644 --- a/drivers/infiniband/core/cm_msgs.h +++ b/drivers/infiniband/core/cm_msgs.h @@ -98,7 +98,7 @@ struct cm_req_msg { u32 private_data[IB_CM_REQ_PRIVATE_DATA_SIZE / sizeof(u32)]; -} __attribute__ ((packed)); +} __packed; static inline __be32 cm_req_get_local_qpn(struct cm_req_msg *req_msg) { @@ -423,7 +423,7 @@ enum cm_msg_response { u8 private_data[IB_CM_MRA_PRIVATE_DATA_SIZE]; -} __attribute__ ((packed)); +} __packed; static inline u8 cm_mra_get_msg_mraed(struct cm_mra_msg *mra_msg) { @@ -461,7 +461,7 @@ struct cm_rej_msg { u8 private_data[IB_CM_REJ_PRIVATE_DATA_SIZE]; -} __attribute__ ((packed)); +} __packed; static inline u8 cm_rej_get_msg_rejected(struct cm_rej_msg *rej_msg) { @@ -506,7 +506,7 @@ struct cm_rep_msg { u8 private_data[IB_CM_REP_PRIVATE_DATA_SIZE]; -} __attribute__ ((packed)); +} __packed; static inline __be32 cm_rep_get_local_qpn(struct cm_rep_msg *rep_msg) { @@ -614,7 +614,7 @@ struct cm_rtu_msg { u8 private_data[IB_CM_RTU_PRIVATE_DATA_SIZE]; -} __attribute__ ((packed)); +} __packed; struct cm_dreq_msg { struct ib_mad_hdr hdr; @@ -626,7 +626,7 @@ struct cm_dreq_msg { u8 
private_data[IB_CM_DREQ_PRIVATE_DATA_SIZE]; -} __attribute__ ((packed)); +} __packed; static inline __be32 cm_dreq_get_remote_qpn(struct cm_dreq_msg *dreq_msg) { @@ -647,7 +647,7 @@ struct cm_drep_msg { u8 private_data[IB_CM_DREP_PRIVATE_DATA_SIZE]; -} __attribute__ ((packed)); +} __packed; struct cm_lap_msg { struct ib_mad_hdr hdr; @@ -675,7 +675,7 @@ struct cm_lap_msg { u8 offset63; u8 private_data[IB_CM_LAP_PRIVATE_DATA_SIZE]; -} __attribute__ ((packed)); +} __packed; static inline __be32 cm_lap_get_remote_qpn(struct cm_lap_msg *lap_msg) { @@ -784,7 +784,7 @@ struct cm_apr_msg { u8 info[IB_CM_APR_INFO_LENGTH]; u8 private_data[IB_CM_APR_PRIVATE_DATA_SIZE]; -} __attribute__ ((packed)); +} __packed; struct cm_sidr_req_msg { struct ib_mad_hdr hdr; @@ -795,7 +795,7 @@ struct cm_sidr_req_msg { __be64 service_id; u32 private_data[IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE / sizeof(u32)]; -} __attribute__ ((packed)); +} __packed; struct cm_sidr_rep_msg { struct ib_mad_hdr hdr; @@ -811,7 +811,7 @@ struct cm_sidr_rep_msg { u8 info[IB_CM_SIDR_REP_INFO_LENGTH]; u8 private_data[IB_CM_SIDR_REP_PRIVATE_DATA_SIZE]; -} __attribute__ ((packed)); +} __packed; static inline __be32 cm_sidr_rep_get_qpn(struct cm_sidr_rep_msg *sidr_rep_msg) { diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h index 216509036aa8..956b3a7dfed7 100644 --- a/drivers/infiniband/core/mad_priv.h +++ b/drivers/infiniband/core/mad_priv.h @@ -73,14 +73,14 @@ struct ib_mad_private_header { struct ib_mad_recv_wc recv_wc; struct ib_wc wc; u64 mapping; -} __attribute__ ((packed)); +} __packed; struct ib_mad_private { struct ib_mad_private_header header; size_t mad_size; struct ib_grh grh; u8 mad[0]; -} __attribute__ ((packed)); +} __packed; struct ib_rmpp_segment { struct list_head list; diff --git a/drivers/infiniband/hw/cxgb3/cxio_wr.h b/drivers/infiniband/hw/cxgb3/cxio_wr.h index 83d2e19d31ae..53aa5c36247a 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_wr.h +++ 
b/drivers/infiniband/hw/cxgb3/cxio_wr.h @@ -64,7 +64,7 @@ enum t3_wr_flags { T3_SOLICITED_EVENT_FLAG = 0x04, T3_READ_FENCE_FLAG = 0x08, T3_LOCAL_FENCE_FLAG = 0x10 -} __attribute__ ((packed)); +} __packed; enum t3_wr_opcode { T3_WR_BP = FW_WROPCODE_RI_BYPASS, @@ -77,7 +77,7 @@ enum t3_wr_opcode { T3_WR_INIT = FW_WROPCODE_RI_RDMA_INIT, T3_WR_QP_MOD = FW_WROPCODE_RI_MODIFY_QP, T3_WR_FASTREG = FW_WROPCODE_RI_FASTREGISTER_MR -} __attribute__ ((packed)); +} __packed; enum t3_rdma_opcode { T3_RDMA_WRITE, /* IETF RDMAP v1.0 ... */ @@ -95,7 +95,7 @@ enum t3_rdma_opcode { T3_QP_MOD, T3_BYPASS, T3_RDMA_READ_REQ_WITH_INV, -} __attribute__ ((packed)); +} __packed; static inline enum t3_rdma_opcode wr2opcode(enum t3_wr_opcode wrop) { @@ -306,7 +306,7 @@ enum t3_mpa_attrs { uP_RI_MPA_TX_MARKER_ENABLE = 0x2, uP_RI_MPA_CRC_ENABLE = 0x4, uP_RI_MPA_IETF_ENABLE = 0x8 -} __attribute__ ((packed)); +} __packed; enum t3_qp_caps { uP_RI_QP_RDMA_READ_ENABLE = 0x01, @@ -314,7 +314,7 @@ enum t3_qp_caps { uP_RI_QP_BIND_ENABLE = 0x04, uP_RI_QP_FAST_REGISTER_ENABLE = 0x08, uP_RI_QP_STAG0_ENABLE = 0x10 -} __attribute__ ((packed)); +} __packed; enum rdma_init_rtr_types { RTR_READ = 1, diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c index a6531ffe29a6..97c7c0ff0f42 100644 --- a/drivers/infiniband/hw/mthca/mthca_cq.c +++ b/drivers/infiniband/hw/mthca/mthca_cq.c @@ -77,7 +77,7 @@ struct mthca_cq_context { __be32 ci_db; /* Arbel only */ __be32 state_db; /* Arbel only */ u32 reserved; -} __attribute__((packed)); +} __packed; #define MTHCA_CQ_STATUS_OK ( 0 << 28) #define MTHCA_CQ_STATUS_OVERFLOW ( 9 << 28) diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c index 30400ea4808b..2cdf686203c1 100644 --- a/drivers/infiniband/hw/mthca/mthca_eq.c +++ b/drivers/infiniband/hw/mthca/mthca_eq.c @@ -63,7 +63,7 @@ struct mthca_eq_context { __be32 consumer_index; __be32 producer_index; u32 reserved3[4]; -} __attribute__((packed)); 
+} __packed; #define MTHCA_EQ_STATUS_OK ( 0 << 28) #define MTHCA_EQ_STATUS_OVERFLOW ( 9 << 28) @@ -130,7 +130,7 @@ struct mthca_eqe { u32 raw[6]; struct { __be32 cqn; - } __attribute__((packed)) comp; + } __packed comp; struct { u16 reserved1; __be16 token; @@ -138,27 +138,27 @@ struct mthca_eqe { u8 reserved3[3]; u8 status; __be64 out_param; - } __attribute__((packed)) cmd; + } __packed cmd; struct { __be32 qpn; - } __attribute__((packed)) qp; + } __packed qp; struct { __be32 srqn; - } __attribute__((packed)) srq; + } __packed srq; struct { __be32 cqn; u32 reserved1; u8 reserved2[3]; u8 syndrome; - } __attribute__((packed)) cq_err; + } __packed cq_err; struct { u32 reserved1[2]; __be32 port; - } __attribute__((packed)) port_change; + } __packed port_change; } event; u8 reserved3[3]; u8 owner; -} __attribute__((packed)); +} __packed; #define MTHCA_EQ_ENTRY_OWNER_SW (0 << 7) #define MTHCA_EQ_ENTRY_OWNER_HW (1 << 7) diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c index 6686042aafb4..4250b2c18c64 100644 --- a/drivers/infiniband/hw/mthca/mthca_mr.c +++ b/drivers/infiniband/hw/mthca/mthca_mr.c @@ -60,7 +60,7 @@ struct mthca_mpt_entry { __be64 mtt_seg; __be32 mtt_sz; /* Arbel only */ u32 reserved[2]; -} __attribute__((packed)); +} __packed; #define MTHCA_MPT_FLAG_SW_OWNS (0xfUL << 28) #define MTHCA_MPT_FLAG_MIO (1 << 17) diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index 7a5b25d13faa..6d3a00d28e90 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -115,7 +115,7 @@ struct mthca_qp_path { u8 hop_limit; __be32 sl_tclass_flowlabel; u8 rgid[16]; -} __attribute__((packed)); +} __packed; struct mthca_qp_context { __be32 flags; @@ -154,14 +154,14 @@ struct mthca_qp_context { __be16 rq_wqe_counter; /* reserved on Tavor */ __be16 sq_wqe_counter; /* reserved on Tavor */ u32 reserved3[18]; -} __attribute__((packed)); +} __packed; struct 
mthca_qp_param { __be32 opt_param_mask; u32 reserved1; struct mthca_qp_context context; u32 reserved2[62]; -} __attribute__((packed)); +} __packed; enum { MTHCA_QP_OPTPAR_ALT_ADDR_PATH = 1 << 0, diff --git a/drivers/infiniband/sw/rxe/rxe_hdr.h b/drivers/infiniband/sw/rxe/rxe_hdr.h index 6cb18406f5b8..ce003666b800 100644 --- a/drivers/infiniband/sw/rxe/rxe_hdr.h +++ b/drivers/infiniband/sw/rxe/rxe_hdr.h @@ -643,7 +643,7 @@ struct rxe_atmeth { __be32 rkey; __be64 swap_add; __be64 comp; -} __attribute__((__packed__)); +} __packed; static inline u64 __atmeth_va(void *arg) { diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index a7aeaa0c6fbc..36d525110fd2 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -311,7 +311,7 @@ struct iser_login_desc { u64 rsp_dma; struct ib_sge sge; struct ib_cqe cqe; -} __attribute__((packed)); +} __packed; struct iser_conn; struct ib_conn; diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h index 79ba8219e7dc..eea946fcc819 100644 --- a/include/rdma/ib_mad.h +++ b/include/rdma/ib_mad.h @@ -198,7 +198,7 @@ struct ib_sa_hdr { __be16 attr_offset; __be16 reserved; ib_sa_comp_mask comp_mask; -} __attribute__ ((packed)); +} __packed; struct ib_mad { struct ib_mad_hdr mad_hdr; @@ -227,7 +227,7 @@ struct ib_sa_mad { struct ib_rmpp_hdr rmpp_hdr; struct ib_sa_hdr sa_hdr; u8 data[IB_MGMT_SA_DATA]; -} __attribute__ ((packed)); +} __packed; struct ib_vendor_mad { struct ib_mad_hdr mad_hdr; diff --git a/include/rdma/ib_smi.h b/include/rdma/ib_smi.h index b439e988408e..7be0028f155c 100644 --- a/include/rdma/ib_smi.h +++ b/include/rdma/ib_smi.h @@ -61,7 +61,7 @@ struct ib_smp { u8 data[IB_SMP_DATA_SIZE]; u8 initial_path[IB_SMP_MAX_PATH_HOPS]; u8 return_path[IB_SMP_MAX_PATH_HOPS]; -} __attribute__ ((packed)); +} __packed; #define IB_SMP_DIRECTION cpu_to_be16(0x8000) diff --git a/include/rdma/opa_port_info.h b/include/rdma/opa_port_info.h index 
b4f0ac02f283..7147a9263011 100644 --- a/include/rdma/opa_port_info.h +++ b/include/rdma/opa_port_info.h @@ -413,6 +413,6 @@ struct opa_port_info { u8 local_port_num; u8 reserved12; u8 reserved13; /* was guid_cap */ -} __attribute__ ((packed)); +} __packed; #endif /* OPA_PORT_INFO_H */ diff --git a/include/rdma/opa_smi.h b/include/rdma/opa_smi.h index f7896117936e..c7b2ef12792d 100644 --- a/include/rdma/opa_smi.h +++ b/include/rdma/opa_smi.h @@ -98,7 +98,7 @@ struct opa_smp { struct opa_node_description { u8 data[64]; -} __attribute__ ((packed)); +} __packed; struct opa_node_info { u8 base_version; @@ -114,7 +114,7 @@ struct opa_node_info { __be32 revision; u8 local_port_num; u8 vendor_id[3]; /* network byte order */ -} __attribute__ ((packed)); +} __packed; #define OPA_PARTITION_TABLE_BLK_SIZE 32 -- cgit v1.2.3 From 270a9833b2697d455f6ae12669b8220d3ea48f18 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Tue, 26 Feb 2019 08:46:16 -0800 Subject: IB/hfi1: Add running average for adaptive pio The adaptive PIO implementation only considers the current packet size when deciding between SDMA and pio for a packet. This causes credit return forces if small and large packets are interleaved. Add a running average to avoid costly credit forces so that a large sequence of small packets is required to go below the threshold that chooses pio. Reviewed-by: Michael J. 
Ruhl Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/qp.c | 2 ++ drivers/infiniband/hw/hfi1/verbs.c | 7 ++++--- drivers/infiniband/hw/hfi1/verbs.h | 1 + 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 9b643c2409cf..1390172b488e 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -742,6 +742,8 @@ void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp) iowait_wakeup, iowait_sdma_drained, hfi1_init_priority); + /* Init to a value to start the running average correctly */ + priv->s_running_pkt_size = piothreshold / 2; return priv; } diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 55a56b3d7f83..b73ab7c64d91 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1223,15 +1223,16 @@ static inline send_routine get_send_routine(struct rvt_qp *qp, case IB_QPT_UD: break; case IB_QPT_UC: - case IB_QPT_RC: { + case IB_QPT_RC: + priv->s_running_pkt_size = + (tx->s_cur_size + priv->s_running_pkt_size) / 2; if (piothreshold && - tx->s_cur_size <= min(piothreshold, qp->pmtu) && + priv->s_running_pkt_size <= min(piothreshold, qp->pmtu) && (BIT(ps->opcode & OPMASK) & pio_opmask[ps->opcode >> 5]) && iowait_sdma_pending(&priv->s_iowait) == 0 && !sdma_txreq_built(&tx->txreq)) return dd->process_pio_send; break; - } default: break; } diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index 62ace0b2d17a..7ecb8ed4a1d9 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -170,6 +170,7 @@ struct hfi1_qp_priv { struct tid_flow_state flow_state; struct tid_rdma_qp_params tid_rdma; struct rvt_qp *owner; + u16 s_running_pkt_size; u8 hdr_type; /* 9B or 16B */ struct rvt_sge_state tid_ss; /* SGE state pointer for 2nd leg */ atomic_t 
n_requests; /* # of TID RDMA requests in the */ -- cgit v1.2.3 From b02a29eb8841bd4f210110bddb346c80478366a1 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 20 Feb 2019 16:20:37 -0800 Subject: mlx5: Convert mlx5_srq_table to XArray Remove the custom spinlock as the XArray handles its own locking. Signed-off-by: Matthew Wilcox Acked-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/srq.h | 5 +---- drivers/infiniband/hw/mlx5/srq_cmd.c | 27 +++++++++------------------ 2 files changed, 10 insertions(+), 22 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/srq.h b/drivers/infiniband/hw/mlx5/srq.h index c330af35ff10..2c3627b2509d 100644 --- a/drivers/infiniband/hw/mlx5/srq.h +++ b/drivers/infiniband/hw/mlx5/srq.h @@ -51,10 +51,7 @@ struct mlx5_core_srq { struct mlx5_srq_table { struct notifier_block nb; - /* protect radix tree - */ - spinlock_t lock; - struct radix_tree_root tree; + struct xarray array; }; int mlx5_cmd_create_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, diff --git a/drivers/infiniband/hw/mlx5/srq_cmd.c b/drivers/infiniband/hw/mlx5/srq_cmd.c index 63ac38bb3498..1e04319684f4 100644 --- a/drivers/infiniband/hw/mlx5/srq_cmd.c +++ b/drivers/infiniband/hw/mlx5/srq_cmd.c @@ -83,13 +83,11 @@ struct mlx5_core_srq *mlx5_cmd_get_srq(struct mlx5_ib_dev *dev, u32 srqn) struct mlx5_srq_table *table = &dev->srq_table; struct mlx5_core_srq *srq; - spin_lock(&table->lock); - - srq = radix_tree_lookup(&table->tree, srqn); + xa_lock(&table->array); + srq = xa_load(&table->array, srqn); if (srq) atomic_inc(&srq->common.refcount); - - spin_unlock(&table->lock); + xa_unlock(&table->array); return srq; } @@ -597,9 +595,7 @@ int mlx5_cmd_create_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, atomic_set(&srq->common.refcount, 1); init_completion(&srq->common.free); - spin_lock_irq(&table->lock); - err = radix_tree_insert(&table->tree, srq->srqn, srq); - spin_unlock_irq(&table->lock); + err = 
xa_err(xa_store_irq(&table->array, srq->srqn, srq, GFP_KERNEL)); if (err) goto err_destroy_srq_split; @@ -617,9 +613,7 @@ int mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq) struct mlx5_core_srq *tmp; int err; - spin_lock_irq(&table->lock); - tmp = radix_tree_delete(&table->tree, srq->srqn); - spin_unlock_irq(&table->lock); + tmp = xa_erase_irq(&table->array, srq->srqn); if (!tmp || tmp != srq) return -EINVAL; @@ -680,13 +674,11 @@ static int srq_event_notifier(struct notifier_block *nb, eqe = data; srqn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff; - spin_lock(&table->lock); - - srq = radix_tree_lookup(&table->tree, srqn); + xa_lock(&table->array); + srq = xa_load(&table->array, srqn); if (srq) atomic_inc(&srq->common.refcount); - - spin_unlock(&table->lock); + xa_unlock(&table->array); if (!srq) return NOTIFY_OK; @@ -703,8 +695,7 @@ int mlx5_init_srq_table(struct mlx5_ib_dev *dev) struct mlx5_srq_table *table = &dev->srq_table; memset(table, 0, sizeof(*table)); - spin_lock_init(&table->lock); - INIT_RADIX_TREE(&table->tree, GFP_ATOMIC); + xa_init_flags(&table->array, XA_FLAGS_LOCK_IRQ); table->nb.notifier_call = srq_event_notifier; mlx5_notifier_register(dev->mdev, &table->nb); -- cgit v1.2.3 From f1430536e008cd3b70794e12c414c20d54aabec2 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 20 Feb 2019 16:20:44 -0800 Subject: mlx4: Convert pv_id_table to XArray Signed-off-by: Matthew Wilcox Acked-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx4/cm.c | 36 +++++++++++++----------------------- drivers/infiniband/hw/mlx4/mlx4_ib.h | 5 +++-- 2 files changed, 16 insertions(+), 25 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/cm.c b/drivers/infiniband/hw/mlx4/cm.c index 8c79a480f2b7..ecd6cadd529a 100644 --- a/drivers/infiniband/hw/mlx4/cm.c +++ b/drivers/infiniband/hw/mlx4/cm.c @@ -168,20 +168,17 @@ static void id_map_ent_timeout(struct work_struct *work) { struct delayed_work *delay = 
to_delayed_work(work); struct id_map_entry *ent = container_of(delay, struct id_map_entry, timeout); - struct id_map_entry *db_ent, *found_ent; + struct id_map_entry *found_ent; struct mlx4_ib_dev *dev = ent->dev; struct mlx4_ib_sriov *sriov = &dev->sriov; struct rb_root *sl_id_map = &sriov->sl_id_map; - int pv_id = (int) ent->pv_cm_id; spin_lock(&sriov->id_map_lock); - db_ent = (struct id_map_entry *)idr_find(&sriov->pv_id_table, pv_id); - if (!db_ent) + if (!xa_erase(&sriov->pv_id_table, ent->pv_cm_id)) goto out; found_ent = id_map_find_by_sl_id(&dev->ib_dev, ent->slave_id, ent->sl_cm_id); if (found_ent && found_ent == ent) rb_erase(&found_ent->node, sl_id_map); - idr_remove(&sriov->pv_id_table, pv_id); out: list_del(&ent->list); @@ -196,13 +193,12 @@ static void id_map_find_del(struct ib_device *ibdev, int pv_cm_id) struct id_map_entry *ent, *found_ent; spin_lock(&sriov->id_map_lock); - ent = (struct id_map_entry *)idr_find(&sriov->pv_id_table, pv_cm_id); + ent = xa_erase(&sriov->pv_id_table, pv_cm_id); if (!ent) goto out; found_ent = id_map_find_by_sl_id(ibdev, ent->slave_id, ent->sl_cm_id); if (found_ent && found_ent == ent) rb_erase(&found_ent->node, sl_id_map); - idr_remove(&sriov->pv_id_table, pv_cm_id); out: spin_unlock(&sriov->id_map_lock); } @@ -256,25 +252,19 @@ id_map_alloc(struct ib_device *ibdev, int slave_id, u32 sl_cm_id) ent->dev = to_mdev(ibdev); INIT_DELAYED_WORK(&ent->timeout, id_map_ent_timeout); - idr_preload(GFP_KERNEL); - spin_lock(&to_mdev(ibdev)->sriov.id_map_lock); - - ret = idr_alloc_cyclic(&sriov->pv_id_table, ent, 0, 0, GFP_NOWAIT); + ret = xa_alloc_cyclic(&sriov->pv_id_table, &ent->pv_cm_id, ent, + xa_limit_32b, &sriov->pv_id_next, GFP_KERNEL); if (ret >= 0) { - ent->pv_cm_id = (u32)ret; + spin_lock(&sriov->id_map_lock); sl_id_map_add(ibdev, ent); list_add_tail(&ent->list, &sriov->cm_list); - } - - spin_unlock(&sriov->id_map_lock); - idr_preload_end(); - - if (ret >= 0) + spin_unlock(&sriov->id_map_lock); return ent; + } /*error 
flow*/ kfree(ent); - mlx4_ib_warn(ibdev, "No more space in the idr (err:0x%x)\n", ret); + mlx4_ib_warn(ibdev, "Allocation failed (err:0x%x)\n", ret); return ERR_PTR(-ENOMEM); } @@ -290,7 +280,7 @@ id_map_get(struct ib_device *ibdev, int *pv_cm_id, int slave_id, int sl_cm_id) if (ent) *pv_cm_id = (int) ent->pv_cm_id; } else - ent = (struct id_map_entry *)idr_find(&sriov->pv_id_table, *pv_cm_id); + ent = xa_load(&sriov->pv_id_table, *pv_cm_id); spin_unlock(&sriov->id_map_lock); return ent; @@ -407,7 +397,7 @@ void mlx4_ib_cm_paravirt_init(struct mlx4_ib_dev *dev) spin_lock_init(&dev->sriov.id_map_lock); INIT_LIST_HEAD(&dev->sriov.cm_list); dev->sriov.sl_id_map = RB_ROOT; - idr_init(&dev->sriov.pv_id_table); + xa_init_flags(&dev->sriov.pv_id_table, XA_FLAGS_ALLOC); } /* slave = -1 ==> all slaves */ @@ -444,7 +434,7 @@ void mlx4_ib_cm_paravirt_clean(struct mlx4_ib_dev *dev, int slave) struct id_map_entry, node); rb_erase(&ent->node, sl_id_map); - idr_remove(&sriov->pv_id_table, (int) ent->pv_cm_id); + xa_erase(&sriov->pv_id_table, ent->pv_cm_id); } list_splice_init(&dev->sriov.cm_list, &lh); } else { @@ -460,7 +450,7 @@ void mlx4_ib_cm_paravirt_clean(struct mlx4_ib_dev *dev, int slave) /* remove those nodes from databases */ list_for_each_entry_safe(map, tmp_map, &lh, list) { rb_erase(&map->node, sl_id_map); - idr_remove(&sriov->pv_id_table, (int) map->pv_cm_id); + xa_erase(&sriov->pv_id_table, map->pv_cm_id); } /* add remaining nodes from cm_list */ diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 60dc1347c5ab..24633fc29a29 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -492,10 +492,11 @@ struct mlx4_ib_sriov { struct mlx4_sriov_alias_guid alias_guid; /* CM paravirtualization fields */ - struct list_head cm_list; + struct xarray pv_id_table; + u32 pv_id_next; spinlock_t id_map_lock; struct rb_root sl_id_map; - struct idr pv_id_table; + struct list_head cm_list; }; struct 
gid_cache_context { -- cgit v1.2.3 From 949a2370466b09682ad0afb33826f83403458a57 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 20 Feb 2019 16:20:42 -0800 Subject: IB/mad: Convert ib_mad_clients to XArray Pull the allocation function out into its own function to reduce the length of ib_register_mad_agent() a little and keep all the allocation logic together. Signed-off-by: Matthew Wilcox Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/mad.c | 39 ++++++++++++++------------------------- 1 file changed, 14 insertions(+), 25 deletions(-) diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index e742a6a2c138..96571fa3abd8 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -38,10 +38,10 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include -#include #include #include #include +#include #include #include "mad_priv.h" @@ -59,12 +59,9 @@ MODULE_PARM_DESC(send_queue_size, "Size of send queue in number of work requests module_param_named(recv_queue_size, mad_recvq_size, int, 0444); MODULE_PARM_DESC(recv_queue_size, "Size of receive queue in number of work requests"); -/* - * The mlx4 driver uses the top byte to distinguish which virtual function - * generated the MAD, so we must avoid using it. - */ -#define AGENT_ID_LIMIT (1 << 24) -static DEFINE_IDR(ib_mad_clients); +/* Client ID 0 is used for snoop-only clients */ +static DEFINE_XARRAY_ALLOC1(ib_mad_clients); +static u32 ib_mad_client_next; static struct list_head ib_mad_port_list; /* Port list lock */ @@ -389,18 +386,17 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, goto error4; } - idr_preload(GFP_KERNEL); - idr_lock(&ib_mad_clients); - ret2 = idr_alloc_cyclic(&ib_mad_clients, mad_agent_priv, 0, - AGENT_ID_LIMIT, GFP_ATOMIC); - idr_unlock(&ib_mad_clients); - idr_preload_end(); - + /* + * The mlx4 driver uses the top byte to distinguish which virtual + * function generated the MAD, so we must avoid using it. 
+ */ + ret2 = xa_alloc_cyclic(&ib_mad_clients, &mad_agent_priv->agent.hi_tid, + mad_agent_priv, XA_LIMIT(0, (1 << 24) - 1), + &ib_mad_client_next, GFP_KERNEL); if (ret2 < 0) { ret = ERR_PTR(ret2); goto error5; } - mad_agent_priv->agent.hi_tid = ret2; /* * Make sure MAD registration (if supplied) @@ -448,9 +444,7 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, return &mad_agent_priv->agent; error6: spin_unlock_irq(&port_priv->reg_lock); - idr_lock(&ib_mad_clients); - idr_remove(&ib_mad_clients, mad_agent_priv->agent.hi_tid); - idr_unlock(&ib_mad_clients); + xa_erase(&ib_mad_clients, mad_agent_priv->agent.hi_tid); error5: ib_mad_agent_security_cleanup(&mad_agent_priv->agent); error4: @@ -614,9 +608,7 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv) spin_lock_irq(&port_priv->reg_lock); remove_mad_reg_req(mad_agent_priv); spin_unlock_irq(&port_priv->reg_lock); - idr_lock(&ib_mad_clients); - idr_remove(&ib_mad_clients, mad_agent_priv->agent.hi_tid); - idr_unlock(&ib_mad_clients); + xa_erase(&ib_mad_clients, mad_agent_priv->agent.hi_tid); flush_workqueue(port_priv->wq); ib_cancel_rmpp_recvs(mad_agent_priv); @@ -1756,7 +1748,7 @@ find_mad_agent(struct ib_mad_port_private *port_priv, */ hi_tid = be64_to_cpu(mad_hdr->tid) >> 32; rcu_read_lock(); - mad_agent = idr_find(&ib_mad_clients, hi_tid); + mad_agent = xa_load(&ib_mad_clients, hi_tid); if (mad_agent && !atomic_inc_not_zero(&mad_agent->refcount)) mad_agent = NULL; rcu_read_unlock(); @@ -3356,9 +3348,6 @@ int ib_mad_init(void) INIT_LIST_HEAD(&ib_mad_port_list); - /* Client ID 0 is used for snoop-only clients */ - idr_alloc(&ib_mad_clients, NULL, 0, 0, GFP_KERNEL); - if (ib_register_client(&mad_client)) { pr_err("Couldn't register ib_mad client\n"); return -EINVAL; -- cgit v1.2.3 From ae78ff3a0f0c23234c5c75e9bc6921eb7f4d8652 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 20 Feb 2019 16:20:43 -0800 Subject: RDMA/cm: Convert local_id_table to XArray Also 
introduce cm_local_id() to reduce the amount of boilerplate when converting a local ID to an XArray index. Signed-off-by: Matthew Wilcox Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cm.c | 42 ++++++++++++++++++------------------------ 1 file changed, 18 insertions(+), 24 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index b9416a6fca36..5671c92b69bd 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -124,7 +124,8 @@ static struct ib_cm { struct rb_root remote_qp_table; struct rb_root remote_id_table; struct rb_root remote_sidr_table; - struct idr local_id_table; + struct xarray local_id_table; + u32 local_id_next; __be32 random_id_operand; struct list_head timewait_list; struct workqueue_struct *wq; @@ -598,35 +599,31 @@ static int cm_init_av_by_path(struct sa_path_rec *path, static int cm_alloc_id(struct cm_id_private *cm_id_priv) { - unsigned long flags; - int id; - - idr_preload(GFP_KERNEL); - spin_lock_irqsave(&cm.lock, flags); + int err; + u32 id; - id = idr_alloc_cyclic(&cm.local_id_table, cm_id_priv, 0, 0, GFP_NOWAIT); - - spin_unlock_irqrestore(&cm.lock, flags); - idr_preload_end(); + err = xa_alloc_cyclic_irq(&cm.local_id_table, &id, cm_id_priv, + xa_limit_32b, &cm.local_id_next, GFP_KERNEL); cm_id_priv->id.local_id = (__force __be32)id ^ cm.random_id_operand; - return id < 0 ? 
id : 0; + return err; +} + +static u32 cm_local_id(__be32 local_id) +{ + return (__force u32) (local_id ^ cm.random_id_operand); } static void cm_free_id(__be32 local_id) { - spin_lock_irq(&cm.lock); - idr_remove(&cm.local_id_table, - (__force int) (local_id ^ cm.random_id_operand)); - spin_unlock_irq(&cm.lock); + xa_erase_irq(&cm.local_id_table, cm_local_id(local_id)); } static struct cm_id_private * cm_get_id(__be32 local_id, __be32 remote_id) { struct cm_id_private *cm_id_priv; - cm_id_priv = idr_find(&cm.local_id_table, - (__force int) (local_id ^ cm.random_id_operand)); + cm_id_priv = xa_load(&cm.local_id_table, cm_local_id(local_id)); if (cm_id_priv) { if (cm_id_priv->id.remote_id == remote_id) atomic_inc(&cm_id_priv->refcount); @@ -2824,9 +2821,8 @@ static struct cm_id_private * cm_acquire_rejected_id(struct cm_rej_msg *rej_msg) spin_unlock_irq(&cm.lock); return NULL; } - cm_id_priv = idr_find(&cm.local_id_table, (__force int) - (timewait_info->work.local_id ^ - cm.random_id_operand)); + cm_id_priv = xa_load(&cm.local_id_table, + cm_local_id(timewait_info->work.local_id)); if (cm_id_priv) { if (cm_id_priv->id.remote_id == remote_id) atomic_inc(&cm_id_priv->refcount); @@ -4512,7 +4508,7 @@ static int __init ib_cm_init(void) cm.remote_id_table = RB_ROOT; cm.remote_qp_table = RB_ROOT; cm.remote_sidr_table = RB_ROOT; - idr_init(&cm.local_id_table); + xa_init_flags(&cm.local_id_table, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ); get_random_bytes(&cm.random_id_operand, sizeof cm.random_id_operand); INIT_LIST_HEAD(&cm.timewait_list); @@ -4538,7 +4534,6 @@ error3: error2: class_unregister(&cm_class); error1: - idr_destroy(&cm.local_id_table); return ret; } @@ -4560,9 +4555,8 @@ static void __exit ib_cm_cleanup(void) } class_unregister(&cm_class); - idr_destroy(&cm.local_id_table); + WARN_ON(!xa_empty(&cm.local_id_table)); } module_init(ib_cm_init); module_exit(ib_cm_cleanup); - -- cgit v1.2.3 From 8e5a9d61e258b5210877375485d73bb613feb2e8 Mon Sep 17 00:00:00 2001 From: 
Matthew Wilcox Date: Wed, 20 Feb 2019 16:20:46 -0800 Subject: ib core: Convert query_idr to XArray Signed-off-by: Matthew Wilcox Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/sa_query.c | 44 ++++++++++++++++---------------------- 1 file changed, 18 insertions(+), 26 deletions(-) diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 7925e45ea88a..114f890ab425 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -40,7 +40,7 @@ #include #include #include -#include +#include #include #include #include @@ -183,8 +183,7 @@ static struct ib_client sa_client = { .remove = ib_sa_remove_one }; -static DEFINE_SPINLOCK(idr_lock); -static DEFINE_IDR(query_idr); +static DEFINE_XARRAY_FLAGS(queries, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ); static DEFINE_SPINLOCK(tid_lock); static u32 tid; @@ -1180,14 +1179,14 @@ void ib_sa_cancel_query(int id, struct ib_sa_query *query) struct ib_mad_agent *agent; struct ib_mad_send_buf *mad_buf; - spin_lock_irqsave(&idr_lock, flags); - if (idr_find(&query_idr, id) != query) { - spin_unlock_irqrestore(&idr_lock, flags); + xa_lock_irqsave(&queries, flags); + if (xa_load(&queries, id) != query) { + xa_unlock_irqrestore(&queries, flags); return; } agent = query->port->agent; mad_buf = query->mad_buf; - spin_unlock_irqrestore(&idr_lock, flags); + xa_unlock_irqrestore(&queries, flags); /* * If the query is still on the netlink request list, schedule @@ -1363,21 +1362,14 @@ static void init_mad(struct ib_sa_query *query, struct ib_mad_agent *agent) static int send_mad(struct ib_sa_query *query, unsigned long timeout_ms, gfp_t gfp_mask) { - bool preload = gfpflags_allow_blocking(gfp_mask); unsigned long flags; int ret, id; - if (preload) - idr_preload(gfp_mask); - spin_lock_irqsave(&idr_lock, flags); - - id = idr_alloc(&query_idr, query, 0, 0, GFP_NOWAIT); - - spin_unlock_irqrestore(&idr_lock, flags); - if (preload) - idr_preload_end(); - if (id < 0) - return id; + 
xa_lock_irqsave(&queries, flags); + ret = __xa_alloc(&queries, &id, query, xa_limit_32b, gfp_mask); + xa_unlock_irqrestore(&queries, flags); + if (ret < 0) + return ret; query->mad_buf->timeout_ms = timeout_ms; query->mad_buf->context[0] = query; @@ -1394,9 +1386,9 @@ static int send_mad(struct ib_sa_query *query, unsigned long timeout_ms, ret = ib_post_send_mad(query->mad_buf, NULL); if (ret) { - spin_lock_irqsave(&idr_lock, flags); - idr_remove(&query_idr, id); - spin_unlock_irqrestore(&idr_lock, flags); + xa_lock_irqsave(&queries, flags); + __xa_erase(&queries, id); + xa_unlock_irqrestore(&queries, flags); } /* @@ -2188,9 +2180,9 @@ static void send_handler(struct ib_mad_agent *agent, break; } - spin_lock_irqsave(&idr_lock, flags); - idr_remove(&query_idr, query->id); - spin_unlock_irqrestore(&idr_lock, flags); + xa_lock_irqsave(&queries, flags); + __xa_erase(&queries, query->id); + xa_unlock_irqrestore(&queries, flags); free_mad(query); if (query->client) @@ -2475,5 +2467,5 @@ void ib_sa_cleanup(void) destroy_workqueue(ib_nl_wq); mcast_cleanup(); ib_unregister_client(&sa_client); - idr_destroy(&query_idr); + WARN_ON(!xa_empty(&queries)); } -- cgit v1.2.3 From 81cc440883d4f79f5d8b349bdc02818d683aeebf Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 20 Feb 2019 16:21:03 -0800 Subject: ucm: Convert ctx_id_table to XArray Signed-off-by: Matthew Wilcox Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/ucm.c | 35 +++++++++++++---------------------- 1 file changed, 13 insertions(+), 22 deletions(-) diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index 7541fbaf58a3..94fac8fda75f 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -42,7 +42,7 @@ #include #include #include -#include +#include #include #include @@ -125,23 +125,22 @@ static struct ib_client ucm_client = { .remove = ib_ucm_remove_one }; -static DEFINE_MUTEX(ctx_id_mutex); -static DEFINE_IDR(ctx_id_table); +static 
DEFINE_XARRAY_ALLOC(ctx_id_table); static DECLARE_BITMAP(dev_map, IB_UCM_MAX_DEVICES); static struct ib_ucm_context *ib_ucm_ctx_get(struct ib_ucm_file *file, int id) { struct ib_ucm_context *ctx; - mutex_lock(&ctx_id_mutex); - ctx = idr_find(&ctx_id_table, id); + xa_lock(&ctx_id_table); + ctx = xa_load(&ctx_id_table, id); if (!ctx) ctx = ERR_PTR(-ENOENT); else if (ctx->file != file) ctx = ERR_PTR(-EINVAL); else atomic_inc(&ctx->ref); - mutex_unlock(&ctx_id_mutex); + xa_unlock(&ctx_id_table); return ctx; } @@ -194,10 +193,7 @@ static struct ib_ucm_context *ib_ucm_ctx_alloc(struct ib_ucm_file *file) ctx->file = file; INIT_LIST_HEAD(&ctx->events); - mutex_lock(&ctx_id_mutex); - ctx->id = idr_alloc(&ctx_id_table, ctx, 0, 0, GFP_KERNEL); - mutex_unlock(&ctx_id_mutex); - if (ctx->id < 0) + if (xa_alloc(&ctx_id_table, &ctx->id, ctx, xa_limit_32b, GFP_KERNEL)) goto error; list_add_tail(&ctx->file_list, &file->ctxs); @@ -514,9 +510,7 @@ static ssize_t ib_ucm_create_id(struct ib_ucm_file *file, err2: ib_destroy_cm_id(ctx->cm_id); err1: - mutex_lock(&ctx_id_mutex); - idr_remove(&ctx_id_table, ctx->id); - mutex_unlock(&ctx_id_mutex); + xa_erase(&ctx_id_table, ctx->id); kfree(ctx); return result; } @@ -536,15 +530,15 @@ static ssize_t ib_ucm_destroy_id(struct ib_ucm_file *file, if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; - mutex_lock(&ctx_id_mutex); - ctx = idr_find(&ctx_id_table, cmd.id); + xa_lock(&ctx_id_table); + ctx = xa_load(&ctx_id_table, cmd.id); if (!ctx) ctx = ERR_PTR(-ENOENT); else if (ctx->file != file) ctx = ERR_PTR(-EINVAL); else - idr_remove(&ctx_id_table, ctx->id); - mutex_unlock(&ctx_id_mutex); + __xa_erase(&ctx_id_table, ctx->id); + xa_unlock(&ctx_id_table); if (IS_ERR(ctx)) return PTR_ERR(ctx); @@ -1189,10 +1183,7 @@ static int ib_ucm_close(struct inode *inode, struct file *filp) struct ib_ucm_context, file_list); mutex_unlock(&file->file_mutex); - mutex_lock(&ctx_id_mutex); - idr_remove(&ctx_id_table, ctx->id); - 
mutex_unlock(&ctx_id_mutex); - + xa_erase(&ctx_id_table, ctx->id); ib_destroy_cm_id(ctx->cm_id); ib_ucm_cleanup_events(ctx); kfree(ctx); @@ -1352,7 +1343,7 @@ static void __exit ib_ucm_cleanup(void) class_remove_file(&cm_class, &class_attr_abi_version.attr); unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_NUM_FIXED_MINOR); unregister_chrdev_region(dynamic_ucm_dev, IB_UCM_NUM_DYNAMIC_MINOR); - idr_destroy(&ctx_id_table); + WARN_ON(!xa_empty(&ctx_id_table)); } module_init(ib_ucm_init); -- cgit v1.2.3 From 638267537ad9c751f9f8b4763e616fe237d379c9 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 20 Feb 2019 16:21:06 -0800 Subject: cma: Convert portspace IDRs to XArray Signed-off-by: Matthew Wilcox Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 68c997be2429..a8b9c66c8525 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -39,7 +39,7 @@ #include #include #include -#include +#include #include #include #include @@ -191,10 +191,10 @@ static struct workqueue_struct *cma_wq; static unsigned int cma_pernet_id; struct cma_pernet { - struct idr tcp_ps; - struct idr udp_ps; - struct idr ipoib_ps; - struct idr ib_ps; + struct xarray tcp_ps; + struct xarray udp_ps; + struct xarray ipoib_ps; + struct xarray ib_ps; }; static struct cma_pernet *cma_pernet(struct net *net) @@ -202,7 +202,8 @@ static struct cma_pernet *cma_pernet(struct net *net) return net_generic(net, cma_pernet_id); } -static struct idr *cma_pernet_idr(struct net *net, enum rdma_ucm_port_space ps) +static +struct xarray *cma_pernet_xa(struct net *net, enum rdma_ucm_port_space ps) { struct cma_pernet *pernet = cma_pernet(net); @@ -247,25 +248,25 @@ struct class_port_info_context { static int cma_ps_alloc(struct net *net, enum rdma_ucm_port_space ps, struct rdma_bind_list *bind_list, int 
snum) { - struct idr *idr = cma_pernet_idr(net, ps); + struct xarray *xa = cma_pernet_xa(net, ps); - return idr_alloc(idr, bind_list, snum, snum + 1, GFP_KERNEL); + return xa_insert(xa, snum, bind_list, GFP_KERNEL); } static struct rdma_bind_list *cma_ps_find(struct net *net, enum rdma_ucm_port_space ps, int snum) { - struct idr *idr = cma_pernet_idr(net, ps); + struct xarray *xa = cma_pernet_xa(net, ps); - return idr_find(idr, snum); + return xa_load(xa, snum); } static void cma_ps_remove(struct net *net, enum rdma_ucm_port_space ps, int snum) { - struct idr *idr = cma_pernet_idr(net, ps); + struct xarray *xa = cma_pernet_xa(net, ps); - idr_remove(idr, snum); + xa_erase(xa, snum); } enum { @@ -4655,10 +4656,10 @@ static int cma_init_net(struct net *net) { struct cma_pernet *pernet = cma_pernet(net); - idr_init(&pernet->tcp_ps); - idr_init(&pernet->udp_ps); - idr_init(&pernet->ipoib_ps); - idr_init(&pernet->ib_ps); + xa_init(&pernet->tcp_ps); + xa_init(&pernet->udp_ps); + xa_init(&pernet->ipoib_ps); + xa_init(&pernet->ib_ps); return 0; } @@ -4667,10 +4668,10 @@ static void cma_exit_net(struct net *net) { struct cma_pernet *pernet = cma_pernet(net); - idr_destroy(&pernet->tcp_ps); - idr_destroy(&pernet->udp_ps); - idr_destroy(&pernet->ipoib_ps); - idr_destroy(&pernet->ib_ps); + WARN_ON(!xa_empty(&pernet->tcp_ps)); + WARN_ON(!xa_empty(&pernet->udp_ps)); + WARN_ON(!xa_empty(&pernet->ipoib_ps)); + WARN_ON(!xa_empty(&pernet->ib_ps)); } static struct pernet_operations cma_pernet_operations = { -- cgit v1.2.3 From a6a9274a7c71573c8921080da990696702f7301c Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sat, 2 Mar 2019 23:06:36 +0000 Subject: RDMA/nes: remove redundant check on udata The non-null check on udata is redundant as this check was performed just a few statements earlier and the check is always true as udata must be non-null at this point. Remove the redundant check on udata and the redundant else part that can never be executed.
Detected by CoverityScan, CID#1477317 ("Logically dead code") Fixes: 899444505473 ("IB/{hw,sw}: Remove 'uobject->context' dependency in object creation APIs") Signed-off-by: Colin Ian King Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/nes/nes_verbs.c | 73 ++++++++++++++++------------------- 1 file changed, 34 insertions(+), 39 deletions(-) diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 828e4af3f951..526092d435df 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -1039,53 +1039,48 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd, } if (req.user_qp_buffer) nesqp->nesuqp_addr = req.user_qp_buffer; - if (udata) { - nesqp->user_mode = 1; - if (virt_wqs) { - err = 1; - list_for_each_entry(nespbl, &nes_ucontext->qp_reg_mem_list, list) { - if (nespbl->user_base == (unsigned long )req.user_wqe_buffers) { - list_del(&nespbl->list); - err = 0; - nes_debug(NES_DBG_QP, "Found PBL for virtual QP. nespbl=%p. user_base=0x%lx\n", - nespbl, nespbl->user_base); - break; - } - } - if (err) { - nes_debug(NES_DBG_QP, "Didn't Find PBL for virtual QP. address = %llx.\n", - (long long unsigned int)req.user_wqe_buffers); - nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num); - kfree(nesqp->allocated_buffer); - return ERR_PTR(-EFAULT); + + nesqp->user_mode = 1; + if (virt_wqs) { + err = 1; + list_for_each_entry(nespbl, &nes_ucontext->qp_reg_mem_list, list) { + if (nespbl->user_base == (unsigned long )req.user_wqe_buffers) { + list_del(&nespbl->list); + err = 0; + nes_debug(NES_DBG_QP, "Found PBL for virtual QP. nespbl=%p. 
user_base=0x%lx\n", + nespbl, nespbl->user_base); + break; } } - - nesqp->mmap_sq_db_index = - find_next_zero_bit(nes_ucontext->allocated_wqs, - NES_MAX_USER_WQ_REGIONS, nes_ucontext->first_free_wq); - /* nes_debug(NES_DBG_QP, "find_first_zero_biton wqs returned %u\n", - nespd->mmap_db_index); */ - if (nesqp->mmap_sq_db_index >= NES_MAX_USER_WQ_REGIONS) { - nes_debug(NES_DBG_QP, - "db index > max user regions, failing create QP\n"); + if (err) { + nes_debug(NES_DBG_QP, "Didn't Find PBL for virtual QP. address = %llx.\n", + (long long unsigned int)req.user_wqe_buffers); nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num); - if (virt_wqs) { - pci_free_consistent(nesdev->pcidev, nespbl->pbl_size, nespbl->pbl_vbase, - nespbl->pbl_pbase); - kfree(nespbl); - } kfree(nesqp->allocated_buffer); - return ERR_PTR(-ENOMEM); + return ERR_PTR(-EFAULT); } - set_bit(nesqp->mmap_sq_db_index, nes_ucontext->allocated_wqs); - nes_ucontext->mmap_nesqp[nesqp->mmap_sq_db_index] = nesqp; - nes_ucontext->first_free_wq = nesqp->mmap_sq_db_index + 1; - } else { + } + + nesqp->mmap_sq_db_index = + find_next_zero_bit(nes_ucontext->allocated_wqs, + NES_MAX_USER_WQ_REGIONS, nes_ucontext->first_free_wq); + /* nes_debug(NES_DBG_QP, "find_first_zero_biton wqs returned %u\n", + nespd->mmap_db_index); */ + if (nesqp->mmap_sq_db_index >= NES_MAX_USER_WQ_REGIONS) { + nes_debug(NES_DBG_QP, + "db index > max user regions, failing create QP\n"); nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num); + if (virt_wqs) { + pci_free_consistent(nesdev->pcidev, nespbl->pbl_size, nespbl->pbl_vbase, + nespbl->pbl_pbase); + kfree(nespbl); + } kfree(nesqp->allocated_buffer); - return ERR_PTR(-EFAULT); + return ERR_PTR(-ENOMEM); } + set_bit(nesqp->mmap_sq_db_index, nes_ucontext->allocated_wqs); + nes_ucontext->mmap_nesqp[nesqp->mmap_sq_db_index] = nesqp; + nes_ucontext->first_free_wq = nesqp->mmap_sq_db_index + 1; } err = (!virt_wqs) ? 
nes_setup_mmap_qp(nesqp, nesvnic, sq_size, rq_size) : nes_setup_virt_qp(nesqp, nespbl, nesvnic, sq_size, rq_size); -- cgit v1.2.3 From 1a2e158327c957baef21a50f3a7f6e6942cbca1e Mon Sep 17 00:00:00 2001 From: "Enrico Weigelt, metux IT consult" Date: Wed, 6 Mar 2019 23:08:45 +0100 Subject: drivers: infiniband: Fix whitespace in kconfig Adjust the kconfig whitespace in bnxt_re/iser to match the kernel standard. Signed-off-by: Enrico Weigelt, metux IT consult Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/Kconfig | 12 ++++++------ drivers/infiniband/ulp/iser/Kconfig | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/Kconfig b/drivers/infiniband/hw/bnxt_re/Kconfig index d25439c305f7..51e8234520a9 100644 --- a/drivers/infiniband/hw/bnxt_re/Kconfig +++ b/drivers/infiniband/hw/bnxt_re/Kconfig @@ -1,10 +1,10 @@ config INFINIBAND_BNXT_RE - tristate "Broadcom Netxtreme HCA support" - depends on 64BIT - depends on ETHERNET && NETDEVICES && PCI && INET && DCB - select NET_VENDOR_BROADCOM - select BNXT - ---help--- + tristate "Broadcom Netxtreme HCA support" + depends on 64BIT + depends on ETHERNET && NETDEVICES && PCI && INET && DCB + select NET_VENDOR_BROADCOM + select BNXT + ---help--- This driver supports Broadcom NetXtreme-E 10/25/40/50 gigabit RoCE HCAs. To compile this driver as a module, choose M here: the module will be called bnxt_re. diff --git a/drivers/infiniband/ulp/iser/Kconfig b/drivers/infiniband/ulp/iser/Kconfig index d00af71a2cfc..299268f261ee 100644 --- a/drivers/infiniband/ulp/iser/Kconfig +++ b/drivers/infiniband/ulp/iser/Kconfig @@ -4,8 +4,8 @@ config INFINIBAND_ISER select SCSI_ISCSI_ATTRS ---help--- Support for the iSCSI Extensions for RDMA (iSER) Protocol - over InfiniBand. This allows you to access storage devices - that speak iSCSI over iSER over InfiniBand. + over InfiniBand. This allows you to access storage devices + that speak iSCSI over iSER over InfiniBand. 
The iSER protocol is defined by IETF. See -- cgit v1.2.3 From a4b7013db23e93824ac53083eeb3e4efdef4b5b0 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 12 Mar 2019 10:15:44 +0200 Subject: RDMA/rxe: Fix slab-out-bounds access which lead to kernel crash later BUG: KASAN: slab-out-of-bounds in rxe_mem_init_user+0x6c1/0x740 [rdma_rxe] Read of size 8 at addr ffff88805c01a608 by task ib_send_bw/573 CPU: 24 PID: 573 Comm: ib_send_bw Not tainted 5.0.0-rc5+ #189 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.11.0-0-g63451fca13-prebuilt.qemu-project.org 04/01/2014 Call Trace: rxe_mem_init_user+0x6c1/0x740 [rdma_rxe] rxe_reg_user_mr+0x9b/0x110 [rdma_rxe] ib_uverbs_reg_mr+0x428/0x9c0 [ib_uverbs] ib_uverbs_handler_UVERBS_METHOD_INVOKE_WRITE+0x2b0/0x410 [ib_uverbs] ib_uverbs_run_method+0x79c/0x1da0 [ib_uverbs] rxe_mem_init_user+0x6c1/0x740 [rdma_rxe] rxe_reg_user_mr+0x9b/0x110 [rdma_rxe] ib_uverbs_reg_mr+0x428/0x9c0 [ib_uverbs] ib_uverbs_handler_UVERBS_METHOD_INVOKE_WRITE+0x2b0/0x410 [ib_uverbs] ib_uverbs_run_method+0x79c/0x1da0 [ib_uverbs] ib_uverbs_cmd_verbs+0x5f2/0xf20 [ib_uverbs] ib_uverbs_ioctl+0x202/0x310 [ib_uverbs] do_vfs_ioctl+0x193/0x1440 ksys_ioctl+0x3a/0x70 __x64_sys_ioctl+0x6f/0xb0 do_syscall_64+0x13f/0x570 entry_SYSCALL_64_after_hwframe+0x49/0xbe Allocated by task 573: __kasan_kmalloc.constprop.5+0xc1/0xd0 __kmalloc+0x161/0x310 rxe_mem_alloc+0x52/0x470 [rdma_rxe] rxe_mem_init_user+0x113/0x740 [rdma_rxe] rxe_reg_user_mr+0x9b/0x110 [rdma_rxe] ib_uverbs_reg_mr+0x428/0x9c0 [ib_uverbs] ib_uverbs_handler_UVERBS_METHOD_INVOKE_WRITE+0x2b0/0x410 [ib_uverbs] ib_uverbs_run_method+0x79c/0x1da0 [ib_uverbs] ib_uverbs_cmd_verbs+0x5f2/0xf20 [ib_uverbs] ib_uverbs_ioctl+0x202/0x310 [ib_uverbs] do_vfs_ioctl+0x193/0x1440 ksys_ioctl+0x3a/0x70 __x64_sys_ioctl+0x6f/0xb0 do_syscall_64+0x13f/0x570 entry_SYSCALL_64_after_hwframe+0x49/0xbe Freed by task 0: __kasan_slab_free+0x12e/0x180 kfree+0x10a/0x2c0 rcu_process_callbacks+0xa77/0x1260 __do_softirq+0x2ad/0xacb Test 
scenario: ib_send_bw -x 1 -d rxe0 -a & ib_send_bw -x 1 -d rxe0 -a localhost Fixes: 8700e3e7c485 ("Soft RoCE driver") Reported-by: Parav Pandit Reviewed-by: Zhu Yanjun Tested-by: Zhu Yanjun Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_mr.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index 42f0f25e396c..ec89fbd06c53 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -199,6 +199,12 @@ int rxe_mem_init_user(struct rxe_pd *pd, u64 start, buf = map[0]->buf; for_each_sg_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) { + if (num_buf >= RXE_BUF_PER_MAP) { + map++; + buf = map[0]->buf; + num_buf = 0; + } + vaddr = page_address(sg_page_iter_page(&sg_iter)); if (!vaddr) { pr_warn("null vaddr\n"); @@ -211,11 +217,6 @@ int rxe_mem_init_user(struct rxe_pd *pd, u64 start, num_buf++; buf++; - if (num_buf >= RXE_BUF_PER_MAP) { - map++; - buf = map[0]->buf; - num_buf = 0; - } } } -- cgit v1.2.3 From 4ae27444100f54e6db3a046f086ba4e70e1ac22b Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Wed, 13 Mar 2019 12:05:59 -0700 Subject: IB/core: Ensure an invalidate_range callback on ODP MR No device supports ODP MR without an invalidate_range callback. Warn on any device which attempts to support ODP without supplying this callback. Then we can remove the checks for the callback within the code. This stems from the discussion https://www.spinics.net/lists/linux-rdma/msg76460.html ...which concluded this code was no longer necessary.
Acked-by: John Hubbard Reviewed-by: Haggai Eran Signed-off-by: Ira Weiny Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/umem.c | 5 +++++ drivers/infiniband/core/umem_odp.c | 13 +++---------- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index fe5551562dbc..89a7d57f9fa5 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -138,6 +138,11 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr, mmgrab(mm); if (access & IB_ACCESS_ON_DEMAND) { + if (WARN_ON_ONCE(!context->invalidate_range)) { + ret = -EINVAL; + goto umem_kfree; + } + ret = ib_umem_odp_get(to_ib_umem_odp(umem), access); if (ret) goto umem_kfree; diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index e6ec79ad9cc8..6f8c36fcda78 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -241,7 +241,7 @@ static struct ib_ucontext_per_mm *alloc_per_mm(struct ib_ucontext *ctx, per_mm->mm = mm; per_mm->umem_tree = RB_ROOT_CACHED; init_rwsem(&per_mm->umem_rwsem); - per_mm->active = ctx->invalidate_range; + per_mm->active = true; rcu_read_lock(); per_mm->tgid = get_task_pid(current->group_leader, PIDTYPE_PID); @@ -503,7 +503,6 @@ static int ib_umem_odp_map_dma_single_page( struct ib_umem *umem = &umem_odp->umem; struct ib_device *dev = umem->context->device; dma_addr_t dma_addr; - int stored_page = 0; int remove_existing_mapping = 0; int ret = 0; @@ -528,7 +527,6 @@ static int ib_umem_odp_map_dma_single_page( umem_odp->dma_list[page_index] = dma_addr | access_mask; umem_odp->page_list[page_index] = page; umem->npages++; - stored_page = 1; } else if (umem_odp->page_list[page_index] == page) { umem_odp->dma_list[page_index] |= access_mask; } else { @@ -540,11 +538,9 @@ static int ib_umem_odp_map_dma_single_page( } out: - /* On Demand Paging - avoid pinning the page */ - if (umem->context->invalidate_range || 
!stored_page) - put_page(page); + put_page(page); - if (remove_existing_mapping && umem->context->invalidate_range) { + if (remove_existing_mapping) { ib_umem_notifier_start_account(umem_odp); umem->context->invalidate_range( umem_odp, @@ -754,9 +750,6 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt, */ set_page_dirty(head_page); } - /* on demand pinning support */ - if (!umem->context->invalidate_range) - put_page(page); umem_odp->page_list[idx] = NULL; umem_odp->dma_list[idx] = 0; umem->npages--; -- cgit v1.2.3 From e2a438bd7116889af36304903b92e56d0f347228 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Fri, 15 Mar 2019 01:57:14 -0500 Subject: RDMA/i40iw: Handle workqueue allocation failure alloc_ordered_workqueue may fail and return NULL. The fix captures the failure and handles it properly to avoid potential NULL pointer dereferences. Signed-off-by: Kangjie Lu Reviewed-by: Shiraz, Saleem Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/i40iw/i40iw.h | 2 +- drivers/infiniband/hw/i40iw/i40iw_cm.c | 18 +++++++++++++++--- drivers/infiniband/hw/i40iw/i40iw_main.c | 5 ++++- 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h index 2f2b4426ded7..8feec35f95a7 100644 --- a/drivers/infiniband/hw/i40iw/i40iw.h +++ b/drivers/infiniband/hw/i40iw/i40iw.h @@ -552,7 +552,7 @@ enum i40iw_status_code i40iw_obj_aligned_mem(struct i40iw_device *iwdev, void i40iw_request_reset(struct i40iw_device *iwdev); void i40iw_destroy_rdma_device(struct i40iw_ib_device *iwibdev); -void i40iw_setup_cm_core(struct i40iw_device *iwdev); +int i40iw_setup_cm_core(struct i40iw_device *iwdev); void i40iw_cleanup_cm_core(struct i40iw_cm_core *cm_core); void i40iw_process_ceq(struct i40iw_device *, struct i40iw_ceq *iwceq); void i40iw_process_aeq(struct i40iw_device *); diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index 
206cfb0016f8..1c6aa0efd2b6 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -3237,7 +3237,7 @@ void i40iw_receive_ilq(struct i40iw_sc_vsi *vsi, struct i40iw_puda_buf *rbuf) * core * @iwdev: iwarp device structure */ -void i40iw_setup_cm_core(struct i40iw_device *iwdev) +int i40iw_setup_cm_core(struct i40iw_device *iwdev) { struct i40iw_cm_core *cm_core = &iwdev->cm_core; @@ -3256,9 +3256,19 @@ void i40iw_setup_cm_core(struct i40iw_device *iwdev) cm_core->event_wq = alloc_ordered_workqueue("iwewq", WQ_MEM_RECLAIM); + if (!cm_core->event_wq) + goto error; cm_core->disconn_wq = alloc_ordered_workqueue("iwdwq", WQ_MEM_RECLAIM); + if (!cm_core->disconn_wq) + goto error; + + return 0; +error: + i40iw_cleanup_cm_core(&iwdev->cm_core); + + return -ENOMEM; } /** @@ -3278,8 +3288,10 @@ void i40iw_cleanup_cm_core(struct i40iw_cm_core *cm_core) del_timer_sync(&cm_core->tcp_timer); spin_unlock_irqrestore(&cm_core->ht_lock, flags); - destroy_workqueue(cm_core->event_wq); - destroy_workqueue(cm_core->disconn_wq); + if (cm_core->event_wq) + destroy_workqueue(cm_core->event_wq); + if (cm_core->disconn_wq) + destroy_workqueue(cm_core->disconn_wq); } /** diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c index 68095f00d08f..10932baee279 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_main.c +++ b/drivers/infiniband/hw/i40iw/i40iw_main.c @@ -1641,7 +1641,10 @@ static int i40iw_open(struct i40e_info *ldev, struct i40e_client *client) iwdev = &hdl->device; iwdev->hdl = hdl; dev = &iwdev->sc_dev; - i40iw_setup_cm_core(iwdev); + if (i40iw_setup_cm_core(iwdev)) { + kfree(iwdev->hdl); + return -ENOMEM; + } dev->back_dev = (void *)iwdev; iwdev->ldev = &hdl->ldev; -- cgit v1.2.3 From 9513ea4f67280a17365f5adfa31fac7d344150c6 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sat, 16 Mar 2019 23:05:12 +0000 Subject: IB/iser: remove uninitialized variable len The variable len is not being 
initialized and the uninitialized value is being returned. However, this return path is never reached because the default case in the switch statement returns -ENOSYS. Clean up the code by replacing the return -ENOSYS with a break for the default case and returning -ENOSYS at the end of the function. This allows len to be removed. Also remove the redundant break that follows a return statement. Signed-off-by: Colin Ian King Reviewed-by: Max Gurtovoy Reviewed-by: Sagi Grimberg Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/iser/iscsi_iser.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 8c707accd148..9c185a8dabd3 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -763,7 +763,6 @@ static int iscsi_iser_get_ep_param(struct iscsi_endpoint *ep, enum iscsi_param param, char *buf) { struct iser_conn *iser_conn = ep->dd_data; - int len; switch (param) { case ISCSI_PARAM_CONN_PORT: @@ -774,12 +773,10 @@ static int iscsi_iser_get_ep_param(struct iscsi_endpoint *ep, return iscsi_conn_get_addr_param((struct sockaddr_storage *) &iser_conn->ib_conn.cma_id->route.addr.dst_addr, param, buf); - break; default: - return -ENOSYS; + break; } - - return len; + return -ENOSYS; } /** -- cgit v1.2.3 From dc7fe518b0493faa0af0568d6d8c2a33c00f58d0 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 17 Mar 2019 12:11:14 +0200 Subject: overflow: Fix -Wtype-limits compilation warnings Attempting to use check_shl_overflow() with inputs of unsigned type produces the following compilation warnings. drivers/infiniband/hw/mlx5/qp.c: In function 'set_user_rq_size': ./include/linux/overflow.h:230:6: warning: comparison of unsigned expression >= 0 is always true [-Wtype-limits] _s >= 0 && _s < 8 * sizeof(*d) ? 
_s : 0; \ ^~ drivers/infiniband/hw/mlx5/qp.c:5820:6: note: in expansion of macro 'check_shl_overflow' if (check_shl_overflow(rwq->wqe_count, rwq->wqe_shift, &rwq->buf_size)) ^~~~~~~~~~~~~~~~~~ ./include/linux/overflow.h:232:26: warning: comparison of unsigned expression < 0 is always false [-Wtype-limits] (_to_shift != _s || *_d < 0 || _a < 0 || \ ^ drivers/infiniband/hw/mlx5/qp.c:5820:6: note: in expansion of macro 'check_shl_overflow' if (check_shl_overflow(rwq->wqe_count, rwq->wqe_shift, &rwq->buf_size)) ^~~~~~~~~~~~~~~~~~ ./include/linux/overflow.h:232:36: warning: comparison of unsigned expression < 0 is always false [-Wtype-limits] (_to_shift != _s || *_d < 0 || _a < 0 || \ ^ drivers/infiniband/hw/mlx5/qp.c:5820:6: note: in expansion of macro 'check_shl_overflow' if (check_shl_overflow(rwq->wqe_count, rwq->wqe_shift,&rwq->buf_size)) ^~~~~~~~~~~~~~~~~~ Fixes: 0c66847793d1 ("overflow.h: Add arithmetic shift helper") Reviewed-by: Bart Van Assche Acked-by: Kees Cook Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/linux/overflow.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/include/linux/overflow.h b/include/linux/overflow.h index 40b48e2133cb..15eb85de9226 100644 --- a/include/linux/overflow.h +++ b/include/linux/overflow.h @@ -36,6 +36,12 @@ #define type_max(T) ((T)((__type_half_max(T) - 1) + __type_half_max(T))) #define type_min(T) ((T)((T)-type_max(T)-(T)1)) +/* + * Avoids triggering -Wtype-limits compilation warning, + * while using unsigned data types to check a < 0. + */ +#define is_non_negative(a) ((a) > 0 || (a) == 0) +#define is_negative(a) (!(is_non_negative(a))) #ifdef COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW /* @@ -227,10 +233,10 @@ typeof(d) _d = d; \ u64 _a_full = _a; \ unsigned int _to_shift = \ - _s >= 0 && _s < 8 * sizeof(*d) ? _s : 0; \ + is_non_negative(_s) && _s < 8 * sizeof(*d) ? 
_s : 0; \ *_d = (_a_full << _to_shift); \ - (_to_shift != _s || *_d < 0 || _a < 0 || \ - (*_d >> _to_shift) != _a); \ + (_to_shift != _s || is_negative(*_d) || is_negative(_a) || \ + (*_d >> _to_shift) != _a); \ }) /** -- cgit v1.2.3 From e95e52a1788d4a8af547261875c0fbae2e6e3028 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 19 Mar 2019 11:10:09 +0200 Subject: RDMA/hns: Limit scope of hns_roce_cmq_send() The forgotten static keyword causes the following warning to appear while building the HNS driver. Declare hns_roce_cmq_send() as a static function to fix this warning. drivers/infiniband/hw/hns/hns_roce_hw_v2.c:1089:5: warning: no previous prototype for 'hns_roce_cmq_send' [-Wmissing-prototypes] int hns_roce_cmq_send(struct hns_roce_dev *hr_dev, Fixes: 6a04aed6afae ("RDMA/hns: Fix the chip hanging caused by sending mailbox&CMQ during reset") Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 14e89454e269..dafc33b02e09 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -1086,7 +1086,7 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, return ret; } -int hns_roce_cmq_send(struct hns_roce_dev *hr_dev, +static int hns_roce_cmq_send(struct hns_roce_dev *hr_dev, struct hns_roce_cmq_desc *desc, int num) { int retval; -- cgit v1.2.3 From 1e5887b700ddbfa42365b5bf95dda5de11b76259 Mon Sep 17 00:00:00 2001 From: Artemy Kovalyov Date: Tue, 19 Mar 2019 11:24:37 +0200 Subject: IB/mlx5: WQE dump jumps over first 16 bytes Move the index increment to after it is used; otherwise the dump of the WQE will start from the second WQE BB. 
Fixes: 34f4c9554d8b ("IB/mlx5: Use fragmented QP's buffer for in-kernel users") Signed-off-by: Artemy Kovalyov Signed-off-by: Moni Shoua Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 7cd006da1dae..f864e454de8f 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -4726,16 +4726,15 @@ static void set_linv_wr(struct mlx5_ib_qp *qp, void **seg, int *size, static void dump_wqe(struct mlx5_ib_qp *qp, u32 idx, int size_16) { __be32 *p = NULL; - u32 tidx = idx; int i, j; pr_debug("dump WQE index %u:\n", idx); for (i = 0, j = 0; i < size_16 * 4; i += 4, j += 4) { if ((i & 0xf) == 0) { - tidx = (tidx + 1) & (qp->sq.wqe_cnt - 1); - p = mlx5_frag_buf_get_wqe(&qp->sq.fbc, tidx); + p = mlx5_frag_buf_get_wqe(&qp->sq.fbc, idx); pr_debug("WQBB at %p:\n", (void *)p); j = 0; + idx = (idx + 1) & (qp->sq.wqe_cnt - 1); } pr_debug("%08x %08x %08x %08x\n", be32_to_cpu(p[j]), be32_to_cpu(p[j + 1]), be32_to_cpu(p[j + 2]), -- cgit v1.2.3 From d623dfd2836114507d647c9793a80d213d8bffe8 Mon Sep 17 00:00:00 2001 From: Artemy Kovalyov Date: Tue, 19 Mar 2019 11:24:39 +0200 Subject: IB/mlx5: Compare only index part of a memory window rkey The InfiniBand Architecture Specification section 10.6.7.2.4 TYPE 2 MEMORY WINDOWS says that if the CI supports the Base Memory Management Extensions defined in this specification, the R_Key format for a Type 2 Memory Window must consist of: * 24 bit index in the most significant bits of the R_Key, which is owned by the CI, and * 8 bit key in the least significant bits of the R_Key, which is owned by the Consumer. This means that the kernel should compare only the index part of a R_Key to determine equality with another R_Key. 
Fixes: db570d7deafb ("IB/mlx5: Add ODP support to MW") Signed-off-by: Artemy Kovalyov Signed-off-by: Moni Shoua Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/odp.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index c20bfc41ecf1..2bc4d67b3e42 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -710,6 +710,15 @@ struct pf_frame { int depth; }; +static bool mkey_is_eq(struct mlx5_core_mkey *mmkey, u32 key) +{ + if (!mmkey) + return false; + if (mmkey->type == MLX5_MKEY_MW) + return mlx5_base_mkey(mmkey->key) == mlx5_base_mkey(key); + return mmkey->key == key; +} + static int get_indirect_num_descs(struct mlx5_core_mkey *mmkey) { struct mlx5_ib_mw *mw; @@ -759,7 +768,7 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, next_mr: mmkey = __mlx5_mr_lookup(dev->mdev, mlx5_base_mkey(key)); - if (!mmkey || mmkey->key != key) { + if (!mkey_is_eq(mmkey, key)) { mlx5_ib_dbg(dev, "failed to find mkey %x\n", key); ret = -EFAULT; goto srcu_unlock; -- cgit v1.2.3 From 6a1096611cc57ac7307a3121b5df12b07c01a2c6 Mon Sep 17 00:00:00 2001 From: Yuval Shaia Date: Wed, 20 Mar 2019 16:14:18 +0200 Subject: RDMA/vmw_pvrdma: Skip zeroing device attrs Caller already clears props before calling query_device. 
Signed-off-by: Yuval Shaia Acked-by: Adit Ranadive Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c index 42fe821f8d58..8a32e1e435a9 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c @@ -70,8 +70,6 @@ int pvrdma_query_device(struct ib_device *ibdev, if (uhw->inlen || uhw->outlen) return -EINVAL; - memset(props, 0, sizeof(*props)); - props->fw_ver = dev->dsr->caps.fw_ver; props->sys_image_guid = dev->dsr->caps.sys_image_guid; props->max_mr_size = dev->dsr->caps.max_mr_size; -- cgit v1.2.3 From 2dd0cf77c93f615810d1735e6bd1582b73a9ca94 Mon Sep 17 00:00:00 2001 From: Potnuri Bharat Teja Date: Tue, 26 Mar 2019 18:31:47 +0530 Subject: iw_cxgb4: Update Maintainer details Remove Steve and add undersigned as maintainer for iw_cxgb4 drivers. Signed-off-by: Potnuri Bharat Teja Acked-by: Steve Wise Signed-off-by: Jason Gunthorpe --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 3e5a5d263f29..d87c59b1f44a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4246,7 +4246,7 @@ S: Supported F: drivers/scsi/cxgbi/cxgb3i CXGB3 IWARP RNIC DRIVER (IW_CXGB3) -M: Steve Wise +M: Potnuri Bharat Teja L: linux-rdma@vger.kernel.org W: http://www.openfabrics.org S: Supported @@ -4275,7 +4275,7 @@ S: Supported F: drivers/scsi/cxgbi/cxgb4i CXGB4 IWARP RNIC DRIVER (IW_CXGB4) -M: Steve Wise +M: Potnuri Bharat Teja L: linux-rdma@vger.kernel.org W: http://www.openfabrics.org S: Supported -- cgit v1.2.3 From 4d60cad5db7fb450e64532afbbaba85af235fa5d Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Tue, 19 Mar 2019 14:11:44 -0700 Subject: IB/MAD: Add send path trace points Use the standard Linux trace mechanism to trace MADs being sent. 
4 trace points are added, when the MAD is posted to the qp, when the MAD is completed, if a MAD is resent, and when the MAD completes in error. Reviewed-by: "Ruhl, Michael J" Suggested-by: Steven Rostedt (VMware) Reviewed-by: Steven Rostedt (VMware) Signed-off-by: Ira Weiny Signed-off-by: Jason Gunthorpe --- MAINTAINERS | 1 + drivers/infiniband/core/mad.c | 33 +++++++- include/trace/events/ib_mad.h | 187 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 220 insertions(+), 1 deletion(-) create mode 100644 include/trace/events/ib_mad.h diff --git a/MAINTAINERS b/MAINTAINERS index d87c59b1f44a..acd7278a8f3a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7653,6 +7653,7 @@ F: drivers/infiniband/ F: include/uapi/linux/if_infiniband.h F: include/uapi/rdma/ F: include/rdma/ +F: include/trace/events/ib_mad.h INGENIC JZ4780 DMA Driver M: Zubair Lutfullah Kakakhel diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 96571fa3abd8..2acb44bc5ade 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -3,7 +3,7 @@ * Copyright (c) 2005 Intel Corporation. All rights reserved. * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2009 HNR Consulting. All rights reserved. - * Copyright (c) 2014 Intel Corporation. All rights reserved. + * Copyright (c) 2014,2018 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU @@ -51,6 +51,32 @@ #include "opa_smi.h" #include "agent.h" +#define CREATE_TRACE_POINTS +#include + +#ifdef CONFIG_TRACEPOINTS +static void create_mad_addr_info(struct ib_mad_send_wr_private *mad_send_wr, + struct ib_mad_qp_info *qp_info, + struct trace_event_raw_ib_mad_send_template *entry) +{ + u16 pkey; + struct ib_device *dev = qp_info->port_priv->device; + u8 pnum = qp_info->port_priv->port_num; + struct ib_ud_wr *wr = &mad_send_wr->send_wr; + struct rdma_ah_attr attr = {}; + + rdma_query_ah(wr->ah, &attr); + + /* These are common */ + entry->sl = attr.sl; + ib_query_pkey(dev, pnum, wr->pkey_index, &pkey); + entry->pkey = pkey; + entry->rqpn = wr->remote_qpn; + entry->rqkey = wr->remote_qkey; + entry->dlid = rdma_ah_get_dlid(&attr); +} +#endif + static int mad_sendq_size = IB_MAD_QP_SEND_SIZE; static int mad_recvq_size = IB_MAD_QP_RECV_SIZE; @@ -1215,6 +1241,7 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr) spin_lock_irqsave(&qp_info->send_queue.lock, flags); if (qp_info->send_queue.count < qp_info->send_queue.max_active) { + trace_ib_mad_ib_send_mad(mad_send_wr, qp_info); ret = ib_post_send(mad_agent->qp, &mad_send_wr->send_wr.wr, NULL); list = &qp_info->send_queue.list; @@ -2488,6 +2515,8 @@ static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc) send_queue = mad_list->mad_queue; qp_info = send_queue->qp_info; + trace_ib_mad_send_done_handler(mad_send_wr, wc); + retry: ib_dma_unmap_single(mad_send_wr->send_buf.mad_agent->device, mad_send_wr->header_mapping, @@ -2519,6 +2548,7 @@ retry: ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc); if (queued_send_wr) { + trace_ib_mad_send_done_resend(queued_send_wr, qp_info); ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr.wr, NULL); if (ret) { @@ -2566,6 +2596,7 @@ static bool ib_mad_send_error(struct ib_mad_port_private *port_priv, if (mad_send_wr->retry) { /* Repost send */ mad_send_wr->retry = 0; + 
trace_ib_mad_error_handler(mad_send_wr, qp_info); ret = ib_post_send(qp_info->qp, &mad_send_wr->send_wr.wr, NULL); if (!ret) diff --git a/include/trace/events/ib_mad.h b/include/trace/events/ib_mad.h new file mode 100644 index 000000000000..585028f17fa7 --- /dev/null +++ b/include/trace/events/ib_mad.h @@ -0,0 +1,187 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ + +/* + * Copyright (c) 2018 Intel Corporation. All rights reserved. + */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM ib_mad + +#if !defined(_TRACE_IB_MAD_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_IB_MAD_H + +#include +#include + +#ifdef CONFIG_TRACEPOINTS +struct trace_event_raw_ib_mad_send_template; +static void create_mad_addr_info(struct ib_mad_send_wr_private *mad_send_wr, + struct ib_mad_qp_info *qp_info, + struct trace_event_raw_ib_mad_send_template *entry); +#endif + +DECLARE_EVENT_CLASS(ib_mad_send_template, + TP_PROTO(struct ib_mad_send_wr_private *wr, + struct ib_mad_qp_info *qp_info), + TP_ARGS(wr, qp_info), + + TP_STRUCT__entry( + __field(u8, base_version) + __field(u8, mgmt_class) + __field(u8, class_version) + __field(u8, port_num) + __field(u32, qp_num) + __field(u8, method) + __field(u8, sl) + __field(u16, attr_id) + __field(u32, attr_mod) + __field(u64, wrtid) + __field(u64, tid) + __field(u16, status) + __field(u16, class_specific) + __field(u32, length) + __field(u32, dlid) + __field(u32, rqpn) + __field(u32, rqkey) + __field(u32, dev_index) + __field(void *, agent_priv) + __field(unsigned long, timeout) + __field(int, retries_left) + __field(int, max_retries) + __field(int, retry) + __field(u16, pkey) + ), + + TP_fast_assign( + __entry->dev_index = wr->mad_agent_priv->agent.device->index; + __entry->port_num = wr->mad_agent_priv->agent.port_num; + __entry->qp_num = wr->mad_agent_priv->qp_info->qp->qp_num; + __entry->agent_priv = wr->mad_agent_priv; + __entry->wrtid = wr->tid; + __entry->max_retries = wr->max_retries; + __entry->retries_left = wr->retries_left; 
+ __entry->retry = wr->retry; + __entry->timeout = wr->timeout; + __entry->length = wr->send_buf.hdr_len + + wr->send_buf.data_len; + __entry->base_version = + ((struct ib_mad_hdr *)wr->send_buf.mad)->base_version; + __entry->mgmt_class = + ((struct ib_mad_hdr *)wr->send_buf.mad)->mgmt_class; + __entry->class_version = + ((struct ib_mad_hdr *)wr->send_buf.mad)->class_version; + __entry->method = + ((struct ib_mad_hdr *)wr->send_buf.mad)->method; + __entry->status = + ((struct ib_mad_hdr *)wr->send_buf.mad)->status; + __entry->class_specific = + ((struct ib_mad_hdr *)wr->send_buf.mad)->class_specific; + __entry->tid = ((struct ib_mad_hdr *)wr->send_buf.mad)->tid; + __entry->attr_id = + ((struct ib_mad_hdr *)wr->send_buf.mad)->attr_id; + __entry->attr_mod = + ((struct ib_mad_hdr *)wr->send_buf.mad)->attr_mod; + create_mad_addr_info(wr, qp_info, __entry); + ), + + TP_printk("%d:%d QP%d agent %p: " \ + "wrtid 0x%llx; %d/%d retries(%d); timeout %lu length %d : " \ + "hdr : base_ver 0x%x class 0x%x class_ver 0x%x " \ + "method 0x%x status 0x%x class_specific 0x%x tid 0x%llx " \ + "attr_id 0x%x attr_mod 0x%x => dlid 0x%08x sl %d "\ + "pkey 0x%x rpqn 0x%x rqpkey 0x%x", + __entry->dev_index, __entry->port_num, __entry->qp_num, + __entry->agent_priv, be64_to_cpu(__entry->wrtid), + __entry->retries_left, __entry->max_retries, + __entry->retry, __entry->timeout, __entry->length, + __entry->base_version, __entry->mgmt_class, + __entry->class_version, + __entry->method, be16_to_cpu(__entry->status), + be16_to_cpu(__entry->class_specific), + be64_to_cpu(__entry->tid), be16_to_cpu(__entry->attr_id), + be32_to_cpu(__entry->attr_mod), + be32_to_cpu(__entry->dlid), __entry->sl, __entry->pkey, + __entry->rqpn, __entry->rqkey + ) +); + +DEFINE_EVENT(ib_mad_send_template, ib_mad_error_handler, + TP_PROTO(struct ib_mad_send_wr_private *wr, + struct ib_mad_qp_info *qp_info), + TP_ARGS(wr, qp_info)); +DEFINE_EVENT(ib_mad_send_template, ib_mad_ib_send_mad, + TP_PROTO(struct 
ib_mad_send_wr_private *wr, + struct ib_mad_qp_info *qp_info), + TP_ARGS(wr, qp_info)); +DEFINE_EVENT(ib_mad_send_template, ib_mad_send_done_resend, + TP_PROTO(struct ib_mad_send_wr_private *wr, + struct ib_mad_qp_info *qp_info), + TP_ARGS(wr, qp_info)); + +TRACE_EVENT(ib_mad_send_done_handler, + TP_PROTO(struct ib_mad_send_wr_private *wr, struct ib_wc *wc), + TP_ARGS(wr, wc), + + TP_STRUCT__entry( + __field(u8, port_num) + __field(u8, base_version) + __field(u8, mgmt_class) + __field(u8, class_version) + __field(u32, qp_num) + __field(u64, wrtid) + __field(u16, status) + __field(u16, wc_status) + __field(u32, length) + __field(void *, agent_priv) + __field(unsigned long, timeout) + __field(u32, dev_index) + __field(int, retries_left) + __field(int, max_retries) + __field(int, retry) + __field(u8, method) + ), + + TP_fast_assign( + __entry->dev_index = wr->mad_agent_priv->agent.device->index; + __entry->port_num = wr->mad_agent_priv->agent.port_num; + __entry->qp_num = wr->mad_agent_priv->qp_info->qp->qp_num; + __entry->agent_priv = wr->mad_agent_priv; + __entry->wrtid = wr->tid; + __entry->max_retries = wr->max_retries; + __entry->retries_left = wr->retries_left; + __entry->retry = wr->retry; + __entry->timeout = wr->timeout; + __entry->base_version = + ((struct ib_mad_hdr *)wr->send_buf.mad)->base_version; + __entry->mgmt_class = + ((struct ib_mad_hdr *)wr->send_buf.mad)->mgmt_class; + __entry->class_version = + ((struct ib_mad_hdr *)wr->send_buf.mad)->class_version; + __entry->method = + ((struct ib_mad_hdr *)wr->send_buf.mad)->method; + __entry->status = + ((struct ib_mad_hdr *)wr->send_buf.mad)->status; + __entry->wc_status = wc->status; + __entry->length = wc->byte_len; + ), + + TP_printk("%d:%d QP%d : SEND WC Status %d : agent %p: " \ + "wrtid 0x%llx %d/%d retries(%d) timeout %lu length %d: " \ + "hdr : base_ver 0x%x class 0x%x class_ver 0x%x " \ + "method 0x%x status 0x%x", + __entry->dev_index, __entry->port_num, __entry->qp_num, + __entry->wc_status, + 
__entry->agent_priv, be64_to_cpu(__entry->wrtid), + __entry->retries_left, __entry->max_retries, + __entry->retry, __entry->timeout, + __entry->length, + __entry->base_version, __entry->mgmt_class, + __entry->class_version, __entry->method, + be16_to_cpu(__entry->status) + ) +); + + +#endif /* _TRACE_IB_MAD_H */ + +#include -- cgit v1.2.3 From 821bf1de45a1a084e2e11b1a2308777434194bfe Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Tue, 19 Mar 2019 14:11:45 -0700 Subject: IB/MAD: Add recv path trace point Trace received MAD details. Reviewed-by: "Ruhl, Michael J" Reviewed-by: Steven Rostedt (VMware) Signed-off-by: Ira Weiny Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/mad.c | 3 ++ include/trace/events/ib_mad.h | 67 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+) diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 2acb44bc5ade..7164db34bd4d 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -2305,6 +2305,9 @@ static void ib_mad_recv_done(struct ib_cq *cq, struct ib_wc *wc) if (!validate_mad((const struct ib_mad_hdr *)recv->mad, qp_info, opa)) goto out; + trace_ib_mad_recv_done_handler(qp_info, wc, + (struct ib_mad_hdr *)recv->mad); + mad_size = recv->mad_size; response = alloc_mad_private(mad_size, GFP_KERNEL); if (!response) diff --git a/include/trace/events/ib_mad.h b/include/trace/events/ib_mad.h index 585028f17fa7..245a916bfd5a 100644 --- a/include/trace/events/ib_mad.h +++ b/include/trace/events/ib_mad.h @@ -181,6 +181,73 @@ TRACE_EVENT(ib_mad_send_done_handler, ) ); +TRACE_EVENT(ib_mad_recv_done_handler, + TP_PROTO(struct ib_mad_qp_info *qp_info, struct ib_wc *wc, + struct ib_mad_hdr *mad_hdr), + TP_ARGS(qp_info, wc, mad_hdr), + + TP_STRUCT__entry( + __field(u8, base_version) + __field(u8, mgmt_class) + __field(u8, class_version) + __field(u8, port_num) + __field(u32, qp_num) + __field(u16, status) + __field(u16, class_specific) + __field(u32, length) + 
__field(u64, tid) + __field(u8, method) + __field(u8, sl) + __field(u16, attr_id) + __field(u32, attr_mod) + __field(u16, src_qp) + __field(u16, wc_status) + __field(u32, slid) + __field(u32, dev_index) + __field(u16, pkey) + ), + + TP_fast_assign( + __entry->dev_index = qp_info->port_priv->device->index; + __entry->port_num = qp_info->port_priv->port_num; + __entry->qp_num = qp_info->qp->qp_num; + __entry->length = wc->byte_len; + __entry->base_version = mad_hdr->base_version; + __entry->mgmt_class = mad_hdr->mgmt_class; + __entry->class_version = mad_hdr->class_version; + __entry->method = mad_hdr->method; + __entry->status = mad_hdr->status; + __entry->class_specific = mad_hdr->class_specific; + __entry->tid = mad_hdr->tid; + __entry->attr_id = mad_hdr->attr_id; + __entry->attr_mod = mad_hdr->attr_mod; + __entry->slid = wc->slid; + __entry->src_qp = wc->src_qp; + __entry->sl = wc->sl; + ib_query_pkey(qp_info->port_priv->device, + qp_info->port_priv->port_num, + wc->pkey_index, &__entry->pkey); + __entry->wc_status = wc->status; + ), + + TP_printk("%d:%d QP%d : RECV WC Status %d : length %d : hdr : " \ + "base_ver 0x%02x class 0x%02x class_ver 0x%02x " \ + "method 0x%02x status 0x%04x class_specific 0x%04x " \ + "tid 0x%016llx attr_id 0x%04x attr_mod 0x%08x " \ + "slid 0x%08x src QP%d, sl %d pkey 0x%04x", + __entry->dev_index, __entry->port_num, __entry->qp_num, + __entry->wc_status, + __entry->length, + __entry->base_version, __entry->mgmt_class, + __entry->class_version, __entry->method, + be16_to_cpu(__entry->status), + be16_to_cpu(__entry->class_specific), + be64_to_cpu(__entry->tid), be16_to_cpu(__entry->attr_id), + be32_to_cpu(__entry->attr_mod), + __entry->slid, __entry->src_qp, __entry->sl, __entry->pkey + ) +); + #endif /* _TRACE_IB_MAD_H */ -- cgit v1.2.3 From 0e65bae205cbec92029ff7b3c3a14ec90456f7eb Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Tue, 19 Mar 2019 14:11:46 -0700 Subject: IB/MAD: Add agent trace points Trace agent details when agents are 
[un]registered. In addition, report agent details on send/recv. Reviewed-by: "Ruhl, Michael J" Reviewed-by: Steven Rostedt (VMware) Signed-off-by: Ira Weiny Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/mad.c | 4 ++++ include/trace/events/ib_mad.h | 46 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+) diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 7164db34bd4d..cc1805211a84 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -467,6 +467,7 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, } spin_unlock_irq(&port_priv->reg_lock); + trace_ib_mad_create_agent(mad_agent_priv); return &mad_agent_priv->agent; error6: spin_unlock_irq(&port_priv->reg_lock); @@ -622,6 +623,7 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv) struct ib_mad_port_private *port_priv; /* Note that we could still be handling received MADs */ + trace_ib_mad_unregister_agent(mad_agent_priv); /* * Canceling all sends results in dropping received response @@ -2354,6 +2356,7 @@ static void ib_mad_recv_done(struct ib_cq *cq, struct ib_wc *wc) mad_agent = find_mad_agent(port_priv, (const struct ib_mad_hdr *)recv->mad); if (mad_agent) { + trace_ib_mad_recv_done_agent(mad_agent); ib_mad_complete_recv(mad_agent, &recv->header.recv_wc); /* * recv is freed up in error cases in ib_mad_complete_recv @@ -2518,6 +2521,7 @@ static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc) send_queue = mad_list->mad_queue; qp_info = send_queue->qp_info; + trace_ib_mad_send_done_agent(mad_send_wr->mad_agent_priv); trace_ib_mad_send_done_handler(mad_send_wr, wc); retry: diff --git a/include/trace/events/ib_mad.h b/include/trace/events/ib_mad.h index 245a916bfd5a..6f504c2af935 100644 --- a/include/trace/events/ib_mad.h +++ b/include/trace/events/ib_mad.h @@ -248,6 +248,52 @@ TRACE_EVENT(ib_mad_recv_done_handler, ) ); +DECLARE_EVENT_CLASS(ib_mad_agent_template, + 
TP_PROTO(struct ib_mad_agent_private *agent), + TP_ARGS(agent), + + TP_STRUCT__entry( + __field(u32, dev_index) + __field(u32, hi_tid) + __field(u8, port_num) + __field(u8, mgmt_class) + __field(u8, mgmt_class_version) + ), + + TP_fast_assign( + __entry->dev_index = agent->agent.device->index; + __entry->port_num = agent->agent.port_num; + __entry->hi_tid = agent->agent.hi_tid; + + if (agent->reg_req) { + __entry->mgmt_class = agent->reg_req->mgmt_class; + __entry->mgmt_class_version = + agent->reg_req->mgmt_class_version; + } else { + __entry->mgmt_class = 0; + __entry->mgmt_class_version = 0; + } + ), + + TP_printk("%d:%d mad agent : hi_tid 0x%08x class 0x%02x class_ver 0x%02x", + __entry->dev_index, __entry->port_num, + __entry->hi_tid, __entry->mgmt_class, + __entry->mgmt_class_version + ) +); +DEFINE_EVENT(ib_mad_agent_template, ib_mad_recv_done_agent, + TP_PROTO(struct ib_mad_agent_private *agent), + TP_ARGS(agent)); +DEFINE_EVENT(ib_mad_agent_template, ib_mad_send_done_agent, + TP_PROTO(struct ib_mad_agent_private *agent), + TP_ARGS(agent)); +DEFINE_EVENT(ib_mad_agent_template, ib_mad_create_agent, + TP_PROTO(struct ib_mad_agent_private *agent), + TP_ARGS(agent)); +DEFINE_EVENT(ib_mad_agent_template, ib_mad_unregister_agent, + TP_PROTO(struct ib_mad_agent_private *agent), + TP_ARGS(agent)); + #endif /* _TRACE_IB_MAD_H */ -- cgit v1.2.3 From 056533192a9e04cac5ecd0ac683a1d659cb712ad Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Tue, 19 Mar 2019 14:11:47 -0700 Subject: IB/UMAD: Add umad trace points Trace MADs going to/from user space. 
Suggested-by: Steven Rostedt (VMware) Signed-off-by: Ira Weiny Signed-off-by: Jason Gunthorpe --- MAINTAINERS | 1 + drivers/infiniband/core/user_mad.c | 12 ++++ include/trace/events/ib_umad.h | 126 +++++++++++++++++++++++++++++++++++++ 3 files changed, 139 insertions(+) create mode 100644 include/trace/events/ib_umad.h diff --git a/MAINTAINERS b/MAINTAINERS index acd7278a8f3a..4e851fc92e41 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7654,6 +7654,7 @@ F: include/uapi/linux/if_infiniband.h F: include/uapi/rdma/ F: include/rdma/ F: include/trace/events/ib_mad.h +F: include/trace/events/ib_umad.h INGENIC JZ4780 DMA Driver M: Zubair Lutfullah Kakakhel diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index 02b7947ab215..2de5b4404abc 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -129,6 +129,9 @@ struct ib_umad_packet { struct ib_user_mad mad; }; +#define CREATE_TRACE_POINTS +#include + static const dev_t base_umad_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE); static const dev_t base_issm_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE) + IB_UMAD_NUM_FIXED_MINOR; @@ -334,6 +337,9 @@ static ssize_t copy_recv_mad(struct ib_umad_file *file, char __user *buf, return -EFAULT; } } + + trace_ib_umad_read_recv(file, &packet->mad.hdr, &recv_buf->mad->mad_hdr); + return hdr_size(file) + packet->length; } @@ -353,6 +359,9 @@ static ssize_t copy_send_mad(struct ib_umad_file *file, char __user *buf, if (copy_to_user(buf, packet->mad.data, packet->length)) return -EFAULT; + trace_ib_umad_read_send(file, &packet->mad.hdr, + (struct ib_mad_hdr *)&packet->mad.data); + return size; } @@ -508,6 +517,9 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, mutex_lock(&file->mutex); + trace_ib_umad_write(file, &packet->mad.hdr, + (struct ib_mad_hdr *)&packet->mad.data); + agent = __get_agent(file, packet->mad.hdr.id); if (!agent) { ret = -EINVAL; diff --git a/include/trace/events/ib_umad.h 
b/include/trace/events/ib_umad.h new file mode 100644 index 000000000000..c393a19a0f60 --- /dev/null +++ b/include/trace/events/ib_umad.h @@ -0,0 +1,126 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ + +/* + * Copyright (c) 2018 Intel Corporation. All rights reserved. + * + */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM ib_umad + +#if !defined(_TRACE_IB_UMAD_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_IB_UMAD_H + +#include + +DECLARE_EVENT_CLASS(ib_umad_template, + TP_PROTO(struct ib_umad_file *file, struct ib_user_mad_hdr *umad_hdr, + struct ib_mad_hdr *mad_hdr), + TP_ARGS(file, umad_hdr, mad_hdr), + + TP_STRUCT__entry( + __field(u8, port_num) + __field(u8, sl) + __field(u8, path_bits) + __field(u8, grh_present) + __field(u32, id) + __field(u32, status) + __field(u32, timeout_ms) + __field(u32, retires) + __field(u32, length) + __field(u32, qpn) + __field(u32, qkey) + __field(u8, gid_index) + __field(u8, hop_limit) + __field(u16, lid) + __field(u16, attr_id) + __field(u16, pkey_index) + __field(u8, base_version) + __field(u8, mgmt_class) + __field(u8, class_version) + __field(u8, method) + __field(u32, flow_label) + __field(u16, mad_status) + __field(u16, class_specific) + __field(u32, attr_mod) + __field(u64, tid) + __array(u8, gid, 16) + __field(u32, dev_index) + __field(u8, traffic_class) + ), + + TP_fast_assign( + __entry->dev_index = file->port->ib_dev->index; + __entry->port_num = file->port->port_num; + + __entry->id = umad_hdr->id; + __entry->status = umad_hdr->status; + __entry->timeout_ms = umad_hdr->timeout_ms; + __entry->retires = umad_hdr->retries; + __entry->length = umad_hdr->length; + __entry->qpn = umad_hdr->qpn; + __entry->qkey = umad_hdr->qkey; + __entry->lid = umad_hdr->lid; + __entry->sl = umad_hdr->sl; + __entry->path_bits = umad_hdr->path_bits; + __entry->grh_present = umad_hdr->grh_present; + __entry->gid_index = umad_hdr->gid_index; + __entry->hop_limit = umad_hdr->hop_limit; + __entry->traffic_class = 
umad_hdr->traffic_class; + memcpy(__entry->gid, umad_hdr->gid, sizeof(umad_hdr->gid)); + __entry->flow_label = umad_hdr->flow_label; + __entry->pkey_index = umad_hdr->pkey_index; + + __entry->base_version = mad_hdr->base_version; + __entry->mgmt_class = mad_hdr->mgmt_class; + __entry->class_version = mad_hdr->class_version; + __entry->method = mad_hdr->method; + __entry->mad_status = mad_hdr->status; + __entry->class_specific = mad_hdr->class_specific; + __entry->tid = mad_hdr->tid; + __entry->attr_id = mad_hdr->attr_id; + __entry->attr_mod = mad_hdr->attr_mod; + ), + + TP_printk("%d:%d umad_hdr: id 0x%08x status 0x%08x ms %u ret %u " \ + "len %u QP%u qkey 0x%08x lid 0x%04x sl %u path_bits 0x%x " \ + "grh 0x%x gidi %u hop_lim %u traf_cl %u gid %pI6c " \ + "flow 0x%08x pkeyi %u MAD: base_ver 0x%x class 0x%x " \ + "class_ver 0x%x method 0x%x status 0x%04x " \ + "class_specific 0x%04x tid 0x%016llx attr_id 0x%04x " \ + "attr_mod 0x%08x ", + __entry->dev_index, __entry->port_num, + __entry->id, __entry->status, __entry->timeout_ms, + __entry->retires, __entry->length, be32_to_cpu(__entry->qpn), + be32_to_cpu(__entry->qkey), be16_to_cpu(__entry->lid), + __entry->sl, __entry->path_bits, __entry->grh_present, + __entry->gid_index, __entry->hop_limit, + __entry->traffic_class, &__entry->gid, + be32_to_cpu(__entry->flow_label), __entry->pkey_index, + __entry->base_version, __entry->mgmt_class, + __entry->class_version, __entry->method, + be16_to_cpu(__entry->mad_status), + be16_to_cpu(__entry->class_specific), + be64_to_cpu(__entry->tid), be16_to_cpu(__entry->attr_id), + be32_to_cpu(__entry->attr_mod) + ) +); + +DEFINE_EVENT(ib_umad_template, ib_umad_write, + TP_PROTO(struct ib_umad_file *file, struct ib_user_mad_hdr *umad_hdr, + struct ib_mad_hdr *mad_hdr), + TP_ARGS(file, umad_hdr, mad_hdr)); + +DEFINE_EVENT(ib_umad_template, ib_umad_read_recv, + TP_PROTO(struct ib_umad_file *file, struct ib_user_mad_hdr *umad_hdr, + struct ib_mad_hdr *mad_hdr), + TP_ARGS(file, umad_hdr, 
mad_hdr)); + +DEFINE_EVENT(ib_umad_template, ib_umad_read_send, + TP_PROTO(struct ib_umad_file *file, struct ib_user_mad_hdr *umad_hdr, + struct ib_mad_hdr *mad_hdr), + TP_ARGS(file, umad_hdr, mad_hdr)); + +#endif /* _TRACE_IB_UMAD_H */ + +#include -- cgit v1.2.3 From 2ccfbb70c2773ab0f257319c7e3b1cefb766d82e Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Tue, 19 Mar 2019 14:11:48 -0700 Subject: IB/MAD: Add SMP details to MAD tracing Decode more information from the packet and include it in the trace. Reviewed-by: "Ruhl, Michael J" Signed-off-by: Ira Weiny Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/mad.c | 8 ++++ include/trace/events/ib_mad.h | 90 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 98 insertions(+) diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index cc1805211a84..cc99479b2c09 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -841,6 +841,8 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, if (opa && smp->class_version == OPA_SM_CLASS_VERSION) { u32 opa_drslid; + trace_ib_mad_handle_out_opa_smi(opa_smp); + if ((opa_get_smp_direction(opa_smp) ? opa_smp->route.dr.dr_dlid : opa_smp->route.dr.dr_slid) == OPA_LID_PERMISSIVE && @@ -866,6 +868,8 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, opa_smi_check_local_returning_smp(opa_smp, device) == IB_SMI_DISCARD) goto out; } else { + trace_ib_mad_handle_out_ib_smi(smp); + if ((ib_get_smp_direction(smp) ? 
smp->dr_dlid : smp->dr_slid) == IB_LID_PERMISSIVE && smi_handle_dr_smp_send(smp, rdma_cap_ib_switch(device), port_num) == @@ -2098,6 +2102,8 @@ static enum smi_action handle_ib_smi(const struct ib_mad_port_private *port_priv enum smi_forward_action retsmi; struct ib_smp *smp = (struct ib_smp *)recv->mad; + trace_ib_mad_handle_ib_smi(smp); + if (smi_handle_dr_smp_recv(smp, rdma_cap_ib_switch(port_priv->device), port_num, @@ -2183,6 +2189,8 @@ handle_opa_smi(struct ib_mad_port_private *port_priv, enum smi_forward_action retsmi; struct opa_smp *smp = (struct opa_smp *)recv->mad; + trace_ib_mad_handle_opa_smi(smp); + if (opa_smi_handle_dr_smp_recv(smp, rdma_cap_ib_switch(port_priv->device), port_num, diff --git a/include/trace/events/ib_mad.h b/include/trace/events/ib_mad.h index 6f504c2af935..59363a083ecb 100644 --- a/include/trace/events/ib_mad.h +++ b/include/trace/events/ib_mad.h @@ -295,6 +295,96 @@ DEFINE_EVENT(ib_mad_agent_template, ib_mad_unregister_agent, TP_ARGS(agent)); + +DECLARE_EVENT_CLASS(ib_mad_opa_smi_template, + TP_PROTO(struct opa_smp *smp), + TP_ARGS(smp), + + TP_STRUCT__entry( + __field(u64, mkey) + __field(u32, dr_slid) + __field(u32, dr_dlid) + __field(u8, hop_ptr) + __field(u8, hop_cnt) + __array(u8, initial_path, OPA_SMP_MAX_PATH_HOPS) + __array(u8, return_path, OPA_SMP_MAX_PATH_HOPS) + ), + + TP_fast_assign( + __entry->hop_ptr = smp->hop_ptr; + __entry->hop_cnt = smp->hop_cnt; + __entry->mkey = smp->mkey; + __entry->dr_slid = smp->route.dr.dr_slid; + __entry->dr_dlid = smp->route.dr.dr_dlid; + memcpy(__entry->initial_path, smp->route.dr.initial_path, + OPA_SMP_MAX_PATH_HOPS); + memcpy(__entry->return_path, smp->route.dr.return_path, + OPA_SMP_MAX_PATH_HOPS); + ), + + TP_printk("OPA SMP: hop_ptr %d hop_cnt %d " \ + "mkey 0x%016llx dr_slid 0x%08x dr_dlid 0x%08x " \ + "initial_path %*ph return_path %*ph ", + __entry->hop_ptr, __entry->hop_cnt, + be64_to_cpu(__entry->mkey), be32_to_cpu(__entry->dr_slid), + be32_to_cpu(__entry->dr_dlid), + 
OPA_SMP_MAX_PATH_HOPS, __entry->initial_path, + OPA_SMP_MAX_PATH_HOPS, __entry->return_path + ) +); + +DEFINE_EVENT(ib_mad_opa_smi_template, ib_mad_handle_opa_smi, + TP_PROTO(struct opa_smp *smp), + TP_ARGS(smp)); +DEFINE_EVENT(ib_mad_opa_smi_template, ib_mad_handle_out_opa_smi, + TP_PROTO(struct opa_smp *smp), + TP_ARGS(smp)); + + +DECLARE_EVENT_CLASS(ib_mad_opa_ib_template, + TP_PROTO(struct ib_smp *smp), + TP_ARGS(smp), + + TP_STRUCT__entry( + __field(u64, mkey) + __field(u32, dr_slid) + __field(u32, dr_dlid) + __field(u8, hop_ptr) + __field(u8, hop_cnt) + __array(u8, initial_path, IB_SMP_MAX_PATH_HOPS) + __array(u8, return_path, IB_SMP_MAX_PATH_HOPS) + ), + + TP_fast_assign( + __entry->hop_ptr = smp->hop_ptr; + __entry->hop_cnt = smp->hop_cnt; + __entry->mkey = smp->mkey; + __entry->dr_slid = smp->dr_slid; + __entry->dr_dlid = smp->dr_dlid; + memcpy(__entry->initial_path, smp->initial_path, + IB_SMP_MAX_PATH_HOPS); + memcpy(__entry->return_path, smp->return_path, + IB_SMP_MAX_PATH_HOPS); + ), + + TP_printk("OPA SMP: hop_ptr %d hop_cnt %d " \ + "mkey 0x%016llx dr_slid 0x%04x dr_dlid 0x%04x " \ + "initial_path %*ph return_path %*ph ", + __entry->hop_ptr, __entry->hop_cnt, + be64_to_cpu(__entry->mkey), be16_to_cpu(__entry->dr_slid), + be16_to_cpu(__entry->dr_dlid), + IB_SMP_MAX_PATH_HOPS, __entry->initial_path, + IB_SMP_MAX_PATH_HOPS, __entry->return_path + ) +); + +DEFINE_EVENT(ib_mad_opa_ib_template, ib_mad_handle_ib_smi, + TP_PROTO(struct ib_smp *smp), + TP_ARGS(smp)); +DEFINE_EVENT(ib_mad_opa_ib_template, ib_mad_handle_out_ib_smi, + TP_PROTO(struct ib_smp *smp), + TP_ARGS(smp)); + #endif /* _TRACE_IB_MAD_H */ #include -- cgit v1.2.3 From 0ac01febd4753809d7d4d785f71cac2abde6b1c3 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Tue, 19 Mar 2019 14:11:49 -0700 Subject: BPF: Add sample code for new ib_umad tracepoint Provide a count of class types for a summary of MAD packets. The example shows one way to filter the trace data based on management class. 
Signed-off-by: Ira Weiny Signed-off-by: Jason Gunthorpe --- MAINTAINERS | 2 + samples/bpf/Makefile | 3 + samples/bpf/ibumad_kern.c | 144 ++++++++++++++++++++++++++++++++++++++++++++++ samples/bpf/ibumad_user.c | 122 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 271 insertions(+) create mode 100644 samples/bpf/ibumad_kern.c create mode 100644 samples/bpf/ibumad_user.c diff --git a/MAINTAINERS b/MAINTAINERS index 4e851fc92e41..1de122b6b553 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7655,6 +7655,8 @@ F: include/uapi/rdma/ F: include/rdma/ F: include/trace/events/ib_mad.h F: include/trace/events/ib_umad.h +F: samples/bpf/ibumad_kern.c +F: samples/bpf/ibumad_user.c INGENIC JZ4780 DMA Driver M: Zubair Lutfullah Kakakhel diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 65e667bdf979..4f0a1cdbfe7c 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -52,6 +52,7 @@ hostprogs-y += xdpsock hostprogs-y += xdp_fwd hostprogs-y += task_fd_query hostprogs-y += xdp_sample_pkts +hostprogs-y += ibumad hostprogs-y += hbm # Libbpf dependencies @@ -108,6 +109,7 @@ xdpsock-objs := xdpsock_user.o xdp_fwd-objs := xdp_fwd_user.o task_fd_query-objs := bpf_load.o task_fd_query_user.o $(TRACE_HELPERS) xdp_sample_pkts-objs := xdp_sample_pkts_user.o $(TRACE_HELPERS) +ibumad-objs := bpf_load.o ibumad_user.o $(TRACE_HELPERS) hbm-objs := bpf_load.o hbm.o $(CGROUP_HELPERS) # Tell kbuild to always build the programs @@ -166,6 +168,7 @@ always += xdp_adjust_tail_kern.o always += xdp_fwd_kern.o always += task_fd_query_kern.o always += xdp_sample_pkts_kern.o +always += ibumad_kern.o always += hbm_out_kern.o KBUILD_HOSTCFLAGS += -I$(objtree)/usr/include diff --git a/samples/bpf/ibumad_kern.c b/samples/bpf/ibumad_kern.c new file mode 100644 index 000000000000..38b2b3f22049 --- /dev/null +++ b/samples/bpf/ibumad_kern.c @@ -0,0 +1,144 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB + +/** + * ibumad BPF sample kernel side + * + * This program is free software; 
you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Copyright(c) 2018 Ira Weiny, Intel Corporation + */ + +#define KBUILD_MODNAME "ibumad_count_pkts_by_class" +#include + +#include "bpf_helpers.h" + + +struct bpf_map_def SEC("maps") read_count = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(u32), /* class; u32 required */ + .value_size = sizeof(u64), /* count of mads read */ + .max_entries = 256, /* Room for all Classes */ +}; + +struct bpf_map_def SEC("maps") write_count = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(u32), /* class; u32 required */ + .value_size = sizeof(u64), /* count of mads written */ + .max_entries = 256, /* Room for all Classes */ +}; + +#undef DEBUG +#ifdef DEBUG +#define bpf_debug(fmt, ...) \ +({ \ + char ____fmt[] = fmt; \ + bpf_trace_printk(____fmt, sizeof(____fmt), \ + ##__VA_ARGS__); \ +}) +#else +#define bpf_debug(fmt, ...) +#endif + +/* Taken from the current format defined in + * include/trace/events/ib_umad.h + * and + * /sys/kernel/debug/tracing/events/ib_umad/ib_umad_read/format + * /sys/kernel/debug/tracing/events/ib_umad/ib_umad_write/format + */ +struct ib_umad_rw_args { + u64 pad; + u8 port_num; + u8 sl; + u8 path_bits; + u8 grh_present; + u32 id; + u32 status; + u32 timeout_ms; + u32 retires; + u32 length; + u32 qpn; + u32 qkey; + u8 gid_index; + u8 hop_limit; + u16 lid; + u16 attr_id; + u16 pkey_index; + u8 base_version; + u8 mgmt_class; + u8 class_version; + u8 method; + u32 flow_label; + u16 mad_status; + u16 class_specific; + u32 attr_mod; + u64 tid; + u8 gid[16]; + u32 dev_index; + u8 traffic_class; +}; + +SEC("tracepoint/ib_umad/ib_umad_read_recv") +int on_ib_umad_read_recv(struct ib_umad_rw_args *ctx) +{ + u64 zero = 0, *val; + u8 class = ctx->mgmt_class; + + bpf_debug("ib_umad read recv : class 0x%x\n", class); + + val = bpf_map_lookup_elem(&read_count, &class); + if (!val) { + 
bpf_map_update_elem(&read_count, &class, &zero, BPF_NOEXIST); + val = bpf_map_lookup_elem(&read_count, &class); + if (!val) + return 0; + } + + (*val) += 1; + + return 0; +} +SEC("tracepoint/ib_umad/ib_umad_read_send") +int on_ib_umad_read_send(struct ib_umad_rw_args *ctx) +{ + u64 zero = 0, *val; + u8 class = ctx->mgmt_class; + + bpf_debug("ib_umad read send : class 0x%x\n", class); + + val = bpf_map_lookup_elem(&read_count, &class); + if (!val) { + bpf_map_update_elem(&read_count, &class, &zero, BPF_NOEXIST); + val = bpf_map_lookup_elem(&read_count, &class); + if (!val) + return 0; + } + + (*val) += 1; + + return 0; +} +SEC("tracepoint/ib_umad/ib_umad_write") +int on_ib_umad_write(struct ib_umad_rw_args *ctx) +{ + u64 zero = 0, *val; + u8 class = ctx->mgmt_class; + + bpf_debug("ib_umad write : class 0x%x\n", class); + + val = bpf_map_lookup_elem(&write_count, &class); + if (!val) { + bpf_map_update_elem(&write_count, &class, &zero, BPF_NOEXIST); + val = bpf_map_lookup_elem(&write_count, &class); + if (!val) + return 0; + } + + (*val) += 1; + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/ibumad_user.c b/samples/bpf/ibumad_user.c new file mode 100644 index 000000000000..097d76143363 --- /dev/null +++ b/samples/bpf/ibumad_user.c @@ -0,0 +1,122 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB + +/** + * ibumad BPF sample user side + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. 
+ * + * Copyright(c) 2018 Ira Weiny, Intel Corporation + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "bpf_load.h" +#include "bpf_util.h" +#include "bpf/libbpf.h" + +static void dump_counts(int fd) +{ + __u32 key; + __u64 value; + + for (key = 0; key < 256; key++) { + if (bpf_map_lookup_elem(fd, &key, &value)) { + printf("failed to read key %u\n", key); + continue; + } + if (value) + printf("0x%02x : %llu\n", key, value); + } +} + +static void dump_all_counts(void) +{ + printf("Read 'Class : count'\n"); + dump_counts(map_fd[0]); + printf("Write 'Class : count'\n"); + dump_counts(map_fd[1]); +} + +static void dump_exit(int sig) +{ + dump_all_counts(); + exit(0); +} + +static const struct option long_options[] = { + {"help", no_argument, NULL, 'h'}, + {"delay", required_argument, NULL, 'd'}, +}; + +static void usage(char *cmd) +{ + printf("eBPF test program to count packets from various IP addresses\n" + "Usage: %s \n" + " --help, -h this menu\n" + " --delay, -d wait sec between prints [1 - 1000000]\n" + , cmd + ); +} + +int main(int argc, char **argv) +{ + unsigned long delay = 5; + int longindex = 0; + int opt; + char bpf_file[256]; + + /* Create the eBPF kernel code path name. 
+ * This follows the pattern of all of the other bpf samples + */ + snprintf(bpf_file, sizeof(bpf_file), "%s_kern.o", argv[0]); + + /* Do one final dump when exiting */ + signal(SIGINT, dump_exit); + signal(SIGTERM, dump_exit); + + while ((opt = getopt_long(argc, argv, "hd:rSw", + long_options, &longindex)) != -1) { + switch (opt) { + case 'd': + delay = strtoul(optarg, NULL, 0); + if (delay == ULONG_MAX || delay < 0 || + delay > 1000000) { + fprintf(stderr, "ERROR: invalid delay : %s\n", + optarg); + usage(argv[0]); + return 1; + } + break; + default: + case 'h': + usage(argv[0]); + return 1; + } + } + + if (load_bpf_file(bpf_file)) { + fprintf(stderr, "ERROR: failed to load eBPF from file : %s\n", + bpf_file); + return 1; + } + + while (1) { + sleep(delay); + dump_all_counts(); + } + + return 0; +} -- cgit v1.2.3 From 08304d714638dff08bf001e92ce56411e2e902c7 Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Wed, 27 Mar 2019 05:50:47 -0400 Subject: IB/rxe: Replace av->network_type with skb->protocol In the function rxe_init_packet, based on av->network_type, skb->protocol is set to ipv4 or ipv6. The functions rxe_prepare and rxe_send are called after the function rxe_init_packet. So in these functions, av->network_type can be replaced with skb->protocol. The functions are in the xmit fast path. So with skb->protocol, the performance will be better.
Signed-off-by: Zhu Yanjun Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_net.c | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index 753cabcd441c..f186b92ba45b 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -338,13 +338,13 @@ static void prepare_ipv6_hdr(struct dst_entry *dst, struct sk_buff *skb, ip6h->payload_len = htons(skb->len - sizeof(*ip6h)); } -static int prepare4(struct rxe_pkt_info *pkt, struct sk_buff *skb, - struct rxe_av *av) +static int prepare4(struct rxe_pkt_info *pkt, struct sk_buff *skb) { struct rxe_qp *qp = pkt->qp; struct dst_entry *dst; bool xnet = false; __be16 df = htons(IP_DF); + struct rxe_av *av = rxe_get_av(pkt); struct in_addr *saddr = &av->sgid_addr._sockaddr_in.sin_addr; struct in_addr *daddr = &av->dgid_addr._sockaddr_in.sin_addr; @@ -364,11 +364,11 @@ static int prepare4(struct rxe_pkt_info *pkt, struct sk_buff *skb, return 0; } -static int prepare6(struct rxe_pkt_info *pkt, struct sk_buff *skb, - struct rxe_av *av) +static int prepare6(struct rxe_pkt_info *pkt, struct sk_buff *skb) { struct rxe_qp *qp = pkt->qp; struct dst_entry *dst; + struct rxe_av *av = rxe_get_av(pkt); struct in6_addr *saddr = &av->sgid_addr._sockaddr_in6.sin6_addr; struct in6_addr *daddr = &av->dgid_addr._sockaddr_in6.sin6_addr; @@ -392,16 +392,15 @@ static int prepare6(struct rxe_pkt_info *pkt, struct sk_buff *skb, int rxe_prepare(struct rxe_pkt_info *pkt, struct sk_buff *skb, u32 *crc) { int err = 0; - struct rxe_av *av = rxe_get_av(pkt); - if (av->network_type == RDMA_NETWORK_IPV4) - err = prepare4(pkt, skb, av); - else if (av->network_type == RDMA_NETWORK_IPV6) - err = prepare6(pkt, skb, av); + if (skb->protocol == htons(ETH_P_IP)) + err = prepare4(pkt, skb); + else if (skb->protocol == htons(ETH_P_IPV6)) + err = prepare6(pkt, skb); *crc = rxe_icrc_hdr(pkt, skb); - if 
(ether_addr_equal(skb->dev->dev_addr, av->dmac)) + if (ether_addr_equal(skb->dev->dev_addr, rxe_get_av(pkt)->dmac)) pkt->mask |= RXE_LOOPBACK_MASK; return err; @@ -422,23 +421,20 @@ static void rxe_skb_tx_dtor(struct sk_buff *skb) int rxe_send(struct rxe_pkt_info *pkt, struct sk_buff *skb) { - struct rxe_av *av; int err; - av = rxe_get_av(pkt); - skb->destructor = rxe_skb_tx_dtor; skb->sk = pkt->qp->sk->sk; rxe_add_ref(pkt->qp); atomic_inc(&pkt->qp->skb_out); - if (av->network_type == RDMA_NETWORK_IPV4) { + if (skb->protocol == htons(ETH_P_IP)) { err = ip_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb); - } else if (av->network_type == RDMA_NETWORK_IPV6) { + } else if (skb->protocol == htons(ETH_P_IPV6)) { err = ip6_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb); } else { - pr_err("Unknown layer 3 protocol: %d\n", av->network_type); + pr_err("Unknown layer 3 protocol: %d\n", skb->protocol); atomic_dec(&pkt->qp->skb_out); rxe_drop_ref(pkt->qp); kfree_skb(skb); -- cgit v1.2.3 From 259e66bcdff59b5d92b8f43e42237aaea2c4d457 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 27 Mar 2019 16:50:45 -0700 Subject: RDMA/uverbs: Add a __user annotation to a pointer This patch avoids that sparse and smatch report the following: warning: cast removes address space of expression Reviewed-by: Leon Romanovsky Fixes: 3a6532c9af1a ("RDMA/uverbs: Use uverbs_attr_bundle to pass udata for write") Signed-off-by: Bart Van Assche Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 70b7d80431a9..b8fc5a329e21 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -720,7 +720,7 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, * then the command request structure starts * with a '__aligned u64 response' member. 
*/ - ret = get_user(response, (const u64 *)buf); + ret = get_user(response, (const u64 __user *)buf); if (ret) goto out_unlock; -- cgit v1.2.3 From 2dcdebff5e84356824c800ae5217c02ef0f78b06 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 27 Mar 2019 16:50:46 -0700 Subject: RDMA/uverbs: Annotate uverbs_request_next_ptr() return value as a __user pointer This patch avoids that sparse complains about a mismatch between the returned value and the function return type. Reviewed-by: Leon Romanovsky Fixes: c3bea3d2dc53 ("RDMA/uverbs: Use the iterator for ib_uverbs_unmarshall_recv()") Signed-off-by: Bart Van Assche Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_cmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 062a86c04123..c9acd94b049d 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -162,7 +162,7 @@ static const void __user *uverbs_request_next_ptr(struct uverbs_req_iter *iter, const void __user *res = iter->cur; if (iter->cur + len > iter->end) - return ERR_PTR(-ENOSPC); + return (void __force __user *)ERR_PTR(-ENOSPC); iter->cur += len; return res; } -- cgit v1.2.3 From 0080aed4e4ca0d58e718df5a907aa12c1f9b9226 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 27 Mar 2019 16:50:47 -0700 Subject: RDMA/uverbs: Allow the compiler to verify declaration and definition consistency This patch avoids that sparse reports the following warnings: drivers/infiniband/core/uverbs_std_types_flow_action.c:442:30: warning: symbol 'uverbs_def_obj_flow_action' was not declared. Should it be static? drivers/infiniband/core/uverbs_std_types_dm.c:112:30: warning: symbol 'uverbs_def_obj_dm' was not declared. Should it be static? drivers/infiniband/core/uverbs_std_types_counters.c:153:30: warning: symbol 'uverbs_def_obj_counters' was not declared. Should it be static? 
drivers/infiniband/core/uverbs_std_types_mr.c:213:30: warning: symbol 'uverbs_def_obj_mr' was not declared. Should it be static? Reviewed-by: Leon Romanovsky Fixes: 0bd01f3d0907 ("RDMA/uverbs: Require all objects to have a driver destroy function") Signed-off-by: Bart Van Assche Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_std_types_counters.c | 1 + drivers/infiniband/core/uverbs_std_types_dm.c | 1 + drivers/infiniband/core/uverbs_std_types_flow_action.c | 1 + drivers/infiniband/core/uverbs_std_types_mr.c | 1 + 4 files changed, 4 insertions(+) diff --git a/drivers/infiniband/core/uverbs_std_types_counters.c b/drivers/infiniband/core/uverbs_std_types_counters.c index 309c5e80988d..7880d50165ed 100644 --- a/drivers/infiniband/core/uverbs_std_types_counters.c +++ b/drivers/infiniband/core/uverbs_std_types_counters.c @@ -31,6 +31,7 @@ * SOFTWARE. */ +#include "rdma_core.h" #include "uverbs.h" #include diff --git a/drivers/infiniband/core/uverbs_std_types_dm.c b/drivers/infiniband/core/uverbs_std_types_dm.c index 2ef70637bee1..de3f04a4398c 100644 --- a/drivers/infiniband/core/uverbs_std_types_dm.c +++ b/drivers/infiniband/core/uverbs_std_types_dm.c @@ -30,6 +30,7 @@ * SOFTWARE. */ +#include "rdma_core.h" #include "uverbs.h" #include diff --git a/drivers/infiniband/core/uverbs_std_types_flow_action.c b/drivers/infiniband/core/uverbs_std_types_flow_action.c index 4962b87fa600..3a87b16a93b3 100644 --- a/drivers/infiniband/core/uverbs_std_types_flow_action.c +++ b/drivers/infiniband/core/uverbs_std_types_flow_action.c @@ -30,6 +30,7 @@ * SOFTWARE. */ +#include "rdma_core.h" #include "uverbs.h" #include diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c b/drivers/infiniband/core/uverbs_std_types_mr.c index 4d4be0c2b752..3b4bf6370333 100644 --- a/drivers/infiniband/core/uverbs_std_types_mr.c +++ b/drivers/infiniband/core/uverbs_std_types_mr.c @@ -30,6 +30,7 @@ * SOFTWARE. 
*/ +#include "rdma_core.h" #include "uverbs.h" #include -- cgit v1.2.3 From 1f687edee2aed32586b4f5959b26da401026b6b3 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 27 Mar 2019 16:50:48 -0700 Subject: IB/mlx5: Declare devx_async_cmd_event_fops static Avoid that sparse complains about a missing declaration. Reviewed-by: Leon Romanovsky Fixes: 6bf8f22aea0d ("IB/mlx5: Introduce MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD") Signed-off-by: Bart Van Assche Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/devx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 9e08df7914aa..fa8d2a9229fa 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -1704,7 +1704,7 @@ static __poll_t devx_async_cmd_event_poll(struct file *filp, return pollflags; } -const struct file_operations devx_async_cmd_event_fops = { +static const struct file_operations devx_async_cmd_event_fops = { .owner = THIS_MODULE, .read = devx_async_cmd_event_read, .poll = devx_async_cmd_event_poll, -- cgit v1.2.3 From 920d10e45844d1448d4d279d07fa91e5a7cee4f1 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 27 Mar 2019 16:50:50 -0700 Subject: IB/hfi1: Fix two format strings Enable format string checking for hfi1_cdbg() and fix the resulting compiler warnings. 
Signed-off-by: Bart Van Assche Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/init.c | 4 ++-- drivers/infiniband/hw/hfi1/trace_dbg.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index faaaac8fbc55..3c339617aa95 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -469,7 +469,7 @@ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa, if (rcd->egrbufs.size < hfi1_max_mtu) { rcd->egrbufs.size = __roundup_pow_of_two(hfi1_max_mtu); hfi1_cdbg(PROC, - "ctxt%u: eager bufs size too small. Adjusting to %zu\n", + "ctxt%u: eager bufs size too small. Adjusting to %u\n", rcd->ctxt, rcd->egrbufs.size); } rcd->egrbufs.rcvtid_size = HFI1_MAX_EAGER_BUFFER_SIZE; @@ -2071,7 +2071,7 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd) rcd->egrbufs.size = alloced_bytes; hfi1_cdbg(PROC, - "ctxt%u: Alloced %u rcv tid entries @ %uKB, total %zuKB\n", + "ctxt%u: Alloced %u rcv tid entries @ %uKB, total %uKB\n", rcd->ctxt, rcd->egrbufs.alloced, rcd->egrbufs.rcvtid_size / 1024, rcd->egrbufs.size / 1024); diff --git a/drivers/infiniband/hw/hfi1/trace_dbg.h b/drivers/infiniband/hw/hfi1/trace_dbg.h index e62171fb7379..de7a87392b8d 100644 --- a/drivers/infiniband/hw/hfi1/trace_dbg.h +++ b/drivers/infiniband/hw/hfi1/trace_dbg.h @@ -86,14 +86,14 @@ DECLARE_EVENT_CLASS(hfi1_trace_template, * actual function to work and can not be in a macro. */ #define __hfi1_trace_def(lvl) \ -void __hfi1_trace_##lvl(const char *funct, char *fmt, ...); \ +void __printf(2, 3) __hfi1_trace_##lvl(const char *funct, char *fmt, ...); \ \ DEFINE_EVENT(hfi1_trace_template, hfi1_ ##lvl, \ TP_PROTO(const char *function, struct va_format *vaf), \ TP_ARGS(function, vaf)) #define __hfi1_trace_fn(lvl) \ -void __hfi1_trace_##lvl(const char *func, char *fmt, ...) 
\ +void __printf(2, 3) __hfi1_trace_##lvl(const char *func, char *fmt, ...)\ { \ struct va_format vaf = { \ .fmt = fmt, \ -- cgit v1.2.3 From 196b4ce57d1612ca03be3c7f14bfb6b0740c5c53 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 27 Mar 2019 16:50:51 -0700 Subject: IB/qib: Remove a set-but-not-used variable This patch avoids that a compiler warning is reported when building with W=1. Reviewed-by: Leon Romanovsky Fixes: 49c0e2414b20 ("IB/qib: Change SDMA progression mode depending on single- or multi-rail") Signed-off-by: Bart Van Assche Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qib/qib_user_sdma.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.c b/drivers/infiniband/hw/qib/qib_user_sdma.c index 31c523b2a9f5..ef19d39a44b1 100644 --- a/drivers/infiniband/hw/qib/qib_user_sdma.c +++ b/drivers/infiniband/hw/qib/qib_user_sdma.c @@ -225,8 +225,6 @@ qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt) if (sdma_rb_node) { sdma_rb_node->refcount++; } else { - int ret; - sdma_rb_node = kmalloc(sizeof( struct qib_user_sdma_rb_node), GFP_KERNEL); if (!sdma_rb_node) @@ -235,8 +233,7 @@ qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt) sdma_rb_node->refcount = 1; sdma_rb_node->pid = current->pid; - ret = qib_user_sdma_rb_insert(&qib_user_sdma_rb_root, - sdma_rb_node); + qib_user_sdma_rb_insert(&qib_user_sdma_rb_root, sdma_rb_node); } pq->sdma_rb_node = sdma_rb_node; -- cgit v1.2.3 From 5aa8484080115cff2da68080ad1d115613648504 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Thu, 28 Mar 2019 11:49:43 -0500 Subject: RDMA/bnxt_re: Use correct sizing on buffers holding page DMA addresses umem->nmap is used while allocating internal buffer for storing page DMA addresses. This causes out of bounds array access while iterating the umem DMA-mapped SGL with umem page combining as umem->nmap can be less than number of system pages in umem. 
Use ib_umem_num_pages() instead of umem->nmap to size the page array. Add a new structure (bnxt_qplib_sg_info) to pass sglist, npages and nmap. Signed-off-by: Selvin Xavier Signed-off-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 25 ++++++++--------- drivers/infiniband/hw/bnxt_re/qplib_fp.c | 27 ++++++++++--------- drivers/infiniband/hw/bnxt_re/qplib_fp.h | 9 +++---- drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 4 +-- drivers/infiniband/hw/bnxt_re/qplib_res.c | 43 ++++++++++++++++++------------ drivers/infiniband/hw/bnxt_re/qplib_res.h | 8 +++++- drivers/infiniband/hw/bnxt_re/qplib_sp.c | 4 +-- 7 files changed, 67 insertions(+), 53 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 071b2fc38b0b..33b2a06c6dde 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -895,8 +895,9 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd, return PTR_ERR(umem); qp->sumem = umem; - qplib_qp->sq.sglist = umem->sg_head.sgl; - qplib_qp->sq.nmap = umem->nmap; + qplib_qp->sq.sg_info.sglist = umem->sg_head.sgl; + qplib_qp->sq.sg_info.npages = ib_umem_num_pages(umem); + qplib_qp->sq.sg_info.nmap = umem->nmap; qplib_qp->qp_handle = ureq.qp_handle; if (!qp->qplib_qp.srq) { @@ -907,8 +908,9 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd, if (IS_ERR(umem)) goto rqfail; qp->rumem = umem; - qplib_qp->rq.sglist = umem->sg_head.sgl; - qplib_qp->rq.nmap = umem->nmap; + qplib_qp->rq.sg_info.sglist = umem->sg_head.sgl; + qplib_qp->rq.sg_info.npages = ib_umem_num_pages(umem); + qplib_qp->rq.sg_info.nmap = umem->nmap; } qplib_qp->dpi = &cntx->dpi; @@ -916,8 +918,7 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd, rqfail: ib_umem_release(qp->sumem); qp->sumem = NULL; - qplib_qp->sq.sglist = NULL; - qplib_qp->sq.nmap = 0; + 
memset(&qplib_qp->sq.sg_info, 0, sizeof(qplib_qp->sq.sg_info)); return PTR_ERR(umem); } @@ -1374,8 +1375,9 @@ static int bnxt_re_init_user_srq(struct bnxt_re_dev *rdev, return PTR_ERR(umem); srq->umem = umem; - qplib_srq->nmap = umem->nmap; - qplib_srq->sglist = umem->sg_head.sgl; + qplib_srq->sg_info.sglist = umem->sg_head.sgl; + qplib_srq->sg_info.npages = ib_umem_num_pages(umem); + qplib_srq->sg_info.nmap = umem->nmap; qplib_srq->srq_handle = ureq.srq_handle; qplib_srq->dpi = &cntx->dpi; @@ -2632,8 +2634,9 @@ struct ib_cq *bnxt_re_create_cq(struct ib_device *ibdev, rc = PTR_ERR(cq->umem); goto fail; } - cq->qplib_cq.sghead = cq->umem->sg_head.sgl; - cq->qplib_cq.nmap = cq->umem->nmap; + cq->qplib_cq.sg_info.sglist = cq->umem->sg_head.sgl; + cq->qplib_cq.sg_info.npages = ib_umem_num_pages(cq->umem); + cq->qplib_cq.sg_info.nmap = cq->umem->nmap; cq->qplib_cq.dpi = &uctx->dpi; } else { cq->max_cql = min_t(u32, entries, MAX_CQL_PER_POLL); @@ -2645,8 +2648,6 @@ struct ib_cq *bnxt_re_create_cq(struct ib_device *ibdev, } cq->qplib_cq.dpi = &rdev->dpi_privileged; - cq->qplib_cq.sghead = NULL; - cq->qplib_cq.nmap = 0; } /* * Allocating the NQ in a round robin fashion. 
nq_alloc_cnt is a diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index 71c34d5b0ac0..f034cab303f6 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -478,7 +478,7 @@ int bnxt_qplib_alloc_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq) nq->hwq.max_elements > BNXT_QPLIB_NQE_MAX_CNT) nq->hwq.max_elements = BNXT_QPLIB_NQE_MAX_CNT; hwq_type = bnxt_qplib_get_hwq_type(nq->res); - if (bnxt_qplib_alloc_init_hwq(nq->pdev, &nq->hwq, NULL, 0, + if (bnxt_qplib_alloc_init_hwq(nq->pdev, &nq->hwq, NULL, &nq->hwq.max_elements, BNXT_QPLIB_MAX_NQE_ENTRY_SIZE, 0, PAGE_SIZE, hwq_type)) @@ -542,8 +542,8 @@ int bnxt_qplib_create_srq(struct bnxt_qplib_res *res, int rc, idx; srq->hwq.max_elements = srq->max_wqe; - rc = bnxt_qplib_alloc_init_hwq(res->pdev, &srq->hwq, srq->sglist, - srq->nmap, &srq->hwq.max_elements, + rc = bnxt_qplib_alloc_init_hwq(res->pdev, &srq->hwq, &srq->sg_info, + &srq->hwq.max_elements, BNXT_QPLIB_MAX_RQE_ENTRY_SIZE, 0, PAGE_SIZE, HWQ_TYPE_QUEUE); if (rc) @@ -742,7 +742,7 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) /* SQ */ sq->hwq.max_elements = sq->max_wqe; - rc = bnxt_qplib_alloc_init_hwq(res->pdev, &sq->hwq, NULL, 0, + rc = bnxt_qplib_alloc_init_hwq(res->pdev, &sq->hwq, NULL, &sq->hwq.max_elements, BNXT_QPLIB_MAX_SQE_ENTRY_SIZE, 0, PAGE_SIZE, HWQ_TYPE_QUEUE); @@ -781,7 +781,7 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) /* RQ */ if (rq->max_wqe) { rq->hwq.max_elements = qp->rq.max_wqe; - rc = bnxt_qplib_alloc_init_hwq(res->pdev, &rq->hwq, NULL, 0, + rc = bnxt_qplib_alloc_init_hwq(res->pdev, &rq->hwq, NULL, &rq->hwq.max_elements, BNXT_QPLIB_MAX_RQE_ENTRY_SIZE, 0, PAGE_SIZE, HWQ_TYPE_QUEUE); @@ -890,8 +890,8 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) sizeof(struct sq_psn_search); } sq->hwq.max_elements = sq->max_wqe; - rc = 
bnxt_qplib_alloc_init_hwq(res->pdev, &sq->hwq, sq->sglist, - sq->nmap, &sq->hwq.max_elements, + rc = bnxt_qplib_alloc_init_hwq(res->pdev, &sq->hwq, &sq->sg_info, + &sq->hwq.max_elements, BNXT_QPLIB_MAX_SQE_ENTRY_SIZE, psn_sz, PAGE_SIZE, HWQ_TYPE_QUEUE); @@ -959,8 +959,9 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) /* RQ */ if (rq->max_wqe) { rq->hwq.max_elements = rq->max_wqe; - rc = bnxt_qplib_alloc_init_hwq(res->pdev, &rq->hwq, rq->sglist, - rq->nmap, &rq->hwq.max_elements, + rc = bnxt_qplib_alloc_init_hwq(res->pdev, &rq->hwq, + &rq->sg_info, + &rq->hwq.max_elements, BNXT_QPLIB_MAX_RQE_ENTRY_SIZE, 0, PAGE_SIZE, HWQ_TYPE_QUEUE); if (rc) @@ -1030,7 +1031,7 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) req_size = xrrq->max_elements * BNXT_QPLIB_MAX_ORRQE_ENTRY_SIZE + PAGE_SIZE - 1; req_size &= ~(PAGE_SIZE - 1); - rc = bnxt_qplib_alloc_init_hwq(res->pdev, xrrq, NULL, 0, + rc = bnxt_qplib_alloc_init_hwq(res->pdev, xrrq, NULL, &xrrq->max_elements, BNXT_QPLIB_MAX_ORRQE_ENTRY_SIZE, 0, req_size, HWQ_TYPE_CTX); @@ -1046,7 +1047,7 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) BNXT_QPLIB_MAX_IRRQE_ENTRY_SIZE + PAGE_SIZE - 1; req_size &= ~(PAGE_SIZE - 1); - rc = bnxt_qplib_alloc_init_hwq(res->pdev, xrrq, NULL, 0, + rc = bnxt_qplib_alloc_init_hwq(res->pdev, xrrq, NULL, &xrrq->max_elements, BNXT_QPLIB_MAX_IRRQE_ENTRY_SIZE, 0, req_size, HWQ_TYPE_CTX); @@ -1935,8 +1936,8 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq) int rc; cq->hwq.max_elements = cq->max_wqe; - rc = bnxt_qplib_alloc_init_hwq(res->pdev, &cq->hwq, cq->sghead, - cq->nmap, &cq->hwq.max_elements, + rc = bnxt_qplib_alloc_init_hwq(res->pdev, &cq->hwq, &cq->sg_info, + &cq->hwq.max_elements, BNXT_QPLIB_MAX_CQE_ENTRY_SIZE, 0, PAGE_SIZE, HWQ_TYPE_QUEUE); if (rc) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h index 
3f618b5f1f06..31436af2a4ec 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h @@ -52,10 +52,9 @@ struct bnxt_qplib_srq { struct bnxt_qplib_cq *cq; struct bnxt_qplib_hwq hwq; struct bnxt_qplib_swq *swq; - struct scatterlist *sglist; int start_idx; int last_idx; - u32 nmap; + struct bnxt_qplib_sg_info sg_info; u16 eventq_hw_ring_id; spinlock_t lock; /* protect SRQE link list */ }; @@ -237,8 +236,7 @@ struct bnxt_qplib_swqe { struct bnxt_qplib_q { struct bnxt_qplib_hwq hwq; struct bnxt_qplib_swq *swq; - struct scatterlist *sglist; - u32 nmap; + struct bnxt_qplib_sg_info sg_info; u32 max_wqe; u16 q_full_delta; u16 max_sge; @@ -381,8 +379,7 @@ struct bnxt_qplib_cq { u32 cnq_hw_ring_id; struct bnxt_qplib_nq *nq; bool resize_in_progress; - struct scatterlist *sghead; - u32 nmap; + struct bnxt_qplib_sg_info sg_info; u64 cq_handle; #define CQ_RESIZE_WAIT_TIME_MS 500 diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index c6461e957078..48b04d2f175f 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -569,7 +569,7 @@ int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev, rcfw->pdev = pdev; rcfw->creq.max_elements = BNXT_QPLIB_CREQE_MAX_CNT; hwq_type = bnxt_qplib_get_hwq_type(rcfw->res); - if (bnxt_qplib_alloc_init_hwq(rcfw->pdev, &rcfw->creq, NULL, 0, + if (bnxt_qplib_alloc_init_hwq(rcfw->pdev, &rcfw->creq, NULL, &rcfw->creq.max_elements, BNXT_QPLIB_CREQE_UNITS, 0, PAGE_SIZE, hwq_type)) { @@ -584,7 +584,7 @@ int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev, rcfw->cmdq.max_elements = rcfw->cmdq_depth; if (bnxt_qplib_alloc_init_hwq - (rcfw->pdev, &rcfw->cmdq, NULL, 0, + (rcfw->pdev, &rcfw->cmdq, NULL, &rcfw->cmdq.max_elements, BNXT_QPLIB_CMDQE_UNITS, 0, bnxt_qplib_cmdqe_page_size(rcfw->cmdq_depth), diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c index 
0bc24f934829..37928b1111df 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c @@ -83,7 +83,8 @@ static void __free_pbl(struct pci_dev *pdev, struct bnxt_qplib_pbl *pbl, } static int __alloc_pbl(struct pci_dev *pdev, struct bnxt_qplib_pbl *pbl, - struct scatterlist *sghead, u32 pages, u32 pg_size) + struct scatterlist *sghead, u32 pages, + u32 nmaps, u32 pg_size) { struct sg_dma_page_iter sg_iter; bool is_umem = false; @@ -116,7 +117,7 @@ static int __alloc_pbl(struct pci_dev *pdev, struct bnxt_qplib_pbl *pbl, } else { i = 0; is_umem = true; - for_each_sg_dma_page (sghead, &sg_iter, pages, 0) { + for_each_sg_dma_page(sghead, &sg_iter, nmaps, 0) { pbl->pg_map_arr[i] = sg_page_iter_dma_address(&sg_iter); pbl->pg_arr[i] = NULL; pbl->pg_count++; @@ -158,12 +159,13 @@ void bnxt_qplib_free_hwq(struct pci_dev *pdev, struct bnxt_qplib_hwq *hwq) /* All HWQs are power of 2 in size */ int bnxt_qplib_alloc_init_hwq(struct pci_dev *pdev, struct bnxt_qplib_hwq *hwq, - struct scatterlist *sghead, int nmap, + struct bnxt_qplib_sg_info *sg_info, u32 *elements, u32 element_size, u32 aux, u32 pg_size, enum bnxt_qplib_hwq_type hwq_type) { - u32 pages, slots, size, aux_pages = 0, aux_size = 0; + u32 pages, maps, slots, size, aux_pages = 0, aux_size = 0; dma_addr_t *src_phys_ptr, **dst_virt_ptr; + struct scatterlist *sghead = NULL; int i, rc; hwq->level = PBL_LVL_MAX; @@ -177,6 +179,9 @@ int bnxt_qplib_alloc_init_hwq(struct pci_dev *pdev, struct bnxt_qplib_hwq *hwq, } size = roundup_pow_of_two(element_size); + if (sg_info) + sghead = sg_info->sglist; + if (!sghead) { hwq->is_user = false; pages = (slots * size) / pg_size + aux_pages; @@ -184,17 +189,20 @@ int bnxt_qplib_alloc_init_hwq(struct pci_dev *pdev, struct bnxt_qplib_hwq *hwq, pages++; if (!pages) return -EINVAL; + maps = 0; } else { hwq->is_user = true; - pages = nmap; + pages = sg_info->npages; + maps = sg_info->nmap; } /* Alloc the 1st memory block; can be a PDL/PTL/PBL */ if 
(sghead && (pages == MAX_PBL_LVL_0_PGS)) rc = __alloc_pbl(pdev, &hwq->pbl[PBL_LVL_0], sghead, - pages, pg_size); + pages, maps, pg_size); else - rc = __alloc_pbl(pdev, &hwq->pbl[PBL_LVL_0], NULL, 1, pg_size); + rc = __alloc_pbl(pdev, &hwq->pbl[PBL_LVL_0], NULL, + 1, 0, pg_size); if (rc) goto fail; @@ -204,7 +212,8 @@ int bnxt_qplib_alloc_init_hwq(struct pci_dev *pdev, struct bnxt_qplib_hwq *hwq, if (pages > MAX_PBL_LVL_1_PGS) { /* 2 levels of indirection */ rc = __alloc_pbl(pdev, &hwq->pbl[PBL_LVL_1], NULL, - MAX_PBL_LVL_1_PGS_FOR_LVL_2, pg_size); + MAX_PBL_LVL_1_PGS_FOR_LVL_2, + 0, pg_size); if (rc) goto fail; /* Fill in lvl0 PBL */ @@ -217,7 +226,7 @@ int bnxt_qplib_alloc_init_hwq(struct pci_dev *pdev, struct bnxt_qplib_hwq *hwq, hwq->level = PBL_LVL_1; rc = __alloc_pbl(pdev, &hwq->pbl[PBL_LVL_2], sghead, - pages, pg_size); + pages, maps, pg_size); if (rc) goto fail; @@ -246,7 +255,7 @@ int bnxt_qplib_alloc_init_hwq(struct pci_dev *pdev, struct bnxt_qplib_hwq *hwq, /* 1 level of indirection */ rc = __alloc_pbl(pdev, &hwq->pbl[PBL_LVL_1], sghead, - pages, pg_size); + pages, maps, pg_size); if (rc) goto fail; /* Fill in lvl0 PBL */ @@ -339,7 +348,7 @@ int bnxt_qplib_alloc_ctx(struct pci_dev *pdev, /* QPC Tables */ ctx->qpc_tbl.max_elements = ctx->qpc_count; - rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->qpc_tbl, NULL, 0, + rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->qpc_tbl, NULL, &ctx->qpc_tbl.max_elements, BNXT_QPLIB_MAX_QP_CTX_ENTRY_SIZE, 0, PAGE_SIZE, HWQ_TYPE_CTX); @@ -348,7 +357,7 @@ int bnxt_qplib_alloc_ctx(struct pci_dev *pdev, /* MRW Tables */ ctx->mrw_tbl.max_elements = ctx->mrw_count; - rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->mrw_tbl, NULL, 0, + rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->mrw_tbl, NULL, &ctx->mrw_tbl.max_elements, BNXT_QPLIB_MAX_MRW_CTX_ENTRY_SIZE, 0, PAGE_SIZE, HWQ_TYPE_CTX); @@ -357,7 +366,7 @@ int bnxt_qplib_alloc_ctx(struct pci_dev *pdev, /* SRQ Tables */ ctx->srqc_tbl.max_elements = ctx->srqc_count; - rc = 
bnxt_qplib_alloc_init_hwq(pdev, &ctx->srqc_tbl, NULL, 0, + rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->srqc_tbl, NULL, &ctx->srqc_tbl.max_elements, BNXT_QPLIB_MAX_SRQ_CTX_ENTRY_SIZE, 0, PAGE_SIZE, HWQ_TYPE_CTX); @@ -366,7 +375,7 @@ int bnxt_qplib_alloc_ctx(struct pci_dev *pdev, /* CQ Tables */ ctx->cq_tbl.max_elements = ctx->cq_count; - rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->cq_tbl, NULL, 0, + rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->cq_tbl, NULL, &ctx->cq_tbl.max_elements, BNXT_QPLIB_MAX_CQ_CTX_ENTRY_SIZE, 0, PAGE_SIZE, HWQ_TYPE_CTX); @@ -375,7 +384,7 @@ int bnxt_qplib_alloc_ctx(struct pci_dev *pdev, /* TQM Buffer */ ctx->tqm_pde.max_elements = 512; - rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->tqm_pde, NULL, 0, + rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->tqm_pde, NULL, &ctx->tqm_pde.max_elements, sizeof(u64), 0, PAGE_SIZE, HWQ_TYPE_CTX); if (rc) @@ -386,7 +395,7 @@ int bnxt_qplib_alloc_ctx(struct pci_dev *pdev, continue; ctx->tqm_tbl[i].max_elements = ctx->qpc_count * ctx->tqm_count[i]; - rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->tqm_tbl[i], NULL, 0, + rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->tqm_tbl[i], NULL, &ctx->tqm_tbl[i].max_elements, 1, 0, PAGE_SIZE, HWQ_TYPE_CTX); if (rc) @@ -424,7 +433,7 @@ int bnxt_qplib_alloc_ctx(struct pci_dev *pdev, /* TIM Buffer */ ctx->tim_tbl.max_elements = ctx->qpc_count * 16; - rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->tim_tbl, NULL, 0, + rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->tim_tbl, NULL, &ctx->tim_tbl.max_elements, 1, 0, PAGE_SIZE, HWQ_TYPE_CTX); if (rc) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h index 32cebd0f1436..30c42c92fac7 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h @@ -219,6 +219,12 @@ static inline u8 bnxt_qplib_get_ring_type(struct bnxt_qplib_chip_ctx *cctx) RING_ALLOC_REQ_RING_TYPE_ROCE_CMPL; } +struct bnxt_qplib_sg_info { + struct scatterlist *sglist; + u32 nmap; + u32 npages; +}; + 
#define to_bnxt_qplib(ptr, type, member) \ container_of(ptr, type, member) @@ -227,7 +233,7 @@ struct bnxt_qplib_dev_attr; void bnxt_qplib_free_hwq(struct pci_dev *pdev, struct bnxt_qplib_hwq *hwq); int bnxt_qplib_alloc_init_hwq(struct pci_dev *pdev, struct bnxt_qplib_hwq *hwq, - struct scatterlist *sl, int nmap, u32 *elements, + struct bnxt_qplib_sg_info *sg_info, u32 *elements, u32 elements_per_page, u32 aux, u32 pg_size, enum bnxt_qplib_hwq_type hwq_type); void bnxt_qplib_get_guid(u8 *dev_addr, u8 *guid); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index e9c53e406404..ef1938733a41 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -684,7 +684,7 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr, mr->hwq.max_elements = pages; /* Use system PAGE_SIZE */ - rc = bnxt_qplib_alloc_init_hwq(res->pdev, &mr->hwq, NULL, 0, + rc = bnxt_qplib_alloc_init_hwq(res->pdev, &mr->hwq, NULL, &mr->hwq.max_elements, PAGE_SIZE, 0, PAGE_SIZE, HWQ_TYPE_CTX); @@ -754,7 +754,7 @@ int bnxt_qplib_alloc_fast_reg_page_list(struct bnxt_qplib_res *res, return -ENOMEM; frpl->hwq.max_elements = pages; - rc = bnxt_qplib_alloc_init_hwq(res->pdev, &frpl->hwq, NULL, 0, + rc = bnxt_qplib_alloc_init_hwq(res->pdev, &frpl->hwq, NULL, &frpl->hwq.max_elements, PAGE_SIZE, 0, PAGE_SIZE, HWQ_TYPE_CTX); if (!rc) -- cgit v1.2.3 From 5f818d676ac455bbc812ffaaf5bf780be5465114 Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Thu, 28 Mar 2019 11:49:44 -0500 Subject: RDMA/cxbg: Use correct sizing on buffers holding page DMA addresses The PBL array that hold the page DMA address is sized off umem->nmap. This can potentially cause out of bound accesses on the PBL array when iterating the umem DMA-mapped SGL. This is because if umem pages are combined, umem->nmap can be much lower than the number of system pages in umem. Use ib_umem_num_pages() to size this array. 
Cc: Potnuri Bharat Teja Signed-off-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/cxgb3/iwch_provider.c | 2 +- drivers/infiniband/hw/cxgb4/mem.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index c9a1fb323b5c..21aac6bca06f 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -539,7 +539,7 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, shift = PAGE_SHIFT; - n = mhp->umem->nmap; + n = ib_umem_num_pages(mhp->umem); err = iwch_alloc_pbl(mhp, n); if (err) diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index de6697fdffa7..81f5b5b026b1 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -542,7 +542,7 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, shift = PAGE_SHIFT; - n = mhp->umem->nmap; + n = ib_umem_num_pages(mhp->umem); err = alloc_pbl(mhp, n); if (err) goto err_umem_release; -- cgit v1.2.3 From 41d34865b24c6a0b594b0a69bfe9ea56dff5abcd Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Thu, 28 Mar 2019 11:49:45 -0500 Subject: RDMA/mthca: Use correct sizing on buffers holding page DMA addresses The buffer that holds the page DMA addresses is sized off umem->nmap. This can potentially cause out of bound accesses on the PBL array when iterating the umem DMA-mapped SGL. This is because if umem pages are combined, umem->nmap can be much lower than the number of system pages in umem. Use ib_umem_num_pages() to size this buffer. 
Signed-off-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mthca/mthca_provider.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index d063d7a37762..35c3119726bb 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -914,7 +914,7 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, goto err; } - n = mr->umem->nmap; + n = ib_umem_num_pages(mr->umem); mr->mtt = mthca_alloc_mtt(dev, n); if (IS_ERR(mr->mtt)) { -- cgit v1.2.3 From 93923d309bda99bc52f8cee6ea4774895b18ae5b Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Thu, 28 Mar 2019 11:49:46 -0500 Subject: RDMA/rxe: Use correct sizing on buffers holding page DMA addresses The buffer that holds the page DMA addresses is sized off umem->nmap. This can potentially cause out of bound accesses on the PBL array when iterating the umem DMA-mapped SGL. This is because if umem pages are combined, umem->nmap can be much lower than the number of system pages in umem. Use ib_umem_num_pages() to size this buffer. 
Cc: Moni Shoua Signed-off-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_mr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index ec89fbd06c53..f501f72489d8 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -179,7 +179,7 @@ int rxe_mem_init_user(struct rxe_pd *pd, u64 start, } mem->umem = umem; - num_buf = umem->nmap; + num_buf = ib_umem_num_pages(umem); rxe_mem_init(access, mem); -- cgit v1.2.3 From 629e6f9db6bf4c5702212dd77da534b838f14859 Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Thu, 28 Mar 2019 11:49:47 -0500 Subject: RDMA/rdmavt: Use correct sizing on buffers holding page DMA addresses The buffer that holds the page DMA addresses is sized off umem->nmap. This can potentially cause out of bound accesses on the PBL array when iterating the umem DMA-mapped SGL. This is because if umem pages are combined, umem->nmap can be much lower than the number of system pages in umem. Use ib_umem_num_pages() to size this buffer. Cc: Dennis Dalessandro Cc: Mike Marciniszyn Cc: Michael J. 
Ruhl Signed-off-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rdmavt/mr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index 728795043496..e8b03ae54914 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -392,7 +392,7 @@ struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (IS_ERR(umem)) return (void *)umem; - n = umem->nmap; + n = ib_umem_num_pages(umem); mr = __rvt_alloc_mr(n, pd); if (IS_ERR(mr)) { -- cgit v1.2.3 From cebe556bd755d16559c8bc0d1fe5545db6bbeaf0 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 26 Feb 2019 13:56:11 +0200 Subject: RDMA/core: Introduce ib_core_device to hold device In order to support sysfs entries in multiple net namespaces for an rdma device, introduce an ib_core_device whose scope is limited to holding the core device and the per-port sysfs related entries. This is a preparation patch so that a subsequent patch can create multiple ib_core_devices in each net namespace, all of which can share the ib_device. (a) Move sysfs specific fields to ib_core_device. (b) Make sysfs and device life cycle related routines work on ib_core_device. (c) Introduce and use rdma_init_coredev() helper to initialize coredev fields.
Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 24 ++++++++++++++++++++---- drivers/infiniband/core/sysfs.c | 29 ++++++++++++++++------------- include/rdma/ib_verbs.h | 25 +++++++++++++++++++------ 3 files changed, 55 insertions(+), 23 deletions(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 7421ec4883fb..31229074981d 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -363,6 +363,25 @@ static struct class ib_class = { .dev_uevent = ib_device_uevent, }; +static void rdma_init_coredev(struct ib_core_device *coredev, + struct ib_device *dev) +{ + /* This BUILD_BUG_ON is intended to catch layout change + * of union of ib_core_device and device. + * dev must be the first element as ib_core and providers + * driver uses it. Adding anything in ib_core_device before + * device will break this assumption. + */ + BUILD_BUG_ON(offsetof(struct ib_device, coredev.dev) != + offsetof(struct ib_device, dev)); + + coredev->dev.class = &ib_class; + coredev->dev.groups = dev->groups; + device_initialize(&coredev->dev); + coredev->owner = dev; + INIT_LIST_HEAD(&coredev->port_list); +} + /** * _ib_alloc_device - allocate an IB device struct * @size:size of structure to allocate @@ -389,10 +408,8 @@ struct ib_device *_ib_alloc_device(size_t size) return NULL; } - device->dev.class = &ib_class; device->groups[0] = &ib_dev_attr_group; - device->dev.groups = device->groups; - device_initialize(&device->dev); + rdma_init_coredev(&device->coredev, device); INIT_LIST_HEAD(&device->event_handler_list); spin_lock_init(&device->event_handler_lock); @@ -403,7 +420,6 @@ struct ib_device *_ib_alloc_device(size_t size) */ xa_init_flags(&device->client_data, XA_FLAGS_ALLOC); init_rwsem(&device->client_data_rwsem); - INIT_LIST_HEAD(&device->port_list); init_completion(&device->unreg_completion); INIT_WORK(&device->unregistration_work, 
ib_unregister_work); diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 9b6a065bdfa5..46ac766af110 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -1015,8 +1015,9 @@ err_free_stats: return; } -static int add_port(struct ib_device *device, int port_num) +static int add_port(struct ib_core_device *coredev, int port_num) { + struct ib_device *device = rdma_device_to_ibdev(&coredev->dev); struct ib_port *p; struct ib_port_attr attr; int i; @@ -1034,7 +1035,7 @@ static int add_port(struct ib_device *device, int port_num) p->port_num = port_num; ret = kobject_init_and_add(&p->kobj, &port_type, - device->ports_kobj, + coredev->ports_kobj, "%d", port_num); if (ret) { kfree(p); @@ -1125,7 +1126,7 @@ static int add_port(struct ib_device *device, int port_num) if (device->ops.alloc_hw_stats && port_num) setup_hw_stats(device, p, port_num); - list_add_tail(&p->kobj.entry, &device->port_list); + list_add_tail(&p->kobj.entry, &coredev->port_list); kobject_uevent(&p->kobj, KOBJ_ADD); return 0; @@ -1279,11 +1280,11 @@ const struct attribute_group ib_dev_attr_group = { .attrs = ib_dev_attrs, }; -static void ib_free_port_attrs(struct ib_device *device) +static void ib_free_port_attrs(struct ib_core_device *coredev) { struct kobject *p, *t; - list_for_each_entry_safe(p, t, &device->port_list, entry) { + list_for_each_entry_safe(p, t, &coredev->port_list, entry) { struct ib_port *port = container_of(p, struct ib_port, kobj); list_del(&p->entry); @@ -1303,20 +1304,22 @@ static void ib_free_port_attrs(struct ib_device *device) kobject_put(p); } - kobject_put(device->ports_kobj); + kobject_put(coredev->ports_kobj); } -static int ib_setup_port_attrs(struct ib_device *device) +static int ib_setup_port_attrs(struct ib_core_device *coredev) { + struct ib_device *device = rdma_device_to_ibdev(&coredev->dev); unsigned int port; int ret; - device->ports_kobj = kobject_create_and_add("ports", &device->dev.kobj); - if 
(!device->ports_kobj) + coredev->ports_kobj = kobject_create_and_add("ports", + &coredev->dev.kobj); + if (!coredev->ports_kobj) return -ENOMEM; rdma_for_each_port (device, port) { - ret = add_port(device, port); + ret = add_port(coredev, port); if (ret) goto err_put; } @@ -1324,7 +1327,7 @@ static int ib_setup_port_attrs(struct ib_device *device) return 0; err_put: - ib_free_port_attrs(device); + ib_free_port_attrs(coredev); return ret; } @@ -1332,7 +1335,7 @@ int ib_device_register_sysfs(struct ib_device *device) { int ret; - ret = ib_setup_port_attrs(device); + ret = ib_setup_port_attrs(&device->coredev); if (ret) return ret; @@ -1348,5 +1351,5 @@ void ib_device_unregister_sysfs(struct ib_device *device) free_hsag(&device->dev.kobj, device->hw_stats_ag); kfree(device->hw_stats); - ib_free_port_attrs(device); + ib_free_port_attrs(&device->coredev); } diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 9b9e17bcc201..5f9f4fcdc4cc 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2554,8 +2554,17 @@ struct ib_device_ops { DECLARE_RDMA_OBJ_SIZE(ib_ucontext); }; -struct rdma_restrack_root; +struct ib_core_device { + /* device must be the first element in structure until, + * union of ib_core_device and device exists in ib_device. + */ + struct device dev; + struct kobject *ports_kobj; + struct list_head port_list; + struct ib_device *owner; /* reach back to owner ib_device */ +}; +struct rdma_restrack_root; struct ib_device { /* Do not access @dma_device directly from ULP nor from HW drivers. */ struct device *dma_device; @@ -2581,16 +2590,17 @@ struct ib_device { struct iw_cm_verbs *iwcm; struct module *owner; - struct device dev; + union { + struct device dev; + struct ib_core_device coredev; + }; + /* First group for device attributes, * Second group for driver provided attributes (optional). * It is NULL terminated array. 
*/ const struct attribute_group *groups[3]; - struct kobject *ports_kobj; - struct list_head port_list; - int uverbs_abi_ver; u64 uverbs_cmd_mask; u64 uverbs_ex_cmd_mask; @@ -4349,7 +4359,10 @@ rdma_set_device_sysfs_group(struct ib_device *dev, */ static inline struct ib_device *rdma_device_to_ibdev(struct device *device) { - return container_of(device, struct ib_device, dev); + struct ib_core_device *coredev = + container_of(device, struct ib_core_device, dev); + + return coredev->owner; } /** -- cgit v1.2.3 From 62dfa7955e842e7ff6827edc034a0839b009c326 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 26 Feb 2019 13:56:12 +0200 Subject: RDMA/core: Restrict sysfs entries view to init_net This is a preparation patch to provide isolation of an rdma device in a network namespace. As a first step, make the rdma device visible only in the init net namespace. A subsequent patch will enable rdma device visibility back in multiple net namespaces using a compat ib_core_device device/sysfs tree. Given that the IB subsystem depends on the net stack, it needs to be initialized after netdev, and since it supports devices, it needs to be initialized before the device subsystem; therefore, change the initcall sequence to fs_initcall, so that when ib_core is compiled into the kernel image, the right init sequence is followed.
Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 31229074981d..078566d0d7c2 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -357,10 +357,17 @@ static int ib_device_uevent(struct device *device, return 0; } +static const void *net_namespace(struct device *d) +{ + return &init_net; +} + static struct class ib_class = { .name = "infiniband", .dev_release = ib_device_release, .dev_uevent = ib_device_uevent, + .ns_type = &net_ns_type_operations, + .namespace = net_namespace, }; static void rdma_init_coredev(struct ib_core_device *coredev, @@ -1966,5 +1973,8 @@ static void __exit ib_core_cleanup(void) MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_LS, 4); -subsys_initcall(ib_core_init); +/* ib core relies on netdev stack to first register net_ns_type_operations + * ns kobject type before ib_core initialization. + */ +fs_initcall(ib_core_init); module_exit(ib_core_cleanup); -- cgit v1.2.3 From 4e0f7b9070726a34bbd87a74e407d4cced6d49ab Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 26 Feb 2019 13:56:13 +0200 Subject: RDMA/core: Implement compat device/sysfs tree in net namespace Implement compatibility layer sysfs entries of ib_core so that non init_net net namespaces can also discover rdma devices. Each non init_net net namespace has ib_core_device created in it. Such ib_core_device sysfs tree resembles rdma devices found in init_net namespace. This allows discovering rdma devices in multiple non init_net net namespaces via sysfs entries and helpful to rdma-core userspace. 
Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 261 ++++++++++++++++++++++++++++++++++++++- include/rdma/ib_verbs.h | 6 + 2 files changed, 263 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 078566d0d7c2..167e2d46e4cb 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -38,6 +38,8 @@ #include #include #include +#include +#include #include #include #include @@ -101,6 +103,30 @@ static DECLARE_RWSEM(clients_rwsem); * be registered. */ #define CLIENT_DATA_REGISTERED XA_MARK_1 + +/** + * struct rdma_dev_net - rdma net namespace metadata for a net + * @net: Pointer to owner net namespace + * @id: xarray id to identify the net namespace. + */ +struct rdma_dev_net { + possible_net_t net; + u32 id; +}; + +static unsigned int rdma_dev_net_id; + +/* + * A list of net namespaces is maintained in an xarray. This is necessary + * because we can't get the locking right using the existing net ns list. We + * would require a init_net callback after the list is updated. + */ +static DEFINE_XARRAY_FLAGS(rdma_nets, XA_FLAGS_ALLOC); +/* + * rwsem to protect accessing the rdma_nets xarray entries. + */ +static DECLARE_RWSEM(rdma_nets_rwsem); + /* * xarray has this behavior where it won't iterate over NULL values stored in * allocated arrays. 
So we need our own iterator to see all values stored in @@ -268,6 +294,26 @@ struct ib_device *ib_device_get_by_name(const char *name, } EXPORT_SYMBOL(ib_device_get_by_name); +static int rename_compat_devs(struct ib_device *device) +{ + struct ib_core_device *cdev; + unsigned long index; + int ret = 0; + + mutex_lock(&device->compat_devs_mutex); + xa_for_each (&device->compat_devs, index, cdev) { + ret = device_rename(&cdev->dev, dev_name(&device->dev)); + if (ret) { + dev_warn(&cdev->dev, + "Fail to rename compatdev to new name %s\n", + dev_name(&device->dev)); + break; + } + } + mutex_unlock(&device->compat_devs_mutex); + return ret; +} + int ib_device_rename(struct ib_device *ibdev, const char *name) { int ret; @@ -287,6 +333,7 @@ int ib_device_rename(struct ib_device *ibdev, const char *name) if (ret) goto out; strlcpy(ibdev->name, name, IB_DEVICE_NAME_MAX); + ret = rename_compat_devs(ibdev); out: up_write(&devices_rwsem); return ret; @@ -336,6 +383,7 @@ static void ib_device_release(struct device *device) WARN_ON(refcount_read(&dev->refcount)); ib_cache_release_one(dev); ib_security_release_port_pkey_list(dev); + xa_destroy(&dev->compat_devs); xa_destroy(&dev->client_data); if (dev->port_data) kfree_rcu(container_of(dev->port_data, struct ib_port_data_rcu, @@ -359,7 +407,10 @@ static int ib_device_uevent(struct device *device, static const void *net_namespace(struct device *d) { - return &init_net; + struct ib_core_device *coredev = + container_of(d, struct ib_core_device, dev); + + return read_pnet(&coredev->rdma_net); } static struct class ib_class = { @@ -371,7 +422,7 @@ static struct class ib_class = { }; static void rdma_init_coredev(struct ib_core_device *coredev, - struct ib_device *dev) + struct ib_device *dev, struct net *net) { /* This BUILD_BUG_ON is intended to catch layout change * of union of ib_core_device and device. 
@@ -387,6 +438,7 @@ static void rdma_init_coredev(struct ib_core_device *coredev, device_initialize(&coredev->dev); coredev->owner = dev; INIT_LIST_HEAD(&coredev->port_list); + write_pnet(&coredev->rdma_net, net); } /** @@ -416,7 +468,7 @@ struct ib_device *_ib_alloc_device(size_t size) } device->groups[0] = &ib_dev_attr_group; - rdma_init_coredev(&device->coredev, device); + rdma_init_coredev(&device->coredev, device, &init_net); INIT_LIST_HEAD(&device->event_handler_list); spin_lock_init(&device->event_handler_lock); @@ -427,6 +479,8 @@ struct ib_device *_ib_alloc_device(size_t size) */ xa_init_flags(&device->client_data, XA_FLAGS_ALLOC); init_rwsem(&device->client_data_rwsem); + xa_init_flags(&device->compat_devs, XA_FLAGS_ALLOC); + mutex_init(&device->compat_devs_mutex); init_completion(&device->unreg_completion); INIT_WORK(&device->unregistration_work, ib_unregister_work); @@ -459,6 +513,7 @@ void ib_dealloc_device(struct ib_device *device) /* Expedite releasing netdev references */ free_netdevs(device); + WARN_ON(!xa_empty(&device->compat_devs)); WARN_ON(!xa_empty(&device->client_data)); WARN_ON(refcount_read(&device->refcount)); rdma_restrack_clean(device); @@ -667,6 +722,180 @@ static int ib_security_change(struct notifier_block *nb, unsigned long event, return NOTIFY_OK; } +static void compatdev_release(struct device *dev) +{ + struct ib_core_device *cdev = + container_of(dev, struct ib_core_device, dev); + + kfree(cdev); +} + +static int add_one_compat_dev(struct ib_device *device, + struct rdma_dev_net *rnet) +{ + struct ib_core_device *cdev; + int ret; + + /* + * Create and add compat device in all namespaces other than where it + * is currently bound to. + */ + if (net_eq(read_pnet(&rnet->net), + read_pnet(&device->coredev.rdma_net))) + return 0; + + /* + * The first of init_net() or ib_register_device() to take the + * compat_devs_mutex wins and gets to add the device. Others will wait + * for completion here. 
+ */ + mutex_lock(&device->compat_devs_mutex); + cdev = xa_load(&device->compat_devs, rnet->id); + if (cdev) { + ret = 0; + goto done; + } + ret = xa_reserve(&device->compat_devs, rnet->id, GFP_KERNEL); + if (ret) + goto done; + + cdev = kzalloc(sizeof(*cdev), GFP_KERNEL); + if (!cdev) { + ret = -ENOMEM; + goto cdev_err; + } + + cdev->dev.parent = device->dev.parent; + rdma_init_coredev(cdev, device, read_pnet(&rnet->net)); + cdev->dev.release = compatdev_release; + dev_set_name(&cdev->dev, "%s", dev_name(&device->dev)); + + ret = device_add(&cdev->dev); + if (ret) + goto add_err; + + ret = xa_err(xa_store(&device->compat_devs, rnet->id, + cdev, GFP_KERNEL)); + if (ret) + goto insert_err; + + mutex_unlock(&device->compat_devs_mutex); + return 0; + +insert_err: + device_del(&cdev->dev); +add_err: + put_device(&cdev->dev); +cdev_err: + xa_release(&device->compat_devs, rnet->id); +done: + mutex_unlock(&device->compat_devs_mutex); + return ret; +} + +static void remove_one_compat_dev(struct ib_device *device, u32 id) +{ + struct ib_core_device *cdev; + + mutex_lock(&device->compat_devs_mutex); + cdev = xa_erase(&device->compat_devs, id); + mutex_unlock(&device->compat_devs_mutex); + if (cdev) { + device_del(&cdev->dev); + put_device(&cdev->dev); + } +} + +static void remove_compat_devs(struct ib_device *device) +{ + struct ib_core_device *cdev; + unsigned long index; + + xa_for_each (&device->compat_devs, index, cdev) + remove_one_compat_dev(device, index); +} + +static int add_compat_devs(struct ib_device *device) +{ + struct rdma_dev_net *rnet; + unsigned long index; + int ret = 0; + + down_read(&rdma_nets_rwsem); + xa_for_each (&rdma_nets, index, rnet) { + ret = add_one_compat_dev(device, rnet); + if (ret) + break; + } + up_read(&rdma_nets_rwsem); + return ret; +} + +static void rdma_dev_exit_net(struct net *net) +{ + struct rdma_dev_net *rnet = net_generic(net, rdma_dev_net_id); + struct ib_device *dev; + unsigned long index; + int ret; + + 
down_write(&rdma_nets_rwsem); + /* + * Prevent the ID from being re-used and hide the id from xa_for_each. + */ + ret = xa_err(xa_store(&rdma_nets, rnet->id, NULL, GFP_KERNEL)); + WARN_ON(ret); + up_write(&rdma_nets_rwsem); + + down_read(&devices_rwsem); + xa_for_each (&devices, index, dev) { + get_device(&dev->dev); + /* + * Release the devices_rwsem so that pontentially blocking + * device_del, doesn't hold the devices_rwsem for too long. + */ + up_read(&devices_rwsem); + + remove_one_compat_dev(dev, rnet->id); + + put_device(&dev->dev); + down_read(&devices_rwsem); + } + up_read(&devices_rwsem); + + xa_erase(&rdma_nets, rnet->id); +} + +static __net_init int rdma_dev_init_net(struct net *net) +{ + struct rdma_dev_net *rnet = net_generic(net, rdma_dev_net_id); + unsigned long index; + struct ib_device *dev; + int ret; + + /* No need to create any compat devices in default init_net. */ + if (net_eq(net, &init_net)) + return 0; + + write_pnet(&rnet->net, net); + + ret = xa_alloc(&rdma_nets, &rnet->id, rnet, xa_limit_32b, GFP_KERNEL); + if (ret) + return ret; + + down_read(&devices_rwsem); + xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) { + ret = add_one_compat_dev(dev, rnet); + if (ret) + break; + } + up_read(&devices_rwsem); + + if (ret) + rdma_dev_exit_net(net); + + return ret; +} + /* * Assign the unique string device name and the unique device index. This is * undone by ib_dealloc_device. @@ -788,6 +1017,13 @@ static void disable_device(struct ib_device *device) ib_device_put(device); wait_for_completion(&device->unreg_completion); + /* + * compat devices must be removed after device refcount drops to zero. + * Otherwise init_net() may add more compatdevs after removing compat + * devices and before device is disabled. 
+ */ + remove_compat_devs(device); + /* Expedite removing unregistered pointers from the hash table */ free_netdevs(device); } @@ -830,7 +1066,8 @@ static int enable_device_and_get(struct ib_device *device) break; } up_read(&clients_rwsem); - + if (!ret) + ret = add_compat_devs(device); out: up_read(&devices_rwsem); return ret; @@ -1061,6 +1298,13 @@ void ib_unregister_device_queued(struct ib_device *ib_dev) } EXPORT_SYMBOL(ib_unregister_device_queued); +static struct pernet_operations rdma_dev_net_ops = { + .init = rdma_dev_init_net, + .exit = rdma_dev_exit_net, + .id = &rdma_dev_net_id, + .size = sizeof(struct rdma_dev_net), +}; + static int assign_client_id(struct ib_client *client) { int ret; @@ -1926,12 +2170,20 @@ static int __init ib_core_init(void) goto err_sa; } + ret = register_pernet_device(&rdma_dev_net_ops); + if (ret) { + pr_warn("Couldn't init compat dev. ret %d\n", ret); + goto err_compat; + } + nldev_init(); rdma_nl_register(RDMA_NL_LS, ibnl_ls_cb_table); roce_gid_mgmt_init(); return 0; +err_compat: + unregister_lsm_notifier(&ibdev_lsm_nb); err_sa: ib_sa_cleanup(); err_mad: @@ -1956,6 +2208,7 @@ static void __exit ib_core_cleanup(void) roce_gid_mgmt_cleanup(); nldev_exit(); rdma_nl_unregister(RDMA_NL_LS); + unregister_pernet_device(&rdma_dev_net_ops); unregister_lsm_notifier(&ibdev_lsm_nb); ib_sa_cleanup(); ib_mad_cleanup(); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 5f9f4fcdc4cc..d42267e72c4b 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2559,6 +2559,7 @@ struct ib_core_device { * union of ib_core_device and device exists in ib_device. 
*/ struct device dev; + possible_net_t rdma_net; struct kobject *ports_kobj; struct list_head port_list; struct ib_device *owner; /* reach back to owner ib_device */ @@ -2636,6 +2637,11 @@ struct ib_device { struct work_struct unregistration_work; const struct rdma_link_ops *link_ops; + + /* Protects compat_devs xarray modifications */ + struct mutex compat_devs_mutex; + /* Maintains compat devices for each net namespace */ + struct xarray compat_devs; }; struct ib_client { -- cgit v1.2.3 From 5417783eabb2c66738394149123fbcdd0cca0f51 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 26 Feb 2019 13:56:15 +0200 Subject: RDMA/core: Support core port attributes in non init_net Now that sysfs compatibility layer for non init_net exists, add core port attributes such as pkey and gid table to non init_net ns. Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/core_priv.h | 4 ++++ drivers/infiniband/core/device.c | 6 ++++++ drivers/infiniband/core/sysfs.c | 15 ++++++++------- 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index 08c690249594..24f2aa2e1b7c 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -336,4 +336,8 @@ int roce_resolve_route_from_path(struct sa_path_rec *rec, const struct ib_gid_attr *attr); struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr); + +void ib_free_port_attrs(struct ib_core_device *coredev); +int ib_setup_port_attrs(struct ib_core_device *coredev, + bool alloc_hw_stats); #endif /* _CORE_PRIV_H */ diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 167e2d46e4cb..0735f8b8167e 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -773,6 +773,9 @@ static int add_one_compat_dev(struct ib_device *device, ret = device_add(&cdev->dev); if (ret) goto 
add_err; + ret = ib_setup_port_attrs(cdev, false); + if (ret) + goto port_err; ret = xa_err(xa_store(&device->compat_devs, rnet->id, cdev, GFP_KERNEL)); @@ -783,6 +786,8 @@ static int add_one_compat_dev(struct ib_device *device, return 0; insert_err: + ib_free_port_attrs(cdev); +port_err: device_del(&cdev->dev); add_err: put_device(&cdev->dev); @@ -801,6 +806,7 @@ static void remove_one_compat_dev(struct ib_device *device, u32 id) cdev = xa_erase(&device->compat_devs, id); mutex_unlock(&device->compat_devs_mutex); if (cdev) { + ib_free_port_attrs(cdev); device_del(&cdev->dev); put_device(&cdev->dev); } diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 46ac766af110..2ce3f58157a5 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -1015,7 +1015,8 @@ err_free_stats: return; } -static int add_port(struct ib_core_device *coredev, int port_num) +static int add_port(struct ib_core_device *coredev, + int port_num, bool alloc_stats) { struct ib_device *device = rdma_device_to_ibdev(&coredev->dev); struct ib_port *p; @@ -1056,7 +1057,7 @@ static int add_port(struct ib_core_device *coredev, int port_num) goto err_put; } - if (device->ops.process_mad) { + if (device->ops.process_mad && alloc_stats) { p->pma_table = get_counter_table(device, port_num); ret = sysfs_create_group(&p->kobj, p->pma_table); if (ret) @@ -1123,7 +1124,7 @@ static int add_port(struct ib_core_device *coredev, int port_num) * port, so holder should be device. Therefore skip per port conunter * initialization. 
 */ - if (device->ops.alloc_hw_stats && port_num) + if (device->ops.alloc_hw_stats && port_num && alloc_stats) setup_hw_stats(device, p, port_num); list_add_tail(&p->kobj.entry, &coredev->port_list); @@ -1280,7 +1281,7 @@ const struct attribute_group ib_dev_attr_group = { .attrs = ib_dev_attrs, }; -static void ib_free_port_attrs(struct ib_core_device *coredev) +void ib_free_port_attrs(struct ib_core_device *coredev) { struct kobject *p, *t; @@ -1307,7 +1308,7 @@ static void ib_free_port_attrs(struct ib_core_device *coredev) kobject_put(coredev->ports_kobj); } -static int ib_setup_port_attrs(struct ib_core_device *coredev) +int ib_setup_port_attrs(struct ib_core_device *coredev, bool alloc_stats) { struct ib_device *device = rdma_device_to_ibdev(&coredev->dev); unsigned int port; @@ -1319,7 +1320,7 @@ static int ib_setup_port_attrs(struct ib_core_device *coredev) return -ENOMEM; rdma_for_each_port (device, port) { - ret = add_port(coredev, port); + ret = add_port(coredev, port, alloc_stats); if (ret) goto err_put; } @@ -1335,7 +1336,7 @@ int ib_device_register_sysfs(struct ib_device *device) { int ret; - ret = ib_setup_port_attrs(&device->coredev); + ret = ib_setup_port_attrs(&device->coredev, true); if (ret) return ret; -- cgit v1.2.3 From a56bc45b27b92954d99c811cb047e789b6cc5a81 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 26 Feb 2019 14:01:45 +0200 Subject: RDMA/core: Add module param to disable device sharing among net ns Add module parameter to change a sharing mode of ib_core early in the boot process. This parameter helps those systems where a modern, up-to-date rdma tool (iproute2) package may not be available during a kernel upgrade cycle. 
Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 0735f8b8167e..ebc0b0e58eca 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -127,6 +127,10 @@ static DEFINE_XARRAY_FLAGS(rdma_nets, XA_FLAGS_ALLOC); */ static DECLARE_RWSEM(rdma_nets_rwsem); +static bool ib_devices_shared_netns = true; +module_param_named(netns_mode, ib_devices_shared_netns, bool, 0444); +MODULE_PARM_DESC(netns_mode, + "Share device among net namespaces; default=1 (shared)"); /* * xarray has this behavior where it won't iterate over NULL values stored in * allocated arrays. So we need our own iterator to see all values stored in @@ -736,6 +740,9 @@ static int add_one_compat_dev(struct ib_device *device, struct ib_core_device *cdev; int ret; + if (!ib_devices_shared_netns) + return 0; + /* * Create and add compat device in all namespaces other than where it * is currently bound to. -- cgit v1.2.3 From 41c6140189afdf67bd07d7bbe2d8f9382b6f9ef7 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 26 Feb 2019 14:01:46 +0200 Subject: RDMA: Check net namespace access for uverbs, umad, cma and nldev Introduce an API rdma_dev_access_netns() to check whether a rdma device can be accessed from the specified net namespace or not. Use rdma_dev_access_netns() while opening character uverbs, umad network device and also check while rdma cm_id binds to rdma device. 
Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c | 3 +++ drivers/infiniband/core/device.c | 20 ++++++++++++++++++++ drivers/infiniband/core/user_mad.c | 10 ++++++++++ drivers/infiniband/core/uverbs_main.c | 5 +++++ include/rdma/ib_verbs.h | 3 +++ 5 files changed, 41 insertions(+) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index a8b9c66c8525..895899230a7e 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -616,6 +616,9 @@ cma_validate_port(struct ib_device *device, u8 port, int dev_type = dev_addr->dev_type; struct net_device *ndev = NULL; + if (!rdma_dev_access_netns(device, id_priv->id.route.addr.dev_addr.net)) + return ERR_PTR(-ENODEV); + if ((dev_type == ARPHRD_INFINIBAND) && !rdma_protocol_ib(device, port)) return ERR_PTR(-ENODEV); diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index ebc0b0e58eca..74736ea9b007 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -131,6 +131,26 @@ static bool ib_devices_shared_netns = true; module_param_named(netns_mode, ib_devices_shared_netns, bool, 0444); MODULE_PARM_DESC(netns_mode, "Share device among net namespaces; default=1 (shared)"); +/** + * rdma_dev_access_netns() - Return whether a rdma device can be accessed + * from a specified net namespace or not. + * @device: Pointer to rdma device which needs to be checked + * @net: Pointer to net namesapce for which access to be checked + * + * rdma_dev_access_netns() - Return whether a rdma device can be accessed + * from a specified net namespace or not. When + * rdma device is in shared mode, it ignores the + * net namespace. When rdma device is exclusive + * to a net namespace, rdma device net namespace is + * checked against the specified one. 
+ */ +bool rdma_dev_access_netns(const struct ib_device *dev, const struct net *net) +{ + return (ib_devices_shared_netns || + net_eq(read_pnet(&dev->coredev.rdma_net), net)); +} +EXPORT_SYMBOL(rdma_dev_access_netns); + /* * xarray has this behavior where it won't iterate over NULL values stored in * allocated arrays. So we need our own iterator to see all values stored in diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index 2de5b4404abc..56aa34206110 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -980,6 +980,11 @@ static int ib_umad_open(struct inode *inode, struct file *filp) goto out; } + if (!rdma_dev_access_netns(port->ib_dev, current->nsproxy->net_ns)) { + ret = -EPERM; + goto out; + } + file = kzalloc(sizeof(*file), GFP_KERNEL); if (!file) { ret = -ENOMEM; @@ -1073,6 +1078,11 @@ static int ib_umad_sm_open(struct inode *inode, struct file *filp) } } + if (!rdma_dev_access_netns(port->ib_dev, current->nsproxy->net_ns)) { + ret = -EPERM; + goto err_up_sem; + } + ret = ib_modify_port(port->ib_dev, port->port_num, 0, &props); if (ret) goto err_up_sem; diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index b8fc5a329e21..fef4519d1241 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -1045,6 +1045,11 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp) goto err; } + if (!rdma_dev_access_netns(ib_dev, current->nsproxy->net_ns)) { + ret = -EPERM; + goto err; + } + /* In case IB device supports disassociate ucontext, there is no hard * dependency between uverbs device and its low level device. 
*/ diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index d42267e72c4b..418d17c8b65b 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -4381,4 +4381,7 @@ static inline struct ib_device *rdma_device_to_ibdev(struct device *device) */ #define rdma_device_to_drv_device(dev, drv_dev_struct, ibdev_member) \ container_of(rdma_device_to_ibdev(dev), drv_dev_struct, ibdev_member) + +bool rdma_dev_access_netns(const struct ib_device *device, + const struct net *net); #endif /* IB_VERBS_H */ -- cgit v1.2.3 From 37eeab55aeca98cb6648b471f09c0e651ccb1e7c Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 26 Feb 2019 14:01:47 +0200 Subject: RDMA/core: Extend ib_device_get_by_index for net namespace Extend ib_device_get_by_index() API to check device access for net namespace for serving netlink commands. Also enforce net ns check on dumpit commands which iterate over all registered rdma devices and which don't call ib_device_get_by_index(). Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/core_priv.h | 3 ++- drivers/infiniband/core/device.c | 11 ++++++++++- drivers/infiniband/core/nldev.c | 18 +++++++++--------- 3 files changed, 21 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index 24f2aa2e1b7c..fe5607ebca80 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -279,7 +279,8 @@ static inline void ib_mad_agent_security_change(void) } #endif -struct ib_device *ib_device_get_by_index(u32 ifindex); +struct ib_device *ib_device_get_by_index(const struct net *net, u32 index); + /* RDMA device netlink */ void nldev_init(void); void nldev_exit(void); diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 74736ea9b007..e6f82f4d4108 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -250,16 +250,22 @@ 
static int ib_device_check_mandatory(struct ib_device *device) * Caller must perform ib_device_put() to return the device reference count * when ib_device_get_by_index() returns valid device pointer. */ -struct ib_device *ib_device_get_by_index(u32 index) +struct ib_device *ib_device_get_by_index(const struct net *net, u32 index) { struct ib_device *device; down_read(&devices_rwsem); device = xa_load(&devices, index); if (device) { + if (!rdma_dev_access_netns(device, net)) { + device = NULL; + goto out; + } + if (!ib_device_try_get(device)) device = NULL; } +out: up_read(&devices_rwsem); return device; } @@ -1815,6 +1821,9 @@ int ib_enum_all_devs(nldev_callback nldev_cb, struct sk_buff *skb, down_read(&devices_rwsem); xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) { + if (!rdma_dev_access_netns(dev, sock_net(skb->sk))) + continue; + ret = nldev_cb(dev, skb, cb, idx); if (ret) break; diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 11ed58d3fce5..284e5f103fc9 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -614,7 +614,7 @@ static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); - device = ib_device_get_by_index(index); + device = ib_device_get_by_index(sock_net(skb->sk), index); if (!device) return -EINVAL; @@ -658,7 +658,7 @@ static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, return -EINVAL; index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); - device = ib_device_get_by_index(index); + device = ib_device_get_by_index(sock_net(skb->sk), index); if (!device) return -EINVAL; @@ -706,7 +706,7 @@ static int nldev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { /* * There is no need to take lock, because - * we are relying on ib_core's lists_rwsem + * we are relying on ib_core's locking. 
*/ return ib_enum_all_devs(_nldev_get_dumpit, skb, cb); } @@ -729,7 +729,7 @@ static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, return -EINVAL; index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); - device = ib_device_get_by_index(index); + device = ib_device_get_by_index(sock_net(skb->sk), index); if (!device) return -EINVAL; @@ -783,7 +783,7 @@ static int nldev_port_get_dumpit(struct sk_buff *skb, return -EINVAL; ifindex = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); - device = ib_device_get_by_index(ifindex); + device = ib_device_get_by_index(sock_net(skb->sk), ifindex); if (!device) return -EINVAL; @@ -838,7 +838,7 @@ static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, return -EINVAL; index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); - device = ib_device_get_by_index(index); + device = ib_device_get_by_index(sock_net(skb->sk), index); if (!device) return -EINVAL; @@ -987,7 +987,7 @@ static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh, return -EINVAL; index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); - device = ib_device_get_by_index(index); + device = ib_device_get_by_index(sock_net(skb->sk), index); if (!device) return -EINVAL; @@ -1084,7 +1084,7 @@ static int res_get_common_dumpit(struct sk_buff *skb, return -EINVAL; index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); - device = ib_device_get_by_index(index); + device = ib_device_get_by_index(sock_net(skb->sk), index); if (!device) return -EINVAL; @@ -1299,7 +1299,7 @@ static int nldev_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, return -EINVAL; index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); - device = ib_device_get_by_index(index); + device = ib_device_get_by_index(sock_net(skb->sk), index); if (!device) return -EINVAL; -- cgit v1.2.3 From cb7e0e130503cb8e5ee6a7e8983bf71ed408516f Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 26 Feb 2019 14:01:48 +0200 Subject: RDMA/core: Add interface to read device namespace sharing mode 
Add an interface via netlink command to query whether rdma devices are shared among multiple net namespaces or not. When using the rdma tool, it can be queried as: $rdma system show netns, which prints: netns shared Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/core_priv.h | 1 + drivers/infiniband/core/device.c | 2 +- drivers/infiniband/core/nldev.c | 32 ++++++++++++++++++++++++++++++++ include/uapi/rdma/rdma_netlink.h | 9 ++++++++- 4 files changed, 42 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index fe5607ebca80..30c7d4a46b76 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -55,6 +55,7 @@ struct pkey_index_qp_list { }; extern const struct attribute_group ib_dev_attr_group; +extern bool ib_devices_shared_netns; int ib_device_register_sysfs(struct ib_device *device); void ib_device_unregister_sysfs(struct ib_device *device); diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index e6f82f4d4108..0605208a73d3 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -127,7 +127,7 @@ static DEFINE_XARRAY_FLAGS(rdma_nets, XA_FLAGS_ALLOC); */ static DECLARE_RWSEM(rdma_nets_rwsem); -static bool ib_devices_shared_netns = true; +bool ib_devices_shared_netns = true; module_param_named(netns_mode, ib_devices_shared_netns, bool, 0444); MODULE_PARM_DESC(netns_mode, "Share device among net namespaces; default=1 (shared)"); diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 284e5f103fc9..49c048738f94 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -116,6 +116,7 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = { [RDMA_NLDEV_ATTR_RES_CTXN] = { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_LINK_TYPE] = { .type = NLA_NUL_STRING, .len = RDMA_NLDEV_ATTR_ENTRY_STRLEN }, + 
[RDMA_NLDEV_SYS_ATTR_NETNS_MODE] = { .type = NLA_U8 }, }; static int put_driver_name_print_type(struct sk_buff *msg, const char *name, @@ -1312,6 +1313,34 @@ static int nldev_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, return 0; } +static int nldev_get_sys_get_dumpit(struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; + struct nlmsghdr *nlh; + int err; + + err = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, + nldev_policy, NULL); + if (err) + return err; + + nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, + RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, + RDMA_NLDEV_CMD_SYS_GET), + 0, 0); + + err = nla_put_u8(skb, RDMA_NLDEV_SYS_ATTR_NETNS_MODE, + (u8)ib_devices_shared_netns); + if (err) { + nlmsg_cancel(skb, nlh); + return err; + } + + nlmsg_end(skb, nlh); + return skb->len; +} + static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = { [RDMA_NLDEV_CMD_GET] = { .doit = nldev_get_doit, @@ -1357,6 +1386,9 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = { .doit = nldev_res_get_pd_doit, .dump = nldev_res_get_pd_dumpit, }, + [RDMA_NLDEV_CMD_SYS_GET] = { + .dump = nldev_get_sys_get_dumpit, + }, }; void __init nldev_init(void) diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index 5cc592728071..6af88c736073 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -261,7 +261,8 @@ enum rdma_nldev_command { RDMA_NLDEV_CMD_PORT_GET, /* can dump */ - /* 6 - 8 are free to use */ + RDMA_NLDEV_CMD_SYS_GET, /* can dump */ + /* 7 - 8 are free to use */ RDMA_NLDEV_CMD_RES_GET = 9, /* can dump */ @@ -472,6 +473,12 @@ enum rdma_nldev_attr { */ RDMA_NLDEV_ATTR_LINK_TYPE, /* string */ + /* + * net namespace mode for rdma subsystem: + * either shared or exclusive among multiple net namespaces. 
+ */ + RDMA_NLDEV_SYS_ATTR_NETNS_MODE, /* u8 */ + /* * Always the end */ -- cgit v1.2.3 From 2b34c558022673c0d6393dd7941d417f1b5a7236 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 26 Feb 2019 14:01:49 +0200 Subject: RDMA/core: Add command to set ib_core device net namespace sharing mode Add netlink command that enables/disables sharing rdma device among multiple net namespaces. Using rdma tool, $rdma sys set netns shared (default mode) When rdma subsystem netns mode is set to shared mode, rdma devices will be accessible in all net namespaces. Using rdma tool, $rdma sys set netns exclusive When rdma subsystem netns mode is set to exclusive mode, devices will be accessible in only one net namespace at any given point of time. If any net namespace other than the default init_net exists while executing this command, it will fail and the mode cannot be changed. To change this mode, a netlink command is used instead of sysctl, because a netlink command allows auto-loading a module. Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/core_priv.h | 2 + drivers/infiniband/core/device.c | 87 +++++++++++++++++++++++++++++++++++++ drivers/infiniband/core/nldev.c | 25 +++++++++++ include/uapi/rdma/rdma_netlink.h | 4 +- 4 files changed, 117 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index 30c7d4a46b76..0663fc64e950 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -342,4 +342,6 @@ struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr); void ib_free_port_attrs(struct ib_core_device *coredev); int ib_setup_port_attrs(struct ib_core_device *coredev, bool alloc_hw_stats); + +int rdma_compatdev_set(u8 enable); #endif /* _CORE_PRIV_H */ diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 0605208a73d3..2dbd04739ac6 100644 --- 
a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -766,6 +766,7 @@ static int add_one_compat_dev(struct ib_device *device, struct ib_core_device *cdev; int ret; + lockdep_assert_held(&rdma_nets_rwsem); if (!ib_devices_shared_netns) return 0; @@ -870,6 +871,87 @@ static int add_compat_devs(struct ib_device *device) return ret; } +static void remove_all_compat_devs(void) +{ + struct ib_compat_device *cdev; + struct ib_device *dev; + unsigned long index; + + down_read(&devices_rwsem); + xa_for_each (&devices, index, dev) { + unsigned long c_index = 0; + + /* Hold nets_rwsem so that any other thread modifying this + * system param can sync with this thread. + */ + down_read(&rdma_nets_rwsem); + xa_for_each (&dev->compat_devs, c_index, cdev) + remove_one_compat_dev(dev, c_index); + up_read(&rdma_nets_rwsem); + } + up_read(&devices_rwsem); +} + +static int add_all_compat_devs(void) +{ + struct rdma_dev_net *rnet; + struct ib_device *dev; + unsigned long index; + int ret = 0; + + down_read(&devices_rwsem); + xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) { + unsigned long net_index = 0; + + /* Hold nets_rwsem so that any other thread modifying this + * system param can sync with this thread. + */ + down_read(&rdma_nets_rwsem); + xa_for_each (&rdma_nets, net_index, rnet) { + ret = add_one_compat_dev(dev, rnet); + if (ret) + break; + } + up_read(&rdma_nets_rwsem); + } + up_read(&devices_rwsem); + if (ret) + remove_all_compat_devs(); + return ret; +} + +int rdma_compatdev_set(u8 enable) +{ + struct rdma_dev_net *rnet; + unsigned long index; + int ret = 0; + + down_write(&rdma_nets_rwsem); + if (ib_devices_shared_netns == enable) { + up_write(&rdma_nets_rwsem); + return 0; + } + + /* enable/disable of compat devices is not supported + * when more than default init_net exists. 
+ */ + xa_for_each (&rdma_nets, index, rnet) { + ret++; + break; + } + if (!ret) + ib_devices_shared_netns = enable; + up_write(&rdma_nets_rwsem); + if (ret) + return -EBUSY; + + if (enable) + ret = add_all_compat_devs(); + else + remove_all_compat_devs(); + return ret; +} + static void rdma_dev_exit_net(struct net *net) { struct rdma_dev_net *rnet = net_generic(net, rdma_dev_net_id); @@ -923,7 +1005,12 @@ static __net_init int rdma_dev_init_net(struct net *net) down_read(&devices_rwsem); xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) { + /* Hold nets_rwsem so that netlink command cannot change + * system configuration for device sharing mode. + */ + down_read(&rdma_nets_rwsem); ret = add_one_compat_dev(dev, rnet); + up_read(&rdma_nets_rwsem); if (ret) break; } diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 49c048738f94..28b4ed8f9930 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -1341,6 +1341,27 @@ static int nldev_get_sys_get_dumpit(struct sk_buff *skb, return skb->len; } +static int nldev_set_sys_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; + u8 enable; + int err; + + err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, + nldev_policy, extack); + if (err || !tb[RDMA_NLDEV_SYS_ATTR_NETNS_MODE]) + return -EINVAL; + + enable = nla_get_u8(tb[RDMA_NLDEV_SYS_ATTR_NETNS_MODE]); + /* Only 0 and 1 are supported */ + if (enable > 1) + return -EINVAL; + + err = rdma_compatdev_set(enable); + return err; +} + static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = { [RDMA_NLDEV_CMD_GET] = { .doit = nldev_get_doit, @@ -1389,6 +1410,10 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = { [RDMA_NLDEV_CMD_SYS_GET] = { .dump = nldev_get_sys_get_dumpit, }, + [RDMA_NLDEV_CMD_SYS_SET] = { + .doit = nldev_set_sys_set_doit, + .flags = RDMA_NL_ADMIN_PERM, + }, }; void __init 
nldev_init(void) diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index 6af88c736073..9bba001a7347 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -262,7 +262,9 @@ enum rdma_nldev_command { RDMA_NLDEV_CMD_PORT_GET, /* can dump */ RDMA_NLDEV_CMD_SYS_GET, /* can dump */ - /* 7 - 8 are free to use */ + RDMA_NLDEV_CMD_SYS_SET, + + /* 8 is free to use */ RDMA_NLDEV_CMD_RES_GET = 9, /* can dump */ -- cgit v1.2.3 From 27e19f45108959f99b23a414be391b6a2a282caa Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 20 Feb 2019 16:20:39 -0800 Subject: RDMA/hns: Convert cq_table to XArray Change the locking to not disable interrupts as the lookup in interrupt context will not see a freed CQ, thanks to the synchronize_irq() call before freeing the CQ. Signed-off-by: Matthew Wilcox Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_cq.c | 33 ++++++++++------------------- drivers/infiniband/hw/hns/hns_roce_device.h | 3 +-- 2 files changed, 12 insertions(+), 24 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 1dfe5627006c..c50f241211e9 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -127,13 +127,9 @@ static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, int nent, goto err_out; } - /* The cq insert radix tree */ - spin_lock_irq(&cq_table->lock); - /* Radix_tree: The associated pointer and long integer key value like */ - ret = radix_tree_insert(&cq_table->tree, hr_cq->cqn, hr_cq); - spin_unlock_irq(&cq_table->lock); + ret = xa_err(xa_store(&cq_table->array, hr_cq->cqn, hr_cq, GFP_KERNEL)); if (ret) { - dev_err(dev, "CQ alloc.Failed to radix_tree_insert.\n"); + dev_err(dev, "CQ alloc failed xa_store.\n"); goto err_put; } @@ -141,7 +137,7 @@ static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, int nent, mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); if 
(IS_ERR(mailbox)) { ret = PTR_ERR(mailbox); - goto err_radix; + goto err_xa; } hr_dev->hw->write_cqc(hr_dev, hr_cq, mailbox->buf, mtts, dma_handle, @@ -152,7 +148,7 @@ static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, int nent, hns_roce_free_cmd_mailbox(hr_dev, mailbox); if (ret) { dev_err(dev, "CQ alloc.Failed to cmd mailbox.\n"); - goto err_radix; + goto err_xa; } hr_cq->cons_index = 0; @@ -164,10 +160,8 @@ static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, int nent, return 0; -err_radix: - spin_lock_irq(&cq_table->lock); - radix_tree_delete(&cq_table->tree, hr_cq->cqn); - spin_unlock_irq(&cq_table->lock); +err_xa: + xa_erase(&cq_table->array, hr_cq->cqn); err_put: hns_roce_table_put(hr_dev, &cq_table->table, hr_cq->cqn); @@ -197,6 +191,8 @@ void hns_roce_free_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) dev_err(dev, "HW2SW_CQ failed (%d) for CQN %06lx\n", ret, hr_cq->cqn); + xa_erase(&cq_table->array, hr_cq->cqn); + /* Waiting interrupt process procedure carried out */ synchronize_irq(hr_dev->eq_table.eq[hr_cq->vector].irq); @@ -205,10 +201,6 @@ void hns_roce_free_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) complete(&hr_cq->free); wait_for_completion(&hr_cq->free); - spin_lock_irq(&cq_table->lock); - radix_tree_delete(&cq_table->tree, hr_cq->cqn); - spin_unlock_irq(&cq_table->lock); - hns_roce_table_put(hr_dev, &cq_table->table, hr_cq->cqn); hns_roce_bitmap_free(&cq_table->bitmap, hr_cq->cqn, BITMAP_NO_RR); } @@ -491,8 +483,7 @@ void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn) struct device *dev = hr_dev->dev; struct hns_roce_cq *cq; - cq = radix_tree_lookup(&hr_dev->cq_table.tree, - cqn & (hr_dev->caps.num_cqs - 1)); + cq = xa_load(&hr_dev->cq_table.array, cqn & (hr_dev->caps.num_cqs - 1)); if (!cq) { dev_warn(dev, "Completion event for bogus CQ 0x%08x\n", cqn); return; @@ -509,8 +500,7 @@ void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type) struct device *dev = hr_dev->dev; 
struct hns_roce_cq *cq; - cq = radix_tree_lookup(&cq_table->tree, - cqn & (hr_dev->caps.num_cqs - 1)); + cq = xa_load(&cq_table->array, cqn & (hr_dev->caps.num_cqs - 1)); if (cq) atomic_inc(&cq->refcount); @@ -530,8 +520,7 @@ int hns_roce_init_cq_table(struct hns_roce_dev *hr_dev) { struct hns_roce_cq_table *cq_table = &hr_dev->cq_table; - spin_lock_init(&cq_table->lock); - INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC); + xa_init(&cq_table->array); return hns_roce_bitmap_init(&cq_table->bitmap, hr_dev->caps.num_cqs, hr_dev->caps.num_cqs - 1, diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 9ee86daf1700..e20c320b322b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -515,8 +515,7 @@ struct hns_roce_qp_table { struct hns_roce_cq_table { struct hns_roce_bitmap bitmap; - spinlock_t lock; - struct radix_tree_root tree; + struct xarray array; struct hns_roce_hem_table table; }; -- cgit v1.2.3 From 736b5a70db9875970dca0f89a9036d17835771ee Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Thu, 25 Oct 2018 11:15:34 -0400 Subject: RDMA/hns: Convert qp_table_tree to XArray Also fully initialise the qp before storing it in the XArray. 
Signed-off-by: Matthew Wilcox Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_device.h | 6 ++-- drivers/infiniband/hw/hns/hns_roce_qp.c | 50 ++++++++--------------------- 2 files changed, 16 insertions(+), 40 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index e20c320b322b..61411ca655f5 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -505,7 +505,6 @@ struct hns_roce_uar_table { struct hns_roce_qp_table { struct hns_roce_bitmap bitmap; - spinlock_t lock; struct hns_roce_hem_table qp_table; struct hns_roce_hem_table irrl_table; struct hns_roce_hem_table trrl_table; @@ -955,7 +954,7 @@ struct hns_roce_dev { int irq[HNS_ROCE_MAX_IRQ_NUM]; u8 __iomem *reg_base; struct hns_roce_caps caps; - struct radix_tree_root qp_table_tree; + struct xarray qp_table_xa; unsigned char dev_addr[HNS_ROCE_MAX_PORTS][MAC_ADDR_OCTET_NUM]; u64 sys_image_guid; @@ -1045,8 +1044,7 @@ static inline void hns_roce_write64_k(__le32 val[2], void __iomem *dest) static inline struct hns_roce_qp *__hns_roce_qp_lookup(struct hns_roce_dev *hr_dev, u32 qpn) { - return radix_tree_lookup(&hr_dev->qp_table_tree, - qpn & (hr_dev->caps.num_qps - 1)); + return xa_load(&hr_dev->qp_table_xa, qpn & (hr_dev->caps.num_qps - 1)); } static inline void *hns_roce_buf_offset(struct hns_roce_buf *buf, int offset) diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 57c76eafef2f..90dcb8cd379c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -45,17 +45,14 @@ void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type) { - struct hns_roce_qp_table *qp_table = &hr_dev->qp_table; struct device *dev = hr_dev->dev; struct hns_roce_qp *qp; - spin_lock(&qp_table->lock); - + xa_lock(&hr_dev->qp_table_xa); qp = __hns_roce_qp_lookup(hr_dev, qpn); if (qp) 
atomic_inc(&qp->refcount); - - spin_unlock(&qp_table->lock); + xa_unlock(&hr_dev->qp_table_xa); if (!qp) { dev_warn(dev, "Async event for bogus QP %08x\n", qpn); @@ -147,29 +144,20 @@ EXPORT_SYMBOL_GPL(to_hns_roce_state); static int hns_roce_gsi_qp_alloc(struct hns_roce_dev *hr_dev, unsigned long qpn, struct hns_roce_qp *hr_qp) { - struct hns_roce_qp_table *qp_table = &hr_dev->qp_table; + struct xarray *xa = &hr_dev->qp_table_xa; int ret; if (!qpn) return -EINVAL; hr_qp->qpn = qpn; - - spin_lock_irq(&qp_table->lock); - ret = radix_tree_insert(&hr_dev->qp_table_tree, - hr_qp->qpn & (hr_dev->caps.num_qps - 1), hr_qp); - spin_unlock_irq(&qp_table->lock); - if (ret) { - dev_err(hr_dev->dev, "QPC radix_tree_insert failed\n"); - goto err_put_irrl; - } - atomic_set(&hr_qp->refcount, 1); init_completion(&hr_qp->free); - return 0; - -err_put_irrl: + ret = xa_err(xa_store_irq(xa, hr_qp->qpn & (hr_dev->caps.num_qps - 1), + hr_qp, GFP_KERNEL)); + if (ret) + dev_err(hr_dev->dev, "QPC xa_store failed\n"); return ret; } @@ -220,17 +208,9 @@ static int hns_roce_qp_alloc(struct hns_roce_dev *hr_dev, unsigned long qpn, } } - spin_lock_irq(&qp_table->lock); - ret = radix_tree_insert(&hr_dev->qp_table_tree, - hr_qp->qpn & (hr_dev->caps.num_qps - 1), hr_qp); - spin_unlock_irq(&qp_table->lock); - if (ret) { - dev_err(dev, "QPC radix_tree_insert failed\n"); + ret = hns_roce_gsi_qp_alloc(hr_dev, qpn, hr_qp); + if (ret) goto err_put_sccc; - } - - atomic_set(&hr_qp->refcount, 1); - init_completion(&hr_qp->free); return 0; @@ -255,13 +235,12 @@ err_out: void hns_roce_qp_remove(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) { - struct hns_roce_qp_table *qp_table = &hr_dev->qp_table; + struct xarray *xa = &hr_dev->qp_table_xa; unsigned long flags; - spin_lock_irqsave(&qp_table->lock, flags); - radix_tree_delete(&hr_dev->qp_table_tree, - hr_qp->qpn & (hr_dev->caps.num_qps - 1)); - spin_unlock_irqrestore(&qp_table->lock, flags); + xa_lock_irqsave(xa, flags); + __xa_erase(xa, hr_qp->qpn 
& (hr_dev->caps.num_qps - 1)); + xa_unlock_irqrestore(xa, flags); } EXPORT_SYMBOL_GPL(hns_roce_qp_remove); @@ -1157,8 +1136,7 @@ int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev) int ret; mutex_init(&qp_table->scc_mutex); - spin_lock_init(&qp_table->lock); - INIT_RADIX_TREE(&hr_dev->qp_table_tree, GFP_ATOMIC); + xa_init(&hr_dev->qp_table_xa); /* In hw v1, a port include two SQP, six ports total 12 */ if (hr_dev->caps.max_sq_sg <= 2) -- cgit v1.2.3 From 0ee3b915b1b03c4deeea9d47af3e6a8e5e66c262 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 20 Feb 2019 16:20:57 -0800 Subject: hfi1: Convert vesw_idr to XArray Signed-off-by: Matthew Wilcox Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/hfi.h | 3 +-- drivers/infiniband/hw/hfi1/vnic_main.c | 16 ++++++++-------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 048b5d73ba39..aafd8c377dd3 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -54,7 +54,6 @@ #include #include #include -#include #include #include #include @@ -1021,8 +1020,8 @@ struct hfi1_asic_data { struct hfi1_vnic_data { struct hfi1_ctxtdata *ctxt[HFI1_NUM_VNIC_CTXT]; struct kmem_cache *txreq_cache; + struct xarray vesws; u8 num_vports; - struct idr vesw_idr; u8 rmt_start; u8 num_ctxt; }; diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c index a922db58be14..4d5683919b1f 100644 --- a/drivers/infiniband/hw/hfi1/vnic_main.c +++ b/drivers/infiniband/hw/hfi1/vnic_main.c @@ -162,12 +162,12 @@ static void deallocate_vnic_ctxt(struct hfi1_devdata *dd, void hfi1_vnic_setup(struct hfi1_devdata *dd) { - idr_init(&dd->vnic.vesw_idr); + xa_init(&dd->vnic.vesws); } void hfi1_vnic_cleanup(struct hfi1_devdata *dd) { - idr_destroy(&dd->vnic.vesw_idr); + WARN_ON(!xa_empty(&dd->vnic.vesws)); } #define SUM_GRP_COUNTERS(stats, qstats, x_grp) do { \ @@ -534,7 +534,7 @@ void 
hfi1_vnic_bypass_rcv(struct hfi1_packet *packet) l4_type = hfi1_16B_get_l4(packet->ebuf); if (likely(l4_type == OPA_16B_L4_ETHR)) { vesw_id = HFI1_VNIC_GET_VESWID(packet->ebuf); - vinfo = idr_find(&dd->vnic.vesw_idr, vesw_id); + vinfo = xa_load(&dd->vnic.vesws, vesw_id); /* * In case of invalid vesw id, count the error on @@ -542,9 +542,10 @@ void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet) */ if (unlikely(!vinfo)) { struct hfi1_vnic_vport_info *vinfo_tmp; - int id_tmp = 0; + unsigned long index = 0; - vinfo_tmp = idr_get_next(&dd->vnic.vesw_idr, &id_tmp); + vinfo_tmp = xa_find(&dd->vnic.vesws, &index, ULONG_MAX, + XA_PRESENT); if (vinfo_tmp) { spin_lock(&vport_cntr_lock); vinfo_tmp->stats[0].netstats.rx_nohandler++; @@ -598,8 +599,7 @@ static int hfi1_vnic_up(struct hfi1_vnic_vport_info *vinfo) if (!vinfo->vesw_id) return -EINVAL; - rc = idr_alloc(&dd->vnic.vesw_idr, vinfo, vinfo->vesw_id, - vinfo->vesw_id + 1, GFP_NOWAIT); + rc = xa_insert(&dd->vnic.vesws, vinfo->vesw_id, vinfo, GFP_KERNEL); if (rc < 0) return rc; @@ -625,7 +625,7 @@ static void hfi1_vnic_down(struct hfi1_vnic_vport_info *vinfo) clear_bit(HFI1_VNIC_UP, &vinfo->flags); netif_carrier_off(vinfo->netdev); netif_tx_disable(vinfo->netdev); - idr_remove(&dd->vnic.vesw_idr, vinfo->vesw_id); + xa_erase(&dd->vnic.vesws, vinfo->vesw_id); /* ensure irqs see the change */ msix_vnic_synchronize_irq(dd); -- cgit v1.2.3 From b6014f9e5f39e389f0034a6c82b7a6542eb6477e Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 20 Feb 2019 16:20:58 -0800 Subject: qedr: Convert qpidr to XArray Signed-off-by: Matthew Wilcox Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qedr/main.c | 3 +-- drivers/infiniband/hw/qedr/qedr.h | 2 +- drivers/infiniband/hw/qedr/qedr_iw_cm.c | 10 ++++------ drivers/infiniband/hw/qedr/verbs.c | 4 ++-- 4 files changed, 8 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index 996d9ecd93e0..21bd625f160e 100644 --- 
a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -364,8 +364,7 @@ static int qedr_alloc_resources(struct qedr_dev *dev) spin_lock_init(&dev->sgid_lock); if (IS_IWARP(dev)) { - spin_lock_init(&dev->qpidr.idr_lock); - idr_init(&dev->qpidr.idr); + xa_init_flags(&dev->qps, XA_FLAGS_LOCK_IRQ); dev->iwarp_wq = create_singlethread_workqueue("qedr_iwarpq"); } diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h index 53bbe6b4e6e6..1d906034f10f 100644 --- a/drivers/infiniband/hw/qedr/qedr.h +++ b/drivers/infiniband/hw/qedr/qedr.h @@ -171,7 +171,7 @@ struct qedr_dev { struct qedr_cq *gsi_rqcq; struct qedr_qp *gsi_qp; enum qed_rdma_type rdma_type; - struct qedr_idr qpidr; + struct xarray qps; struct qedr_idr srqidr; struct workqueue_struct *iwarp_wq; u16 iwarp_max_mtu; diff --git a/drivers/infiniband/hw/qedr/qedr_iw_cm.c b/drivers/infiniband/hw/qedr/qedr_iw_cm.c index 0555e5a8c9ed..22881d4442b9 100644 --- a/drivers/infiniband/hw/qedr/qedr_iw_cm.c +++ b/drivers/infiniband/hw/qedr/qedr_iw_cm.c @@ -491,7 +491,7 @@ int qedr_iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) int rc = 0; int i; - qp = idr_find(&dev->qpidr.idr, conn_param->qpn); + qp = xa_load(&dev->qps, conn_param->qpn); if (unlikely(!qp)) return -EINVAL; @@ -681,7 +681,7 @@ int qedr_iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) DP_DEBUG(dev, QEDR_MSG_IWARP, "Accept on qpid=%d\n", conn_param->qpn); - qp = idr_find(&dev->qpidr.idr, conn_param->qpn); + qp = xa_load(&dev->qps, conn_param->qpn); if (!qp) { DP_ERR(dev, "Invalid QP number %d\n", conn_param->qpn); return -EINVAL; @@ -739,9 +739,7 @@ void qedr_iw_qp_rem_ref(struct ib_qp *ibqp) struct qedr_qp *qp = get_qedr_qp(ibqp); if (atomic_dec_and_test(&qp->refcnt)) { - spin_lock_irq(&qp->dev->qpidr.idr_lock); - idr_remove(&qp->dev->qpidr.idr, qp->qp_id); - spin_unlock_irq(&qp->dev->qpidr.idr_lock); + xa_erase_irq(&qp->dev->qps, qp->qp_id); kfree(qp); } } @@ -750,5 
+748,5 @@ struct ib_qp *qedr_iw_get_qp(struct ib_device *ibdev, int qpn) { struct qedr_dev *dev = get_qedr_dev(ibdev); - return idr_find(&dev->qpidr.idr, qpn); + return xa_load(&dev->qps, qpn); } diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 59ad4202422c..aa66e8620965 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -1988,7 +1988,7 @@ struct ib_qp *qedr_create_qp(struct ib_pd *ibpd, qp->ibqp.qp_num = qp->qp_id; if (rdma_protocol_iwarp(&dev->ibdev, 1)) { - rc = qedr_idr_add(dev, &dev->qpidr, qp, qp->qp_id); + rc = xa_insert_irq(&dev->qps, qp->qp_id, qp, GFP_KERNEL); if (rc) goto err; } @@ -2564,7 +2564,7 @@ int qedr_destroy_qp(struct ib_qp *ibqp) if (atomic_dec_and_test(&qp->refcnt) && rdma_protocol_iwarp(&dev->ibdev, 1)) { - qedr_idr_remove(dev, &dev->qpidr, qp->qp_id); + xa_erase_irq(&dev->qps, qp->qp_id); kfree(qp); } return rc; -- cgit v1.2.3 From 9fd15987ed27b14f57370bed7861868565663739 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 20 Feb 2019 16:20:59 -0800 Subject: qedr: Convert srqidr to XArray Signed-off-by: Matthew Wilcox Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qedr/main.c | 7 +++---- drivers/infiniband/hw/qedr/qedr.h | 9 ++------- drivers/infiniband/hw/qedr/verbs.c | 32 ++------------------------------ 3 files changed, 7 insertions(+), 41 deletions(-) diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index 21bd625f160e..2119158e3692 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -39,7 +39,6 @@ #include #include #include -#include #include #include @@ -759,8 +758,8 @@ static void qedr_affiliated_event(void *context, u8 e_code, void *fw_handle) break; case EVENT_TYPE_SRQ: srq_id = (u16)roce_handle64; - spin_lock_irqsave(&dev->srqidr.idr_lock, flags); - srq = idr_find(&dev->srqidr.idr, srq_id); + xa_lock_irqsave(&dev->srqs, flags); + srq = xa_load(&dev->srqs, srq_id); 
if (srq) { ibsrq = &srq->ibsrq; if (ibsrq->event_handler) { @@ -774,7 +773,7 @@ static void qedr_affiliated_event(void *context, u8 e_code, void *fw_handle) "SRQ event with NULL pointer ibsrq. Handle=%llx\n", roce_handle64); } - spin_unlock_irqrestore(&dev->srqidr.idr_lock, flags); + xa_unlock_irqrestore(&dev->srqs, flags); DP_NOTICE(dev, "SRQ event %d on handle %p\n", e_code, srq); default: break; diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h index 1d906034f10f..6175d1e98717 100644 --- a/drivers/infiniband/hw/qedr/qedr.h +++ b/drivers/infiniband/hw/qedr/qedr.h @@ -33,7 +33,7 @@ #define __QEDR_H__ #include -#include +#include #include #include #include @@ -123,11 +123,6 @@ struct qedr_device_attr { #define QEDR_ENET_STATE_BIT (0) -struct qedr_idr { - spinlock_t idr_lock; /* Protect idr data-structure */ - struct idr idr; -}; - struct qedr_dev { struct ib_device ibdev; struct qed_dev *cdev; @@ -172,7 +167,7 @@ struct qedr_dev { struct qedr_qp *gsi_qp; enum qed_rdma_type rdma_type; struct xarray qps; - struct qedr_idr srqidr; + struct xarray srqs; struct workqueue_struct *iwarp_wq; u16 iwarp_max_mtu; diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index aa66e8620965..e9fc15392dda 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -1380,11 +1380,6 @@ err0: return rc; } -static int qedr_idr_add(struct qedr_dev *dev, struct qedr_idr *qidr, - void *ptr, u32 id); -static void qedr_idr_remove(struct qedr_dev *dev, - struct qedr_idr *qidr, u32 id); - struct ib_srq *qedr_create_srq(struct ib_pd *ibpd, struct ib_srq_init_attr *init_attr, struct ib_udata *udata) @@ -1467,7 +1462,7 @@ struct ib_srq *qedr_create_srq(struct ib_pd *ibpd, goto err2; } - rc = qedr_idr_add(dev, &dev->srqidr, srq, srq->srq_id); + rc = xa_insert_irq(&dev->srqs, srq->srq_id, srq, GFP_KERNEL); if (rc) goto err2; @@ -1496,7 +1491,7 @@ int qedr_destroy_srq(struct ib_srq *ibsrq) struct qedr_dev 
*dev = get_qedr_dev(ibsrq->device); struct qedr_srq *srq = get_qedr_srq(ibsrq); - qedr_idr_remove(dev, &dev->srqidr, srq->srq_id); + xa_erase_irq(&dev->srqs, srq->srq_id); in_params.srq_id = srq->srq_id; dev->ops->rdma_destroy_srq(dev->rdma_ctx, &in_params); @@ -1596,29 +1591,6 @@ static inline void qedr_qp_user_print(struct qedr_dev *dev, struct qedr_qp *qp) qp->usq.buf_len, qp->urq.buf_addr, qp->urq.buf_len); } -static int qedr_idr_add(struct qedr_dev *dev, struct qedr_idr *qidr, - void *ptr, u32 id) -{ - int rc; - - idr_preload(GFP_KERNEL); - spin_lock_irq(&qidr->idr_lock); - - rc = idr_alloc(&qidr->idr, ptr, id, id + 1, GFP_ATOMIC); - - spin_unlock_irq(&qidr->idr_lock); - idr_preload_end(); - - return rc < 0 ? rc : 0; -} - -static void qedr_idr_remove(struct qedr_dev *dev, struct qedr_idr *qidr, u32 id) -{ - spin_lock_irq(&qidr->idr_lock); - idr_remove(&qidr->idr, id); - spin_unlock_irq(&qidr->idr_lock); -} - static inline void qedr_iwarp_populate_user_qp(struct qedr_dev *dev, struct qedr_qp *qp, -- cgit v1.2.3 From 065d55237ad1c72bb849434542842e1328305a29 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 10 Mar 2019 17:27:45 +0200 Subject: RDMA/netlink: Remove unused data structure Delete structure which is not connected due to removal in commit cited in Fixes line. 
Fixes: a78e8723a505 ("RDMA/cma: Remove CM_ID statistics provided by rdma-cm module") Signed-off-by: Leon Romanovsky Reviewed-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- include/uapi/rdma/rdma_netlink.h | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index 9bba001a7347..3a231a989974 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -49,17 +49,6 @@ enum { RDMA_NL_IWPM_NUM_OPS }; -struct rdma_cm_id_stats { - __u32 qp_num; - __u32 bound_dev_if; - __u32 port_space; - __s32 pid; - __u8 cm_state; - __u8 node_type; - __u8 port_num; - __u8 qp_type; -}; - enum { IWPM_NLA_REG_PID_UNSPEC = 0, IWPM_NLA_REG_PID_SEQ, -- cgit v1.2.3 From d3243da8e3700eaccb41b93b498d0dfc77c90d37 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 10 Mar 2019 17:27:46 +0200 Subject: RDMA/core: Don't compare specific bit after boolean AND There is no need to perform extra comparison after boolean AND. Signed-off-by: Leon Romanovsky Reviewed-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- include/rdma/ib_verbs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 418d17c8b65b..3b6eb646066c 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2994,8 +2994,8 @@ static inline bool rdma_cap_ib_mad(const struct ib_device *device, u8 port_num) */ static inline bool rdma_cap_opa_mad(struct ib_device *device, u8 port_num) { - return (device->port_data[port_num].immutable.core_cap_flags & - RDMA_CORE_CAP_OPA_MAD) == RDMA_CORE_CAP_OPA_MAD; + return device->port_data[port_num].immutable.core_cap_flags & + RDMA_CORE_CAP_OPA_MAD; } /** -- cgit v1.2.3 From 03b92789e5cfdac66805c1a98f1ec67336199d56 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 8 Feb 2019 15:41:29 -0500 Subject: hfi1: Convert hfi1_unit_table to XArray Also remove hfi1_devs_list. 
Signed-off-by: Matthew Wilcox Reviewed-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/chip.c | 16 +++++------ drivers/infiniband/hw/hfi1/debugfs.c | 8 +++--- drivers/infiniband/hw/hfi1/driver.c | 10 +++---- drivers/infiniband/hw/hfi1/hfi.h | 5 ++-- drivers/infiniband/hw/hfi1/init.c | 52 +++++++----------------------------- drivers/infiniband/hw/hfi1/verbs.c | 8 +++--- 6 files changed, 31 insertions(+), 68 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 612f04190ed8..6150567c0b51 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -14654,8 +14654,8 @@ void hfi1_start_cleanup(struct hfi1_devdata *dd) */ static int init_asic_data(struct hfi1_devdata *dd) { - unsigned long flags; - struct hfi1_devdata *tmp, *peer = NULL; + unsigned long index; + struct hfi1_devdata *peer; struct hfi1_asic_data *asic_data; int ret = 0; @@ -14664,14 +14664,12 @@ static int init_asic_data(struct hfi1_devdata *dd) if (!asic_data) return -ENOMEM; - spin_lock_irqsave(&hfi1_devs_lock, flags); + xa_lock_irq(&hfi1_dev_table); /* Find our peer device */ - list_for_each_entry(tmp, &hfi1_dev_list, list) { - if ((HFI_BASE_GUID(dd) == HFI_BASE_GUID(tmp)) && - dd->unit != tmp->unit) { - peer = tmp; + xa_for_each(&hfi1_dev_table, index, peer) { + if ((HFI_BASE_GUID(dd) == HFI_BASE_GUID(peer)) && + dd->unit != peer->unit) break; - } } if (peer) { @@ -14683,7 +14681,7 @@ static int init_asic_data(struct hfi1_devdata *dd) mutex_init(&dd->asic_data->asic_resource_mutex); } dd->asic_data->dds[dd->hfi1_id] = dd; /* self back-pointer */ - spin_unlock_irqrestore(&hfi1_devs_lock, flags); + xa_unlock_irq(&hfi1_dev_table); /* first one through - set up i2c devices */ if (!peer) diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c index 427ba0ce74a5..057bb82c664f 100644 --- a/drivers/infiniband/hw/hfi1/debugfs.c +++ 
b/drivers/infiniband/hw/hfi1/debugfs.c @@ -1302,15 +1302,15 @@ static void _driver_stats_seq_stop(struct seq_file *s, void *v) static u64 hfi1_sps_ints(void) { - unsigned long flags; + unsigned long index, flags; struct hfi1_devdata *dd; u64 sps_ints = 0; - spin_lock_irqsave(&hfi1_devs_lock, flags); - list_for_each_entry(dd, &hfi1_dev_list, list) { + xa_lock_irqsave(&hfi1_dev_table, flags); + xa_for_each(&hfi1_dev_table, index, dd) { sps_ints += get_all_cpu_total(dd->int_counter); } - spin_unlock_irqrestore(&hfi1_devs_lock, flags); + xa_unlock_irqrestore(&hfi1_dev_table, flags); return sps_ints; } diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 2a9d2912f5db..867b4e10018f 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -72,8 +72,6 @@ */ const char ib_hfi1_version[] = HFI1_DRIVER_VERSION "\n"; -DEFINE_SPINLOCK(hfi1_devs_lock); -LIST_HEAD(hfi1_dev_list); DEFINE_MUTEX(hfi1_mutex); /* general driver use */ unsigned int hfi1_max_mtu = HFI1_DEFAULT_MAX_MTU; @@ -175,11 +173,11 @@ int hfi1_count_active_units(void) { struct hfi1_devdata *dd; struct hfi1_pportdata *ppd; - unsigned long flags; + unsigned long index, flags; int pidx, nunits_active = 0; - spin_lock_irqsave(&hfi1_devs_lock, flags); - list_for_each_entry(dd, &hfi1_dev_list, list) { + xa_lock_irqsave(&hfi1_dev_table, flags); + xa_for_each(&hfi1_dev_table, index, dd) { if (!(dd->flags & HFI1_PRESENT) || !dd->kregbase1) continue; for (pidx = 0; pidx < dd->num_pports; ++pidx) { @@ -190,7 +188,7 @@ int hfi1_count_active_units(void) } } } - spin_unlock_irqrestore(&hfi1_devs_lock, flags); + xa_unlock_irqrestore(&hfi1_dev_table, flags); return nunits_active; } diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index aafd8c377dd3..b458c218842b 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -64,6 +64,7 @@ #include #include #include +#include #include #include 
#include @@ -1040,7 +1041,6 @@ struct sdma_vl_map; typedef int (*send_routine)(struct rvt_qp *, struct hfi1_pkt_state *, u64); struct hfi1_devdata { struct hfi1_ibdev verbs_dev; /* must be first */ - struct list_head list; /* pointers to related structs for this device */ /* pci access data structure */ struct pci_dev *pcidev; @@ -1425,8 +1425,7 @@ struct hfi1_filedata { struct mm_struct *mm; }; -extern struct list_head hfi1_dev_list; -extern spinlock_t hfi1_devs_lock; +extern struct xarray hfi1_dev_table; struct hfi1_devdata *hfi1_lookup(int unit); static inline unsigned long uctxt_offset(struct hfi1_ctxtdata *uctxt) diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 3c339617aa95..e4c2ae4f1cb3 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -49,7 +49,7 @@ #include #include #include -#include +#include #include #include #include @@ -124,7 +124,7 @@ MODULE_PARM_DESC(user_credit_return_threshold, "Credit return threshold for user static inline u64 encode_rcv_header_entry_size(u16 size); -static struct idr hfi1_unit_table; +DEFINE_XARRAY_FLAGS(hfi1_dev_table, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ); static int hfi1_create_kctxt(struct hfi1_devdata *dd, struct hfi1_pportdata *ppd) @@ -1018,21 +1018,9 @@ done: return ret; } -static inline struct hfi1_devdata *__hfi1_lookup(int unit) -{ - return idr_find(&hfi1_unit_table, unit); -} - struct hfi1_devdata *hfi1_lookup(int unit) { - struct hfi1_devdata *dd; - unsigned long flags; - - spin_lock_irqsave(&hfi1_devs_lock, flags); - dd = __hfi1_lookup(unit); - spin_unlock_irqrestore(&hfi1_devs_lock, flags); - - return dd; + return xa_load(&hfi1_dev_table, unit); } /* @@ -1200,7 +1188,7 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) /* * Release our hold on the shared asic data. If we are the last one, * return the structure to be finalized outside the lock. Must be - * holding hfi1_devs_lock. 
+ * holding hfi1_dev_table lock. */ static struct hfi1_asic_data *release_asic_data(struct hfi1_devdata *dd) { @@ -1236,13 +1224,10 @@ static void hfi1_clean_devdata(struct hfi1_devdata *dd) struct hfi1_asic_data *ad; unsigned long flags; - spin_lock_irqsave(&hfi1_devs_lock, flags); - if (!list_empty(&dd->list)) { - idr_remove(&hfi1_unit_table, dd->unit); - list_del_init(&dd->list); - } + xa_lock_irqsave(&hfi1_dev_table, flags); + __xa_erase(&hfi1_dev_table, dd->unit); ad = release_asic_data(dd); - spin_unlock_irqrestore(&hfi1_devs_lock, flags); + xa_unlock_irqrestore(&hfi1_dev_table, flags); finalize_asic_data(dd, ad); free_platform_config(dd); @@ -1286,13 +1271,10 @@ void hfi1_free_devdata(struct hfi1_devdata *dd) * Must be done via verbs allocator, because the verbs cleanup process * both does cleanup and free of the data structure. * "extra" is for chip-specific data. - * - * Use the idr mechanism to get a unit number for this unit. */ static struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra) { - unsigned long flags; struct hfi1_devdata *dd; int ret, nports; @@ -1307,21 +1289,10 @@ static struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, dd->pport = (struct hfi1_pportdata *)(dd + 1); dd->pcidev = pdev; pci_set_drvdata(pdev, dd); - - INIT_LIST_HEAD(&dd->list); - idr_preload(GFP_KERNEL); - spin_lock_irqsave(&hfi1_devs_lock, flags); - - ret = idr_alloc(&hfi1_unit_table, dd, 0, 0, GFP_NOWAIT); - if (ret >= 0) { - dd->unit = ret; - list_add(&dd->list, &hfi1_dev_list); - } dd->node = NUMA_NO_NODE; - spin_unlock_irqrestore(&hfi1_devs_lock, flags); - idr_preload_end(); - + ret = xa_alloc_irq(&hfi1_dev_table, &dd->unit, dd, xa_limit_32b, + GFP_KERNEL); if (ret < 0) { dev_err(&pdev->dev, "Could not allocate unit ID: error %d\n", -ret); @@ -1522,8 +1493,6 @@ static int __init hfi1_mod_init(void) * These must be called before the driver is registered with * the PCI subsystem. 
*/ - idr_init(&hfi1_unit_table); - hfi1_dbg_init(); ret = pci_register_driver(&hfi1_pci_driver); if (ret < 0) { @@ -1534,7 +1503,6 @@ static int __init hfi1_mod_init(void) bail_dev: hfi1_dbg_exit(); - idr_destroy(&hfi1_unit_table); dev_cleanup(); bail: return ret; @@ -1552,7 +1520,7 @@ static void __exit hfi1_mod_cleanup(void) node_affinity_destroy_all(); hfi1_dbg_exit(); - idr_destroy(&hfi1_unit_table); + WARN_ON(!xa_empty(&hfi1_dev_table)); dispose_firmware(); /* asymmetric with obtain_firmware() */ dev_cleanup(); } diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index b73ab7c64d91..1eb4105b2d22 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1740,15 +1740,15 @@ static struct rdma_hw_stats *alloc_hw_stats(struct ib_device *ibdev, static u64 hfi1_sps_ints(void) { - unsigned long flags; + unsigned long index, flags; struct hfi1_devdata *dd; u64 sps_ints = 0; - spin_lock_irqsave(&hfi1_devs_lock, flags); - list_for_each_entry(dd, &hfi1_dev_list, list) { + xa_lock_irqsave(&hfi1_dev_table, flags); + xa_for_each(&hfi1_dev_table, index, dd) { sps_ints += get_all_cpu_total(dd->int_counter); } - spin_unlock_irqrestore(&hfi1_devs_lock, flags); + xa_unlock_irqrestore(&hfi1_dev_table, flags); return sps_ints; } -- cgit v1.2.3 From 059d48fbf61bec9aae7cd9a00604d7ccf95463b5 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 20 Feb 2019 16:21:00 -0800 Subject: qib: Convert qib_unit_table to XArray Also remove qib_devs_list. 
Signed-off-by: Matthew Wilcox Reviewed-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qib/qib.h | 4 +-- drivers/infiniband/hw/qib/qib_driver.c | 20 +++++------- drivers/infiniband/hw/qib/qib_fs.c | 12 ++----- drivers/infiniband/hw/qib/qib_iba7322.c | 4 +-- drivers/infiniband/hw/qib/qib_init.c | 56 +++++++-------------------------- 5 files changed, 27 insertions(+), 69 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h index 83d2349188db..432d6d0fd7f4 100644 --- a/drivers/infiniband/hw/qib/qib.h +++ b/drivers/infiniband/hw/qib/qib.h @@ -52,6 +52,7 @@ #include #include #include +#include #include #include @@ -1105,8 +1106,7 @@ struct qib_filedata { int rec_cpu_num; /* for cpu affinity; -1 if none */ }; -extern struct list_head qib_dev_list; -extern spinlock_t qib_devs_lock; +extern struct xarray qib_dev_table; extern struct qib_devdata *qib_lookup(int unit); extern u32 qib_cpulist_count; extern unsigned long *qib_cpulist; diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c index 3117cc5f2a9a..92eeea5679e2 100644 --- a/drivers/infiniband/hw/qib/qib_driver.c +++ b/drivers/infiniband/hw/qib/qib_driver.c @@ -49,8 +49,6 @@ */ const char ib_qib_version[] = QIB_DRIVER_VERSION "\n"; -DEFINE_SPINLOCK(qib_devs_lock); -LIST_HEAD(qib_dev_list); DEFINE_MUTEX(qib_mutex); /* general driver use */ unsigned qib_ibmtu; @@ -96,11 +94,11 @@ int qib_count_active_units(void) { struct qib_devdata *dd; struct qib_pportdata *ppd; - unsigned long flags; + unsigned long index, flags; int pidx, nunits_active = 0; - spin_lock_irqsave(&qib_devs_lock, flags); - list_for_each_entry(dd, &qib_dev_list, list) { + xa_lock_irqsave(&qib_dev_table, flags); + xa_for_each(&qib_dev_table, index, dd) { if (!(dd->flags & QIB_PRESENT) || !dd->kregbase) continue; for (pidx = 0; pidx < dd->num_pports; ++pidx) { @@ -112,7 +110,7 @@ int qib_count_active_units(void) } } } - 
spin_unlock_irqrestore(&qib_devs_lock, flags); + xa_unlock_irqrestore(&qib_dev_table, flags); return nunits_active; } @@ -125,13 +123,12 @@ int qib_count_units(int *npresentp, int *nupp) { int nunits = 0, npresent = 0, nup = 0; struct qib_devdata *dd; - unsigned long flags; + unsigned long index, flags; int pidx; struct qib_pportdata *ppd; - spin_lock_irqsave(&qib_devs_lock, flags); - - list_for_each_entry(dd, &qib_dev_list, list) { + xa_lock_irqsave(&qib_dev_table, flags); + xa_for_each(&qib_dev_table, index, dd) { nunits++; if ((dd->flags & QIB_PRESENT) && dd->kregbase) npresent++; @@ -142,8 +139,7 @@ int qib_count_units(int *npresentp, int *nupp) nup++; } } - - spin_unlock_irqrestore(&qib_devs_lock, flags); + xa_unlock_irqrestore(&qib_dev_table, flags); if (npresentp) *npresentp = npresent; diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c index 1d940a2885c9..ceb42d948412 100644 --- a/drivers/infiniband/hw/qib/qib_fs.c +++ b/drivers/infiniband/hw/qib/qib_fs.c @@ -508,8 +508,8 @@ bail: */ static int qibfs_fill_super(struct super_block *sb, void *data, int silent) { - struct qib_devdata *dd, *tmp; - unsigned long flags; + struct qib_devdata *dd; + unsigned long index; int ret; static const struct tree_descr files[] = { @@ -524,18 +524,12 @@ static int qibfs_fill_super(struct super_block *sb, void *data, int silent) goto bail; } - spin_lock_irqsave(&qib_devs_lock, flags); - - list_for_each_entry_safe(dd, tmp, &qib_dev_list, list) { - spin_unlock_irqrestore(&qib_devs_lock, flags); + xa_for_each(&qib_dev_table, index, dd) { ret = add_cntr_files(sb, dd); if (ret) goto bail; - spin_lock_irqsave(&qib_devs_lock, flags); } - spin_unlock_irqrestore(&qib_devs_lock, flags); - bail: return ret; } diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index 17d6b24b3473..5f4aa36e5ca4 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -6140,7 +6140,7 @@ 
static void set_no_qsfp_atten(struct qib_devdata *dd, int change) static int setup_txselect(const char *str, const struct kernel_param *kp) { struct qib_devdata *dd; - unsigned long val; + unsigned long index, val; char *n; if (strlen(str) >= ARRAY_SIZE(txselect_list)) { @@ -6156,7 +6156,7 @@ static int setup_txselect(const char *str, const struct kernel_param *kp) } strncpy(txselect_list, str, ARRAY_SIZE(txselect_list) - 1); - list_for_each_entry(dd, &qib_dev_list, list) + xa_for_each(&qib_dev_table, index, dd) if (dd->deviceid == PCI_DEVICE_ID_QLOGIC_IB_7322) set_no_qsfp_atten(dd, 1); return 0; diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index 9fd69903ca57..d4fd8a6cff7b 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -36,7 +36,6 @@ #include #include #include -#include #include #include #ifdef CONFIG_INFINIBAND_QIB_DCA @@ -95,7 +94,7 @@ MODULE_PARM_DESC(cc_table_size, "Congestion control table entries 0 (CCA disable static void verify_interrupt(struct timer_list *); -static struct idr qib_unit_table; +DEFINE_XARRAY_FLAGS(qib_dev_table, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ); u32 qib_cpulist_count; unsigned long *qib_cpulist; @@ -785,21 +784,9 @@ void __attribute__((weak)) qib_disable_wc(struct qib_devdata *dd) { } -static inline struct qib_devdata *__qib_lookup(int unit) -{ - return idr_find(&qib_unit_table, unit); -} - struct qib_devdata *qib_lookup(int unit) { - struct qib_devdata *dd; - unsigned long flags; - - spin_lock_irqsave(&qib_devs_lock, flags); - dd = __qib_lookup(unit); - spin_unlock_irqrestore(&qib_devs_lock, flags); - - return dd; + return xa_load(&qib_dev_table, unit); } /* @@ -1046,10 +1033,9 @@ void qib_free_devdata(struct qib_devdata *dd) { unsigned long flags; - spin_lock_irqsave(&qib_devs_lock, flags); - idr_remove(&qib_unit_table, dd->unit); - list_del(&dd->list); - spin_unlock_irqrestore(&qib_devs_lock, flags); + xa_lock_irqsave(&qib_dev_table, flags); + 
__xa_erase(&qib_dev_table, dd->unit); + xa_unlock_irqrestore(&qib_dev_table, flags); #ifdef CONFIG_DEBUG_FS qib_dbg_ibdev_exit(&dd->verbs_dev); @@ -1070,15 +1056,15 @@ u64 qib_int_counter(struct qib_devdata *dd) u64 qib_sps_ints(void) { - unsigned long flags; + unsigned long index, flags; struct qib_devdata *dd; u64 sps_ints = 0; - spin_lock_irqsave(&qib_devs_lock, flags); - list_for_each_entry(dd, &qib_dev_list, list) { + xa_lock_irqsave(&qib_dev_table, flags); + xa_for_each(&qib_dev_table, index, dd) { sps_ints += qib_int_counter(dd); } - spin_unlock_irqrestore(&qib_devs_lock, flags); + xa_unlock_irqrestore(&qib_dev_table, flags); return sps_ints; } @@ -1087,12 +1073,9 @@ u64 qib_sps_ints(void) * allocator, because the verbs cleanup process both does cleanup and * free of the data structure. * "extra" is for chip-specific data. - * - * Use the idr mechanism to get a unit number for this unit. */ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra) { - unsigned long flags; struct qib_devdata *dd; int ret, nports; @@ -1103,20 +1086,8 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra) if (!dd) return ERR_PTR(-ENOMEM); - INIT_LIST_HEAD(&dd->list); - - idr_preload(GFP_KERNEL); - spin_lock_irqsave(&qib_devs_lock, flags); - - ret = idr_alloc(&qib_unit_table, dd, 0, 0, GFP_NOWAIT); - if (ret >= 0) { - dd->unit = ret; - list_add(&dd->list, &qib_dev_list); - } - - spin_unlock_irqrestore(&qib_devs_lock, flags); - idr_preload_end(); - + ret = xa_alloc_irq(&qib_dev_table, &dd->unit, dd, xa_limit_32b, + GFP_KERNEL); if (ret < 0) { qib_early_err(&pdev->dev, "Could not allocate unit ID: error %d\n", -ret); @@ -1255,8 +1226,6 @@ static int __init qib_ib_init(void) * These must be called before the driver is registered with * the PCI subsystem. 
*/ - idr_init(&qib_unit_table); - #ifdef CONFIG_INFINIBAND_QIB_DCA dca_register_notify(&dca_notifier); #endif @@ -1281,7 +1250,6 @@ bail_dev: #ifdef CONFIG_DEBUG_FS qib_dbg_exit(); #endif - idr_destroy(&qib_unit_table); qib_dev_cleanup(); bail: return ret; @@ -1313,7 +1281,7 @@ static void __exit qib_ib_cleanup(void) qib_cpulist_count = 0; kfree(qib_cpulist); - idr_destroy(&qib_unit_table); + WARN_ON(!xa_empty(&qib_dev_table)); qib_dev_cleanup(); } -- cgit v1.2.3 From fae7a699a92577e383c82ae42918ec257cf3bba9 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 20 Feb 2019 16:21:01 -0800 Subject: opa_vnic: Convert vport_idr to XArray Signed-off-by: Matthew Wilcox Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c | 60 ++++++++++--------------- 1 file changed, 23 insertions(+), 37 deletions(-) diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c index 560e4f2d466e..76cd09410d9a 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c @@ -51,6 +51,7 @@ */ #include +#include #include #include #include @@ -97,7 +98,7 @@ const char opa_vnic_driver_version[] = DRV_VERSION; * @class_port_info: Class port info information. 
* @tid: Transaction id * @port_num: OPA port number - * @vport_idr: vnic ports idr + * @vports: vnic ports * @event_handler: ib event handler * @lock: adapter interface lock */ @@ -107,7 +108,7 @@ struct opa_vnic_vema_port { struct opa_class_port_info class_port_info; u64 tid; u8 port_num; - struct idr vport_idr; + struct xarray vports; struct ib_event_handler event_handler; /* Lock to query/update network adapter */ @@ -148,7 +149,7 @@ vema_get_vport_adapter(struct opa_vnic_vema_mad *recvd_mad, { u8 vport_num = vema_get_vport_num(recvd_mad); - return idr_find(&port->vport_idr, vport_num); + return xa_load(&port->vports, vport_num); } /** @@ -207,8 +208,7 @@ static struct opa_vnic_adapter *vema_add_vport(struct opa_vnic_vema_port *port, int rc; adapter->cport = cport; - rc = idr_alloc(&port->vport_idr, adapter, vport_num, - vport_num + 1, GFP_NOWAIT); + rc = xa_insert(&port->vports, vport_num, adapter, GFP_KERNEL); if (rc < 0) { opa_vnic_rem_netdev(adapter); adapter = ERR_PTR(rc); @@ -853,36 +853,14 @@ err_exit: v_err("Aborting trap\n"); } -static int vema_rem_vport(int id, void *p, void *data) -{ - struct opa_vnic_adapter *adapter = p; - - opa_vnic_rem_netdev(adapter); - return 0; -} - -static int vema_enable_vport(int id, void *p, void *data) -{ - struct opa_vnic_adapter *adapter = p; - - netif_carrier_on(adapter->netdev); - return 0; -} - -static int vema_disable_vport(int id, void *p, void *data) -{ - struct opa_vnic_adapter *adapter = p; - - netif_carrier_off(adapter->netdev); - return 0; -} - static void opa_vnic_event(struct ib_event_handler *handler, struct ib_event *record) { struct opa_vnic_vema_port *port = container_of(handler, struct opa_vnic_vema_port, event_handler); struct opa_vnic_ctrl_port *cport = port->cport; + struct opa_vnic_adapter *adapter; + unsigned long index; if (record->element.port_num != port->port_num) return; @@ -891,10 +869,16 @@ static void opa_vnic_event(struct ib_event_handler *handler, record->event, 
dev_name(&record->device->dev), record->element.port_num); - if (record->event == IB_EVENT_PORT_ERR) - idr_for_each(&port->vport_idr, vema_disable_vport, NULL); - if (record->event == IB_EVENT_PORT_ACTIVE) - idr_for_each(&port->vport_idr, vema_enable_vport, NULL); + if (record->event != IB_EVENT_PORT_ERR || + record->event != IB_EVENT_PORT_ACTIVE) + return; + + xa_for_each(&port->vports, index, adapter) { + if (record->event == IB_EVENT_PORT_ACTIVE) + netif_carrier_on(adapter->netdev); + else + netif_carrier_off(adapter->netdev); + } } /** @@ -905,6 +889,8 @@ static void opa_vnic_event(struct ib_event_handler *handler, */ static void vema_unregister(struct opa_vnic_ctrl_port *cport) { + struct opa_vnic_adapter *adapter; + unsigned long index; int i; for (i = 1; i <= cport->num_ports; i++) { @@ -915,13 +901,14 @@ static void vema_unregister(struct opa_vnic_ctrl_port *cport) /* Lock ensures no MAD is being processed */ mutex_lock(&port->lock); - idr_for_each(&port->vport_idr, vema_rem_vport, NULL); + xa_for_each(&port->vports, index, adapter) + opa_vnic_rem_netdev(adapter); mutex_unlock(&port->lock); ib_unregister_mad_agent(port->mad_agent); port->mad_agent = NULL; mutex_destroy(&port->lock); - idr_destroy(&port->vport_idr); + xa_destroy(&port->vports); ib_unregister_event_handler(&port->event_handler); } } @@ -958,7 +945,7 @@ static int vema_register(struct opa_vnic_ctrl_port *cport) cport->ibdev, opa_vnic_event); ib_register_event_handler(&port->event_handler); - idr_init(&port->vport_idr); + xa_init(&port->vports); mutex_init(&port->lock); port->mad_agent = ib_register_mad_agent(cport->ibdev, i, IB_QPT_GSI, &reg_req, @@ -969,7 +956,6 @@ static int vema_register(struct opa_vnic_ctrl_port *cport) ret = PTR_ERR(port->mad_agent); port->mad_agent = NULL; mutex_destroy(&port->lock); - idr_destroy(&port->vport_idr); vema_unregister(cport); return ret; } -- cgit v1.2.3 From 70f06b26f07ea42d158b41bba460ce06ffa3510a Mon Sep 17 00:00:00 2001 From: Shamir Rabinovitch Date: Sun, 
31 Mar 2019 19:10:03 +0300 Subject: IB: ucontext should be set properly for all cmd & ioctl paths the Attempt to use the below commit to initialize the ucontext for the uobject destroy path has shown that the below commit is incomplete. Parts were reverted and the ucontext set up in the uverbs_attr_bundle was moved to rdma_lookup_get_uobject which is called from the uobj_get_XXX macros and rdma_alloc_begin_uobject which is called when uobject is created. Fixes: 3d9dfd060391 ("IB/uverbs: Add ib_ucontext to uverbs_attr_bundle sent from ioctl and cmd flows") Signed-off-by: Shamir Rabinovitch Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/rdma_core.c | 70 ++++++++++------------------------ drivers/infiniband/core/rdma_core.h | 5 +-- drivers/infiniband/core/uverbs_cmd.c | 2 +- drivers/infiniband/core/uverbs_ioctl.c | 12 ++---- include/rdma/uverbs_std_types.h | 30 ++++++--------- include/rdma/uverbs_types.h | 6 ++- 6 files changed, 44 insertions(+), 81 deletions(-) diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index 778375ff664e..0d18fb0e975d 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -224,14 +224,13 @@ out_unlock: * uverbs_put_destroy. */ struct ib_uobject *__uobj_get_destroy(const struct uverbs_api_object *obj, - u32 id, - const struct uverbs_attr_bundle *attrs) + u32 id, struct uverbs_attr_bundle *attrs) { struct ib_uobject *uobj; int ret; uobj = rdma_lookup_get_uobject(obj, attrs->ufile, id, - UVERBS_LOOKUP_DESTROY); + UVERBS_LOOKUP_DESTROY, attrs); if (IS_ERR(uobj)) return uobj; @@ -249,7 +248,7 @@ struct ib_uobject *__uobj_get_destroy(const struct uverbs_api_object *obj, * (negative errno on failure). For use by callers that do not need the uobj. 
*/ int __uobj_perform_destroy(const struct uverbs_api_object *obj, u32 id, - const struct uverbs_attr_bundle *attrs) + struct uverbs_attr_bundle *attrs) { struct ib_uobject *uobj; @@ -393,7 +392,8 @@ lookup_get_fd_uobject(const struct uverbs_api_object *obj, struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_api_object *obj, struct ib_uverbs_file *ufile, s64 id, - enum rdma_lookup_mode mode) + enum rdma_lookup_mode mode, + struct uverbs_attr_bundle *attrs) { struct ib_uobject *uobj; int ret; @@ -431,6 +431,8 @@ struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_api_object *obj, ret = uverbs_try_lock_object(uobj, mode); if (ret) goto free; + if (attrs) + attrs->context = uobj->context; return uobj; free: @@ -438,38 +440,6 @@ free: uverbs_uobject_put(uobj); return ERR_PTR(ret); } -struct ib_uobject *_uobj_get_read(enum uverbs_default_objects type, - u32 object_id, - struct uverbs_attr_bundle *attrs) -{ - struct ib_uobject *uobj; - - uobj = rdma_lookup_get_uobject(uobj_get_type(attrs, type), attrs->ufile, - object_id, UVERBS_LOOKUP_READ); - if (IS_ERR(uobj)) - return uobj; - - attrs->context = uobj->context; - - return uobj; -} - -struct ib_uobject *_uobj_get_write(enum uverbs_default_objects type, - u32 object_id, - struct uverbs_attr_bundle *attrs) -{ - struct ib_uobject *uobj; - - uobj = rdma_lookup_get_uobject(uobj_get_type(attrs, type), attrs->ufile, - object_id, UVERBS_LOOKUP_WRITE); - - if (IS_ERR(uobj)) - return uobj; - - attrs->context = uobj->context; - - return uobj; -} static struct ib_uobject * alloc_begin_idr_uobject(const struct uverbs_api_object *obj, @@ -526,7 +496,8 @@ alloc_begin_fd_uobject(const struct uverbs_api_object *obj, } struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_api_object *obj, - struct ib_uverbs_file *ufile) + struct ib_uverbs_file *ufile, + struct uverbs_attr_bundle *attrs) { struct ib_uobject *ret; @@ -546,6 +517,8 @@ struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_api_object 
*obj, up_read(&ufile->hw_destroy_rwsem); return ret; } + if (attrs) + attrs->context = ret->context; return ret; } @@ -967,26 +940,25 @@ const struct uverbs_obj_type_class uverbs_fd_class = { EXPORT_SYMBOL(uverbs_fd_class); struct ib_uobject * -uverbs_get_uobject_from_file(u16 object_id, - struct ib_uverbs_file *ufile, - enum uverbs_obj_access access, s64 id) +uverbs_get_uobject_from_file(u16 object_id, enum uverbs_obj_access access, + s64 id, struct uverbs_attr_bundle *attrs) { const struct uverbs_api_object *obj = - uapi_get_object(ufile->device->uapi, object_id); + uapi_get_object(attrs->ufile->device->uapi, object_id); switch (access) { case UVERBS_ACCESS_READ: - return rdma_lookup_get_uobject(obj, ufile, id, - UVERBS_LOOKUP_READ); + return rdma_lookup_get_uobject(obj, attrs->ufile, id, + UVERBS_LOOKUP_READ, attrs); case UVERBS_ACCESS_DESTROY: /* Actual destruction is done inside uverbs_handle_method */ - return rdma_lookup_get_uobject(obj, ufile, id, - UVERBS_LOOKUP_DESTROY); + return rdma_lookup_get_uobject(obj, attrs->ufile, id, + UVERBS_LOOKUP_DESTROY, attrs); case UVERBS_ACCESS_WRITE: - return rdma_lookup_get_uobject(obj, ufile, id, - UVERBS_LOOKUP_WRITE); + return rdma_lookup_get_uobject(obj, attrs->ufile, id, + UVERBS_LOOKUP_WRITE, attrs); case UVERBS_ACCESS_NEW: - return rdma_alloc_begin_uobject(obj, ufile); + return rdma_alloc_begin_uobject(obj, attrs->ufile, attrs); default: WARN_ON(true); return ERR_PTR(-EOPNOTSUPP); diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h index 69f8db66925e..d91d44f4fa89 100644 --- a/drivers/infiniband/core/rdma_core.h +++ b/drivers/infiniband/core/rdma_core.h @@ -83,9 +83,8 @@ void uverbs_close_fd(struct file *f); * uverbs_finalize_objects are called. 
*/ struct ib_uobject * -uverbs_get_uobject_from_file(u16 object_id, - struct ib_uverbs_file *ufile, - enum uverbs_obj_access access, s64 id); +uverbs_get_uobject_from_file(u16 object_id, enum uverbs_obj_access access, + s64 id, struct uverbs_attr_bundle *attrs); /* * Note that certain finalize stages could return a status: diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index c9acd94b049d..5115a050f313 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -175,7 +175,7 @@ static int uverbs_request_finish(struct uverbs_req_iter *iter) } static struct ib_uverbs_completion_event_file * -_ib_uverbs_lookup_comp_file(s32 fd, const struct uverbs_attr_bundle *attrs) +_ib_uverbs_lookup_comp_file(s32 fd, struct uverbs_attr_bundle *attrs) { struct ib_uobject *uobj = ufd_get_read(UVERBS_OBJECT_COMP_CHANNEL, fd, attrs); diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c index e1379949e663..5255e00b91cc 100644 --- a/drivers/infiniband/core/uverbs_ioctl.c +++ b/drivers/infiniband/core/uverbs_ioctl.c @@ -207,13 +207,12 @@ static int uverbs_process_idrs_array(struct bundle_priv *pbundle, for (i = 0; i != array_len; i++) { attr->uobjects[i] = uverbs_get_uobject_from_file( - spec->u2.objs_arr.obj_type, pbundle->bundle.ufile, - spec->u2.objs_arr.access, idr_vals[i]); + spec->u2.objs_arr.obj_type, spec->u2.objs_arr.access, + idr_vals[i], &pbundle->bundle); if (IS_ERR(attr->uobjects[i])) { ret = PTR_ERR(attr->uobjects[i]); break; } - pbundle->bundle.context = attr->uobjects[i]->context; } attr->len = i; @@ -325,13 +324,10 @@ static int uverbs_process_attr(struct bundle_priv *pbundle, * IDR implementation today rejects negative IDs */ o_attr->uobject = uverbs_get_uobject_from_file( - spec->u.obj.obj_type, - pbundle->bundle.ufile, - spec->u.obj.access, - uattr->data_s64); + spec->u.obj.obj_type, spec->u.obj.access, + uattr->data_s64, &pbundle->bundle); if 
(IS_ERR(o_attr->uobject)) return PTR_ERR(o_attr->uobject); - pbundle->bundle.context = o_attr->uobject->context; __set_bit(attr_bkey, pbundle->uobj_finalize); if (spec->u.obj.access == UVERBS_ACCESS_NEW) { diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h index 794c47565971..2d0e6287e43a 100644 --- a/include/rdma/uverbs_std_types.h +++ b/include/rdma/uverbs_std_types.h @@ -48,17 +48,15 @@ #define uobj_get_type(_attrs, _object) \ uapi_get_object((_attrs)->ufile->device->uapi, _object) -struct ib_uobject *_uobj_get_read(enum uverbs_default_objects type, - u32 object_id, - struct uverbs_attr_bundle *attrs); - #define uobj_get_read(_type, _id, _attrs) \ - _uobj_get_read(_type, _uobj_check_id(_id), _attrs) + rdma_lookup_get_uobject(uobj_get_type(_attrs, _type), (_attrs)->ufile, \ + _uobj_check_id(_id), UVERBS_LOOKUP_READ, \ + _attrs) #define ufd_get_read(_type, _fdnum, _attrs) \ rdma_lookup_get_uobject(uobj_get_type(_attrs, _type), (_attrs)->ufile, \ (_fdnum)*typecheck(s32, _fdnum), \ - UVERBS_LOOKUP_READ) + UVERBS_LOOKUP_READ, _attrs) static inline void *_uobj_get_obj_read(struct ib_uobject *uobj) { @@ -70,22 +68,19 @@ static inline void *_uobj_get_obj_read(struct ib_uobject *uobj) ((struct ib_##_object *)_uobj_get_obj_read( \ uobj_get_read(_type, _id, _attrs))) -struct ib_uobject *_uobj_get_write(enum uverbs_default_objects type, - u32 object_id, - struct uverbs_attr_bundle *attrs); - #define uobj_get_write(_type, _id, _attrs) \ - _uobj_get_write(_type, _uobj_check_id(_id), _attrs) + rdma_lookup_get_uobject(uobj_get_type(_attrs, _type), (_attrs)->ufile, \ + _uobj_check_id(_id), UVERBS_LOOKUP_WRITE, \ + _attrs) int __uobj_perform_destroy(const struct uverbs_api_object *obj, u32 id, - const struct uverbs_attr_bundle *attrs); + struct uverbs_attr_bundle *attrs); #define uobj_perform_destroy(_type, _id, _attrs) \ __uobj_perform_destroy(uobj_get_type(_attrs, _type), \ _uobj_check_id(_id), _attrs) struct ib_uobject *__uobj_get_destroy(const 
struct uverbs_api_object *obj, - u32 id, - const struct uverbs_attr_bundle *attrs); + u32 id, struct uverbs_attr_bundle *attrs); #define uobj_get_destroy(_type, _id, _attrs) \ __uobj_get_destroy(uobj_get_type(_attrs, _type), _uobj_check_id(_id), \ @@ -127,12 +122,11 @@ static inline struct ib_uobject * __uobj_alloc(const struct uverbs_api_object *obj, struct uverbs_attr_bundle *attrs, struct ib_device **ib_dev) { - struct ib_uobject *uobj = rdma_alloc_begin_uobject(obj, attrs->ufile); + struct ib_uobject *uobj = + rdma_alloc_begin_uobject(obj, attrs->ufile, attrs); - if (!IS_ERR(uobj)) { + if (!IS_ERR(uobj)) *ib_dev = uobj->context->device; - attrs->context = uobj->context; - } return uobj; } diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h index 175d761695e1..b68f1b92c25d 100644 --- a/include/rdma/uverbs_types.h +++ b/include/rdma/uverbs_types.h @@ -131,11 +131,13 @@ struct uverbs_obj_idr_type { struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_api_object *obj, struct ib_uverbs_file *ufile, s64 id, - enum rdma_lookup_mode mode); + enum rdma_lookup_mode mode, + struct uverbs_attr_bundle *attrs); void rdma_lookup_put_uobject(struct ib_uobject *uobj, enum rdma_lookup_mode mode); struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_api_object *obj, - struct ib_uverbs_file *ufile); + struct ib_uverbs_file *ufile, + struct uverbs_attr_bundle *attrs); void rdma_alloc_abort_uobject(struct ib_uobject *uobj); int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj); -- cgit v1.2.3 From a6a3797df2741aa81f33fe48f609247dba98f3f7 Mon Sep 17 00:00:00 2001 From: Shamir Rabinovitch Date: Sun, 31 Mar 2019 19:10:04 +0300 Subject: IB: Pass uverbs_attr_bundle down uobject destroy path Pass uverbs_attr_bundle down the uobject destroy path. The next patch will use this to eliminate the dependency of the drivers on ib_x->uobject pointers. 
Signed-off-by: Shamir Rabinovitch Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/rdma_core.c | 48 ++++++++++++-------- drivers/infiniband/core/rdma_core.h | 6 +-- drivers/infiniband/core/uverbs_cmd.c | 52 +++++++++++----------- drivers/infiniband/core/uverbs_ioctl.c | 15 ++++--- drivers/infiniband/core/uverbs_std_types.c | 27 +++++++---- .../infiniband/core/uverbs_std_types_counters.c | 3 +- drivers/infiniband/core/uverbs_std_types_cq.c | 3 +- drivers/infiniband/core/uverbs_std_types_dm.c | 3 +- .../infiniband/core/uverbs_std_types_flow_action.c | 3 +- drivers/infiniband/core/uverbs_std_types_mr.c | 3 +- drivers/infiniband/hw/mlx5/devx.c | 6 ++- drivers/infiniband/hw/mlx5/flow.c | 3 +- include/rdma/uverbs_std_types.h | 10 +++-- include/rdma/uverbs_types.h | 12 +++-- 14 files changed, 116 insertions(+), 78 deletions(-) diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index 0d18fb0e975d..d0a6755c0562 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -125,7 +125,8 @@ static void assert_uverbs_usecnt(struct ib_uobject *uobj, * and consumes the kref on the uobj. */ static int uverbs_destroy_uobject(struct ib_uobject *uobj, - enum rdma_remove_reason reason) + enum rdma_remove_reason reason, + struct uverbs_attr_bundle *attrs) { struct ib_uverbs_file *ufile = uobj->ufile; unsigned long flags; @@ -135,7 +136,8 @@ static int uverbs_destroy_uobject(struct ib_uobject *uobj, assert_uverbs_usecnt(uobj, UVERBS_LOOKUP_WRITE); if (uobj->object) { - ret = uobj->uapi_object->type_class->destroy_hw(uobj, reason); + ret = uobj->uapi_object->type_class->destroy_hw(uobj, reason, + attrs); if (ret) { if (ib_is_destroy_retryable(ret, reason, uobj)) return ret; @@ -196,7 +198,7 @@ static int uverbs_destroy_uobject(struct ib_uobject *uobj, * version requires the caller to have already obtained an * LOOKUP_DESTROY uobject kref. 
*/ -int uobj_destroy(struct ib_uobject *uobj) +int uobj_destroy(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs) { struct ib_uverbs_file *ufile = uobj->ufile; int ret; @@ -207,7 +209,7 @@ int uobj_destroy(struct ib_uobject *uobj) if (ret) goto out_unlock; - ret = uverbs_destroy_uobject(uobj, RDMA_REMOVE_DESTROY); + ret = uverbs_destroy_uobject(uobj, RDMA_REMOVE_DESTROY, attrs); if (ret) { atomic_set(&uobj->usecnt, 0); goto out_unlock; @@ -234,7 +236,7 @@ struct ib_uobject *__uobj_get_destroy(const struct uverbs_api_object *obj, if (IS_ERR(uobj)) return uobj; - ret = uobj_destroy(uobj); + ret = uobj_destroy(uobj, attrs); if (ret) { rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_DESTROY); return ERR_PTR(ret); @@ -533,12 +535,13 @@ static void alloc_abort_idr_uobject(struct ib_uobject *uobj) } static int __must_check destroy_hw_idr_uobject(struct ib_uobject *uobj, - enum rdma_remove_reason why) + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) { const struct uverbs_obj_idr_type *idr_type = container_of(uobj->uapi_object->type_attrs, struct uverbs_obj_idr_type, type); - int ret = idr_type->destroy_object(uobj, why); + int ret = idr_type->destroy_object(uobj, why, attrs); /* * We can only fail gracefully if the user requested to destroy the @@ -572,7 +575,8 @@ static void alloc_abort_fd_uobject(struct ib_uobject *uobj) } static int __must_check destroy_hw_fd_uobject(struct ib_uobject *uobj, - enum rdma_remove_reason why) + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) { const struct uverbs_obj_fd_type *fd_type = container_of( uobj->uapi_object->type_attrs, struct uverbs_obj_fd_type, type); @@ -648,7 +652,8 @@ static int alloc_commit_fd_uobject(struct ib_uobject *uobj) * caller can no longer assume uobj is valid. If this function fails it * destroys the uboject, including the attached HW object. 
*/ -int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj) +int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj, + struct uverbs_attr_bundle *attrs) { struct ib_uverbs_file *ufile = uobj->ufile; int ret; @@ -656,7 +661,7 @@ int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj) /* alloc_commit consumes the uobj kref */ ret = uobj->uapi_object->type_class->alloc_commit(uobj); if (ret) { - uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT); + uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT, attrs); up_read(&ufile->hw_destroy_rwsem); return ret; } @@ -680,12 +685,13 @@ int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj) * This consumes the kref for uobj. It is up to the caller to unwind the HW * object and anything else connected to uobj before calling this. */ -void rdma_alloc_abort_uobject(struct ib_uobject *uobj) +void rdma_alloc_abort_uobject(struct ib_uobject *uobj, + struct uverbs_attr_bundle *attrs) { struct ib_uverbs_file *ufile = uobj->ufile; uobj->object = NULL; - uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT); + uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT, attrs); /* Matches the down_read in rdma_alloc_begin_uobject */ up_read(&ufile->hw_destroy_rwsem); @@ -787,6 +793,10 @@ void uverbs_close_fd(struct file *f) { struct ib_uobject *uobj = f->private_data; struct ib_uverbs_file *ufile = uobj->ufile; + struct uverbs_attr_bundle attrs = { + .context = uobj->context, + .ufile = ufile, + }; if (down_read_trylock(&ufile->hw_destroy_rwsem)) { /* @@ -796,7 +806,7 @@ void uverbs_close_fd(struct file *f) * write lock here, or we have a kernel bug. 
*/ WARN_ON(uverbs_try_lock_object(uobj, UVERBS_LOOKUP_WRITE)); - uverbs_destroy_uobject(uobj, RDMA_REMOVE_CLOSE); + uverbs_destroy_uobject(uobj, RDMA_REMOVE_CLOSE, &attrs); up_read(&ufile->hw_destroy_rwsem); } @@ -845,6 +855,7 @@ static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile, { struct ib_uobject *obj, *next_obj; int ret = -EINVAL; + struct uverbs_attr_bundle attrs = { .ufile = ufile }; /* * This shouldn't run while executing other commands on this @@ -856,12 +867,13 @@ static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile, * other threads (which might still use the FDs) chance to run. */ list_for_each_entry_safe(obj, next_obj, &ufile->uobjects, list) { + attrs.context = obj->context; /* * if we hit this WARN_ON, that means we are * racing with a lookup_get. */ WARN_ON(uverbs_try_lock_object(obj, UVERBS_LOOKUP_WRITE)); - if (!uverbs_destroy_uobject(obj, reason)) + if (!uverbs_destroy_uobject(obj, reason, &attrs)) ret = 0; else atomic_set(&obj->usecnt, 0); @@ -966,8 +978,8 @@ uverbs_get_uobject_from_file(u16 object_id, enum uverbs_obj_access access, } int uverbs_finalize_object(struct ib_uobject *uobj, - enum uverbs_obj_access access, - bool commit) + enum uverbs_obj_access access, bool commit, + struct uverbs_attr_bundle *attrs) { int ret = 0; @@ -990,9 +1002,9 @@ int uverbs_finalize_object(struct ib_uobject *uobj, break; case UVERBS_ACCESS_NEW: if (commit) - ret = rdma_alloc_commit_uobject(uobj); + ret = rdma_alloc_commit_uobject(uobj, attrs); else - rdma_alloc_abort_uobject(uobj); + rdma_alloc_abort_uobject(uobj, attrs); break; default: WARN_ON(true); diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h index d91d44f4fa89..5445323629b5 100644 --- a/drivers/infiniband/core/rdma_core.h +++ b/drivers/infiniband/core/rdma_core.h @@ -48,7 +48,7 @@ struct ib_uverbs_device; void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile, enum rdma_remove_reason reason); -int uobj_destroy(struct ib_uobject *uobj); +int 
uobj_destroy(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs); /* * uverbs_uobject_get is called in order to increase the reference count on @@ -102,8 +102,8 @@ uverbs_get_uobject_from_file(u16 object_id, enum uverbs_obj_access access, * object. */ int uverbs_finalize_object(struct ib_uobject *uobj, - enum uverbs_obj_access access, - bool commit); + enum uverbs_obj_access access, bool commit, + struct uverbs_attr_bundle *attrs); int uverbs_output_written(const struct uverbs_attr_bundle *bundle, size_t idx); diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 5115a050f313..726275288887 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -436,7 +436,7 @@ static int ib_uverbs_alloc_pd(struct uverbs_attr_bundle *attrs) if (ret) goto err_copy; - return uobj_alloc_commit(uobj); + return uobj_alloc_commit(uobj, attrs); err_copy: ib_dealloc_pd(pd); @@ -444,7 +444,7 @@ err_copy: err_alloc: kfree(pd); err: - uobj_alloc_abort(uobj); + uobj_alloc_abort(uobj, attrs); return ret; } @@ -633,7 +633,7 @@ static int ib_uverbs_open_xrcd(struct uverbs_attr_bundle *attrs) mutex_unlock(&ibudev->xrcd_tree_mutex); - return uobj_alloc_commit(&obj->uobject); + return uobj_alloc_commit(&obj->uobject, attrs); err_copy: if (inode) { @@ -646,7 +646,7 @@ err_dealloc_xrcd: ib_dealloc_xrcd(xrcd); err: - uobj_alloc_abort(&obj->uobject); + uobj_alloc_abort(&obj->uobject, attrs); err_tree_mutex_unlock: if (f.file) @@ -763,7 +763,7 @@ static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs) uobj_put_obj_read(pd); - return uobj_alloc_commit(uobj); + return uobj_alloc_commit(uobj, attrs); err_copy: ib_dereg_mr(mr); @@ -772,7 +772,7 @@ err_put: uobj_put_obj_read(pd); err_free: - uobj_alloc_abort(uobj); + uobj_alloc_abort(uobj, attrs); return ret; } @@ -917,14 +917,14 @@ static int ib_uverbs_alloc_mw(struct uverbs_attr_bundle *attrs) goto err_copy; uobj_put_obj_read(pd); - return uobj_alloc_commit(uobj); 
+ return uobj_alloc_commit(uobj, attrs); err_copy: uverbs_dealloc_mw(mw); err_put: uobj_put_obj_read(pd); err_free: - uobj_alloc_abort(uobj); + uobj_alloc_abort(uobj, attrs); return ret; } @@ -965,11 +965,11 @@ static int ib_uverbs_create_comp_channel(struct uverbs_attr_bundle *attrs) ret = uverbs_response(attrs, &resp, sizeof(resp)); if (ret) { - uobj_alloc_abort(uobj); + uobj_alloc_abort(uobj, attrs); return ret; } - return uobj_alloc_commit(uobj); + return uobj_alloc_commit(uobj, attrs); } static struct ib_ucq_object *create_cq(struct uverbs_attr_bundle *attrs, @@ -1036,7 +1036,7 @@ static struct ib_ucq_object *create_cq(struct uverbs_attr_bundle *attrs, if (ret) goto err_cb; - ret = uobj_alloc_commit(&obj->uobject); + ret = uobj_alloc_commit(&obj->uobject, attrs); if (ret) return ERR_PTR(ret); return obj; @@ -1049,7 +1049,7 @@ err_file: ib_uverbs_release_ucq(attrs->ufile, ev_file, obj); err: - uobj_alloc_abort(&obj->uobject); + uobj_alloc_abort(&obj->uobject, attrs); return ERR_PTR(ret); } @@ -1477,7 +1477,7 @@ static int create_qp(struct uverbs_attr_bundle *attrs, if (ind_tbl) uobj_put_obj_read(ind_tbl); - return uobj_alloc_commit(&obj->uevent.uobject); + return uobj_alloc_commit(&obj->uevent.uobject, attrs); err_cb: ib_destroy_qp(qp); @@ -1495,7 +1495,7 @@ err_put: if (ind_tbl) uobj_put_obj_read(ind_tbl); - uobj_alloc_abort(&obj->uevent.uobject); + uobj_alloc_abort(&obj->uevent.uobject, attrs); return ret; } @@ -1609,14 +1609,14 @@ static int ib_uverbs_open_qp(struct uverbs_attr_bundle *attrs) qp->uobject = &obj->uevent.uobject; uobj_put_read(xrcd_uobj); - return uobj_alloc_commit(&obj->uevent.uobject); + return uobj_alloc_commit(&obj->uevent.uobject, attrs); err_destroy: ib_destroy_qp(qp); err_xrcd: uobj_put_read(xrcd_uobj); err_put: - uobj_alloc_abort(&obj->uevent.uobject); + uobj_alloc_abort(&obj->uevent.uobject, attrs); return ret; } @@ -2451,7 +2451,7 @@ static int ib_uverbs_create_ah(struct uverbs_attr_bundle *attrs) goto err_copy; 
uobj_put_obj_read(pd); - return uobj_alloc_commit(uobj); + return uobj_alloc_commit(uobj, attrs); err_copy: rdma_destroy_ah(ah, RDMA_DESTROY_AH_SLEEPABLE); @@ -2460,7 +2460,7 @@ err_put: uobj_put_obj_read(pd); err: - uobj_alloc_abort(uobj); + uobj_alloc_abort(uobj, attrs); return ret; } @@ -2962,7 +2962,7 @@ static int ib_uverbs_ex_create_wq(struct uverbs_attr_bundle *attrs) uobj_put_obj_read(pd); uobj_put_obj_read(cq); - return uobj_alloc_commit(&obj->uevent.uobject); + return uobj_alloc_commit(&obj->uevent.uobject, attrs); err_copy: ib_destroy_wq(wq); @@ -2971,7 +2971,7 @@ err_put_cq: err_put_pd: uobj_put_obj_read(pd); err_uobj: - uobj_alloc_abort(&obj->uevent.uobject); + uobj_alloc_abort(&obj->uevent.uobject, attrs); return err; } @@ -3136,12 +3136,12 @@ static int ib_uverbs_ex_create_rwq_ind_table(struct uverbs_attr_bundle *attrs) for (j = 0; j < num_read_wqs; j++) uobj_put_obj_read(wqs[j]); - return uobj_alloc_commit(uobj); + return uobj_alloc_commit(uobj, attrs); err_copy: ib_destroy_rwq_ind_table(rwq_ind_tbl); err_uobj: - uobj_alloc_abort(uobj); + uobj_alloc_abort(uobj, attrs); put_wqs: for (j = 0; j < num_read_wqs; j++) uobj_put_obj_read(wqs[j]); @@ -3314,7 +3314,7 @@ static int ib_uverbs_ex_create_flow(struct uverbs_attr_bundle *attrs) kfree(flow_attr); if (cmd.flow_attr.num_of_specs) kfree(kern_flow_attr); - return uobj_alloc_commit(uobj); + return uobj_alloc_commit(uobj, attrs); err_copy: if (!qp->device->ops.destroy_flow(flow_id)) atomic_dec(&qp->usecnt); @@ -3325,7 +3325,7 @@ err_free_flow_attr: err_put: uobj_put_obj_read(qp); err_uobj: - uobj_alloc_abort(uobj); + uobj_alloc_abort(uobj, attrs); err_free_attr: if (cmd.flow_attr.num_of_specs) kfree(kern_flow_attr); @@ -3458,7 +3458,7 @@ static int __uverbs_create_xsrq(struct uverbs_attr_bundle *attrs, uobj_put_obj_read(attr.ext.cq); uobj_put_obj_read(pd); - return uobj_alloc_commit(&obj->uevent.uobject); + return uobj_alloc_commit(&obj->uevent.uobject, attrs); err_copy: ib_destroy_srq(srq); @@ -3477,7 
+3477,7 @@ err_put_xrcd: } err: - uobj_alloc_abort(&obj->uevent.uobject); + uobj_alloc_abort(&obj->uevent.uobject, attrs); return ret; } diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c index 5255e00b91cc..cfbef25b3a73 100644 --- a/drivers/infiniband/core/uverbs_ioctl.c +++ b/drivers/infiniband/core/uverbs_ioctl.c @@ -222,7 +222,7 @@ static int uverbs_process_idrs_array(struct bundle_priv *pbundle, static int uverbs_free_idrs_array(const struct uverbs_api_attr *attr_uapi, struct uverbs_objs_arr_attr *attr, - bool commit) + bool commit, struct uverbs_attr_bundle *attrs) { const struct uverbs_attr_spec *spec = &attr_uapi->spec; int current_ret; @@ -230,8 +230,9 @@ static int uverbs_free_idrs_array(const struct uverbs_api_attr *attr_uapi, size_t i; for (i = 0; i != attr->len; i++) { - current_ret = uverbs_finalize_object( - attr->uobjects[i], spec->u2.objs_arr.access, commit); + current_ret = uverbs_finalize_object(attr->uobjects[i], + spec->u2.objs_arr.access, + commit, attrs); if (!ret) ret = current_ret; } @@ -457,7 +458,7 @@ static int ib_uverbs_run_method(struct bundle_priv *pbundle, struct uverbs_obj_attr *destroy_attr = &pbundle->bundle.attrs[destroy_bkey].obj_attr; - ret = uobj_destroy(destroy_attr->uobject); + ret = uobj_destroy(destroy_attr->uobject, &pbundle->bundle); if (ret) return ret; __clear_bit(destroy_bkey, pbundle->uobj_finalize); @@ -508,7 +509,8 @@ static int bundle_destroy(struct bundle_priv *pbundle, bool commit) current_ret = uverbs_finalize_object( attr->obj_attr.uobject, - attr->obj_attr.attr_elm->spec.u.obj.access, commit); + attr->obj_attr.attr_elm->spec.u.obj.access, commit, + &pbundle->bundle); if (!ret) ret = current_ret; } @@ -531,7 +533,8 @@ static int bundle_destroy(struct bundle_priv *pbundle, bool commit) if (attr_uapi->spec.type == UVERBS_ATTR_TYPE_IDRS_ARRAY) { current_ret = uverbs_free_idrs_array( - attr_uapi, &attr->objs_arr_attr, commit); + attr_uapi, &attr->objs_arr_attr, commit, + 
&pbundle->bundle); if (!ret) ret = current_ret; } diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c index f224cb727224..a1b22fca057e 100644 --- a/drivers/infiniband/core/uverbs_std_types.c +++ b/drivers/infiniband/core/uverbs_std_types.c @@ -40,14 +40,16 @@ #include "uverbs.h" static int uverbs_free_ah(struct ib_uobject *uobject, - enum rdma_remove_reason why) + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) { return rdma_destroy_ah((struct ib_ah *)uobject->object, RDMA_DESTROY_AH_SLEEPABLE); } static int uverbs_free_flow(struct ib_uobject *uobject, - enum rdma_remove_reason why) + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) { struct ib_flow *flow = (struct ib_flow *)uobject->object; struct ib_uflow_object *uflow = @@ -66,13 +68,15 @@ static int uverbs_free_flow(struct ib_uobject *uobject, } static int uverbs_free_mw(struct ib_uobject *uobject, - enum rdma_remove_reason why) + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) { return uverbs_dealloc_mw((struct ib_mw *)uobject->object); } static int uverbs_free_qp(struct ib_uobject *uobject, - enum rdma_remove_reason why) + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) { struct ib_qp *qp = uobject->object; struct ib_uqp_object *uqp = @@ -105,7 +109,8 @@ static int uverbs_free_qp(struct ib_uobject *uobject, } static int uverbs_free_rwq_ind_tbl(struct ib_uobject *uobject, - enum rdma_remove_reason why) + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) { struct ib_rwq_ind_table *rwq_ind_tbl = uobject->object; struct ib_wq **ind_tbl = rwq_ind_tbl->ind_tbl; @@ -120,7 +125,8 @@ static int uverbs_free_rwq_ind_tbl(struct ib_uobject *uobject, } static int uverbs_free_wq(struct ib_uobject *uobject, - enum rdma_remove_reason why) + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) { struct ib_wq *wq = uobject->object; struct ib_uwq_object *uwq = @@ -136,7 +142,8 @@ static 
int uverbs_free_wq(struct ib_uobject *uobject, } static int uverbs_free_srq(struct ib_uobject *uobject, - enum rdma_remove_reason why) + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) { struct ib_srq *srq = uobject->object; struct ib_uevent_object *uevent = @@ -160,7 +167,8 @@ static int uverbs_free_srq(struct ib_uobject *uobject, } static int uverbs_free_xrcd(struct ib_uobject *uobject, - enum rdma_remove_reason why) + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) { struct ib_xrcd *xrcd = uobject->object; struct ib_uxrcd_object *uxrcd = @@ -179,7 +187,8 @@ static int uverbs_free_xrcd(struct ib_uobject *uobject, } static int uverbs_free_pd(struct ib_uobject *uobject, - enum rdma_remove_reason why) + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) { struct ib_pd *pd = uobject->object; int ret; diff --git a/drivers/infiniband/core/uverbs_std_types_counters.c b/drivers/infiniband/core/uverbs_std_types_counters.c index 7880d50165ed..87aaf91072e3 100644 --- a/drivers/infiniband/core/uverbs_std_types_counters.c +++ b/drivers/infiniband/core/uverbs_std_types_counters.c @@ -36,7 +36,8 @@ #include static int uverbs_free_counters(struct ib_uobject *uobject, - enum rdma_remove_reason why) + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) { struct ib_counters *counters = uobject->object; int ret; diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c index a59ea89e3f2b..5664a8f48527 100644 --- a/drivers/infiniband/core/uverbs_std_types_cq.c +++ b/drivers/infiniband/core/uverbs_std_types_cq.c @@ -35,7 +35,8 @@ #include "uverbs.h" static int uverbs_free_cq(struct ib_uobject *uobject, - enum rdma_remove_reason why) + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) { struct ib_cq *cq = uobject->object; struct ib_uverbs_event_queue *ev_queue = cq->cq_context; diff --git a/drivers/infiniband/core/uverbs_std_types_dm.c 
b/drivers/infiniband/core/uverbs_std_types_dm.c index de3f04a4398c..50d71522e1cd 100644 --- a/drivers/infiniband/core/uverbs_std_types_dm.c +++ b/drivers/infiniband/core/uverbs_std_types_dm.c @@ -35,7 +35,8 @@ #include static int uverbs_free_dm(struct ib_uobject *uobject, - enum rdma_remove_reason why) + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) { struct ib_dm *dm = uobject->object; int ret; diff --git a/drivers/infiniband/core/uverbs_std_types_flow_action.c b/drivers/infiniband/core/uverbs_std_types_flow_action.c index 3a87b16a93b3..d6dbc1d580e5 100644 --- a/drivers/infiniband/core/uverbs_std_types_flow_action.c +++ b/drivers/infiniband/core/uverbs_std_types_flow_action.c @@ -35,7 +35,8 @@ #include static int uverbs_free_flow_action(struct ib_uobject *uobject, - enum rdma_remove_reason why) + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) { struct ib_flow_action *action = uobject->object; int ret; diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c b/drivers/infiniband/core/uverbs_std_types_mr.c index 3b4bf6370333..a74b73f684d4 100644 --- a/drivers/infiniband/core/uverbs_std_types_mr.c +++ b/drivers/infiniband/core/uverbs_std_types_mr.c @@ -35,7 +35,8 @@ #include static int uverbs_free_mr(struct ib_uobject *uobject, - enum rdma_remove_reason why) + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) { return ib_dereg_mr((struct ib_mr *)uobject->object); } diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index fa8d2a9229fa..0770dcc74add 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -1117,7 +1117,8 @@ static void devx_cleanup_mkey(struct devx_obj *obj) } static int devx_obj_cleanup(struct ib_uobject *uobject, - enum rdma_remove_reason why) + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) { u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; struct devx_obj *obj = uobject->object; @@ -1599,7 +1600,8 @@ err_obj_free: 
} static int devx_umem_cleanup(struct ib_uobject *uobject, - enum rdma_remove_reason why) + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) { struct devx_umem *obj = uobject->object; u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c index 798591a18484..e8c3847a1a10 100644 --- a/drivers/infiniband/hw/mlx5/flow.c +++ b/drivers/infiniband/hw/mlx5/flow.c @@ -189,7 +189,8 @@ err_out: } static int flow_matcher_cleanup(struct ib_uobject *uobject, - enum rdma_remove_reason why) + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) { struct mlx5_ib_flow_matcher *obj = uobject->object; int ret; diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h index 2d0e6287e43a..b9226a5cdfd7 100644 --- a/include/rdma/uverbs_std_types.h +++ b/include/rdma/uverbs_std_types.h @@ -104,18 +104,20 @@ static inline void uobj_put_write(struct ib_uobject *uobj) rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE); } -static inline int __must_check uobj_alloc_commit(struct ib_uobject *uobj) +static inline int __must_check +uobj_alloc_commit(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs) { - int ret = rdma_alloc_commit_uobject(uobj); + int ret = rdma_alloc_commit_uobject(uobj, attrs); if (ret) return ret; return 0; } -static inline void uobj_alloc_abort(struct ib_uobject *uobj) +static inline void uobj_alloc_abort(struct ib_uobject *uobj, + struct uverbs_attr_bundle *attrs) { - rdma_alloc_abort_uobject(uobj); + rdma_alloc_abort_uobject(uobj, attrs); } static inline struct ib_uobject * diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h index b68f1b92c25d..d57a5ba00c74 100644 --- a/include/rdma/uverbs_types.h +++ b/include/rdma/uverbs_types.h @@ -95,7 +95,8 @@ struct uverbs_obj_type_class { void (*lookup_put)(struct ib_uobject *uobj, enum rdma_lookup_mode mode); /* This does not consume the kref on uobj */ int __must_check 
(*destroy_hw)(struct ib_uobject *uobj, - enum rdma_remove_reason why); + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs); void (*remove_handle)(struct ib_uobject *uobj); u8 needs_kfree_rcu; }; @@ -126,7 +127,8 @@ struct uverbs_obj_idr_type { * completely unchanged. */ int __must_check (*destroy_object)(struct ib_uobject *uobj, - enum rdma_remove_reason why); + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs); }; struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_api_object *obj, @@ -138,8 +140,10 @@ void rdma_lookup_put_uobject(struct ib_uobject *uobj, struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_api_object *obj, struct ib_uverbs_file *ufile, struct uverbs_attr_bundle *attrs); -void rdma_alloc_abort_uobject(struct ib_uobject *uobj); -int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj); +void rdma_alloc_abort_uobject(struct ib_uobject *uobj, + struct uverbs_attr_bundle *attrs); +int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj, + struct uverbs_attr_bundle *attrs); struct uverbs_obj_fd_type { /* -- cgit v1.2.3 From c4367a26357be501338e41ceae7ebb7ce57064e5 Mon Sep 17 00:00:00 2001 From: Shamir Rabinovitch Date: Sun, 31 Mar 2019 19:10:05 +0300 Subject: IB: Pass uverbs_attr_bundle down ib_x destroy path The uverbs_attr_bundle with the ucontext is sent down to the drivers' ib_x destroy path as ib_udata. The next patch will use the ib_udata to free the drivers' destroy path from its dependency on 'uobject->context', as we already did for the create path. 
Signed-off-by: Shamir Rabinovitch Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cq.c | 19 +- drivers/infiniband/core/uverbs.h | 2 +- drivers/infiniband/core/uverbs_cmd.c | 17 +- drivers/infiniband/core/uverbs_std_types.c | 15 +- drivers/infiniband/core/uverbs_std_types_cq.c | 2 +- drivers/infiniband/core/uverbs_std_types_dm.c | 2 +- drivers/infiniband/core/uverbs_std_types_mr.c | 5 +- drivers/infiniband/core/verbs.c | 75 ++++---- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 14 +- drivers/infiniband/hw/bnxt_re/ib_verbs.h | 14 +- drivers/infiniband/hw/cxgb3/iwch_provider.c | 27 ++- drivers/infiniband/hw/cxgb4/cq.c | 2 +- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 13 +- drivers/infiniband/hw/cxgb4/mem.c | 7 +- drivers/infiniband/hw/cxgb4/provider.c | 4 +- drivers/infiniband/hw/cxgb4/qp.c | 4 +- drivers/infiniband/hw/hns/hns_roce_ah.c | 2 +- drivers/infiniband/hw/hns/hns_roce_cq.c | 4 +- drivers/infiniband/hw/hns/hns_roce_device.h | 19 +- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 18 +- drivers/infiniband/hw/hns/hns_roce_hw_v1.h | 2 +- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 2 +- drivers/infiniband/hw/hns/hns_roce_mr.c | 6 +- drivers/infiniband/hw/hns/hns_roce_pd.c | 2 +- drivers/infiniband/hw/hns/hns_roce_srq.c | 2 +- drivers/infiniband/hw/i40iw/i40iw_cm.c | 3 +- drivers/infiniband/hw/i40iw/i40iw_verbs.c | 20 ++- drivers/infiniband/hw/mlx4/ah.c | 2 +- drivers/infiniband/hw/mlx4/cq.c | 2 +- drivers/infiniband/hw/mlx4/mad.c | 10 +- drivers/infiniband/hw/mlx4/main.c | 4 +- drivers/infiniband/hw/mlx4/mlx4_ib.h | 17 +- drivers/infiniband/hw/mlx4/mr.c | 7 +- drivers/infiniband/hw/mlx4/qp.c | 4 +- drivers/infiniband/hw/mlx4/srq.c | 2 +- drivers/infiniband/hw/mlx5/ah.c | 2 +- drivers/infiniband/hw/mlx5/cq.c | 3 +- drivers/infiniband/hw/mlx5/main.c | 30 ++-- drivers/infiniband/hw/mlx5/mlx5_ib.h | 21 ++- drivers/infiniband/hw/mlx5/mr.c | 7 +- drivers/infiniband/hw/mlx5/qp.c | 6 +- drivers/infiniband/hw/mlx5/srq.c | 2 +- 
drivers/infiniband/hw/mthca/mthca_provider.c | 12 +- drivers/infiniband/hw/nes/nes_cm.c | 3 +- drivers/infiniband/hw/nes/nes_verbs.c | 17 +- drivers/infiniband/hw/ocrdma/ocrdma_ah.c | 2 +- drivers/infiniband/hw/ocrdma/ocrdma_ah.h | 2 +- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 15 +- drivers/infiniband/hw/ocrdma/ocrdma_verbs.h | 15 +- drivers/infiniband/hw/qedr/verbs.c | 16 +- drivers/infiniband/hw/qedr/verbs.h | 14 +- drivers/infiniband/hw/usnic/usnic_ib_verbs.c | 9 +- drivers/infiniband/hw/usnic/usnic_ib_verbs.h | 8 +- drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c | 5 +- drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c | 4 +- drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c | 3 +- drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c | 5 +- drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c | 7 +- drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h | 14 +- drivers/infiniband/sw/rdmavt/ah.c | 4 +- drivers/infiniband/sw/rdmavt/ah.h | 3 +- drivers/infiniband/sw/rdmavt/cq.c | 3 +- drivers/infiniband/sw/rdmavt/cq.h | 2 +- drivers/infiniband/sw/rdmavt/mr.c | 7 +- drivers/infiniband/sw/rdmavt/mr.h | 7 +- drivers/infiniband/sw/rdmavt/pd.c | 3 +- drivers/infiniband/sw/rdmavt/pd.h | 2 +- drivers/infiniband/sw/rdmavt/qp.c | 2 +- drivers/infiniband/sw/rdmavt/qp.h | 2 +- drivers/infiniband/sw/rdmavt/srq.c | 2 +- drivers/infiniband/sw/rdmavt/srq.h | 2 +- drivers/infiniband/sw/rxe/rxe_verbs.c | 17 +- include/rdma/ib_verbs.h | 228 ++++++++++++++++++++---- 73 files changed, 513 insertions(+), 343 deletions(-) diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c index d61e5e1427c2..4797eef549c3 100644 --- a/drivers/infiniband/core/cq.c +++ b/drivers/infiniband/core/cq.c @@ -128,15 +128,17 @@ static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private) * @comp_vector: HCA completion vectors for this CQ * @poll_ctx: context to poll the CQ from. * @caller: module owner name. 
+ * @udata: Valid user data or NULL for kernel object * * This is the proper interface to allocate a CQ for in-kernel users. A * CQ allocated with this interface will automatically be polled from the * specified context. The ULP must use wr->wr_cqe instead of wr->wr_id * to use this CQ abstraction. */ -struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private, - int nr_cqe, int comp_vector, - enum ib_poll_context poll_ctx, const char *caller) +struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private, + int nr_cqe, int comp_vector, + enum ib_poll_context poll_ctx, + const char *caller, struct ib_udata *udata) { struct ib_cq_init_attr cq_attr = { .cqe = nr_cqe, @@ -193,16 +195,17 @@ out_free_wc: kfree(cq->wc); rdma_restrack_del(&cq->res); out_destroy_cq: - cq->device->ops.destroy_cq(cq); + cq->device->ops.destroy_cq(cq, udata); return ERR_PTR(ret); } -EXPORT_SYMBOL(__ib_alloc_cq); +EXPORT_SYMBOL(__ib_alloc_cq_user); /** * ib_free_cq - free a completion queue * @cq: completion queue to free. 
+ * @udata: User data or NULL for kernel object */ -void ib_free_cq(struct ib_cq *cq) +void ib_free_cq_user(struct ib_cq *cq, struct ib_udata *udata) { int ret; @@ -225,7 +228,7 @@ void ib_free_cq(struct ib_cq *cq) kfree(cq->wc); rdma_restrack_del(&cq->res); - ret = cq->device->ops.destroy_cq(cq); + ret = cq->device->ops.destroy_cq(cq, udata); WARN_ON_ONCE(ret); } -EXPORT_SYMBOL(ib_free_cq); +EXPORT_SYMBOL(ib_free_cq_user); diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index ea0bc6885517..fa5ea6529333 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -240,7 +240,7 @@ void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_event_handler(struct ib_event_handler *handler, struct ib_event *event); int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject, struct ib_xrcd *xrcd, - enum rdma_remove_reason why); + enum rdma_remove_reason why, struct ib_udata *udata); int uverbs_dealloc_mw(struct ib_mw *mw); void ib_uverbs_detach_umcast(struct ib_qp *qp, diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 726275288887..fe63dfd5f1b6 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -439,7 +439,7 @@ static int ib_uverbs_alloc_pd(struct uverbs_attr_bundle *attrs) return uobj_alloc_commit(uobj, attrs); err_copy: - ib_dealloc_pd(pd); + ib_dealloc_pd_user(pd, &attrs->driver_udata); pd = NULL; err_alloc: kfree(pd); @@ -643,7 +643,7 @@ err_copy: } err_dealloc_xrcd: - ib_dealloc_xrcd(xrcd); + ib_dealloc_xrcd(xrcd, &attrs->driver_udata); err: uobj_alloc_abort(&obj->uobject, attrs); @@ -669,9 +669,8 @@ static int ib_uverbs_close_xrcd(struct uverbs_attr_bundle *attrs) return uobj_perform_destroy(UVERBS_OBJECT_XRCD, cmd.xrcd_handle, attrs); } -int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject, - struct ib_xrcd *xrcd, - enum rdma_remove_reason why) +int ib_uverbs_dealloc_xrcd(struct ib_uobject 
*uobject, struct ib_xrcd *xrcd, + enum rdma_remove_reason why, struct ib_udata *udata) { struct inode *inode; int ret; @@ -681,7 +680,7 @@ int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject, if (inode && !atomic_dec_and_test(&xrcd->usecnt)) return 0; - ret = ib_dealloc_xrcd(xrcd); + ret = ib_dealloc_xrcd(xrcd, udata); if (ib_is_destroy_retryable(ret, why, uobject)) { atomic_inc(&xrcd->usecnt); @@ -766,7 +765,7 @@ static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs) return uobj_alloc_commit(uobj, attrs); err_copy: - ib_dereg_mr(mr); + ib_dereg_mr_user(mr, &attrs->driver_udata); err_put: uobj_put_obj_read(pd); @@ -2965,7 +2964,7 @@ static int ib_uverbs_ex_create_wq(struct uverbs_attr_bundle *attrs) return uobj_alloc_commit(&obj->uevent.uobject, attrs); err_copy: - ib_destroy_wq(wq); + ib_destroy_wq(wq, &attrs->driver_udata); err_put_cq: uobj_put_obj_read(cq); err_put_pd: @@ -3461,7 +3460,7 @@ static int __uverbs_create_xsrq(struct uverbs_attr_bundle *attrs, return uobj_alloc_commit(&obj->uevent.uobject, attrs); err_copy: - ib_destroy_srq(srq); + ib_destroy_srq_user(srq, &attrs->driver_udata); err_put: uobj_put_obj_read(pd); diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c index a1b22fca057e..c625f590a8f0 100644 --- a/drivers/infiniband/core/uverbs_std_types.c +++ b/drivers/infiniband/core/uverbs_std_types.c @@ -43,8 +43,9 @@ static int uverbs_free_ah(struct ib_uobject *uobject, enum rdma_remove_reason why, struct uverbs_attr_bundle *attrs) { - return rdma_destroy_ah((struct ib_ah *)uobject->object, - RDMA_DESTROY_AH_SLEEPABLE); + return rdma_destroy_ah_user((struct ib_ah *)uobject->object, + RDMA_DESTROY_AH_SLEEPABLE, + &attrs->driver_udata); } static int uverbs_free_flow(struct ib_uobject *uobject, @@ -97,7 +98,7 @@ static int uverbs_free_qp(struct ib_uobject *uobject, ib_uverbs_detach_umcast(qp, uqp); } - ret = ib_destroy_qp(qp); + ret = ib_destroy_qp_user(qp, &attrs->driver_udata); if 
(ib_is_destroy_retryable(ret, why, uobject)) return ret; @@ -133,7 +134,7 @@ static int uverbs_free_wq(struct ib_uobject *uobject, container_of(uobject, struct ib_uwq_object, uevent.uobject); int ret; - ret = ib_destroy_wq(wq); + ret = ib_destroy_wq(wq, &attrs->driver_udata); if (ib_is_destroy_retryable(ret, why, uobject)) return ret; @@ -151,7 +152,7 @@ static int uverbs_free_srq(struct ib_uobject *uobject, enum ib_srq_type srq_type = srq->srq_type; int ret; - ret = ib_destroy_srq(srq); + ret = ib_destroy_srq_user(srq, &attrs->driver_udata); if (ib_is_destroy_retryable(ret, why, uobject)) return ret; @@ -180,7 +181,7 @@ static int uverbs_free_xrcd(struct ib_uobject *uobject, return ret; mutex_lock(&uobject->context->ufile->device->xrcd_tree_mutex); - ret = ib_uverbs_dealloc_xrcd(uobject, xrcd, why); + ret = ib_uverbs_dealloc_xrcd(uobject, xrcd, why, &attrs->driver_udata); mutex_unlock(&uobject->context->ufile->device->xrcd_tree_mutex); return ret; @@ -197,7 +198,7 @@ static int uverbs_free_pd(struct ib_uobject *uobject, if (ret) return ret; - ib_dealloc_pd(pd); + ib_dealloc_pd_user(pd, &attrs->driver_udata); return 0; } diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c index 5664a8f48527..f03506ece016 100644 --- a/drivers/infiniband/core/uverbs_std_types_cq.c +++ b/drivers/infiniband/core/uverbs_std_types_cq.c @@ -44,7 +44,7 @@ static int uverbs_free_cq(struct ib_uobject *uobject, container_of(uobject, struct ib_ucq_object, uobject); int ret; - ret = ib_destroy_cq(cq); + ret = ib_destroy_cq_user(cq, &attrs->driver_udata); if (ib_is_destroy_retryable(ret, why, uobject)) return ret; diff --git a/drivers/infiniband/core/uverbs_std_types_dm.c b/drivers/infiniband/core/uverbs_std_types_dm.c index 50d71522e1cd..c9b68dcf8f5c 100644 --- a/drivers/infiniband/core/uverbs_std_types_dm.c +++ b/drivers/infiniband/core/uverbs_std_types_dm.c @@ -45,7 +45,7 @@ static int uverbs_free_dm(struct ib_uobject *uobject, if (ret) 
return ret; - return dm->device->ops.dealloc_dm(dm); + return dm->device->ops.dealloc_dm(dm, attrs); } static int UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)( diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c b/drivers/infiniband/core/uverbs_std_types_mr.c index a74b73f684d4..610d3b9f7654 100644 --- a/drivers/infiniband/core/uverbs_std_types_mr.c +++ b/drivers/infiniband/core/uverbs_std_types_mr.c @@ -38,7 +38,8 @@ static int uverbs_free_mr(struct ib_uobject *uobject, enum rdma_remove_reason why, struct uverbs_attr_bundle *attrs) { - return ib_dereg_mr((struct ib_mr *)uobject->object); + return ib_dereg_mr_user((struct ib_mr *)uobject->object, + &attrs->driver_udata); } static int UVERBS_HANDLER(UVERBS_METHOD_ADVISE_MR)( @@ -147,7 +148,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)( return 0; err_dereg: - ib_dereg_mr(mr); + ib_dereg_mr_user(mr, &attrs->driver_udata); return ret; } diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 5a5e83f5f0fc..ba9a89df815d 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -316,17 +316,18 @@ EXPORT_SYMBOL(__ib_alloc_pd); /** * ib_dealloc_pd - Deallocates a protection domain. * @pd: The protection domain to deallocate. + * @udata: Valid user data or NULL for kernel object * * It is an error to call this function while any resources in the pd still * exist. The caller is responsible to synchronously destroy them and * guarantee no new allocations will happen. 
*/ -void ib_dealloc_pd(struct ib_pd *pd) +void ib_dealloc_pd_user(struct ib_pd *pd, struct ib_udata *udata) { int ret; if (pd->__internal_mr) { - ret = pd->device->ops.dereg_mr(pd->__internal_mr); + ret = pd->device->ops.dereg_mr(pd->__internal_mr, NULL); WARN_ON(ret); pd->__internal_mr = NULL; } @@ -336,10 +337,10 @@ void ib_dealloc_pd(struct ib_pd *pd) WARN_ON(atomic_read(&pd->usecnt)); rdma_restrack_del(&pd->res); - pd->device->ops.dealloc_pd(pd); + pd->device->ops.dealloc_pd(pd, udata); kfree(pd); } -EXPORT_SYMBOL(ib_dealloc_pd); +EXPORT_SYMBOL(ib_dealloc_pd_user); /* Address handles */ @@ -930,7 +931,7 @@ int rdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr) } EXPORT_SYMBOL(rdma_query_ah); -int rdma_destroy_ah(struct ib_ah *ah, u32 flags) +int rdma_destroy_ah_user(struct ib_ah *ah, u32 flags, struct ib_udata *udata) { const struct ib_gid_attr *sgid_attr = ah->sgid_attr; struct ib_pd *pd; @@ -939,7 +940,7 @@ int rdma_destroy_ah(struct ib_ah *ah, u32 flags) might_sleep_if(flags & RDMA_DESTROY_AH_SLEEPABLE); pd = ah->pd; - ret = ah->device->ops.destroy_ah(ah, flags); + ret = ah->device->ops.destroy_ah(ah, flags, udata); if (!ret) { atomic_dec(&pd->usecnt); if (sgid_attr) @@ -948,7 +949,7 @@ int rdma_destroy_ah(struct ib_ah *ah, u32 flags) return ret; } -EXPORT_SYMBOL(rdma_destroy_ah); +EXPORT_SYMBOL(rdma_destroy_ah_user); /* Shared receive queues */ @@ -1003,7 +1004,7 @@ int ib_query_srq(struct ib_srq *srq, } EXPORT_SYMBOL(ib_query_srq); -int ib_destroy_srq(struct ib_srq *srq) +int ib_destroy_srq_user(struct ib_srq *srq, struct ib_udata *udata) { struct ib_pd *pd; enum ib_srq_type srq_type; @@ -1021,7 +1022,7 @@ int ib_destroy_srq(struct ib_srq *srq) if (srq_type == IB_SRQT_XRC) xrcd = srq->ext.xrc.xrcd; - ret = srq->device->ops.destroy_srq(srq); + ret = srq->device->ops.destroy_srq(srq, udata); if (!ret) { atomic_dec(&pd->usecnt); if (srq_type == IB_SRQT_XRC) @@ -1032,7 +1033,7 @@ int ib_destroy_srq(struct ib_srq *srq) return ret; } 
-EXPORT_SYMBOL(ib_destroy_srq); +EXPORT_SYMBOL(ib_destroy_srq_user); /* Queue pairs */ @@ -1111,8 +1112,9 @@ struct ib_qp *ib_open_qp(struct ib_xrcd *xrcd, } EXPORT_SYMBOL(ib_open_qp); -static struct ib_qp *create_xrc_qp(struct ib_qp *qp, - struct ib_qp_init_attr *qp_init_attr) +static struct ib_qp *create_xrc_qp_user(struct ib_qp *qp, + struct ib_qp_init_attr *qp_init_attr, + struct ib_udata *udata) { struct ib_qp *real_qp = qp; @@ -1134,8 +1136,9 @@ static struct ib_qp *create_xrc_qp(struct ib_qp *qp, return qp; } -struct ib_qp *ib_create_qp(struct ib_pd *pd, - struct ib_qp_init_attr *qp_init_attr) +struct ib_qp *ib_create_qp_user(struct ib_pd *pd, + struct ib_qp_init_attr *qp_init_attr, + struct ib_udata *udata) { struct ib_device *device = pd ? pd->device : qp_init_attr->xrcd->device; struct ib_qp *qp; @@ -1176,7 +1179,8 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd, qp->port = 0; if (qp_init_attr->qp_type == IB_QPT_XRC_TGT) { - struct ib_qp *xrc_qp = create_xrc_qp(qp, qp_init_attr); + struct ib_qp *xrc_qp = + create_xrc_qp_user(qp, qp_init_attr, udata); if (IS_ERR(xrc_qp)) { ret = PTR_ERR(xrc_qp); @@ -1230,7 +1234,7 @@ err: return ERR_PTR(ret); } -EXPORT_SYMBOL(ib_create_qp); +EXPORT_SYMBOL(ib_create_qp_user); static const struct { int valid; @@ -1837,7 +1841,7 @@ static int __ib_destroy_shared_qp(struct ib_qp *qp) return 0; } -int ib_destroy_qp(struct ib_qp *qp) +int ib_destroy_qp_user(struct ib_qp *qp, struct ib_udata *udata) { const struct ib_gid_attr *alt_path_sgid_attr = qp->alt_path_sgid_attr; const struct ib_gid_attr *av_sgid_attr = qp->av_sgid_attr; @@ -1869,7 +1873,7 @@ int ib_destroy_qp(struct ib_qp *qp) rdma_rw_cleanup_mrs(qp); rdma_restrack_del(&qp->res); - ret = qp->device->ops.destroy_qp(qp); + ret = qp->device->ops.destroy_qp(qp, udata); if (!ret) { if (alt_path_sgid_attr) rdma_put_gid_attr(alt_path_sgid_attr); @@ -1894,7 +1898,7 @@ int ib_destroy_qp(struct ib_qp *qp) return ret; } -EXPORT_SYMBOL(ib_destroy_qp); 
+EXPORT_SYMBOL(ib_destroy_qp_user); /* Completion queues */ @@ -1933,15 +1937,15 @@ int rdma_set_cq_moderation(struct ib_cq *cq, u16 cq_count, u16 cq_period) } EXPORT_SYMBOL(rdma_set_cq_moderation); -int ib_destroy_cq(struct ib_cq *cq) +int ib_destroy_cq_user(struct ib_cq *cq, struct ib_udata *udata) { if (atomic_read(&cq->usecnt)) return -EBUSY; rdma_restrack_del(&cq->res); - return cq->device->ops.destroy_cq(cq); + return cq->device->ops.destroy_cq(cq, udata); } -EXPORT_SYMBOL(ib_destroy_cq); +EXPORT_SYMBOL(ib_destroy_cq_user); int ib_resize_cq(struct ib_cq *cq, int cqe) { @@ -1952,14 +1956,14 @@ EXPORT_SYMBOL(ib_resize_cq); /* Memory regions */ -int ib_dereg_mr(struct ib_mr *mr) +int ib_dereg_mr_user(struct ib_mr *mr, struct ib_udata *udata) { struct ib_pd *pd = mr->pd; struct ib_dm *dm = mr->dm; int ret; rdma_restrack_del(&mr->res); - ret = mr->device->ops.dereg_mr(mr); + ret = mr->device->ops.dereg_mr(mr, udata); if (!ret) { atomic_dec(&pd->usecnt); if (dm) @@ -1968,13 +1972,14 @@ int ib_dereg_mr(struct ib_mr *mr) return ret; } -EXPORT_SYMBOL(ib_dereg_mr); +EXPORT_SYMBOL(ib_dereg_mr_user); /** * ib_alloc_mr() - Allocates a memory region * @pd: protection domain associated with the region * @mr_type: memory region type * @max_num_sg: maximum sg entries available for registration. + * @udata: user data or null for kernel objects * * Notes: * Memory registeration page/sg lists must not exceed max_num_sg. @@ -1982,16 +1987,15 @@ EXPORT_SYMBOL(ib_dereg_mr); * max_num_sg * used_page_size. 
* */ -struct ib_mr *ib_alloc_mr(struct ib_pd *pd, - enum ib_mr_type mr_type, - u32 max_num_sg) +struct ib_mr *ib_alloc_mr_user(struct ib_pd *pd, enum ib_mr_type mr_type, + u32 max_num_sg, struct ib_udata *udata) { struct ib_mr *mr; if (!pd->device->ops.alloc_mr) return ERR_PTR(-EOPNOTSUPP); - mr = pd->device->ops.alloc_mr(pd, mr_type, max_num_sg); + mr = pd->device->ops.alloc_mr(pd, mr_type, max_num_sg, udata); if (!IS_ERR(mr)) { mr->device = pd->device; mr->pd = pd; @@ -2005,7 +2009,7 @@ struct ib_mr *ib_alloc_mr(struct ib_pd *pd, return mr; } -EXPORT_SYMBOL(ib_alloc_mr); +EXPORT_SYMBOL(ib_alloc_mr_user); /* "Fast" memory regions */ @@ -2151,7 +2155,7 @@ struct ib_xrcd *__ib_alloc_xrcd(struct ib_device *device, const char *caller) } EXPORT_SYMBOL(__ib_alloc_xrcd); -int ib_dealloc_xrcd(struct ib_xrcd *xrcd) +int ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata) { struct ib_qp *qp; int ret; @@ -2166,7 +2170,7 @@ int ib_dealloc_xrcd(struct ib_xrcd *xrcd) return ret; } - return xrcd->device->ops.dealloc_xrcd(xrcd); + return xrcd->device->ops.dealloc_xrcd(xrcd, udata); } EXPORT_SYMBOL(ib_dealloc_xrcd); @@ -2210,10 +2214,11 @@ struct ib_wq *ib_create_wq(struct ib_pd *pd, EXPORT_SYMBOL(ib_create_wq); /** - * ib_destroy_wq - Destroys the specified WQ. + * ib_destroy_wq - Destroys the specified user WQ. * @wq: The WQ to destroy. 
+ * @udata: Valid user data */ -int ib_destroy_wq(struct ib_wq *wq) +int ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata) { int err; struct ib_cq *cq = wq->cq; @@ -2222,7 +2227,7 @@ int ib_destroy_wq(struct ib_wq *wq) if (atomic_read(&wq->usecnt)) return -EBUSY; - err = wq->device->ops.destroy_wq(wq); + err = wq->device->ops.destroy_wq(wq, udata); if (!err) { atomic_dec(&pd->usecnt); atomic_dec(&cq->usecnt); diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 33b2a06c6dde..a586ac28630b 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -564,7 +564,7 @@ fail: } /* Protection Domains */ -void bnxt_re_dealloc_pd(struct ib_pd *ib_pd) +void bnxt_re_dealloc_pd(struct ib_pd *ib_pd, struct ib_udata *udata) { struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd); struct bnxt_re_dev *rdev = pd->rdev; @@ -635,7 +635,7 @@ fail: } /* Address Handles */ -int bnxt_re_destroy_ah(struct ib_ah *ib_ah, u32 flags) +int bnxt_re_destroy_ah(struct ib_ah *ib_ah, u32 flags, struct ib_udata *udata) { struct bnxt_re_ah *ah = container_of(ib_ah, struct bnxt_re_ah, ib_ah); struct bnxt_re_dev *rdev = ah->rdev; @@ -789,7 +789,7 @@ void bnxt_re_unlock_cqs(struct bnxt_re_qp *qp, } /* Queue Pairs */ -int bnxt_re_destroy_qp(struct ib_qp *ib_qp) +int bnxt_re_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata) { struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp); struct bnxt_re_dev *rdev = qp->rdev; @@ -1327,7 +1327,7 @@ static enum ib_mtu __to_ib_mtu(u32 mtu) } /* Shared Receive Queues */ -int bnxt_re_destroy_srq(struct ib_srq *ib_srq) +int bnxt_re_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata) { struct bnxt_re_srq *srq = container_of(ib_srq, struct bnxt_re_srq, ib_srq); @@ -2560,7 +2560,7 @@ int bnxt_re_post_recv(struct ib_qp *ib_qp, const struct ib_recv_wr *wr, } /* Completion Queues */ -int bnxt_re_destroy_cq(struct ib_cq *ib_cq) +int 
bnxt_re_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) { int rc; struct bnxt_re_cq *cq; @@ -3382,7 +3382,7 @@ fail: return ERR_PTR(rc); } -int bnxt_re_dereg_mr(struct ib_mr *ib_mr) +int bnxt_re_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata) { struct bnxt_re_mr *mr = container_of(ib_mr, struct bnxt_re_mr, ib_mr); struct bnxt_re_dev *rdev = mr->rdev; @@ -3428,7 +3428,7 @@ int bnxt_re_map_mr_sg(struct ib_mr *ib_mr, struct scatterlist *sg, int sg_nents, } struct ib_mr *bnxt_re_alloc_mr(struct ib_pd *ib_pd, enum ib_mr_type type, - u32 max_num_sg) + u32 max_num_sg, struct ib_udata *udata) { struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd); struct bnxt_re_dev *rdev = pd->rdev; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h index e45465ed4eee..44e49988600e 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h @@ -165,14 +165,14 @@ enum rdma_link_layer bnxt_re_get_link_layer(struct ib_device *ibdev, u8 port_num); int bnxt_re_alloc_pd(struct ib_pd *pd, struct ib_ucontext *context, struct ib_udata *udata); -void bnxt_re_dealloc_pd(struct ib_pd *pd); +void bnxt_re_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata); struct ib_ah *bnxt_re_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, u32 flags, struct ib_udata *udata); int bnxt_re_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); int bnxt_re_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); -int bnxt_re_destroy_ah(struct ib_ah *ah, u32 flags); +int bnxt_re_destroy_ah(struct ib_ah *ah, u32 flags, struct ib_udata *udata); struct ib_srq *bnxt_re_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *srq_init_attr, struct ib_udata *udata); @@ -180,7 +180,7 @@ int bnxt_re_modify_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr, enum ib_srq_attr_mask srq_attr_mask, struct ib_udata *udata); int bnxt_re_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); -int 
bnxt_re_destroy_srq(struct ib_srq *srq); +int bnxt_re_destroy_srq(struct ib_srq *srq, struct ib_udata *udata); int bnxt_re_post_srq_recv(struct ib_srq *srq, const struct ib_recv_wr *recv_wr, const struct ib_recv_wr **bad_recv_wr); struct ib_qp *bnxt_re_create_qp(struct ib_pd *pd, @@ -190,7 +190,7 @@ int bnxt_re_modify_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_udata *udata); int bnxt_re_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); -int bnxt_re_destroy_qp(struct ib_qp *qp); +int bnxt_re_destroy_qp(struct ib_qp *qp, struct ib_udata *udata); int bnxt_re_post_send(struct ib_qp *qp, const struct ib_send_wr *send_wr, const struct ib_send_wr **bad_send_wr); int bnxt_re_post_recv(struct ib_qp *qp, const struct ib_recv_wr *recv_wr, @@ -199,7 +199,7 @@ struct ib_cq *bnxt_re_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, struct ib_ucontext *context, struct ib_udata *udata); -int bnxt_re_destroy_cq(struct ib_cq *cq); +int bnxt_re_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); int bnxt_re_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc); int bnxt_re_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags); struct ib_mr *bnxt_re_get_dma_mr(struct ib_pd *pd, int mr_access_flags); @@ -207,8 +207,8 @@ struct ib_mr *bnxt_re_get_dma_mr(struct ib_pd *pd, int mr_access_flags); int bnxt_re_map_mr_sg(struct ib_mr *ib_mr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); struct ib_mr *bnxt_re_alloc_mr(struct ib_pd *ib_pd, enum ib_mr_type mr_type, - u32 max_num_sg); -int bnxt_re_dereg_mr(struct ib_mr *mr); + u32 max_num_sg, struct ib_udata *udata); +int bnxt_re_dereg_mr(struct ib_mr *mr, struct ib_udata *udata); struct ib_mw *bnxt_re_alloc_mw(struct ib_pd *ib_pd, enum ib_mw_type type, struct ib_udata *udata); int bnxt_re_dealloc_mw(struct ib_mw *mw); diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c 
b/drivers/infiniband/hw/cxgb3/iwch_provider.c index 21aac6bca06f..e10a56242998 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -88,7 +88,7 @@ static int iwch_alloc_ucontext(struct ib_ucontext *ucontext, return 0; } -static int iwch_destroy_cq(struct ib_cq *ib_cq) +static int iwch_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) { struct iwch_cq *chp; @@ -175,7 +175,7 @@ static struct ib_cq *iwch_create_cq(struct ib_device *ibdev, mm = kmalloc(sizeof *mm, GFP_KERNEL); if (!mm) { - iwch_destroy_cq(&chp->ibcq); + iwch_destroy_cq(&chp->ibcq, udata); return ERR_PTR(-ENOMEM); } uresp.cqid = chp->cq.cqid; @@ -201,7 +201,7 @@ static struct ib_cq *iwch_create_cq(struct ib_device *ibdev, } if (ib_copy_to_udata(udata, &uresp, resplen)) { kfree(mm); - iwch_destroy_cq(&chp->ibcq); + iwch_destroy_cq(&chp->ibcq, udata); return ERR_PTR(-EFAULT); } insert_mmap(ucontext, mm); @@ -367,7 +367,7 @@ static int iwch_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) return ret; } -static void iwch_deallocate_pd(struct ib_pd *pd) +static void iwch_deallocate_pd(struct ib_pd *pd, struct ib_udata *udata) { struct iwch_dev *rhp; struct iwch_pd *php; @@ -398,7 +398,7 @@ static int iwch_allocate_pd(struct ib_pd *pd, struct ib_ucontext *context, struct iwch_alloc_pd_resp resp = {.pdid = php->pdid}; if (ib_copy_to_udata(udata, &resp, sizeof(resp))) { - iwch_deallocate_pd(&php->ibpd); + iwch_deallocate_pd(&php->ibpd, udata); return -EFAULT; } } @@ -406,7 +406,7 @@ static int iwch_allocate_pd(struct ib_pd *pd, struct ib_ucontext *context, return 0; } -static int iwch_dereg_mr(struct ib_mr *ib_mr) +static int iwch_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata) { struct iwch_dev *rhp; struct iwch_mr *mhp; @@ -590,7 +590,7 @@ pbl_done: uresp.pbl_addr); if (ib_copy_to_udata(udata, &uresp, sizeof (uresp))) { - iwch_dereg_mr(&mhp->ibmr); + iwch_dereg_mr(&mhp->ibmr, udata); err = -EFAULT; goto err; } @@ -661,9 +661,8 
@@ static int iwch_dealloc_mw(struct ib_mw *mw) return 0; } -static struct ib_mr *iwch_alloc_mr(struct ib_pd *pd, - enum ib_mr_type mr_type, - u32 max_num_sg) +static struct ib_mr *iwch_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, + u32 max_num_sg, struct ib_udata *udata) { struct iwch_dev *rhp; struct iwch_pd *php; @@ -742,7 +741,7 @@ static int iwch_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, iwch_set_page); } -static int iwch_destroy_qp(struct ib_qp *ib_qp) +static int iwch_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata) { struct iwch_dev *rhp; struct iwch_qp *qhp; @@ -885,14 +884,14 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd, mm1 = kmalloc(sizeof *mm1, GFP_KERNEL); if (!mm1) { - iwch_destroy_qp(&qhp->ibqp); + iwch_destroy_qp(&qhp->ibqp, udata); return ERR_PTR(-ENOMEM); } mm2 = kmalloc(sizeof *mm2, GFP_KERNEL); if (!mm2) { kfree(mm1); - iwch_destroy_qp(&qhp->ibqp); + iwch_destroy_qp(&qhp->ibqp, udata); return ERR_PTR(-ENOMEM); } @@ -909,7 +908,7 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd, if (ib_copy_to_udata(udata, &uresp, sizeof (uresp))) { kfree(mm1); kfree(mm2); - iwch_destroy_qp(&qhp->ibqp); + iwch_destroy_qp(&qhp->ibqp, udata); return ERR_PTR(-EFAULT); } mm1->key = uresp.key; diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index 1fa5f6445be3..562187f0c5af 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -968,7 +968,7 @@ int c4iw_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) return !err || err == -ENODATA ? 
npolled : err; } -int c4iw_destroy_cq(struct ib_cq *ib_cq) +int c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) { struct c4iw_cq *chp; struct c4iw_ucontext *ucontext; diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 4c918fe2430e..586fd1a00d33 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -979,9 +979,8 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param); int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len); void c4iw_qp_add_ref(struct ib_qp *qp); void c4iw_qp_rem_ref(struct ib_qp *qp); -struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, - enum ib_mr_type mr_type, - u32 max_num_sg); +struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, + u32 max_num_sg, struct ib_udata *udata); int c4iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); int c4iw_dealloc_mw(struct ib_mw *mw); @@ -992,8 +991,8 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt, int acc, struct ib_udata *udata); struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc); -int c4iw_dereg_mr(struct ib_mr *ib_mr); -int c4iw_destroy_cq(struct ib_cq *ib_cq); +int c4iw_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata); +int c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata); struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, struct ib_ucontext *ib_context, @@ -1002,11 +1001,11 @@ int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); int c4iw_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *attr, enum ib_srq_attr_mask srq_attr_mask, struct ib_udata *udata); -int c4iw_destroy_srq(struct ib_srq *ib_srq); +int c4iw_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata); struct ib_srq *c4iw_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *attrs, struct ib_udata *udata); -int 
c4iw_destroy_qp(struct ib_qp *ib_qp); +int c4iw_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata); struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, struct ib_udata *udata); diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 81f5b5b026b1..811c0c8c5b16 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -683,9 +683,8 @@ int c4iw_dealloc_mw(struct ib_mw *mw) return 0; } -struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, - enum ib_mr_type mr_type, - u32 max_num_sg) +struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, + u32 max_num_sg, struct ib_udata *udata) { struct c4iw_dev *rhp; struct c4iw_pd *php; @@ -786,7 +785,7 @@ int c4iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, c4iw_set_page); } -int c4iw_dereg_mr(struct ib_mr *ib_mr) +int c4iw_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata) { struct c4iw_dev *rhp; struct c4iw_mr *mhp; diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 507c54572cc9..12f7d3ae6a53 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -190,7 +190,7 @@ static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) return ret; } -static void c4iw_deallocate_pd(struct ib_pd *pd) +static void c4iw_deallocate_pd(struct ib_pd *pd, struct ib_udata *udata) { struct c4iw_dev *rhp; struct c4iw_pd *php; @@ -224,7 +224,7 @@ static int c4iw_allocate_pd(struct ib_pd *pd, struct ib_ucontext *context, struct c4iw_alloc_pd_resp uresp = {.pdid = php->pdid}; if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) { - c4iw_deallocate_pd(&php->ibpd); + c4iw_deallocate_pd(&php->ibpd, udata); return -EFAULT; } } diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index b2ae5b40cc3e..76e6544cf0b9 100644 --- 
a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -2095,7 +2095,7 @@ out: return ret; } -int c4iw_destroy_qp(struct ib_qp *ib_qp) +int c4iw_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata) { struct c4iw_dev *rhp; struct c4iw_qp *qhp; @@ -2826,7 +2826,7 @@ err_free_srq: return ERR_PTR(ret); } -int c4iw_destroy_srq(struct ib_srq *ibsrq) +int c4iw_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) { struct c4iw_dev *rhp; struct c4iw_srq *srq; diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c index b3c8c45ec1e3..42067325ae5e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ah.c +++ b/drivers/infiniband/hw/hns/hns_roce_ah.c @@ -111,7 +111,7 @@ int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr) return 0; } -int hns_roce_destroy_ah(struct ib_ah *ah, u32 flags) +int hns_roce_destroy_ah(struct ib_ah *ah, u32 flags, struct ib_udata *udata) { kfree(to_hr_ah(ah)); diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index c50f241211e9..a4e95a310c16 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -444,14 +444,14 @@ err_cq: } EXPORT_SYMBOL_GPL(hns_roce_ib_create_cq); -int hns_roce_ib_destroy_cq(struct ib_cq *ib_cq) +int hns_roce_ib_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device); struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq); int ret = 0; if (hr_dev->hw->destroy_cq) { - ret = hr_dev->hw->destroy_cq(ib_cq); + ret = hr_dev->hw->destroy_cq(ib_cq, udata); } else { hns_roce_free_cq(hr_dev, hr_cq); hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt); diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 61411ca655f5..780a7ba204db 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -905,7 +905,7 @@ struct 
hns_roce_hw { int (*modify_qp)(struct ib_qp *ibqp, const struct ib_qp_attr *attr, int attr_mask, enum ib_qp_state cur_state, enum ib_qp_state new_state); - int (*destroy_qp)(struct ib_qp *ibqp); + int (*destroy_qp)(struct ib_qp *ibqp, struct ib_udata *udata); int (*qp_flow_control_init)(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp); int (*post_send)(struct ib_qp *ibqp, const struct ib_send_wr *wr, @@ -914,8 +914,9 @@ struct hns_roce_hw { const struct ib_recv_wr **bad_recv_wr); int (*req_notify_cq)(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); int (*poll_cq)(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); - int (*dereg_mr)(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr); - int (*destroy_cq)(struct ib_cq *ibcq); + int (*dereg_mr)(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr, + struct ib_udata *udata); + int (*destroy_cq)(struct ib_cq *ibcq, struct ib_udata *udata); int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period); int (*init_eq)(struct hns_roce_dev *hr_dev); void (*cleanup_eq)(struct hns_roce_dev *hr_dev); @@ -1109,11 +1110,11 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *pd, u32 flags, struct ib_udata *udata); int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); -int hns_roce_destroy_ah(struct ib_ah *ah, u32 flags); +int hns_roce_destroy_ah(struct ib_ah *ah, u32 flags, struct ib_udata *udata); int hns_roce_alloc_pd(struct ib_pd *pd, struct ib_ucontext *context, struct ib_udata *udata); -void hns_roce_dealloc_pd(struct ib_pd *pd); +void hns_roce_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata); struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc); struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, @@ -1123,10 +1124,10 @@ int hns_roce_rereg_user_mr(struct ib_mr *mr, int flags, u64 start, u64 length, u64 virt_addr, int mr_access_flags, struct ib_pd *pd, struct ib_udata *udata); struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 
max_num_sg); + u32 max_num_sg, struct ib_udata *udata); int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); -int hns_roce_dereg_mr(struct ib_mr *ibmr); +int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata); int hns_roce_hw2sw_mpt(struct hns_roce_dev *hr_dev, struct hns_roce_cmd_mailbox *mailbox, unsigned long mpt_index); @@ -1150,7 +1151,7 @@ struct ib_srq *hns_roce_create_srq(struct ib_pd *pd, int hns_roce_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr, enum ib_srq_attr_mask srq_attr_mask, struct ib_udata *udata); -int hns_roce_destroy_srq(struct ib_srq *ibsrq); +int hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata); struct ib_qp *hns_roce_create_qp(struct ib_pd *ib_pd, struct ib_qp_init_attr *init_attr, @@ -1179,7 +1180,7 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev, struct ib_ucontext *context, struct ib_udata *udata); -int hns_roce_ib_destroy_cq(struct ib_cq *ib_cq); +int hns_roce_ib_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata); void hns_roce_free_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq); int hns_roce_db_map_user(struct hns_roce_ucontext *context, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 97515c340134..1863516f6be9 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -855,17 +855,17 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev) create_lp_qp_failed: for (i -= 1; i >= 0; i--) { hr_qp = free_mr->mr_free_qp[i]; - if (hns_roce_v1_destroy_qp(&hr_qp->ibqp)) + if (hns_roce_v1_destroy_qp(&hr_qp->ibqp, NULL)) dev_err(dev, "Destroy qp %d for mr free failed!\n", i); } - hns_roce_dealloc_pd(pd); + hns_roce_dealloc_pd(pd, NULL); alloc_pd_failed: kfree(pd); alloc_mem_failed: - if (hns_roce_ib_destroy_cq(cq)) + if (hns_roce_ib_destroy_cq(cq, NULL)) dev_err(dev, "Destroy cq for create_lp_qp 
failed!\n"); return ret; @@ -888,17 +888,17 @@ static void hns_roce_v1_release_lp_qp(struct hns_roce_dev *hr_dev) if (!hr_qp) continue; - ret = hns_roce_v1_destroy_qp(&hr_qp->ibqp); + ret = hns_roce_v1_destroy_qp(&hr_qp->ibqp, NULL); if (ret) dev_err(dev, "Destroy qp %d for mr free failed(%d)!\n", i, ret); } - ret = hns_roce_ib_destroy_cq(&free_mr->mr_free_cq->ib_cq); + ret = hns_roce_ib_destroy_cq(&free_mr->mr_free_cq->ib_cq, NULL); if (ret) dev_err(dev, "Destroy cq for mr_free failed(%d)!\n", ret); - hns_roce_dealloc_pd(&free_mr->mr_free_pd->ibpd); + hns_roce_dealloc_pd(&free_mr->mr_free_pd->ibpd, NULL); } static int hns_roce_db_init(struct hns_roce_dev *hr_dev) @@ -1096,7 +1096,7 @@ free_work: } static int hns_roce_v1_dereg_mr(struct hns_roce_dev *hr_dev, - struct hns_roce_mr *mr) + struct hns_roce_mr *mr, struct ib_udata *udata) { struct device *dev = &hr_dev->pdev->dev; struct hns_roce_mr_free_work *mr_work; @@ -3921,7 +3921,7 @@ static void hns_roce_v1_destroy_qp_work_fn(struct work_struct *work) dev_dbg(dev, "Accomplished destroy QP(0x%lx) work.\n", qpn); } -int hns_roce_v1_destroy_qp(struct ib_qp *ibqp) +int hns_roce_v1_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); @@ -3998,7 +3998,7 @@ int hns_roce_v1_destroy_qp(struct ib_qp *ibqp) return 0; } -static int hns_roce_v1_destroy_cq(struct ib_cq *ibcq) +static int hns_roce_v1_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ibcq->device); struct hns_roce_cq *hr_cq = to_hr_cq(ibcq); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h index 66440147d9eb..1a2c38785c7f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h @@ -1106,6 +1106,6 @@ struct hns_roce_v1_priv { int hns_dsaf_roce_reset(struct fwnode_handle *dsaf_fwnode, bool dereset); int 
hns_roce_v1_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); -int hns_roce_v1_destroy_qp(struct ib_qp *ibqp); +int hns_roce_v1_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata); #endif diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index dafc33b02e09..30b00240b7c8 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -4513,7 +4513,7 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev, return 0; } -static int hns_roce_v2_destroy_qp(struct ib_qp *ibqp) +static int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index b09f1cde2ff5..9119d875b13d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -1282,14 +1282,14 @@ free_cmd_mbox: return ret; } -int hns_roce_dereg_mr(struct ib_mr *ibmr) +int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device); struct hns_roce_mr *mr = to_hr_mr(ibmr); int ret = 0; if (hr_dev->hw->dereg_mr) { - ret = hr_dev->hw->dereg_mr(hr_dev, mr); + ret = hr_dev->hw->dereg_mr(hr_dev, mr, udata); } else { hns_roce_mr_free(hr_dev, mr); @@ -1303,7 +1303,7 @@ int hns_roce_dereg_mr(struct ib_mr *ibmr) } struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg) + u32 max_num_sg, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(pd->device); struct device *dev = hr_dev->dev; diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c index b9b97c5e97e6..504e6e466d72 100644 --- a/drivers/infiniband/hw/hns/hns_roce_pd.c +++ b/drivers/infiniband/hw/hns/hns_roce_pd.c @@ -86,7 +86,7 @@ int 
hns_roce_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, } EXPORT_SYMBOL_GPL(hns_roce_alloc_pd); -void hns_roce_dealloc_pd(struct ib_pd *pd) +void hns_roce_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) { hns_roce_pd_free(to_hr_dev(pd->device), to_hr_pd(pd)->pdn); } diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index a8ee2f6da967..5874dbb391fd 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -423,7 +423,7 @@ err_srq: return ERR_PTR(ret); } -int hns_roce_destroy_srq(struct ib_srq *ibsrq) +int hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device); struct hns_roce_srq *srq = to_hr_srq(ibsrq); diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index 1c6aa0efd2b6..8233f5a4e623 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -3490,7 +3490,8 @@ static void i40iw_qp_disconnect(struct i40iw_qp *iwqp) /* Need to free the Last Streaming Mode Message */ if (iwqp->ietf_mem.va) { if (iwqp->lsmm_mr) - iwibdev->ibdev.ops.dereg_mr(iwqp->lsmm_mr); + iwibdev->ibdev.ops.dereg_mr(iwqp->lsmm_mr, + NULL); i40iw_free_dma_mem(iwdev->sc_dev.hw, &iwqp->ietf_mem); } } diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index a8352e3ca23d..fd2d7426c832 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -342,8 +342,9 @@ error: /** * i40iw_dealloc_pd - deallocate pd * @ibpd: ptr of pd to be deallocated + * @udata: user data or null for kernel object */ -static void i40iw_dealloc_pd(struct ib_pd *ibpd) +static void i40iw_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct i40iw_pd *iwpd = to_iwpd(ibpd); struct i40iw_device *iwdev = to_iwdev(ibpd->device); @@ -413,7 +414,7 @@ static void 
i40iw_clean_cqes(struct i40iw_qp *iwqp, struct i40iw_cq *iwcq) * i40iw_destroy_qp - destroy qp * @ibqp: qp's ib pointer also to get to device's qp address */ -static int i40iw_destroy_qp(struct ib_qp *ibqp) +static int i40iw_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) { struct i40iw_qp *iwqp = to_iwqp(ibqp); @@ -744,8 +745,8 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, err_code = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); if (err_code) { i40iw_pr_err("copy_to_udata failed\n"); - i40iw_destroy_qp(&iwqp->ibqp); - /* let the completion of the qp destroy free the qp */ + i40iw_destroy_qp(&iwqp->ibqp, udata); + /* let the completion of the qp destroy free the qp */ return ERR_PTR(err_code); } } @@ -1063,8 +1064,9 @@ void i40iw_cq_wq_destroy(struct i40iw_device *iwdev, struct i40iw_sc_cq *cq) /** * i40iw_destroy_cq - destroy cq * @ib_cq: cq pointer + * @udata: user data or NULL for kernel object */ -static int i40iw_destroy_cq(struct ib_cq *ib_cq) +static int i40iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) { struct i40iw_cq *iwcq; struct i40iw_device *iwdev; @@ -1601,10 +1603,10 @@ static int i40iw_hw_alloc_stag(struct i40iw_device *iwdev, struct i40iw_mr *iwmr * @pd: ibpd pointer * @mr_type: memory for stag registrion * @max_num_sg: man number of pages + * @udata: user data or NULL for kernel objects */ -static struct ib_mr *i40iw_alloc_mr(struct ib_pd *pd, - enum ib_mr_type mr_type, - u32 max_num_sg) +static struct ib_mr *i40iw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, + u32 max_num_sg, struct ib_udata *udata) { struct i40iw_pd *iwpd = to_iwpd(pd); struct i40iw_device *iwdev = to_iwdev(pd->device); @@ -2038,7 +2040,7 @@ static void i40iw_del_memlist(struct i40iw_mr *iwmr, * i40iw_dereg_mr - deregister mr * @ib_mr: mr ptr for dereg */ -static int i40iw_dereg_mr(struct ib_mr *ib_mr) +static int i40iw_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata) { struct ib_pd *ibpd = ib_mr->pd; struct i40iw_pd *iwpd = 
to_iwpd(ibpd); diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c index 1672808262ba..6f552b780b89 100644 --- a/drivers/infiniband/hw/mlx4/ah.c +++ b/drivers/infiniband/hw/mlx4/ah.c @@ -250,7 +250,7 @@ int mlx4_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr) return 0; } -int mlx4_ib_destroy_ah(struct ib_ah *ah, u32 flags) +int mlx4_ib_destroy_ah(struct ib_ah *ah, u32 flags, struct ib_udata *udata) { kfree(to_mah(ah)); return 0; diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index 03ac72339dd2..0b730737fb25 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -485,7 +485,7 @@ out: return err; } -int mlx4_ib_destroy_cq(struct ib_cq *cq) +int mlx4_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) { struct mlx4_ib_dev *dev = to_mdev(cq->device); struct mlx4_ib_cq *mcq = to_mcq(cq); diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 936ee1314bcd..f090c1b40433 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -1411,7 +1411,7 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, sqp_mad = (struct mlx4_mad_snd_buf *) (sqp->tx_ring[wire_tx_ix].buf.addr); if (sqp->tx_ring[wire_tx_ix].ah) - mlx4_ib_destroy_ah(sqp->tx_ring[wire_tx_ix].ah, 0); + mlx4_ib_destroy_ah(sqp->tx_ring[wire_tx_ix].ah, 0, NULL); sqp->tx_ring[wire_tx_ix].ah = ah; ib_dma_sync_single_for_cpu(&dev->ib_dev, sqp->tx_ring[wire_tx_ix].buf.map, @@ -1450,7 +1450,7 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, spin_unlock(&sqp->tx_lock); sqp->tx_ring[wire_tx_ix].ah = NULL; out: - mlx4_ib_destroy_ah(ah, 0); + mlx4_ib_destroy_ah(ah, 0, NULL); return ret; } @@ -1903,7 +1903,8 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work) switch (wc.opcode) { case IB_WC_SEND: mlx4_ib_destroy_ah(sqp->tx_ring[wc.wr_id & - (MLX4_NUM_TUNNEL_BUFS - 1)].ah, 0); + (MLX4_NUM_TUNNEL_BUFS - 1)].ah, 
+ 0, NULL); sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah = NULL; spin_lock(&sqp->tx_lock); @@ -1932,7 +1933,8 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work) ctx->slave, wc.status, wc.wr_id); if (!MLX4_TUN_IS_RECV(wc.wr_id)) { mlx4_ib_destroy_ah(sqp->tx_ring[wc.wr_id & - (MLX4_NUM_TUNNEL_BUFS - 1)].ah, 0); + (MLX4_NUM_TUNNEL_BUFS - 1)].ah, + 0, NULL); sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah = NULL; spin_lock(&sqp->tx_lock); diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 733f7bbd5901..e50f9de71119 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1195,7 +1195,7 @@ static int mlx4_ib_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, return 0; } -static void mlx4_ib_dealloc_pd(struct ib_pd *pd) +static void mlx4_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) { mlx4_pd_free(to_mdev(pd->device)->dev, to_mpd(pd)->pdn); } @@ -1243,7 +1243,7 @@ err1: return ERR_PTR(err); } -static int mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd) +static int mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata) { ib_destroy_cq(to_mxrcd(xrcd)->cq); ib_dealloc_pd(to_mxrcd(xrcd)->pd); diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 24633fc29a29..58112b59cc7c 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -734,13 +734,12 @@ int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt, struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata); -int mlx4_ib_dereg_mr(struct ib_mr *mr); +int mlx4_ib_dereg_mr(struct ib_mr *mr, struct ib_udata *udata); struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, struct ib_udata *udata); int mlx4_ib_dealloc_mw(struct ib_mw *mw); -struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, - enum ib_mr_type mr_type, - u32 
max_num_sg); +struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, + u32 max_num_sg, struct ib_udata *udata); int mlx4_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period); @@ -749,7 +748,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, struct ib_ucontext *context, struct ib_udata *udata); -int mlx4_ib_destroy_cq(struct ib_cq *cq); +int mlx4_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); int mlx4_ib_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags); void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq); @@ -762,7 +761,7 @@ struct ib_ah *mlx4_ib_create_ah_slave(struct ib_pd *pd, int slave_sgid_index, u8 *s_mac, u16 vlan_tag); int mlx4_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); -int mlx4_ib_destroy_ah(struct ib_ah *ah, u32 flags); +int mlx4_ib_destroy_ah(struct ib_ah *ah, u32 flags, struct ib_udata *udata); struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *init_attr, @@ -770,7 +769,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); int mlx4_ib_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); -int mlx4_ib_destroy_srq(struct ib_srq *srq); +int mlx4_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata); void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index); int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr); @@ -778,7 +777,7 @@ int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata 
*udata); -int mlx4_ib_destroy_qp(struct ib_qp *qp); +int mlx4_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata); void mlx4_ib_drain_sq(struct ib_qp *qp); void mlx4_ib_drain_rq(struct ib_qp *qp); int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, @@ -913,7 +912,7 @@ void mlx4_ib_sl2vl_update(struct mlx4_ib_dev *mdev, int port); struct ib_wq *mlx4_ib_create_wq(struct ib_pd *pd, struct ib_wq_init_attr *init_attr, struct ib_udata *udata); -int mlx4_ib_destroy_wq(struct ib_wq *wq); +int mlx4_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata); int mlx4_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, u32 wq_attr_mask, struct ib_udata *udata); diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index 395379a480cb..355205a28544 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -595,7 +595,7 @@ mlx4_free_priv_pages(struct mlx4_ib_mr *mr) } } -int mlx4_ib_dereg_mr(struct ib_mr *ibmr) +int mlx4_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) { struct mlx4_ib_mr *mr = to_mmr(ibmr); int ret; @@ -655,9 +655,8 @@ int mlx4_ib_dealloc_mw(struct ib_mw *ibmw) return 0; } -struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, - enum ib_mr_type mr_type, - u32 max_num_sg) +struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, + u32 max_num_sg, struct ib_udata *udata) { struct mlx4_ib_dev *dev = to_mdev(pd->device); struct mlx4_ib_mr *mr; diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 429a59c5801c..25dfdcc90a05 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1626,7 +1626,7 @@ static int _mlx4_ib_destroy_qp(struct ib_qp *qp) return 0; } -int mlx4_ib_destroy_qp(struct ib_qp *qp) +int mlx4_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata) { struct mlx4_ib_qp *mqp = to_mqp(qp); @@ -4244,7 +4244,7 @@ int mlx4_ib_modify_wq(struct ib_wq *ibwq, struct ib_wq_attr *wq_attr, return err; } -int 
mlx4_ib_destroy_wq(struct ib_wq *ibwq) +int mlx4_ib_destroy_wq(struct ib_wq *ibwq, struct ib_udata *udata) { struct mlx4_ib_dev *dev = to_mdev(ibwq->device); struct mlx4_ib_qp *qp = to_mqp((struct ib_qp *)ibwq); diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c index 381cf899bcef..b51f632f3f7d 100644 --- a/drivers/infiniband/hw/mlx4/srq.c +++ b/drivers/infiniband/hw/mlx4/srq.c @@ -272,7 +272,7 @@ int mlx4_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr) return 0; } -int mlx4_ib_destroy_srq(struct ib_srq *srq) +int mlx4_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) { struct mlx4_ib_dev *dev = to_mdev(srq->device); struct mlx4_ib_srq *msrq = to_msrq(srq); diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c index 420ae0897333..2e377f9699f1 100644 --- a/drivers/infiniband/hw/mlx5/ah.c +++ b/drivers/infiniband/hw/mlx5/ah.c @@ -131,7 +131,7 @@ int mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr) return 0; } -int mlx5_ib_destroy_ah(struct ib_ah *ah, u32 flags) +int mlx5_ib_destroy_ah(struct ib_ah *ah, u32 flags, struct ib_udata *udata) { kfree(to_mah(ah)); return 0; diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 18704e503508..5d238a8ee132 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -996,8 +996,7 @@ err_create: return ERR_PTR(err); } - -int mlx5_ib_destroy_cq(struct ib_cq *cq) +int mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(cq->device); struct mlx5_ib_cq *mcq = to_mcq(cq); diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 531ff20b32ad..468544819c79 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2314,7 +2314,7 @@ err_free: return ERR_PTR(err); } -int mlx5_ib_dealloc_dm(struct ib_dm *ibdm) +int mlx5_ib_dealloc_dm(struct ib_dm *ibdm, struct uverbs_attr_bundle 
*attrs) { struct mlx5_memic *memic = &to_mdev(ibdm->device)->memic; struct mlx5_ib_dm *dm = to_mdm(ibdm); @@ -2370,7 +2370,7 @@ static int mlx5_ib_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, return 0; } -static void mlx5_ib_dealloc_pd(struct ib_pd *pd) +static void mlx5_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) { struct mlx5_ib_dev *mdev = to_mdev(pd->device); struct mlx5_ib_pd *mpd = to_mpd(pd); @@ -4590,7 +4590,7 @@ static void destroy_umrc_res(struct mlx5_ib_dev *dev) mlx5_ib_warn(dev, "mr cache cleanup failed\n"); if (dev->umrc.qp) - mlx5_ib_destroy_qp(dev->umrc.qp); + mlx5_ib_destroy_qp(dev->umrc.qp, NULL); if (dev->umrc.cq) ib_free_cq(dev->umrc.cq); if (dev->umrc.pd) @@ -4695,7 +4695,7 @@ static int create_umr_res(struct mlx5_ib_dev *dev) return 0; error_4: - mlx5_ib_destroy_qp(qp); + mlx5_ib_destroy_qp(qp, NULL); dev->umrc.qp = NULL; error_3: @@ -4837,15 +4837,15 @@ static int create_dev_resources(struct mlx5_ib_resources *devr) return 0; error5: - mlx5_ib_destroy_srq(devr->s0); + mlx5_ib_destroy_srq(devr->s0, NULL); error4: - mlx5_ib_dealloc_xrcd(devr->x1); + mlx5_ib_dealloc_xrcd(devr->x1, NULL); error3: - mlx5_ib_dealloc_xrcd(devr->x0); + mlx5_ib_dealloc_xrcd(devr->x0, NULL); error2: - mlx5_ib_destroy_cq(devr->c0); + mlx5_ib_destroy_cq(devr->c0, NULL); error1: - mlx5_ib_dealloc_pd(devr->p0); + mlx5_ib_dealloc_pd(devr->p0, NULL); error0: kfree(devr->p0); return ret; @@ -4857,12 +4857,12 @@ static void destroy_dev_resources(struct mlx5_ib_resources *devr) container_of(devr, struct mlx5_ib_dev, devr); int port; - mlx5_ib_destroy_srq(devr->s1); - mlx5_ib_destroy_srq(devr->s0); - mlx5_ib_dealloc_xrcd(devr->x0); - mlx5_ib_dealloc_xrcd(devr->x1); - mlx5_ib_destroy_cq(devr->c0); - mlx5_ib_dealloc_pd(devr->p0); + mlx5_ib_destroy_srq(devr->s1, NULL); + mlx5_ib_destroy_srq(devr->s0, NULL); + mlx5_ib_dealloc_xrcd(devr->x0, NULL); + mlx5_ib_dealloc_xrcd(devr->x1, NULL); + mlx5_ib_destroy_cq(devr->c0, NULL); + mlx5_ib_dealloc_pd(devr->p0, 
NULL); kfree(devr->p0); /* Make sure no change P_Key work items are still executing */ diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 4a617d78eae1..e45f59b0cc52 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1049,14 +1049,14 @@ void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index); struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, u32 flags, struct ib_udata *udata); int mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); -int mlx5_ib_destroy_ah(struct ib_ah *ah, u32 flags); +int mlx5_ib_destroy_ah(struct ib_ah *ah, u32 flags, struct ib_udata *udata); struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *init_attr, struct ib_udata *udata); int mlx5_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr); -int mlx5_ib_destroy_srq(struct ib_srq *srq); +int mlx5_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata); int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr); int mlx5_ib_enable_lb(struct mlx5_ib_dev *dev, bool td, bool qp); @@ -1068,7 +1068,7 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); -int mlx5_ib_destroy_qp(struct ib_qp *qp); +int mlx5_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata); void mlx5_ib_drain_sq(struct ib_qp *qp); void mlx5_ib_drain_rq(struct ib_qp *qp); int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, @@ -1085,7 +1085,7 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, struct ib_ucontext *context, struct 
ib_udata *udata); -int mlx5_ib_destroy_cq(struct ib_cq *cq); +int mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period); @@ -1112,10 +1112,9 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *mr); int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_pd *pd, struct ib_udata *udata); -int mlx5_ib_dereg_mr(struct ib_mr *ibmr); -struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, - enum ib_mr_type mr_type, - u32 max_num_sg); +int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata); +struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, + u32 max_num_sg, struct ib_udata *udata); int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, @@ -1126,7 +1125,7 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev, struct ib_ucontext *context, struct ib_udata *udata); -int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd); +int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata); int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset); int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port); int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev, @@ -1170,7 +1169,7 @@ int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd, struct ib_wq_init_attr *init_attr, struct ib_udata *udata); -int mlx5_ib_destroy_wq(struct ib_wq *wq); +int mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata); int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, u32 
wq_attr_mask, struct ib_udata *udata); struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device, @@ -1182,7 +1181,7 @@ struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev, struct ib_ucontext *context, struct ib_dm_alloc_attr *attr, struct uverbs_attr_bundle *attrs); -int mlx5_ib_dealloc_dm(struct ib_dm *ibdm); +int mlx5_ib_dealloc_dm(struct ib_dm *ibdm, struct uverbs_attr_bundle *attrs); struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm, struct ib_dm_mr_attr *attr, struct uverbs_attr_bundle *attrs); diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index c85f00255884..7de3683aebbe 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1623,15 +1623,14 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) kfree(mr); } -int mlx5_ib_dereg_mr(struct ib_mr *ibmr) +int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) { dereg_mr(to_mdev(ibmr->device), to_mmr(ibmr)); return 0; } -struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, - enum ib_mr_type mr_type, - u32 max_num_sg) +struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, + u32 max_num_sg, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(pd->device); int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index f864e454de8f..cd62c909b7eb 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -2732,7 +2732,7 @@ static int mlx5_ib_destroy_dct(struct mlx5_ib_qp *mqp) return 0; } -int mlx5_ib_destroy_qp(struct ib_qp *qp) +int mlx5_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(qp->device); struct mlx5_ib_qp *mqp = to_mqp(qp); @@ -5647,7 +5647,7 @@ struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev, return &xrcd->ibxrcd; } -int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd) +int mlx5_ib_dealloc_xrcd(struct ib_xrcd 
*xrcd, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(xrcd->device); u32 xrcdn = to_mxrcd(xrcd)->xrcdn; @@ -5965,7 +5965,7 @@ err: return ERR_PTR(err); } -int mlx5_ib_destroy_wq(struct ib_wq *wq) +int mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(wq->device); struct mlx5_ib_rwq *rwq = to_mrwq(wq); diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 1ec1beb1296b..bc1ca6bcea43 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -387,7 +387,7 @@ out_box: return ret; } -int mlx5_ib_destroy_srq(struct ib_srq *srq) +int mlx5_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(srq->device); struct mlx5_ib_srq *msrq = to_msrq(srq); diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 35c3119726bb..872f0ad556a7 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -384,7 +384,7 @@ static int mthca_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, return 0; } -static void mthca_dealloc_pd(struct ib_pd *pd) +static void mthca_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) { mthca_pd_free(to_mdev(pd->device), to_mpd(pd)); } @@ -411,7 +411,7 @@ static struct ib_ah *mthca_ah_create(struct ib_pd *pd, return &ah->ibah; } -static int mthca_ah_destroy(struct ib_ah *ah, u32 flags) +static int mthca_ah_destroy(struct ib_ah *ah, u32 flags, struct ib_udata *udata) { mthca_destroy_ah(to_mdev(ah->device), to_mah(ah)); kfree(ah); @@ -477,7 +477,7 @@ err_free: return ERR_PTR(err); } -static int mthca_destroy_srq(struct ib_srq *srq) +static int mthca_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) { struct mthca_ucontext *context; @@ -607,7 +607,7 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd, return &qp->ibqp; } -static int mthca_destroy_qp(struct ib_qp *qp) +static int 
mthca_destroy_qp(struct ib_qp *qp, struct ib_udata *udata) { if (qp->uobject) { mthca_unmap_user_db(to_mdev(qp->device), @@ -827,7 +827,7 @@ out: return ret; } -static int mthca_destroy_cq(struct ib_cq *cq) +static int mthca_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) { if (cq->uobject) { mthca_unmap_user_db(to_mdev(cq->device), @@ -974,7 +974,7 @@ err: return ERR_PTR(err); } -static int mthca_dereg_mr(struct ib_mr *mr) +static int mthca_dereg_mr(struct ib_mr *mr, struct ib_udata *udata) { struct mthca_mr *mmr = to_mmr(mr); diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index 032883180f65..79a43531c66d 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -3033,7 +3033,8 @@ static int nes_disconnect(struct nes_qp *nesqp, int abrupt) /* Need to free the Last Streaming Mode Message */ if (nesqp->ietf_frame) { if (nesqp->lsmm_mr) - nesibdev->ibdev.ops.dereg_mr(nesqp->lsmm_mr); + nesibdev->ibdev.ops.dereg_mr(nesqp->lsmm_mr, + NULL); pci_free_consistent(nesdev->pcidev, nesqp->private_data_len + nesqp->ietf_frame_size, nesqp->ietf_frame, nesqp->ietf_frame_pbase); diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 526092d435df..4b7855c7dacf 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -52,7 +52,7 @@ atomic_t qps_created; atomic_t sw_qps_destroyed; static void nes_unregister_ofa_device(struct nes_ib_device *nesibdev); -static int nes_dereg_mr(struct ib_mr *ib_mr); +static int nes_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata); /** * nes_alloc_mw @@ -306,9 +306,8 @@ static int alloc_fast_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd, /* * nes_alloc_mr */ -static struct ib_mr *nes_alloc_mr(struct ib_pd *ibpd, - enum ib_mr_type mr_type, - u32 max_num_sg) +static struct ib_mr *nes_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type, + u32 max_num_sg, struct ib_udata *udata) { 
struct nes_pd *nespd = to_nespd(ibpd); struct nes_vnic *nesvnic = to_nesvnic(ibpd->device); @@ -386,7 +385,7 @@ static struct ib_mr *nes_alloc_mr(struct ib_pd *ibpd, return ibmr; err: - nes_dereg_mr(ibmr); + nes_dereg_mr(ibmr, udata); return ERR_PTR(-ENOMEM); } @@ -700,7 +699,7 @@ static int nes_alloc_pd(struct ib_pd *pd, struct ib_ucontext *context, /** * nes_dealloc_pd */ -static void nes_dealloc_pd(struct ib_pd *ibpd) +static void nes_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct nes_ucontext *nesucontext; struct nes_pd *nespd = to_nespd(ibpd); @@ -1298,7 +1297,7 @@ static void nes_clean_cq(struct nes_qp *nesqp, struct nes_cq *nescq) /** * nes_destroy_qp */ -static int nes_destroy_qp(struct ib_qp *ibqp) +static int nes_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) { struct nes_qp *nesqp = to_nesqp(ibqp); struct nes_ucontext *nes_ucontext; @@ -1626,7 +1625,7 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, /** * nes_destroy_cq */ -static int nes_destroy_cq(struct ib_cq *ib_cq) +static int nes_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) { struct nes_cq *nescq; struct nes_device *nesdev; @@ -2377,7 +2376,7 @@ reg_user_mr_err: /** * nes_dereg_mr */ -static int nes_dereg_mr(struct ib_mr *ib_mr) +static int nes_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata) { struct nes_mr *nesmr = to_nesmr(ib_mr); struct nes_vnic *nesvnic = to_nesvnic(ib_mr->device); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index a7295322efbc..c0419133edfd 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -219,7 +219,7 @@ av_err: return ERR_PTR(status); } -int ocrdma_destroy_ah(struct ib_ah *ibah, u32 flags) +int ocrdma_destroy_ah(struct ib_ah *ibah, u32 flags, struct ib_udata *udata) { struct ocrdma_ah *ah = get_ocrdma_ah(ibah); struct ocrdma_dev *dev = get_ocrdma_dev(ibah->device); diff --git 
a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h index eb996e14b520..9b84034d8164 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h @@ -53,7 +53,7 @@ enum { struct ib_ah *ocrdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, u32 flags, struct ib_udata *udata); -int ocrdma_destroy_ah(struct ib_ah *ah, u32 flags); +int ocrdma_destroy_ah(struct ib_ah *ah, u32 flags, struct ib_udata *udata); int ocrdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); int ocrdma_process_mad(struct ib_device *, diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index b4e1777c2c97..b8f891660516 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -680,7 +680,7 @@ exit: return status; } -void ocrdma_dealloc_pd(struct ib_pd *ibpd) +void ocrdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct ocrdma_pd *pd = get_ocrdma_pd(ibpd); struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device); @@ -922,7 +922,7 @@ umem_err: return ERR_PTR(status); } -int ocrdma_dereg_mr(struct ib_mr *ib_mr) +int ocrdma_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata) { struct ocrdma_mr *mr = get_ocrdma_mr(ib_mr); struct ocrdma_dev *dev = get_ocrdma_dev(ib_mr->device); @@ -1076,7 +1076,7 @@ static void ocrdma_flush_cq(struct ocrdma_cq *cq) spin_unlock_irqrestore(&cq->cq_lock, flags); } -int ocrdma_destroy_cq(struct ib_cq *ibcq) +int ocrdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) { struct ocrdma_cq *cq = get_ocrdma_cq(ibcq); struct ocrdma_eq *eq = NULL; @@ -1697,7 +1697,7 @@ void ocrdma_del_flush_qp(struct ocrdma_qp *qp) spin_unlock_irqrestore(&dev->flush_q_lock, flags); } -int ocrdma_destroy_qp(struct ib_qp *ibqp) +int ocrdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) { struct ocrdma_pd *pd; struct ocrdma_qp *qp; @@ -1885,7 +1885,7 @@ int ocrdma_query_srq(struct 
ib_srq *ibsrq, struct ib_srq_attr *srq_attr) return status; } -int ocrdma_destroy_srq(struct ib_srq *ibsrq) +int ocrdma_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) { int status; struct ocrdma_srq *srq; @@ -2931,9 +2931,8 @@ int ocrdma_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags cq_flags) return 0; } -struct ib_mr *ocrdma_alloc_mr(struct ib_pd *ibpd, - enum ib_mr_type mr_type, - u32 max_num_sg) +struct ib_mr *ocrdma_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type, + u32 max_num_sg, struct ib_udata *udata) { int status; struct ocrdma_mr *mr; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h index 4c04ab40798e..3636cbcbcaa4 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h @@ -71,14 +71,14 @@ int ocrdma_mmap(struct ib_ucontext *, struct vm_area_struct *vma); int ocrdma_alloc_pd(struct ib_pd *pd, struct ib_ucontext *uctx, struct ib_udata *udata); -void ocrdma_dealloc_pd(struct ib_pd *pd); +void ocrdma_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata); struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, struct ib_ucontext *ib_ctx, struct ib_udata *udata); int ocrdma_resize_cq(struct ib_cq *, int cqe, struct ib_udata *); -int ocrdma_destroy_cq(struct ib_cq *); +int ocrdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata); struct ib_qp *ocrdma_create_qp(struct ib_pd *, struct ib_qp_init_attr *attrs, @@ -90,7 +90,7 @@ int ocrdma_modify_qp(struct ib_qp *, struct ib_qp_attr *attr, int ocrdma_query_qp(struct ib_qp *, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *); -int ocrdma_destroy_qp(struct ib_qp *); +int ocrdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata); void ocrdma_del_flush_qp(struct ocrdma_qp *qp); struct ib_srq *ocrdma_create_srq(struct ib_pd *, struct ib_srq_init_attr *, @@ -98,17 +98,16 @@ struct ib_srq *ocrdma_create_srq(struct ib_pd *, struct 
ib_srq_init_attr *, int ocrdma_modify_srq(struct ib_srq *, struct ib_srq_attr *, enum ib_srq_attr_mask, struct ib_udata *); int ocrdma_query_srq(struct ib_srq *, struct ib_srq_attr *); -int ocrdma_destroy_srq(struct ib_srq *); +int ocrdma_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata); int ocrdma_post_srq_recv(struct ib_srq *, const struct ib_recv_wr *, const struct ib_recv_wr **bad_recv_wr); -int ocrdma_dereg_mr(struct ib_mr *); +int ocrdma_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata); struct ib_mr *ocrdma_get_dma_mr(struct ib_pd *, int acc); struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *, u64 start, u64 length, u64 virt, int acc, struct ib_udata *); -struct ib_mr *ocrdma_alloc_mr(struct ib_pd *pd, - enum ib_mr_type mr_type, - u32 max_num_sg); +struct ib_mr *ocrdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, + u32 max_num_sg, struct ib_udata *udata); int ocrdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index e9fc15392dda..42755e7a10a8 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -478,7 +478,7 @@ int qedr_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, return 0; } -void qedr_dealloc_pd(struct ib_pd *ibpd) +void qedr_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct qedr_dev *dev = get_qedr_dev(ibpd->device); struct qedr_pd *pd = get_qedr_pd(ibpd); @@ -962,7 +962,7 @@ int qedr_resize_cq(struct ib_cq *ibcq, int new_cnt, struct ib_udata *udata) #define QEDR_DESTROY_CQ_MAX_ITERATIONS (10) #define QEDR_DESTROY_CQ_ITER_DURATION (10) -int qedr_destroy_cq(struct ib_cq *ibcq) +int qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) { struct qedr_dev *dev = get_qedr_dev(ibcq->device); struct qed_rdma_destroy_cq_out_params oparams; @@ -1485,7 +1485,7 @@ err0: return ERR_PTR(-EFAULT); } -int qedr_destroy_srq(struct ib_srq *ibsrq) +int 
qedr_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) { struct qed_rdma_destroy_srq_in_params in_params = {}; struct qedr_dev *dev = get_qedr_dev(ibsrq->device); @@ -2488,7 +2488,7 @@ static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp) return 0; } -int qedr_destroy_qp(struct ib_qp *ibqp) +int qedr_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) { struct qedr_qp *qp = get_qedr_qp(ibqp); struct qedr_dev *dev = qp->dev; @@ -2556,7 +2556,7 @@ struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr, return &ah->ibah; } -int qedr_destroy_ah(struct ib_ah *ibah, u32 flags) +int qedr_destroy_ah(struct ib_ah *ibah, u32 flags, struct ib_udata *udata) { struct qedr_ah *ah = get_qedr_ah(ibah); @@ -2711,7 +2711,7 @@ err0: return ERR_PTR(rc); } -int qedr_dereg_mr(struct ib_mr *ib_mr) +int qedr_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata) { struct qedr_mr *mr = get_qedr_mr(ib_mr); struct qedr_dev *dev = get_qedr_dev(ib_mr->device); @@ -2803,8 +2803,8 @@ err0: return ERR_PTR(rc); } -struct ib_mr *qedr_alloc_mr(struct ib_pd *ibpd, - enum ib_mr_type mr_type, u32 max_num_sg) +struct ib_mr *qedr_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type, + u32 max_num_sg, struct ib_udata *udata) { struct qedr_mr *mr; diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h index f0c05f4771ac..cd9659ac2aad 100644 --- a/drivers/infiniband/hw/qedr/verbs.h +++ b/drivers/infiniband/hw/qedr/verbs.h @@ -49,14 +49,14 @@ void qedr_dealloc_ucontext(struct ib_ucontext *uctx); int qedr_mmap(struct ib_ucontext *, struct vm_area_struct *vma); int qedr_alloc_pd(struct ib_pd *pd, struct ib_ucontext *uctx, struct ib_udata *udata); -void qedr_dealloc_pd(struct ib_pd *pd); +void qedr_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata); struct ib_cq *qedr_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, struct ib_ucontext *ib_ctx, struct ib_udata *udata); int qedr_resize_cq(struct ib_cq *, int 
cqe, struct ib_udata *); -int qedr_destroy_cq(struct ib_cq *); +int qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata); int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); struct ib_qp *qedr_create_qp(struct ib_pd *, struct ib_qp_init_attr *attrs, struct ib_udata *); @@ -64,7 +64,7 @@ int qedr_modify_qp(struct ib_qp *, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); int qedr_query_qp(struct ib_qp *, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *); -int qedr_destroy_qp(struct ib_qp *ibqp); +int qedr_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata); struct ib_srq *qedr_create_srq(struct ib_pd *ibpd, struct ib_srq_init_attr *attr, @@ -72,14 +72,14 @@ struct ib_srq *qedr_create_srq(struct ib_pd *ibpd, int qedr_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); int qedr_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr); -int qedr_destroy_srq(struct ib_srq *ibsrq); +int qedr_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata); int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_recv_wr); struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr, u32 flags, struct ib_udata *udata); -int qedr_destroy_ah(struct ib_ah *ibah, u32 flags); +int qedr_destroy_ah(struct ib_ah *ibah, u32 flags, struct ib_udata *udata); -int qedr_dereg_mr(struct ib_mr *); +int qedr_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata); struct ib_mr *qedr_get_dma_mr(struct ib_pd *, int acc); struct ib_mr *qedr_reg_user_mr(struct ib_pd *, u64 start, u64 length, @@ -89,7 +89,7 @@ int qedr_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); struct ib_mr *qedr_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg); + u32 max_num_sg, struct ib_udata *udata); int qedr_poll_cq(struct ib_cq *, int num_entries, struct ib_wc *wc); int 
qedr_post_send(struct ib_qp *, const struct ib_send_wr *, const struct ib_send_wr **bad_wr); diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index bd4521b2cc5f..cdb6357337c0 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -461,7 +461,7 @@ int usnic_ib_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, return 0; } -void usnic_ib_dealloc_pd(struct ib_pd *pd) +void usnic_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) { usnic_uiom_dealloc_pd((to_upd(pd))->umem_pd); } @@ -539,7 +539,7 @@ out_release_mutex: return ERR_PTR(err); } -int usnic_ib_destroy_qp(struct ib_qp *qp) +int usnic_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata) { struct usnic_ib_qp_grp *qp_grp; struct usnic_ib_vf *vf; @@ -606,7 +606,7 @@ struct ib_cq *usnic_ib_create_cq(struct ib_device *ibdev, return cq; } -int usnic_ib_destroy_cq(struct ib_cq *cq) +int usnic_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) { usnic_dbg("\n"); kfree(cq); @@ -642,7 +642,7 @@ err_free: return ERR_PTR(err); } -int usnic_ib_dereg_mr(struct ib_mr *ibmr) +int usnic_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) { struct usnic_ib_mr *mr = to_umr(ibmr); @@ -731,4 +731,3 @@ int usnic_ib_mmap(struct ib_ucontext *context, return -EINVAL; } -/* End of ib callbacks section */ diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h index c40e89b6246f..349c8dc13a12 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h @@ -52,22 +52,22 @@ int usnic_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey); int usnic_ib_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, struct ib_udata *udata); -void usnic_ib_dealloc_pd(struct ib_pd *pd); +void usnic_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata); struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd, 
struct ib_qp_init_attr *init_attr, struct ib_udata *udata); -int usnic_ib_destroy_qp(struct ib_qp *qp); +int usnic_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata); int usnic_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); struct ib_cq *usnic_ib_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, struct ib_ucontext *context, struct ib_udata *udata); -int usnic_ib_destroy_cq(struct ib_cq *cq); +int usnic_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); struct ib_mr *usnic_ib_reg_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata); -int usnic_ib_dereg_mr(struct ib_mr *ibmr); +int usnic_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata); int usnic_ib_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata); void usnic_ib_dealloc_ucontext(struct ib_ucontext *ibcontext); int usnic_ib_mmap(struct ib_ucontext *context, diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c index 104c7db4704f..5ba278324134 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c @@ -210,7 +210,7 @@ struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev, if (ib_copy_to_udata(udata, &cq_resp, sizeof(cq_resp))) { dev_warn(&dev->pdev->dev, "failed to copy back udata\n"); - pvrdma_destroy_cq(&cq->ibcq); + pvrdma_destroy_cq(&cq->ibcq, udata); return ERR_PTR(-EINVAL); } } @@ -245,10 +245,11 @@ static void pvrdma_free_cq(struct pvrdma_dev *dev, struct pvrdma_cq *cq) /** * pvrdma_destroy_cq - destroy completion queue * @cq: the completion queue to destroy. + * @udata: user data or null for kernel object * * @return: 0 for success. 
*/ -int pvrdma_destroy_cq(struct ib_cq *cq) +int pvrdma_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) { struct pvrdma_cq *vcq = to_vcq(cq); union pvrdma_cmd_req req; diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c index a85884e90e84..9e6c44ebaf54 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c @@ -201,7 +201,7 @@ err_umem: * @return: ib_mr pointer on success, otherwise returns an errno. */ struct ib_mr *pvrdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg) + u32 max_num_sg, struct ib_udata *udata) { struct pvrdma_dev *dev = to_vdev(pd->device); struct pvrdma_user_mr *mr; @@ -272,7 +272,7 @@ freemr: * * @return: 0 on success. */ -int pvrdma_dereg_mr(struct ib_mr *ibmr) +int pvrdma_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) { struct pvrdma_user_mr *mr = to_vmr(ibmr); struct pvrdma_dev *dev = to_vdev(ibmr->device); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c index 08f4257169bd..0eaaead5baec 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c @@ -446,10 +446,11 @@ static void pvrdma_free_qp(struct pvrdma_qp *qp) /** * pvrdma_destroy_qp - destroy a queue pair * @qp: the queue pair to destroy + * @udata: user data or null for kernel object * * @return: 0 on success. */ -int pvrdma_destroy_qp(struct ib_qp *qp) +int pvrdma_destroy_qp(struct ib_qp *qp, struct ib_udata *udata) { struct pvrdma_qp *vqp = to_vqp(qp); union pvrdma_cmd_req req; diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c index 951d9d68107a..21a95780e0ea 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c @@ -204,7 +204,7 @@ struct ib_srq *pvrdma_create_srq(struct ib_pd *pd, /* Copy udata back. 
*/ if (ib_copy_to_udata(udata, &srq_resp, sizeof(srq_resp))) { dev_warn(&dev->pdev->dev, "failed to copy back udata\n"); - pvrdma_destroy_srq(&srq->ibsrq); + pvrdma_destroy_srq(&srq->ibsrq, udata); return ERR_PTR(-EINVAL); } @@ -246,10 +246,11 @@ static void pvrdma_free_srq(struct pvrdma_dev *dev, struct pvrdma_srq *srq) /** * pvrdma_destroy_srq - destroy shared receive queue * @srq: the shared receive queue to destroy + * @udata: user data or null for kernel object * * @return: 0 for success. */ -int pvrdma_destroy_srq(struct ib_srq *srq) +int pvrdma_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) { struct pvrdma_srq *vsrq = to_vsrq(srq); union pvrdma_cmd_req req; diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c index 8a32e1e435a9..19ff6004b477 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c @@ -460,7 +460,7 @@ int pvrdma_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, if (ib_copy_to_udata(udata, &pd_resp, sizeof(pd_resp))) { dev_warn(&dev->pdev->dev, "failed to copy back protection domain\n"); - pvrdma_dealloc_pd(&pd->ibpd); + pvrdma_dealloc_pd(&pd->ibpd, udata); return -EFAULT; } } @@ -476,10 +476,11 @@ err: /** * pvrdma_dealloc_pd - deallocate protection domain * @pd: the protection domain to be released + * @udata: user data or null for kernel object * * @return: 0 on success, otherwise errno. */ -void pvrdma_dealloc_pd(struct ib_pd *pd) +void pvrdma_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) { struct pvrdma_dev *dev = to_vdev(pd->device); union pvrdma_cmd_req req = {}; @@ -556,7 +557,7 @@ struct ib_ah *pvrdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, * * @return: 0 on success. 
*/ -int pvrdma_destroy_ah(struct ib_ah *ah, u32 flags) +int pvrdma_destroy_ah(struct ib_ah *ah, u32 flags, struct ib_udata *udata) { struct pvrdma_dev *dev = to_vdev(ah->device); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h index 607aa131d67c..2c8ba5bf8d0f 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h @@ -400,26 +400,26 @@ int pvrdma_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata); void pvrdma_dealloc_ucontext(struct ib_ucontext *context); int pvrdma_alloc_pd(struct ib_pd *pd, struct ib_ucontext *context, struct ib_udata *udata); -void pvrdma_dealloc_pd(struct ib_pd *ibpd); +void pvrdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata); struct ib_mr *pvrdma_get_dma_mr(struct ib_pd *pd, int acc); struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata); -int pvrdma_dereg_mr(struct ib_mr *mr); +int pvrdma_dereg_mr(struct ib_mr *mr, struct ib_udata *udata); struct ib_mr *pvrdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg); + u32 max_num_sg, struct ib_udata *udata); int pvrdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, struct ib_ucontext *context, struct ib_udata *udata); -int pvrdma_destroy_cq(struct ib_cq *cq); +int pvrdma_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); int pvrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); int pvrdma_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags); struct ib_ah *pvrdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, u32 flags, struct ib_udata *udata); -int pvrdma_destroy_ah(struct ib_ah *ah, u32 flags); +int pvrdma_destroy_ah(struct ib_ah *ah, u32 flags, struct ib_udata *udata); struct 
ib_srq *pvrdma_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *init_attr, @@ -427,7 +427,7 @@ struct ib_srq *pvrdma_create_srq(struct ib_pd *pd, int pvrdma_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); int pvrdma_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); -int pvrdma_destroy_srq(struct ib_srq *srq); +int pvrdma_destroy_srq(struct ib_srq *srq, struct ib_udata *udata); struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, @@ -436,7 +436,7 @@ int pvrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); int pvrdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); -int pvrdma_destroy_qp(struct ib_qp *qp); +int pvrdma_destroy_qp(struct ib_qp *qp, struct ib_udata *udata); int pvrdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, const struct ib_send_wr **bad_wr); int pvrdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c index fc10e4e26ca7..001a5c052580 100644 --- a/drivers/infiniband/sw/rdmavt/ah.c +++ b/drivers/infiniband/sw/rdmavt/ah.c @@ -138,10 +138,12 @@ struct ib_ah *rvt_create_ah(struct ib_pd *pd, * rvt_destory_ah - Destory an address handle * @ibah: address handle * @destroy_flags: destroy address handle flags (see enum rdma_destroy_ah_flags) + * @udata: user data or NULL for kernel object * * Return: 0 on success */ -int rvt_destroy_ah(struct ib_ah *ibah, u32 destroy_flags) +int rvt_destroy_ah(struct ib_ah *ibah, u32 destroy_flags, + struct ib_udata *udata) { struct rvt_dev_info *dev = ib_to_rvt(ibah->device); struct rvt_ah *ah = ibah_to_rvtah(ibah); diff --git a/drivers/infiniband/sw/rdmavt/ah.h b/drivers/infiniband/sw/rdmavt/ah.h index 72431a618d5d..7b27b82d8a90 100644 --- a/drivers/infiniband/sw/rdmavt/ah.h +++ 
b/drivers/infiniband/sw/rdmavt/ah.h @@ -54,7 +54,8 @@ struct ib_ah *rvt_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, u32 create_flags, struct ib_udata *udata); -int rvt_destroy_ah(struct ib_ah *ibah, u32 destroy_flags); +int rvt_destroy_ah(struct ib_ah *ibah, u32 destroy_flags, + struct ib_udata *udata); int rvt_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); int rvt_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c index 4f1544ad4aff..6f7ff2384506 100644 --- a/drivers/infiniband/sw/rdmavt/cq.c +++ b/drivers/infiniband/sw/rdmavt/cq.c @@ -299,12 +299,13 @@ done: /** * rvt_destroy_cq - destroy a completion queue * @ibcq: the completion queue to destroy. + * @udata: user data or NULL for kernel object * * Called by ib_destroy_cq() in the generic verbs code. * * Return: always 0 */ -int rvt_destroy_cq(struct ib_cq *ibcq) +int rvt_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) { struct rvt_cq *cq = ibcq_to_rvtcq(ibcq); struct rvt_dev_info *rdi = cq->rdi; diff --git a/drivers/infiniband/sw/rdmavt/cq.h b/drivers/infiniband/sw/rdmavt/cq.h index 72184b1c176b..e42661ecdef8 100644 --- a/drivers/infiniband/sw/rdmavt/cq.h +++ b/drivers/infiniband/sw/rdmavt/cq.h @@ -55,7 +55,7 @@ struct ib_cq *rvt_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, struct ib_ucontext *context, struct ib_udata *udata); -int rvt_destroy_cq(struct ib_cq *ibcq); +int rvt_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata); int rvt_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags); int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata); int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry); diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index e8b03ae54914..2d1b88a34f8e 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ 
-548,7 +548,7 @@ bool rvt_ss_has_lkey(struct rvt_sge_state *ss, u32 lkey) * * Returns 0 on success. */ -int rvt_dereg_mr(struct ib_mr *ibmr) +int rvt_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) { struct rvt_mr *mr = to_imr(ibmr); int ret; @@ -575,9 +575,8 @@ out: * * Return: the memory region on success, otherwise return an errno. */ -struct ib_mr *rvt_alloc_mr(struct ib_pd *pd, - enum ib_mr_type mr_type, - u32 max_num_sg) +struct ib_mr *rvt_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, + u32 max_num_sg, struct ib_udata *udata) { struct rvt_mr *mr; diff --git a/drivers/infiniband/sw/rdmavt/mr.h b/drivers/infiniband/sw/rdmavt/mr.h index 132800ee0205..2c8d0752e8e3 100644 --- a/drivers/infiniband/sw/rdmavt/mr.h +++ b/drivers/infiniband/sw/rdmavt/mr.h @@ -78,10 +78,9 @@ struct ib_mr *rvt_get_dma_mr(struct ib_pd *pd, int acc); struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int mr_access_flags, struct ib_udata *udata); -int rvt_dereg_mr(struct ib_mr *ibmr); -struct ib_mr *rvt_alloc_mr(struct ib_pd *pd, - enum ib_mr_type mr_type, - u32 max_num_sg); +int rvt_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata); +struct ib_mr *rvt_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, + u32 max_num_sg, struct ib_udata *udata); int rvt_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); struct ib_fmr *rvt_alloc_fmr(struct ib_pd *pd, int mr_access_flags, diff --git a/drivers/infiniband/sw/rdmavt/pd.c b/drivers/infiniband/sw/rdmavt/pd.c index 6033054b22fa..e84341282374 100644 --- a/drivers/infiniband/sw/rdmavt/pd.c +++ b/drivers/infiniband/sw/rdmavt/pd.c @@ -93,10 +93,11 @@ bail: /** * rvt_dealloc_pd - Free PD * @ibpd: Free up PD + * @udata: Valid user data or NULL for kernel object * * Return: always 0 */ -void rvt_dealloc_pd(struct ib_pd *ibpd) +void rvt_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct rvt_dev_info *dev = ib_to_rvt(ibpd->device); diff --git 
a/drivers/infiniband/sw/rdmavt/pd.h b/drivers/infiniband/sw/rdmavt/pd.h index 7a887e4a45e7..d0368a625e03 100644 --- a/drivers/infiniband/sw/rdmavt/pd.h +++ b/drivers/infiniband/sw/rdmavt/pd.h @@ -52,6 +52,6 @@ int rvt_alloc_pd(struct ib_pd *pd, struct ib_ucontext *context, struct ib_udata *udata); -void rvt_dealloc_pd(struct ib_pd *ibpd); +void rvt_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata); #endif /* DEF_RDMAVTPD_H */ diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index a34b9a2a32b6..e8bba7e56c29 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1617,7 +1617,7 @@ inval: * * Return: 0 on success. */ -int rvt_destroy_qp(struct ib_qp *ibqp) +int rvt_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) { struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device); diff --git a/drivers/infiniband/sw/rdmavt/qp.h b/drivers/infiniband/sw/rdmavt/qp.h index 6d883972e0b8..450b27ea1fa4 100644 --- a/drivers/infiniband/sw/rdmavt/qp.h +++ b/drivers/infiniband/sw/rdmavt/qp.h @@ -57,7 +57,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, struct ib_udata *udata); int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); -int rvt_destroy_qp(struct ib_qp *ibqp); +int rvt_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata); int rvt_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_qp_init_attr *init_attr); int rvt_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, diff --git a/drivers/infiniband/sw/rdmavt/srq.c b/drivers/infiniband/sw/rdmavt/srq.c index 895b3fabd0bf..3090b0935714 100644 --- a/drivers/infiniband/sw/rdmavt/srq.c +++ b/drivers/infiniband/sw/rdmavt/srq.c @@ -340,7 +340,7 @@ int rvt_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) * * Return always 0 */ -int rvt_destroy_srq(struct ib_srq *ibsrq) +int rvt_destroy_srq(struct ib_srq *ibsrq, struct ib_udata 
*udata) { struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq); struct rvt_dev_info *dev = ib_to_rvt(ibsrq->device); diff --git a/drivers/infiniband/sw/rdmavt/srq.h b/drivers/infiniband/sw/rdmavt/srq.h index bf0eaaf56465..69cad2f65408 100644 --- a/drivers/infiniband/sw/rdmavt/srq.h +++ b/drivers/infiniband/sw/rdmavt/srq.h @@ -57,6 +57,6 @@ int rvt_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); int rvt_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr); -int rvt_destroy_srq(struct ib_srq *ibsrq); +int rvt_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata); #endif /* DEF_RVTSRQ_H */ diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 6ecf28570ff0..e625731ae42d 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -185,7 +185,7 @@ static int rxe_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, return rxe_add_to_pool(&rxe->pd_pool, &pd->pelem); } -static void rxe_dealloc_pd(struct ib_pd *ibpd) +static void rxe_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct rxe_pd *pd = to_rpd(ibpd); @@ -242,7 +242,7 @@ static int rxe_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr) return 0; } -static int rxe_destroy_ah(struct ib_ah *ibah, u32 flags) +static int rxe_destroy_ah(struct ib_ah *ibah, u32 flags, struct ib_udata *udata) { struct rxe_ah *ah = to_rah(ibah); @@ -389,7 +389,7 @@ static int rxe_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) return 0; } -static int rxe_destroy_srq(struct ib_srq *ibsrq) +static int rxe_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) { struct rxe_srq *srq = to_rsrq(ibsrq); @@ -509,7 +509,7 @@ static int rxe_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, return 0; } -static int rxe_destroy_qp(struct ib_qp *ibqp) +static int rxe_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) { struct rxe_qp *qp = to_rqp(ibqp); @@ -839,7 
+839,7 @@ err1: return ERR_PTR(err); } -static int rxe_destroy_cq(struct ib_cq *ibcq) +static int rxe_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) { struct rxe_cq *cq = to_rcq(ibcq); @@ -990,7 +990,7 @@ err2: return ERR_PTR(err); } -static int rxe_dereg_mr(struct ib_mr *ibmr) +static int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) { struct rxe_mem *mr = to_rmr(ibmr); @@ -1001,9 +1001,8 @@ static int rxe_dereg_mr(struct ib_mr *ibmr) return 0; } -static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, - enum ib_mr_type mr_type, - u32 max_num_sg) +static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type, + u32 max_num_sg, struct ib_udata *udata) { struct rxe_dev *rxe = to_rdev(ibpd->device); struct rxe_pd *pd = to_rpd(ibpd); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 3b6eb646066c..54e48dd36644 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2396,13 +2396,13 @@ struct ib_device_ops { void (*disassociate_ucontext)(struct ib_ucontext *ibcontext); int (*alloc_pd)(struct ib_pd *pd, struct ib_ucontext *context, struct ib_udata *udata); - void (*dealloc_pd)(struct ib_pd *pd); + void (*dealloc_pd)(struct ib_pd *pd, struct ib_udata *udata); struct ib_ah *(*create_ah)(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, u32 flags, struct ib_udata *udata); int (*modify_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); int (*query_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); - int (*destroy_ah)(struct ib_ah *ah, u32 flags); + int (*destroy_ah)(struct ib_ah *ah, u32 flags, struct ib_udata *udata); struct ib_srq *(*create_srq)(struct ib_pd *pd, struct ib_srq_init_attr *srq_init_attr, struct ib_udata *udata); @@ -2410,7 +2410,7 @@ struct ib_device_ops { enum ib_srq_attr_mask srq_attr_mask, struct ib_udata *udata); int (*query_srq)(struct ib_srq *srq, struct ib_srq_attr *srq_attr); - int (*destroy_srq)(struct ib_srq *srq); + int (*destroy_srq)(struct ib_srq *srq, struct ib_udata *udata); 
struct ib_qp *(*create_qp)(struct ib_pd *pd, struct ib_qp_init_attr *qp_init_attr, struct ib_udata *udata); @@ -2418,13 +2418,13 @@ struct ib_device_ops { int qp_attr_mask, struct ib_udata *udata); int (*query_qp)(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); - int (*destroy_qp)(struct ib_qp *qp); + int (*destroy_qp)(struct ib_qp *qp, struct ib_udata *udata); struct ib_cq *(*create_cq)(struct ib_device *device, const struct ib_cq_init_attr *attr, struct ib_ucontext *context, struct ib_udata *udata); int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period); - int (*destroy_cq)(struct ib_cq *cq); + int (*destroy_cq)(struct ib_cq *cq, struct ib_udata *udata); int (*resize_cq)(struct ib_cq *cq, int cqe, struct ib_udata *udata); struct ib_mr *(*get_dma_mr)(struct ib_pd *pd, int mr_access_flags); struct ib_mr *(*reg_user_mr)(struct ib_pd *pd, u64 start, u64 length, @@ -2433,9 +2433,9 @@ struct ib_device_ops { int (*rereg_user_mr)(struct ib_mr *mr, int flags, u64 start, u64 length, u64 virt_addr, int mr_access_flags, struct ib_pd *pd, struct ib_udata *udata); - int (*dereg_mr)(struct ib_mr *mr); + int (*dereg_mr)(struct ib_mr *mr, struct ib_udata *udata); struct ib_mr *(*alloc_mr)(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg); + u32 max_num_sg, struct ib_udata *udata); int (*advise_mr)(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice, u32 flags, struct ib_sge *sg_list, u32 num_sge, @@ -2458,7 +2458,7 @@ struct ib_device_ops { struct ib_xrcd *(*alloc_xrcd)(struct ib_device *device, struct ib_ucontext *ucontext, struct ib_udata *udata); - int (*dealloc_xrcd)(struct ib_xrcd *xrcd); + int (*dealloc_xrcd)(struct ib_xrcd *xrcd, struct ib_udata *udata); struct ib_flow *(*create_flow)(struct ib_qp *qp, struct ib_flow_attr *flow_attr, int domain, struct ib_udata *udata); @@ -2483,7 +2483,7 @@ struct ib_device_ops { struct ib_wq *(*create_wq)(struct ib_pd *pd, struct ib_wq_init_attr *init_attr, 
struct ib_udata *udata); - int (*destroy_wq)(struct ib_wq *wq); + int (*destroy_wq)(struct ib_wq *wq, struct ib_udata *udata); int (*modify_wq)(struct ib_wq *wq, struct ib_wq_attr *attr, u32 wq_attr_mask, struct ib_udata *udata); struct ib_rwq_ind_table *(*create_rwq_ind_table)( @@ -2495,7 +2495,7 @@ struct ib_device_ops { struct ib_ucontext *context, struct ib_dm_alloc_attr *attr, struct uverbs_attr_bundle *attrs); - int (*dealloc_dm)(struct ib_dm *dm); + int (*dealloc_dm)(struct ib_dm *dm, struct uverbs_attr_bundle *attrs); struct ib_mr *(*reg_dm_mr)(struct ib_pd *pd, struct ib_dm *dm, struct ib_dm_mr_attr *attr, struct uverbs_attr_bundle *attrs); @@ -3252,9 +3252,27 @@ enum ib_pd_flags { struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags, const char *caller); + #define ib_alloc_pd(device, flags) \ __ib_alloc_pd((device), (flags), KBUILD_MODNAME) -void ib_dealloc_pd(struct ib_pd *pd); + +/** + * ib_dealloc_pd_user - Deallocate kernel/user PD + * @pd: The protection domain + * @udata: Valid user data or NULL for kernel objects + */ +void ib_dealloc_pd_user(struct ib_pd *pd, struct ib_udata *udata); + +/** + * ib_dealloc_pd - Deallocate kernel PD + * @pd: The protection domain + * + * NOTE: for user PD use ib_dealloc_pd_user with valid udata! + */ +static inline void ib_dealloc_pd(struct ib_pd *pd) +{ + ib_dealloc_pd_user(pd, NULL); +} enum rdma_create_ah_flags { /* In a sleepable context */ @@ -3367,11 +3385,24 @@ enum rdma_destroy_ah_flags { }; /** - * rdma_destroy_ah - Destroys an address handle. + * rdma_destroy_ah_user - Destroys an address handle. * @ah: The address handle to destroy. * @flags: Destroy address handle flags (see enum rdma_destroy_ah_flags). + * @udata: Valid user data or NULL for kernel objects */ -int rdma_destroy_ah(struct ib_ah *ah, u32 flags); +int rdma_destroy_ah_user(struct ib_ah *ah, u32 flags, struct ib_udata *udata); + +/** + * rdma_destroy_ah - Destroys a kernel address handle. 
+ * @ah: The address handle to destroy. + * @flags: Destroy address handle flags (see enum rdma_destroy_ah_flags). + * + * NOTE: for user ah use rdma_destroy_ah_user with valid udata! + */ +static inline int rdma_destroy_ah(struct ib_ah *ah, u32 flags) +{ + return rdma_destroy_ah_user(ah, flags, NULL); +} /** * ib_create_srq - Creates a SRQ associated with the specified protection @@ -3415,10 +3446,22 @@ int ib_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); /** - * ib_destroy_srq - Destroys the specified SRQ. + * ib_destroy_srq_user - Destroys the specified SRQ. + * @srq: The SRQ to destroy. + * @udata: Valid user data or NULL for kernel objects + */ +int ib_destroy_srq_user(struct ib_srq *srq, struct ib_udata *udata); + +/** + * ib_destroy_srq - Destroys the specified kernel SRQ. * @srq: The SRQ to destroy. + * + * NOTE: for user srq use ib_destroy_srq_user with valid udata! */ -int ib_destroy_srq(struct ib_srq *srq); +static inline int ib_destroy_srq(struct ib_srq *srq) +{ + return ib_destroy_srq_user(srq, NULL); +} /** * ib_post_srq_recv - Posts a list of work requests to the specified SRQ. @@ -3438,15 +3481,34 @@ static inline int ib_post_srq_recv(struct ib_srq *srq, } /** - * ib_create_qp - Creates a QP associated with the specified protection + * ib_create_qp_user - Creates a QP associated with the specified protection * domain. * @pd: The protection domain associated with the QP. * @qp_init_attr: A list of initial attributes required to create the * QP. If QP creation succeeds, then the attributes are updated to * the actual capabilities of the created QP. + * @udata: Valid user data or NULL for kernel objects */ -struct ib_qp *ib_create_qp(struct ib_pd *pd, - struct ib_qp_init_attr *qp_init_attr); +struct ib_qp *ib_create_qp_user(struct ib_pd *pd, + struct ib_qp_init_attr *qp_init_attr, + struct ib_udata *udata); + +/** + * ib_create_qp - Creates a kernel QP associated with the specified protection + * domain. 
+ * @pd: The protection domain associated with the QP. + * @qp_init_attr: A list of initial attributes required to create the + * QP. If QP creation succeeds, then the attributes are updated to + * the actual capabilities of the created QP. + * @udata: Valid user data or NULL for kernel objects + * + * NOTE: for user qp use ib_create_qp_user with valid udata! + */ +static inline struct ib_qp *ib_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *qp_init_attr) +{ + return ib_create_qp_user(pd, qp_init_attr, NULL); +} /** * ib_modify_qp_with_udata - Modifies the attributes for the specified QP. @@ -3496,8 +3558,20 @@ int ib_query_qp(struct ib_qp *qp, /** * ib_destroy_qp - Destroys the specified QP. * @qp: The QP to destroy. + * @udata: Valid udata or NULL for kernel objects */ -int ib_destroy_qp(struct ib_qp *qp); +int ib_destroy_qp_user(struct ib_qp *qp, struct ib_udata *udata); + +/** + * ib_destroy_qp - Destroys the specified kernel QP. + * @qp: The QP to destroy. + * + * NOTE: for user qp use ib_destroy_qp_user with valid udata! + */ +static inline int ib_destroy_qp(struct ib_qp *qp) +{ + return ib_destroy_qp_user(qp, NULL); +} /** * ib_open_qp - Obtain a reference to an existing sharable QP. @@ -3557,13 +3631,66 @@ static inline int ib_post_recv(struct ib_qp *qp, return qp->device->ops.post_recv(qp, recv_wr, bad_recv_wr ? 
: &dummy); } -struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private, - int nr_cqe, int comp_vector, - enum ib_poll_context poll_ctx, const char *caller); -#define ib_alloc_cq(device, priv, nr_cqe, comp_vect, poll_ctx) \ - __ib_alloc_cq((device), (priv), (nr_cqe), (comp_vect), (poll_ctx), KBUILD_MODNAME) +struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private, + int nr_cqe, int comp_vector, + enum ib_poll_context poll_ctx, + const char *caller, struct ib_udata *udata); + +/** + * ib_alloc_cq_user: Allocate kernel/user CQ + * @dev: The IB device + * @private: Private data attached to the CQE + * @nr_cqe: Number of CQEs in the CQ + * @comp_vector: Completion vector used for the IRQs + * @poll_ctx: Context used for polling the CQ + * @udata: Valid user data or NULL for kernel objects + */ +static inline struct ib_cq *ib_alloc_cq_user(struct ib_device *dev, + void *private, int nr_cqe, + int comp_vector, + enum ib_poll_context poll_ctx, + struct ib_udata *udata) +{ + return __ib_alloc_cq_user(dev, private, nr_cqe, comp_vector, poll_ctx, + KBUILD_MODNAME, udata); +} + +/** + * ib_alloc_cq: Allocate kernel CQ + * @dev: The IB device + * @private: Private data attached to the CQE + * @nr_cqe: Number of CQEs in the CQ + * @comp_vector: Completion vector used for the IRQs + * @poll_ctx: Context used for polling the CQ + * + * NOTE: for user cq use ib_alloc_cq_user with valid udata! + */ +static inline struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private, + int nr_cqe, int comp_vector, + enum ib_poll_context poll_ctx) +{ + return ib_alloc_cq_user(dev, private, nr_cqe, comp_vector, poll_ctx, + NULL); +} + +/** + * ib_free_cq_user - Free kernel/user CQ + * @cq: The CQ to free + * @udata: Valid user data or NULL for kernel objects + */ +void ib_free_cq_user(struct ib_cq *cq, struct ib_udata *udata); + +/** + * ib_free_cq - Free kernel CQ + * @cq: The CQ to free + * + * NOTE: for user cq use ib_free_cq_user with valid udata! 
+ */ +static inline void ib_free_cq(struct ib_cq *cq) +{ + ib_free_cq_user(cq, NULL); +} -void ib_free_cq(struct ib_cq *cq); int ib_process_cq_direct(struct ib_cq *cq, int budget); /** @@ -3607,10 +3734,22 @@ int ib_resize_cq(struct ib_cq *cq, int cqe); int rdma_set_cq_moderation(struct ib_cq *cq, u16 cq_count, u16 cq_period); /** - * ib_destroy_cq - Destroys the specified CQ. + * ib_destroy_cq_user - Destroys the specified CQ. * @cq: The CQ to destroy. + * @udata: Valid user data or NULL for kernel objects */ -int ib_destroy_cq(struct ib_cq *cq); +int ib_destroy_cq_user(struct ib_cq *cq, struct ib_udata *udata); + +/** + * ib_destroy_cq - Destroys the specified kernel CQ. + * @cq: The CQ to destroy. + * + * NOTE: for user cq use ib_destroy_cq_user with valid udata! + */ +static inline int ib_destroy_cq(struct ib_cq *cq) +{ + return ib_destroy_cq_user(cq, NULL); +} /** * ib_poll_cq - poll a CQ for completion(s) @@ -3864,17 +4003,37 @@ static inline void ib_dma_free_coherent(struct ib_device *dev, } /** - * ib_dereg_mr - Deregisters a memory region and removes it from the + * ib_dereg_mr_user - Deregisters a memory region and removes it from the + * HCA translation table. + * @mr: The memory region to deregister. + * @udata: Valid user data or NULL for kernel object + * + * This function can fail, if the memory region has memory windows bound to it. + */ +int ib_dereg_mr_user(struct ib_mr *mr, struct ib_udata *udata); + +/** + * ib_dereg_mr - Deregisters a kernel memory region and removes it from the * HCA translation table. * @mr: The memory region to deregister. * * This function can fail, if the memory region has memory windows bound to it. + * + * NOTE: for user mr use ib_dereg_mr_user with valid udata! 
*/ -int ib_dereg_mr(struct ib_mr *mr); +static inline int ib_dereg_mr(struct ib_mr *mr) +{ + return ib_dereg_mr_user(mr, NULL); +} + +struct ib_mr *ib_alloc_mr_user(struct ib_pd *pd, enum ib_mr_type mr_type, + u32 max_num_sg, struct ib_udata *udata); -struct ib_mr *ib_alloc_mr(struct ib_pd *pd, - enum ib_mr_type mr_type, - u32 max_num_sg); +static inline struct ib_mr *ib_alloc_mr(struct ib_pd *pd, + enum ib_mr_type mr_type, u32 max_num_sg) +{ + return ib_alloc_mr_user(pd, mr_type, max_num_sg, NULL); +} /** * ib_update_fast_reg_key - updates the key portion of the fast_reg MR @@ -3972,8 +4131,9 @@ struct ib_xrcd *__ib_alloc_xrcd(struct ib_device *device, const char *caller); /** * ib_dealloc_xrcd - Deallocates an XRC domain. * @xrcd: The XRC domain to deallocate. + * @udata: Valid user data or NULL for kernel object */ -int ib_dealloc_xrcd(struct ib_xrcd *xrcd); +int ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata); static inline int ib_check_mr_access(int flags) { @@ -4049,7 +4209,7 @@ struct net_device *ib_device_netdev(struct ib_device *dev, u8 port); struct ib_wq *ib_create_wq(struct ib_pd *pd, struct ib_wq_init_attr *init_attr); -int ib_destroy_wq(struct ib_wq *wq); +int ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata); int ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *attr, u32 wq_attr_mask); struct ib_rwq_ind_table *ib_create_rwq_ind_table(struct ib_device *device, -- cgit v1.2.3 From bdeacabd1a5fb4c0274b949d7220501c3401a3b4 Mon Sep 17 00:00:00 2001 From: Shamir Rabinovitch Date: Sun, 31 Mar 2019 19:10:06 +0300 Subject: IB: Remove 'uobject->context' dependency in object destroy APIs Now that we have the udata passed to all the ib_xxx object destroy APIs and the additional macro 'rdma_udata_to_drv_context' to get the ib_ucontext from ib_udata stored in uverbs_attr_bundle, we can finally start to remove the dependency of the drivers in the ib_xxx->uobject->context. 
Signed-off-by: Shamir Rabinovitch Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs.h | 3 +- drivers/infiniband/core/uverbs_cmd.c | 7 +++-- drivers/infiniband/core/uverbs_std_types.c | 12 ++++---- drivers/infiniband/core/uverbs_std_types_cq.c | 2 +- drivers/infiniband/hw/cxgb3/iwch_provider.c | 4 +-- drivers/infiniband/hw/cxgb4/cq.c | 6 ++-- drivers/infiniband/hw/cxgb4/qp.c | 4 +-- drivers/infiniband/hw/hns/hns_roce_cq.c | 8 +++-- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 23 ++++++++------- drivers/infiniband/hw/i40iw/i40iw_verbs.c | 7 +++-- drivers/infiniband/hw/mlx4/cq.c | 10 +++++-- drivers/infiniband/hw/mlx4/qp.c | 32 ++++++++++++-------- drivers/infiniband/hw/mlx4/srq.c | 9 ++++-- drivers/infiniband/hw/mlx5/cq.c | 21 +++++++------- drivers/infiniband/hw/mlx5/main.c | 5 +++- drivers/infiniband/hw/mlx5/qp.c | 32 ++++++++++++-------- drivers/infiniband/hw/mlx5/srq.c | 19 +++++++++--- drivers/infiniband/hw/mthca/mthca_provider.c | 42 ++++++++++++++++++--------- drivers/infiniband/hw/nes/nes_verbs.c | 16 +++++++--- drivers/infiniband/hw/qedr/verbs.c | 9 +++--- drivers/infiniband/hw/usnic/usnic_ib_verbs.c | 2 +- drivers/infiniband/hw/usnic/usnic_uiom.c | 3 +- drivers/infiniband/hw/usnic/usnic_uiom.h | 3 +- 23 files changed, 177 insertions(+), 102 deletions(-) diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index fa5ea6529333..0fc71ad42490 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -240,7 +240,8 @@ void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_event_handler(struct ib_event_handler *handler, struct ib_event *event); int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject, struct ib_xrcd *xrcd, - enum rdma_remove_reason why, struct ib_udata *udata); + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs); int uverbs_dealloc_mw(struct ib_mw *mw); void ib_uverbs_detach_umcast(struct ib_qp *qp, diff --git 
a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index fe63dfd5f1b6..da31dba33fc5 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -670,17 +670,18 @@ static int ib_uverbs_close_xrcd(struct uverbs_attr_bundle *attrs) } int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject, struct ib_xrcd *xrcd, - enum rdma_remove_reason why, struct ib_udata *udata) + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) { struct inode *inode; int ret; - struct ib_uverbs_device *dev = uobject->context->ufile->device; + struct ib_uverbs_device *dev = attrs->ufile->device; inode = xrcd->inode; if (inode && !atomic_dec_and_test(&xrcd->usecnt)) return 0; - ret = ib_dealloc_xrcd(xrcd, udata); + ret = ib_dealloc_xrcd(xrcd, &attrs->driver_udata); if (ib_is_destroy_retryable(ret, why, uobject)) { atomic_inc(&xrcd->usecnt); diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c index c625f590a8f0..35b2e2c640cc 100644 --- a/drivers/infiniband/core/uverbs_std_types.c +++ b/drivers/infiniband/core/uverbs_std_types.c @@ -105,7 +105,7 @@ static int uverbs_free_qp(struct ib_uobject *uobject, if (uqp->uxrcd) atomic_dec(&uqp->uxrcd->refcnt); - ib_uverbs_release_uevent(uobject->context->ufile, &uqp->uevent); + ib_uverbs_release_uevent(attrs->ufile, &uqp->uevent); return ret; } @@ -138,7 +138,7 @@ static int uverbs_free_wq(struct ib_uobject *uobject, if (ib_is_destroy_retryable(ret, why, uobject)) return ret; - ib_uverbs_release_uevent(uobject->context->ufile, &uwq->uevent); + ib_uverbs_release_uevent(attrs->ufile, &uwq->uevent); return ret; } @@ -163,7 +163,7 @@ static int uverbs_free_srq(struct ib_uobject *uobject, atomic_dec(&us->uxrcd->refcnt); } - ib_uverbs_release_uevent(uobject->context->ufile, uevent); + ib_uverbs_release_uevent(attrs->ufile, uevent); return ret; } @@ -180,9 +180,9 @@ static int uverbs_free_xrcd(struct ib_uobject *uobject, if (ret) return ret; - 
mutex_lock(&uobject->context->ufile->device->xrcd_tree_mutex); - ret = ib_uverbs_dealloc_xrcd(uobject, xrcd, why, &attrs->driver_udata); - mutex_unlock(&uobject->context->ufile->device->xrcd_tree_mutex); + mutex_lock(&attrs->ufile->device->xrcd_tree_mutex); + ret = ib_uverbs_dealloc_xrcd(uobject, xrcd, why, attrs); + mutex_unlock(&attrs->ufile->device->xrcd_tree_mutex); return ret; } diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c index f03506ece016..cde608c268ff 100644 --- a/drivers/infiniband/core/uverbs_std_types_cq.c +++ b/drivers/infiniband/core/uverbs_std_types_cq.c @@ -49,7 +49,7 @@ static int uverbs_free_cq(struct ib_uobject *uobject, return ret; ib_uverbs_release_ucq( - uobject->context->ufile, + attrs->ufile, ev_queue ? container_of(ev_queue, struct ib_uverbs_completion_event_file, ev_queue) : diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index e10a56242998..bf07e93aeb94 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -760,8 +760,8 @@ static int iwch_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata) atomic_dec(&qhp->refcnt); wait_event(qhp->wait, !atomic_read(&qhp->refcnt)); - ucontext = ib_qp->uobject ? to_iwch_ucontext(ib_qp->uobject->context) - : NULL; + ucontext = rdma_udata_to_drv_context(udata, struct iwch_ucontext, + ibucontext); cxio_destroy_qp(&rhp->rdev, &qhp->wq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx); diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index 562187f0c5af..571281888de0 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -30,6 +30,8 @@ * SOFTWARE. 
*/ +#include + #include "iw_cxgb4.h" static int destroy_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, @@ -980,8 +982,8 @@ int c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) atomic_dec(&chp->refcnt); wait_event(chp->wait, !atomic_read(&chp->refcnt)); - ucontext = ib_cq->uobject ? to_c4iw_ucontext(ib_cq->uobject->context) - : NULL; + ucontext = rdma_udata_to_drv_context(udata, struct c4iw_ucontext, + ibucontext); destroy_cq(&chp->rhp->rdev, &chp->cq, ucontext ? &ucontext->uctx : &chp->cq.rdev->uctx, chp->destroy_skb, chp->wr_waitp); diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 76e6544cf0b9..777231496cc6 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -2838,8 +2838,8 @@ int c4iw_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) pr_debug("%s id %d\n", __func__, srq->wq.qid); xa_erase_irq(&rhp->qps, srq->wq.qid); - ucontext = ibsrq->uobject ? - to_c4iw_ucontext(ibsrq->uobject->context) : NULL; + ucontext = rdma_udata_to_drv_context(udata, struct c4iw_ucontext, + ibucontext); free_srq_queue(srq, ucontext ? 
&ucontext->uctx : &rhp->rdev.uctx, srq->wr_waitp); c4iw_free_srq_idx(&rhp->rdev, srq->idx); diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index a4e95a310c16..305c362ef5c6 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -32,6 +32,7 @@ #include #include +#include #include "hns_roce_device.h" #include "hns_roce_cmd.h" #include "hns_roce_hem.h" @@ -456,12 +457,15 @@ int hns_roce_ib_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) hns_roce_free_cq(hr_dev, hr_cq); hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt); - if (ib_cq->uobject) { + if (udata) { ib_umem_release(hr_cq->umem); if (hr_cq->db_en == 1) hns_roce_db_unmap_user( - to_hr_ucontext(ib_cq->uobject->context), + rdma_udata_to_drv_context( + udata, + struct hns_roce_ucontext, + ibucontext), &hr_cq->db); } else { /* Free the buff of stored cq */ diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 30b00240b7c8..b3cda5803c02 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -38,6 +38,7 @@ #include #include #include +#include #include "hnae3.h" #include "hns_roce_common.h" @@ -4442,7 +4443,7 @@ out: static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, - bool is_user) + struct ib_udata *udata) { struct hns_roce_cq *send_cq, *recv_cq; struct device *dev = hr_dev->dev; @@ -4464,7 +4465,7 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev, hns_roce_lock_cqs(send_cq, recv_cq); - if (!is_user) { + if (!udata) { __hns_roce_v2_cq_clean(recv_cq, hr_qp->qpn, hr_qp->ibqp.srq ? 
to_hr_srq(hr_qp->ibqp.srq) : NULL); if (send_cq != recv_cq) @@ -4485,16 +4486,18 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev, hns_roce_mtt_cleanup(hr_dev, &hr_qp->mtt); - if (is_user) { + if (udata) { + struct hns_roce_ucontext *context = + rdma_udata_to_drv_context( + udata, + struct hns_roce_ucontext, + ibucontext); + if (hr_qp->sq.wqe_cnt && (hr_qp->sdb_en == 1)) - hns_roce_db_unmap_user( - to_hr_ucontext(hr_qp->ibqp.uobject->context), - &hr_qp->sdb); + hns_roce_db_unmap_user(context, &hr_qp->sdb); if (hr_qp->rq.wqe_cnt && (hr_qp->rdb_en == 1)) - hns_roce_db_unmap_user( - to_hr_ucontext(hr_qp->ibqp.uobject->context), - &hr_qp->rdb); + hns_roce_db_unmap_user(context, &hr_qp->rdb); ib_umem_release(hr_qp->umem); } else { kfree(hr_qp->sq.wrid); @@ -4519,7 +4522,7 @@ static int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); int ret; - ret = hns_roce_v2_destroy_qp_common(hr_dev, hr_qp, ibqp->uobject); + ret = hns_roce_v2_destroy_qp_common(hr_dev, hr_qp, udata); if (ret) { dev_err(hr_dev->dev, "Destroy qp failed(%d)\n", ret); return ret; diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index fd2d7426c832..fcb9e2448a49 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -2060,9 +2060,12 @@ static int i40iw_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata) if (iwmr->type != IW_MEMREG_TYPE_MEM) { /* region is released. only test for userness. 
*/ if (iwmr->region) { - struct i40iw_ucontext *ucontext; + struct i40iw_ucontext *ucontext = + rdma_udata_to_drv_context( + udata, + struct i40iw_ucontext, + ibucontext); - ucontext = to_ucontext(ibpd->uobject->context); i40iw_del_memlist(iwmr, ucontext); } if (iwpbl->pbl_allocated && iwmr->type != IW_MEMREG_TYPE_QP) diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index 0b730737fb25..5403a1ff7cc2 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -38,6 +38,7 @@ #include "mlx4_ib.h" #include +#include static void mlx4_ib_cq_comp(struct mlx4_cq *cq) { @@ -493,8 +494,13 @@ int mlx4_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) mlx4_cq_free(dev->dev, &mcq->mcq); mlx4_mtt_cleanup(dev->dev, &mcq->buf.mtt); - if (cq->uobject) { - mlx4_ib_db_unmap_user(to_mucontext(cq->uobject->context), &mcq->db); + if (udata) { + mlx4_ib_db_unmap_user( + rdma_udata_to_drv_context( + udata, + struct mlx4_ib_ucontext, + ibucontext), + &mcq->db); ib_umem_release(mcq->umem); } else { mlx4_ib_free_cq_buf(dev, &mcq->buf, cq->cqe); diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 25dfdcc90a05..99ceffe5cfec 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1338,7 +1338,8 @@ static void destroy_qp_rss(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) } static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, - enum mlx4_ib_source_type src, bool is_user) + enum mlx4_ib_source_type src, + struct ib_udata *udata) { struct mlx4_ib_cq *send_cq, *recv_cq; unsigned long flags; @@ -1380,7 +1381,7 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, list_del(&qp->qps_list); list_del(&qp->cq_send_list); list_del(&qp->cq_recv_list); - if (!is_user) { + if (!udata) { __mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn, qp->ibqp.srq ? 
to_msrq(qp->ibqp.srq): NULL); if (send_cq != recv_cq) @@ -1398,19 +1399,26 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, if (qp->flags & MLX4_IB_QP_NETIF) mlx4_ib_steer_qp_free(dev, qp->mqp.qpn, 1); else if (src == MLX4_IB_RWQ_SRC) - mlx4_ib_release_wqn(to_mucontext( - qp->ibwq.uobject->context), qp, 1); + mlx4_ib_release_wqn( + rdma_udata_to_drv_context( + udata, + struct mlx4_ib_ucontext, + ibucontext), + qp, 1); else mlx4_qp_release_range(dev->dev, qp->mqp.qpn, 1); } mlx4_mtt_cleanup(dev->dev, &qp->mtt); - if (is_user) { + if (udata) { if (qp->rq.wqe_cnt) { - struct mlx4_ib_ucontext *mcontext = !src ? - to_mucontext(qp->ibqp.uobject->context) : - to_mucontext(qp->ibwq.uobject->context); + struct mlx4_ib_ucontext *mcontext = + rdma_udata_to_drv_context( + udata, + struct mlx4_ib_ucontext, + ibucontext); + mlx4_ib_db_unmap_user(mcontext, &qp->db); } ib_umem_release(qp->umem); @@ -1594,7 +1602,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, return ibqp; } -static int _mlx4_ib_destroy_qp(struct ib_qp *qp) +static int _mlx4_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata) { struct mlx4_ib_dev *dev = to_mdev(qp->device); struct mlx4_ib_qp *mqp = to_mqp(qp); @@ -1615,7 +1623,7 @@ static int _mlx4_ib_destroy_qp(struct ib_qp *qp) if (qp->rwq_ind_tbl) { destroy_qp_rss(dev, mqp); } else { - destroy_qp_common(dev, mqp, MLX4_IB_QP_SRC, qp->uobject); + destroy_qp_common(dev, mqp, MLX4_IB_QP_SRC, udata); } if (is_sqp(dev, mqp)) @@ -1637,7 +1645,7 @@ int mlx4_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata) ib_destroy_qp(sqp->roce_v2_gsi); } - return _mlx4_ib_destroy_qp(qp); + return _mlx4_ib_destroy_qp(qp, udata); } static int to_mlx4_st(struct mlx4_ib_dev *dev, enum mlx4_ib_qp_type type) @@ -4252,7 +4260,7 @@ int mlx4_ib_destroy_wq(struct ib_wq *ibwq, struct ib_udata *udata) if (qp->counter_index) mlx4_ib_free_qp_counter(dev, qp); - destroy_qp_common(dev, qp, MLX4_IB_RWQ_SRC, 1); + destroy_qp_common(dev, qp, 
MLX4_IB_RWQ_SRC, udata); kfree(qp); diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c index b51f632f3f7d..2a20205d1662 100644 --- a/drivers/infiniband/hw/mlx4/srq.c +++ b/drivers/infiniband/hw/mlx4/srq.c @@ -280,8 +280,13 @@ int mlx4_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) mlx4_srq_free(dev->dev, &msrq->msrq); mlx4_mtt_cleanup(dev->dev, &msrq->mtt); - if (srq->uobject) { - mlx4_ib_db_unmap_user(to_mucontext(srq->uobject->context), &msrq->db); + if (udata) { + mlx4_ib_db_unmap_user( + rdma_udata_to_drv_context( + udata, + struct mlx4_ib_ucontext, + ibucontext), + &msrq->db); ib_umem_release(msrq->umem); } else { kvfree(msrq->wrid); diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 5d238a8ee132..5bed098ccdef 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -796,9 +796,12 @@ err_umem: return err; } -static void destroy_cq_user(struct mlx5_ib_cq *cq, struct ib_ucontext *context) +static void destroy_cq_user(struct mlx5_ib_cq *cq, struct ib_udata *udata) { - mlx5_ib_db_unmap_user(to_mucontext(context), &cq->db); + struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context( + udata, struct mlx5_ib_ucontext, ibucontext); + + mlx5_ib_db_unmap_user(context, &cq->db); ib_umem_release(cq->buf.umem); } @@ -923,7 +926,7 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, INIT_LIST_HEAD(&cq->list_send_qp); INIT_LIST_HEAD(&cq->list_recv_qp); - if (context) { + if (udata) { err = create_cq_user(dev, udata, context, cq, entries, &cqb, &cqe_size, &index, &inlen); if (err) @@ -985,8 +988,8 @@ err_cmd: err_cqb: kvfree(cqb); - if (context) - destroy_cq_user(cq, context); + if (udata) + destroy_cq_user(cq, udata); else destroy_cq_kernel(dev, cq); @@ -1000,14 +1003,10 @@ int mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(cq->device); struct mlx5_ib_cq *mcq = to_mcq(cq); - struct ib_ucontext *context = NULL; - 
- if (cq->uobject) - context = cq->uobject->context; mlx5_core_destroy_cq(dev->mdev, &mcq->mcq); - if (context) - destroy_cq_user(mcq, context); + if (udata) + destroy_cq_user(mcq, udata); else destroy_cq_kernel(dev, mcq); diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 468544819c79..769a5952a0f6 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2329,7 +2329,10 @@ int mlx5_ib_dealloc_dm(struct ib_dm *ibdm, struct uverbs_attr_bundle *attrs) page_idx = (dm->dev_addr - pci_resource_start(memic->dev->pdev, 0) - MLX5_CAP64_DEV_MEM(memic->dev, memic_bar_start_addr)) >> PAGE_SHIFT; - bitmap_clear(to_mucontext(ibdm->uobject->context)->dm_pages, + bitmap_clear(rdma_udata_to_drv_context( + &attrs->driver_udata, + struct mlx5_ib_ucontext, + ibucontext)->dm_pages, page_idx, DIV_ROUND_UP(act_size, PAGE_SIZE)); diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index cd62c909b7eb..940ac1caa590 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -777,14 +777,17 @@ err_umem: } static void destroy_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd, - struct mlx5_ib_rwq *rwq) + struct mlx5_ib_rwq *rwq, struct ib_udata *udata) { - struct mlx5_ib_ucontext *context; + struct mlx5_ib_ucontext *context = + rdma_udata_to_drv_context( + udata, + struct mlx5_ib_ucontext, + ibucontext); if (rwq->create_flags & MLX5_IB_WQ_FLAGS_DELAY_DROP) atomic_dec(&dev->delay_drop.rqs_cnt); - context = to_mucontext(pd->uobject->context); mlx5_ib_db_unmap_user(context, &rwq->db); if (rwq->umem) ib_umem_release(rwq->umem); @@ -983,11 +986,15 @@ err_bfreg: } static void destroy_qp_user(struct mlx5_ib_dev *dev, struct ib_pd *pd, - struct mlx5_ib_qp *qp, struct mlx5_ib_qp_base *base) + struct mlx5_ib_qp *qp, struct mlx5_ib_qp_base *base, + struct ib_udata *udata) { - struct mlx5_ib_ucontext *context; + struct mlx5_ib_ucontext *context = + rdma_udata_to_drv_context( + 
udata, + struct mlx5_ib_ucontext, + ibucontext); - context = to_mucontext(pd->uobject->context); mlx5_ib_db_unmap_user(context, &qp->db); if (base->ubuffer.umem) ib_umem_release(base->ubuffer.umem); @@ -2284,7 +2291,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, err_create: if (qp->create_type == MLX5_QP_USER) - destroy_qp_user(dev, pd, qp, base); + destroy_qp_user(dev, pd, qp, base, udata); else if (qp->create_type == MLX5_QP_KERNEL) destroy_qp_kernel(dev, qp); @@ -2395,7 +2402,8 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, const struct mlx5_modify_raw_qp_param *raw_qp_param, u8 lag_tx_affinity); -static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) +static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, + struct ib_udata *udata) { struct mlx5_ib_cq *send_cq, *recv_cq; struct mlx5_ib_qp_base *base; @@ -2466,7 +2474,7 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) if (qp->create_type == MLX5_QP_KERNEL) destroy_qp_kernel(dev, qp); else if (qp->create_type == MLX5_QP_USER) - destroy_qp_user(dev, &get_pd(qp)->ibpd, qp, base); + destroy_qp_user(dev, &get_pd(qp)->ibpd, qp, base, udata); } static const char *ib_qp_type_str(enum ib_qp_type type) @@ -2743,7 +2751,7 @@ int mlx5_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata) if (mqp->qp_sub_type == MLX5_IB_QPT_DCT) return mlx5_ib_destroy_dct(mqp); - destroy_qp_common(dev, mqp); + destroy_qp_common(dev, mqp, udata); kfree(mqp); @@ -5959,7 +5967,7 @@ struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd, err_copy: mlx5_core_destroy_rq_tracked(dev->mdev, &rwq->core_qp); err_user_rq: - destroy_user_rq(dev, pd, rwq); + destroy_user_rq(dev, pd, rwq, udata); err: kfree(rwq); return ERR_PTR(err); @@ -5971,7 +5979,7 @@ int mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata) struct mlx5_ib_rwq *rwq = to_mrwq(wq); mlx5_core_destroy_rq_tracked(dev->mdev, &rwq->core_qp); - 
destroy_user_rq(dev, wq->pd, rwq); + destroy_user_rq(dev, wq->pd, rwq, udata); kfree(rwq); return 0; diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index bc1ca6bcea43..2e389f3444c6 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -194,9 +194,15 @@ err_db: return err; } -static void destroy_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq) +static void destroy_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, + struct ib_udata *udata) { - mlx5_ib_db_unmap_user(to_mucontext(pd->uobject->context), &srq->db); + mlx5_ib_db_unmap_user( + rdma_udata_to_drv_context( + udata, + struct mlx5_ib_ucontext, + ibucontext), + &srq->db); ib_umem_release(srq->umem); } @@ -327,7 +333,7 @@ err_core: err_usr_kern_srq: if (udata) - destroy_srq_user(pd, srq); + destroy_srq_user(pd, srq, udata); else destroy_srq_kernel(dev, srq); @@ -395,7 +401,12 @@ int mlx5_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) mlx5_cmd_destroy_srq(dev, &msrq->msrq); if (srq->uobject) { - mlx5_ib_db_unmap_user(to_mucontext(srq->uobject->context), &msrq->db); + mlx5_ib_db_unmap_user( + rdma_udata_to_drv_context( + udata, + struct mlx5_ib_ucontext, + ibucontext), + &msrq->db); ib_umem_release(msrq->umem); } else { destroy_srq_kernel(dev, msrq); diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 872f0ad556a7..9e4efd58c119 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -479,10 +479,12 @@ err_free: static int mthca_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) { - struct mthca_ucontext *context; - - if (srq->uobject) { - context = to_mucontext(srq->uobject->context); + if (udata) { + struct mthca_ucontext *context = + rdma_udata_to_drv_context( + udata, + struct mthca_ucontext, + ibucontext); mthca_unmap_user_db(to_mdev(srq->device), &context->uar, context->db_tab, to_msrq(srq)->db_index); @@ 
-609,14 +611,20 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd, static int mthca_destroy_qp(struct ib_qp *qp, struct ib_udata *udata) { - if (qp->uobject) { + if (udata) { + struct mthca_ucontext *context = + rdma_udata_to_drv_context( + udata, + struct mthca_ucontext, + ibucontext); + mthca_unmap_user_db(to_mdev(qp->device), - &to_mucontext(qp->uobject->context)->uar, - to_mucontext(qp->uobject->context)->db_tab, + &context->uar, + context->db_tab, to_mqp(qp)->sq.db_index); mthca_unmap_user_db(to_mdev(qp->device), - &to_mucontext(qp->uobject->context)->uar, - to_mucontext(qp->uobject->context)->db_tab, + &context->uar, + context->db_tab, to_mqp(qp)->rq.db_index); } mthca_free_qp(to_mdev(qp->device), to_mqp(qp)); @@ -829,14 +837,20 @@ out: static int mthca_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) { - if (cq->uobject) { + if (udata) { + struct mthca_ucontext *context = + rdma_udata_to_drv_context( + udata, + struct mthca_ucontext, + ibucontext); + mthca_unmap_user_db(to_mdev(cq->device), - &to_mucontext(cq->uobject->context)->uar, - to_mucontext(cq->uobject->context)->db_tab, + &context->uar, + context->db_tab, to_mcq(cq)->arm_db_index); mthca_unmap_user_db(to_mdev(cq->device), - &to_mucontext(cq->uobject->context)->uar, - to_mucontext(cq->uobject->context)->db_tab, + &context->uar, + context->db_tab, to_mcq(cq)->set_ci_db_index); } mthca_free_cq(to_mdev(cq->device), to_mcq(cq)); diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 4b7855c7dacf..244255b1e940 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -707,8 +707,12 @@ static void nes_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) struct nes_device *nesdev = nesvnic->nesdev; struct nes_adapter *nesadapter = nesdev->nesadapter; - if ((ibpd->uobject) && (ibpd->uobject->context)) { - nesucontext = to_nesucontext(ibpd->uobject->context); + if (udata) { + nesucontext = + 
rdma_udata_to_drv_context( + udata, + struct nes_ucontext, + ibucontext); nes_debug(NES_DBG_PD, "Clearing bit %u from allocated doorbells\n", nespd->mmap_db_index); clear_bit(nespd->mmap_db_index, nesucontext->allocated_doorbells); @@ -1337,8 +1341,12 @@ static int nes_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) } if (nesqp->user_mode) { - if ((ibqp->uobject)&&(ibqp->uobject->context)) { - nes_ucontext = to_nesucontext(ibqp->uobject->context); + if (udata) { + nes_ucontext = + rdma_udata_to_drv_context( + udata, + struct nes_ucontext, + ibucontext); clear_bit(nesqp->mmap_sq_db_index, nes_ucontext->allocated_wqs); nes_ucontext->mmap_nesqp[nesqp->mmap_sq_db_index] = NULL; if (nes_ucontext->first_free_wq > nesqp->mmap_sq_db_index) { diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 42755e7a10a8..4cd16ad16430 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -986,7 +986,7 @@ int qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) dev->ops->common->chain_free(dev->cdev, &cq->pbl); - if (ibcq->uobject && ibcq->uobject->context) { + if (udata) { qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl); ib_umem_release(cq->q.umem); } @@ -2470,7 +2470,8 @@ err: return rc; } -static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp) +static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp, + struct ib_udata *udata) { int rc = 0; @@ -2480,7 +2481,7 @@ static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp) return rc; } - if (qp->ibqp.uobject && qp->ibqp.uobject->context) + if (udata) qedr_cleanup_user(dev, qp); else qedr_cleanup_kernel(dev, qp); @@ -2532,7 +2533,7 @@ int qedr_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) if (qp->qp_type == IB_QPT_GSI) qedr_destroy_gsi_qp(dev); - qedr_free_qp_resources(dev, qp); + qedr_free_qp_resources(dev, qp, udata); if (atomic_dec_and_test(&qp->refcnt) && 
rdma_protocol_iwarp(&dev->ibdev, 1)) { diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index cdb6357337c0..e282eea8ecce 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -648,7 +648,7 @@ int usnic_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) usnic_dbg("va 0x%lx length 0x%zx\n", mr->umem->va, mr->umem->length); - usnic_uiom_reg_release(mr->umem, ibmr->uobject->context); + usnic_uiom_reg_release(mr->umem); kfree(mr); return 0; } diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c index 06862a6af185..da35d6fdfc5e 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom.c +++ b/drivers/infiniband/hw/usnic/usnic_uiom.c @@ -432,8 +432,7 @@ static inline size_t usnic_uiom_num_pages(struct usnic_uiom_reg *uiomr) return PAGE_ALIGN(uiomr->length + uiomr->offset) >> PAGE_SHIFT; } -void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, - struct ib_ucontext *context) +void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr) { __usnic_uiom_reg_release(uiomr->pd, uiomr, 1); diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.h b/drivers/infiniband/hw/usnic/usnic_uiom.h index c88cfa087e3a..70be49b1ca05 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom.h +++ b/drivers/infiniband/hw/usnic/usnic_uiom.h @@ -90,7 +90,6 @@ void usnic_uiom_free_dev_list(struct device **devs); struct usnic_uiom_reg *usnic_uiom_reg_get(struct usnic_uiom_pd *pd, unsigned long addr, size_t size, int access, int dmasync); -void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, - struct ib_ucontext *ucontext); +void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr); int usnic_uiom_init(char *drv_name); #endif /* USNIC_UIOM_H_ */ -- cgit v1.2.3 From ff23dfa134576e071ace69e91761d229a0f73139 Mon Sep 17 00:00:00 2001 From: Shamir Rabinovitch Date: Sun, 31 Mar 2019 19:10:07 +0300 Subject: IB: Pass only ib_udata in function prototypes 
Now that ib_udata is passed to all the driver's object create/destroy APIs, the ib_udata will carry the ib_ucontext for every user command. There is no need to also pass the ib_ucontext via the function prototypes. Make ib_udata the only argument passed. Signed-off-by: Shamir Rabinovitch Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cq.c | 2 +- drivers/infiniband/core/uverbs_cmd.c | 8 ++-- drivers/infiniband/core/uverbs_std_types_cq.c | 3 +- drivers/infiniband/core/verbs.c | 6 +-- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 21 ++++------- drivers/infiniband/hw/bnxt_re/ib_verbs.h | 4 +- drivers/infiniband/hw/cxgb3/iwch_provider.c | 16 ++++---- drivers/infiniband/hw/cxgb4/cq.c | 9 ++--- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 1 - drivers/infiniband/hw/cxgb4/provider.c | 5 +-- drivers/infiniband/hw/hns/hns_roce_cq.c | 23 ++++++------ drivers/infiniband/hw/hns/hns_roce_device.h | 4 +- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 4 +- drivers/infiniband/hw/hns/hns_roce_pd.c | 5 +-- drivers/infiniband/hw/i40iw/i40iw_verbs.c | 20 ++++------ drivers/infiniband/hw/mlx4/cq.c | 28 +++++++------- drivers/infiniband/hw/mlx4/doorbell.c | 6 ++- drivers/infiniband/hw/mlx4/main.c | 6 +-- drivers/infiniband/hw/mlx4/mlx4_ib.h | 4 +- drivers/infiniband/hw/mlx4/qp.c | 8 ++-- drivers/infiniband/hw/mlx4/srq.c | 3 +- drivers/infiniband/hw/mlx5/cq.c | 23 ++++++------ drivers/infiniband/hw/mlx5/main.c | 17 +++++---- drivers/infiniband/hw/mlx5/mlx5_ib.h | 4 +- drivers/infiniband/hw/mlx5/qp.c | 3 +- drivers/infiniband/hw/mthca/mthca_provider.c | 45 +++++++++++----------- drivers/infiniband/hw/nes/nes_verbs.c | 35 ++++++++--------- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 50 +++++++++++++++---------- drivers/infiniband/hw/ocrdma/ocrdma_verbs.h | 4 +- drivers/infiniband/hw/qedr/verbs.c | 19 ++++++---- drivers/infiniband/hw/qedr/verbs.h | 4 +- drivers/infiniband/hw/usnic/usnic_ib_verbs.c | 4 +- drivers/infiniband/hw/usnic/usnic_ib_verbs.h | 4 +-
drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c | 12 +++--- drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c | 13 ++++--- drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h | 4 +- drivers/infiniband/sw/rdmavt/cq.c | 4 +- drivers/infiniband/sw/rdmavt/cq.h | 1 - drivers/infiniband/sw/rdmavt/mmap.c | 16 +++++--- drivers/infiniband/sw/rdmavt/mmap.h | 6 +-- drivers/infiniband/sw/rdmavt/pd.c | 4 +- drivers/infiniband/sw/rdmavt/pd.h | 3 +- drivers/infiniband/sw/rdmavt/qp.c | 5 +-- drivers/infiniband/sw/rdmavt/srq.c | 6 +-- drivers/infiniband/sw/rxe/rxe_cq.c | 10 ++--- drivers/infiniband/sw/rxe/rxe_loc.h | 16 ++++---- drivers/infiniband/sw/rxe/rxe_mmap.c | 14 ++++--- drivers/infiniband/sw/rxe/rxe_qp.c | 15 +++----- drivers/infiniband/sw/rxe/rxe_queue.c | 22 ++++------- drivers/infiniband/sw/rxe/rxe_queue.h | 15 +++----- drivers/infiniband/sw/rxe/rxe_srq.c | 14 +++---- drivers/infiniband/sw/rxe/rxe_verbs.c | 16 +++----- include/rdma/ib_verbs.h | 5 +-- 53 files changed, 271 insertions(+), 328 deletions(-) diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c index 4797eef549c3..a4c81992267c 100644 --- a/drivers/infiniband/core/cq.c +++ b/drivers/infiniband/core/cq.c @@ -147,7 +147,7 @@ struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private, struct ib_cq *cq; int ret = -ENOMEM; - cq = dev->ops.create_cq(dev, &cq_attr, NULL, NULL); + cq = dev->ops.create_cq(dev, &cq_attr, NULL); if (IS_ERR(cq)) return cq; diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index da31dba33fc5..89b0f5420dfe 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -423,7 +423,7 @@ static int ib_uverbs_alloc_pd(struct uverbs_attr_bundle *attrs) atomic_set(&pd->usecnt, 0); pd->res.type = RDMA_RESTRACK_PD; - ret = ib_dev->ops.alloc_pd(pd, uobj->context, &attrs->driver_udata); + ret = ib_dev->ops.alloc_pd(pd, &attrs->driver_udata); if (ret) goto err_alloc; @@ -594,8 +594,7 @@ static int 
ib_uverbs_open_xrcd(struct uverbs_attr_bundle *attrs) } if (!xrcd) { - xrcd = ib_dev->ops.alloc_xrcd(ib_dev, obj->uobject.context, - &attrs->driver_udata); + xrcd = ib_dev->ops.alloc_xrcd(ib_dev, &attrs->driver_udata); if (IS_ERR(xrcd)) { ret = PTR_ERR(xrcd); goto err; @@ -1009,8 +1008,7 @@ static struct ib_ucq_object *create_cq(struct uverbs_attr_bundle *attrs, attr.comp_vector = cmd->comp_vector; attr.flags = cmd->flags; - cq = ib_dev->ops.create_cq(ib_dev, &attr, obj->uobject.context, - &attrs->driver_udata); + cq = ib_dev->ops.create_cq(ib_dev, &attr, &attrs->driver_udata); if (IS_ERR(cq)) { ret = PTR_ERR(cq); goto err_file; diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c index cde608c268ff..977e386009fc 100644 --- a/drivers/infiniband/core/uverbs_std_types_cq.c +++ b/drivers/infiniband/core/uverbs_std_types_cq.c @@ -111,8 +111,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)( INIT_LIST_HEAD(&obj->comp_list); INIT_LIST_HEAD(&obj->async_list); - cq = ib_dev->ops.create_cq(ib_dev, &attr, obj->uobject.context, - &attrs->driver_udata); + cq = ib_dev->ops.create_cq(ib_dev, &attr, &attrs->driver_udata); if (IS_ERR(cq)) { ret = PTR_ERR(cq); goto err_event_file; diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index ba9a89df815d..a479f4c12541 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -269,7 +269,7 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags, pd->res.type = RDMA_RESTRACK_PD; rdma_restrack_set_task(&pd->res, caller); - ret = device->ops.alloc_pd(pd, NULL, NULL); + ret = device->ops.alloc_pd(pd, NULL); if (ret) { kfree(pd); return ERR_PTR(ret); @@ -1911,7 +1911,7 @@ struct ib_cq *__ib_create_cq(struct ib_device *device, { struct ib_cq *cq; - cq = device->ops.create_cq(device, cq_attr, NULL, NULL); + cq = device->ops.create_cq(device, cq_attr, NULL); if (!IS_ERR(cq)) { cq->device = device; @@ -2142,7 
+2142,7 @@ struct ib_xrcd *__ib_alloc_xrcd(struct ib_device *device, const char *caller) if (!device->ops.alloc_xrcd) return ERR_PTR(-EOPNOTSUPP); - xrcd = device->ops.alloc_xrcd(device, NULL, NULL); + xrcd = device->ops.alloc_xrcd(device, NULL); if (!IS_ERR(xrcd)) { xrcd->device = device; xrcd->inode = NULL; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index a586ac28630b..04e3529ffe06 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -576,14 +576,12 @@ void bnxt_re_dealloc_pd(struct ib_pd *ib_pd, struct ib_udata *udata) &pd->qplib_pd); } -int bnxt_re_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *ucontext, - struct ib_udata *udata) +int bnxt_re_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct ib_device *ibdev = ibpd->device; struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev); - struct bnxt_re_ucontext *ucntx = container_of(ucontext, - struct bnxt_re_ucontext, - ib_uctx); + struct bnxt_re_ucontext *ucntx = rdma_udata_to_drv_context( + udata, struct bnxt_re_ucontext, ib_uctx); struct bnxt_re_pd *pd = container_of(ibpd, struct bnxt_re_pd, ib_pd); int rc; @@ -2589,7 +2587,6 @@ int bnxt_re_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) struct ib_cq *bnxt_re_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, - struct ib_ucontext *context, struct ib_udata *udata) { struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev); @@ -2616,12 +2613,10 @@ struct ib_cq *bnxt_re_create_cq(struct ib_device *ibdev, if (entries > dev_attr->max_cq_wqes + 1) entries = dev_attr->max_cq_wqes + 1; - if (context) { + if (udata) { struct bnxt_re_cq_req req; - struct bnxt_re_ucontext *uctx = container_of - (context, - struct bnxt_re_ucontext, - ib_uctx); + struct bnxt_re_ucontext *uctx = rdma_udata_to_drv_context( + udata, struct bnxt_re_ucontext, ib_uctx); if (ib_copy_from_udata(&req, udata, sizeof(req))) { rc = -EFAULT; goto fail; @@ 
-2672,7 +2667,7 @@ struct ib_cq *bnxt_re_create_cq(struct ib_device *ibdev, atomic_inc(&rdev->cq_count); spin_lock_init(&cq->cq_lock); - if (context) { + if (udata) { struct bnxt_re_cq_resp resp; resp.cqid = cq->qplib_cq.id; @@ -2690,7 +2685,7 @@ struct ib_cq *bnxt_re_create_cq(struct ib_device *ibdev, return &cq->ib_cq; c2fail: - if (context) + if (udata) ib_umem_release(cq->umem); fail: kfree(cq->cql); diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h index 44e49988600e..488dc735a260 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h @@ -163,8 +163,7 @@ int bnxt_re_query_gid(struct ib_device *ibdev, u8 port_num, int index, union ib_gid *gid); enum rdma_link_layer bnxt_re_get_link_layer(struct ib_device *ibdev, u8 port_num); -int bnxt_re_alloc_pd(struct ib_pd *pd, struct ib_ucontext *context, - struct ib_udata *udata); +int bnxt_re_alloc_pd(struct ib_pd *pd, struct ib_udata *udata); void bnxt_re_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata); struct ib_ah *bnxt_re_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, @@ -197,7 +196,6 @@ int bnxt_re_post_recv(struct ib_qp *qp, const struct ib_recv_wr *recv_wr, const struct ib_recv_wr **bad_recv_wr); struct ib_cq *bnxt_re_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, - struct ib_ucontext *context, struct ib_udata *udata); int bnxt_re_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); int bnxt_re_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc); diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index bf07e93aeb94..62b99d26f0d3 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -106,7 +106,6 @@ static int iwch_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) static struct ib_cq *iwch_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, - 
struct ib_ucontext *ib_context, struct ib_udata *udata) { int entries = attr->cqe; @@ -114,7 +113,6 @@ static struct ib_cq *iwch_create_cq(struct ib_device *ibdev, struct iwch_cq *chp; struct iwch_create_cq_resp uresp; struct iwch_create_cq_req ureq; - struct iwch_ucontext *ucontext = NULL; static int warned; size_t resplen; @@ -127,8 +125,7 @@ static struct ib_cq *iwch_create_cq(struct ib_device *ibdev, if (!chp) return ERR_PTR(-ENOMEM); - if (ib_context) { - ucontext = to_iwch_ucontext(ib_context); + if (udata) { if (!t3a_device(rhp)) { if (ib_copy_from_udata(&ureq, udata, sizeof (ureq))) { kfree(chp); @@ -154,7 +151,7 @@ static struct ib_cq *iwch_create_cq(struct ib_device *ibdev, entries = roundup_pow_of_two(entries); chp->cq.size_log2 = ilog2(entries); - if (cxio_create_cq(&rhp->rdev, &chp->cq, !ucontext)) { + if (cxio_create_cq(&rhp->rdev, &chp->cq, !udata)) { kfree(chp); return ERR_PTR(-ENOMEM); } @@ -170,8 +167,10 @@ static struct ib_cq *iwch_create_cq(struct ib_device *ibdev, return ERR_PTR(-ENOMEM); } - if (ucontext) { + if (udata) { struct iwch_mm_entry *mm; + struct iwch_ucontext *ucontext = rdma_udata_to_drv_context( + udata, struct iwch_ucontext, ibucontext); mm = kmalloc(sizeof *mm, GFP_KERNEL); if (!mm) { @@ -378,8 +377,7 @@ static void iwch_deallocate_pd(struct ib_pd *pd, struct ib_udata *udata) cxio_hal_put_pdid(rhp->rdev.rscp, php->pdid); } -static int iwch_allocate_pd(struct ib_pd *pd, struct ib_ucontext *context, - struct ib_udata *udata) +static int iwch_allocate_pd(struct ib_pd *pd, struct ib_udata *udata) { struct iwch_pd *php = to_iwch_pd(pd); struct ib_device *ibdev = pd->device; @@ -394,7 +392,7 @@ static int iwch_allocate_pd(struct ib_pd *pd, struct ib_ucontext *context, php->pdid = pdid; php->rhp = rhp; - if (context) { + if (udata) { struct iwch_alloc_pd_resp resp = {.pdid = php->pdid}; if (ib_copy_to_udata(udata, &resp, sizeof(resp))) { diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index 
571281888de0..52ce586621c6 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -994,7 +994,6 @@ int c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, - struct ib_ucontext *ib_context, struct ib_udata *udata) { int entries = attr->cqe; @@ -1003,10 +1002,11 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, struct c4iw_cq *chp; struct c4iw_create_cq ucmd; struct c4iw_create_cq_resp uresp; - struct c4iw_ucontext *ucontext = NULL; int ret, wr_len; size_t memsize, hwentries; struct c4iw_mm_entry *mm, *mm2; + struct c4iw_ucontext *ucontext = rdma_udata_to_drv_context( + udata, struct c4iw_ucontext, ibucontext); pr_debug("ib_dev %p entries %d\n", ibdev, entries); if (attr->flags) @@ -1017,8 +1017,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, if (vector >= rhp->rdev.lldi.nciq) return ERR_PTR(-EINVAL); - if (ib_context) { - ucontext = to_c4iw_ucontext(ib_context); + if (udata) { if (udata->inlen < sizeof(ucmd)) ucontext->is_32b_cqe = 1; } @@ -1070,7 +1069,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, /* * memsize must be a multiple of the page size if its a user cq. 
*/ - if (ucontext) + if (udata) memsize = roundup(memsize, PAGE_SIZE); chp->cq.size = hwentries; diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 586fd1a00d33..4b721a261053 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -995,7 +995,6 @@ int c4iw_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata); int c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata); struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, - struct ib_ucontext *ib_context, struct ib_udata *udata); int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); int c4iw_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *attr, diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 12f7d3ae6a53..0fbad47661cc 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -204,8 +204,7 @@ static void c4iw_deallocate_pd(struct ib_pd *pd, struct ib_udata *udata) mutex_unlock(&rhp->rdev.stats.lock); } -static int c4iw_allocate_pd(struct ib_pd *pd, struct ib_ucontext *context, - struct ib_udata *udata) +static int c4iw_allocate_pd(struct ib_pd *pd, struct ib_udata *udata) { struct c4iw_pd *php = to_c4iw_pd(pd); struct ib_device *ibdev = pd->device; @@ -220,7 +219,7 @@ static int c4iw_allocate_pd(struct ib_pd *pd, struct ib_ucontext *context, php->pdid = pdid; php->rhp = rhp; - if (context) { + if (udata) { struct c4iw_alloc_pd_resp uresp = {.pdid = php->pdid}; if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) { diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 305c362ef5c6..9caf35061721 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -302,7 +302,6 @@ static void hns_roce_ib_free_cq_buf(struct hns_roce_dev *hr_dev, struct ib_cq *hns_roce_ib_create_cq(struct ib_device 
*ib_dev, const struct ib_cq_init_attr *attr, - struct ib_ucontext *context, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev); @@ -314,6 +313,8 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev, int vector = attr->comp_vector; int cq_entries = attr->cqe; int ret; + struct hns_roce_ucontext *context = rdma_udata_to_drv_context( + udata, struct hns_roce_ucontext, ibucontext); if (cq_entries < 1 || cq_entries > hr_dev->caps.max_cqes) { dev_err(dev, "Creat CQ failed. entries=%d, max=%d\n", @@ -332,7 +333,7 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev, hr_cq->ib_cq.cqe = cq_entries - 1; spin_lock_init(&hr_cq->lock); - if (context) { + if (udata) { if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) { dev_err(dev, "Failed to copy_from_udata.\n"); ret = -EFAULT; @@ -350,8 +351,7 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev, if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) && (udata->outlen >= sizeof(resp))) { - ret = hns_roce_db_map_user(to_hr_ucontext(context), - udata, ucmd.db_addr, + ret = hns_roce_db_map_user(context, udata, ucmd.db_addr, &hr_cq->db); if (ret) { dev_err(dev, "cq record doorbell map failed!\n"); @@ -362,7 +362,7 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev, } /* Get user space parameters */ - uar = &to_hr_ucontext(context)->uar; + uar = &context->uar; } else { if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) { ret = hns_roce_alloc_db(hr_dev, &hr_cq->db, 1); @@ -401,7 +401,7 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev, * problems if tptr is set to zero here, so we initialze it in user * space. 
*/ - if (!context && hr_cq->tptr_addr) + if (!udata && hr_cq->tptr_addr) *hr_cq->tptr_addr = 0; /* Get created cq handler and carry out event */ @@ -409,7 +409,7 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev, hr_cq->event = hns_roce_ib_cq_event; hr_cq->cq_depth = cq_entries; - if (context) { + if (udata) { resp.cqn = hr_cq->cqn; ret = ib_copy_to_udata(udata, &resp, sizeof(resp)); if (ret) @@ -422,21 +422,20 @@ err_cqc: hns_roce_free_cq(hr_dev, hr_cq); err_dbmap: - if (context && (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) && + if (udata && (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) && (udata->outlen >= sizeof(resp))) - hns_roce_db_unmap_user(to_hr_ucontext(context), - &hr_cq->db); + hns_roce_db_unmap_user(context, &hr_cq->db); err_mtt: hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt); - if (context) + if (udata) ib_umem_release(hr_cq->umem); else hns_roce_ib_free_cq_buf(hr_dev, &hr_cq->hr_buf, hr_cq->ib_cq.cqe); err_db: - if (!context && (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB)) + if (!udata && (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB)) hns_roce_free_db(hr_dev, &hr_cq->db); err_cq: diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 780a7ba204db..b23b13f06d58 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -1112,8 +1112,7 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *pd, int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); int hns_roce_destroy_ah(struct ib_ah *ah, u32 flags, struct ib_udata *udata); -int hns_roce_alloc_pd(struct ib_pd *pd, struct ib_ucontext *context, - struct ib_udata *udata); +int hns_roce_alloc_pd(struct ib_pd *pd, struct ib_udata *udata); void hns_roce_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata); struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc); @@ -1177,7 +1176,6 @@ int to_hr_qp_type(int qp_type); struct ib_cq 
*hns_roce_ib_create_cq(struct ib_device *ib_dev, const struct ib_cq_init_attr *attr, - struct ib_ucontext *context, struct ib_udata *udata); int hns_roce_ib_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 1863516f6be9..98c6a41edefd 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -730,7 +730,7 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev) /* Reserved cq for loop qp */ cq_init_attr.cqe = HNS_ROCE_MIN_WQE_NUM * 2; cq_init_attr.comp_vector = 0; - cq = hns_roce_ib_create_cq(&hr_dev->ib_dev, &cq_init_attr, NULL, NULL); + cq = hns_roce_ib_create_cq(&hr_dev->ib_dev, &cq_init_attr, NULL); if (IS_ERR(cq)) { dev_err(dev, "Create cq for reserved loop qp failed!"); return -ENOMEM; @@ -749,7 +749,7 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev) goto alloc_mem_failed; pd->device = ibdev; - ret = hns_roce_alloc_pd(pd, NULL, NULL); + ret = hns_roce_alloc_pd(pd, NULL); if (ret) goto alloc_pd_failed; diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c index 504e6e466d72..813401384d78 100644 --- a/drivers/infiniband/hw/hns/hns_roce_pd.c +++ b/drivers/infiniband/hw/hns/hns_roce_pd.c @@ -57,8 +57,7 @@ void hns_roce_cleanup_pd_table(struct hns_roce_dev *hr_dev) hns_roce_bitmap_cleanup(&hr_dev->pd_bitmap); } -int hns_roce_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, - struct ib_udata *udata) +int hns_roce_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct ib_device *ib_dev = ibpd->device; struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev); @@ -72,7 +71,7 @@ int hns_roce_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, return ret; } - if (context) { + if (udata) { struct hns_roce_ib_alloc_pd_resp uresp = {.pdn = pd->pdn}; if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) { diff --git 
a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index fcb9e2448a49..7bf7fe854464 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -291,18 +291,15 @@ static void i40iw_dealloc_push_page(struct i40iw_device *iwdev, struct i40iw_sc_ /** * i40iw_alloc_pd - allocate protection domain * @pd: PD pointer - * @context: user context created during alloc * @udata: user data */ -static int i40iw_alloc_pd(struct ib_pd *pd, struct ib_ucontext *context, - struct ib_udata *udata) +static int i40iw_alloc_pd(struct ib_pd *pd, struct ib_udata *udata) { struct i40iw_pd *iwpd = to_iwpd(pd); struct i40iw_device *iwdev = to_iwdev(pd->device); struct i40iw_sc_dev *dev = &iwdev->sc_dev; struct i40iw_alloc_pd_resp uresp; struct i40iw_sc_pd *sc_pd; - struct i40iw_ucontext *ucontext; u32 pd_id = 0; int err; @@ -318,8 +315,9 @@ static int i40iw_alloc_pd(struct ib_pd *pd, struct ib_ucontext *context, sc_pd = &iwpd->sc_pd; - if (context) { - ucontext = to_ucontext(context); + if (udata) { + struct i40iw_ucontext *ucontext = rdma_udata_to_drv_context( + udata, struct i40iw_ucontext, ibucontext); dev->iw_pd_ops->pd_init(dev, sc_pd, pd_id, ucontext->abi_ver); memset(&uresp, 0, sizeof(uresp)); uresp.pd_id = pd_id; @@ -1091,12 +1089,10 @@ static int i40iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) * i40iw_create_cq - create cq * @ibdev: device pointer from stack * @attr: attributes for cq - * @context: user context created during alloc * @udata: user data */ static struct ib_cq *i40iw_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, - struct ib_ucontext *context, struct ib_udata *udata) { struct i40iw_device *iwdev = to_iwdev(ibdev); @@ -1146,14 +1142,14 @@ static struct ib_cq *i40iw_create_cq(struct ib_device *ibdev, info.ceq_id_valid = true; info.ceqe_mask = 1; info.type = I40IW_CQ_TYPE_IWARP; - if (context) { - struct i40iw_ucontext *ucontext; + if (udata) { + struct 
i40iw_ucontext *ucontext = rdma_udata_to_drv_context( + udata, struct i40iw_ucontext, ibucontext); struct i40iw_create_cq_req req; struct i40iw_cq_mr *cqmr; memset(&req, 0, sizeof(req)); iwcq->user_mode = true; - ucontext = to_ucontext(context); if (ib_copy_from_udata(&req, udata, sizeof(struct i40iw_create_cq_req))) { err_code = -EFAULT; goto cq_free_resources; @@ -1223,7 +1219,7 @@ static struct ib_cq *i40iw_create_cq(struct ib_device *ibdev, goto cq_free_resources; } - if (context) { + if (udata) { struct i40iw_create_cq_resp resp; memset(&resp, 0, sizeof(resp)); diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index 5403a1ff7cc2..022a0b4ea452 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -174,7 +174,6 @@ err_buf: #define CQ_CREATE_FLAGS_SUPPORTED IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, - struct ib_ucontext *context, struct ib_udata *udata) { int entries = attr->cqe; @@ -184,6 +183,8 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, struct mlx4_uar *uar; void *buf_addr; int err; + struct mlx4_ib_ucontext *context = rdma_udata_to_drv_context( + udata, struct mlx4_ib_ucontext, ibucontext); if (entries < 1 || entries > dev->dev->caps.max_cqes) return ERR_PTR(-EINVAL); @@ -205,7 +206,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, INIT_LIST_HEAD(&cq->send_qp_list); INIT_LIST_HEAD(&cq->recv_qp_list); - if (context) { + if (udata) { struct mlx4_ib_create_cq ucmd; if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) { @@ -219,12 +220,11 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, if (err) goto err_cq; - err = mlx4_ib_db_map_user(to_mucontext(context), udata, - ucmd.db_addr, &cq->db); + err = mlx4_ib_db_map_user(udata, ucmd.db_addr, &cq->db); if (err) goto err_mtt; - uar = &to_mucontext(context)->uar; + uar = &context->uar; cq->mcq.usage = MLX4_RES_USAGE_USER_VERBS; } else 
{ err = mlx4_db_alloc(dev->dev, &cq->db, 1); @@ -249,21 +249,21 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, if (dev->eq_table) vector = dev->eq_table[vector % ibdev->num_comp_vectors]; - err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, uar, - cq->db.dma, &cq->mcq, vector, 0, + err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, uar, cq->db.dma, + &cq->mcq, vector, 0, !!(cq->create_flags & IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION), - buf_addr, !!context); + buf_addr, !!udata); if (err) goto err_dbmap; - if (context) + if (udata) cq->mcq.tasklet_ctx.comp = mlx4_ib_cq_comp; else cq->mcq.comp = mlx4_ib_cq_comp; cq->mcq.event = mlx4_ib_cq_event; - if (context) + if (udata) if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof (__u32))) { err = -EFAULT; goto err_cq_free; @@ -275,19 +275,19 @@ err_cq_free: mlx4_cq_free(dev->dev, &cq->mcq); err_dbmap: - if (context) - mlx4_ib_db_unmap_user(to_mucontext(context), &cq->db); + if (udata) + mlx4_ib_db_unmap_user(context, &cq->db); err_mtt: mlx4_mtt_cleanup(dev->dev, &cq->buf.mtt); - if (context) + if (udata) ib_umem_release(cq->umem); else mlx4_ib_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe); err_db: - if (!context) + if (!udata) mlx4_db_free(dev->dev, &cq->db); err_cq: diff --git a/drivers/infiniband/hw/mlx4/doorbell.c b/drivers/infiniband/hw/mlx4/doorbell.c index 3aab71b29ce8..0f390351cef0 100644 --- a/drivers/infiniband/hw/mlx4/doorbell.c +++ b/drivers/infiniband/hw/mlx4/doorbell.c @@ -31,6 +31,7 @@ */ #include +#include #include "mlx4_ib.h" @@ -41,12 +42,13 @@ struct mlx4_ib_user_db_page { int refcnt; }; -int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, - struct ib_udata *udata, unsigned long virt, +int mlx4_ib_db_map_user(struct ib_udata *udata, unsigned long virt, struct mlx4_db *db) { struct mlx4_ib_user_db_page *page; int err = 0; + struct mlx4_ib_ucontext *context = rdma_udata_to_drv_context( + udata, struct mlx4_ib_ucontext, ibucontext); mutex_lock(&context->db_page_mutex); diff --git 
a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index e50f9de71119..952b1bac46db 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1177,8 +1177,7 @@ static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) } } -static int mlx4_ib_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, - struct ib_udata *udata) +static int mlx4_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct mlx4_ib_pd *pd = to_mpd(ibpd); struct ib_device *ibdev = ibpd->device; @@ -1188,7 +1187,7 @@ static int mlx4_ib_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, if (err) return err; - if (context && ib_copy_to_udata(udata, &pd->pdn, sizeof(__u32))) { + if (udata && ib_copy_to_udata(udata, &pd->pdn, sizeof(__u32))) { mlx4_pd_free(to_mdev(ibdev)->dev, pd->pdn); return -EFAULT; } @@ -1201,7 +1200,6 @@ static void mlx4_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) } static struct ib_xrcd *mlx4_ib_alloc_xrcd(struct ib_device *ibdev, - struct ib_ucontext *context, struct ib_udata *udata) { struct mlx4_ib_xrcd *xrcd; diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 58112b59cc7c..79143848b560 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -723,8 +723,7 @@ static inline u8 mlx4_ib_bond_next_port(struct mlx4_ib_dev *dev) int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev); void mlx4_ib_close_sriov(struct mlx4_ib_dev *dev); -int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, - struct ib_udata *udata, unsigned long virt, +int mlx4_ib_db_map_user(struct ib_udata *udata, unsigned long virt, struct mlx4_db *db); void mlx4_ib_db_unmap_user(struct mlx4_ib_ucontext *context, struct mlx4_db *db); @@ -746,7 +745,6 @@ int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period); int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata); struct ib_cq 
*mlx4_ib_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, - struct ib_ucontext *context, struct ib_udata *udata); int mlx4_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 99ceffe5cfec..364e16b5f8e1 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1041,11 +1041,11 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, goto err_mtt; if (qp_has_rq(init_attr)) { - err = mlx4_ib_db_map_user( - context, udata, - (src == MLX4_IB_QP_SRC) ? ucmd.qp.db_addr : + err = mlx4_ib_db_map_user(udata, + (src == MLX4_IB_QP_SRC) ? + ucmd.qp.db_addr : ucmd.wq.db_addr, - &qp->db); + &qp->db); if (err) goto err_mtt; } diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c index 2a20205d1662..94c3c334a672 100644 --- a/drivers/infiniband/hw/mlx4/srq.c +++ b/drivers/infiniband/hw/mlx4/srq.c @@ -131,8 +131,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, if (err) goto err_mtt; - err = mlx4_ib_db_map_user(ucontext, udata, ucmd.db_addr, - &srq->db); + err = mlx4_ib_db_map_user(udata, ucmd.db_addr, &srq->db); if (err) goto err_mtt; } else { diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 5bed098ccdef..2e2e65f00257 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -679,8 +679,7 @@ static int mini_cqe_res_format_to_hw(struct mlx5_ib_dev *dev, u8 format) } static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, - struct ib_ucontext *context, struct mlx5_ib_cq *cq, - int entries, u32 **cqb, + struct mlx5_ib_cq *cq, int entries, u32 **cqb, int *cqe_size, int *index, int *inlen) { struct mlx5_ib_create_cq ucmd = {}; @@ -691,6 +690,8 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, int ncont; void *cqc; 
int err; + struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context( + udata, struct mlx5_ib_ucontext, ibucontext); ucmdlen = udata->inlen < sizeof(ucmd) ? (sizeof(ucmd) - sizeof(ucmd.flags)) : sizeof(ucmd); @@ -715,8 +716,7 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, return err; } - err = mlx5_ib_db_map_user(to_mucontext(context), udata, ucmd.db_addr, - &cq->db); + err = mlx5_ib_db_map_user(context, udata, ucmd.db_addr, &cq->db); if (err) goto err_umem; @@ -740,7 +740,7 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, MLX5_SET(cqc, cqc, log_page_size, page_shift - MLX5_ADAPTER_PAGE_SHIFT); - *index = to_mucontext(context)->bfregi.sys_pages[0]; + *index = context->bfregi.sys_pages[0]; if (ucmd.cqe_comp_en == 1) { int mini_cqe_format; @@ -782,14 +782,14 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, cq->private_flags |= MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD; } - MLX5_SET(create_cq_in, *cqb, uid, to_mucontext(context)->devx_uid); + MLX5_SET(create_cq_in, *cqb, uid, context->devx_uid); return 0; err_cqb: kvfree(*cqb); err_db: - mlx5_ib_db_unmap_user(to_mucontext(context), &cq->db); + mlx5_ib_db_unmap_user(context, &cq->db); err_umem: ib_umem_release(cq->buf.umem); @@ -886,7 +886,6 @@ static void notify_soft_wc_handler(struct work_struct *work) struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, - struct ib_ucontext *context, struct ib_udata *udata) { int entries = attr->cqe; @@ -927,8 +926,8 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, INIT_LIST_HEAD(&cq->list_recv_qp); if (udata) { - err = create_cq_user(dev, udata, context, cq, entries, - &cqb, &cqe_size, &index, &inlen); + err = create_cq_user(dev, udata, cq, entries, &cqb, &cqe_size, + &index, &inlen); if (err) goto err_create; } else { @@ -965,7 +964,7 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, mlx5_ib_dbg(dev, "cqn 0x%x\n", cq->mcq.cqn); cq->mcq.irqn 
= irqn; - if (context) + if (udata) cq->mcq.tasklet_ctx.comp = mlx5_ib_cq_comp; else cq->mcq.comp = mlx5_ib_cq_comp; @@ -973,7 +972,7 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, INIT_LIST_HEAD(&cq->wc_list); - if (context) + if (udata) if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof(__u32))) { err = -EFAULT; goto err_cmd; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 769a5952a0f6..f706e1bd40ad 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2341,8 +2341,7 @@ int mlx5_ib_dealloc_dm(struct ib_dm *ibdm, struct uverbs_attr_bundle *attrs) return 0; } -static int mlx5_ib_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, - struct ib_udata *udata) +static int mlx5_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct mlx5_ib_pd *pd = to_mpd(ibpd); struct ib_device *ibdev = ibpd->device; @@ -2351,8 +2350,10 @@ static int mlx5_ib_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, u32 out[MLX5_ST_SZ_DW(alloc_pd_out)] = {}; u32 in[MLX5_ST_SZ_DW(alloc_pd_in)] = {}; u16 uid = 0; + struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context( + udata, struct mlx5_ib_ucontext, ibucontext); - uid = context ? to_mucontext(context)->devx_uid : 0; + uid = context ? 
context->devx_uid : 0; MLX5_SET(alloc_pd_in, in, opcode, MLX5_CMD_OP_ALLOC_PD); MLX5_SET(alloc_pd_in, in, uid, uid); err = mlx5_cmd_exec(to_mdev(ibdev)->mdev, in, sizeof(in), @@ -2362,7 +2363,7 @@ static int mlx5_ib_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, pd->pdn = MLX5_GET(alloc_pd_out, out, pd); pd->uid = uid; - if (context) { + if (udata) { resp.pdn = pd->pdn; if (ib_copy_to_udata(udata, &resp, sizeof(resp))) { mlx5_cmd_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn, uid); @@ -4749,11 +4750,11 @@ static int create_dev_resources(struct mlx5_ib_resources *devr) devr->p0->uobject = NULL; atomic_set(&devr->p0->usecnt, 0); - ret = mlx5_ib_alloc_pd(devr->p0, NULL, NULL); + ret = mlx5_ib_alloc_pd(devr->p0, NULL); if (ret) goto error0; - devr->c0 = mlx5_ib_create_cq(&dev->ib_dev, &cq_attr, NULL, NULL); + devr->c0 = mlx5_ib_create_cq(&dev->ib_dev, &cq_attr, NULL); if (IS_ERR(devr->c0)) { ret = PTR_ERR(devr->c0); goto error1; @@ -4765,7 +4766,7 @@ static int create_dev_resources(struct mlx5_ib_resources *devr) devr->c0->cq_context = NULL; atomic_set(&devr->c0->usecnt, 0); - devr->x0 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL); + devr->x0 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL); if (IS_ERR(devr->x0)) { ret = PTR_ERR(devr->x0); goto error2; @@ -4776,7 +4777,7 @@ static int create_dev_resources(struct mlx5_ib_resources *devr) mutex_init(&devr->x0->tgt_qp_mutex); INIT_LIST_HEAD(&devr->x0->tgt_qp_list); - devr->x1 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL); + devr->x1 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL); if (IS_ERR(devr->x1)) { ret = PTR_ERR(devr->x1); goto error3; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index e45f59b0cc52..f7314d78aafd 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1083,7 +1083,6 @@ int mlx5_ib_read_user_wqe_srq(struct mlx5_ib_srq *srq, int wqe_index, void *buffer, int buflen, size_t *bc); struct ib_cq *mlx5_ib_create_cq(struct ib_device 
*ibdev, const struct ib_cq_init_attr *attr, - struct ib_ucontext *context, struct ib_udata *udata); int mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); @@ -1123,8 +1122,7 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, struct ib_mad_hdr *out, size_t *out_mad_size, u16 *out_mad_pkey_index); struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev, - struct ib_ucontext *context, - struct ib_udata *udata); + struct ib_udata *udata); int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata); int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset); int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port); diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 940ac1caa590..3470a9c496b1 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -5632,8 +5632,7 @@ out: } struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev, - struct ib_ucontext *context, - struct ib_udata *udata) + struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(ibdev); struct mlx5_ib_xrcd *xrcd; diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 9e4efd58c119..9a77374a327b 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -363,18 +363,17 @@ static int mthca_mmap_uar(struct ib_ucontext *context, return 0; } -static int mthca_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, - struct ib_udata *udata) +static int mthca_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct ib_device *ibdev = ibpd->device; struct mthca_pd *pd = to_mpd(ibpd); int err; - err = mthca_pd_alloc(to_mdev(ibdev), !context, pd); + err = mthca_pd_alloc(to_mdev(ibdev), !udata, pd); if (err) return err; - if (context) { + if (udata) { if (ib_copy_to_udata(udata, &pd->pd_num, 
sizeof (__u32))) { mthca_pd_free(to_mdev(ibdev), pd); return -EFAULT; @@ -634,7 +633,6 @@ static int mthca_destroy_qp(struct ib_qp *qp, struct ib_udata *udata) static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, - struct ib_ucontext *context, struct ib_udata *udata) { int entries = attr->cqe; @@ -642,6 +640,8 @@ static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, struct mthca_cq *cq; int nent; int err; + struct mthca_ucontext *context = rdma_udata_to_drv_context( + udata, struct mthca_ucontext, ibucontext); if (attr->flags) return ERR_PTR(-EINVAL); @@ -649,19 +649,19 @@ static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, if (entries < 1 || entries > to_mdev(ibdev)->limits.max_cqes) return ERR_PTR(-EINVAL); - if (context) { + if (udata) { if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) return ERR_PTR(-EFAULT); - err = mthca_map_user_db(to_mdev(ibdev), &to_mucontext(context)->uar, - to_mucontext(context)->db_tab, - ucmd.set_db_index, ucmd.set_db_page); + err = mthca_map_user_db(to_mdev(ibdev), &context->uar, + context->db_tab, ucmd.set_db_index, + ucmd.set_db_page); if (err) return ERR_PTR(err); - err = mthca_map_user_db(to_mdev(ibdev), &to_mucontext(context)->uar, - to_mucontext(context)->db_tab, - ucmd.arm_db_index, ucmd.arm_db_page); + err = mthca_map_user_db(to_mdev(ibdev), &context->uar, + context->db_tab, ucmd.arm_db_index, + ucmd.arm_db_page); if (err) goto err_unmap_set; } @@ -672,7 +672,7 @@ static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, goto err_unmap_arm; } - if (context) { + if (udata) { cq->buf.mr.ibmr.lkey = ucmd.lkey; cq->set_ci_db_index = ucmd.set_db_index; cq->arm_db_index = ucmd.arm_db_index; @@ -681,14 +681,13 @@ static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, for (nent = 1; nent <= entries; nent <<= 1) ; /* nothing */ - err = mthca_init_cq(to_mdev(ibdev), nent, - context ? to_mucontext(context) : NULL, - context ? 
ucmd.pdn : to_mdev(ibdev)->driver_pd.pd_num, + err = mthca_init_cq(to_mdev(ibdev), nent, context, + udata ? ucmd.pdn : to_mdev(ibdev)->driver_pd.pd_num, cq); if (err) goto err_free; - if (context && ib_copy_to_udata(udata, &cq->cqn, sizeof (__u32))) { + if (udata && ib_copy_to_udata(udata, &cq->cqn, sizeof(__u32))) { mthca_free_cq(to_mdev(ibdev), cq); err = -EFAULT; goto err_free; @@ -702,14 +701,14 @@ err_free: kfree(cq); err_unmap_arm: - if (context) - mthca_unmap_user_db(to_mdev(ibdev), &to_mucontext(context)->uar, - to_mucontext(context)->db_tab, ucmd.arm_db_index); + if (udata) + mthca_unmap_user_db(to_mdev(ibdev), &context->uar, + context->db_tab, ucmd.arm_db_index); err_unmap_set: - if (context) - mthca_unmap_user_db(to_mdev(ibdev), &to_mucontext(context)->uar, - to_mucontext(context)->db_tab, ucmd.set_db_index); + if (udata) + mthca_unmap_user_db(to_mdev(ibdev), &context->uar, + context->db_tab, ucmd.set_db_index); return ERR_PTR(err); } diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 244255b1e940..a3b5e8eecb98 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -640,22 +640,24 @@ static int nes_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) /** * nes_alloc_pd */ -static int nes_alloc_pd(struct ib_pd *pd, struct ib_ucontext *context, - struct ib_udata *udata) +static int nes_alloc_pd(struct ib_pd *pd, struct ib_udata *udata) { struct ib_device *ibdev = pd->device; struct nes_pd *nespd = to_nespd(pd); struct nes_vnic *nesvnic = to_nesvnic(ibdev); struct nes_device *nesdev = nesvnic->nesdev; struct nes_adapter *nesadapter = nesdev->nesadapter; - struct nes_ucontext *nesucontext; struct nes_alloc_pd_resp uresp; u32 pd_num = 0; int err; + struct nes_ucontext *nesucontext = rdma_udata_to_drv_context( + udata, struct nes_ucontext, ibucontext); - nes_debug(NES_DBG_PD, "nesvnic=%p, netdev=%p %s, ibdev=%p, context=%p, netdev refcnt=%u\n", - nesvnic, 
nesdev->netdev[0], nesdev->netdev[0]->name, ibdev, context, - netdev_refcnt_read(nesvnic->netdev)); + nes_debug( + NES_DBG_PD, + "nesvnic=%p, netdev=%p %s, ibdev=%p, context=%p, netdev refcnt=%u\n", + nesvnic, nesdev->netdev[0], nesdev->netdev[0]->name, ibdev, + &nesucontext->ibucontext, netdev_refcnt_read(nesvnic->netdev)); err = nes_alloc_resource(nesadapter, nesadapter->allocated_pds, nesadapter->max_pd, &pd_num, &nesadapter->next_pd, NES_RESOURCE_PD); @@ -667,8 +669,7 @@ static int nes_alloc_pd(struct ib_pd *pd, struct ib_ucontext *context, nespd->pd_id = (pd_num << (PAGE_SHIFT-12)) + nesadapter->base_pd; - if (context) { - nesucontext = to_nesucontext(context); + if (udata) { nespd->mmap_db_index = find_next_zero_bit(nesucontext->allocated_doorbells, NES_MAX_USER_DB_REGIONS, nesucontext->first_free_db); nes_debug(NES_DBG_PD, "find_first_zero_biton doorbells returned %u, mapping pd_id %u.\n", @@ -1375,7 +1376,6 @@ static int nes_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) */ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, - struct ib_ucontext *context, struct ib_udata *udata) { int entries = attr->cqe; @@ -1420,9 +1420,10 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, nescq->hw_cq.cq_number = cq_num; nescq->ibcq.cqe = nescq->hw_cq.cq_size - 1; + if (udata) { + struct nes_ucontext *nes_ucontext = rdma_udata_to_drv_context( + udata, struct nes_ucontext, ibucontext); - if (context) { - nes_ucontext = to_nesucontext(context); if (ib_copy_from_udata(&req, udata, sizeof (struct nes_create_cq_req))) { nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num); kfree(nescq); @@ -1489,7 +1490,7 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, cqp_request = nes_get_cqp_request(nesdev); if (cqp_request == NULL) { nes_debug(NES_DBG_CQ, "Failed to get a cqp_request.\n"); - if (!context) + if (!udata) pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, mem, 
nescq->hw_cq.cq_pbase); else { @@ -1518,7 +1519,7 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, if (nesadapter->free_4kpbl == 0) { spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); nes_free_cqp_request(nesdev, cqp_request); - if (!context) + if (!udata) pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, mem, nescq->hw_cq.cq_pbase); else { @@ -1540,7 +1541,7 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, if (nesadapter->free_256pbl == 0) { spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); nes_free_cqp_request(nesdev, cqp_request); - if (!context) + if (!udata) pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, mem, nescq->hw_cq.cq_pbase); else { @@ -1566,7 +1567,7 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, (nescq->hw_cq.cq_number | ((u32)nesdev->ceq_index << 16))); - if (context) { + if (udata) { if (pbl_entries != 1) u64temp = (u64)nespbl->pbl_pbase; else @@ -1597,7 +1598,7 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, nescq->hw_cq.cq_number, ret); if ((!ret) || (cqp_request->major_code)) { nes_put_cqp_request(nesdev, cqp_request); - if (!context) + if (!udata) pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, mem, nescq->hw_cq.cq_pbase); else { @@ -1611,7 +1612,7 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, } nes_put_cqp_request(nesdev, cqp_request); - if (context) { + if (udata) { /* free the nespbl */ pci_free_consistent(nesdev->pcidev, nespbl->pbl_size, nespbl->pbl_vbase, nespbl->pbl_pbase); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index b8f891660516..cf7aeb963dce 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -47,6 +47,7 @@ #include #include #include +#include #include "ocrdma.h" #include "ocrdma_hw.h" @@ -367,6 +368,16 @@ static int ocrdma_get_pd_num(struct ocrdma_dev 
*dev, struct ocrdma_pd *pd) return status; } +/* + * NOTE: + * + * ocrdma_ucontext must be used here because this function is also + * called from ocrdma_alloc_ucontext where ib_udata does not have + * valid ib_ucontext pointer. ib_uverbs_get_context does not call + * uobj_{alloc|get_xxx} helpers which are used to store the + * ib_ucontext in uverbs_attr_bundle wrapping the ib_udata. so + * ib_udata does NOT imply valid ib_ucontext here! + */ static int _ocrdma_alloc_pd(struct ocrdma_dev *dev, struct ocrdma_pd *pd, struct ocrdma_ucontext *uctx, struct ib_udata *udata) @@ -593,7 +604,6 @@ int ocrdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) } static int ocrdma_copy_pd_uresp(struct ocrdma_dev *dev, struct ocrdma_pd *pd, - struct ib_ucontext *ib_ctx, struct ib_udata *udata) { int status; @@ -601,7 +611,8 @@ static int ocrdma_copy_pd_uresp(struct ocrdma_dev *dev, struct ocrdma_pd *pd, u64 dpp_page_addr = 0; u32 db_page_size; struct ocrdma_alloc_pd_uresp rsp; - struct ocrdma_ucontext *uctx = get_ocrdma_ucontext(ib_ctx); + struct ocrdma_ucontext *uctx = rdma_udata_to_drv_context( + udata, struct ocrdma_ucontext, ibucontext); memset(&rsp, 0, sizeof(rsp)); rsp.id = pd->id; @@ -639,18 +650,17 @@ dpp_map_err: return status; } -int ocrdma_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, - struct ib_udata *udata) +int ocrdma_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct ib_device *ibdev = ibpd->device; struct ocrdma_dev *dev = get_ocrdma_dev(ibdev); struct ocrdma_pd *pd; - struct ocrdma_ucontext *uctx = NULL; int status; u8 is_uctx_pd = false; + struct ocrdma_ucontext *uctx = rdma_udata_to_drv_context( + udata, struct ocrdma_ucontext, ibucontext); - if (udata && context) { - uctx = get_ocrdma_ucontext(context); + if (udata) { pd = ocrdma_get_ucontext_pd(uctx); if (pd) { is_uctx_pd = true; @@ -664,8 +674,8 @@ int ocrdma_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, goto exit; pd_mapping: - if (udata && context) { - status 
= ocrdma_copy_pd_uresp(dev, pd, context, udata); + if (udata) { + status = ocrdma_copy_pd_uresp(dev, pd, udata); if (status) goto err; } @@ -946,13 +956,17 @@ int ocrdma_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata) } static int ocrdma_copy_cq_uresp(struct ocrdma_dev *dev, struct ocrdma_cq *cq, - struct ib_udata *udata, - struct ib_ucontext *ib_ctx) + struct ib_udata *udata) { int status; - struct ocrdma_ucontext *uctx = get_ocrdma_ucontext(ib_ctx); + struct ocrdma_ucontext *uctx = rdma_udata_to_drv_context( + udata, struct ocrdma_ucontext, ibucontext); struct ocrdma_create_cq_uresp uresp; + /* this must be user flow! */ + if (!udata) + return -EINVAL; + memset(&uresp, 0, sizeof(uresp)); uresp.cq_id = cq->id; uresp.page_size = PAGE_ALIGN(cq->len); @@ -983,13 +997,13 @@ err: struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, - struct ib_ucontext *ib_ctx, struct ib_udata *udata) { int entries = attr->cqe; struct ocrdma_cq *cq; struct ocrdma_dev *dev = get_ocrdma_dev(ibdev); - struct ocrdma_ucontext *uctx = NULL; + struct ocrdma_ucontext *uctx = rdma_udata_to_drv_context( + udata, struct ocrdma_ucontext, ibucontext); u16 pd_id = 0; int status; struct ocrdma_create_cq_ureq ureq; @@ -1011,18 +1025,16 @@ struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, INIT_LIST_HEAD(&cq->sq_head); INIT_LIST_HEAD(&cq->rq_head); - if (ib_ctx) { - uctx = get_ocrdma_ucontext(ib_ctx); + if (udata) pd_id = uctx->cntxt_pd->id; - } status = ocrdma_mbx_create_cq(dev, cq, entries, ureq.dpp_cq, pd_id); if (status) { kfree(cq); return ERR_PTR(status); } - if (ib_ctx) { - status = ocrdma_copy_cq_uresp(dev, cq, udata, ib_ctx); + if (udata) { + status = ocrdma_copy_cq_uresp(dev, cq, udata); if (status) goto ctx_err; } diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h index 3636cbcbcaa4..dfdebe4e48e6 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h +++ 
b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h @@ -69,13 +69,11 @@ void ocrdma_dealloc_ucontext(struct ib_ucontext *uctx); int ocrdma_mmap(struct ib_ucontext *, struct vm_area_struct *vma); -int ocrdma_alloc_pd(struct ib_pd *pd, struct ib_ucontext *uctx, - struct ib_udata *udata); +int ocrdma_alloc_pd(struct ib_pd *pd, struct ib_udata *udata); void ocrdma_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata); struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, - struct ib_ucontext *ib_ctx, struct ib_udata *udata); int ocrdma_resize_cq(struct ib_cq *, int cqe, struct ib_udata *); int ocrdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata); diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 4cd16ad16430..44ab86718c2f 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include "qedr_hsi_rdma.h" @@ -436,8 +437,7 @@ int qedr_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) vma->vm_page_prot); } -int qedr_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, - struct ib_udata *udata) +int qedr_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct ib_device *ibdev = ibpd->device; struct qedr_dev *dev = get_qedr_dev(ibdev); @@ -446,7 +446,7 @@ int qedr_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, int rc; DP_DEBUG(dev, QEDR_MSG_INIT, "Function called from: %s\n", - (udata && context) ? "User Lib" : "Kernel"); + udata ? 
"User Lib" : "Kernel"); if (!dev->rdma_ctx) { DP_ERR(dev, "invalid RDMA context\n"); @@ -459,10 +459,12 @@ int qedr_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, pd->pd_id = pd_id; - if (udata && context) { + if (udata) { struct qedr_alloc_pd_uresp uresp = { .pd_id = pd_id, }; + struct qedr_ucontext *context = rdma_udata_to_drv_context( + udata, struct qedr_ucontext, ibucontext); rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp)); if (rc) { @@ -471,7 +473,7 @@ int qedr_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, return rc; } - pd->uctx = get_qedr_ucontext(context); + pd->uctx = context; pd->uctx->pd = pd; } @@ -816,9 +818,10 @@ int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) struct ib_cq *qedr_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, - struct ib_ucontext *ib_ctx, struct ib_udata *udata) + struct ib_udata *udata) { - struct qedr_ucontext *ctx = get_qedr_ucontext(ib_ctx); + struct qedr_ucontext *ctx = rdma_udata_to_drv_context( + udata, struct qedr_ucontext, ibucontext); struct qed_rdma_destroy_cq_out_params destroy_oparams; struct qed_rdma_destroy_cq_in_params destroy_iparams; struct qedr_dev *dev = get_qedr_dev(ibdev); @@ -906,7 +909,7 @@ struct ib_cq *qedr_create_cq(struct ib_device *ibdev, cq->sig = QEDR_CQ_MAGIC_NUMBER; spin_lock_init(&cq->cq_lock); - if (ib_ctx) { + if (udata) { rc = qedr_copy_cq_uresp(dev, cq, udata); if (rc) goto err3; diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h index cd9659ac2aad..46a9828b9777 100644 --- a/drivers/infiniband/hw/qedr/verbs.h +++ b/drivers/infiniband/hw/qedr/verbs.h @@ -47,13 +47,11 @@ int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata); void qedr_dealloc_ucontext(struct ib_ucontext *uctx); int qedr_mmap(struct ib_ucontext *, struct vm_area_struct *vma); -int qedr_alloc_pd(struct ib_pd *pd, struct ib_ucontext *uctx, - struct ib_udata *udata); +int qedr_alloc_pd(struct ib_pd *pd, struct 
ib_udata *udata); void qedr_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata); struct ib_cq *qedr_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, - struct ib_ucontext *ib_ctx, struct ib_udata *udata); int qedr_resize_cq(struct ib_cq *, int cqe, struct ib_udata *); int qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata); diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index e282eea8ecce..e9352750e029 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -447,8 +447,7 @@ int usnic_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, return 0; } -int usnic_ib_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, - struct ib_udata *udata) +int usnic_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct usnic_ib_pd *pd = to_upd(ibpd); void *umem_pd; @@ -590,7 +589,6 @@ out_unlock: struct ib_cq *usnic_ib_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, - struct ib_ucontext *context, struct ib_udata *udata) { struct ib_cq *cq; diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h index 349c8dc13a12..028f322f8e9b 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h @@ -50,8 +50,7 @@ int usnic_ib_query_gid(struct ib_device *ibdev, u8 port, int index, union ib_gid *gid); int usnic_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey); -int usnic_ib_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, - struct ib_udata *udata); +int usnic_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata); void usnic_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata); struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, @@ -61,7 +60,6 @@ int usnic_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata 
*udata); struct ib_cq *usnic_ib_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, - struct ib_ucontext *context, struct ib_udata *udata); int usnic_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); struct ib_mr *usnic_ib_reg_mr(struct ib_pd *pd, u64 start, u64 length, diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c index 5ba278324134..d7deb19a2800 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c @@ -49,6 +49,7 @@ #include #include #include +#include #include "pvrdma.h" @@ -93,7 +94,6 @@ int pvrdma_req_notify_cq(struct ib_cq *ibcq, * pvrdma_create_cq - create completion queue * @ibdev: the device * @attr: completion queue attributes - * @context: user context * @udata: user data * * @return: ib_cq completion queue pointer on success, @@ -101,7 +101,6 @@ int pvrdma_req_notify_cq(struct ib_cq *ibcq, */ struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, - struct ib_ucontext *context, struct ib_udata *udata) { int entries = attr->cqe; @@ -116,6 +115,8 @@ struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev, struct pvrdma_cmd_create_cq_resp *resp = &rsp.create_cq_resp; struct pvrdma_create_cq_resp cq_resp = {0}; struct pvrdma_create_cq ucmd; + struct pvrdma_ucontext *context = rdma_udata_to_drv_context( + udata, struct pvrdma_ucontext, ibucontext); BUILD_BUG_ON(sizeof(struct pvrdma_cqe) != 64); @@ -133,7 +134,7 @@ struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev, } cq->ibcq.cqe = entries; - cq->is_kernel = !context; + cq->is_kernel = !udata; if (!cq->is_kernel) { if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) { @@ -185,8 +186,7 @@ struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev, memset(cmd, 0, sizeof(*cmd)); cmd->hdr.cmd = PVRDMA_CMD_CREATE_CQ; cmd->nchunks = npages; - cmd->ctx_handle = (context) ? 
- (u64)to_vucontext(context)->ctx_handle : 0; + cmd->ctx_handle = context ? context->ctx_handle : 0; cmd->cqe = entries; cmd->pdir_dma = cq->pdir.dir_dma; ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_CQ_RESP); @@ -204,7 +204,7 @@ struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev, spin_unlock_irqrestore(&dev->cq_tbl_lock, flags); if (!cq->is_kernel) { - cq->uar = &(to_vucontext(context)->uar); + cq->uar = &context->uar; /* Copy udata back. */ if (ib_copy_to_udata(udata, &cq_resp, sizeof(cq_resp))) { diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c index 19ff6004b477..0302fa3b6c85 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c @@ -50,6 +50,7 @@ #include #include #include +#include #include "pvrdma.h" @@ -419,13 +420,11 @@ int pvrdma_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma) /** * pvrdma_alloc_pd - allocate protection domain * @ibpd: PD pointer - * @context: user context * @udata: user data * * @return: the ib_pd protection domain pointer on success, otherwise errno. */ -int pvrdma_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, - struct ib_udata *udata) +int pvrdma_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct ib_device *ibdev = ibpd->device; struct pvrdma_pd *pd = to_vpd(ibpd); @@ -436,13 +435,15 @@ int pvrdma_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, struct pvrdma_cmd_create_pd_resp *resp = &rsp.create_pd_resp; struct pvrdma_alloc_pd_resp pd_resp = {0}; int ret; + struct pvrdma_ucontext *context = rdma_udata_to_drv_context( + udata, struct pvrdma_ucontext, ibucontext); /* Check allowed max pds */ if (!atomic_add_unless(&dev->num_pds, 1, dev->dsr->caps.max_pd)) return -ENOMEM; cmd->hdr.cmd = PVRDMA_CMD_CREATE_PD; - cmd->ctx_handle = (context) ? to_vucontext(context)->ctx_handle : 0; + cmd->ctx_handle = context ? 
context->ctx_handle : 0; ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_PD_RESP); if (ret < 0) { dev_warn(&dev->pdev->dev, @@ -451,12 +452,12 @@ int pvrdma_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, goto err; } - pd->privileged = !context; + pd->privileged = !udata; pd->pd_handle = resp->pd_handle; pd->pdn = resp->pd_handle; pd_resp.pdn = resp->pd_handle; - if (context) { + if (udata) { if (ib_copy_to_udata(udata, &pd_resp, sizeof(pd_resp))) { dev_warn(&dev->pdev->dev, "failed to copy back protection domain\n"); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h index 2c8ba5bf8d0f..562b70e70e79 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h @@ -398,8 +398,7 @@ int pvrdma_modify_port(struct ib_device *ibdev, u8 port, int pvrdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); int pvrdma_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata); void pvrdma_dealloc_ucontext(struct ib_ucontext *context); -int pvrdma_alloc_pd(struct ib_pd *pd, struct ib_ucontext *context, - struct ib_udata *udata); +int pvrdma_alloc_pd(struct ib_pd *pd, struct ib_udata *udata); void pvrdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata); struct ib_mr *pvrdma_get_dma_mr(struct ib_pd *pd, int acc); struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, @@ -412,7 +411,6 @@ int pvrdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, - struct ib_ucontext *context, struct ib_udata *udata); int pvrdma_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); int pvrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c index 6f7ff2384506..a06e6da7a026 100644 --- 
a/drivers/infiniband/sw/rdmavt/cq.c +++ b/drivers/infiniband/sw/rdmavt/cq.c @@ -168,7 +168,6 @@ static void send_complete(struct work_struct *work) * rvt_create_cq - create a completion queue * @ibdev: the device this completion queue is attached to * @attr: creation attributes - * @context: unused by the QLogic_IB driver * @udata: user data for libibverbs.so * * Called by ib_create_cq() in the generic verbs code. @@ -178,7 +177,6 @@ static void send_complete(struct work_struct *work) */ struct ib_cq *rvt_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, - struct ib_ucontext *context, struct ib_udata *udata) { struct rvt_dev_info *rdi = ib_to_rvt(ibdev); @@ -232,7 +230,7 @@ struct ib_cq *rvt_create_cq(struct ib_device *ibdev, if (udata && udata->outlen >= sizeof(__u64)) { int err; - cq->ip = rvt_create_mmap_info(rdi, sz, context, wc); + cq->ip = rvt_create_mmap_info(rdi, sz, udata, wc); if (!cq->ip) { ret = ERR_PTR(-ENOMEM); goto bail_wc; diff --git a/drivers/infiniband/sw/rdmavt/cq.h b/drivers/infiniband/sw/rdmavt/cq.h index e42661ecdef8..3ad6faf18ecb 100644 --- a/drivers/infiniband/sw/rdmavt/cq.h +++ b/drivers/infiniband/sw/rdmavt/cq.h @@ -53,7 +53,6 @@ struct ib_cq *rvt_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, - struct ib_ucontext *context, struct ib_udata *udata); int rvt_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata); int rvt_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags); diff --git a/drivers/infiniband/sw/rdmavt/mmap.c b/drivers/infiniband/sw/rdmavt/mmap.c index 6b712eecbd37..652f4a7efc1b 100644 --- a/drivers/infiniband/sw/rdmavt/mmap.c +++ b/drivers/infiniband/sw/rdmavt/mmap.c @@ -49,6 +49,7 @@ #include #include #include +#include #include "mmap.h" /** @@ -150,18 +151,19 @@ done: * rvt_create_mmap_info - allocate information for hfi1_mmap * @rdi: rvt dev struct * @size: size in bytes to map - * @context: user context + * @udata: user data (must be valid!) 
* @obj: opaque pointer to a cq, wq etc * * Return: rvt_mmap struct on success */ -struct rvt_mmap_info *rvt_create_mmap_info(struct rvt_dev_info *rdi, - u32 size, - struct ib_ucontext *context, - void *obj) +struct rvt_mmap_info *rvt_create_mmap_info(struct rvt_dev_info *rdi, u32 size, + struct ib_udata *udata, void *obj) { struct rvt_mmap_info *ip; + if (!udata) + return ERR_PTR(-EINVAL); + ip = kmalloc_node(sizeof(*ip), GFP_KERNEL, rdi->dparms.node); if (!ip) return ip; @@ -177,7 +179,9 @@ struct rvt_mmap_info *rvt_create_mmap_info(struct rvt_dev_info *rdi, INIT_LIST_HEAD(&ip->pending_mmaps); ip->size = size; - ip->context = context; + ip->context = + container_of(udata, struct uverbs_attr_bundle, driver_udata) + ->context; ip->obj = obj; kref_init(&ip->ref); diff --git a/drivers/infiniband/sw/rdmavt/mmap.h b/drivers/infiniband/sw/rdmavt/mmap.h index fab0e7b1daf9..02466c40bc1e 100644 --- a/drivers/infiniband/sw/rdmavt/mmap.h +++ b/drivers/infiniband/sw/rdmavt/mmap.h @@ -53,10 +53,8 @@ void rvt_mmap_init(struct rvt_dev_info *rdi); void rvt_release_mmap_info(struct kref *ref); int rvt_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); -struct rvt_mmap_info *rvt_create_mmap_info(struct rvt_dev_info *rdi, - u32 size, - struct ib_ucontext *context, - void *obj); +struct rvt_mmap_info *rvt_create_mmap_info(struct rvt_dev_info *rdi, u32 size, + struct ib_udata *udata, void *obj); void rvt_update_mmap_info(struct rvt_dev_info *rdi, struct rvt_mmap_info *ip, u32 size, void *obj); diff --git a/drivers/infiniband/sw/rdmavt/pd.c b/drivers/infiniband/sw/rdmavt/pd.c index e84341282374..a403718f0b5e 100644 --- a/drivers/infiniband/sw/rdmavt/pd.c +++ b/drivers/infiniband/sw/rdmavt/pd.c @@ -51,15 +51,13 @@ /** * rvt_alloc_pd - allocate a protection domain * @ibpd: PD - * @context: optional user context * @udata: optional user data * * Allocate and keep track of a PD. 
* * Return: 0 on success */ -int rvt_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, - struct ib_udata *udata) +int rvt_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct ib_device *ibdev = ibpd->device; struct rvt_dev_info *dev = ib_to_rvt(ibdev); diff --git a/drivers/infiniband/sw/rdmavt/pd.h b/drivers/infiniband/sw/rdmavt/pd.h index d0368a625e03..71ba76d72b1d 100644 --- a/drivers/infiniband/sw/rdmavt/pd.h +++ b/drivers/infiniband/sw/rdmavt/pd.h @@ -50,8 +50,7 @@ #include -int rvt_alloc_pd(struct ib_pd *pd, struct ib_ucontext *context, - struct ib_udata *udata); +int rvt_alloc_pd(struct ib_pd *pd, struct ib_udata *udata); void rvt_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata); #endif /* DEF_RDMAVTPD_H */ diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index e8bba7e56c29..90ed99f4b026 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -957,8 +957,6 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, size_t sg_list_sz; struct ib_qp *ret = ERR_PTR(-ENOMEM); struct rvt_dev_info *rdi = ib_to_rvt(ibpd->device); - struct rvt_ucontext *ucontext = rdma_udata_to_drv_context( - udata, struct rvt_ucontext, ibucontext); void *priv = NULL; size_t sqsize; @@ -1131,8 +1129,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, } else { u32 s = sizeof(struct rvt_rwq) + qp->r_rq.size * sz; - qp->ip = rvt_create_mmap_info(rdi, s, - &ucontext->ibucontext, + qp->ip = rvt_create_mmap_info(rdi, s, udata, qp->r_rq.wq); if (!qp->ip) { ret = ERR_PTR(-ENOMEM); diff --git a/drivers/infiniband/sw/rdmavt/srq.c b/drivers/infiniband/sw/rdmavt/srq.c index 3090b0935714..21d276eaf15a 100644 --- a/drivers/infiniband/sw/rdmavt/srq.c +++ b/drivers/infiniband/sw/rdmavt/srq.c @@ -78,8 +78,6 @@ struct ib_srq *rvt_create_srq(struct ib_pd *ibpd, struct ib_udata *udata) { struct rvt_dev_info *dev = ib_to_rvt(ibpd->device); - struct rvt_ucontext *ucontext = rdma_udata_to_drv_context( - udata, struct 
rvt_ucontext, ibucontext); struct rvt_srq *srq; u32 sz; struct ib_srq *ret; @@ -121,9 +119,7 @@ struct ib_srq *rvt_create_srq(struct ib_pd *ibpd, int err; u32 s = sizeof(struct rvt_rwq) + srq->rq.size * sz; - srq->ip = - rvt_create_mmap_info(dev, s, &ucontext->ibucontext, - srq->rq.wq); + srq->ip = rvt_create_mmap_info(dev, s, udata, srq->rq.wq); if (!srq->ip) { ret = ERR_PTR(-ENOMEM); goto bail_wq; diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c index a57276f2cb84..ad3090131126 100644 --- a/drivers/infiniband/sw/rxe/rxe_cq.c +++ b/drivers/infiniband/sw/rxe/rxe_cq.c @@ -82,7 +82,7 @@ static void rxe_send_complete(unsigned long data) } int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe, - int comp_vector, struct ib_ucontext *context, + int comp_vector, struct ib_udata *udata, struct rxe_create_cq_resp __user *uresp) { int err; @@ -94,7 +94,7 @@ int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe, return -ENOMEM; } - err = do_mmap_info(rxe, uresp ? &uresp->mi : NULL, context, + err = do_mmap_info(rxe, uresp ? &uresp->mi : NULL, udata, cq->queue->buf, cq->queue->buf_size, &cq->queue->ip); if (err) { vfree(cq->queue->buf); @@ -115,13 +115,13 @@ int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe, } int rxe_cq_resize_queue(struct rxe_cq *cq, int cqe, - struct rxe_resize_cq_resp __user *uresp) + struct rxe_resize_cq_resp __user *uresp, + struct ib_udata *udata) { int err; err = rxe_queue_resize(cq->queue, (unsigned int *)&cqe, - sizeof(struct rxe_cqe), - cq->queue->ip ? cq->queue->ip->context : NULL, + sizeof(struct rxe_cqe), udata, uresp ? 
&uresp->mi : NULL, NULL, &cq->cq_lock); if (!err) cq->ibcq.cqe = cqe; diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h index 3d8cef836f0d..775c23becaec 100644 --- a/drivers/infiniband/sw/rxe/rxe_loc.h +++ b/drivers/infiniband/sw/rxe/rxe_loc.h @@ -53,11 +53,12 @@ int rxe_cq_chk_attr(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe, int comp_vector); int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe, - int comp_vector, struct ib_ucontext *context, + int comp_vector, struct ib_udata *udata, struct rxe_create_cq_resp __user *uresp); int rxe_cq_resize_queue(struct rxe_cq *cq, int new_cqe, - struct rxe_resize_cq_resp __user *uresp); + struct rxe_resize_cq_resp __user *uresp, + struct ib_udata *udata); int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited); @@ -91,10 +92,8 @@ struct rxe_mmap_info { void rxe_mmap_release(struct kref *ref); -struct rxe_mmap_info *rxe_create_mmap_info(struct rxe_dev *dev, - u32 size, - struct ib_ucontext *context, - void *obj); +struct rxe_mmap_info *rxe_create_mmap_info(struct rxe_dev *dev, u32 size, + struct ib_udata *udata, void *obj); int rxe_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); @@ -224,13 +223,12 @@ int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq, struct ib_srq_attr *attr, enum ib_srq_attr_mask mask); int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq, - struct ib_srq_init_attr *init, - struct ib_ucontext *context, + struct ib_srq_init_attr *init, struct ib_udata *udata, struct rxe_create_srq_resp __user *uresp); int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq, struct ib_srq_attr *attr, enum ib_srq_attr_mask mask, - struct rxe_modify_srq_cmd *ucmd); + struct rxe_modify_srq_cmd *ucmd, struct ib_udata *udata); void rxe_dealloc(struct ib_device *ib_dev); diff --git a/drivers/infiniband/sw/rxe/rxe_mmap.c b/drivers/infiniband/sw/rxe/rxe_mmap.c index d22431e3a908..48f48122ddcb 100644 --- 
a/drivers/infiniband/sw/rxe/rxe_mmap.c +++ b/drivers/infiniband/sw/rxe/rxe_mmap.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "rxe.h" #include "rxe_loc.h" @@ -140,13 +141,14 @@ done: /* * Allocate information for rxe_mmap */ -struct rxe_mmap_info *rxe_create_mmap_info(struct rxe_dev *rxe, - u32 size, - struct ib_ucontext *context, - void *obj) +struct rxe_mmap_info *rxe_create_mmap_info(struct rxe_dev *rxe, u32 size, + struct ib_udata *udata, void *obj) { struct rxe_mmap_info *ip; + if (!udata) + return ERR_PTR(-EINVAL); + ip = kmalloc(sizeof(*ip), GFP_KERNEL); if (!ip) return NULL; @@ -165,7 +167,9 @@ struct rxe_mmap_info *rxe_create_mmap_info(struct rxe_dev *rxe, INIT_LIST_HEAD(&ip->pending_mmaps); ip->info.size = size; - ip->context = context; + ip->context = + container_of(udata, struct uverbs_attr_bundle, driver_udata) + ->context; ip->obj = obj; kref_init(&ip->ref); diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index 09ede70dc1e8..e2c6d1cedf41 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -217,8 +217,7 @@ static void rxe_qp_init_misc(struct rxe_dev *rxe, struct rxe_qp *qp, } static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp, - struct ib_qp_init_attr *init, - struct ib_ucontext *context, + struct ib_qp_init_attr *init, struct ib_udata *udata, struct rxe_create_qp_resp __user *uresp) { int err; @@ -254,7 +253,7 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp, if (!qp->sq.queue) return -ENOMEM; - err = do_mmap_info(rxe, uresp ? &uresp->sq_mi : NULL, context, + err = do_mmap_info(rxe, uresp ? 
&uresp->sq_mi : NULL, udata, qp->sq.queue->buf, qp->sq.queue->buf_size, &qp->sq.queue->ip); @@ -287,7 +286,7 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp, static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp, struct ib_qp_init_attr *init, - struct ib_ucontext *context, + struct ib_udata *udata, struct rxe_create_qp_resp __user *uresp) { int err; @@ -308,7 +307,7 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp, if (!qp->rq.queue) return -ENOMEM; - err = do_mmap_info(rxe, uresp ? &uresp->rq_mi : NULL, context, + err = do_mmap_info(rxe, uresp ? &uresp->rq_mi : NULL, udata, qp->rq.queue->buf, qp->rq.queue->buf_size, &qp->rq.queue->ip); if (err) { @@ -344,8 +343,6 @@ int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd, struct rxe_cq *rcq = to_rcq(init->recv_cq); struct rxe_cq *scq = to_rcq(init->send_cq); struct rxe_srq *srq = init->srq ? to_rsrq(init->srq) : NULL; - struct rxe_ucontext *ucontext = - rdma_udata_to_drv_context(udata, struct rxe_ucontext, ibuc); rxe_add_ref(pd); rxe_add_ref(rcq); @@ -360,11 +357,11 @@ int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd, rxe_qp_init_misc(rxe, qp, init); - err = rxe_qp_init_req(rxe, qp, init, &ucontext->ibuc, uresp); + err = rxe_qp_init_req(rxe, qp, init, udata, uresp); if (err) goto err1; - err = rxe_qp_init_resp(rxe, qp, init, &ucontext->ibuc, uresp); + err = rxe_qp_init_resp(rxe, qp, init, udata, uresp); if (err) goto err2; diff --git a/drivers/infiniband/sw/rxe/rxe_queue.c b/drivers/infiniband/sw/rxe/rxe_queue.c index f84ab4469261..ff92704de32f 100644 --- a/drivers/infiniband/sw/rxe/rxe_queue.c +++ b/drivers/infiniband/sw/rxe/rxe_queue.c @@ -36,18 +36,15 @@ #include "rxe_loc.h" #include "rxe_queue.h" -int do_mmap_info(struct rxe_dev *rxe, - struct mminfo __user *outbuf, - struct ib_ucontext *context, - struct rxe_queue_buf *buf, - size_t buf_size, - struct rxe_mmap_info **ip_p) +int do_mmap_info(struct 
rxe_dev *rxe, struct mminfo __user *outbuf, + struct ib_udata *udata, struct rxe_queue_buf *buf, + size_t buf_size, struct rxe_mmap_info **ip_p) { int err; struct rxe_mmap_info *ip = NULL; if (outbuf) { - ip = rxe_create_mmap_info(rxe, buf_size, context, buf); + ip = rxe_create_mmap_info(rxe, buf_size, udata, buf); if (!ip) goto err1; @@ -153,12 +150,9 @@ static int resize_finish(struct rxe_queue *q, struct rxe_queue *new_q, return 0; } -int rxe_queue_resize(struct rxe_queue *q, - unsigned int *num_elem_p, - unsigned int elem_size, - struct ib_ucontext *context, - struct mminfo __user *outbuf, - spinlock_t *producer_lock, +int rxe_queue_resize(struct rxe_queue *q, unsigned int *num_elem_p, + unsigned int elem_size, struct ib_udata *udata, + struct mminfo __user *outbuf, spinlock_t *producer_lock, spinlock_t *consumer_lock) { struct rxe_queue *new_q; @@ -170,7 +164,7 @@ int rxe_queue_resize(struct rxe_queue *q, if (!new_q) return -ENOMEM; - err = do_mmap_info(new_q->rxe, outbuf, context, new_q->buf, + err = do_mmap_info(new_q->rxe, outbuf, udata, new_q->buf, new_q->buf_size, &new_q->ip); if (err) { vfree(new_q->buf); diff --git a/drivers/infiniband/sw/rxe/rxe_queue.h b/drivers/infiniband/sw/rxe/rxe_queue.h index 79ba4b320054..acd0a925481c 100644 --- a/drivers/infiniband/sw/rxe/rxe_queue.h +++ b/drivers/infiniband/sw/rxe/rxe_queue.h @@ -76,12 +76,9 @@ struct rxe_queue { unsigned int index_mask; }; -int do_mmap_info(struct rxe_dev *rxe, - struct mminfo __user *outbuf, - struct ib_ucontext *context, - struct rxe_queue_buf *buf, - size_t buf_size, - struct rxe_mmap_info **ip_p); +int do_mmap_info(struct rxe_dev *rxe, struct mminfo __user *outbuf, + struct ib_udata *udata, struct rxe_queue_buf *buf, + size_t buf_size, struct rxe_mmap_info **ip_p); void rxe_queue_reset(struct rxe_queue *q); @@ -89,10 +86,8 @@ struct rxe_queue *rxe_queue_init(struct rxe_dev *rxe, int *num_elem, unsigned int elem_size); -int rxe_queue_resize(struct rxe_queue *q, - unsigned int *num_elem_p, 
- unsigned int elem_size, - struct ib_ucontext *context, +int rxe_queue_resize(struct rxe_queue *q, unsigned int *num_elem_p, + unsigned int elem_size, struct ib_udata *udata, struct mminfo __user *outbuf, /* Protect producers while resizing queue */ spinlock_t *producer_lock, diff --git a/drivers/infiniband/sw/rxe/rxe_srq.c b/drivers/infiniband/sw/rxe/rxe_srq.c index c41a5fee81f7..d8459431534e 100644 --- a/drivers/infiniband/sw/rxe/rxe_srq.c +++ b/drivers/infiniband/sw/rxe/rxe_srq.c @@ -99,8 +99,7 @@ err1: } int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq, - struct ib_srq_init_attr *init, - struct ib_ucontext *context, + struct ib_srq_init_attr *init, struct ib_udata *udata, struct rxe_create_srq_resp __user *uresp) { int err; @@ -128,7 +127,7 @@ int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq, srq->rq.queue = q; - err = do_mmap_info(rxe, uresp ? &uresp->mi : NULL, context, q->buf, + err = do_mmap_info(rxe, uresp ? &uresp->mi : NULL, udata, q->buf, q->buf_size, &q->ip); if (err) { vfree(q->buf); @@ -149,7 +148,7 @@ int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq, int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq, struct ib_srq_attr *attr, enum ib_srq_attr_mask mask, - struct rxe_modify_srq_cmd *ucmd) + struct rxe_modify_srq_cmd *ucmd, struct ib_udata *udata) { int err; struct rxe_queue *q = srq->rq.queue; @@ -163,11 +162,8 @@ int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq, mi = u64_to_user_ptr(ucmd->mmap_info_addr); err = rxe_queue_resize(q, &attr->max_wr, - rcv_wqe_size(srq->rq.max_sge), - srq->rq.queue->ip ? 
- srq->rq.queue->ip->context : - NULL, - mi, &srq->rq.producer_lock, + rcv_wqe_size(srq->rq.max_sge), udata, mi, + &srq->rq.producer_lock, &srq->rq.consumer_lock); if (err) goto err2; diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index e625731ae42d..4f581af2ad54 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -176,8 +176,7 @@ static int rxe_port_immutable(struct ib_device *dev, u8 port_num, return 0; } -static int rxe_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context, - struct ib_udata *udata) +static int rxe_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct rxe_dev *rxe = to_rdev(ibpd->device); struct rxe_pd *pd = to_rpd(ibpd); @@ -305,8 +304,6 @@ static struct ib_srq *rxe_create_srq(struct ib_pd *ibpd, int err; struct rxe_dev *rxe = to_rdev(ibpd->device); struct rxe_pd *pd = to_rpd(ibpd); - struct rxe_ucontext *ucontext = - rdma_udata_to_drv_context(udata, struct rxe_ucontext, ibuc); struct rxe_srq *srq; struct rxe_create_srq_resp __user *uresp = NULL; @@ -330,7 +327,7 @@ static struct ib_srq *rxe_create_srq(struct ib_pd *ibpd, rxe_add_ref(pd); srq->pd = pd; - err = rxe_srq_from_init(rxe, srq, init, &ucontext->ibuc, uresp); + err = rxe_srq_from_init(rxe, srq, init, udata, uresp); if (err) goto err2; @@ -366,7 +363,7 @@ static int rxe_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, if (err) goto err1; - err = rxe_srq_from_attr(rxe, srq, attr, mask, &ucmd); + err = rxe_srq_from_attr(rxe, srq, attr, mask, &ucmd, udata); if (err) goto err1; @@ -799,7 +796,6 @@ err1: static struct ib_cq *rxe_create_cq(struct ib_device *dev, const struct ib_cq_init_attr *attr, - struct ib_ucontext *context, struct ib_udata *udata) { int err; @@ -826,8 +822,8 @@ static struct ib_cq *rxe_create_cq(struct ib_device *dev, goto err1; } - err = rxe_cq_from_init(rxe, cq, attr->cqe, attr->comp_vector, - context, uresp); + err = rxe_cq_from_init(rxe, cq, attr->cqe, 
attr->comp_vector, udata, + uresp); if (err) goto err2; @@ -866,7 +862,7 @@ static int rxe_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) if (err) goto err1; - err = rxe_cq_resize_queue(cq, cqe, uresp); + err = rxe_cq_resize_queue(cq, cqe, uresp, udata); if (err) goto err1; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 54e48dd36644..0e24f6b6c61d 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2394,8 +2394,7 @@ struct ib_device_ops { void (*dealloc_ucontext)(struct ib_ucontext *context); int (*mmap)(struct ib_ucontext *context, struct vm_area_struct *vma); void (*disassociate_ucontext)(struct ib_ucontext *ibcontext); - int (*alloc_pd)(struct ib_pd *pd, struct ib_ucontext *context, - struct ib_udata *udata); + int (*alloc_pd)(struct ib_pd *pd, struct ib_udata *udata); void (*dealloc_pd)(struct ib_pd *pd, struct ib_udata *udata); struct ib_ah *(*create_ah)(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, u32 flags, @@ -2421,7 +2420,6 @@ struct ib_device_ops { int (*destroy_qp)(struct ib_qp *qp, struct ib_udata *udata); struct ib_cq *(*create_cq)(struct ib_device *device, const struct ib_cq_init_attr *attr, - struct ib_ucontext *context, struct ib_udata *udata); int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period); int (*destroy_cq)(struct ib_cq *cq, struct ib_udata *udata); @@ -2456,7 +2454,6 @@ struct ib_device_ops { int (*attach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid); int (*detach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid); struct ib_xrcd *(*alloc_xrcd)(struct ib_device *device, - struct ib_ucontext *ucontext, struct ib_udata *udata); int (*dealloc_xrcd)(struct ib_xrcd *xrcd, struct ib_udata *udata); struct ib_flow *(*create_flow)(struct ib_qp *qp, -- cgit v1.2.3 From 6734b2973565e36659e97e12ab0d0faf1d9f3fbe Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 19 Mar 2019 11:10:08 +0200 Subject: RDMA/hns: Fix bad endianess of port_pd variable port_pd is treated as 
le32 in declaration and read, fix assignment to be in le32 too. This change fixes the following compilation warnings. drivers/infiniband/hw/hns/hns_roce_ah.c:67:24: warning: incorrect type in assignment (different base types) drivers/infiniband/hw/hns/hns_roce_ah.c:67:24: expected restricted __le32 [usertype] port_pd drivers/infiniband/hw/hns/hns_roce_ah.c:67:24: got restricted __be32 [usertype] Fixes: 9a4435375cd1 ("IB/hns: Add driver files for hns RoCE driver") Signed-off-by: Leon Romanovsky Reviewed-by: Gal Pressman Reviewed-by: Lijun Ou Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_ah.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c index 42067325ae5e..6ba505bc7cce 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ah.c +++ b/drivers/infiniband/hw/hns/hns_roce_ah.c @@ -70,7 +70,7 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd, HNS_ROCE_VLAN_SL_BIT_MASK) << HNS_ROCE_VLAN_SL_SHIFT; - ah->av.port_pd = cpu_to_be32(to_hr_pd(ibpd)->pdn | + ah->av.port_pd = cpu_to_le32(to_hr_pd(ibpd)->pdn | (rdma_ah_get_port_num(ah_attr) << HNS_ROCE_PORT_NUM_SHIFT)); ah->av.gid_index = grh->sgid_index; -- cgit v1.2.3 From 061ccb52d23cfa2cf3195546a21c3a87194db5b7 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 2 Apr 2019 15:35:13 +0300 Subject: RDMA/cma: Set proper port number as index Conversion from IDR to XArray missed the fact that idr_alloc() returned index as a return value, this index was saved in port variable and used as query index later on. This caused the following error. 
BUG: KASAN: use-after-free in cma_check_port+0x86a/0xa20 [rdma_cm] Read of size 8 at addr ffff888069fde998 by task ucmatose/387 CPU: 3 PID: 387 Comm: ucmatose Not tainted 5.1.0-rc2+ #253 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.11.0-0-g63451fca13-prebuilt.qemu-project.org 04/01/2014 Call Trace: dump_stack+0x7c/0xc0 print_address_description+0x6c/0x23c ? cma_check_port+0x86a/0xa20 [rdma_cm] kasan_report.cold.3+0x1c/0x35 ? cma_check_port+0x86a/0xa20 [rdma_cm] ? cma_check_port+0x86a/0xa20 [rdma_cm] cma_check_port+0x86a/0xa20 [rdma_cm] rdma_bind_addr+0x11bc/0x1b00 [rdma_cm] ? find_held_lock+0x33/0x1c0 ? cma_ndev_work_handler+0x180/0x180 [rdma_cm] ? wait_for_completion+0x3d0/0x3d0 ucma_bind+0x120/0x160 [rdma_ucm] ? ucma_resolve_addr+0x1a0/0x1a0 [rdma_ucm] ucma_write+0x1f8/0x2b0 [rdma_ucm] ? ucma_open+0x260/0x260 [rdma_ucm] vfs_write+0x157/0x460 ksys_write+0xb8/0x170 ? __ia32_sys_read+0xb0/0xb0 ? trace_hardirqs_off_caller+0x5b/0x160 ? do_syscall_64+0x18/0x3c0 do_syscall_64+0x95/0x3c0 entry_SYSCALL_64_after_hwframe+0x49/0xbe Allocated by task 381: __kasan_kmalloc.constprop.5+0xc1/0xd0 cma_alloc_port+0x4d/0x160 [rdma_cm] rdma_bind_addr+0x14e7/0x1b00 [rdma_cm] ucma_bind+0x120/0x160 [rdma_ucm] ucma_write+0x1f8/0x2b0 [rdma_ucm] vfs_write+0x157/0x460 ksys_write+0xb8/0x170 do_syscall_64+0x95/0x3c0 entry_SYSCALL_64_after_hwframe+0x49/0xbe Freed by task 381: __kasan_slab_free+0x12e/0x180 kfree+0xed/0x290 rdma_destroy_id+0x6b6/0x9e0 [rdma_cm] ucma_close+0x110/0x300 [rdma_ucm] __fput+0x25a/0x740 task_work_run+0x10e/0x190 do_exit+0x85e/0x29e0 do_group_exit+0xf0/0x2e0 get_signal+0x2e0/0x17e0 do_signal+0x94/0x1570 exit_to_usermode_loop+0xfa/0x130 do_syscall_64+0x327/0x3c0 entry_SYSCALL_64_after_hwframe+0x49/0xbe Reported-by: Reported-by: Ran Rozenstein Fixes: 638267537ad9 ("cma: Convert portspace IDRs to XArray") Signed-off-by: Leon Romanovsky Reviewed-by: Bart Van Assche Tested-by: Bart Van Assche Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c 
| 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 895899230a7e..7e139b3839dc 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -3251,7 +3251,7 @@ static int cma_alloc_port(enum rdma_ucm_port_space ps, goto err; bind_list->ps = ps; - bind_list->port = (unsigned short)ret; + bind_list->port = snum; cma_bind_port(bind_list, id_priv); return 0; err: -- cgit v1.2.3 From f6f3f532556e4fcaa2d259fd04a800bfb4f9670d Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Mon, 18 Mar 2019 09:58:30 -0700 Subject: IB/hfi1: Delay the release of destination mr for TID RDMA WRITE DATA The reference of destination memory region is first obtained when TID RDMA WRITE request is first received on the responder side. This reference is released once all TID RDMA WRITE RESP packets are sent to the requester side, even though not all TID RDMA WRITE DATA packets may have been received. This early release will especially be undesired if the software needs to access the destination memory before the last data packet is received. This patch delays the release of the MR until all TID RDMA DATA packets have been received. A helper function to release the reference is also created to simplify the code. Reviewed-by: Mike Marciniszyn Reviewed-by: Dennis Dalessandro Reviewed-by: Michael J. 
Ruhl Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/rc.c | 23 ++++++----------------- drivers/infiniband/hw/hfi1/rc.h | 8 ++++++++ drivers/infiniband/hw/hfi1/tid_rdma.c | 16 ++++------------ 3 files changed, 18 insertions(+), 29 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 5991211d72bd..5ba39a9f65ad 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -140,10 +140,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp, case OP(RDMA_READ_RESPONSE_LAST): case OP(RDMA_READ_RESPONSE_ONLY): e = &qp->s_ack_queue[qp->s_tail_ack_queue]; - if (e->rdma_sge.mr) { - rvt_put_mr(e->rdma_sge.mr); - e->rdma_sge.mr = NULL; - } + release_rdma_sge_mr(e); /* FALLTHROUGH */ case OP(ATOMIC_ACKNOWLEDGE): /* @@ -343,7 +340,8 @@ write_resp: break; e->sent = 1; - qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST); + /* Do not free e->rdma_sge until all data are received */ + qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE); break; case TID_OP(READ_RESP): @@ -2643,10 +2641,7 @@ static noinline int rc_rcv_error(struct ib_other_headers *ohdr, void *data, len = be32_to_cpu(reth->length); if (unlikely(offset + len != e->rdma_sge.sge_length)) goto unlock_done; - if (e->rdma_sge.mr) { - rvt_put_mr(e->rdma_sge.mr); - e->rdma_sge.mr = NULL; - } + release_rdma_sge_mr(e); if (len != 0) { u32 rkey = be32_to_cpu(reth->rkey); u64 vaddr = get_ib_reth_vaddr(reth); @@ -3088,10 +3083,7 @@ send_last: update_ack_queue(qp, next); } e = &qp->s_ack_queue[qp->r_head_ack_queue]; - if (e->rdma_sge.mr) { - rvt_put_mr(e->rdma_sge.mr); - e->rdma_sge.mr = NULL; - } + release_rdma_sge_mr(e); reth = &ohdr->u.rc.reth; len = be32_to_cpu(reth->length); if (len) { @@ -3166,10 +3158,7 @@ send_last: update_ack_queue(qp, next); } e = &qp->s_ack_queue[qp->r_head_ack_queue]; - if (e->rdma_sge.mr) { - rvt_put_mr(e->rdma_sge.mr); - e->rdma_sge.mr = NULL; - } + 
release_rdma_sge_mr(e); /* Process OPFN special virtual address */ if (opfn) { opfn_conn_response(qp, e, ateth); diff --git a/drivers/infiniband/hw/hfi1/rc.h b/drivers/infiniband/hw/hfi1/rc.h index 8e0935b9bf2a..5ed5e85d5841 100644 --- a/drivers/infiniband/hw/hfi1/rc.h +++ b/drivers/infiniband/hw/hfi1/rc.h @@ -41,6 +41,14 @@ static inline u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe, return rvt_restart_sge(ss, wqe, len); } +static inline void release_rdma_sge_mr(struct rvt_ack_entry *e) +{ + if (e->rdma_sge.mr) { + rvt_put_mr(e->rdma_sge.mr); + e->rdma_sge.mr = NULL; + } +} + struct rvt_ack_entry *find_prev_entry(struct rvt_qp *qp, u32 psn, u8 *prev, u8 *prev_ack, bool *scheduled); int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, u64 val, diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c index fdda33aca77f..145dd3348fdd 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.c +++ b/drivers/infiniband/hw/hfi1/tid_rdma.c @@ -2036,10 +2036,7 @@ static int tid_rdma_rcv_error(struct hfi1_packet *packet, if (psn != e->psn || len != req->total_len) goto unlock; - if (e->rdma_sge.mr) { - rvt_put_mr(e->rdma_sge.mr); - e->rdma_sge.mr = NULL; - } + release_rdma_sge_mr(e); rkey = be32_to_cpu(reth->rkey); vaddr = get_ib_reth_vaddr(reth); @@ -2285,10 +2282,7 @@ void hfi1_rc_rcv_tid_rdma_read_req(struct hfi1_packet *packet) update_ack_queue(qp, next); } e = &qp->s_ack_queue[qp->r_head_ack_queue]; - if (e->rdma_sge.mr) { - rvt_put_mr(e->rdma_sge.mr); - e->rdma_sge.mr = NULL; - } + release_rdma_sge_mr(e); rkey = be32_to_cpu(reth->rkey); qp->r_len = len; @@ -3751,10 +3745,7 @@ void hfi1_rc_rcv_tid_rdma_write_req(struct hfi1_packet *packet) goto update_head; } - if (e->rdma_sge.mr) { - rvt_put_mr(e->rdma_sge.mr); - e->rdma_sge.mr = NULL; - } + release_rdma_sge_mr(e); /* The length needs to be in multiples of PAGE_SIZE */ if (!len || len & ~PAGE_MASK) @@ -4347,6 +4338,7 @@ void hfi1_rc_rcv_tid_rdma_write_data(struct 
hfi1_packet *packet) priv->r_tid_ack = priv->r_tid_tail; if (opcode == TID_OP(WRITE_DATA_LAST)) { + release_rdma_sge_mr(e); for (next = priv->r_tid_tail + 1; ; next++) { if (next > rvt_size_atomic(&dev->rdi)) next = 0; -- cgit v1.2.3 From 6a40693a884dacae68c1771d369ad3be0594ba1c Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Mon, 18 Mar 2019 09:58:40 -0700 Subject: IB/hfi1: Add a function to read next expected psn from hardware flow This patch adds a function to read next expected KDETH PSN from hardware flow to simplify the code. Reviewed-by: Mike Marciniszyn Reviewed-by: Dennis Dalessandro Reviewed-by: Michael J. Ruhl Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/tid_rdma.c | 38 +++++++++++++++++------------------ 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c index 145dd3348fdd..d540e9cffa37 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.c +++ b/drivers/infiniband/hw/hfi1/tid_rdma.c @@ -128,6 +128,7 @@ static int make_tid_rdma_ack(struct rvt_qp *qp, struct ib_other_headers *ohdr, struct hfi1_pkt_state *ps); static void hfi1_do_tid_send(struct rvt_qp *qp); +static u32 read_r_next_psn(struct hfi1_devdata *dd, u8 ctxt, u8 fidx); static u64 tid_rdma_opfn_encode(struct tid_rdma_params *p) { @@ -2807,18 +2808,10 @@ static bool handle_read_kdeth_eflags(struct hfi1_ctxtdata *rcd, } priv->flow_state.r_next_psn++; } else { - u64 reg; u32 last_psn; - /* - * The only sane way to get the amount of - * progress is to read the HW flow state. 
- */ - reg = read_uctxt_csr(dd, rcd->ctxt, - RCV_TID_FLOW_TABLE + - (8 * flow->idx)); - last_psn = mask_psn(reg); - + last_psn = read_r_next_psn(dd, rcd->ctxt, + flow->idx); priv->flow_state.r_next_psn = last_psn; priv->flow_state.flags |= TID_FLOW_SW_PSN; /* @@ -2968,17 +2961,10 @@ bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd, switch (rte) { case RHF_RTE_EXPECTED_FLOW_SEQ_ERR: if (!(qpriv->s_flags & HFI1_R_TID_SW_PSN)) { - u64 reg; - qpriv->s_flags |= HFI1_R_TID_SW_PSN; - /* - * The only sane way to get the amount of - * progress is to read the HW flow state. - */ - reg = read_uctxt_csr(dd, rcd->ctxt, - RCV_TID_FLOW_TABLE + - (8 * flow->idx)); - flow->flow_state.r_next_psn = mask_psn(reg); + flow->flow_state.r_next_psn = + read_r_next_psn(dd, rcd->ctxt, + flow->idx); qpriv->r_next_psn_kdeth = flow->flow_state.r_next_psn; goto nak_psn; @@ -5456,3 +5442,15 @@ bool hfi1_tid_rdma_ack_interlock(struct rvt_qp *qp, struct rvt_ack_entry *e) } return false; } + +static u32 read_r_next_psn(struct hfi1_devdata *dd, u8 ctxt, u8 fidx) +{ + u64 reg; + + /* + * The only sane way to get the amount of + * progress is to read the HW flow state. + */ + reg = read_uctxt_csr(dd, ctxt, RCV_TID_FLOW_TABLE + (8 * fidx)); + return mask_psn(reg); +} -- cgit v1.2.3 From b885d5be9ca10dff6110a8738c45eb4b3fb5a40a Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Mon, 18 Mar 2019 09:58:50 -0700 Subject: IB/hfi1: Unify the software PSN check for TID RDMA READ/WRITE For expected packet receiving, the hfi1 hardware checks the KDETH PSN automatically. However, when sequence error occurs, the hfi1 driver can check the sequence instead until the hardware flow generation is reloaded. TID RDMA READ and WRITE protocols implement similar software checking mechanisms, but with different flags and different local variables to store next expected PSN. Unify the handling by using only one set of flag and local variable for both TID RDMA READ and WRITE protocols. 
Reviewed-by: Mike Marciniszyn Reviewed-by: Dennis Dalessandro Reviewed-by: Michael J. Ruhl Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/tid_rdma.c | 45 +++++++++++++++++++++------------- drivers/infiniband/hw/hfi1/tid_rdma.h | 2 -- drivers/infiniband/hw/hfi1/trace_tid.h | 12 +++------ 3 files changed, 31 insertions(+), 28 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c index d540e9cffa37..9ade7d3954b0 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.c +++ b/drivers/infiniband/hw/hfi1/tid_rdma.c @@ -67,8 +67,6 @@ static u32 mask_generation(u32 a) #define TID_RDMA_DESTQP_FLOW_SHIFT 11 #define TID_RDMA_DESTQP_FLOW_MASK 0x1f -#define TID_FLOW_SW_PSN BIT(0) - #define TID_OPFN_QP_CTXT_MASK 0xff #define TID_OPFN_QP_CTXT_SHIFT 56 #define TID_OPFN_QP_KDETH_MASK 0xff @@ -777,7 +775,6 @@ int hfi1_kern_setup_hw_flow(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp) rcd->flows[fs->index].generation = fs->generation; fs->generation = kern_setup_hw_flow(rcd, fs->index); fs->psn = 0; - fs->flags = 0; dequeue_tid_waiter(rcd, &rcd->flow_queue, qp); /* get head before dropping lock */ fqp = first_qp(rcd, &rcd->flow_queue); @@ -1808,6 +1805,7 @@ sync_check: goto done; hfi1_kern_clear_hw_flow(req->rcd, qp); + qpriv->s_flags &= ~HFI1_R_TID_SW_PSN; req->state = TID_REQUEST_ACTIVE; } @@ -2476,8 +2474,13 @@ void hfi1_rc_rcv_tid_rdma_read_resp(struct hfi1_packet *packet) flow = &req->flows[req->clear_tail]; /* When header suppression is disabled */ - if (cmp_psn(ipsn, flow->flow_state.ib_lpsn)) + if (cmp_psn(ipsn, flow->flow_state.ib_lpsn)) { + if (cmp_psn(kpsn, flow->flow_state.r_next_psn)) + goto ack_done; + flow->flow_state.r_next_psn = mask_psn(kpsn + 1); goto ack_done; + } + flow->flow_state.r_next_psn = mask_psn(kpsn + 1); req->ack_pending--; priv->pending_tid_r_segs--; qp->s_num_rd_atomic--; @@ -2519,6 +2522,7 @@ void hfi1_rc_rcv_tid_rdma_read_resp(struct 
hfi1_packet *packet) req->comp_seg == req->cur_seg) || priv->tid_r_comp == priv->tid_r_reqs) { hfi1_kern_clear_hw_flow(priv->rcd, qp); + priv->s_flags &= ~HFI1_R_TID_SW_PSN; if (req->state == TID_REQUEST_SYNC) req->state = TID_REQUEST_ACTIVE; } @@ -2768,9 +2772,9 @@ static bool handle_read_kdeth_eflags(struct hfi1_ctxtdata *rcd, rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR); return ret; } - if (priv->flow_state.flags & TID_FLOW_SW_PSN) { + if (priv->s_flags & HFI1_R_TID_SW_PSN) { diff = cmp_psn(psn, - priv->flow_state.r_next_psn); + flow->flow_state.r_next_psn); if (diff > 0) { if (!(qp->r_flags & RVT_R_RDMAR_SEQ)) restart_tid_rdma_read_req(rcd, @@ -2806,14 +2810,15 @@ static bool handle_read_kdeth_eflags(struct hfi1_ctxtdata *rcd, qp->r_flags &= ~RVT_R_RDMAR_SEQ; } - priv->flow_state.r_next_psn++; + flow->flow_state.r_next_psn = + mask_psn(psn + 1); } else { u32 last_psn; last_psn = read_r_next_psn(dd, rcd->ctxt, flow->idx); - priv->flow_state.r_next_psn = last_psn; - priv->flow_state.flags |= TID_FLOW_SW_PSN; + flow->flow_state.r_next_psn = last_psn; + priv->s_flags |= HFI1_R_TID_SW_PSN; /* * If no request has been restarted yet, * restart the current one. @@ -2878,6 +2883,7 @@ bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd, struct rvt_ack_entry *e; struct tid_rdma_request *req; struct tid_rdma_flow *flow; + int diff = 0; trace_hfi1_msg_handle_kdeth_eflags(NULL, "Kdeth error: rhf ", packet->rhf); @@ -2977,10 +2983,12 @@ bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd, * mismatch could be due to packets that were * already in flight. 
*/ - if (psn != flow->flow_state.r_next_psn) { - psn = flow->flow_state.r_next_psn; + diff = cmp_psn(psn, + flow->flow_state.r_next_psn); + if (diff > 0) goto nak_psn; - } + else if (diff < 0) + break; qpriv->s_nak_state = 0; /* @@ -2991,8 +2999,10 @@ bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd, if (psn == full_flow_psn(flow, flow->flow_state.lpsn)) ret = false; + flow->flow_state.r_next_psn = + mask_psn(psn + 1); qpriv->r_next_psn_kdeth = - ++flow->flow_state.r_next_psn; + flow->flow_state.r_next_psn; } break; @@ -3497,8 +3507,10 @@ static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx) if (qpriv->r_tid_alloc == qpriv->r_tid_head) { /* If all data has been received, clear the flow */ if (qpriv->flow_state.index < RXE_NUM_TID_FLOWS && - !qpriv->alloc_w_segs) + !qpriv->alloc_w_segs) { hfi1_kern_clear_hw_flow(rcd, qp); + qpriv->s_flags &= ~HFI1_R_TID_SW_PSN; + } break; } @@ -3524,8 +3536,7 @@ static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx) if (qpriv->sync_pt && !qpriv->alloc_w_segs) { hfi1_kern_clear_hw_flow(rcd, qp); qpriv->sync_pt = false; - if (qpriv->s_flags & HFI1_R_TID_SW_PSN) - qpriv->s_flags &= ~HFI1_R_TID_SW_PSN; + qpriv->s_flags &= ~HFI1_R_TID_SW_PSN; } /* Allocate flow if we don't have one */ @@ -4299,7 +4310,7 @@ void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet) if (cmp_psn(psn, full_flow_psn(flow, flow->flow_state.lpsn))) { if (cmp_psn(psn, flow->flow_state.r_next_psn)) goto send_nak; - flow->flow_state.r_next_psn++; + flow->flow_state.r_next_psn = mask_psn(psn + 1); goto exit; } flow->flow_state.r_next_psn = mask_psn(psn + 1); diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.h b/drivers/infiniband/hw/hfi1/tid_rdma.h index 53ab24ef4f02..1c536185261e 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.h +++ b/drivers/infiniband/hw/hfi1/tid_rdma.h @@ -76,10 +76,8 @@ struct tid_rdma_qp_params { struct tid_flow_state { u32 generation; u32 psn; - u32 r_next_psn; /* next PSN to be 
received (in TID space) */ u8 index; u8 last_index; - u8 flags; }; enum tid_rdma_req_state { diff --git a/drivers/infiniband/hw/hfi1/trace_tid.h b/drivers/infiniband/hw/hfi1/trace_tid.h index 548dfc45a407..4388b594ed1b 100644 --- a/drivers/infiniband/hw/hfi1/trace_tid.h +++ b/drivers/infiniband/hw/hfi1/trace_tid.h @@ -53,7 +53,7 @@ u16 hfi1_trace_get_tid_idx(u32 ent); "tid_r_comp %u pending_tid_r_segs %u " \ "s_flags 0x%x ps_flags 0x%x iow_flags 0x%lx " \ "s_state 0x%x hw_flow_index %u generation 0x%x " \ - "fpsn 0x%x flow_flags 0x%x" + "fpsn 0x%x" #define TID_REQ_PRN "[%s] qpn 0x%x newreq %u opcode 0x%x psn 0x%x lpsn 0x%x " \ "cur_seg %u comp_seg %u ack_seg %u alloc_seg %u " \ @@ -71,7 +71,7 @@ u16 hfi1_trace_get_tid_idx(u32 ent); "pending_tid_w_segs %u sync_pt %s " \ "ps_nak_psn 0x%x ps_nak_state 0x%x " \ "prnr_nak_state 0x%x hw_flow_index %u generation "\ - "0x%x fpsn 0x%x flow_flags 0x%x resync %s" \ + "0x%x fpsn 0x%x resync %s" \ "r_next_psn_kdeth 0x%x" #define TID_WRITE_SENDER_PRN "[%s] qpn 0x%x newreq %u s_tid_cur %u " \ @@ -973,7 +973,6 @@ DECLARE_EVENT_CLASS(/* tid_read_sender */ __field(u32, hw_flow_index) __field(u32, generation) __field(u32, fpsn) - __field(u32, flow_flags) ), TP_fast_assign(/* assign */ struct hfi1_qp_priv *priv = qp->priv; @@ -991,7 +990,6 @@ DECLARE_EVENT_CLASS(/* tid_read_sender */ __entry->hw_flow_index = priv->flow_state.index; __entry->generation = priv->flow_state.generation; __entry->fpsn = priv->flow_state.psn; - __entry->flow_flags = priv->flow_state.flags; ), TP_printk(/* print */ TID_READ_SENDER_PRN, @@ -1007,8 +1005,7 @@ DECLARE_EVENT_CLASS(/* tid_read_sender */ __entry->s_state, __entry->hw_flow_index, __entry->generation, - __entry->fpsn, - __entry->flow_flags + __entry->fpsn ) ); @@ -1338,7 +1335,6 @@ DECLARE_EVENT_CLASS(/* tid_write_sp */ __field(u32, hw_flow_index) __field(u32, generation) __field(u32, fpsn) - __field(u32, flow_flags) __field(bool, resync) __field(u32, r_next_psn_kdeth) ), @@ -1360,7 +1356,6 @@ 
DECLARE_EVENT_CLASS(/* tid_write_sp */ __entry->hw_flow_index = priv->flow_state.index; __entry->generation = priv->flow_state.generation; __entry->fpsn = priv->flow_state.psn; - __entry->flow_flags = priv->flow_state.flags; __entry->resync = priv->resync; __entry->r_next_psn_kdeth = priv->r_next_psn_kdeth; ), @@ -1381,7 +1376,6 @@ DECLARE_EVENT_CLASS(/* tid_write_sp */ __entry->hw_flow_index, __entry->generation, __entry->fpsn, - __entry->flow_flags, __entry->resync ? "yes" : "no", __entry->r_next_psn_kdeth ) -- cgit v1.2.3 From 8da0f0f26f80612efadc23beb72d5b66a498a386 Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Mon, 18 Mar 2019 09:59:00 -0700 Subject: IB/hfi1: Remove WARN_ON when freeing expected receive groups When PSM user receive context is freed, the expected receive groups allocated by the receive context will also be freed. However, if there are still TID entries in use, the receive groups rcd->tid_full_list or rcd->tid_used_list will not be empty, thus triggering the WARN_ONs in the function hfi1_free_ctxt_rcv_groups(). Even if the two lists may not be empty, the hfi1 driver will free all TID entries and receive groups associated with the receive context to prevent any resource leakage. Since a clean user application exit is not controlled by the hfi1 driver, this patch will remove the WARN_ONs in hfi1_free_ctxt_rcv_groups(). Reviewed-by: Mike Marciniszyn Reviewed-by: Michael J. 
Ruhl Reviewed-by: Dennis Dalessandro Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/exp_rcv.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/exp_rcv.c b/drivers/infiniband/hw/hfi1/exp_rcv.c index 1be49a0d9c11..e9d5cc8b771a 100644 --- a/drivers/infiniband/hw/hfi1/exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/exp_rcv.c @@ -112,9 +112,6 @@ int hfi1_alloc_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd) */ void hfi1_free_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd) { - WARN_ON(!EXP_TID_SET_EMPTY(rcd->tid_full_list)); - WARN_ON(!EXP_TID_SET_EMPTY(rcd->tid_used_list)); - kfree(rcd->groups); rcd->groups = NULL; hfi1_exp_tid_group_init(rcd); -- cgit v1.2.3 From 747b931fbe2362366dee30617f816501f3126882 Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Mon, 18 Mar 2019 12:20:59 -0700 Subject: IB/hfi1: Implement CCA for TID RDMA protocol Currently, FECN handling is not implemented on TID RDMA expected receive packets and therefore CCA can't be turned on when TID RDMA is enabled. This patch adds the CCA support to TID RDMA protocol by: - modifying FECN RSM rule to include kernel receive contexts - For TID_RDMA READ RESP or TID RDMA ACK packet, a CNP will be sent out if the FECN bit is set. For other TID RDMA packets that generate at least one response packet, the BECN bit will be set in the first response packet - Copying expected packet data to destination buffer when FECN bit is set in the TID RDMA READ RESP or TID RDMA WRITE DATA packet. In this case, the expected packet is received as an eager packet - Handling the TID sequence error for subsequent normal expected packets. Reviewed-by: Mike Marciniszyn Reviewed-by: Dennis Dalessandro Reviewed-by: Michael J. 
Ruhl Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/chip.c | 35 ++++--- drivers/infiniband/hw/hfi1/driver.c | 4 +- drivers/infiniband/hw/hfi1/tid_rdma.c | 173 ++++++++++++++++++++++++++++------ 3 files changed, 167 insertions(+), 45 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 9784c6c0d2ec..16861d9ba1b7 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -13297,15 +13297,18 @@ static int set_up_context_variables(struct hfi1_devdata *dd) /* * The RMT entries are currently allocated as shown below: * 1. QOS (0 to 128 entries); - * 2. FECN for PSM (num_user_contexts + num_vnic_contexts); + * 2. FECN (num_kernel_context - 1 + num_user_contexts + + * num_vnic_contexts); * 3. VNIC (num_vnic_contexts). - * It should be noted that PSM FECN oversubscribe num_vnic_contexts + * It should be noted that FECN oversubscribe num_vnic_contexts * entries of RMT because both VNIC and PSM could allocate any receive * context between dd->first_dyn_alloc_text and dd->num_rcv_contexts, * and PSM FECN must reserve an RMT entry for each possible PSM receive * context. 
*/ rmt_count = qos_rmt_entries(dd, NULL, NULL) + (num_vnic_contexts * 2); + if (HFI1_CAP_IS_KSET(TID_RDMA)) + rmt_count += num_kernel_contexts - 1; if (rmt_count + n_usr_ctxts > NUM_MAP_ENTRIES) { user_rmt_reduced = NUM_MAP_ENTRIES - rmt_count; dd_dev_err(dd, @@ -14288,37 +14291,43 @@ bail: init_qpmap_table(dd, FIRST_KERNEL_KCTXT, dd->n_krcv_queues - 1); } -static void init_user_fecn_handling(struct hfi1_devdata *dd, - struct rsm_map_table *rmt) +static void init_fecn_handling(struct hfi1_devdata *dd, + struct rsm_map_table *rmt) { struct rsm_rule_data rrd; u64 reg; - int i, idx, regoff, regidx; + int i, idx, regoff, regidx, start; u8 offset; u32 total_cnt; + if (HFI1_CAP_IS_KSET(TID_RDMA)) + /* Exclude context 0 */ + start = 1; + else + start = dd->first_dyn_alloc_ctxt; + + total_cnt = dd->num_rcv_contexts - start; + /* there needs to be enough room in the map table */ - total_cnt = dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt; if (rmt->used + total_cnt >= NUM_MAP_ENTRIES) { - dd_dev_err(dd, "User FECN handling disabled - too many user contexts allocated\n"); + dd_dev_err(dd, "FECN handling disabled - too many contexts allocated\n"); return; } /* * RSM will extract the destination context as an index into the * map table. The destination contexts are a sequential block - * in the range first_dyn_alloc_ctxt...num_rcv_contexts-1 (inclusive). + * in the range start...num_rcv_contexts-1 (inclusive). * Map entries are accessed as offset + extracted value. Adjust * the added offset so this sequence can be placed anywhere in * the table - as long as the entries themselves do not wrap. * There are only enough bits in offset for the table size, so * start with that to allow for a "negative" offset. 
*/ - offset = (u8)(NUM_MAP_ENTRIES + (int)rmt->used - - (int)dd->first_dyn_alloc_ctxt); + offset = (u8)(NUM_MAP_ENTRIES + rmt->used - start); - for (i = dd->first_dyn_alloc_ctxt, idx = rmt->used; - i < dd->num_rcv_contexts; i++, idx++) { + for (i = start, idx = rmt->used; i < dd->num_rcv_contexts; + i++, idx++) { /* replace with identity mapping */ regoff = (idx % 8) * 8; regidx = idx / 8; @@ -14440,7 +14449,7 @@ static void init_rxe(struct hfi1_devdata *dd) rmt = alloc_rsm_map_table(dd); /* set up QOS, including the QPN map table */ init_qos(dd, rmt); - init_user_fecn_handling(dd, rmt); + init_fecn_handling(dd, rmt); complete_rsm_map_table(dd, rmt); /* record number of used rsm map entries for vnic */ dd->vnic.rmt_start = rmt->used; diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 2a9d2912f5db..1f4fe0269b51 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -516,7 +516,9 @@ bool hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, */ do_cnp = prescan || (opcode >= IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST && - opcode <= IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE); + opcode <= IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE) || + opcode == TID_OP(READ_RESP) || + opcode == TID_OP(ACK); /* Call appropriate CNP handler */ if (!ignore_fecn && do_cnp && fecn) diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c index 9ade7d3954b0..eae6f05ca2fa 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.c +++ b/drivers/infiniband/hw/hfi1/tid_rdma.c @@ -127,6 +127,14 @@ static int make_tid_rdma_ack(struct rvt_qp *qp, struct hfi1_pkt_state *ps); static void hfi1_do_tid_send(struct rvt_qp *qp); static u32 read_r_next_psn(struct hfi1_devdata *dd, u8 ctxt, u8 fidx); +static void tid_rdma_rcv_err(struct hfi1_packet *packet, + struct ib_other_headers *ohdr, + struct rvt_qp *qp, u32 psn, int diff, bool fecn); +static void update_r_next_psn_fecn(struct hfi1_packet *packet, + struct 
hfi1_qp_priv *priv, + struct hfi1_ctxtdata *rcd, + struct tid_rdma_flow *flow, + bool fecn); static u64 tid_rdma_opfn_encode(struct tid_rdma_params *p) { @@ -2234,7 +2242,7 @@ void hfi1_rc_rcv_tid_rdma_read_req(struct hfi1_packet *packet) struct ib_reth *reth; struct hfi1_qp_priv *qpriv = qp->priv; u32 bth0, psn, len, rkey; - bool is_fecn; + bool fecn; u8 next; u64 vaddr; int diff; @@ -2244,7 +2252,7 @@ void hfi1_rc_rcv_tid_rdma_read_req(struct hfi1_packet *packet) if (hfi1_ruc_check_hdr(ibp, packet)) return; - is_fecn = process_ecn(qp, packet); + fecn = process_ecn(qp, packet); psn = mask_psn(be32_to_cpu(ohdr->bth[2])); trace_hfi1_rsp_rcv_tid_read_req(qp, psn); @@ -2263,9 +2271,8 @@ void hfi1_rc_rcv_tid_rdma_read_req(struct hfi1_packet *packet) diff = delta_psn(psn, qp->r_psn); if (unlikely(diff)) { - if (tid_rdma_rcv_error(packet, ohdr, qp, psn, diff)) - return; - goto send_ack; + tid_rdma_rcv_err(packet, ohdr, qp, psn, diff, fecn); + return; } /* We've verified the request, insert it into the ack queue. */ @@ -2317,11 +2324,11 @@ void hfi1_rc_rcv_tid_rdma_read_req(struct hfi1_packet *packet) /* Schedule the send tasklet. 
*/ qp->s_flags |= RVT_S_RESP_PENDING; + if (fecn) + qp->s_flags |= RVT_S_ECN; hfi1_schedule_send(qp); spin_unlock_irqrestore(&qp->s_lock, flags); - if (is_fecn) - goto send_ack; return; nack_inv_unlock: @@ -2338,8 +2345,6 @@ nack_acc: rvt_rc_error(qp, IB_WC_LOC_PROT_ERR); qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR; qp->r_ack_psn = qp->r_psn; -send_ack: - hfi1_send_rc_ack(packet, is_fecn); } u32 hfi1_build_tid_rdma_read_resp(struct rvt_qp *qp, struct rvt_ack_entry *e, @@ -2456,12 +2461,12 @@ void hfi1_rc_rcv_tid_rdma_read_resp(struct hfi1_packet *packet) struct tid_rdma_request *req; struct tid_rdma_flow *flow; u32 opcode, aeth; - bool is_fecn; + bool fecn; unsigned long flags; u32 kpsn, ipsn; trace_hfi1_sender_rcv_tid_read_resp(qp); - is_fecn = process_ecn(qp, packet); + fecn = process_ecn(qp, packet); kpsn = mask_psn(be32_to_cpu(ohdr->bth[2])); aeth = be32_to_cpu(ohdr->u.tid_rdma.r_rsp.aeth); opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff; @@ -2475,9 +2480,39 @@ void hfi1_rc_rcv_tid_rdma_read_resp(struct hfi1_packet *packet) flow = &req->flows[req->clear_tail]; /* When header suppression is disabled */ if (cmp_psn(ipsn, flow->flow_state.ib_lpsn)) { + update_r_next_psn_fecn(packet, priv, rcd, flow, fecn); + if (cmp_psn(kpsn, flow->flow_state.r_next_psn)) goto ack_done; flow->flow_state.r_next_psn = mask_psn(kpsn + 1); + /* + * Copy the payload to destination buffer if this packet is + * delivered as an eager packet due to RSM rule and FECN. + * The RSM rule selects FECN bit in BTH and SH bit in + * KDETH header and therefore will not match the last + * packet of each segment that has SH bit cleared. 
+ */ + if (fecn && packet->etype == RHF_RCV_TYPE_EAGER) { + struct rvt_sge_state ss; + u32 len; + u32 tlen = packet->tlen; + u16 hdrsize = packet->hlen; + u8 pad = packet->pad; + u8 extra_bytes = pad + packet->extra_byte + + (SIZE_OF_CRC << 2); + u32 pmtu = qp->pmtu; + + if (unlikely(tlen != (hdrsize + pmtu + extra_bytes))) + goto ack_op_err; + len = restart_sge(&ss, req->e.swqe, ipsn, pmtu); + if (unlikely(len < pmtu)) + goto ack_op_err; + rvt_copy_sge(qp, &ss, packet->payload, pmtu, false, + false); + /* Raise the sw sequence check flag for next packet */ + priv->s_flags |= HFI1_R_TID_SW_PSN; + } + goto ack_done; } flow->flow_state.r_next_psn = mask_psn(kpsn + 1); @@ -2544,8 +2579,6 @@ ack_op_err: ack_done: spin_unlock_irqrestore(&qp->s_lock, flags); - if (is_fecn) - hfi1_send_rc_ack(packet, is_fecn); } void hfi1_kern_read_tid_flow_free(struct rvt_qp *qp) @@ -3678,7 +3711,7 @@ void hfi1_rc_rcv_tid_rdma_write_req(struct hfi1_packet *packet) struct hfi1_qp_priv *qpriv = qp->priv; struct tid_rdma_request *req; u32 bth0, psn, len, rkey, num_segs; - bool is_fecn; + bool fecn; u8 next; u64 vaddr; int diff; @@ -3687,7 +3720,7 @@ void hfi1_rc_rcv_tid_rdma_write_req(struct hfi1_packet *packet) if (hfi1_ruc_check_hdr(ibp, packet)) return; - is_fecn = process_ecn(qp, packet); + fecn = process_ecn(qp, packet); psn = mask_psn(be32_to_cpu(ohdr->bth[2])); trace_hfi1_rsp_rcv_tid_write_req(qp, psn); @@ -3704,9 +3737,8 @@ void hfi1_rc_rcv_tid_rdma_write_req(struct hfi1_packet *packet) num_segs = DIV_ROUND_UP(len, qpriv->tid_rdma.local.max_len); diff = delta_psn(psn, qp->r_psn); if (unlikely(diff)) { - if (tid_rdma_rcv_error(packet, ohdr, qp, psn, diff)) - return; - goto send_ack; + tid_rdma_rcv_err(packet, ohdr, qp, psn, diff, fecn); + return; } /* @@ -3822,11 +3854,11 @@ update_head: /* Schedule the send tasklet. 
*/ qp->s_flags |= RVT_S_RESP_PENDING; + if (fecn) + qp->s_flags |= RVT_S_ECN; hfi1_schedule_send(qp); spin_unlock_irqrestore(&qp->s_lock, flags); - if (is_fecn) - goto send_ack; return; nack_inv_unlock: @@ -3843,8 +3875,6 @@ nack_acc: rvt_rc_error(qp, IB_WC_LOC_PROT_ERR); qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR; qp->r_ack_psn = qp->r_psn; -send_ack: - hfi1_send_rc_ack(packet, is_fecn); } u32 hfi1_build_tid_rdma_write_resp(struct rvt_qp *qp, struct rvt_ack_entry *e, @@ -4061,10 +4091,10 @@ void hfi1_rc_rcv_tid_rdma_write_resp(struct hfi1_packet *packet) struct tid_rdma_flow *flow; enum ib_wc_status status; u32 opcode, aeth, psn, flow_psn, i, tidlen = 0, pktlen; - bool is_fecn; + bool fecn; unsigned long flags; - is_fecn = process_ecn(qp, packet); + fecn = process_ecn(qp, packet); psn = mask_psn(be32_to_cpu(ohdr->bth[2])); aeth = be32_to_cpu(ohdr->u.tid_rdma.w_rsp.aeth); opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff; @@ -4204,7 +4234,6 @@ void hfi1_rc_rcv_tid_rdma_write_resp(struct hfi1_packet *packet) qpriv->s_tid_cur = i; } qp->s_flags &= ~HFI1_S_WAIT_TID_RESP; - hfi1_schedule_tid_send(qp); goto ack_done; @@ -4213,9 +4242,9 @@ ack_op_err: ack_err: rvt_error_qp(qp, status); ack_done: + if (fecn) + qp->s_flags |= RVT_S_ECN; spin_unlock_irqrestore(&qp->s_lock, flags); - if (is_fecn) - hfi1_send_rc_ack(packet, is_fecn); } bool hfi1_build_tid_rdma_packet(struct rvt_swqe *wqe, @@ -4295,7 +4324,9 @@ void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet) unsigned long flags; u32 psn, next; u8 opcode; + bool fecn; + fecn = process_ecn(qp, packet); psn = mask_psn(be32_to_cpu(ohdr->bth[2])); opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff; @@ -4308,9 +4339,53 @@ void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet) req = ack_to_tid_req(e); flow = &req->flows[req->clear_tail]; if (cmp_psn(psn, full_flow_psn(flow, flow->flow_state.lpsn))) { + update_r_next_psn_fecn(packet, priv, rcd, flow, fecn); + if (cmp_psn(psn, flow->flow_state.r_next_psn)) 
goto send_nak; + flow->flow_state.r_next_psn = mask_psn(psn + 1); + /* + * Copy the payload to destination buffer if this packet is + * delivered as an eager packet due to RSM rule and FECN. + * The RSM rule selects FECN bit in BTH and SH bit in + * KDETH header and therefore will not match the last + * packet of each segment that has SH bit cleared. + */ + if (fecn && packet->etype == RHF_RCV_TYPE_EAGER) { + struct rvt_sge_state ss; + u32 len; + u32 tlen = packet->tlen; + u16 hdrsize = packet->hlen; + u8 pad = packet->pad; + u8 extra_bytes = pad + packet->extra_byte + + (SIZE_OF_CRC << 2); + u32 pmtu = qp->pmtu; + + if (unlikely(tlen != (hdrsize + pmtu + extra_bytes))) + goto send_nak; + len = req->comp_seg * req->seg_len; + len += delta_psn(psn, + full_flow_psn(flow, flow->flow_state.spsn)) * + pmtu; + if (unlikely(req->total_len - len < pmtu)) + goto send_nak; + + /* + * The e->rdma_sge field is set when TID RDMA WRITE REQ + * is first received and is never modified thereafter. + */ + ss.sge = e->rdma_sge; + ss.sg_list = NULL; + ss.num_sge = 1; + ss.total_len = req->total_len; + rvt_skip_sge(&ss, len, false); + rvt_copy_sge(qp, &ss, packet->payload, pmtu, false, + false); + /* Raise the sw sequence check flag for next packet */ + priv->r_next_psn_kdeth = mask_psn(psn + 1); + priv->s_flags |= HFI1_R_TID_SW_PSN; + } goto exit; } flow->flow_state.r_next_psn = mask_psn(psn + 1); @@ -4375,6 +4450,8 @@ done: hfi1_schedule_tid_send(qp); exit: priv->r_next_psn_kdeth = flow->flow_state.r_next_psn; + if (fecn) + qp->s_flags |= RVT_S_ECN; spin_unlock_irqrestore(&qp->s_lock, flags); return; @@ -4476,12 +4553,11 @@ void hfi1_rc_rcv_tid_rdma_ack(struct hfi1_packet *packet) struct tid_rdma_request *req; struct tid_rdma_flow *flow; u32 aeth, psn, req_psn, ack_psn, fspsn, resync_psn, ack_kpsn; - bool is_fecn; unsigned long flags; u16 fidx; trace_hfi1_tid_write_sender_rcv_tid_ack(qp, 0); - is_fecn = process_ecn(qp, packet); + process_ecn(qp, packet); psn = 
mask_psn(be32_to_cpu(ohdr->bth[2])); aeth = be32_to_cpu(ohdr->u.tid_rdma.ack.aeth); req_psn = mask_psn(be32_to_cpu(ohdr->u.tid_rdma.ack.verbs_psn)); @@ -4835,10 +4911,10 @@ void hfi1_rc_rcv_tid_rdma_resync(struct hfi1_packet *packet) struct tid_rdma_flow *flow; struct tid_flow_state *fs = &qpriv->flow_state; u32 psn, generation, idx, gen_next; - bool is_fecn; + bool fecn; unsigned long flags; - is_fecn = process_ecn(qp, packet); + fecn = process_ecn(qp, packet); psn = mask_psn(be32_to_cpu(ohdr->bth[2])); generation = mask_psn(psn + 1) >> HFI1_KDETH_BTH_SEQ_SHIFT; @@ -4929,6 +5005,8 @@ void hfi1_rc_rcv_tid_rdma_resync(struct hfi1_packet *packet) qpriv->s_flags |= RVT_S_ACK_PENDING; hfi1_schedule_tid_send(qp); bail: + if (fecn) + qp->s_flags |= RVT_S_ECN; spin_unlock_irqrestore(&qp->s_lock, flags); } @@ -5465,3 +5543,36 @@ static u32 read_r_next_psn(struct hfi1_devdata *dd, u8 ctxt, u8 fidx) reg = read_uctxt_csr(dd, ctxt, RCV_TID_FLOW_TABLE + (8 * fidx)); return mask_psn(reg); } + +static void tid_rdma_rcv_err(struct hfi1_packet *packet, + struct ib_other_headers *ohdr, + struct rvt_qp *qp, u32 psn, int diff, bool fecn) +{ + unsigned long flags; + + tid_rdma_rcv_error(packet, ohdr, qp, psn, diff); + if (fecn) { + spin_lock_irqsave(&qp->s_lock, flags); + qp->s_flags |= RVT_S_ECN; + spin_unlock_irqrestore(&qp->s_lock, flags); + } +} + +static void update_r_next_psn_fecn(struct hfi1_packet *packet, + struct hfi1_qp_priv *priv, + struct hfi1_ctxtdata *rcd, + struct tid_rdma_flow *flow, + bool fecn) +{ + /* + * If a start/middle packet is delivered here due to + * RSM rule and FECN, we need to update the r_next_psn. 
+ */ + if (fecn && packet->etype == RHF_RCV_TYPE_EAGER && + !(priv->s_flags & HFI1_R_TID_SW_PSN)) { + struct hfi1_devdata *dd = rcd->dd; + + flow->flow_state.r_next_psn = + read_r_next_psn(dd, rcd->ctxt, flow->idx); + } +} -- cgit v1.2.3 From 0f51427bd0976fc4824ca16e73b7985f224cbbf8 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 25 Feb 2019 08:56:14 +0200 Subject: RDMA/mlx5: Cleanup WQE page fault handler Refactor the page fault handler to be more readable and extensible, this cleanup was triggered by the error reported below. The code structure made it unclear to the automatic tools to identify that such a flow is not possible in real life because "requestor != NULL" means that "qp != NULL" too. drivers/infiniband/hw/mlx5/odp.c:1254 mlx5_ib_mr_wqe_pfault_handler() error: we previously assumed 'qp' could be null (see line 1230) Fixes: 08100fad5cac ("IB/mlx5: Add ODP SRQ support") Reported-by: Dan Carpenter Reviewed-by: Moni Shoua Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/odp.c | 119 +++++++++++++++++---------------------- 1 file changed, 52 insertions(+), 67 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 91669e35c6ca..cdb0d63fa4b1 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -929,7 +929,7 @@ static int pagefault_data_segments(struct mlx5_ib_dev *dev, struct mlx5_pagefault *pfault, void *wqe, void *wqe_end, u32 *bytes_mapped, - u32 *total_wqe_bytes, int receive_queue) + u32 *total_wqe_bytes, bool receive_queue) { int ret = 0, npages = 0; u64 io_virt; @@ -1209,17 +1209,15 @@ static inline struct mlx5_ib_srq *res_to_srq(struct mlx5_core_rsc_common *res) static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev, struct mlx5_pagefault *pfault) { - int ret; - void *wqe, *wqe_end; + bool sq = pfault->type & MLX5_PFAULT_REQUESTOR; + u16 wqe_index = pfault->wqe.wqe_index; + void *wqe = NULL, *wqe_end = NULL; 
u32 bytes_mapped, total_wqe_bytes; - char *buffer = NULL; + struct mlx5_core_rsc_common *res; int resume_with_error = 1; - u16 wqe_index = pfault->wqe.wqe_index; - int requestor = pfault->type & MLX5_PFAULT_REQUESTOR; - struct mlx5_core_rsc_common *res = NULL; - struct mlx5_ib_qp *qp = NULL; - struct mlx5_ib_srq *srq = NULL; + struct mlx5_ib_qp *qp; size_t bytes_copied; + int ret = 0; res = odp_get_rsc(dev, pfault->wqe.wq_num, pfault->type); if (!res) { @@ -1227,87 +1225,74 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev, return; } - switch (res->res) { - case MLX5_RES_QP: - qp = res_to_qp(res); - break; - case MLX5_RES_SRQ: - case MLX5_RES_XSRQ: - srq = res_to_srq(res); - break; - default: - mlx5_ib_err(dev, "wqe page fault for unsupported type %d\n", pfault->type); + if (res->res != MLX5_RES_QP && res->res != MLX5_RES_SRQ && + res->res != MLX5_RES_XSRQ) { + mlx5_ib_err(dev, "wqe page fault for unsupported type %d\n", + pfault->type); goto resolve_page_fault; } - buffer = (char *)__get_free_page(GFP_KERNEL); - if (!buffer) { + wqe = (void *)__get_free_page(GFP_KERNEL); + if (!wqe) { mlx5_ib_err(dev, "Error allocating memory for IO page fault handling.\n"); goto resolve_page_fault; } - if (qp) { - if (requestor) { - ret = mlx5_ib_read_user_wqe_sq(qp, wqe_index, - buffer, PAGE_SIZE, - &bytes_copied); - } else { - ret = mlx5_ib_read_user_wqe_rq(qp, wqe_index, - buffer, PAGE_SIZE, - &bytes_copied); - } - } else { - ret = mlx5_ib_read_user_wqe_srq(srq, wqe_index, - buffer, PAGE_SIZE, + qp = (res->res == MLX5_RES_QP) ? 
res_to_qp(res) : NULL; + if (qp && sq) { + ret = mlx5_ib_read_user_wqe_sq(qp, wqe_index, wqe, PAGE_SIZE, + &bytes_copied); + if (ret) + goto read_user; + ret = mlx5_ib_mr_initiator_pfault_handler( + dev, pfault, qp, &wqe, &wqe_end, bytes_copied); + } else if (qp && !sq) { + ret = mlx5_ib_read_user_wqe_rq(qp, wqe_index, wqe, PAGE_SIZE, + &bytes_copied); + if (ret) + goto read_user; + ret = mlx5_ib_mr_responder_pfault_handler_rq( + dev, qp, wqe, &wqe_end, bytes_copied); + } else if (!qp) { + struct mlx5_ib_srq *srq = res_to_srq(res); + + ret = mlx5_ib_read_user_wqe_srq(srq, wqe_index, wqe, PAGE_SIZE, &bytes_copied); + if (ret) + goto read_user; + ret = mlx5_ib_mr_responder_pfault_handler_srq( + dev, srq, &wqe, &wqe_end, bytes_copied); } - if (ret) { - mlx5_ib_err(dev, "Failed reading a WQE following page fault, error=%d, wqe_index=%x, qpn=%x\n", - ret, wqe_index, pfault->token); + if (ret < 0 || wqe >= wqe_end) goto resolve_page_fault; - } - wqe = buffer; - if (requestor) - ret = mlx5_ib_mr_initiator_pfault_handler(dev, pfault, qp, - &wqe, &wqe_end, - bytes_copied); - else if (qp) - ret = mlx5_ib_mr_responder_pfault_handler_rq(dev, qp, - wqe, &wqe_end, - bytes_copied); - else - ret = mlx5_ib_mr_responder_pfault_handler_srq(dev, srq, - &wqe, &wqe_end, - bytes_copied); + ret = pagefault_data_segments(dev, pfault, wqe, wqe_end, &bytes_mapped, + &total_wqe_bytes, !sq); + if (ret == -EAGAIN) + goto out; - if (ret < 0) + if (ret < 0 || total_wqe_bytes > bytes_mapped) goto resolve_page_fault; - if (wqe >= wqe_end) { - mlx5_ib_err(dev, "ODP fault on invalid WQE.\n"); - goto resolve_page_fault; - } +out: + ret = 0; + resume_with_error = 0; - ret = pagefault_data_segments(dev, pfault, wqe, wqe_end, - &bytes_mapped, &total_wqe_bytes, - !requestor); - if (ret == -EAGAIN) { - resume_with_error = 0; - goto resolve_page_fault; - } else if (ret < 0 || total_wqe_bytes > bytes_mapped) { - goto resolve_page_fault; - } +read_user: + if (ret) + mlx5_ib_err( + dev, + "Failed reading a WQE 
following page fault, error %d, wqe_index %x, qpn %x\n", + ret, wqe_index, pfault->token); - resume_with_error = 0; resolve_page_fault: mlx5_ib_page_fault_resume(dev, pfault, resume_with_error); mlx5_ib_dbg(dev, "PAGE FAULT completed. QP 0x%x resume_with_error=%d, type: 0x%x\n", pfault->wqe.wq_num, resume_with_error, pfault->type); mlx5_core_res_put(res); - free_page((unsigned long)buffer); + free_page((unsigned long)wqe); } static int pages_in_range(u64 address, u32 length) -- cgit v1.2.3 From d2c33370ae73105c7c7df8f7048d20653991b4cb Mon Sep 17 00:00:00 2001 From: Potnuri Bharat Teja Date: Tue, 2 Apr 2019 14:46:11 +0530 Subject: RDMA/iw_cxgb4: Always disconnect when QP is transitioning to TERMINATE state On receiving a TERM from the peer, Host moves the QP to TERMINATE state and then moves the adapter out of RDMA mode. After issuing a TERM, peer issues a CLOSE and at this point of time if the connectivity between peer and host is lost for a significant amount of time, the QP remains in TERMINATE state. Therefore c4iw_modify_qp() needs to initiate a close on entering terminate state. 
Signed-off-by: Potnuri Bharat Teja Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/cxgb4/qp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 777231496cc6..63780e6eface 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -1976,10 +1976,10 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, qhp->attr.layer_etype = attrs->layer_etype; qhp->attr.ecode = attrs->ecode; ep = qhp->ep; + c4iw_get_ep(&ep->com); + disconnect = 1; if (!internal) { - c4iw_get_ep(&qhp->ep->com); terminate = 1; - disconnect = 1; } else { terminate = qhp->attr.send_term; ret = rdma_fini(rhp, qhp, ep); -- cgit v1.2.3 From c7252a6532995fe6971295b7878e5a74b4f85d0c Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 2 Apr 2019 15:39:55 +0300 Subject: RDMA/cm: Remove useless zeroing of static global variable Static global variables are initialized to zero by C standard, there is no need to zero them again. Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cm.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 5671c92b69bd..1dd2b572f59f 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -4498,7 +4498,6 @@ static int __init ib_cm_init(void) { int ret; - memset(&cm, 0, sizeof cm); INIT_LIST_HEAD(&cm.device_list); rwlock_init(&cm.device_lock); spin_lock_init(&cm.lock); -- cgit v1.2.3 From d10bcf947a3ea240351a8182d71e4aa9c8ddba56 Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Tue, 2 Apr 2019 14:52:52 -0500 Subject: RDMA/umem: Combine contiguous PAGE_SIZE regions in SGEs Combine contiguous regions of PAGE_SIZE pages into single scatter list entry while building the scatter table for a umem. 
This minimizes the number of the entries in the scatter list and reduces the DMA mapping overhead, particularly with the IOMMU. Set default max_seg_size in core for IB devices to 2G and do not combine if we exceed this limit. Also, purge npages in struct ib_umem as we now DMA map the umem SGL with sg_nents and npage computation is not needed. Drivers should now be using ib_umem_num_pages(), so fix the last stragglers. Move npages tracking to ib_umem_odp as ODP drivers still need it. Suggested-by: Jason Gunthorpe Reviewed-by: Michael J. Ruhl Reviewed-by: Ira Weiny Acked-by: Adit Ranadive Signed-off-by: Shiraz Saleem Tested-by: Gal Pressman Tested-by: Selvin Xavier Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 3 + drivers/infiniband/core/umem.c | 101 +++++++++++++++++++++------ drivers/infiniband/core/umem_odp.c | 4 +- drivers/infiniband/hw/mlx5/odp.c | 2 +- drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c | 11 +-- include/rdma/ib_umem.h | 2 +- include/rdma/ib_umem_odp.h | 1 + 7 files changed, 95 insertions(+), 29 deletions(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 2dbd04739ac6..0f98da17af23 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -1089,6 +1089,9 @@ static void setup_dma_device(struct ib_device *device) WARN_ON_ONCE(!parent); device->dma_device = parent; } + /* Setup default max segment size for all IB devices */ + dma_set_max_seg_size(device->dma_device, SZ_2G); + } /* diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 89a7d57f9fa5..d31f5e386c7d 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -39,25 +39,22 @@ #include #include #include +#include #include #include "uverbs.h" - static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty) { - struct scatterlist *sg; + struct sg_page_iter sg_iter; struct page *page; - int i; if (umem->nmap > 0) - 
ib_dma_unmap_sg(dev, umem->sg_head.sgl, - umem->npages, + ib_dma_unmap_sg(dev, umem->sg_head.sgl, umem->sg_nents, DMA_BIDIRECTIONAL); - for_each_sg(umem->sg_head.sgl, sg, umem->npages, i) { - - page = sg_page(sg); + for_each_sg_page(umem->sg_head.sgl, &sg_iter, umem->sg_nents, 0) { + page = sg_page_iter_page(&sg_iter); if (!PageDirty(page) && umem->writable && dirty) set_page_dirty_lock(page); put_page(page); @@ -66,6 +63,69 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d sg_free_table(&umem->sg_head); } +/* ib_umem_add_sg_table - Add N contiguous pages to scatter table + * + * sg: current scatterlist entry + * page_list: array of npage struct page pointers + * npages: number of pages in page_list + * max_seg_sz: maximum segment size in bytes + * nents: [out] number of entries in the scatterlist + * + * Return new end of scatterlist + */ +static struct scatterlist *ib_umem_add_sg_table(struct scatterlist *sg, + struct page **page_list, + unsigned long npages, + unsigned int max_seg_sz, + int *nents) +{ + unsigned long first_pfn; + unsigned long i = 0; + bool update_cur_sg = false; + bool first = !sg_page(sg); + + /* Check if new page_list is contiguous with end of previous page_list. + * sg->length here is a multiple of PAGE_SIZE and sg->offset is 0. 
+ */ + if (!first && (page_to_pfn(sg_page(sg)) + (sg->length >> PAGE_SHIFT) == + page_to_pfn(page_list[0]))) + update_cur_sg = true; + + while (i != npages) { + unsigned long len; + struct page *first_page = page_list[i]; + + first_pfn = page_to_pfn(first_page); + + /* Compute the number of contiguous pages we have starting + * at i + */ + for (len = 0; i != npages && + first_pfn + len == page_to_pfn(page_list[i]); + len++) + i++; + + /* Squash N contiguous pages from page_list into current sge */ + if (update_cur_sg && + ((max_seg_sz - sg->length) >= (len << PAGE_SHIFT))) { + sg_set_page(sg, sg_page(sg), + sg->length + (len << PAGE_SHIFT), 0); + update_cur_sg = false; + continue; + } + + /* Squash N contiguous pages into next sge or first sge */ + if (!first) + sg = sg_next(sg); + + (*nents)++; + sg_set_page(sg, first_page, len << PAGE_SHIFT, 0); + first = false; + } + + return sg; +} + /** * ib_umem_get - Pin and DMA map userspace memory. * @@ -93,7 +153,7 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr, int ret; int i; unsigned long dma_attrs = 0; - struct scatterlist *sg, *sg_list_start; + struct scatterlist *sg; unsigned int gup_flags = FOLL_WRITE; if (!udata) @@ -190,7 +250,7 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr, if (!umem->writable) gup_flags |= FOLL_FORCE; - sg_list_start = umem->sg_head.sgl; + sg = umem->sg_head.sgl; while (npages) { down_read(&mm->mmap_sem); @@ -203,28 +263,29 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr, goto umem_release; } - umem->npages += ret; cur_base += ret * PAGE_SIZE; npages -= ret; + sg = ib_umem_add_sg_table(sg, page_list, ret, + dma_get_max_seg_size(context->device->dma_device), + &umem->sg_nents); + /* Continue to hold the mmap_sem as vma_list access * needs to be protected. 
*/ - for_each_sg(sg_list_start, sg, ret, i) { + for (i = 0; i < ret && umem->hugetlb; i++) { if (vma_list && !is_vm_hugetlb_page(vma_list[i])) umem->hugetlb = 0; - - sg_set_page(sg, page_list[i], PAGE_SIZE, 0); } - up_read(&mm->mmap_sem); - /* preparing for next loop */ - sg_list_start = sg; + up_read(&mm->mmap_sem); } + sg_mark_end(sg); + umem->nmap = ib_dma_map_sg_attrs(context->device, umem->sg_head.sgl, - umem->npages, + umem->sg_nents, DMA_BIDIRECTIONAL, dma_attrs); @@ -320,8 +381,8 @@ int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset, return -EINVAL; } - ret = sg_pcopy_to_buffer(umem->sg_head.sgl, umem->npages, dst, length, - offset + ib_umem_offset(umem)); + ret = sg_pcopy_to_buffer(umem->sg_head.sgl, ib_umem_num_pages(umem), + dst, length, offset + ib_umem_offset(umem)); if (ret < 0) return ret; diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index 6f8c36fcda78..97219143f16f 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -526,7 +526,7 @@ static int ib_umem_odp_map_dma_single_page( } umem_odp->dma_list[page_index] = dma_addr | access_mask; umem_odp->page_list[page_index] = page; - umem->npages++; + umem_odp->npages++; } else if (umem_odp->page_list[page_index] == page) { umem_odp->dma_list[page_index] |= access_mask; } else { @@ -752,7 +752,7 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt, } umem_odp->page_list[idx] = NULL; umem_odp->dma_list[idx] = 0; - umem->npages--; + umem_odp->npages--; } } mutex_unlock(&umem_odp->umem_mutex); diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index cdb0d63fa4b1..91507a2e9290 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -288,7 +288,7 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, ib_umem_odp_unmap_dma_pages(umem_odp, start, end); - if (unlikely(!umem->npages && mr->parent && + if 
(unlikely(!umem_odp->npages && mr->parent && !umem_odp->dying)) { WRITE_ONCE(umem_odp->dying, 1); atomic_inc(&mr->parent->num_leaf_free); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c index 9e6c44ebaf54..65dc47ffb8f3 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c @@ -119,7 +119,7 @@ struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, union pvrdma_cmd_resp rsp; struct pvrdma_cmd_create_mr *cmd = &req.create_mr; struct pvrdma_cmd_create_mr_resp *resp = &rsp.create_mr_resp; - int ret; + int ret, npages; if (length == 0 || length > dev->dsr->caps.max_mr_size) { dev_warn(&dev->pdev->dev, "invalid mem region length\n"); @@ -133,9 +133,10 @@ struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, return ERR_CAST(umem); } - if (umem->npages < 0 || umem->npages > PVRDMA_PAGE_DIR_MAX_PAGES) { + npages = ib_umem_num_pages(umem); + if (npages < 0 || npages > PVRDMA_PAGE_DIR_MAX_PAGES) { dev_warn(&dev->pdev->dev, "overflow %d pages in mem region\n", - umem->npages); + npages); ret = -EINVAL; goto err_umem; } @@ -150,7 +151,7 @@ struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mr->mmr.size = length; mr->umem = umem; - ret = pvrdma_page_dir_init(dev, &mr->pdir, umem->npages, false); + ret = pvrdma_page_dir_init(dev, &mr->pdir, npages, false); if (ret) { dev_warn(&dev->pdev->dev, "could not allocate page directory\n"); @@ -167,7 +168,7 @@ struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, cmd->length = length; cmd->pd_handle = to_vpd(pd)->pd_handle; cmd->access_flags = access_flags; - cmd->nchunks = umem->npages; + cmd->nchunks = npages; cmd->pdir_dma = mr->pdir.dir_dma; ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_MR_RESP); diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h index 73af05db04c7..b13a2e9a50d4 100644 --- a/include/rdma/ib_umem.h +++ 
b/include/rdma/ib_umem.h @@ -53,7 +53,7 @@ struct ib_umem { struct work_struct work; struct sg_table sg_head; int nmap; - int npages; + unsigned int sg_nents; }; /* Returns the offset of the umem start relative to the first page. */ diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h index dadc96dea39c..eeec4e53c448 100644 --- a/include/rdma/ib_umem_odp.h +++ b/include/rdma/ib_umem_odp.h @@ -69,6 +69,7 @@ struct ib_umem_odp { int notifiers_seq; int notifiers_count; + int npages; /* Tree tracking */ struct umem_odp_node interval_tree; -- cgit v1.2.3 From 4d2e11d42fe4117c24e79a012904cf0fa7fdcfe3 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 4 Apr 2019 11:04:39 +0100 Subject: opa_vnic: fix check on record->event, incorrect operator used The check on record->event is always true because the wrong operator is being used: the code uses || where && is required. Addresses-Coverity: ("Constant expression result") Fixes: fae7a699a925 ("opa_vnic: Convert vport_idr to XArray") Signed-off-by: Colin Ian King Acked-by: Dennis Dalessandro Reviewed-by: Mukesh Ojha Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c index 76cd09410d9a..be5befd92d16 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c @@ -869,7 +869,7 @@ static void opa_vnic_event(struct ib_event_handler *handler, record->event, dev_name(&record->device->dev), record->element.port_num); - if (record->event != IB_EVENT_PORT_ERR || + if (record->event != IB_EVENT_PORT_ERR && record->event != IB_EVENT_PORT_ACTIVE) return; -- cgit v1.2.3 From c87e65cfb97c7f325132a68288ed76ba7bdcd2c6 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 11 Mar 2019 14:40:31 +0200 Subject: RDMA/cm: Move debug counters to be under relevant IB device The sysfs layout is
created by CM incorrectly presented RDMA devices with InfiniBand link layer. Layout of such devices represents device tree of connections. By moving CM statistics to be under relevant port of IB device, we will fix the following issues: * Symlink name - It used device name instead of specific identifier. * Target location - It was supposed to point to PCI-ID/infiniband_cm/ instead of PCI-ID/infiniband/ * Target name - It created extra device file under already existing device folder, e.g. mlx5_0/mlx5_0 * Crash during boot with RDMA persistent naming patches. sysfs: cannot create duplicate filename '/class/infiniband_cm/mlx5_0' CPU: 29 PID: 433 Comm: modprobe Not tainted 5.0.0-rc5+ #178 Call Trace: dump_stack+0xcc/0x180 sysfs_warn_dup.cold.3+0x17/0x2d sysfs_do_create_link_sd.isra.2+0xd0/0xf0 device_add+0x7cb/0x1450 device_create_groups_vargs+0x1ae/0x220 device_create+0x93/0xc0 cm_add_one+0x38f/0xf60 [ib_cm] add_client_context+0x167/0x210 [ib_core] enable_device_and_get+0x230/0x3f0 [ib_core] ib_register_device+0x823/0xbf0 [ib_core] __mlx5_ib_add+0x45/0x150 [mlx5_ib] mlx5_ib_add+0x1b3/0x5e0 [mlx5_ib] mlx5_add_device+0x130/0x3a0 [mlx5_core] mlx5_register_interface+0x1a9/0x270 [mlx5_core] do_one_initcall+0x14f/0x5de do_init_module+0x247/0x7c0 load_module+0x4c2f/0x60d0 entry_SYSCALL_64_after_hwframe+0x49/0xbe After this change: [leonro@server ~]$ ls -al /sys/class/infiniband/ibp0s12f0/ports/1/ drwxr-xr-x 2 root root 0 Mar 11 11:17 cm_rx_duplicates drwxr-xr-x 2 root root 0 Mar 11 11:17 cm_rx_msgs drwxr-xr-x 2 root root 0 Mar 11 11:17 cm_tx_msgs drwxr-xr-x 2 root root 0 Mar 11 11:17 cm_tx_retries Fixes: 110cf374a809 ("infiniband: make cm_device use a struct device and not a kobject.") Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cm.c | 46 +++++++------------------------------ drivers/infiniband/core/core_priv.h | 6 +++++ drivers/infiniband/core/sysfs.c | 43 ++++++++++++++++++++++++++++++++++ 3 files changed, 57 insertions(+), 38 
deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 1dd2b572f59f..4df59f2b0f04 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -52,6 +52,7 @@ #include #include #include "cm_msgs.h" +#include "core_priv.h" MODULE_AUTHOR("Sean Hefty"); MODULE_DESCRIPTION("InfiniBand CM"); @@ -220,7 +221,6 @@ struct cm_port { struct cm_device { struct list_head list; struct ib_device *ib_device; - struct device *device; u8 ack_delay; int going_down; struct cm_port *port[0]; @@ -4272,18 +4272,6 @@ static struct kobj_type cm_counter_obj_type = { .default_attrs = cm_counter_default_attrs }; -static void cm_release_port_obj(struct kobject *obj) -{ - struct cm_port *cm_port; - - cm_port = container_of(obj, struct cm_port, port_obj); - kfree(cm_port); -} - -static struct kobj_type cm_port_obj_type = { - .release = cm_release_port_obj -}; - static char *cm_devnode(struct device *dev, umode_t *mode) { if (mode) @@ -4302,19 +4290,12 @@ static int cm_create_port_fs(struct cm_port *port) { int i, ret; - ret = kobject_init_and_add(&port->port_obj, &cm_port_obj_type, - &port->cm_dev->device->kobj, - "%d", port->port_num); - if (ret) { - kfree(port); - return ret; - } - for (i = 0; i < CM_COUNTER_GROUPS; i++) { - ret = kobject_init_and_add(&port->counter_group[i].obj, - &cm_counter_obj_type, - &port->port_obj, - "%s", counter_group_names[i]); + ret = ib_port_register_module_stat(port->cm_dev->ib_device, + port->port_num, + &port->counter_group[i].obj, + &cm_counter_obj_type, + counter_group_names[i]); if (ret) goto error; } @@ -4323,8 +4304,7 @@ static int cm_create_port_fs(struct cm_port *port) error: while (i--) - kobject_put(&port->counter_group[i].obj); - kobject_put(&port->port_obj); + ib_port_unregister_module_stat(&port->counter_group[i].obj); return ret; } @@ -4334,9 +4314,8 @@ static void cm_remove_port_fs(struct cm_port *port) int i; for (i = 0; i < CM_COUNTER_GROUPS; i++) - kobject_put(&port->counter_group[i].obj); + 
ib_port_unregister_module_stat(&port->counter_group[i].obj); - kobject_put(&port->port_obj); } static void cm_add_one(struct ib_device *ib_device) @@ -4363,13 +4342,6 @@ static void cm_add_one(struct ib_device *ib_device) cm_dev->ib_device = ib_device; cm_dev->ack_delay = ib_device->attrs.local_ca_ack_delay; cm_dev->going_down = 0; - cm_dev->device = device_create(&cm_class, &ib_device->dev, - MKDEV(0, 0), NULL, - "%s", dev_name(&ib_device->dev)); - if (IS_ERR(cm_dev->device)) { - kfree(cm_dev); - return; - } set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask); for (i = 1; i <= ib_device->phys_port_cnt; i++) { @@ -4436,7 +4408,6 @@ error1: cm_remove_port_fs(port); } free: - device_unregister(cm_dev->device); kfree(cm_dev); } @@ -4490,7 +4461,6 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data) cm_remove_port_fs(port); } - device_unregister(cm_dev->device); kfree(cm_dev); } diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index 0663fc64e950..5b0ffbb6b3c9 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -344,4 +344,10 @@ int ib_setup_port_attrs(struct ib_core_device *coredev, bool alloc_hw_stats); int rdma_compatdev_set(u8 enable); + +int ib_port_register_module_stat(struct ib_device *device, u8 port_num, + struct kobject *kobj, struct kobj_type *ktype, + const char *name); +void ib_port_unregister_module_stat(struct kobject *kobj); + #endif /* _CORE_PRIV_H */ diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 2ce3f58157a5..2fe89754e592 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -1354,3 +1354,46 @@ void ib_device_unregister_sysfs(struct ib_device *device) ib_free_port_attrs(&device->coredev); } + +/** + * ib_port_register_module_stat - add module counters under relevant port + * of IB device. 
+ * + * @device: IB device to add counters + * @port_num: valid port number + * @kobj: pointer to the kobject to initialize + * @ktype: pointer to the ktype for this kobject. + * @name: the name of the kobject + */ +int ib_port_register_module_stat(struct ib_device *device, u8 port_num, + struct kobject *kobj, struct kobj_type *ktype, + const char *name) +{ + struct kobject *p, *t; + int ret; + + list_for_each_entry_safe(p, t, &device->coredev.port_list, entry) { + struct ib_port *port = container_of(p, struct ib_port, kobj); + + if (port->port_num != port_num) + continue; + + ret = kobject_init_and_add(kobj, ktype, &port->kobj, "%s", + name); + if (ret) + return ret; + } + + return 0; +} +EXPORT_SYMBOL(ib_port_register_module_stat); + +/** + * ib_port_unregister_module_stat - release module counters + * @kobj: pointer to the kobject to release + */ +void ib_port_unregister_module_stat(struct kobject *kobj) +{ + kobject_put(kobj); +} +EXPORT_SYMBOL(ib_port_unregister_module_stat); -- cgit v1.2.3 From 9e886b39a73ad0219b3f2ff574c135d770118a6b Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 2 Apr 2019 21:50:34 +0300 Subject: RDMA/nldev: Return device protocol Add new RDMA_NLDEV_ATTR_DEV_PROTOCOL attribute to give ability for UDEV rules create IB device stable names based on link type protocol. The assumption that devices like mlx4 with duality in their link type under one IB device struct won't be allowed in the future. 
Signed-off-by: Leon Romanovsky Reviewed-by: Parav Pandit Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/nldev.c | 24 +++++++++++++++++++++++- include/uapi/rdma/rdma_netlink.h | 5 +++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 28b4ed8f9930..8cb3851d212e 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -117,6 +117,8 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = { [RDMA_NLDEV_ATTR_LINK_TYPE] = { .type = NLA_NUL_STRING, .len = RDMA_NLDEV_ATTR_ENTRY_STRLEN }, [RDMA_NLDEV_SYS_ATTR_NETNS_MODE] = { .type = NLA_U8 }, + [RDMA_NLDEV_ATTR_DEV_PROTOCOL] = { .type = NLA_NUL_STRING, + .len = RDMA_NLDEV_ATTR_ENTRY_STRLEN }, }; static int put_driver_name_print_type(struct sk_buff *msg, const char *name, @@ -199,6 +201,8 @@ static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device) static int fill_dev_info(struct sk_buff *msg, struct ib_device *device) { char fw[IB_FW_VERSION_NAME_MAX]; + int ret = 0; + u8 port; if (fill_nldev_handle(msg, device)) return -EMSGSIZE; @@ -227,7 +231,25 @@ static int fill_dev_info(struct sk_buff *msg, struct ib_device *device) return -EMSGSIZE; if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_NODE_TYPE, device->node_type)) return -EMSGSIZE; - return 0; + + /* + * Link type is determined on first port and mlx4 device + * which can potentially have two different link type for the same + * IB device is considered as better to be avoided in the future, + */ + port = rdma_start_port(device); + if (rdma_cap_opa_mad(device, port)) + ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "opa"); + else if (rdma_protocol_ib(device, port)) + ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "ib"); + else if (rdma_protocol_iwarp(device, port)) + ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "iw"); + else if (rdma_protocol_roce(device, port)) + ret = nla_put_string(msg, 
RDMA_NLDEV_ATTR_DEV_PROTOCOL, "roce"); + else if (rdma_protocol_usnic(device, port)) + ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, + "usnic"); + return ret; } static int fill_port_info(struct sk_buff *msg, diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index 3a231a989974..d49f491341f6 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -470,6 +470,11 @@ enum rdma_nldev_attr { */ RDMA_NLDEV_SYS_ATTR_NETNS_MODE, /* u8 */ + /* + * Device protocol, e.g. ib, iw, usnic, roce and opa + */ + RDMA_NLDEV_ATTR_DEV_PROTOCOL, /* string */ + /* * Always the end */ -- cgit v1.2.3 From e79c9c60622a59a814c54a1ee70298afe544441a Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 1 Apr 2019 17:08:23 -0300 Subject: IB/mlx5: Remove references to uobject->context These should all go through udata now. Add mlx5_udata_to_mdev to convert a udata into the struct mlx5_ib_dev as these call sites require. Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/devx.c | 16 +++++++++------- drivers/infiniband/hw/mlx5/flow.c | 13 ++++++------- drivers/infiniband/hw/mlx5/mlx5_ib.h | 8 ++++++++ 3 files changed, 23 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 0770dcc74add..d468f11a81d1 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -373,8 +373,10 @@ static u64 devx_get_obj_id(const void *in) return obj_id; } -static bool devx_is_valid_obj_id(struct ib_uobject *uobj, const void *in) +static bool devx_is_valid_obj_id(struct uverbs_attr_bundle *attrs, + struct ib_uobject *uobj, const void *in) { + struct mlx5_ib_dev *dev = mlx5_udata_to_mdev(&attrs->driver_udata); u64 obj_id = devx_get_obj_id(in); if (!obj_id) @@ -389,7 +391,6 @@ static bool devx_is_valid_obj_id(struct ib_uobject *uobj, const void *in) case UVERBS_OBJECT_SRQ: { struct mlx5_core_srq *srq = &(to_msrq(uobj->object)->msrq); -
struct mlx5_ib_dev *dev = to_mdev(uobj->context->device); u16 opcode; switch (srq->common.res) { @@ -1136,7 +1137,8 @@ static int devx_obj_cleanup(struct ib_uobject *uobject, return ret; if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) { - struct mlx5_ib_dev *dev = to_mdev(uobject->context->device); + struct mlx5_ib_dev *dev = + mlx5_udata_to_mdev(&attrs->driver_udata); call_srcu(&dev->mr_srcu, &obj->devx_mr.rcu, devx_free_indirect_mkey); @@ -1261,7 +1263,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)( if (!devx_is_obj_modify_cmd(cmd_in)) return -EINVAL; - if (!devx_is_valid_obj_id(uobj, cmd_in)) + if (!devx_is_valid_obj_id(attrs, uobj, cmd_in)) return -EINVAL; cmd_out = uverbs_zalloc(attrs, cmd_out_len); @@ -1303,7 +1305,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)( if (!devx_is_obj_query_cmd(cmd_in)) return -EINVAL; - if (!devx_is_valid_obj_id(uobj, cmd_in)) + if (!devx_is_valid_obj_id(attrs, uobj, cmd_in)) return -EINVAL; cmd_out = uverbs_zalloc(attrs, cmd_out_len); @@ -1351,7 +1353,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC)( struct ib_uobject *uobj = uverbs_attr_get_uobject( attrs, MLX5_IB_ATTR_DEVX_ASYNC_CMD_FD_ALLOC_HANDLE); - struct mlx5_ib_dev *mdev = to_mdev(uobj->context->device); + struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata); ev_file = container_of(uobj, struct devx_async_cmd_event_file, uobj); @@ -1413,7 +1415,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY)( if (err) return err; - if (!devx_is_valid_obj_id(uobj, cmd_in)) + if (!devx_is_valid_obj_id(attrs, uobj, cmd_in)) return -EINVAL; fd_uobj = uverbs_attr_get_uobject(attrs, diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c index e8c3847a1a10..b9affbdb5d79 100644 --- a/drivers/infiniband/hw/mlx5/flow.c +++ b/drivers/infiniband/hw/mlx5/flow.c @@ -75,7 +75,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( struct ib_qp *qp = NULL; struct ib_uobject *uobj = 
uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_CREATE_FLOW_HANDLE); - struct mlx5_ib_dev *dev = to_mdev(uobj->context->device); + struct mlx5_ib_dev *dev = mlx5_udata_to_mdev(&attrs->driver_udata); int len, ret, i; u32 counter_id = 0; @@ -208,7 +208,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)( { struct ib_uobject *uobj = uverbs_attr_get_uobject( attrs, MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE); - struct mlx5_ib_dev *dev = to_mdev(uobj->context->device); + struct mlx5_ib_dev *dev = mlx5_udata_to_mdev(&attrs->driver_udata); struct mlx5_ib_flow_matcher *obj; u32 flags; int err; @@ -327,7 +327,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER)( { struct ib_uobject *uobj = uverbs_attr_get_uobject( attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE); - struct mlx5_ib_dev *mdev = to_mdev(uobj->context->device); + struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata); enum mlx5_ib_uapi_flow_table_type ft_type; struct ib_flow_action *action; int num_actions; @@ -354,7 +354,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER)( if (IS_ERR(action)) return PTR_ERR(action); - uverbs_flow_action_fill_action(action, uobj, uobj->context->device, + uverbs_flow_action_fill_action(action, uobj, &mdev->ib_dev, IB_FLOW_ACTION_UNSPECIFIED); return 0; @@ -446,7 +446,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT)( { struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE); - struct mlx5_ib_dev *mdev = to_mdev(uobj->context->device); + struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata); enum mlx5_ib_uapi_flow_action_packet_reformat_type dv_prt; enum mlx5_ib_uapi_flow_table_type ft_type; struct mlx5_ib_flow_action *maction; @@ -494,8 +494,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT)( goto free_maction; } - uverbs_flow_action_fill_action(&maction->ib_action, uobj, - 
uobj->context->device, + uverbs_flow_action_fill_action(&maction->ib_action, uobj, &mdev->ib_dev, IB_FLOW_ACTION_UNSPECIFIED); return 0; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index f7314d78aafd..e36aa2f79943 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -968,6 +968,14 @@ static inline struct mlx5_ib_dev *to_mdev(struct ib_device *ibdev) return container_of(ibdev, struct mlx5_ib_dev, ib_dev); } +static inline struct mlx5_ib_dev *mlx5_udata_to_mdev(struct ib_udata *udata) +{ + struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context( + udata, struct mlx5_ib_ucontext, ibucontext); + + return to_mdev(context->ibucontext.device); +} + static inline struct mlx5_ib_cq *to_mcq(struct ib_cq *ibcq) { return container_of(ibcq, struct mlx5_ib_cq, ibcq); -- cgit v1.2.3 From feec576a6af299143e40dcfb34b6c5604e4ff397 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 1 Apr 2019 17:08:24 -0300 Subject: IB: When attrs.udata/ufile is available use that instead of uobject The ucontext and ufile should not be accessed via the uobject, all these cases have an attrs so use that instead. 
Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/rdma_core.c | 6 +++--- drivers/infiniband/core/uverbs_std_types_counters.c | 2 +- drivers/infiniband/core/uverbs_std_types_cq.c | 2 +- drivers/infiniband/core/uverbs_std_types_dm.c | 4 ++-- drivers/infiniband/core/uverbs_std_types_flow_action.c | 2 +- include/rdma/uverbs_std_types.h | 2 +- 6 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index d0a6755c0562..e9c905220abd 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -128,7 +128,7 @@ static int uverbs_destroy_uobject(struct ib_uobject *uobj, enum rdma_remove_reason reason, struct uverbs_attr_bundle *attrs) { - struct ib_uverbs_file *ufile = uobj->ufile; + struct ib_uverbs_file *ufile = attrs->ufile; unsigned long flags; int ret; @@ -200,7 +200,7 @@ static int uverbs_destroy_uobject(struct ib_uobject *uobj, */ int uobj_destroy(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs) { - struct ib_uverbs_file *ufile = uobj->ufile; + struct ib_uverbs_file *ufile = attrs->ufile; int ret; down_read(&ufile->hw_destroy_rwsem); @@ -655,7 +655,7 @@ static int alloc_commit_fd_uobject(struct ib_uobject *uobj) int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs) { - struct ib_uverbs_file *ufile = uobj->ufile; + struct ib_uverbs_file *ufile = attrs->ufile; int ret; /* alloc_commit consumes the uobj kref */ diff --git a/drivers/infiniband/core/uverbs_std_types_counters.c b/drivers/infiniband/core/uverbs_std_types_counters.c index 87aaf91072e3..9f013304e677 100644 --- a/drivers/infiniband/core/uverbs_std_types_counters.c +++ b/drivers/infiniband/core/uverbs_std_types_counters.c @@ -54,7 +54,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_CREATE)( { struct ib_uobject *uobj = uverbs_attr_get_uobject( attrs, UVERBS_ATTR_CREATE_COUNTERS_HANDLE); - struct ib_device *ib_dev = 
uobj->context->device; + struct ib_device *ib_dev = attrs->context->device; struct ib_counters *counters; int ret; diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c index 977e386009fc..db5c46a1bb2d 100644 --- a/drivers/infiniband/core/uverbs_std_types_cq.c +++ b/drivers/infiniband/core/uverbs_std_types_cq.c @@ -64,7 +64,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)( struct ib_ucq_object *obj = container_of( uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_CQ_HANDLE), typeof(*obj), uobject); - struct ib_device *ib_dev = obj->uobject.context->device; + struct ib_device *ib_dev = attrs->context->device; int ret; u64 user_handle; struct ib_cq_init_attr attr = {}; diff --git a/drivers/infiniband/core/uverbs_std_types_dm.c b/drivers/infiniband/core/uverbs_std_types_dm.c index c9b68dcf8f5c..d5a1de33c2c9 100644 --- a/drivers/infiniband/core/uverbs_std_types_dm.c +++ b/drivers/infiniband/core/uverbs_std_types_dm.c @@ -55,7 +55,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)( struct ib_uobject *uobj = uverbs_attr_get(attrs, UVERBS_ATTR_ALLOC_DM_HANDLE) ->obj_attr.uobject; - struct ib_device *ib_dev = uobj->context->device; + struct ib_device *ib_dev = attrs->context->device; struct ib_dm *dm; int ret; @@ -72,7 +72,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)( if (ret) return ret; - dm = ib_dev->ops.alloc_dm(ib_dev, uobj->context, &attr, attrs); + dm = ib_dev->ops.alloc_dm(ib_dev, attrs->context, &attr, attrs); if (IS_ERR(dm)) return PTR_ERR(dm); diff --git a/drivers/infiniband/core/uverbs_std_types_flow_action.c b/drivers/infiniband/core/uverbs_std_types_flow_action.c index d6dbc1d580e5..459cf165b231 100644 --- a/drivers/infiniband/core/uverbs_std_types_flow_action.c +++ b/drivers/infiniband/core/uverbs_std_types_flow_action.c @@ -310,7 +310,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)( { struct ib_uobject *uobj = uverbs_attr_get_uobject( attrs, 
UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE); - struct ib_device *ib_dev = uobj->context->device; + struct ib_device *ib_dev = attrs->context->device; int ret; struct ib_flow_action *action; struct ib_flow_action_esp_attr esp_attr = {}; diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h index b9226a5cdfd7..05eabfd5d0d3 100644 --- a/include/rdma/uverbs_std_types.h +++ b/include/rdma/uverbs_std_types.h @@ -128,7 +128,7 @@ __uobj_alloc(const struct uverbs_api_object *obj, rdma_alloc_begin_uobject(obj, attrs->ufile, attrs); if (!IS_ERR(uobj)) - *ib_dev = uobj->context->device; + *ib_dev = attrs->context->device; return uobj; } -- cgit v1.2.3 From f6316032fd3243d3544603d94f237b976f90bb73 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 28 Mar 2019 15:12:58 +0200 Subject: RDMA/core: Support object allocation in atomic context AH objects are allocated in atomic context and those allocations should be done with GFP_ATOMIC. Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/rdma/ib_verbs.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 0e24f6b6c61d..7e965bc06477 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -59,6 +59,8 @@ #include #include #include +#include +#include #include #include #include @@ -2281,8 +2283,11 @@ struct uverbs_attr_bundle; !__same_type(((struct drv_struct *)NULL)->member, \ struct ib_struct))) +#define rdma_zalloc_drv_obj_gfp(ib_dev, ib_type, gfp) \ + ((struct ib_type *)kzalloc(ib_dev->ops.size_##ib_type, gfp)) + #define rdma_zalloc_drv_obj(ib_dev, ib_type) \ - ((struct ib_type *)kzalloc(ib_dev->ops.size_##ib_type, GFP_KERNEL)) + rdma_zalloc_drv_obj_gfp(ib_dev, ib_type, GFP_KERNEL) #define DECLARE_RDMA_OBJ_SIZE(ib_struct) size_t size_##ib_struct -- cgit v1.2.3 From d345691471b426e540140a4cc431c69f80abfcb6 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 3 Apr 2019 16:42:42 +0300 
Subject: RDMA: Handle AH allocations by IB/core Simplify drivers by ensuring lifetime of ib_ah object. The changes in .create_ah() go hand in hand with relevant update in .destroy_ah(). We will use this opportunity to convert .destroy_ah() so that it cannot fail, as was suggested a long time ago, because there is nothing to do in case of failure during destroy. Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 1 + drivers/infiniband/core/verbs.c | 41 ++++++----- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 44 ++++-------- drivers/infiniband/hw/bnxt_re/ib_verbs.h | 10 ++- drivers/infiniband/hw/bnxt_re/main.c | 1 + drivers/infiniband/hw/bnxt_re/qplib_sp.c | 12 ++-- drivers/infiniband/hw/bnxt_re/qplib_sp.h | 4 +- drivers/infiniband/hw/hns/hns_roce_ah.c | 24 +++---- drivers/infiniband/hw/hns/hns_roce_device.h | 8 +-- drivers/infiniband/hw/hns/hns_roce_main.c | 2 + drivers/infiniband/hw/mlx4/ah.c | 95 ++++++++++--------------- drivers/infiniband/hw/mlx4/mad.c | 35 +++++---- drivers/infiniband/hw/mlx4/main.c | 2 + drivers/infiniband/hw/mlx4/mlx4_ib.h | 12 ++-- drivers/infiniband/hw/mlx5/ah.c | 33 ++++----- drivers/infiniband/hw/mlx5/main.c | 2 + drivers/infiniband/hw/mlx5/mlx5_ib.h | 6 +- drivers/infiniband/hw/mthca/mthca_provider.c | 29 +++----- drivers/infiniband/hw/ocrdma/ocrdma_ah.c | 25 +++---- drivers/infiniband/hw/ocrdma/ocrdma_ah.h | 6 +- drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 3 +- drivers/infiniband/hw/ocrdma/ocrdma_hw.h | 4 +- drivers/infiniband/hw/ocrdma/ocrdma_main.c | 2 + drivers/infiniband/hw/qedr/main.c | 2 + drivers/infiniband/hw/qedr/verbs.c | 16 ++--- drivers/infiniband/hw/qedr/verbs.h | 6 +- drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c | 2 + drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c | 36 +++------- drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h | 6 +- drivers/infiniband/sw/rdmavt/ah.c | 37 ++++------ drivers/infiniband/sw/rdmavt/ah.h | 9 +-- drivers/infiniband/sw/rdmavt/vt.c | 2 +
drivers/infiniband/sw/rxe/rxe_pool.c | 2 +- drivers/infiniband/sw/rxe/rxe_verbs.c | 30 ++++---- drivers/infiniband/sw/rxe/rxe_verbs.h | 2 +- include/rdma/ib_verbs.h | 8 +-- 36 files changed, 229 insertions(+), 330 deletions(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 0f98da17af23..b5fad8a68a35 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -2222,6 +2222,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, set_vf_link_state); SET_DEVICE_OP(dev_ops, unmap_fmr); + SET_OBJ_SIZE(dev_ops, ib_ah); SET_OBJ_SIZE(dev_ops, ib_pd); SET_OBJ_SIZE(dev_ops, ib_ucontext); } diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index a479f4c12541..6172019481a4 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -496,25 +496,33 @@ static struct ib_ah *_rdma_create_ah(struct ib_pd *pd, u32 flags, struct ib_udata *udata) { + struct ib_device *device = pd->device; struct ib_ah *ah; + int ret; might_sleep_if(flags & RDMA_CREATE_AH_SLEEPABLE); - if (!pd->device->ops.create_ah) + if (!device->ops.create_ah) return ERR_PTR(-EOPNOTSUPP); - ah = pd->device->ops.create_ah(pd, ah_attr, flags, udata); + ah = rdma_zalloc_drv_obj_gfp( + device, ib_ah, + (flags & RDMA_CREATE_AH_SLEEPABLE) ? 
GFP_KERNEL : GFP_ATOMIC); + if (!ah) + return ERR_PTR(-ENOMEM); - if (!IS_ERR(ah)) { - ah->device = pd->device; - ah->pd = pd; - ah->uobject = NULL; - ah->type = ah_attr->type; - ah->sgid_attr = rdma_update_sgid_attr(ah_attr, NULL); + ah->device = device; + ah->pd = pd; + ah->type = ah_attr->type; + ah->sgid_attr = rdma_update_sgid_attr(ah_attr, NULL); - atomic_inc(&pd->usecnt); + ret = device->ops.create_ah(ah, ah_attr, flags, udata); + if (ret) { + kfree(ah); + return ERR_PTR(ret); } + atomic_inc(&pd->usecnt); return ah; } @@ -935,19 +943,18 @@ int rdma_destroy_ah_user(struct ib_ah *ah, u32 flags, struct ib_udata *udata) { const struct ib_gid_attr *sgid_attr = ah->sgid_attr; struct ib_pd *pd; - int ret; might_sleep_if(flags & RDMA_DESTROY_AH_SLEEPABLE); pd = ah->pd; - ret = ah->device->ops.destroy_ah(ah, flags, udata); - if (!ret) { - atomic_dec(&pd->usecnt); - if (sgid_attr) - rdma_put_gid_attr(sgid_attr); - } - return ret; + ah->device->ops.destroy_ah(ah, flags); + atomic_dec(&pd->usecnt); + if (sgid_attr) + rdma_put_gid_attr(sgid_attr); + + kfree(ah); + return 0; } EXPORT_SYMBOL(rdma_destroy_ah_user); diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 04e3529ffe06..a9e2e29d7ad0 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -633,20 +633,13 @@ fail: } /* Address Handles */ -int bnxt_re_destroy_ah(struct ib_ah *ib_ah, u32 flags, struct ib_udata *udata) +void bnxt_re_destroy_ah(struct ib_ah *ib_ah, u32 flags) { struct bnxt_re_ah *ah = container_of(ib_ah, struct bnxt_re_ah, ib_ah); struct bnxt_re_dev *rdev = ah->rdev; - int rc; - rc = bnxt_qplib_destroy_ah(&rdev->qplib_res, &ah->qplib_ah, - !(flags & RDMA_DESTROY_AH_SLEEPABLE)); - if (rc) { - dev_err(rdev_to_dev(rdev), "Failed to destroy HW AH"); - return rc; - } - kfree(ah); - return 0; + bnxt_qplib_destroy_ah(&rdev->qplib_res, &ah->qplib_ah, + !(flags & RDMA_DESTROY_AH_SLEEPABLE)); } static u8 
bnxt_re_stack_to_dev_nw_type(enum rdma_network_type ntype) @@ -667,26 +660,22 @@ static u8 bnxt_re_stack_to_dev_nw_type(enum rdma_network_type ntype) return nw_type; } -struct ib_ah *bnxt_re_create_ah(struct ib_pd *ib_pd, - struct rdma_ah_attr *ah_attr, - u32 flags, - struct ib_udata *udata) +int bnxt_re_create_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr, + u32 flags, struct ib_udata *udata) { + struct ib_pd *ib_pd = ib_ah->pd; struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd); const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr); struct bnxt_re_dev *rdev = pd->rdev; const struct ib_gid_attr *sgid_attr; - struct bnxt_re_ah *ah; + struct bnxt_re_ah *ah = container_of(ib_ah, struct bnxt_re_ah, ib_ah); u8 nw_type; int rc; if (!(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH)) { dev_err(rdev_to_dev(rdev), "Failed to alloc AH: GRH not set"); - return ERR_PTR(-EINVAL); + return -EINVAL; } - ah = kzalloc(sizeof(*ah), GFP_ATOMIC); - if (!ah) - return ERR_PTR(-ENOMEM); ah->rdev = rdev; ah->qplib_ah.pd = &pd->qplib_pd; @@ -716,7 +705,7 @@ struct ib_ah *bnxt_re_create_ah(struct ib_pd *ib_pd, !(flags & RDMA_CREATE_AH_SLEEPABLE)); if (rc) { dev_err(rdev_to_dev(rdev), "Failed to allocate HW AH"); - goto fail; + return rc; } /* Write AVID to shared page. 
*/ @@ -733,11 +722,7 @@ struct ib_ah *bnxt_re_create_ah(struct ib_pd *ib_pd, spin_unlock_irqrestore(&uctx->sh_lock, flag); } - return &ah->ib_ah; - -fail: - kfree(ah); - return ERR_PTR(rc); + return 0; } int bnxt_re_modify_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr) @@ -810,13 +795,8 @@ int bnxt_re_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata) bnxt_qplib_free_qp_res(&rdev->qplib_res, &qp->qplib_qp); if (ib_qp->qp_type == IB_QPT_GSI && rdev->qp1_sqp) { - rc = bnxt_qplib_destroy_ah(&rdev->qplib_res, - &rdev->sqp_ah->qplib_ah, false); - if (rc) { - dev_err(rdev_to_dev(rdev), - "Failed to destroy HW AH for shadow QP"); - return rc; - } + bnxt_qplib_destroy_ah(&rdev->qplib_res, &rdev->sqp_ah->qplib_ah, + false); bnxt_qplib_clean_qp(&qp->qplib_qp); rc = bnxt_qplib_destroy_qp(&rdev->qplib_res, diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h index 488dc735a260..953da89e5ec0 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h @@ -63,8 +63,8 @@ struct bnxt_re_pd { }; struct bnxt_re_ah { - struct bnxt_re_dev *rdev; struct ib_ah ib_ah; + struct bnxt_re_dev *rdev; struct bnxt_qplib_ah qplib_ah; }; @@ -165,13 +165,11 @@ enum rdma_link_layer bnxt_re_get_link_layer(struct ib_device *ibdev, u8 port_num); int bnxt_re_alloc_pd(struct ib_pd *pd, struct ib_udata *udata); void bnxt_re_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata); -struct ib_ah *bnxt_re_create_ah(struct ib_pd *pd, - struct rdma_ah_attr *ah_attr, - u32 flags, - struct ib_udata *udata); +int bnxt_re_create_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, u32 flags, + struct ib_udata *udata); int bnxt_re_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); int bnxt_re_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); -int bnxt_re_destroy_ah(struct ib_ah *ah, u32 flags, struct ib_udata *udata); +void bnxt_re_destroy_ah(struct ib_ah *ah, u32 flags); struct ib_srq 
*bnxt_re_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *srq_init_attr, struct ib_udata *udata); diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 2bd24ac45ee4..ec22853f8363 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -637,6 +637,7 @@ static const struct ib_device_ops bnxt_re_dev_ops = { .query_srq = bnxt_re_query_srq, .reg_user_mr = bnxt_re_reg_user_mr, .req_notify_cq = bnxt_re_req_notify_cq, + INIT_RDMA_OBJ_SIZE(ib_ah, bnxt_re_ah, ib_ah), INIT_RDMA_OBJ_SIZE(ib_pd, bnxt_re_pd, ib_pd), INIT_RDMA_OBJ_SIZE(ib_ucontext, bnxt_re_ucontext, ib_uctx), }; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index ef1938733a41..48793d3512ac 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -532,25 +532,21 @@ int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah, return 0; } -int bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah, - bool block) +void bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah, + bool block) { struct bnxt_qplib_rcfw *rcfw = res->rcfw; struct cmdq_destroy_ah req; struct creq_destroy_ah_resp resp; u16 cmd_flags = 0; - int rc; /* Clean up the AH table in the device */ RCFW_CMD_PREP(req, DESTROY_AH, cmd_flags); req.ah_cid = cpu_to_le32(ah->id); - rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp, - NULL, block); - if (rc) - return rc; - return 0; + bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp, NULL, + block); } /* MRW */ diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h index 39454b3f738d..0ec3b12b0bcd 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h @@ -243,8 +243,8 @@ int bnxt_qplib_set_func_resources(struct bnxt_qplib_res *res, struct bnxt_qplib_ctx 
*ctx); int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah, bool block); -int bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah, - bool block); +void bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah, + bool block); int bnxt_qplib_alloc_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw); int bnxt_qplib_dereg_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw, diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c index 6ba505bc7cce..d9498313ea46 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ah.c +++ b/drivers/infiniband/hw/hns/hns_roce_ah.c @@ -39,23 +39,17 @@ #define HNS_ROCE_VLAN_SL_BIT_MASK 7 #define HNS_ROCE_VLAN_SL_SHIFT 13 -struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd, - struct rdma_ah_attr *ah_attr, - u32 flags, - struct ib_udata *udata) +int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr, + u32 flags, struct ib_udata *udata) { - struct hns_roce_dev *hr_dev = to_hr_dev(ibpd->device); + struct hns_roce_dev *hr_dev = to_hr_dev(ibah->device); const struct ib_gid_attr *gid_attr; struct device *dev = hr_dev->dev; - struct hns_roce_ah *ah; + struct hns_roce_ah *ah = to_hr_ah(ibah); u16 vlan_tag = 0xffff; const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr); bool vlan_en = false; - ah = kzalloc(sizeof(*ah), GFP_ATOMIC); - if (!ah) - return ERR_PTR(-ENOMEM); - /* Get mac address */ memcpy(ah->av.mac, ah_attr->roce.dmac, ETH_ALEN); @@ -70,7 +64,7 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd, HNS_ROCE_VLAN_SL_BIT_MASK) << HNS_ROCE_VLAN_SL_SHIFT; - ah->av.port_pd = cpu_to_le32(to_hr_pd(ibpd)->pdn | + ah->av.port_pd = cpu_to_le32(to_hr_pd(ibah->pd)->pdn | (rdma_ah_get_port_num(ah_attr) << HNS_ROCE_PORT_NUM_SHIFT)); ah->av.gid_index = grh->sgid_index; @@ -86,7 +80,7 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd, ah->av.sl_tclass_flowlabel = cpu_to_le32(rdma_ah_get_sl(ah_attr) << 
HNS_ROCE_SL_SHIFT); - return &ah->ibah; + return 0; } int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr) @@ -111,9 +105,7 @@ int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr) return 0; } -int hns_roce_destroy_ah(struct ib_ah *ah, u32 flags, struct ib_udata *udata) +void hns_roce_destroy_ah(struct ib_ah *ah, u32 flags) { - kfree(to_hr_ah(ah)); - - return 0; + return; } diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index b23b13f06d58..e424dcca2c74 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -1105,12 +1105,10 @@ void hns_roce_bitmap_free_range(struct hns_roce_bitmap *bitmap, unsigned long obj, int cnt, int rr); -struct ib_ah *hns_roce_create_ah(struct ib_pd *pd, - struct rdma_ah_attr *ah_attr, - u32 flags, - struct ib_udata *udata); +int hns_roce_create_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, + u32 flags, struct ib_udata *udata); int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); -int hns_roce_destroy_ah(struct ib_ah *ah, u32 flags, struct ib_udata *udata); +void hns_roce_destroy_ah(struct ib_ah *ah, u32 flags); int hns_roce_alloc_pd(struct ib_pd *pd, struct ib_udata *udata); void hns_roce_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata); diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index c929125da84b..176bade523ea 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -468,6 +468,8 @@ static const struct ib_device_ops hns_roce_dev_ops = { .query_pkey = hns_roce_query_pkey, .query_port = hns_roce_query_port, .reg_user_mr = hns_roce_reg_user_mr, + + INIT_RDMA_OBJ_SIZE(ib_ah, hns_roce_ah, ibah), INIT_RDMA_OBJ_SIZE(ib_pd, hns_roce_pd, ibpd), INIT_RDMA_OBJ_SIZE(ib_ucontext, hns_roce_ucontext, ibucontext), }; diff --git a/drivers/infiniband/hw/mlx4/ah.c 
b/drivers/infiniband/hw/mlx4/ah.c index 6f552b780b89..b53772ab2401 100644 --- a/drivers/infiniband/hw/mlx4/ah.c +++ b/drivers/infiniband/hw/mlx4/ah.c @@ -40,13 +40,12 @@ #include "mlx4_ib.h" -static struct ib_ah *create_ib_ah(struct ib_pd *pd, - struct rdma_ah_attr *ah_attr, - struct mlx4_ib_ah *ah) +static void create_ib_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr) { - struct mlx4_dev *dev = to_mdev(pd->device)->dev; + struct mlx4_ib_ah *ah = to_mah(ib_ah); + struct mlx4_dev *dev = to_mdev(ib_ah->device)->dev; - ah->av.ib.port_pd = cpu_to_be32(to_mpd(pd)->pdn | + ah->av.ib.port_pd = cpu_to_be32(to_mpd(ib_ah->pd)->pdn | (rdma_ah_get_port_num(ah_attr) << 24)); ah->av.ib.g_slid = rdma_ah_get_path_bits(ah_attr); ah->av.ib.sl_tclass_flowlabel = @@ -73,15 +72,12 @@ static struct ib_ah *create_ib_ah(struct ib_pd *pd, --static_rate; ah->av.ib.stat_rate = static_rate; } - - return &ah->ibah; } -static struct ib_ah *create_iboe_ah(struct ib_pd *pd, - struct rdma_ah_attr *ah_attr, - struct mlx4_ib_ah *ah) +static int create_iboe_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr) { - struct mlx4_ib_dev *ibdev = to_mdev(pd->device); + struct mlx4_ib_dev *ibdev = to_mdev(ib_ah->device); + struct mlx4_ib_ah *ah = to_mah(ib_ah); const struct ib_gid_attr *gid_attr; struct mlx4_dev *dev = ibdev->dev; int is_mcast = 0; @@ -108,7 +104,7 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, memcpy(ah->av.eth.s_mac, gid_attr->ndev->dev_addr, ETH_ALEN); ret = mlx4_ib_gid_index_to_real_index(ibdev, gid_attr); if (ret < 0) - return ERR_PTR(ret); + return ret; ah->av.eth.gid_index = ret; } else { /* mlx4_ib_create_ah_slave fills in the s_mac and the vlan */ @@ -117,7 +113,7 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, if (vlan_tag < 0x1000) vlan_tag |= (rdma_ah_get_sl(ah_attr) & 7) << 13; - ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn | + ah->av.eth.port_pd = cpu_to_be32(to_mpd(ib_ah->pd)->pdn | (rdma_ah_get_port_num(ah_attr) << 24)); ah->av.eth.vlan = 
cpu_to_be16(vlan_tag); ah->av.eth.hop_limit = grh->hop_limit; @@ -140,63 +136,45 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, memcpy(ah->av.eth.dgid, grh->dgid.raw, 16); ah->av.eth.sl_tclass_flowlabel |= cpu_to_be32(rdma_ah_get_sl(ah_attr) << 29); - return &ah->ibah; + return 0; } -struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, - u32 flags, struct ib_udata *udata) +int mlx4_ib_create_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr, + u32 flags, struct ib_udata *udata) { - struct mlx4_ib_ah *ah; - struct ib_ah *ret; - - ah = kzalloc(sizeof *ah, GFP_ATOMIC); - if (!ah) - return ERR_PTR(-ENOMEM); - if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) { - if (!(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH)) { - ret = ERR_PTR(-EINVAL); - } else { - /* - * TBD: need to handle the case when we get - * called in an atomic context and there we - * might sleep. We don't expect this - * currently since we're working with link - * local addresses which we can translate - * without going to sleep. - */ - ret = create_iboe_ah(pd, ah_attr, ah); - } - - if (IS_ERR(ret)) - kfree(ah); + if (!(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH)) + return -EINVAL; + /* + * TBD: need to handle the case when we get + * called in an atomic context and there we + * might sleep. We don't expect this + * currently since we're working with link + * local addresses which we can translate + * without going to sleep. + */ + return create_iboe_ah(ib_ah, ah_attr); + } - return ret; - } else - return create_ib_ah(pd, ah_attr, ah); /* never fails */ + create_ib_ah(ib_ah, ah_attr); + return 0; } -/* AH's created via this call must be free'd by mlx4_ib_destroy_ah. 
*/ -struct ib_ah *mlx4_ib_create_ah_slave(struct ib_pd *pd, - struct rdma_ah_attr *ah_attr, - int slave_sgid_index, u8 *s_mac, - u16 vlan_tag) +int mlx4_ib_create_ah_slave(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, + int slave_sgid_index, u8 *s_mac, u16 vlan_tag) { struct rdma_ah_attr slave_attr = *ah_attr; - struct mlx4_ib_ah *mah; - struct ib_ah *ah; + struct mlx4_ib_ah *mah = to_mah(ah); + int ret; slave_attr.grh.sgid_attr = NULL; slave_attr.grh.sgid_index = slave_sgid_index; - ah = mlx4_ib_create_ah(pd, &slave_attr, 0, NULL); - if (IS_ERR(ah)) - return ah; + ret = mlx4_ib_create_ah(ah, &slave_attr, 0, NULL); + if (ret) + return ret; - ah->device = pd->device; - ah->pd = pd; ah->type = ah_attr->type; - mah = to_mah(ah); /* get rid of force-loopback bit */ mah->av.ib.port_pd &= cpu_to_be32(0x7FFFFFFF); @@ -208,7 +186,7 @@ struct ib_ah *mlx4_ib_create_ah_slave(struct ib_pd *pd, vlan_tag |= (rdma_ah_get_sl(ah_attr) & 7) << 13; mah->av.eth.vlan = cpu_to_be16(vlan_tag); - return ah; + return 0; } int mlx4_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr) @@ -250,8 +228,7 @@ int mlx4_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr) return 0; } -int mlx4_ib_destroy_ah(struct ib_ah *ah, u32 flags, struct ib_udata *udata) +void mlx4_ib_destroy_ah(struct ib_ah *ah, u32 flags) { - kfree(to_mah(ah)); - return 0; + return; } diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index f090c1b40433..68c951491a08 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -1371,9 +1371,9 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, struct ib_ah *ah; struct ib_qp *send_qp = NULL; unsigned wire_tx_ix = 0; - int ret = 0; u16 wire_pkey_ix; int src_qpnum; + int ret; sqp_ctx = dev->sriov.sqps[port-1]; @@ -1393,12 +1393,20 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, send_qp = sqp->qp; - /* create ah */ - ah = mlx4_ib_create_ah_slave(sqp_ctx->pd, 
attr, - rdma_ah_retrieve_grh(attr)->sgid_index, - s_mac, vlan_id); - if (IS_ERR(ah)) + ah = rdma_zalloc_drv_obj(sqp_ctx->pd->device, ib_ah); + if (!ah) return -ENOMEM; + + ah->device = sqp_ctx->pd->device; + ah->pd = sqp_ctx->pd; + + /* create ah */ + ret = mlx4_ib_create_ah_slave(ah, attr, + rdma_ah_retrieve_grh(attr)->sgid_index, + s_mac, vlan_id); + if (ret) + goto out; + spin_lock(&sqp->tx_lock); if (sqp->tx_ix_head - sqp->tx_ix_tail >= (MLX4_NUM_TUNNEL_BUFS - 1)) @@ -1410,8 +1418,7 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, goto out; sqp_mad = (struct mlx4_mad_snd_buf *) (sqp->tx_ring[wire_tx_ix].buf.addr); - if (sqp->tx_ring[wire_tx_ix].ah) - mlx4_ib_destroy_ah(sqp->tx_ring[wire_tx_ix].ah, 0, NULL); + kfree(sqp->tx_ring[wire_tx_ix].ah); sqp->tx_ring[wire_tx_ix].ah = ah; ib_dma_sync_single_for_cpu(&dev->ib_dev, sqp->tx_ring[wire_tx_ix].buf.map, @@ -1450,7 +1457,7 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, spin_unlock(&sqp->tx_lock); sqp->tx_ring[wire_tx_ix].ah = NULL; out: - mlx4_ib_destroy_ah(ah, 0, NULL); + kfree(ah); return ret; } @@ -1902,9 +1909,8 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work) if (wc.status == IB_WC_SUCCESS) { switch (wc.opcode) { case IB_WC_SEND: - mlx4_ib_destroy_ah(sqp->tx_ring[wc.wr_id & - (MLX4_NUM_TUNNEL_BUFS - 1)].ah, - 0, NULL); + kfree(sqp->tx_ring[wc.wr_id & + (MLX4_NUM_TUNNEL_BUFS - 1)].ah); sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah = NULL; spin_lock(&sqp->tx_lock); @@ -1932,9 +1938,8 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work) " status = %d, wrid = 0x%llx\n", ctx->slave, wc.status, wc.wr_id); if (!MLX4_TUN_IS_RECV(wc.wr_id)) { - mlx4_ib_destroy_ah(sqp->tx_ring[wc.wr_id & - (MLX4_NUM_TUNNEL_BUFS - 1)].ah, - 0, NULL); + kfree(sqp->tx_ring[wc.wr_id & + (MLX4_NUM_TUNNEL_BUFS - 1)].ah); sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah = NULL; spin_lock(&sqp->tx_lock); diff --git a/drivers/infiniband/hw/mlx4/main.c 
b/drivers/infiniband/hw/mlx4/main.c index 952b1bac46db..27f38897ca9e 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2558,6 +2558,8 @@ static const struct ib_device_ops mlx4_ib_dev_ops = { .req_notify_cq = mlx4_ib_arm_cq, .rereg_user_mr = mlx4_ib_rereg_user_mr, .resize_cq = mlx4_ib_resize_cq, + + INIT_RDMA_OBJ_SIZE(ib_ah, mlx4_ib_ah, ibah), INIT_RDMA_OBJ_SIZE(ib_pd, mlx4_ib_pd, ibpd), INIT_RDMA_OBJ_SIZE(ib_ucontext, mlx4_ib_ucontext, ibucontext), }; diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 79143848b560..14ca042ea715 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -752,14 +752,12 @@ int mlx4_ib_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags); void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq); void mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq); -struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, - u32 flags, struct ib_udata *udata); -struct ib_ah *mlx4_ib_create_ah_slave(struct ib_pd *pd, - struct rdma_ah_attr *ah_attr, - int slave_sgid_index, u8 *s_mac, - u16 vlan_tag); +int mlx4_ib_create_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, u32 flags, + struct ib_udata *udata); +int mlx4_ib_create_ah_slave(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, + int slave_sgid_index, u8 *s_mac, u16 vlan_tag); int mlx4_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); -int mlx4_ib_destroy_ah(struct ib_ah *ah, u32 flags, struct ib_udata *udata); +void mlx4_ib_destroy_ah(struct ib_ah *ah, u32 flags); struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *init_attr, diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c index 2e377f9699f1..80642dd359bc 100644 --- a/drivers/infiniband/hw/mlx5/ah.c +++ b/drivers/infiniband/hw/mlx5/ah.c @@ -32,9 +32,8 @@ #include "mlx5_ib.h" -static 
struct ib_ah *create_ib_ah(struct mlx5_ib_dev *dev, - struct mlx5_ib_ah *ah, - struct rdma_ah_attr *ah_attr) +static void create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah, + struct rdma_ah_attr *ah_attr) { enum ib_gid_type gid_type; @@ -67,21 +66,19 @@ static struct ib_ah *create_ib_ah(struct mlx5_ib_dev *dev, ah->av.fl_mlid = rdma_ah_get_path_bits(ah_attr) & 0x7f; ah->av.stat_rate_sl |= (rdma_ah_get_sl(ah_attr) & 0xf); } - - return &ah->ibah; } -struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, - u32 flags, struct ib_udata *udata) +int mlx5_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr, + u32 flags, struct ib_udata *udata) { - struct mlx5_ib_ah *ah; - struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct mlx5_ib_ah *ah = to_mah(ibah); + struct mlx5_ib_dev *dev = to_mdev(ibah->device); enum rdma_ah_attr_type ah_type = ah_attr->type; if ((ah_type == RDMA_AH_ATTR_TYPE_ROCE) && !(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH)) - return ERR_PTR(-EINVAL); + return -EINVAL; if (ah_type == RDMA_AH_ATTR_TYPE_ROCE && udata) { int err; @@ -90,21 +87,18 @@ struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, sizeof(resp.dmac); if (udata->outlen < min_resp_len) - return ERR_PTR(-EINVAL); + return -EINVAL; resp.response_length = min_resp_len; memcpy(resp.dmac, ah_attr->roce.dmac, ETH_ALEN); err = ib_copy_to_udata(udata, &resp, resp.response_length); if (err) - return ERR_PTR(err); + return err; } - ah = kzalloc(sizeof(*ah), GFP_ATOMIC); - if (!ah) - return ERR_PTR(-ENOMEM); - - return create_ib_ah(dev, ah, ah_attr); /* never fails */ + create_ib_ah(dev, ah, ah_attr); + return 0; } int mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr) @@ -131,8 +125,7 @@ int mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr) return 0; } -int mlx5_ib_destroy_ah(struct ib_ah *ah, u32 flags, struct ib_udata *udata) +void mlx5_ib_destroy_ah(struct ib_ah *ah, u32 flags) { - 
kfree(to_mah(ah)); - return 0; + return; } diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index f706e1bd40ad..f4827d12677a 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -5986,6 +5986,8 @@ static const struct ib_device_ops mlx5_ib_dev_ops = { .req_notify_cq = mlx5_ib_arm_cq, .rereg_user_mr = mlx5_ib_rereg_user_mr, .resize_cq = mlx5_ib_resize_cq, + + INIT_RDMA_OBJ_SIZE(ib_ah, mlx5_ib_ah, ibah), INIT_RDMA_OBJ_SIZE(ib_pd, mlx5_ib_pd, ibpd), INIT_RDMA_OBJ_SIZE(ib_ucontext, mlx5_ib_ucontext, ibucontext), }; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index e36aa2f79943..5b4206653fdb 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1054,10 +1054,10 @@ void mlx5_ib_db_unmap_user(struct mlx5_ib_ucontext *context, struct mlx5_db *db) void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq); void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq); void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index); -struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, - u32 flags, struct ib_udata *udata); +int mlx5_ib_create_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, u32 flags, + struct ib_udata *udata); int mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); -int mlx5_ib_destroy_ah(struct ib_ah *ah, u32 flags, struct ib_udata *udata); +void mlx5_ib_destroy_ah(struct ib_ah *ah, u32 flags); struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *init_attr, struct ib_udata *udata); diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 9a77374a327b..6fc371a9e45f 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -388,34 +388,19 @@ static void mthca_dealloc_pd(struct ib_pd *pd, 
struct ib_udata *udata) mthca_pd_free(to_mdev(pd->device), to_mpd(pd)); } -static struct ib_ah *mthca_ah_create(struct ib_pd *pd, - struct rdma_ah_attr *ah_attr, - u32 flags, - struct ib_udata *udata) +static int mthca_ah_create(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr, + u32 flags, struct ib_udata *udata) { - int err; - struct mthca_ah *ah; - - ah = kmalloc(sizeof *ah, GFP_ATOMIC); - if (!ah) - return ERR_PTR(-ENOMEM); + struct mthca_ah *ah = to_mah(ibah); - err = mthca_create_ah(to_mdev(pd->device), to_mpd(pd), ah_attr, ah); - if (err) { - kfree(ah); - return ERR_PTR(err); - } - - return &ah->ibah; + return mthca_create_ah(to_mdev(ibah->device), to_mpd(ibah->pd), ah_attr, + ah); } -static int mthca_ah_destroy(struct ib_ah *ah, u32 flags, struct ib_udata *udata) +static void mthca_ah_destroy(struct ib_ah *ah, u32 flags) { mthca_destroy_ah(to_mdev(ah->device), to_mah(ah)); - kfree(ah); - - return 0; } static struct ib_srq *mthca_create_srq(struct ib_pd *pd, @@ -1213,6 +1198,8 @@ static const struct ib_device_ops mthca_dev_ops = { .query_qp = mthca_query_qp, .reg_user_mr = mthca_reg_user_mr, .resize_cq = mthca_resize_cq, + + INIT_RDMA_OBJ_SIZE(ib_ah, mthca_ah, ibah), INIT_RDMA_OBJ_SIZE(ib_pd, mthca_pd, ibpd), INIT_RDMA_OBJ_SIZE(ib_ucontext, mthca_ucontext, ibucontext), }; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index c0419133edfd..a17747cb086a 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -156,29 +156,25 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah, return status; } -struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr, - u32 flags, struct ib_udata *udata) +int ocrdma_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr, u32 flags, + struct ib_udata *udata) { u32 *ahid_addr; int status; - struct ocrdma_ah *ah; + struct ocrdma_ah *ah = get_ocrdma_ah(ibah); bool isvlan = false; u16 
vlan_tag = 0xffff; const struct ib_gid_attr *sgid_attr; - struct ocrdma_pd *pd = get_ocrdma_pd(ibpd); - struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device); + struct ocrdma_pd *pd = get_ocrdma_pd(ibah->pd); + struct ocrdma_dev *dev = get_ocrdma_dev(ibah->device); if ((attr->type != RDMA_AH_ATTR_TYPE_ROCE) || !(rdma_ah_get_ah_flags(attr) & IB_AH_GRH)) - return ERR_PTR(-EINVAL); + return -EINVAL; if (atomic_cmpxchg(&dev->update_sl, 1, 0)) ocrdma_init_service_level(dev); - ah = kzalloc(sizeof(*ah), GFP_ATOMIC); - if (!ah) - return ERR_PTR(-ENOMEM); - status = ocrdma_alloc_av(dev, ah); if (status) goto av_err; @@ -210,23 +206,20 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr, OCRDMA_AH_VLAN_VALID_SHIFT); } - return &ah->ibah; + return 0; av_conf_err: ocrdma_free_av(dev, ah); av_err: - kfree(ah); - return ERR_PTR(status); + return status; } -int ocrdma_destroy_ah(struct ib_ah *ibah, u32 flags, struct ib_udata *udata) +void ocrdma_destroy_ah(struct ib_ah *ibah, u32 flags) { struct ocrdma_ah *ah = get_ocrdma_ah(ibah); struct ocrdma_dev *dev = get_ocrdma_dev(ibah->device); ocrdma_free_av(dev, ah); - kfree(ah); - return 0; } int ocrdma_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h index 9b84034d8164..64cb82c08664 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h @@ -51,9 +51,9 @@ enum { OCRDMA_AH_L3_TYPE_SHIFT = 0x1D /* 29 bits */ }; -struct ib_ah *ocrdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, - u32 flags, struct ib_udata *udata); -int ocrdma_destroy_ah(struct ib_ah *ah, u32 flags, struct ib_udata *udata); +int ocrdma_create_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, u32 flags, + struct ib_udata *udata); +void ocrdma_destroy_ah(struct ib_ah *ah, u32 flags); int ocrdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); int ocrdma_process_mad(struct 
ib_device *, diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index 097e5ab2a19f..e693eb352959 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -3067,13 +3067,12 @@ int ocrdma_alloc_av(struct ocrdma_dev *dev, struct ocrdma_ah *ah) return status; } -int ocrdma_free_av(struct ocrdma_dev *dev, struct ocrdma_ah *ah) +void ocrdma_free_av(struct ocrdma_dev *dev, struct ocrdma_ah *ah) { unsigned long flags; spin_lock_irqsave(&dev->av_tbl.lock, flags); ah->av->valid = 0; spin_unlock_irqrestore(&dev->av_tbl.lock, flags); - return 0; } static int ocrdma_create_eqs(struct ocrdma_dev *dev) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h index ebc1f442aec3..88d45aa19ded 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h @@ -139,8 +139,8 @@ int ocrdma_mbx_modify_srq(struct ocrdma_srq *, struct ib_srq_attr *); int ocrdma_mbx_query_srq(struct ocrdma_srq *, struct ib_srq_attr *); int ocrdma_mbx_destroy_srq(struct ocrdma_dev *, struct ocrdma_srq *); -int ocrdma_alloc_av(struct ocrdma_dev *, struct ocrdma_ah *); -int ocrdma_free_av(struct ocrdma_dev *, struct ocrdma_ah *); +int ocrdma_alloc_av(struct ocrdma_dev *dev, struct ocrdma_ah *ah); +void ocrdma_free_av(struct ocrdma_dev *dev, struct ocrdma_ah *ah); int ocrdma_qp_state_change(struct ocrdma_qp *, enum ib_qp_state new_state, enum ib_qp_state *old_ib_state); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index b9e10d55a58e..8642a2e60be7 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -179,6 +179,8 @@ static const struct ib_device_ops ocrdma_dev_ops = { .reg_user_mr = ocrdma_reg_user_mr, .req_notify_cq = ocrdma_arm_cq, .resize_cq = ocrdma_resize_cq, + + INIT_RDMA_OBJ_SIZE(ib_ah, ocrdma_ah, ibah), 
INIT_RDMA_OBJ_SIZE(ib_pd, ocrdma_pd, ibpd), INIT_RDMA_OBJ_SIZE(ib_ucontext, ocrdma_ucontext, ibucontext), }; diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index 2119158e3692..f32ea7052c48 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -238,6 +238,8 @@ static const struct ib_device_ops qedr_dev_ops = { .reg_user_mr = qedr_reg_user_mr, .req_notify_cq = qedr_arm_cq, .resize_cq = qedr_resize_cq, + + INIT_RDMA_OBJ_SIZE(ib_ah, qedr_ah, ibah), INIT_RDMA_OBJ_SIZE(ib_pd, qedr_pd, ibpd), INIT_RDMA_OBJ_SIZE(ib_ucontext, qedr_ucontext, ibucontext), }; diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 44ab86718c2f..8ea06856e7b9 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -2546,27 +2546,21 @@ int qedr_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) return rc; } -struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr, - u32 flags, struct ib_udata *udata) +int qedr_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr, u32 flags, + struct ib_udata *udata) { - struct qedr_ah *ah; - - ah = kzalloc(sizeof(*ah), GFP_ATOMIC); - if (!ah) - return ERR_PTR(-ENOMEM); + struct qedr_ah *ah = get_qedr_ah(ibah); rdma_copy_ah_attr(&ah->attr, attr); - return &ah->ibah; + return 0; } -int qedr_destroy_ah(struct ib_ah *ibah, u32 flags, struct ib_udata *udata) +void qedr_destroy_ah(struct ib_ah *ibah, u32 flags) { struct qedr_ah *ah = get_qedr_ah(ibah); rdma_destroy_ah_attr(&ah->attr); - kfree(ah); - return 0; } static void free_mr_info(struct qedr_dev *dev, struct mr_info *info) diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h index 46a9828b9777..772af35a5055 100644 --- a/drivers/infiniband/hw/qedr/verbs.h +++ b/drivers/infiniband/hw/qedr/verbs.h @@ -73,9 +73,9 @@ int qedr_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr); int qedr_destroy_srq(struct ib_srq 
*ibsrq, struct ib_udata *udata); int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_recv_wr); -struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr, - u32 flags, struct ib_udata *udata); -int qedr_destroy_ah(struct ib_ah *ibah, u32 flags, struct ib_udata *udata); +int qedr_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr, u32 flags, + struct ib_udata *udata); +void qedr_destroy_ah(struct ib_ah *ibah, u32 flags); int qedr_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata); struct ib_mr *qedr_get_dma_mr(struct ib_pd *, int acc); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index 6d8b3e0de57a..6cbc271a1b7d 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -195,6 +195,8 @@ static const struct ib_device_ops pvrdma_dev_ops = { .query_qp = pvrdma_query_qp, .reg_user_mr = pvrdma_reg_user_mr, .req_notify_cq = pvrdma_req_notify_cq, + + INIT_RDMA_OBJ_SIZE(ib_ah, pvrdma_ah, ibah), INIT_RDMA_OBJ_SIZE(ib_pd, pvrdma_pd, ibpd), INIT_RDMA_OBJ_SIZE(ib_ucontext, pvrdma_ucontext, ibucontext), }; diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c index 0302fa3b6c85..faf7ecd7b3fa 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c @@ -507,34 +507,28 @@ void pvrdma_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) * @udata: user data blob * @flags: create address handle flags (see enum rdma_create_ah_flags) * - * @return: the ib_ah pointer on success, otherwise errno. + * @return: 0 on success, otherwise errno. 
*/ -struct ib_ah *pvrdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, - u32 flags, struct ib_udata *udata) +int pvrdma_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr, + u32 flags, struct ib_udata *udata) { - struct pvrdma_dev *dev = to_vdev(pd->device); - struct pvrdma_ah *ah; + struct pvrdma_dev *dev = to_vdev(ibah->device); + struct pvrdma_ah *ah = to_vah(ibah); const struct ib_global_route *grh; u8 port_num = rdma_ah_get_port_num(ah_attr); if (!(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH)) - return ERR_PTR(-EINVAL); + return -EINVAL; grh = rdma_ah_read_grh(ah_attr); if ((ah_attr->type != RDMA_AH_ATTR_TYPE_ROCE) || rdma_is_multicast_addr((struct in6_addr *)grh->dgid.raw)) - return ERR_PTR(-EINVAL); + return -EINVAL; if (!atomic_add_unless(&dev->num_ahs, 1, dev->dsr->caps.max_ah)) - return ERR_PTR(-ENOMEM); - - ah = kzalloc(sizeof(*ah), GFP_ATOMIC); - if (!ah) { - atomic_dec(&dev->num_ahs); - return ERR_PTR(-ENOMEM); - } + return -ENOMEM; - ah->av.port_pd = to_vpd(pd)->pd_handle | (port_num << 24); + ah->av.port_pd = to_vpd(ibah->pd)->pd_handle | (port_num << 24); ah->av.src_path_bits = rdma_ah_get_path_bits(ah_attr); ah->av.src_path_bits |= 0x80; ah->av.gid_index = grh->sgid_index; @@ -544,11 +538,7 @@ struct ib_ah *pvrdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, memcpy(ah->av.dgid, grh->dgid.raw, 16); memcpy(ah->av.dmac, ah_attr->roce.dmac, ETH_ALEN); - ah->ibah.device = pd->device; - ah->ibah.pd = pd; - ah->ibah.uobject = NULL; - - return &ah->ibah; + return 0; } /** @@ -556,14 +546,10 @@ struct ib_ah *pvrdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, * @ah: the address handle to destroyed * @flags: destroy address handle flags (see enum rdma_destroy_ah_flags) * - * @return: 0 on success. 
*/ -int pvrdma_destroy_ah(struct ib_ah *ah, u32 flags, struct ib_udata *udata) +void pvrdma_destroy_ah(struct ib_ah *ah, u32 flags) { struct pvrdma_dev *dev = to_vdev(ah->device); - kfree(to_vah(ah)); atomic_dec(&dev->num_ahs); - - return 0; } diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h index 562b70e70e79..013c73f2eba3 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h @@ -415,9 +415,9 @@ struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev, int pvrdma_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); int pvrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); int pvrdma_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags); -struct ib_ah *pvrdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, - u32 flags, struct ib_udata *udata); -int pvrdma_destroy_ah(struct ib_ah *ah, u32 flags, struct ib_udata *udata); +int pvrdma_create_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, u32 flags, + struct ib_udata *udata); +void pvrdma_destroy_ah(struct ib_ah *ah, u32 flags); struct ib_srq *pvrdma_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *init_attr, diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c index 001a5c052580..e6f7e4689d4d 100644 --- a/drivers/infiniband/sw/rdmavt/ah.c +++ b/drivers/infiniband/sw/rdmavt/ah.c @@ -89,36 +89,29 @@ EXPORT_SYMBOL(rvt_check_ah); /** * rvt_create_ah - create an address handle - * @pd: the protection domain + * @ibah: the IB address handle * @ah_attr: the attributes of the AH * @create_flags: create address handle flags (see enum rdma_create_ah_flags) * @udata: pointer to user's input output buffer information. * * This may be called from interrupt context. 
* - * Return: newly allocated ah + * Return: 0 on success */ -struct ib_ah *rvt_create_ah(struct ib_pd *pd, - struct rdma_ah_attr *ah_attr, - u32 create_flags, - struct ib_udata *udata) +int rvt_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr, + u32 create_flags, struct ib_udata *udata) { - struct rvt_ah *ah; - struct rvt_dev_info *dev = ib_to_rvt(pd->device); + struct rvt_ah *ah = ibah_to_rvtah(ibah); + struct rvt_dev_info *dev = ib_to_rvt(ibah->device); unsigned long flags; - if (rvt_check_ah(pd->device, ah_attr)) - return ERR_PTR(-EINVAL); - - ah = kmalloc(sizeof(*ah), GFP_ATOMIC); - if (!ah) - return ERR_PTR(-ENOMEM); + if (rvt_check_ah(ibah->device, ah_attr)) + return -EINVAL; spin_lock_irqsave(&dev->n_ahs_lock, flags); if (dev->n_ahs_allocated == dev->dparms.props.max_ah) { spin_unlock_irqrestore(&dev->n_ahs_lock, flags); - kfree(ah); - return ERR_PTR(-ENOMEM); + return -ENOMEM; } dev->n_ahs_allocated++; @@ -129,9 +122,9 @@ struct ib_ah *rvt_create_ah(struct ib_pd *pd, atomic_set(&ah->refcount, 0); if (dev->driver_f.notify_new_ah) - dev->driver_f.notify_new_ah(pd->device, ah_attr, ah); + dev->driver_f.notify_new_ah(ibah->device, ah_attr, ah); - return &ah->ibah; + return 0; } /** @@ -142,24 +135,20 @@ struct ib_ah *rvt_create_ah(struct ib_pd *pd, * * Return: 0 on success */ -int rvt_destroy_ah(struct ib_ah *ibah, u32 destroy_flags, - struct ib_udata *udata) +void rvt_destroy_ah(struct ib_ah *ibah, u32 destroy_flags) { struct rvt_dev_info *dev = ib_to_rvt(ibah->device); struct rvt_ah *ah = ibah_to_rvtah(ibah); unsigned long flags; if (atomic_read(&ah->refcount) != 0) - return -EBUSY; + return; spin_lock_irqsave(&dev->n_ahs_lock, flags); dev->n_ahs_allocated--; spin_unlock_irqrestore(&dev->n_ahs_lock, flags); rdma_destroy_ah_attr(&ah->attr); - kfree(ah); - - return 0; } /** diff --git a/drivers/infiniband/sw/rdmavt/ah.h b/drivers/infiniband/sw/rdmavt/ah.h index 7b27b82d8a90..bbb4d3bdec4e 100644 --- a/drivers/infiniband/sw/rdmavt/ah.h +++ 
b/drivers/infiniband/sw/rdmavt/ah.h @@ -50,12 +50,9 @@ #include -struct ib_ah *rvt_create_ah(struct ib_pd *pd, - struct rdma_ah_attr *ah_attr, - u32 create_flags, - struct ib_udata *udata); -int rvt_destroy_ah(struct ib_ah *ibah, u32 destroy_flags, - struct ib_udata *udata); +int rvt_create_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, + u32 create_flags, struct ib_udata *udata); +void rvt_destroy_ah(struct ib_ah *ibah, u32 destroy_flags); int rvt_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); int rvt_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 42c9d35f832d..f4b3bb57ab06 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -425,6 +425,8 @@ static const struct ib_device_ops rvt_dev_ops = { .req_notify_cq = rvt_req_notify_cq, .resize_cq = rvt_resize_cq, .unmap_fmr = rvt_unmap_fmr, + + INIT_RDMA_OBJ_SIZE(ib_ah, rvt_ah, ibah), INIT_RDMA_OBJ_SIZE(ib_pd, rvt_pd, ibpd), INIT_RDMA_OBJ_SIZE(ib_ucontext, rvt_ucontext, ibucontext), }; diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c index 120fa9005954..756bd36fd268 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.c +++ b/drivers/infiniband/sw/rxe/rxe_pool.c @@ -52,7 +52,7 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = { [RXE_TYPE_AH] = { .name = "rxe-ah", .size = sizeof(struct rxe_ah), - .flags = RXE_POOL_ATOMIC, + .flags = RXE_POOL_ATOMIC | RXE_POOL_NO_ALLOC, }, [RXE_TYPE_SRQ] = { .name = "rxe-srq", diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 4f581af2ad54..a6c63a260626 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -191,30 +191,24 @@ static void rxe_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) rxe_drop_ref(pd); } -static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd, - struct rdma_ah_attr *attr, - u32 flags, 
- struct ib_udata *udata) +static int rxe_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr, + u32 flags, struct ib_udata *udata) { int err; - struct rxe_dev *rxe = to_rdev(ibpd->device); - struct rxe_pd *pd = to_rpd(ibpd); - struct rxe_ah *ah; + struct rxe_dev *rxe = to_rdev(ibah->device); + struct rxe_ah *ah = to_rah(ibah); err = rxe_av_chk_attr(rxe, attr); if (err) - return ERR_PTR(err); - - ah = rxe_alloc(&rxe->ah_pool); - if (!ah) - return ERR_PTR(-ENOMEM); + return err; - rxe_add_ref(pd); - ah->pd = pd; + err = rxe_add_to_pool(&rxe->ah_pool, &ah->pelem); + if (err) + return err; rxe_init_av(attr, &ah->av); - return &ah->ibah; + return 0; } static int rxe_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr) @@ -241,13 +235,11 @@ static int rxe_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr) return 0; } -static int rxe_destroy_ah(struct ib_ah *ibah, u32 flags, struct ib_udata *udata) +static void rxe_destroy_ah(struct ib_ah *ibah, u32 flags) { struct rxe_ah *ah = to_rah(ibah); - rxe_drop_ref(ah->pd); rxe_drop_ref(ah); - return 0; } static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr) @@ -1171,6 +1163,8 @@ static const struct ib_device_ops rxe_dev_ops = { .reg_user_mr = rxe_reg_user_mr, .req_notify_cq = rxe_req_notify_cq, .resize_cq = rxe_resize_cq, + + INIT_RDMA_OBJ_SIZE(ib_ah, rxe_ah, ibah), INIT_RDMA_OBJ_SIZE(ib_pd, rxe_pd, ibpd), INIT_RDMA_OBJ_SIZE(ib_ucontext, rxe_ucontext, ibuc), }; diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h index 157e51aeb1e1..23c5002b5134 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -71,8 +71,8 @@ struct rxe_pd { }; struct rxe_ah { - struct rxe_pool_entry pelem; struct ib_ah ibah; + struct rxe_pool_entry pelem; struct rxe_pd *pd; struct rxe_av av; }; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 7e965bc06477..3232a84c4fdb 100644 --- a/include/rdma/ib_verbs.h +++ 
b/include/rdma/ib_verbs.h @@ -2401,12 +2401,11 @@ struct ib_device_ops { void (*disassociate_ucontext)(struct ib_ucontext *ibcontext); int (*alloc_pd)(struct ib_pd *pd, struct ib_udata *udata); void (*dealloc_pd)(struct ib_pd *pd, struct ib_udata *udata); - struct ib_ah *(*create_ah)(struct ib_pd *pd, - struct rdma_ah_attr *ah_attr, u32 flags, - struct ib_udata *udata); + int (*create_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, + u32 flags, struct ib_udata *udata); int (*modify_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); int (*query_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); - int (*destroy_ah)(struct ib_ah *ah, u32 flags, struct ib_udata *udata); + void (*destroy_ah)(struct ib_ah *ah, u32 flags); struct ib_srq *(*create_srq)(struct ib_pd *pd, struct ib_srq_init_attr *srq_init_attr, struct ib_udata *udata); @@ -2552,6 +2551,7 @@ struct ib_device_ops { */ void (*dealloc_driver)(struct ib_device *dev); + DECLARE_RDMA_OBJ_SIZE(ib_ah); DECLARE_RDMA_OBJ_SIZE(ib_pd); DECLARE_RDMA_OBJ_SIZE(ib_ucontext); }; -- cgit v1.2.3 From 68e326dea1dba935f6a5299a24343a58b33eed10 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 3 Apr 2019 16:42:43 +0300 Subject: RDMA: Handle SRQ allocations by IB/core Convert SRQ allocation from drivers to be in the IB/core Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 1 + drivers/infiniband/core/uverbs_cmd.c | 12 +++- drivers/infiniband/core/verbs.c | 78 ++++++++++++------------- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 32 ++++------ drivers/infiniband/hw/bnxt_re/ib_verbs.h | 10 ++-- drivers/infiniband/hw/bnxt_re/main.c | 1 + drivers/infiniband/hw/bnxt_re/qplib_fp.c | 12 ++-- drivers/infiniband/hw/bnxt_re/qplib_fp.h | 4 +- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 7 +-- drivers/infiniband/hw/cxgb4/provider.c | 1 + drivers/infiniband/hw/cxgb4/qp.c | 32 ++++------ drivers/infiniband/hw/hns/hns_roce_device.h | 8 +-- drivers/infiniband/hw/hns/hns_roce_main.c | 2 + 
drivers/infiniband/hw/hns/hns_roce_srq.c | 52 ++++++----------- drivers/infiniband/hw/mlx4/main.c | 1 + drivers/infiniband/hw/mlx4/mlx4_ib.h | 7 +-- drivers/infiniband/hw/mlx4/srq.c | 47 +++++---------- drivers/infiniband/hw/mlx5/main.c | 36 ++++++++---- drivers/infiniband/hw/mlx5/mlx5_ib.h | 7 +-- drivers/infiniband/hw/mlx5/srq.c | 59 ++++++++----------- drivers/infiniband/hw/mlx5/srq.h | 2 +- drivers/infiniband/hw/mlx5/srq_cmd.c | 8 +-- drivers/infiniband/hw/mthca/mthca_provider.c | 53 +++++++---------- drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 8 +-- drivers/infiniband/hw/ocrdma/ocrdma_hw.h | 2 +- drivers/infiniband/hw/ocrdma/ocrdma_main.c | 2 + drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 47 +++++++-------- drivers/infiniband/hw/ocrdma/ocrdma_verbs.h | 6 +- drivers/infiniband/hw/qedr/main.c | 1 + drivers/infiniband/hw/qedr/verbs.c | 32 ++++------ drivers/infiniband/hw/qedr/verbs.h | 7 +-- drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c | 2 + drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c | 40 +++++-------- drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h | 7 +-- drivers/infiniband/sw/rdmavt/srq.c | 43 +++++--------- drivers/infiniband/sw/rdmavt/srq.h | 7 +-- drivers/infiniband/sw/rdmavt/vt.c | 1 + drivers/infiniband/sw/rxe/rxe_pool.c | 2 +- drivers/infiniband/sw/rxe/rxe_verbs.c | 31 ++++------ drivers/infiniband/sw/rxe/rxe_verbs.h | 2 +- include/rdma/ib_verbs.h | 9 +-- 41 files changed, 308 insertions(+), 413 deletions(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index b5fad8a68a35..88c4238bbee1 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -2224,6 +2224,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_OBJ_SIZE(dev_ops, ib_ah); SET_OBJ_SIZE(dev_ops, ib_pd); + SET_OBJ_SIZE(dev_ops, ib_srq); SET_OBJ_SIZE(dev_ops, ib_ucontext); } EXPORT_SYMBOL(ib_set_device_ops); diff --git a/drivers/infiniband/core/uverbs_cmd.c 
b/drivers/infiniband/core/uverbs_cmd.c index 89b0f5420dfe..04d08135b374 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -3409,9 +3409,9 @@ static int __uverbs_create_xsrq(struct uverbs_attr_bundle *attrs, obj->uevent.events_reported = 0; INIT_LIST_HEAD(&obj->uevent.event_list); - srq = pd->device->ops.create_srq(pd, &attr, udata); - if (IS_ERR(srq)) { - ret = PTR_ERR(srq); + srq = rdma_zalloc_drv_obj(ib_dev, ib_srq); + if (!srq) { + ret = -ENOMEM; goto err_put; } @@ -3422,6 +3422,10 @@ static int __uverbs_create_xsrq(struct uverbs_attr_bundle *attrs, srq->event_handler = attr.event_handler; srq->srq_context = attr.srq_context; + ret = pd->device->ops.create_srq(srq, &attr, udata); + if (ret) + goto err_free; + if (ib_srq_has_cq(cmd->srq_type)) { srq->ext.cq = attr.ext.cq; atomic_inc(&attr.ext.cq->usecnt); @@ -3461,6 +3465,8 @@ static int __uverbs_create_xsrq(struct uverbs_attr_bundle *attrs, err_copy: ib_destroy_srq_user(srq, &attrs->driver_udata); +err_free: + kfree(srq); err_put: uobj_put_obj_read(pd); diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 6172019481a4..7313edc9f091 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -964,29 +964,40 @@ struct ib_srq *ib_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *srq_init_attr) { struct ib_srq *srq; + int ret; if (!pd->device->ops.create_srq) return ERR_PTR(-EOPNOTSUPP); - srq = pd->device->ops.create_srq(pd, srq_init_attr, NULL); - - if (!IS_ERR(srq)) { - srq->device = pd->device; - srq->pd = pd; - srq->uobject = NULL; - srq->event_handler = srq_init_attr->event_handler; - srq->srq_context = srq_init_attr->srq_context; - srq->srq_type = srq_init_attr->srq_type; - if (ib_srq_has_cq(srq->srq_type)) { - srq->ext.cq = srq_init_attr->ext.cq; - atomic_inc(&srq->ext.cq->usecnt); - } - if (srq->srq_type == IB_SRQT_XRC) { - srq->ext.xrc.xrcd = srq_init_attr->ext.xrc.xrcd; - 
atomic_inc(&srq->ext.xrc.xrcd->usecnt); - } - atomic_inc(&pd->usecnt); - atomic_set(&srq->usecnt, 0); + srq = rdma_zalloc_drv_obj(pd->device, ib_srq); + if (!srq) + return ERR_PTR(-ENOMEM); + + srq->device = pd->device; + srq->pd = pd; + srq->event_handler = srq_init_attr->event_handler; + srq->srq_context = srq_init_attr->srq_context; + srq->srq_type = srq_init_attr->srq_type; + + if (ib_srq_has_cq(srq->srq_type)) { + srq->ext.cq = srq_init_attr->ext.cq; + atomic_inc(&srq->ext.cq->usecnt); + } + if (srq->srq_type == IB_SRQT_XRC) { + srq->ext.xrc.xrcd = srq_init_attr->ext.xrc.xrcd; + atomic_inc(&srq->ext.xrc.xrcd->usecnt); + } + atomic_inc(&pd->usecnt); + + ret = pd->device->ops.create_srq(srq, srq_init_attr, NULL); + if (ret) { + atomic_dec(&srq->pd->usecnt); + if (srq->srq_type == IB_SRQT_XRC) + atomic_dec(&srq->ext.xrc.xrcd->usecnt); + if (ib_srq_has_cq(srq->srq_type)) + atomic_dec(&srq->ext.cq->usecnt); + kfree(srq); + return ERR_PTR(ret); } return srq; @@ -1013,32 +1024,19 @@ EXPORT_SYMBOL(ib_query_srq); int ib_destroy_srq_user(struct ib_srq *srq, struct ib_udata *udata) { - struct ib_pd *pd; - enum ib_srq_type srq_type; - struct ib_xrcd *uninitialized_var(xrcd); - struct ib_cq *uninitialized_var(cq); - int ret; - if (atomic_read(&srq->usecnt)) return -EBUSY; - pd = srq->pd; - srq_type = srq->srq_type; - if (ib_srq_has_cq(srq_type)) - cq = srq->ext.cq; - if (srq_type == IB_SRQT_XRC) - xrcd = srq->ext.xrc.xrcd; + srq->device->ops.destroy_srq(srq, udata); - ret = srq->device->ops.destroy_srq(srq, udata); - if (!ret) { - atomic_dec(&pd->usecnt); - if (srq_type == IB_SRQT_XRC) - atomic_dec(&xrcd->usecnt); - if (ib_srq_has_cq(srq_type)) - atomic_dec(&cq->usecnt); - } + atomic_dec(&srq->pd->usecnt); + if (srq->srq_type == IB_SRQT_XRC) + atomic_dec(&srq->ext.xrc.xrcd->usecnt); + if (ib_srq_has_cq(srq->srq_type)) + atomic_dec(&srq->ext.cq->usecnt); + kfree(srq); - return ret; + return 0; } EXPORT_SYMBOL(ib_destroy_srq_user); diff --git 
a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index a9e2e29d7ad0..dc53604171a0 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -1305,30 +1305,22 @@ static enum ib_mtu __to_ib_mtu(u32 mtu) } /* Shared Receive Queues */ -int bnxt_re_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata) +void bnxt_re_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata) { struct bnxt_re_srq *srq = container_of(ib_srq, struct bnxt_re_srq, ib_srq); struct bnxt_re_dev *rdev = srq->rdev; struct bnxt_qplib_srq *qplib_srq = &srq->qplib_srq; struct bnxt_qplib_nq *nq = NULL; - int rc; if (qplib_srq->cq) nq = qplib_srq->cq->nq; - rc = bnxt_qplib_destroy_srq(&rdev->qplib_res, qplib_srq); - if (rc) { - dev_err(rdev_to_dev(rdev), "Destroy HW SRQ failed!"); - return rc; - } - + bnxt_qplib_destroy_srq(&rdev->qplib_res, qplib_srq); if (srq->umem) ib_umem_release(srq->umem); - kfree(srq); atomic_dec(&rdev->srq_count); if (nq) nq->budget--; - return 0; } static int bnxt_re_init_user_srq(struct bnxt_re_dev *rdev, @@ -1362,14 +1354,16 @@ static int bnxt_re_init_user_srq(struct bnxt_re_dev *rdev, return 0; } -struct ib_srq *bnxt_re_create_srq(struct ib_pd *ib_pd, - struct ib_srq_init_attr *srq_init_attr, - struct ib_udata *udata) +int bnxt_re_create_srq(struct ib_srq *ib_srq, + struct ib_srq_init_attr *srq_init_attr, + struct ib_udata *udata) { + struct ib_pd *ib_pd = ib_srq->pd; struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd); struct bnxt_re_dev *rdev = pd->rdev; struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr; - struct bnxt_re_srq *srq; + struct bnxt_re_srq *srq = + container_of(ib_srq, struct bnxt_re_srq, ib_srq); struct bnxt_qplib_nq *nq = NULL; int rc, entries; @@ -1384,11 +1378,6 @@ struct ib_srq *bnxt_re_create_srq(struct ib_pd *ib_pd, goto exit; } - srq = kzalloc(sizeof(*srq), GFP_KERNEL); - if (!srq) { - rc = -ENOMEM; - goto exit; - } srq->rdev = rdev; 
srq->qplib_srq.pd = &pd->qplib_pd; srq->qplib_srq.dpi = &rdev->dpi_privileged; @@ -1434,14 +1423,13 @@ struct ib_srq *bnxt_re_create_srq(struct ib_pd *ib_pd, nq->budget++; atomic_inc(&rdev->srq_count); - return &srq->ib_srq; + return 0; fail: if (srq->umem) ib_umem_release(srq->umem); - kfree(srq); exit: - return ERR_PTR(rc); + return rc; } int bnxt_re_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *srq_attr, diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h index 953da89e5ec0..bd4fa22985eb 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h @@ -69,9 +69,9 @@ struct bnxt_re_ah { }; struct bnxt_re_srq { + struct ib_srq ib_srq; struct bnxt_re_dev *rdev; u32 srq_limit; - struct ib_srq ib_srq; struct bnxt_qplib_srq qplib_srq; struct ib_umem *umem; spinlock_t lock; /* protect srq */ @@ -170,14 +170,14 @@ int bnxt_re_create_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, u32 flags, int bnxt_re_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); int bnxt_re_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); void bnxt_re_destroy_ah(struct ib_ah *ah, u32 flags); -struct ib_srq *bnxt_re_create_srq(struct ib_pd *pd, - struct ib_srq_init_attr *srq_init_attr, - struct ib_udata *udata); +int bnxt_re_create_srq(struct ib_srq *srq, + struct ib_srq_init_attr *srq_init_attr, + struct ib_udata *udata); int bnxt_re_modify_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr, enum ib_srq_attr_mask srq_attr_mask, struct ib_udata *udata); int bnxt_re_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); -int bnxt_re_destroy_srq(struct ib_srq *srq, struct ib_udata *udata); +void bnxt_re_destroy_srq(struct ib_srq *srq, struct ib_udata *udata); int bnxt_re_post_srq_recv(struct ib_srq *srq, const struct ib_recv_wr *recv_wr, const struct ib_recv_wr **bad_recv_wr); struct ib_qp *bnxt_re_create_qp(struct ib_pd *pd, diff --git a/drivers/infiniband/hw/bnxt_re/main.c 
b/drivers/infiniband/hw/bnxt_re/main.c index ec22853f8363..bbdfbbf5e9a5 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -639,6 +639,7 @@ static const struct ib_device_ops bnxt_re_dev_ops = { .req_notify_cq = bnxt_re_req_notify_cq, INIT_RDMA_OBJ_SIZE(ib_ah, bnxt_re_ah, ib_ah), INIT_RDMA_OBJ_SIZE(ib_pd, bnxt_re_pd, ib_pd), + INIT_RDMA_OBJ_SIZE(ib_srq, bnxt_re_srq, ib_srq), INIT_RDMA_OBJ_SIZE(ib_ucontext, bnxt_re_ucontext, ib_uctx), }; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index f034cab303f6..958c1ff9c515 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -507,7 +507,7 @@ static void bnxt_qplib_arm_srq(struct bnxt_qplib_srq *srq, u32 arm_type) writeq(val, db); } -int bnxt_qplib_destroy_srq(struct bnxt_qplib_res *res, +void bnxt_qplib_destroy_srq(struct bnxt_qplib_res *res, struct bnxt_qplib_srq *srq) { struct bnxt_qplib_rcfw *rcfw = res->rcfw; @@ -521,14 +521,12 @@ int bnxt_qplib_destroy_srq(struct bnxt_qplib_res *res, /* Configure the request */ req.srq_cid = cpu_to_le32(srq->id); - rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, - (void *)&resp, NULL, 0); + rc = bnxt_qplib_rcfw_send_message(rcfw, (struct cmdq_base *)&req, + (struct creq_base *)&resp, NULL, 0); + kfree(srq->swq); if (rc) - return rc; - + return; bnxt_qplib_free_hwq(res->pdev, &srq->hwq); - kfree(srq->swq); - return 0; } int bnxt_qplib_create_srq(struct bnxt_qplib_res *res, diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h index 31436af2a4ec..99e0a13cbefa 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h @@ -518,8 +518,8 @@ int bnxt_qplib_modify_srq(struct bnxt_qplib_res *res, struct bnxt_qplib_srq *srq); int bnxt_qplib_query_srq(struct bnxt_qplib_res *res, struct bnxt_qplib_srq *srq); -int bnxt_qplib_destroy_srq(struct bnxt_qplib_res *res, - 
struct bnxt_qplib_srq *srq); +void bnxt_qplib_destroy_srq(struct bnxt_qplib_res *res, + struct bnxt_qplib_srq *srq); int bnxt_qplib_post_srq_recv(struct bnxt_qplib_srq *srq, struct bnxt_qplib_swqe *wqe); int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp); diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 4b721a261053..916ef982172e 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -1000,10 +1000,9 @@ int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); int c4iw_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *attr, enum ib_srq_attr_mask srq_attr_mask, struct ib_udata *udata); -int c4iw_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata); -struct ib_srq *c4iw_create_srq(struct ib_pd *pd, - struct ib_srq_init_attr *attrs, - struct ib_udata *udata); +void c4iw_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata); +int c4iw_create_srq(struct ib_srq *srq, struct ib_srq_init_attr *attrs, + struct ib_udata *udata); int c4iw_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata); struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 0fbad47661cc..74ffc24321cd 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -545,6 +545,7 @@ static const struct ib_device_ops c4iw_dev_ops = { .reg_user_mr = c4iw_reg_user_mr, .req_notify_cq = c4iw_arm_cq, INIT_RDMA_OBJ_SIZE(ib_pd, c4iw_pd, ibpd), + INIT_RDMA_OBJ_SIZE(ib_srq, c4iw_srq, ibsrq), INIT_RDMA_OBJ_SIZE(ib_ucontext, c4iw_ucontext, ibucontext), }; diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 63780e6eface..9c8962d7bf97 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -2683,11 +2683,12 @@ void c4iw_copy_wr_to_srq(struct t4_srq 
*srq, union t4_recv_wr *wqe, u8 len16) } } -struct ib_srq *c4iw_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *attrs, +int c4iw_create_srq(struct ib_srq *ib_srq, struct ib_srq_init_attr *attrs, struct ib_udata *udata) { + struct ib_pd *pd = ib_srq->pd; struct c4iw_dev *rhp; - struct c4iw_srq *srq; + struct c4iw_srq *srq = to_c4iw_srq(ib_srq); struct c4iw_pd *php; struct c4iw_create_srq_resp uresp; struct c4iw_ucontext *ucontext; @@ -2702,11 +2703,11 @@ struct ib_srq *c4iw_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *attrs, rhp = php->rhp; if (!rhp->rdev.lldi.vr->srq.size) - return ERR_PTR(-EINVAL); + return -EINVAL; if (attrs->attr.max_wr > rhp->rdev.hw_queue.t4_max_rq_size) - return ERR_PTR(-E2BIG); + return -E2BIG; if (attrs->attr.max_sge > T4_MAX_RECV_SGE) - return ERR_PTR(-E2BIG); + return -E2BIG; /* * SRQ RQT and RQ must be a power of 2 and at least 16 deep. @@ -2717,15 +2718,9 @@ struct ib_srq *c4iw_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *attrs, ucontext = rdma_udata_to_drv_context(udata, struct c4iw_ucontext, ibucontext); - srq = kzalloc(sizeof(*srq), GFP_KERNEL); - if (!srq) - return ERR_PTR(-ENOMEM); - srq->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL); - if (!srq->wr_waitp) { - ret = -ENOMEM; - goto err_free_srq; - } + if (!srq->wr_waitp) + return -ENOMEM; srq->idx = c4iw_alloc_srq_idx(&rhp->rdev); if (srq->idx < 0) { @@ -2805,7 +2800,8 @@ struct ib_srq *c4iw_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *attrs, (unsigned long)srq->wq.memsize, attrs->attr.max_wr); spin_lock_init(&srq->lock); - return &srq->ibsrq; + return 0; + err_free_srq_db_key_mm: kfree(srq_db_key_mm); err_free_srq_key_mm: @@ -2821,12 +2817,10 @@ err_free_srq_idx: c4iw_free_srq_idx(&rhp->rdev, srq->idx); err_free_wr_wait: c4iw_put_wr_wait(srq->wr_waitp); -err_free_srq: - kfree(srq); - return ERR_PTR(ret); + return ret; } -int c4iw_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) +void c4iw_destroy_srq(struct ib_srq *ibsrq, struct ib_udata 
*udata) { struct c4iw_dev *rhp; struct c4iw_srq *srq; @@ -2844,6 +2838,4 @@ int c4iw_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) srq->wr_waitp); c4iw_free_srq_idx(&rhp->rdev, srq->idx); c4iw_put_wr_wait(srq->wr_waitp); - kfree(srq); - return 0; } diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index e424dcca2c74..88a5a9ac7350 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -1142,13 +1142,13 @@ int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct, int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_mtt *mtt, struct ib_umem *umem); -struct ib_srq *hns_roce_create_srq(struct ib_pd *pd, - struct ib_srq_init_attr *srq_init_attr, - struct ib_udata *udata); +int hns_roce_create_srq(struct ib_srq *srq, + struct ib_srq_init_attr *srq_init_attr, + struct ib_udata *udata); int hns_roce_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr, enum ib_srq_attr_mask srq_attr_mask, struct ib_udata *udata); -int hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata); +void hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata); struct ib_qp *hns_roce_create_qp(struct ib_pd *ib_pd, struct ib_qp_init_attr *init_attr, diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 176bade523ea..e85b31a72eea 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -491,6 +491,8 @@ static const struct ib_device_ops hns_roce_dev_frmr_ops = { static const struct ib_device_ops hns_roce_dev_srq_ops = { .create_srq = hns_roce_create_srq, .destroy_srq = hns_roce_destroy_srq, + + INIT_RDMA_OBJ_SIZE(ib_srq, hns_roce_srq, ibsrq), }; static int hns_roce_register_device(struct hns_roce_dev *hr_dev) diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index 
5874dbb391fd..b3421b1f21e0 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -206,13 +206,13 @@ static int hns_roce_create_idx_que(struct ib_pd *pd, struct hns_roce_srq *srq, return 0; } -struct ib_srq *hns_roce_create_srq(struct ib_pd *pd, - struct ib_srq_init_attr *srq_init_attr, - struct ib_udata *udata) +int hns_roce_create_srq(struct ib_srq *ib_srq, + struct ib_srq_init_attr *srq_init_attr, + struct ib_udata *udata) { - struct hns_roce_dev *hr_dev = to_hr_dev(pd->device); + struct hns_roce_dev *hr_dev = to_hr_dev(ib_srq->device); struct hns_roce_ib_create_srq_resp resp = {}; - struct hns_roce_srq *srq; + struct hns_roce_srq *srq = to_hr_srq(ib_srq); int srq_desc_size; int srq_buf_size; u32 page_shift; @@ -223,11 +223,7 @@ struct ib_srq *hns_roce_create_srq(struct ib_pd *pd, /* Check the actual SRQ wqe and SRQ sge num */ if (srq_init_attr->attr.max_wr >= hr_dev->caps.max_srq_wrs || srq_init_attr->attr.max_sge > hr_dev->caps.max_srq_sges) - return ERR_PTR(-EINVAL); - - srq = kzalloc(sizeof(*srq), GFP_KERNEL); - if (!srq) - return ERR_PTR(-ENOMEM); + return -EINVAL; mutex_init(&srq->mutex); spin_lock_init(&srq->lock); @@ -249,17 +245,13 @@ struct ib_srq *hns_roce_create_srq(struct ib_pd *pd, if (udata) { struct hns_roce_ib_create_srq ucmd; - if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) { - ret = -EFAULT; - goto err_srq; - } + if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) + return -EFAULT; srq->umem = ib_umem_get(udata, ucmd.buf_addr, srq_buf_size, 0, 0); - if (IS_ERR(srq->umem)) { - ret = PTR_ERR(srq->umem); - goto err_srq; - } + if (IS_ERR(srq->umem)) + return PTR_ERR(srq->umem); if (hr_dev->caps.srqwqe_buf_pg_sz) { npages = (ib_umem_page_count(srq->umem) + @@ -321,11 +313,9 @@ struct ib_srq *hns_roce_create_srq(struct ib_pd *pd, } else { page_shift = PAGE_SHIFT + hr_dev->caps.srqwqe_buf_pg_sz; if (hns_roce_buf_alloc(hr_dev, srq_buf_size, - (1 << page_shift) * 2, - &srq->buf, page_shift)) { - ret 
= -ENOMEM; - goto err_srq; - } + (1 << page_shift) * 2, &srq->buf, + page_shift)) + return -ENOMEM; srq->head = 0; srq->tail = srq->max - 1; @@ -340,7 +330,7 @@ struct ib_srq *hns_roce_create_srq(struct ib_pd *pd, goto err_srq_mtt; page_shift = PAGE_SHIFT + hr_dev->caps.idx_buf_pg_sz; - ret = hns_roce_create_idx_que(pd, srq, page_shift); + ret = hns_roce_create_idx_que(ib_srq->pd, srq, page_shift); if (ret) { dev_err(hr_dev->dev, "Create idx queue fail(%d)!\n", ret); @@ -372,7 +362,7 @@ struct ib_srq *hns_roce_create_srq(struct ib_pd *pd, srq->db_reg_l = hr_dev->reg_base + SRQ_DB_REG; - ret = hns_roce_srq_alloc(hr_dev, to_hr_pd(pd)->pdn, cqn, 0, + ret = hns_roce_srq_alloc(hr_dev, to_hr_pd(ib_srq->pd)->pdn, cqn, 0, &srq->mtt, 0, srq); if (ret) goto err_wrid; @@ -389,7 +379,7 @@ struct ib_srq *hns_roce_create_srq(struct ib_pd *pd, } } - return &srq->ibsrq; + return 0; err_srqc_alloc: hns_roce_srq_free(hr_dev, srq); @@ -418,12 +408,10 @@ err_buf: else hns_roce_buf_free(hr_dev, srq_buf_size, &srq->buf); -err_srq: - kfree(srq); - return ERR_PTR(ret); + return ret; } -int hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) +void hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device); struct hns_roce_srq *srq = to_hr_srq(ibsrq); @@ -440,10 +428,6 @@ int hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) hns_roce_buf_free(hr_dev, srq->max << srq->wqe_shift, &srq->buf); } - - kfree(srq); - - return 0; } int hns_roce_init_srq_table(struct hns_roce_dev *hr_dev) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 27f38897ca9e..25d09d53b51c 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2561,6 +2561,7 @@ static const struct ib_device_ops mlx4_ib_dev_ops = { INIT_RDMA_OBJ_SIZE(ib_ah, mlx4_ib_ah, ibah), INIT_RDMA_OBJ_SIZE(ib_pd, mlx4_ib_pd, ibpd), + INIT_RDMA_OBJ_SIZE(ib_srq, mlx4_ib_srq, ibsrq), 
INIT_RDMA_OBJ_SIZE(ib_ucontext, mlx4_ib_ucontext, ibucontext), }; diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 14ca042ea715..26897102057d 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -759,13 +759,12 @@ int mlx4_ib_create_ah_slave(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, int mlx4_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); void mlx4_ib_destroy_ah(struct ib_ah *ah, u32 flags); -struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, - struct ib_srq_init_attr *init_attr, - struct ib_udata *udata); +int mlx4_ib_create_srq(struct ib_srq *srq, struct ib_srq_init_attr *init_attr, + struct ib_udata *udata); int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); int mlx4_ib_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); -int mlx4_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata); +void mlx4_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata); void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index); int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr); diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c index 94c3c334a672..4bf2946b9759 100644 --- a/drivers/infiniband/hw/mlx4/srq.c +++ b/drivers/infiniband/hw/mlx4/srq.c @@ -69,14 +69,14 @@ static void mlx4_ib_srq_event(struct mlx4_srq *srq, enum mlx4_event type) } } -struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, - struct ib_srq_init_attr *init_attr, - struct ib_udata *udata) +int mlx4_ib_create_srq(struct ib_srq *ib_srq, + struct ib_srq_init_attr *init_attr, + struct ib_udata *udata) { - struct mlx4_ib_dev *dev = to_mdev(pd->device); + struct mlx4_ib_dev *dev = to_mdev(ib_srq->device); struct mlx4_ib_ucontext *ucontext = rdma_udata_to_drv_context( udata, struct mlx4_ib_ucontext, ibucontext); - struct 
mlx4_ib_srq *srq; + struct mlx4_ib_srq *srq = to_msrq(ib_srq); struct mlx4_wqe_srq_next_seg *next; struct mlx4_wqe_data_seg *scatter; u32 cqn; @@ -89,11 +89,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, /* Sanity check SRQ size before proceeding */ if (init_attr->attr.max_wr >= dev->dev->caps.max_srq_wqes || init_attr->attr.max_sge > dev->dev->caps.max_srq_sge) - return ERR_PTR(-EINVAL); - - srq = kmalloc(sizeof *srq, GFP_KERNEL); - if (!srq) - return ERR_PTR(-ENOMEM); + return -EINVAL; mutex_init(&srq->mutex); spin_lock_init(&srq->lock); @@ -111,16 +107,12 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, if (udata) { struct mlx4_ib_create_srq ucmd; - if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) { - err = -EFAULT; - goto err_srq; - } + if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) + return -EFAULT; srq->umem = ib_umem_get(udata, ucmd.buf_addr, buf_size, 0, 0); - if (IS_ERR(srq->umem)) { - err = PTR_ERR(srq->umem); - goto err_srq; - } + if (IS_ERR(srq->umem)) + return PTR_ERR(srq->umem); err = mlx4_mtt_init(dev->dev, ib_umem_page_count(srq->umem), srq->umem->page_shift, &srq->mtt); @@ -137,7 +129,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, } else { err = mlx4_db_alloc(dev->dev, &srq->db, 0); if (err) - goto err_srq; + return err; *srq->db.db = 0; @@ -184,8 +176,8 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, xrcdn = (init_attr->srq_type == IB_SRQT_XRC) ? 
to_mxrcd(init_attr->ext.xrc.xrcd)->xrcdn : (u16) dev->dev->caps.reserved_xrcds; - err = mlx4_srq_alloc(dev->dev, to_mpd(pd)->pdn, cqn, xrcdn, &srq->mtt, - srq->db.dma, &srq->msrq); + err = mlx4_srq_alloc(dev->dev, to_mpd(ib_srq->pd)->pdn, cqn, xrcdn, + &srq->mtt, srq->db.dma, &srq->msrq); if (err) goto err_wrid; @@ -200,7 +192,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, init_attr->attr.max_wr = srq->msrq.max - 1; - return &srq->ibsrq; + return 0; err_wrid: if (udata) @@ -221,10 +213,7 @@ err_db: if (!udata) mlx4_db_free(dev->dev, &srq->db); -err_srq: - kfree(srq); - - return ERR_PTR(err); + return err; } int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, @@ -271,7 +260,7 @@ int mlx4_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr) return 0; } -int mlx4_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) +void mlx4_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) { struct mlx4_ib_dev *dev = to_mdev(srq->device); struct mlx4_ib_srq *msrq = to_msrq(srq); @@ -293,10 +282,6 @@ int mlx4_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) &msrq->buf); mlx4_db_free(dev->dev, &msrq->db); } - - kfree(msrq); - - return 0; } void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index f4827d12677a..06578e8a9787 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -4795,19 +4795,21 @@ static int create_dev_resources(struct mlx5_ib_resources *devr) attr.ext.cq = devr->c0; attr.ext.xrc.xrcd = devr->x0; - devr->s0 = mlx5_ib_create_srq(devr->p0, &attr, NULL); - if (IS_ERR(devr->s0)) { - ret = PTR_ERR(devr->s0); + devr->s0 = rdma_zalloc_drv_obj(ibdev, ib_srq); + if (!devr->s0) { + ret = -ENOMEM; goto error4; } + devr->s0->device = &dev->ib_dev; devr->s0->pd = devr->p0; - devr->s0->uobject = NULL; - devr->s0->event_handler = NULL; - devr->s0->srq_context = NULL; devr->s0->srq_type = 
IB_SRQT_XRC; devr->s0->ext.xrc.xrcd = devr->x0; devr->s0->ext.cq = devr->c0; + ret = mlx5_ib_create_srq(devr->s0, &attr, NULL); + if (ret) + goto err_create; + atomic_inc(&devr->s0->ext.xrc.xrcd->usecnt); atomic_inc(&devr->s0->ext.cq->usecnt); atomic_inc(&devr->p0->usecnt); @@ -4817,18 +4819,21 @@ static int create_dev_resources(struct mlx5_ib_resources *devr) attr.attr.max_sge = 1; attr.attr.max_wr = 1; attr.srq_type = IB_SRQT_BASIC; - devr->s1 = mlx5_ib_create_srq(devr->p0, &attr, NULL); - if (IS_ERR(devr->s1)) { - ret = PTR_ERR(devr->s1); + devr->s1 = rdma_zalloc_drv_obj(ibdev, ib_srq); + if (!devr->s1) { + ret = -ENOMEM; goto error5; } + devr->s1->device = &dev->ib_dev; devr->s1->pd = devr->p0; - devr->s1->uobject = NULL; - devr->s1->event_handler = NULL; - devr->s1->srq_context = NULL; devr->s1->srq_type = IB_SRQT_BASIC; devr->s1->ext.cq = devr->c0; + + ret = mlx5_ib_create_srq(devr->s1, &attr, NULL); + if (ret) + goto error6; + atomic_inc(&devr->p0->usecnt); atomic_set(&devr->s1->usecnt, 0); @@ -4840,8 +4845,12 @@ static int create_dev_resources(struct mlx5_ib_resources *devr) return 0; +error6: + kfree(devr->s1); error5: mlx5_ib_destroy_srq(devr->s0, NULL); +err_create: + kfree(devr->s0); error4: mlx5_ib_dealloc_xrcd(devr->x1, NULL); error3: @@ -4862,7 +4871,9 @@ static void destroy_dev_resources(struct mlx5_ib_resources *devr) int port; mlx5_ib_destroy_srq(devr->s1, NULL); + kfree(devr->s1); mlx5_ib_destroy_srq(devr->s0, NULL); + kfree(devr->s0); mlx5_ib_dealloc_xrcd(devr->x0, NULL); mlx5_ib_dealloc_xrcd(devr->x1, NULL); mlx5_ib_destroy_cq(devr->c0, NULL); @@ -5989,6 +6000,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = { INIT_RDMA_OBJ_SIZE(ib_ah, mlx5_ib_ah, ibah), INIT_RDMA_OBJ_SIZE(ib_pd, mlx5_ib_pd, ibpd), + INIT_RDMA_OBJ_SIZE(ib_srq, mlx5_ib_srq, ibsrq), INIT_RDMA_OBJ_SIZE(ib_ucontext, mlx5_ib_ucontext, ibucontext), }; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 5b4206653fdb..3e8d54618c78 
100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1058,13 +1058,12 @@ int mlx5_ib_create_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, u32 flags, struct ib_udata *udata); int mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); void mlx5_ib_destroy_ah(struct ib_ah *ah, u32 flags); -struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, - struct ib_srq_init_attr *init_attr, - struct ib_udata *udata); +int mlx5_ib_create_srq(struct ib_srq *srq, struct ib_srq_init_attr *init_attr, + struct ib_udata *udata); int mlx5_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr); -int mlx5_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata); +void mlx5_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata); int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr); int mlx5_ib_enable_lb(struct mlx5_ib_dev *dev, bool td, bool qp); diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 2e389f3444c6..4e7fde86c96b 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -214,16 +214,16 @@ static void destroy_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq) mlx5_db_free(dev->mdev, &srq->db); } -struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, - struct ib_srq_init_attr *init_attr, - struct ib_udata *udata) +int mlx5_ib_create_srq(struct ib_srq *ib_srq, + struct ib_srq_init_attr *init_attr, + struct ib_udata *udata) { - struct mlx5_ib_dev *dev = to_mdev(pd->device); - struct mlx5_ib_srq *srq; + struct mlx5_ib_dev *dev = to_mdev(ib_srq->device); + struct mlx5_ib_srq *srq = to_msrq(ib_srq); size_t desc_size; size_t buf_size; int err; - struct mlx5_srq_attr in = {0}; + struct mlx5_srq_attr in = {}; __u32 max_srq_wqes = 1 << MLX5_CAP_GEN(dev->mdev, 
log_max_srq_sz); /* Sanity check SRQ size before proceeding */ @@ -231,13 +231,9 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, mlx5_ib_dbg(dev, "max_wr %d, cap %d\n", init_attr->attr.max_wr, max_srq_wqes); - return ERR_PTR(-EINVAL); + return -EINVAL; } - srq = kmalloc(sizeof(*srq), GFP_KERNEL); - if (!srq) - return ERR_PTR(-ENOMEM); - mutex_init(&srq->mutex); spin_lock_init(&srq->lock); srq->msrq.max = roundup_pow_of_two(init_attr->attr.max_wr + 1); @@ -245,35 +241,32 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, desc_size = sizeof(struct mlx5_wqe_srq_next_seg) + srq->msrq.max_gs * sizeof(struct mlx5_wqe_data_seg); - if (desc_size == 0 || srq->msrq.max_gs > desc_size) { - err = -EINVAL; - goto err_srq; - } + if (desc_size == 0 || srq->msrq.max_gs > desc_size) + return -EINVAL; + desc_size = roundup_pow_of_two(desc_size); desc_size = max_t(size_t, 32, desc_size); - if (desc_size < sizeof(struct mlx5_wqe_srq_next_seg)) { - err = -EINVAL; - goto err_srq; - } + if (desc_size < sizeof(struct mlx5_wqe_srq_next_seg)) + return -EINVAL; + srq->msrq.max_avail_gather = (desc_size - sizeof(struct mlx5_wqe_srq_next_seg)) / sizeof(struct mlx5_wqe_data_seg); srq->msrq.wqe_shift = ilog2(desc_size); buf_size = srq->msrq.max * desc_size; - if (buf_size < desc_size) { - err = -EINVAL; - goto err_srq; - } + if (buf_size < desc_size) + return -EINVAL; + in.type = init_attr->srq_type; if (udata) - err = create_srq_user(pd, srq, &in, udata, buf_size); + err = create_srq_user(ib_srq->pd, srq, &in, udata, buf_size); else err = create_srq_kernel(dev, srq, &in, buf_size); if (err) { mlx5_ib_warn(dev, "create srq %s failed, err %d\n", udata ? 
"user" : "kernel", err); - goto err_srq; + return err; } in.log_size = ilog2(srq->msrq.max); @@ -303,7 +296,7 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, else in.cqn = to_mcq(dev->devr.c0)->mcq.cqn; - in.pd = to_mpd(pd)->pdn; + in.pd = to_mpd(ib_srq->pd)->pdn; in.db_record = srq->db.dma; err = mlx5_cmd_create_srq(dev, &srq->msrq, &in); kvfree(in.pas); @@ -326,21 +319,18 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, init_attr->attr.max_wr = srq->msrq.max - 1; - return &srq->ibsrq; + return 0; err_core: mlx5_cmd_destroy_srq(dev, &srq->msrq); err_usr_kern_srq: if (udata) - destroy_srq_user(pd, srq, udata); + destroy_srq_user(ib_srq->pd, srq, udata); else destroy_srq_kernel(dev, srq); -err_srq: - kfree(srq); - - return ERR_PTR(err); + return err; } int mlx5_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, @@ -393,7 +383,7 @@ out_box: return ret; } -int mlx5_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) +void mlx5_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(srq->device); struct mlx5_ib_srq *msrq = to_msrq(srq); @@ -411,9 +401,6 @@ int mlx5_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) } else { destroy_srq_kernel(dev, msrq); } - - kfree(srq); - return 0; } void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index) diff --git a/drivers/infiniband/hw/mlx5/srq.h b/drivers/infiniband/hw/mlx5/srq.h index 2c3627b2509d..af197c36d757 100644 --- a/drivers/infiniband/hw/mlx5/srq.h +++ b/drivers/infiniband/hw/mlx5/srq.h @@ -56,7 +56,7 @@ struct mlx5_srq_table { int mlx5_cmd_create_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, struct mlx5_srq_attr *in); -int mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq); +void mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq); int mlx5_cmd_query_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, struct mlx5_srq_attr *out); int mlx5_cmd_arm_srq(struct mlx5_ib_dev *dev, 
struct mlx5_core_srq *srq, diff --git a/drivers/infiniband/hw/mlx5/srq_cmd.c b/drivers/infiniband/hw/mlx5/srq_cmd.c index 1e04319684f4..b0d0687c7a68 100644 --- a/drivers/infiniband/hw/mlx5/srq_cmd.c +++ b/drivers/infiniband/hw/mlx5/srq_cmd.c @@ -607,7 +607,7 @@ err_destroy_srq_split: return err; } -int mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq) +void mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq) { struct mlx5_srq_table *table = &dev->srq_table; struct mlx5_core_srq *tmp; @@ -615,16 +615,14 @@ int mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq) tmp = xa_erase_irq(&table->array, srq->srqn); if (!tmp || tmp != srq) - return -EINVAL; + return; err = destroy_srq_split(dev, srq); if (err) - return err; + return; mlx5_core_res_put(&srq->common); wait_for_completion(&srq->common.free); - - return 0; } int mlx5_cmd_query_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 6fc371a9e45f..4f40dfedf920 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -403,65 +403,53 @@ static void mthca_ah_destroy(struct ib_ah *ah, u32 flags) mthca_destroy_ah(to_mdev(ah->device), to_mah(ah)); } -static struct ib_srq *mthca_create_srq(struct ib_pd *pd, - struct ib_srq_init_attr *init_attr, - struct ib_udata *udata) +static int mthca_create_srq(struct ib_srq *ibsrq, + struct ib_srq_init_attr *init_attr, + struct ib_udata *udata) { struct mthca_create_srq ucmd; struct mthca_ucontext *context = rdma_udata_to_drv_context( udata, struct mthca_ucontext, ibucontext); - struct mthca_srq *srq; + struct mthca_srq *srq = to_msrq(ibsrq); int err; if (init_attr->srq_type != IB_SRQT_BASIC) - return ERR_PTR(-EOPNOTSUPP); - - srq = kmalloc(sizeof *srq, GFP_KERNEL); - if (!srq) - return ERR_PTR(-ENOMEM); + return -EOPNOTSUPP; if (udata) { - if 
(ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) { - err = -EFAULT; - goto err_free; - } + if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) + return -EFAULT; - err = mthca_map_user_db(to_mdev(pd->device), &context->uar, + err = mthca_map_user_db(to_mdev(ibsrq->device), &context->uar, context->db_tab, ucmd.db_index, ucmd.db_page); if (err) - goto err_free; + return err; srq->mr.ibmr.lkey = ucmd.lkey; srq->db_index = ucmd.db_index; } - err = mthca_alloc_srq(to_mdev(pd->device), to_mpd(pd), + err = mthca_alloc_srq(to_mdev(ibsrq->device), to_mpd(ibsrq->pd), &init_attr->attr, srq, udata); if (err && udata) - mthca_unmap_user_db(to_mdev(pd->device), &context->uar, + mthca_unmap_user_db(to_mdev(ibsrq->device), &context->uar, context->db_tab, ucmd.db_index); if (err) - goto err_free; + return err; - if (context && ib_copy_to_udata(udata, &srq->srqn, sizeof (__u32))) { - mthca_free_srq(to_mdev(pd->device), srq); - err = -EFAULT; - goto err_free; + if (context && ib_copy_to_udata(udata, &srq->srqn, sizeof(__u32))) { + mthca_free_srq(to_mdev(ibsrq->device), srq); + return -EFAULT; } - return &srq->ibsrq; - -err_free: - kfree(srq); - - return ERR_PTR(err); + return 0; } -static int mthca_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) +static void mthca_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) { if (udata) { struct mthca_ucontext *context = @@ -475,9 +463,6 @@ static int mthca_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) } mthca_free_srq(to_mdev(srq->device), to_msrq(srq)); - kfree(srq); - - return 0; } static struct ib_qp *mthca_create_qp(struct ib_pd *pd, @@ -1210,6 +1195,8 @@ static const struct ib_device_ops mthca_dev_arbel_srq_ops = { .modify_srq = mthca_modify_srq, .post_srq_recv = mthca_arbel_post_srq_recv, .query_srq = mthca_query_srq, + + INIT_RDMA_OBJ_SIZE(ib_srq, mthca_srq, ibsrq), }; static const struct ib_device_ops mthca_dev_tavor_srq_ops = { @@ -1218,6 +1205,8 @@ static const struct ib_device_ops mthca_dev_tavor_srq_ops = { 
.modify_srq = mthca_modify_srq, .post_srq_recv = mthca_tavor_post_srq_recv, .query_srq = mthca_query_srq, + + INIT_RDMA_OBJ_SIZE(ib_srq, mthca_srq, ibsrq), }; static const struct ib_device_ops mthca_dev_arbel_fmr_ops = { diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index e693eb352959..5d96b5a94583 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -2863,21 +2863,19 @@ int ocrdma_mbx_query_srq(struct ocrdma_srq *srq, struct ib_srq_attr *srq_attr) return status; } -int ocrdma_mbx_destroy_srq(struct ocrdma_dev *dev, struct ocrdma_srq *srq) +void ocrdma_mbx_destroy_srq(struct ocrdma_dev *dev, struct ocrdma_srq *srq) { - int status = -ENOMEM; struct ocrdma_destroy_srq *cmd; struct pci_dev *pdev = dev->nic_info.pdev; cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_DELETE_SRQ, sizeof(*cmd)); if (!cmd) - return status; + return; cmd->id = srq->id; - status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd); + ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd); if (srq->rq.va) dma_free_coherent(&pdev->dev, srq->rq.len, srq->rq.va, srq->rq.pa); kfree(cmd); - return status; } static int ocrdma_mbx_get_dcbx_config(struct ocrdma_dev *dev, u32 ptype, diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h index 88d45aa19ded..06ec59326a90 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h @@ -137,7 +137,7 @@ int ocrdma_mbx_create_srq(struct ocrdma_dev *, struct ocrdma_srq *, struct ocrdma_pd *); int ocrdma_mbx_modify_srq(struct ocrdma_srq *, struct ib_srq_attr *); int ocrdma_mbx_query_srq(struct ocrdma_srq *, struct ib_srq_attr *); -int ocrdma_mbx_destroy_srq(struct ocrdma_dev *, struct ocrdma_srq *); +void ocrdma_mbx_destroy_srq(struct ocrdma_dev *dev, struct ocrdma_srq *srq); int ocrdma_alloc_av(struct ocrdma_dev *dev, struct ocrdma_ah *ah); void ocrdma_free_av(struct ocrdma_dev *dev, struct ocrdma_ah 
*ah); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index 8642a2e60be7..34d3d59f3ca7 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -191,6 +191,8 @@ static const struct ib_device_ops ocrdma_dev_srq_ops = { .modify_srq = ocrdma_modify_srq, .post_srq_recv = ocrdma_post_srq_recv, .query_srq = ocrdma_query_srq, + + INIT_RDMA_OBJ_SIZE(ib_srq, ocrdma_srq, ibsrq), }; static int ocrdma_register_device(struct ocrdma_dev *dev) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index cf7aeb963dce..ffdd3ac55086 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -1805,45 +1805,43 @@ static int ocrdma_copy_srq_uresp(struct ocrdma_dev *dev, struct ocrdma_srq *srq, return status; } -struct ib_srq *ocrdma_create_srq(struct ib_pd *ibpd, - struct ib_srq_init_attr *init_attr, - struct ib_udata *udata) +int ocrdma_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr, + struct ib_udata *udata) { - int status = -ENOMEM; - struct ocrdma_pd *pd = get_ocrdma_pd(ibpd); - struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device); - struct ocrdma_srq *srq; + int status; + struct ocrdma_pd *pd = get_ocrdma_pd(ibsrq->pd); + struct ocrdma_dev *dev = get_ocrdma_dev(ibsrq->device); + struct ocrdma_srq *srq = get_ocrdma_srq(ibsrq); if (init_attr->attr.max_sge > dev->attr.max_recv_sge) - return ERR_PTR(-EINVAL); + return -EINVAL; if (init_attr->attr.max_wr > dev->attr.max_rqe) - return ERR_PTR(-EINVAL); - - srq = kzalloc(sizeof(*srq), GFP_KERNEL); - if (!srq) - return ERR_PTR(status); + return -EINVAL; spin_lock_init(&srq->q_lock); srq->pd = pd; srq->db = dev->nic_info.db + (pd->id * dev->nic_info.db_page_size); status = ocrdma_mbx_create_srq(dev, srq, init_attr, pd); if (status) - goto err; + return status; - if (udata == NULL) { - status = -ENOMEM; + if (!udata) { 
srq->rqe_wr_id_tbl = kcalloc(srq->rq.max_cnt, sizeof(u64), GFP_KERNEL); - if (srq->rqe_wr_id_tbl == NULL) + if (!srq->rqe_wr_id_tbl) { + status = -ENOMEM; goto arm_err; + } srq->bit_fields_len = (srq->rq.max_cnt / 32) + (srq->rq.max_cnt % 32 ? 1 : 0); srq->idx_bit_fields = kmalloc_array(srq->bit_fields_len, sizeof(u32), GFP_KERNEL); - if (srq->idx_bit_fields == NULL) + if (!srq->idx_bit_fields) { + status = -ENOMEM; goto arm_err; + } memset(srq->idx_bit_fields, 0xff, srq->bit_fields_len * sizeof(u32)); } @@ -1860,15 +1858,13 @@ struct ib_srq *ocrdma_create_srq(struct ib_pd *ibpd, goto arm_err; } - return &srq->ibsrq; + return 0; arm_err: ocrdma_mbx_destroy_srq(dev, srq); -err: kfree(srq->rqe_wr_id_tbl); kfree(srq->idx_bit_fields); - kfree(srq); - return ERR_PTR(status); + return status; } int ocrdma_modify_srq(struct ib_srq *ibsrq, @@ -1897,15 +1893,14 @@ int ocrdma_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr) return status; } -int ocrdma_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) +void ocrdma_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) { - int status; struct ocrdma_srq *srq; struct ocrdma_dev *dev = get_ocrdma_dev(ibsrq->device); srq = get_ocrdma_srq(ibsrq); - status = ocrdma_mbx_destroy_srq(dev, srq); + ocrdma_mbx_destroy_srq(dev, srq); if (srq->pd->uctx) ocrdma_del_mmap(srq->pd->uctx, (u64) srq->rq.pa, @@ -1913,8 +1908,6 @@ int ocrdma_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) kfree(srq->idx_bit_fields); kfree(srq->rqe_wr_id_tbl); - kfree(srq); - return status; } /* unprivileged verbs and their support functions. 
*/ diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h index dfdebe4e48e6..c6489a1439dc 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h @@ -91,12 +91,12 @@ int ocrdma_query_qp(struct ib_qp *, int ocrdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata); void ocrdma_del_flush_qp(struct ocrdma_qp *qp); -struct ib_srq *ocrdma_create_srq(struct ib_pd *, struct ib_srq_init_attr *, - struct ib_udata *); +int ocrdma_create_srq(struct ib_srq *srq, struct ib_srq_init_attr *attr, + struct ib_udata *udata); int ocrdma_modify_srq(struct ib_srq *, struct ib_srq_attr *, enum ib_srq_attr_mask, struct ib_udata *); int ocrdma_query_srq(struct ib_srq *, struct ib_srq_attr *); -int ocrdma_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata); +void ocrdma_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata); int ocrdma_post_srq_recv(struct ib_srq *, const struct ib_recv_wr *, const struct ib_recv_wr **bad_recv_wr); diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index f32ea7052c48..cbcdc5c669c6 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -241,6 +241,7 @@ static const struct ib_device_ops qedr_dev_ops = { INIT_RDMA_OBJ_SIZE(ib_ah, qedr_ah, ibah), INIT_RDMA_OBJ_SIZE(ib_pd, qedr_pd, ibpd), + INIT_RDMA_OBJ_SIZE(ib_srq, qedr_srq, ibsrq), INIT_RDMA_OBJ_SIZE(ib_ucontext, qedr_ucontext, ibucontext), }; diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 8ea06856e7b9..5e92b6229da2 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -1267,7 +1267,7 @@ static void qedr_set_roce_db_info(struct qedr_dev *dev, struct qedr_qp *qp) } } -static int qedr_check_srq_params(struct ib_pd *ibpd, struct qedr_dev *dev, +static int qedr_check_srq_params(struct qedr_dev *dev, struct ib_srq_init_attr *attrs, struct ib_udata *udata) 
{ @@ -1383,33 +1383,28 @@ err0: return rc; } -struct ib_srq *qedr_create_srq(struct ib_pd *ibpd, - struct ib_srq_init_attr *init_attr, - struct ib_udata *udata) +int qedr_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr, + struct ib_udata *udata) { struct qed_rdma_destroy_srq_in_params destroy_in_params; struct qed_rdma_create_srq_in_params in_params = {}; - struct qedr_dev *dev = get_qedr_dev(ibpd->device); + struct qedr_dev *dev = get_qedr_dev(ibsrq->device); struct qed_rdma_create_srq_out_params out_params; - struct qedr_pd *pd = get_qedr_pd(ibpd); + struct qedr_pd *pd = get_qedr_pd(ibsrq->pd); struct qedr_create_srq_ureq ureq = {}; u64 pbl_base_addr, phy_prod_pair_addr; struct qedr_srq_hwq_info *hw_srq; u32 page_cnt, page_size; - struct qedr_srq *srq; + struct qedr_srq *srq = get_qedr_srq(ibsrq); int rc = 0; DP_DEBUG(dev, QEDR_MSG_QP, "create SRQ called from %s (pd %p)\n", (udata) ? "User lib" : "kernel", pd); - rc = qedr_check_srq_params(ibpd, dev, init_attr, udata); + rc = qedr_check_srq_params(dev, init_attr, udata); if (rc) - return ERR_PTR(-EINVAL); - - srq = kzalloc(sizeof(*srq), GFP_KERNEL); - if (!srq) - return ERR_PTR(-ENOMEM); + return -EINVAL; srq->dev = dev; hw_srq = &srq->hw_srq; @@ -1471,7 +1466,7 @@ struct ib_srq *qedr_create_srq(struct ib_pd *ibpd, DP_DEBUG(dev, QEDR_MSG_SRQ, "create srq: created srq with srq_id=0x%0x\n", srq->srq_id); - return &srq->ibsrq; + return 0; err2: destroy_in_params.srq_id = srq->srq_id; @@ -1483,12 +1478,10 @@ err1: else qedr_free_srq_kernel_params(srq); err0: - kfree(srq); - - return ERR_PTR(-EFAULT); + return -EFAULT; } -int qedr_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) +void qedr_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) { struct qed_rdma_destroy_srq_in_params in_params = {}; struct qedr_dev *dev = get_qedr_dev(ibsrq->device); @@ -1506,9 +1499,6 @@ int qedr_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) DP_DEBUG(dev, QEDR_MSG_SRQ, "destroy srq: 
destroyed srq with srq_id=0x%0x\n", srq->srq_id); - kfree(srq); - - return 0; } int qedr_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h index 772af35a5055..9328c80375ef 100644 --- a/drivers/infiniband/hw/qedr/verbs.h +++ b/drivers/infiniband/hw/qedr/verbs.h @@ -64,13 +64,12 @@ int qedr_query_qp(struct ib_qp *, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *); int qedr_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata); -struct ib_srq *qedr_create_srq(struct ib_pd *ibpd, - struct ib_srq_init_attr *attr, - struct ib_udata *udata); +int qedr_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *attr, + struct ib_udata *udata); int qedr_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); int qedr_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr); -int qedr_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata); +void qedr_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata); int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_recv_wr); int qedr_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr, u32 flags, diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index 6cbc271a1b7d..823846947a5b 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -206,6 +206,8 @@ static const struct ib_device_ops pvrdma_dev_srq_ops = { .destroy_srq = pvrdma_destroy_srq, .modify_srq = pvrdma_modify_srq, .query_srq = pvrdma_query_srq, + + INIT_RDMA_OBJ_SIZE(ib_srq, pvrdma_srq, ibsrq), }; static int pvrdma_register_device(struct pvrdma_dev *dev) diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c index 21a95780e0ea..6cac0c88cf39 100644 --- 
a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c @@ -94,19 +94,18 @@ int pvrdma_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr) * @init_attr: shared receive queue attributes * @udata: user data * - * @return: the ib_srq pointer on success, otherwise returns an errno. + * @return: 0 on success, otherwise returns an errno. */ -struct ib_srq *pvrdma_create_srq(struct ib_pd *pd, - struct ib_srq_init_attr *init_attr, - struct ib_udata *udata) +int pvrdma_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr, + struct ib_udata *udata) { - struct pvrdma_srq *srq = NULL; - struct pvrdma_dev *dev = to_vdev(pd->device); + struct pvrdma_srq *srq = to_vsrq(ibsrq); + struct pvrdma_dev *dev = to_vdev(ibsrq->device); union pvrdma_cmd_req req; union pvrdma_cmd_resp rsp; struct pvrdma_cmd_create_srq *cmd = &req.create_srq; struct pvrdma_cmd_create_srq_resp *resp = &rsp.create_srq_resp; - struct pvrdma_create_srq_resp srq_resp = {0}; + struct pvrdma_create_srq_resp srq_resp = {}; struct pvrdma_create_srq ucmd; unsigned long flags; int ret; @@ -115,31 +114,25 @@ struct ib_srq *pvrdma_create_srq(struct ib_pd *pd, /* No support for kernel clients. 
*/ dev_warn(&dev->pdev->dev, "no shared receive queue support for kernel client\n"); - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; } if (init_attr->srq_type != IB_SRQT_BASIC) { dev_warn(&dev->pdev->dev, "shared receive queue type %d not supported\n", init_attr->srq_type); - return ERR_PTR(-EINVAL); + return -EINVAL; } if (init_attr->attr.max_wr > dev->dsr->caps.max_srq_wr || init_attr->attr.max_sge > dev->dsr->caps.max_srq_sge) { dev_warn(&dev->pdev->dev, "shared receive queue size invalid\n"); - return ERR_PTR(-EINVAL); + return -EINVAL; } if (!atomic_add_unless(&dev->num_srqs, 1, dev->dsr->caps.max_srq)) - return ERR_PTR(-ENOMEM); - - srq = kmalloc(sizeof(*srq), GFP_KERNEL); - if (!srq) { - ret = -ENOMEM; - goto err_srq; - } + return -ENOMEM; spin_lock_init(&srq->lock); refcount_set(&srq->refcnt, 1); @@ -181,7 +174,7 @@ struct ib_srq *pvrdma_create_srq(struct ib_pd *pd, cmd->hdr.cmd = PVRDMA_CMD_CREATE_SRQ; cmd->srq_type = init_attr->srq_type; cmd->nchunks = srq->npages; - cmd->pd_handle = to_vpd(pd)->pd_handle; + cmd->pd_handle = to_vpd(ibsrq->pd)->pd_handle; cmd->attrs.max_wr = init_attr->attr.max_wr; cmd->attrs.max_sge = init_attr->attr.max_sge; cmd->attrs.srq_limit = init_attr->attr.srq_limit; @@ -205,20 +198,19 @@ struct ib_srq *pvrdma_create_srq(struct ib_pd *pd, if (ib_copy_to_udata(udata, &srq_resp, sizeof(srq_resp))) { dev_warn(&dev->pdev->dev, "failed to copy back udata\n"); pvrdma_destroy_srq(&srq->ibsrq, udata); - return ERR_PTR(-EINVAL); + return -EINVAL; } - return &srq->ibsrq; + return 0; err_page_dir: pvrdma_page_dir_cleanup(dev, &srq->pdir); err_umem: ib_umem_release(srq->umem); err_srq: - kfree(srq); atomic_dec(&dev->num_srqs); - return ERR_PTR(ret); + return ret; } static void pvrdma_free_srq(struct pvrdma_dev *dev, struct pvrdma_srq *srq) @@ -250,7 +242,7 @@ static void pvrdma_free_srq(struct pvrdma_dev *dev, struct pvrdma_srq *srq) * * @return: 0 for success. 
*/ -int pvrdma_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) +void pvrdma_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) { struct pvrdma_srq *vsrq = to_vsrq(srq); union pvrdma_cmd_req req; @@ -269,8 +261,6 @@ int pvrdma_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) ret); pvrdma_free_srq(dev, vsrq); - - return 0; } /** diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h index 013c73f2eba3..9d7b021e1c59 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h @@ -419,13 +419,12 @@ int pvrdma_create_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, u32 flags, struct ib_udata *udata); void pvrdma_destroy_ah(struct ib_ah *ah, u32 flags); -struct ib_srq *pvrdma_create_srq(struct ib_pd *pd, - struct ib_srq_init_attr *init_attr, - struct ib_udata *udata); +int pvrdma_create_srq(struct ib_srq *srq, struct ib_srq_init_attr *init_attr, + struct ib_udata *udata); int pvrdma_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); int pvrdma_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); -int pvrdma_destroy_srq(struct ib_srq *srq, struct ib_udata *udata); +void pvrdma_destroy_srq(struct ib_srq *srq, struct ib_udata *udata); struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, diff --git a/drivers/infiniband/sw/rdmavt/srq.c b/drivers/infiniband/sw/rdmavt/srq.c index 21d276eaf15a..8d6b3e764255 100644 --- a/drivers/infiniband/sw/rdmavt/srq.c +++ b/drivers/infiniband/sw/rdmavt/srq.c @@ -71,29 +71,24 @@ void rvt_driver_srq_init(struct rvt_dev_info *rdi) * @srq_init_attr: the attributes of the SRQ * @udata: data from libibverbs when creating a user SRQ * - * Return: Allocated srq object + * Return: 0 on success */ -struct ib_srq *rvt_create_srq(struct ib_pd *ibpd, - struct ib_srq_init_attr *srq_init_attr, - struct ib_udata *udata) +int 
rvt_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *srq_init_attr, + struct ib_udata *udata) { - struct rvt_dev_info *dev = ib_to_rvt(ibpd->device); - struct rvt_srq *srq; + struct rvt_dev_info *dev = ib_to_rvt(ibsrq->device); + struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq); u32 sz; - struct ib_srq *ret; + int ret; if (srq_init_attr->srq_type != IB_SRQT_BASIC) - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; if (srq_init_attr->attr.max_sge == 0 || srq_init_attr->attr.max_sge > dev->dparms.props.max_srq_sge || srq_init_attr->attr.max_wr == 0 || srq_init_attr->attr.max_wr > dev->dparms.props.max_srq_wr) - return ERR_PTR(-EINVAL); - - srq = kzalloc_node(sizeof(*srq), GFP_KERNEL, dev->dparms.node); - if (!srq) - return ERR_PTR(-ENOMEM); + return -EINVAL; /* * Need to use vmalloc() if we want to support large #s of entries. @@ -107,7 +102,7 @@ struct ib_srq *rvt_create_srq(struct ib_pd *ibpd, vzalloc_node(sizeof(struct rvt_rwq) + srq->rq.size * sz, dev->dparms.node); if (!srq->rq.wq) { - ret = ERR_PTR(-ENOMEM); + ret = -ENOMEM; goto bail_srq; } @@ -116,21 +111,18 @@ struct ib_srq *rvt_create_srq(struct ib_pd *ibpd, * See rvt_mmap() for details. 
*/ if (udata && udata->outlen >= sizeof(__u64)) { - int err; u32 s = sizeof(struct rvt_rwq) + srq->rq.size * sz; srq->ip = rvt_create_mmap_info(dev, s, udata, srq->rq.wq); if (!srq->ip) { - ret = ERR_PTR(-ENOMEM); + ret = -ENOMEM; goto bail_wq; } - err = ib_copy_to_udata(udata, &srq->ip->offset, + ret = ib_copy_to_udata(udata, &srq->ip->offset, sizeof(srq->ip->offset)); - if (err) { - ret = ERR_PTR(err); + if (ret) goto bail_ip; - } } /* @@ -142,7 +134,7 @@ struct ib_srq *rvt_create_srq(struct ib_pd *ibpd, spin_lock(&dev->n_srqs_lock); if (dev->n_srqs_allocated == dev->dparms.props.max_srq) { spin_unlock(&dev->n_srqs_lock); - ret = ERR_PTR(-ENOMEM); + ret = -ENOMEM; goto bail_ip; } @@ -155,14 +147,13 @@ struct ib_srq *rvt_create_srq(struct ib_pd *ibpd, spin_unlock_irq(&dev->pending_lock); } - return &srq->ibsrq; + return 0; bail_ip: kfree(srq->ip); bail_wq: vfree(srq->rq.wq); bail_srq: - kfree(srq); return ret; } @@ -334,9 +325,8 @@ int rvt_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) * rvt_destroy_srq - destory an srq * @ibsrq: srq object to destroy * - * Return always 0 */ -int rvt_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) +void rvt_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) { struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq); struct rvt_dev_info *dev = ib_to_rvt(ibsrq->device); @@ -348,7 +338,4 @@ int rvt_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) kref_put(&srq->ip->ref, rvt_release_mmap_info); else vfree(srq->rq.wq); - kfree(srq); - - return 0; } diff --git a/drivers/infiniband/sw/rdmavt/srq.h b/drivers/infiniband/sw/rdmavt/srq.h index 69cad2f65408..6427d7d62a9a 100644 --- a/drivers/infiniband/sw/rdmavt/srq.h +++ b/drivers/infiniband/sw/rdmavt/srq.h @@ -50,13 +50,12 @@ #include void rvt_driver_srq_init(struct rvt_dev_info *rdi); -struct ib_srq *rvt_create_srq(struct ib_pd *ibpd, - struct ib_srq_init_attr *srq_init_attr, - struct ib_udata *udata); +int rvt_create_srq(struct ib_srq *ibsrq, struct 
ib_srq_init_attr *srq_init_attr, + struct ib_udata *udata); int rvt_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); int rvt_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr); -int rvt_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata); +void rvt_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata); #endif /* DEF_RVTSRQ_H */ diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index f4b3bb57ab06..9546a837a8ac 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -428,6 +428,7 @@ static const struct ib_device_ops rvt_dev_ops = { INIT_RDMA_OBJ_SIZE(ib_ah, rvt_ah, ibah), INIT_RDMA_OBJ_SIZE(ib_pd, rvt_pd, ibpd), + INIT_RDMA_OBJ_SIZE(ib_srq, rvt_srq, ibsrq), INIT_RDMA_OBJ_SIZE(ib_ucontext, rvt_ucontext, ibucontext), }; diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c index 756bd36fd268..56cf18af016a 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.c +++ b/drivers/infiniband/sw/rxe/rxe_pool.c @@ -57,7 +57,7 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = { [RXE_TYPE_SRQ] = { .name = "rxe-srq", .size = sizeof(struct rxe_srq), - .flags = RXE_POOL_INDEX, + .flags = RXE_POOL_INDEX | RXE_POOL_NO_ALLOC, .min_index = RXE_MIN_SRQ_INDEX, .max_index = RXE_MAX_SRQ_INDEX, }, diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index a6c63a260626..8c3e2a18cfe4 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -289,19 +289,18 @@ err1: return err; } -static struct ib_srq *rxe_create_srq(struct ib_pd *ibpd, - struct ib_srq_init_attr *init, - struct ib_udata *udata) +static int rxe_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init, + struct ib_udata *udata) { int err; - struct rxe_dev *rxe = to_rdev(ibpd->device); - struct rxe_pd *pd = to_rpd(ibpd); - struct rxe_srq *srq; + struct rxe_dev *rxe 
= to_rdev(ibsrq->device); + struct rxe_pd *pd = to_rpd(ibsrq->pd); + struct rxe_srq *srq = to_rsrq(ibsrq); struct rxe_create_srq_resp __user *uresp = NULL; if (udata) { if (udata->outlen < sizeof(*uresp)) - return ERR_PTR(-EINVAL); + return -EINVAL; uresp = udata->outbuf; } @@ -309,13 +308,10 @@ static struct ib_srq *rxe_create_srq(struct ib_pd *ibpd, if (err) goto err1; - srq = rxe_alloc(&rxe->srq_pool); - if (!srq) { - err = -ENOMEM; + err = rxe_add_to_pool(&rxe->srq_pool, &srq->pelem); + if (err) goto err1; - } - rxe_add_index(srq); rxe_add_ref(pd); srq->pd = pd; @@ -323,14 +319,13 @@ static struct ib_srq *rxe_create_srq(struct ib_pd *ibpd, if (err) goto err2; - return &srq->ibsrq; + return 0; err2: rxe_drop_ref(pd); - rxe_drop_index(srq); rxe_drop_ref(srq); err1: - return ERR_PTR(err); + return err; } static int rxe_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, @@ -378,7 +373,7 @@ static int rxe_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) return 0; } -static int rxe_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) +static void rxe_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) { struct rxe_srq *srq = to_rsrq(ibsrq); @@ -386,10 +381,7 @@ static int rxe_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) rxe_queue_cleanup(srq->rq.queue); rxe_drop_ref(srq->pd); - rxe_drop_index(srq); rxe_drop_ref(srq); - - return 0; } static int rxe_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, @@ -1166,6 +1158,7 @@ static const struct ib_device_ops rxe_dev_ops = { INIT_RDMA_OBJ_SIZE(ib_ah, rxe_ah, ibah), INIT_RDMA_OBJ_SIZE(ib_pd, rxe_pd, ibpd), + INIT_RDMA_OBJ_SIZE(ib_srq, rxe_srq, ibsrq), INIT_RDMA_OBJ_SIZE(ib_ucontext, rxe_ucontext, ibuc), }; diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h index 23c5002b5134..e8be7f44e3be 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -120,8 +120,8 @@ struct rxe_rq { }; struct 
rxe_srq { - struct rxe_pool_entry pelem; struct ib_srq ibsrq; + struct rxe_pool_entry pelem; struct rxe_pd *pd; struct rxe_rq rq; u32 srq_num; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 3232a84c4fdb..43a75ab8ea8a 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2406,14 +2406,14 @@ struct ib_device_ops { int (*modify_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); int (*query_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); void (*destroy_ah)(struct ib_ah *ah, u32 flags); - struct ib_srq *(*create_srq)(struct ib_pd *pd, - struct ib_srq_init_attr *srq_init_attr, - struct ib_udata *udata); + int (*create_srq)(struct ib_srq *srq, + struct ib_srq_init_attr *srq_init_attr, + struct ib_udata *udata); int (*modify_srq)(struct ib_srq *srq, struct ib_srq_attr *srq_attr, enum ib_srq_attr_mask srq_attr_mask, struct ib_udata *udata); int (*query_srq)(struct ib_srq *srq, struct ib_srq_attr *srq_attr); - int (*destroy_srq)(struct ib_srq *srq, struct ib_udata *udata); + void (*destroy_srq)(struct ib_srq *srq, struct ib_udata *udata); struct ib_qp *(*create_qp)(struct ib_pd *pd, struct ib_qp_init_attr *qp_init_attr, struct ib_udata *udata); @@ -2553,6 +2553,7 @@ struct ib_device_ops { DECLARE_RDMA_OBJ_SIZE(ib_ah); DECLARE_RDMA_OBJ_SIZE(ib_pd); + DECLARE_RDMA_OBJ_SIZE(ib_srq); DECLARE_RDMA_OBJ_SIZE(ib_ucontext); }; -- cgit v1.2.3 From e1c9a0dc2939e7d51926265123ee93b1caa5e93e Mon Sep 17 00:00:00 2001 From: Lijun Ou Date: Mon, 1 Apr 2019 19:13:35 +0800 Subject: RDMA/hns: Dump detailed driver-specific CQ This patch adds support of resource track for hip08 and take dumping cq context state used for debugging as an example. More resources track supports for hns driver will be added in future. The output should be as follows. 
$ rdma res show cq dev hnseth0 -d dev hnseth0 cqe 1023 users 2 poll-ctx WORKQUEUE pid 0 comm [ib_core] drv_state 2 drv_ceqn 0 drv_cqn 0 drv_hopnum 1 drv_pi 0 drv_ci 0 drv_coalesce 0 drv_period 0 drv_cnt 0 Signed-off-by: Tao Tian Signed-off-by: Yangyang Li Signed-off-by: chenglang Signed-off-by: Lijun Ou Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/Makefile | 4 +- drivers/infiniband/hw/hns/hns_roce_cmd.h | 1 + drivers/infiniband/hw/hns/hns_roce_device.h | 8 ++ drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 5 + drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 3 + drivers/infiniband/hw/hns/hns_roce_hw_v2_dfx.c | 35 +++++++ drivers/infiniband/hw/hns/hns_roce_main.c | 1 + drivers/infiniband/hw/hns/hns_roce_restrack.c | 126 +++++++++++++++++++++++++ 8 files changed, 181 insertions(+), 2 deletions(-) create mode 100644 drivers/infiniband/hw/hns/hns_roce_hw_v2_dfx.c create mode 100644 drivers/infiniband/hw/hns/hns_roce_restrack.c diff --git a/drivers/infiniband/hw/hns/Makefile b/drivers/infiniband/hw/hns/Makefile index e2a7f1488f76..eee5205f936f 100644 --- a/drivers/infiniband/hw/hns/Makefile +++ b/drivers/infiniband/hw/hns/Makefile @@ -7,8 +7,8 @@ ccflags-y := -I $(srctree)/drivers/net/ethernet/hisilicon/hns3 obj-$(CONFIG_INFINIBAND_HNS) += hns-roce.o hns-roce-objs := hns_roce_main.o hns_roce_cmd.o hns_roce_pd.o \ hns_roce_ah.o hns_roce_hem.o hns_roce_mr.o hns_roce_qp.o \ - hns_roce_cq.o hns_roce_alloc.o hns_roce_db.o hns_roce_srq.o + hns_roce_cq.o hns_roce_alloc.o hns_roce_db.o hns_roce_srq.o hns_roce_restrack.o obj-$(CONFIG_INFINIBAND_HNS_HIP06) += hns-roce-hw-v1.o hns-roce-hw-v1-objs := hns_roce_hw_v1.o obj-$(CONFIG_INFINIBAND_HNS_HIP08) += hns-roce-hw-v2.o -hns-roce-hw-v2-objs := hns_roce_hw_v2.o +hns-roce-hw-v2-objs := hns_roce_hw_v2.o hns_roce_hw_v2_dfx.o diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.h b/drivers/infiniband/hw/hns/hns_roce_cmd.h index 059fd1da493e..2b6ac646ca9a 100644 --- 
a/drivers/infiniband/hw/hns/hns_roce_cmd.h +++ b/drivers/infiniband/hw/hns/hns_roce_cmd.h @@ -53,6 +53,7 @@ enum { HNS_ROCE_CMD_QUERY_QPC = 0x42, HNS_ROCE_CMD_MODIFY_CQC = 0x52, + HNS_ROCE_CMD_QUERY_CQC = 0x53, /* CQC BT commands */ HNS_ROCE_CMD_WRITE_CQC_BT0 = 0x10, HNS_ROCE_CMD_WRITE_CQC_BT1 = 0x11, diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 88a5a9ac7350..563cf39df6d5 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -867,6 +867,11 @@ struct hns_roce_work { int sub_type; }; +struct hns_roce_dfx_hw { + int (*query_cqc_info)(struct hns_roce_dev *hr_dev, u32 cqn, + int *buffer); +}; + struct hns_roce_hw { int (*reset)(struct hns_roce_dev *hr_dev, bool enable); int (*cmq_init)(struct hns_roce_dev *hr_dev); @@ -984,6 +989,7 @@ struct hns_roce_dev { const struct hns_roce_hw *hw; void *priv; struct workqueue_struct *irq_workq; + const struct hns_roce_dfx_hw *dfx; }; static inline struct hns_roce_dev *to_hr_dev(struct ib_device *ib_dev) @@ -1196,4 +1202,6 @@ int hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index); int hns_roce_init(struct hns_roce_dev *hr_dev); void hns_roce_exit(struct hns_roce_dev *hr_dev); +int hns_roce_fill_res_entry(struct sk_buff *msg, + struct rdma_restrack_entry *res); #endif /* _HNS_ROCE_DEVICE_H */ diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index b3cda5803c02..0e9718293482 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -6068,6 +6068,10 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, return ret; } +static const struct hns_roce_dfx_hw hns_roce_dfx_hw_v2 = { + .query_cqc_info = hns_roce_v2_query_cqc_info, +}; + static const struct ib_device_ops hns_roce_v2_dev_ops = { .destroy_qp = hns_roce_v2_destroy_qp, .modify_cq = hns_roce_v2_modify_cq, @@ -6140,6 +6144,7 @@ static int 
hns_roce_hw_v2_get_cfg(struct hns_roce_dev *hr_dev, int i; hr_dev->hw = &hns_roce_hw_v2; + hr_dev->dfx = &hns_roce_dfx_hw_v2; hr_dev->sdb_offset = ROCEE_DB_SQ_L_0_REG; hr_dev->odb_offset = hr_dev->sdb_offset; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 1136763fa12f..edfdbe2ce0db 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -1799,6 +1799,9 @@ struct hns_roce_sccc_clr_done { __le32 rsv[5]; }; +int hns_roce_v2_query_cqc_info(struct hns_roce_dev *hr_dev, u32 cqn, + int *buffer); + static inline void hns_roce_write64(struct hns_roce_dev *hr_dev, __le32 val[2], void __iomem *dest) { diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2_dfx.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2_dfx.c new file mode 100644 index 000000000000..5a97b5a0b7be --- /dev/null +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2_dfx.c @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +// Copyright (c) 2019 Hisilicon Limited. 
+ +#include "hnae3.h" +#include "hns_roce_device.h" +#include "hns_roce_cmd.h" +#include "hns_roce_hw_v2.h" + +int hns_roce_v2_query_cqc_info(struct hns_roce_dev *hr_dev, u32 cqn, + int *buffer) +{ + struct hns_roce_v2_cq_context *cq_context; + struct hns_roce_cmd_mailbox *mailbox; + int ret; + + mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + cq_context = mailbox->buf; + ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, cqn, 0, + HNS_ROCE_CMD_QUERY_CQC, + HNS_ROCE_CMD_TIMEOUT_MSECS); + if (ret) { + dev_err(hr_dev->dev, "QUERY cqc cmd process error\n"); + goto err_mailbox; + } + + memcpy(buffer, cq_context, sizeof(*cq_context)); + +err_mailbox: + hns_roce_free_cmd_mailbox(hr_dev, mailbox); + + return ret; +} diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index e85b31a72eea..363a996f475e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -455,6 +455,7 @@ static const struct ib_device_ops hns_roce_dev_ops = { .destroy_ah = hns_roce_destroy_ah, .destroy_cq = hns_roce_ib_destroy_cq, .disassociate_ucontext = hns_roce_disassociate_ucontext, + .fill_res_entry = hns_roce_fill_res_entry, .get_dma_mr = hns_roce_get_dma_mr, .get_link_layer = hns_roce_get_link_layer, .get_netdev = hns_roce_get_netdev, diff --git a/drivers/infiniband/hw/hns/hns_roce_restrack.c b/drivers/infiniband/hw/hns/hns_roce_restrack.c new file mode 100644 index 000000000000..0a31d0a3d657 --- /dev/null +++ b/drivers/infiniband/hw/hns/hns_roce_restrack.c @@ -0,0 +1,126 @@ +// SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +// Copyright (c) 2019 Hisilicon Limited. 
+ +#include +#include +#include +#include "hnae3.h" +#include "hns_roce_common.h" +#include "hns_roce_device.h" +#include "hns_roce_hw_v2.h" + +static int hns_roce_fill_cq(struct sk_buff *msg, + struct hns_roce_v2_cq_context *context) +{ + if (rdma_nl_put_driver_u32(msg, "state", + roce_get_field(context->byte_4_pg_ceqn, + V2_CQC_BYTE_4_ARM_ST_M, + V2_CQC_BYTE_4_ARM_ST_S))) + goto err; + + if (rdma_nl_put_driver_u32(msg, "ceqn", + roce_get_field(context->byte_4_pg_ceqn, + V2_CQC_BYTE_4_CEQN_M, + V2_CQC_BYTE_4_CEQN_S))) + goto err; + + if (rdma_nl_put_driver_u32(msg, "cqn", + roce_get_field(context->byte_8_cqn, + V2_CQC_BYTE_8_CQN_M, + V2_CQC_BYTE_8_CQN_S))) + goto err; + + if (rdma_nl_put_driver_u32(msg, "hopnum", + roce_get_field(context->byte_16_hop_addr, + V2_CQC_BYTE_16_CQE_HOP_NUM_M, + V2_CQC_BYTE_16_CQE_HOP_NUM_S))) + goto err; + + if (rdma_nl_put_driver_u32( + msg, "pi", + roce_get_field(context->byte_28_cq_pi, + V2_CQC_BYTE_28_CQ_PRODUCER_IDX_M, + V2_CQC_BYTE_28_CQ_PRODUCER_IDX_S))) + goto err; + + if (rdma_nl_put_driver_u32( + msg, "ci", + roce_get_field(context->byte_32_cq_ci, + V2_CQC_BYTE_32_CQ_CONSUMER_IDX_M, + V2_CQC_BYTE_32_CQ_CONSUMER_IDX_S))) + goto err; + + if (rdma_nl_put_driver_u32( + msg, "coalesce", + roce_get_field(context->byte_56_cqe_period_maxcnt, + V2_CQC_BYTE_56_CQ_MAX_CNT_M, + V2_CQC_BYTE_56_CQ_MAX_CNT_S))) + goto err; + + if (rdma_nl_put_driver_u32( + msg, "period", + roce_get_field(context->byte_56_cqe_period_maxcnt, + V2_CQC_BYTE_56_CQ_PERIOD_M, + V2_CQC_BYTE_56_CQ_PERIOD_S))) + goto err; + + if (rdma_nl_put_driver_u32(msg, "cnt", + roce_get_field(context->byte_52_cqe_cnt, + V2_CQC_BYTE_52_CQE_CNT_M, + V2_CQC_BYTE_52_CQE_CNT_S))) + goto err; + + return 0; + +err: + return -EMSGSIZE; +} + +static int hns_roce_fill_res_cq_entry(struct sk_buff *msg, + struct rdma_restrack_entry *res) +{ + struct ib_cq *ib_cq = container_of(res, struct ib_cq, res); + struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device); + struct hns_roce_cq *hr_cq = 
to_hr_cq(ib_cq); + struct hns_roce_v2_cq_context *context; + struct nlattr *table_attr; + int ret; + + if (!hr_dev->dfx->query_cqc_info) + return -EINVAL; + + context = kzalloc(sizeof(struct hns_roce_v2_cq_context), GFP_KERNEL); + if (!context) + return -ENOMEM; + + ret = hr_dev->dfx->query_cqc_info(hr_dev, hr_cq->cqn, (int *)context); + if (ret) + goto err; + + table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER); + if (!table_attr) + goto err; + + if (hns_roce_fill_cq(msg, context)) + goto err_cancel_table; + + nla_nest_end(msg, table_attr); + kfree(context); + + return 0; + +err_cancel_table: + nla_nest_cancel(msg, table_attr); +err: + kfree(context); + return -EMSGSIZE; +} + +int hns_roce_fill_res_entry(struct sk_buff *msg, + struct rdma_restrack_entry *res) +{ + if (res->type == RDMA_RESTRACK_CQ) + return hns_roce_fill_res_cq_entry(msg, res); + + return 0; +} -- cgit v1.2.3 From d0b5c01bb446f87e94265b172c00f4e89829116d Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Thu, 4 Apr 2019 10:22:47 -0500 Subject: RDMA/umem: Use correct value for SG entries in sg_copy_to_buffer() With page combining, the assumption that number of SG entries in umem SGL equal to number of system pages in umem no longer holds. umem->sg_nents tracks the SG entries in umem SGL. Use it in sg_pcopy_to_buffer() as opposed to ib_umem_num_pages(umem). 
Fixes: d10bcf947a3e ("RDMA/umem: Combine contiguous PAGE_SIZE regions in SGEs") Reported-by: Jason Gunthorpe Signed-off-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/umem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index d31f5e386c7d..7e912a91ec8e 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -381,8 +381,8 @@ int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset, return -EINVAL; } - ret = sg_pcopy_to_buffer(umem->sg_head.sgl, ib_umem_num_pages(umem), - dst, length, offset + ib_umem_offset(umem)); + ret = sg_pcopy_to_buffer(umem->sg_head.sgl, umem->sg_nents, dst, length, + offset + ib_umem_offset(umem)); if (ret < 0) return ret; -- cgit v1.2.3 From 2b277dae0679c8177f161278dbad035688838d6e Mon Sep 17 00:00:00 2001 From: chenglang Date: Sun, 7 Apr 2019 13:23:37 +0800 Subject: RDMA/hns: Support to create 1M srq queue In mhop 0 mode, 64*bt_num queues can be supported. In mhop 1 mode, 32K*bt_num queues can be supported. Configure srqc_hop_num to 1 to support 1M SRQ queues. 
Signed-off-by: chenglang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 0e9718293482..f155d2d0b8cd 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -1560,7 +1560,7 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) caps->qpc_hop_num = HNS_ROCE_CONTEXT_HOP_NUM; caps->srqc_ba_pg_sz = 0; caps->srqc_buf_pg_sz = 0; - caps->srqc_hop_num = HNS_ROCE_HOP_NUM_0; + caps->srqc_hop_num = HNS_ROCE_CONTEXT_HOP_NUM; caps->cqc_ba_pg_sz = 0; caps->cqc_buf_pg_sz = 0; caps->cqc_hop_num = HNS_ROCE_CONTEXT_HOP_NUM; -- cgit v1.2.3 From 4b38da75e089a149d224fde8f3be1f8be7c0d32e Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 4 Apr 2019 16:56:57 -0300 Subject: RDMA/drivers: Convert easy drivers to use ib_device_set_netdev() Drivers that never change their ndev dynamically do not need to use the get_netdev callback. 
Signed-off-by: Jason Gunthorpe Acked-by: Selvin Xavier Acked-by: Michal Kalderon Acked-by: Adit Ranadive --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 15 ------------- drivers/infiniband/hw/bnxt_re/ib_verbs.h | 2 -- drivers/infiniband/hw/bnxt_re/main.c | 6 +++++- drivers/infiniband/hw/hns/hns_roce_main.c | 30 +++++++++----------------- drivers/infiniband/hw/ocrdma/ocrdma_main.c | 8 ++++++- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 18 ---------------- drivers/infiniband/hw/ocrdma/ocrdma_verbs.h | 1 - drivers/infiniband/hw/qedr/main.c | 19 ++++------------ drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c | 24 +++++---------------- 9 files changed, 31 insertions(+), 92 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index dc53604171a0..8cf255054fda 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -119,21 +119,6 @@ static int bnxt_re_build_sgl(struct ib_sge *ib_sg_list, } /* Device */ -struct net_device *bnxt_re_get_netdev(struct ib_device *ibdev, u8 port_num) -{ - struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev); - struct net_device *netdev = NULL; - - rcu_read_lock(); - if (rdev) - netdev = rdev->netdev; - if (netdev) - dev_hold(netdev); - - rcu_read_unlock(); - return netdev; -} - int bnxt_re_query_device(struct ib_device *ibdev, struct ib_device_attr *ib_attr, struct ib_udata *udata) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h index bd4fa22985eb..09a33049e42f 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h @@ -142,8 +142,6 @@ struct bnxt_re_ucontext { spinlock_t sh_lock; /* protect shpg */ }; -struct net_device *bnxt_re_get_netdev(struct ib_device *ibdev, u8 port_num); - int bnxt_re_query_device(struct ib_device *ibdev, struct ib_device_attr *ib_attr, struct ib_udata *udata); diff --git a/drivers/infiniband/hw/bnxt_re/main.c 
b/drivers/infiniband/hw/bnxt_re/main.c index bbdfbbf5e9a5..814f959c7db9 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -617,7 +617,6 @@ static const struct ib_device_ops bnxt_re_dev_ops = { .get_dma_mr = bnxt_re_get_dma_mr, .get_hw_stats = bnxt_re_ib_get_hw_stats, .get_link_layer = bnxt_re_get_link_layer, - .get_netdev = bnxt_re_get_netdev, .get_port_immutable = bnxt_re_get_port_immutable, .map_mr_sg = bnxt_re_map_mr_sg, .mmap = bnxt_re_mmap, @@ -646,6 +645,7 @@ static const struct ib_device_ops bnxt_re_dev_ops = { static int bnxt_re_register_ib(struct bnxt_re_dev *rdev) { struct ib_device *ibdev = &rdev->ibdev; + int ret; /* ib device init */ ibdev->owner = THIS_MODULE; @@ -693,6 +693,10 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev) rdma_set_device_sysfs_group(ibdev, &bnxt_re_dev_attr_group); ibdev->driver_id = RDMA_DRIVER_BNXT_RE; ib_set_device_ops(ibdev, &bnxt_re_dev_ops); + ret = ib_device_set_netdev(&rdev->ibdev, rdev->netdev, 1); + if (ret) + return ret; + return ib_register_device(ibdev, "bnxt_re%d"); } diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 363a996f475e..8da5f18bf820 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -234,25 +234,6 @@ static int hns_roce_query_device(struct ib_device *ib_dev, return 0; } -static struct net_device *hns_roce_get_netdev(struct ib_device *ib_dev, - u8 port_num) -{ - struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev); - struct net_device *ndev; - - if (port_num < 1 || port_num > hr_dev->caps.num_ports) - return NULL; - - rcu_read_lock(); - - ndev = hr_dev->iboe.netdevs[port_num - 1]; - if (ndev) - dev_hold(ndev); - - rcu_read_unlock(); - return ndev; -} - static int hns_roce_query_port(struct ib_device *ib_dev, u8 port_num, struct ib_port_attr *props) { @@ -458,7 +439,6 @@ static const struct ib_device_ops hns_roce_dev_ops = { .fill_res_entry = 
hns_roce_fill_res_entry, .get_dma_mr = hns_roce_get_dma_mr, .get_link_layer = hns_roce_get_link_layer, - .get_netdev = hns_roce_get_netdev, .get_port_immutable = hns_roce_port_immutable, .mmap = hns_roce_mmap, .modify_device = hns_roce_modify_device, @@ -502,6 +482,7 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) struct hns_roce_ib_iboe *iboe = NULL; struct ib_device *ib_dev = NULL; struct device *dev = hr_dev->dev; + unsigned int i; iboe = &hr_dev->iboe; spin_lock_init(&iboe->lock); @@ -567,6 +548,15 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) ib_dev->driver_id = RDMA_DRIVER_HNS; ib_set_device_ops(ib_dev, hr_dev->hw->hns_roce_dev_ops); ib_set_device_ops(ib_dev, &hns_roce_dev_ops); + for (i = 0; i < hr_dev->caps.num_ports; i++) { + if (!hr_dev->iboe.netdevs[i]) + continue; + + ret = ib_device_set_netdev(ib_dev, hr_dev->iboe.netdevs[i], + i + 1); + if (ret) + return ret; + } ret = ib_register_device(ib_dev, "hns_%d"); if (ret) { dev_err(dev, "ib_register_device failed!\n"); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index 34d3d59f3ca7..52d10c86caf2 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -161,7 +161,6 @@ static const struct ib_device_ops ocrdma_dev_ops = { .get_dev_fw_str = get_dev_fw_str, .get_dma_mr = ocrdma_get_dma_mr, .get_link_layer = ocrdma_link_layer, - .get_netdev = ocrdma_get_netdev, .get_port_immutable = ocrdma_port_immutable, .map_mr_sg = ocrdma_map_mr_sg, .mmap = ocrdma_mmap, @@ -197,6 +196,8 @@ static const struct ib_device_ops ocrdma_dev_srq_ops = { static int ocrdma_register_device(struct ocrdma_dev *dev) { + int ret; + ocrdma_get_guid(dev, (u8 *)&dev->ibdev.node_guid); BUILD_BUG_ON(sizeof(OCRDMA_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX); memcpy(dev->ibdev.node_desc, OCRDMA_NODE_DESC, @@ -251,6 +252,10 @@ static int ocrdma_register_device(struct ocrdma_dev *dev) } 
rdma_set_device_sysfs_group(&dev->ibdev, &ocrdma_attr_group); dev->ibdev.driver_id = RDMA_DRIVER_OCRDMA; + ret = ib_device_set_netdev(&dev->ibdev, dev->nic_info.netdev, 1); + if (ret) + return ret; + return ib_register_device(&dev->ibdev, "ocrdma%d"); } @@ -308,6 +313,7 @@ static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info) pr_err("Unable to allocate ib device\n"); return NULL; } + dev->mbx_cmd = kzalloc(sizeof(struct ocrdma_mqe_emb_cmd), GFP_KERNEL); if (!dev->mbx_cmd) goto idr_err; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index ffdd3ac55086..35ec87015792 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -113,24 +113,6 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr, return 0; } -struct net_device *ocrdma_get_netdev(struct ib_device *ibdev, u8 port_num) -{ - struct ocrdma_dev *dev; - struct net_device *ndev = NULL; - - rcu_read_lock(); - - dev = get_ocrdma_dev(ibdev); - if (dev) - ndev = dev->nic_info.netdev; - if (ndev) - dev_hold(ndev); - - rcu_read_unlock(); - - return ndev; -} - static inline void get_link_speed_and_width(struct ocrdma_dev *dev, u8 *ib_speed, u8 *ib_width) { diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h index c6489a1439dc..d76aae7ed0d3 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h @@ -61,7 +61,6 @@ enum rdma_protocol_type ocrdma_query_protocol(struct ib_device *device, u8 port_num); void ocrdma_get_guid(struct ocrdma_dev *, u8 *guid); -struct net_device *ocrdma_get_netdev(struct ib_device *device, u8 port_num); int ocrdma_query_pkey(struct ib_device *, u8 port, u16 index, u16 *pkey); int ocrdma_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata); diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index 
cbcdc5c669c6..a0a49ed26860 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -81,20 +81,6 @@ static void qedr_get_dev_fw_str(struct ib_device *ibdev, char *str) (fw_ver >> 8) & 0xFF, fw_ver & 0xFF); } -static struct net_device *qedr_get_netdev(struct ib_device *dev, u8 port_num) -{ - struct qedr_dev *qdev; - - qdev = get_qedr_dev(dev); - dev_hold(qdev->ndev); - - /* The HW vendor's device driver must guarantee - * that this function returns NULL before the net device has finished - * NETDEV_UNREGISTER state. - */ - return qdev->ndev; -} - static int qedr_roce_port_immutable(struct ib_device *ibdev, u8 port_num, struct ib_port_immutable *immutable) { @@ -219,7 +205,6 @@ static const struct ib_device_ops qedr_dev_ops = { .get_dev_fw_str = qedr_get_dev_fw_str, .get_dma_mr = qedr_get_dma_mr, .get_link_layer = qedr_link_layer, - .get_netdev = qedr_get_netdev, .map_mr_sg = qedr_map_mr_sg, .mmap = qedr_mmap, .modify_port = qedr_modify_port, @@ -295,6 +280,10 @@ static int qedr_register_device(struct qedr_dev *dev) ib_set_device_ops(&dev->ibdev, &qedr_dev_ops); dev->ibdev.driver_id = RDMA_DRIVER_QEDR; + rc = ib_device_set_netdev(&dev->ibdev, dev->ndev, 1); + if (rc) + return rc; + return ib_register_device(&dev->ibdev, "qedr%d"); } diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index 823846947a5b..f81d38ae56a0 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -143,24 +143,6 @@ static int pvrdma_port_immutable(struct ib_device *ibdev, u8 port_num, return 0; } -static struct net_device *pvrdma_get_netdev(struct ib_device *ibdev, - u8 port_num) -{ - struct net_device *netdev; - struct pvrdma_dev *dev = to_vdev(ibdev); - - if (port_num != 1) - return NULL; - - rcu_read_lock(); - netdev = dev->netdev; - if (netdev) - dev_hold(netdev); - rcu_read_unlock(); - - return netdev; -} - static const struct ib_device_ops 
pvrdma_dev_ops = { .add_gid = pvrdma_add_gid, .alloc_mr = pvrdma_alloc_mr, @@ -179,7 +161,6 @@ static const struct ib_device_ops pvrdma_dev_ops = { .get_dev_fw_str = pvrdma_get_fw_ver_str, .get_dma_mr = pvrdma_get_dma_mr, .get_link_layer = pvrdma_port_link_layer, - .get_netdev = pvrdma_get_netdev, .get_port_immutable = pvrdma_port_immutable, .map_mr_sg = pvrdma_map_mr_sg, .mmap = pvrdma_mmap, @@ -281,6 +262,9 @@ static int pvrdma_register_device(struct pvrdma_dev *dev) goto err_qp_free; } dev->ib_dev.driver_id = RDMA_DRIVER_VMW_PVRDMA; + ret = ib_device_set_netdev(&dev->ib_dev, dev->netdev, 1); + if (ret) + return ret; spin_lock_init(&dev->srq_tbl_lock); rdma_set_device_sysfs_group(&dev->ib_dev, &pvrdma_attr_group); @@ -724,6 +708,7 @@ static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev, pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE); break; case NETDEV_UNREGISTER: + ib_device_set_netdev(&dev->ib_dev, NULL, 1); dev_put(dev->netdev); dev->netdev = NULL; break; @@ -735,6 +720,7 @@ static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev, if ((dev->netdev == NULL) && (pci_get_drvdata(pdev_net) == ndev)) { /* this is our netdev */ + ib_device_set_netdev(&dev->ib_dev, ndev, 1); dev->netdev = ndev; dev_hold(ndev); } -- cgit v1.2.3 From ab7efbe24b283b38f323f906502ef6ea09156ebc Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Thu, 4 Apr 2019 16:56:58 -0300 Subject: RDMA/cxgb4: Use ib_device_set_netdev() cxgb4 has a simple non-dynamic use of get_netdev, so conversion is straightforward. 
Signed-off-by: Steve Wise Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/cxgb4/provider.c | 36 ++++++++++++++++------------------ 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 74ffc24321cd..3c5197ee77f5 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -482,24 +482,6 @@ static void get_dev_fw_str(struct ib_device *dev, char *str) FW_HDR_FW_VER_BUILD_G(c4iw_dev->rdev.lldi.fw_vers)); } -static struct net_device *get_netdev(struct ib_device *dev, u8 port) -{ - struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev, ibdev); - struct c4iw_rdev *rdev = &c4iw_dev->rdev; - struct net_device *ndev; - - if (!port || port > rdev->lldi.nports) - return NULL; - - rcu_read_lock(); - ndev = rdev->lldi.ports[port - 1]; - if (ndev) - dev_hold(ndev); - rcu_read_unlock(); - - return ndev; -} - static int fill_res_entry(struct sk_buff *msg, struct rdma_restrack_entry *res) { return (res->type < ARRAY_SIZE(c4iw_restrack_funcs) && @@ -527,7 +509,6 @@ static const struct ib_device_ops c4iw_dev_ops = { .get_dev_fw_str = get_dev_fw_str, .get_dma_mr = c4iw_get_dma_mr, .get_hw_stats = c4iw_get_mib, - .get_netdev = get_netdev, .get_port_immutable = c4iw_port_immutable, .map_mr_sg = c4iw_map_mr_sg, .mmap = c4iw_mmap, @@ -549,6 +530,20 @@ static const struct ib_device_ops c4iw_dev_ops = { INIT_RDMA_OBJ_SIZE(ib_ucontext, c4iw_ucontext, ibucontext), }; +static int set_netdevs(struct ib_device *ib_dev, struct c4iw_rdev *rdev) +{ + int ret; + int i; + + for (i = 0; i < rdev->lldi.nports; i++) { + ret = ib_device_set_netdev(ib_dev, rdev->lldi.ports[i], + i + 1); + if (ret) + return ret; + } + return 0; +} + void c4iw_register_device(struct work_struct *work) { int ret; @@ -613,6 +608,9 @@ void c4iw_register_device(struct work_struct *work) rdma_set_device_sysfs_group(&dev->ibdev, &c4iw_attr_group); dev->ibdev.driver_id = 
RDMA_DRIVER_CXGB4; ib_set_device_ops(&dev->ibdev, &c4iw_dev_ops); + ret = set_netdevs(&dev->ibdev, &dev->rdev); + if (ret) + goto err_kfree_iwcm; ret = ib_register_device(&dev->ibdev, "cxgb4_%d"); if (ret) goto err_kfree_iwcm; -- cgit v1.2.3 From 95579e785a9ae7d98c199b38c4b79b64a31d90fa Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Thu, 28 Mar 2019 15:27:33 +0200 Subject: RDMA/mlx5: Move netdev info into the port struct Netdev info is stored in a separate array and holds data relevant on a per port basis, move it to be part of the port struct. Signed-off-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 32 ++++++++++++++++---------------- drivers/infiniband/hw/mlx5/mlx5_ib.h | 14 +++++++------- drivers/infiniband/hw/mlx5/qp.c | 4 ++-- 3 files changed, 25 insertions(+), 25 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index ebd3641e7e62..315167f36466 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -257,11 +257,11 @@ static struct net_device *mlx5_ib_get_netdev(struct ib_device *device, /* Ensure ndev does not disappear before we invoke dev_hold() */ - read_lock(&ibdev->roce[port_num - 1].netdev_lock); - ndev = ibdev->roce[port_num - 1].netdev; + read_lock(&ibdev->port[port_num - 1].roce.netdev_lock); + ndev = ibdev->port[port_num - 1].roce.netdev; if (ndev) dev_hold(ndev); - read_unlock(&ibdev->roce[port_num - 1].netdev_lock); + read_unlock(&ibdev->port[port_num - 1].roce.netdev_lock); out: mlx5_ib_put_native_port_mdev(ibdev, port_num); @@ -1952,11 +1952,11 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx, print_lib_caps(dev, context->lib_caps); if (dev->lag_active) { - u8 port = mlx5_core_native_port_num(dev->mdev); + u8 port = mlx5_core_native_port_num(dev->mdev) - 1; atomic_set(&context->tx_port_affinity, atomic_add_return( - 1, &dev->roce[port].tx_port_affinity)); + 1, 
&dev->port[port].roce.tx_port_affinity)); } return 0; @@ -5024,10 +5024,10 @@ static int mlx5_add_netdev_notifier(struct mlx5_ib_dev *dev, u8 port_num) { int err; - dev->roce[port_num].nb.notifier_call = mlx5_netdev_event; - err = register_netdevice_notifier(&dev->roce[port_num].nb); + dev->port[port_num].roce.nb.notifier_call = mlx5_netdev_event; + err = register_netdevice_notifier(&dev->port[port_num].roce.nb); if (err) { - dev->roce[port_num].nb.notifier_call = NULL; + dev->port[port_num].roce.nb.notifier_call = NULL; return err; } @@ -5036,9 +5036,9 @@ static int mlx5_add_netdev_notifier(struct mlx5_ib_dev *dev, u8 port_num) static void mlx5_remove_netdev_notifier(struct mlx5_ib_dev *dev, u8 port_num) { - if (dev->roce[port_num].nb.notifier_call) { - unregister_netdevice_notifier(&dev->roce[port_num].nb); - dev->roce[port_num].nb.notifier_call = NULL; + if (dev->port[port_num].roce.nb.notifier_call) { + unregister_netdevice_notifier(&dev->port[port_num].roce.nb); + dev->port[port_num].roce.nb.notifier_call = NULL; } } @@ -5587,7 +5587,7 @@ static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev, mlx5_ib_err(ibdev, "Failed to unaffiliate port %u\n", port_num + 1); - ibdev->roce[port_num].last_port_state = IB_PORT_DOWN; + ibdev->port[port_num].roce.last_port_state = IB_PORT_DOWN; } /* The mlx5_ib_multiport_mutex should be held when calling this function */ @@ -5860,7 +5860,7 @@ int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) for (i = 0; i < dev->num_ports; i++) { spin_lock_init(&dev->port[i].mp.mpi_lock); - rwlock_init(&dev->roce[i].netdev_lock); + rwlock_init(&dev->port[i].roce.netdev_lock); } err = mlx5_ib_init_multiport_master(dev); @@ -6163,9 +6163,9 @@ static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev) int i; for (i = 0; i < dev->num_ports; i++) { - dev->roce[i].dev = dev; - dev->roce[i].native_port_num = i + 1; - dev->roce[i].last_port_state = IB_PORT_DOWN; + dev->port[i].roce.dev = dev; + dev->port[i].roce.native_port_num = i + 
1; + dev->port[i].roce.last_port_state = IB_PORT_DOWN; } dev->ib_dev.uverbs_ex_cmd_mask |= diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 3e8d54618c78..ad0effec3d33 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -702,12 +702,6 @@ struct mlx5_ib_multiport { spinlock_t mpi_lock; }; -struct mlx5_ib_port { - struct mlx5_ib_counters cnts; - struct mlx5_ib_multiport mp; - struct mlx5_ib_dbg_cc_params *dbg_cc_params; -}; - struct mlx5_roce { /* Protect mlx5_ib_get_netdev from invoking dev_hold() with a NULL * netdev pointer @@ -721,6 +715,13 @@ struct mlx5_roce { u8 native_port_num; }; +struct mlx5_ib_port { + struct mlx5_ib_counters cnts; + struct mlx5_ib_multiport mp; + struct mlx5_ib_dbg_cc_params *dbg_cc_params; + struct mlx5_roce roce; +}; + struct mlx5_ib_dbg_param { int offset; struct mlx5_ib_dev *dev; @@ -905,7 +906,6 @@ struct mlx5_ib_dev { struct ib_device ib_dev; struct mlx5_core_dev *mdev; struct notifier_block mdev_events; - struct mlx5_roce roce[MLX5_MAX_PORTS]; int num_ports; /* serialize update of capability mask */ diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 76ae54bb2230..db03b2768a9d 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -3303,7 +3303,7 @@ static unsigned int get_tx_affinity(struct mlx5_ib_dev *dev, } else { tx_port_affinity = (unsigned int)atomic_add_return( - 1, &dev->roce[port_num].tx_port_affinity) % + 1, &dev->port[port_num].roce.tx_port_affinity) % MLX5_MAX_PORTS + 1; mlx5_ib_dbg(dev, "Set tx affinity 0x%x to qpn 0x%x\n", @@ -3408,7 +3408,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, (ibqp->qp_type == IB_QPT_XRC_INI) || (ibqp->qp_type == IB_QPT_XRC_TGT)) { if (dev->lag_active) { - u8 p = mlx5_core_native_port_num(dev->mdev); + u8 p = mlx5_core_native_port_num(dev->mdev) - 1; tx_affinity = get_tx_affinity(dev, pd, base, p, udata); context->flags |= 
cpu_to_be32(tx_affinity << 24); -- cgit v1.2.3 From 4a6dc8552ab2f670fdff317a5ac1bc42f85a8772 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Thu, 28 Mar 2019 15:27:34 +0200 Subject: RDMA/mlx5: Free IB device on remove Simplify the code and move the deallocation of the IB device into the remove function. Signed-off-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/ib_rep.c | 5 +---- drivers/infiniband/hw/mlx5/main.c | 4 ++-- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c index b8639ac71336..87d553396fb4 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.c +++ b/drivers/infiniband/hw/mlx5/ib_rep.c @@ -65,10 +65,8 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) ibdev->mdev = dev; ibdev->num_ports = max(MLX5_CAP_GEN(dev, num_ports), MLX5_CAP_GEN(dev, num_vhca_ports)); - if (!__mlx5_ib_add(ibdev, profile)) { - ib_dealloc_device(&ibdev->ib_dev); + if (!__mlx5_ib_add(ibdev, profile)) return -EINVAL; - } rep->rep_if[REP_IB].priv = ibdev; @@ -86,7 +84,6 @@ mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep) dev = mlx5_ib_rep_to_dev(rep); __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX); rep->rep_if[REP_IB].priv = NULL; - ib_dealloc_device(&dev->ib_dev); } static void *mlx5_ib_vport_get_proto_dev(struct mlx5_eswitch_rep *rep) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 315167f36466..23f31069ec0a 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -6417,6 +6417,8 @@ void __mlx5_ib_remove(struct mlx5_ib_dev *dev, if (profile->stage[stage].cleanup) profile->stage[stage].cleanup(dev); } + + ib_dealloc_device(&dev->ib_dev); } void *__mlx5_ib_add(struct mlx5_ib_dev *dev, @@ -6639,8 +6641,6 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context) dev = context; __mlx5_ib_remove(dev, dev->profile, 
MLX5_IB_STAGE_MAX); - - ib_dealloc_device((struct ib_device *)dev); } static struct mlx5_interface mlx5_ib_interface = { -- cgit v1.2.3 From da796ccb3e0eba24b15beedb168178c9b74ce6f2 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Thu, 28 Mar 2019 15:27:35 +0200 Subject: RDMA/mlx5: Move ports allocation to outside of INIT stage In downstream patches we will need access to the ports before doing any stages, in order to set net device per representor. Signed-off-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/ib_rep.c | 12 ++++++++++-- drivers/infiniband/hw/mlx5/main.c | 24 ++++++++++++------------ 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c index 87d553396fb4..14ac728b460c 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.c +++ b/drivers/infiniband/hw/mlx5/ib_rep.c @@ -51,6 +51,7 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) { const struct mlx5_ib_profile *profile; struct mlx5_ib_dev *ibdev; + int num_ports = 1; if (rep->vport == MLX5_VPORT_UPLINK) profile = &uplink_rep_profile; @@ -61,10 +62,17 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) if (!ibdev) return -ENOMEM; + ibdev->port = kcalloc(num_ports, sizeof(*ibdev->port), + GFP_KERNEL); + if (!ibdev->port) { + ib_dealloc_device(&ibdev->ib_dev); + return -ENOMEM; + } + ibdev->rep = rep; ibdev->mdev = dev; - ibdev->num_ports = max(MLX5_CAP_GEN(dev, num_ports), - MLX5_CAP_GEN(dev, num_vhca_ports)); + ibdev->num_ports = num_ports; + if (!__mlx5_ib_add(ibdev, profile)) return -EINVAL; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 23f31069ec0a..0d86b5266960 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -5844,7 +5844,6 @@ void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev) srcu_barrier(&dev->mr_srcu); 
cleanup_srcu_struct(&dev->mr_srcu); } - kfree(dev->port); } int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) @@ -5853,11 +5852,6 @@ int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) int err; int i; - dev->port = kcalloc(dev->num_ports, sizeof(*dev->port), - GFP_KERNEL); - if (!dev->port) - return -ENOMEM; - for (i = 0; i < dev->num_ports; i++) { spin_lock_init(&dev->port[i].mp.mpi_lock); rwlock_init(&dev->port[i].roce.netdev_lock); @@ -5865,7 +5859,7 @@ int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) err = mlx5_ib_init_multiport_master(dev); if (err) - goto err_free_port; + return err; if (!mlx5_core_mp_enabled(mdev)) { for (i = 1; i <= dev->num_ports; i++) { @@ -5906,9 +5900,6 @@ int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) err_mp: mlx5_ib_cleanup_multiport_master(dev); -err_free_port: - kfree(dev->port); - return -ENOMEM; } @@ -6418,6 +6409,7 @@ void __mlx5_ib_remove(struct mlx5_ib_dev *dev, profile->stage[stage].cleanup(dev); } + kfree(dev->port); ib_dealloc_device(&dev->ib_dev); } @@ -6593,6 +6585,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) enum rdma_link_layer ll; struct mlx5_ib_dev *dev; int port_type_cap; + int num_ports; printk_once(KERN_INFO "%s", mlx5_version); @@ -6608,13 +6601,20 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) if (mlx5_core_is_mp_slave(mdev) && ll == IB_LINK_LAYER_ETHERNET) return mlx5_ib_add_slave_port(mdev); + num_ports = max(MLX5_CAP_GEN(mdev, num_ports), + MLX5_CAP_GEN(mdev, num_vhca_ports)); dev = ib_alloc_device(mlx5_ib_dev, ib_dev); if (!dev) return NULL; + dev->port = kcalloc(num_ports, sizeof(*dev->port), + GFP_KERNEL); + if (!dev->port) { + ib_dealloc_device((struct ib_device *)dev); + return NULL; + } dev->mdev = mdev; - dev->num_ports = max(MLX5_CAP_GEN(mdev, num_ports), - MLX5_CAP_GEN(mdev, num_vhca_ports)); + dev->num_ports = num_ports; return __mlx5_ib_add(dev, &pf_profile); } -- cgit v1.2.3 From 5d8f6a0e92070c938f121258841ac36072d17cc3 Mon Sep 17 00:00:00 2001 From: Mark Bloch 
Date: Thu, 28 Mar 2019 15:27:36 +0200 Subject: RDMA/mlx5: Use correct size for device resources On allocation we use the array size and on destruction num_ports, use the array size on destruction as well, in this context the array corresponds to the native/actual ports on the NIC so no need to adjust this logic for representors. Signed-off-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 0d86b5266960..007ba3f05787 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -4866,8 +4866,6 @@ error0: static void destroy_dev_resources(struct mlx5_ib_resources *devr) { - struct mlx5_ib_dev *dev = - container_of(devr, struct mlx5_ib_dev, devr); int port; mlx5_ib_destroy_srq(devr->s1, NULL); @@ -4881,7 +4879,7 @@ static void destroy_dev_resources(struct mlx5_ib_resources *devr) kfree(devr->p0); /* Make sure no change P_Key work items are still executing */ - for (port = 0; port < dev->num_ports; ++port) + for (port = 0; port < ARRAY_SIZE(devr->ports); ++port) cancel_work_sync(&devr->ports[port].pkey_change_work); } -- cgit v1.2.3 From 6a4d00be08334f15502f2fbec08eabbdddc2e64a Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Thu, 28 Mar 2019 15:27:37 +0200 Subject: RDMA/mlx5: Move rep into port struct In preparation for moving to a model of a single IB device with multiple ports, move rep to be part of the port structure. We mark a representor device by setting is_rep, no functional change with this patch. 
Signed-off-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/devx.c | 2 +- drivers/infiniband/hw/mlx5/flow.c | 2 +- drivers/infiniband/hw/mlx5/ib_rep.c | 7 ++++--- drivers/infiniband/hw/mlx5/main.c | 22 +++++++++++++--------- drivers/infiniband/hw/mlx5/mlx5_ib.h | 3 ++- drivers/infiniband/hw/mlx5/mr.c | 6 +++--- drivers/infiniband/hw/mlx5/qp.c | 4 ++-- 7 files changed, 26 insertions(+), 20 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index d468f11a81d1..9e35560665c5 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -1904,7 +1904,7 @@ static bool devx_is_supported(struct ib_device *device) { struct mlx5_ib_dev *dev = to_mdev(device); - return !dev->rep && MLX5_CAP_GEN(dev->mdev, log_max_uctx); + return !dev->is_rep && MLX5_CAP_GEN(dev->mdev, log_max_uctx); } const struct uapi_definition mlx5_ib_devx_defs[] = { diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c index b9affbdb5d79..09f5bc6142c9 100644 --- a/drivers/infiniband/hw/mlx5/flow.c +++ b/drivers/infiniband/hw/mlx5/flow.c @@ -621,7 +621,7 @@ DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_FLOW_MATCHER, static bool flow_is_supported(struct ib_device *device) { - return !to_mdev(device)->rep; + return !to_mdev(device)->is_rep; } const struct uapi_definition mlx5_ib_flow_defs[] = { diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c index 14ac728b460c..64256dc1d1de 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.c +++ b/drivers/infiniband/hw/mlx5/ib_rep.c @@ -69,7 +69,8 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) return -ENOMEM; } - ibdev->rep = rep; + ibdev->is_rep = true; + ibdev->port[0].rep = rep; ibdev->mdev = dev; ibdev->num_ports = num_ports; @@ -151,12 +152,12 @@ int create_flow_rule_vport_sq(struct mlx5_ib_dev *dev, struct mlx5_flow_handle *flow_rule; struct 
mlx5_eswitch *esw = dev->mdev->priv.eswitch; - if (!dev->rep) + if (!dev->is_rep) return 0; flow_rule = mlx5_eswitch_add_send_to_vport_rule(esw, - dev->rep->vport, + dev->port[0].rep->vport, sq->base.mqp.qpn); if (IS_ERR(flow_rule)) return PTR_ERR(flow_rule); diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 007ba3f05787..38c71565d598 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -173,12 +173,12 @@ static int mlx5_netdev_event(struct notifier_block *this, switch (event) { case NETDEV_REGISTER: write_lock(&roce->netdev_lock); - if (ibdev->rep) { + if (ibdev->is_rep) { struct mlx5_eswitch *esw = ibdev->mdev->priv.eswitch; + struct mlx5_eswitch_rep *rep = ibdev->port[0].rep; struct net_device *rep_ndev; - rep_ndev = mlx5_ib_get_rep_netdev(esw, - ibdev->rep->vport); + rep_ndev = mlx5_ib_get_rep_netdev(esw, rep->vport); if (rep_ndev == ndev) roce->netdev = ndev; } else if (ndev->dev.parent == &mdev->pdev->dev) { @@ -3153,10 +3153,10 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, if (ft_type == MLX5_IB_FT_RX) { fn_type = MLX5_FLOW_NAMESPACE_BYPASS; prio = &dev->flow_db->prios[priority]; - if (!dev->rep && + if (!dev->is_rep && MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap)) flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP; - if (!dev->rep && + if (!dev->is_rep && MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, reformat_l3_tunnel_to_l2)) flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; @@ -3166,7 +3166,7 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, log_max_ft_size)); fn_type = MLX5_FLOW_NAMESPACE_EGRESS; prio = &dev->flow_db->egress_prios[priority]; - if (!dev->rep && + if (!dev->is_rep && MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat)) flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; } @@ -3372,7 +3372,7 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, if (!is_valid_attr(dev->mdev, flow_attr)) return ERR_PTR(-EINVAL); - if 
(dev->rep && is_egress) + if (dev->is_rep && is_egress) return ERR_PTR(-EINVAL); spec = kvzalloc(sizeof(*spec), GFP_KERNEL); @@ -3403,13 +3403,17 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, if (!flow_is_multicast_only(flow_attr)) set_underlay_qp(dev, spec, underlay_qpn); - if (dev->rep) { + if (dev->is_rep) { void *misc; + if (!dev->port[flow_attr->port - 1].rep) { + err = -EINVAL; + goto free; + } misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); MLX5_SET(fte_match_set_misc, misc, source_port, - dev->rep->vport); + dev->port[flow_attr->port - 1].rep->vport); misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index ad0effec3d33..9445e7f2c8fd 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -720,6 +720,7 @@ struct mlx5_ib_port { struct mlx5_ib_multiport mp; struct mlx5_ib_dbg_cc_params *dbg_cc_params; struct mlx5_roce roce; + struct mlx5_eswitch_rep *rep; }; struct mlx5_ib_dbg_param { @@ -940,7 +941,7 @@ struct mlx5_ib_dev { struct mlx5_sq_bfreg fp_bfreg; struct mlx5_ib_delay_drop delay_drop; const struct mlx5_ib_profile *profile; - struct mlx5_eswitch_rep *rep; + bool is_rep; int lag_active; struct mlx5_ib_lb_state lb; diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index b7bb7abea798..4381cddab97b 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -600,7 +600,7 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c) static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev) { - if (!mlx5_debugfs_root || dev->rep) + if (!mlx5_debugfs_root || dev->is_rep) return; debugfs_remove_recursive(dev->cache.root); @@ -614,7 +614,7 @@ static void mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev) struct dentry *dir; int i; - 
if (!mlx5_debugfs_root || dev->rep) + if (!mlx5_debugfs_root || dev->is_rep) return; cache->root = debugfs_create_dir("mr_cache", dev->mdev->priv.dbg_root); @@ -677,7 +677,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) MLX5_IB_UMR_OCTOWORD; ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT; if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) && - !dev->rep && + !dev->is_rep && mlx5_core_is_pf(dev->mdev)) ent->limit = dev->mdev->profile->mr_cache[i].limit; else diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index db03b2768a9d..1bb445669c80 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1436,7 +1436,7 @@ static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev, if (*qp_flags_en & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC) lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST; - if (dev->rep) { + if (dev->is_rep) { lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST; *qp_flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC; } @@ -1648,7 +1648,7 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, return -EOPNOTSUPP; } - if (ucmd.flags & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC || dev->rep) { + if (ucmd.flags & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC || dev->is_rep) { lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST; qp->flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC; } -- cgit v1.2.3 From d5ed8ac34cefc678d0633bfb88d0e20523ba3068 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Thu, 28 Mar 2019 15:27:38 +0200 Subject: RDMA/mlx5: Move default representors SQ steering to rule to modify QP Currently the steering for SQs created on representors is done on creation, once we move to representors as ports of an IB device we need the port argument which is given only at the modify QP stage, adjust the code appropriately. 
Signed-off-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/ib_rep.c | 25 +++++++++++------------ drivers/infiniband/hw/mlx5/ib_rep.h | 13 +++++++----- drivers/infiniband/hw/mlx5/qp.c | 40 ++++++++++++++++++++++++++----------- 3 files changed, 48 insertions(+), 30 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c index 64256dc1d1de..d3988f6ae2ae 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.c +++ b/drivers/infiniband/hw/mlx5/ib_rep.c @@ -146,22 +146,21 @@ struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw, int vport) return mlx5_eswitch_vport_rep(esw, vport); } -int create_flow_rule_vport_sq(struct mlx5_ib_dev *dev, - struct mlx5_ib_sq *sq) +struct mlx5_flow_handle *create_flow_rule_vport_sq(struct mlx5_ib_dev *dev, + struct mlx5_ib_sq *sq, + u16 port) { - struct mlx5_flow_handle *flow_rule; struct mlx5_eswitch *esw = dev->mdev->priv.eswitch; + struct mlx5_eswitch_rep *rep; - if (!dev->is_rep) - return 0; + if (!dev->is_rep || !port) + return NULL; - flow_rule = - mlx5_eswitch_add_send_to_vport_rule(esw, - dev->port[0].rep->vport, - sq->base.mqp.qpn); - if (IS_ERR(flow_rule)) - return PTR_ERR(flow_rule); - sq->flow_rule = flow_rule; + if (!dev->port[port - 1].rep) + return ERR_PTR(-EINVAL); - return 0; + rep = dev->port[port - 1].rep; + + return mlx5_eswitch_add_send_to_vport_rule(esw, rep->vport, + sq->base.mqp.qpn); } diff --git a/drivers/infiniband/hw/mlx5/ib_rep.h b/drivers/infiniband/hw/mlx5/ib_rep.h index 798d41e61fb4..1d9778da8a50 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.h +++ b/drivers/infiniband/hw/mlx5/ib_rep.h @@ -20,8 +20,9 @@ struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw, int vport_index); void mlx5_ib_register_vport_reps(struct mlx5_core_dev *mdev); void mlx5_ib_unregister_vport_reps(struct mlx5_core_dev *mdev); -int create_flow_rule_vport_sq(struct mlx5_ib_dev *dev, - struct mlx5_ib_sq *sq); 
+struct mlx5_flow_handle *create_flow_rule_vport_sq(struct mlx5_ib_dev *dev, + struct mlx5_ib_sq *sq, + u16 port); struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw, int vport_index); #else /* CONFIG_MLX5_ESWITCH */ @@ -52,10 +53,12 @@ struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw, static inline void mlx5_ib_register_vport_reps(struct mlx5_core_dev *mdev) {} static inline void mlx5_ib_unregister_vport_reps(struct mlx5_core_dev *mdev) {} -static inline int create_flow_rule_vport_sq(struct mlx5_ib_dev *dev, - struct mlx5_ib_sq *sq) +static inline +struct mlx5_flow_handle *create_flow_rule_vport_sq(struct mlx5_ib_dev *dev, + struct mlx5_ib_sq *sq, + u16 port) { - return 0; + return NULL; } static inline diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 1bb445669c80..a970af0ffc3e 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -92,6 +92,7 @@ struct mlx5_modify_raw_qp_param { struct mlx5_rate_limit rl; u8 rq_q_ctr_id; + u16 port; }; static void get_cqs(enum ib_qp_type qp_type, @@ -1213,11 +1214,11 @@ static void destroy_raw_packet_qp_tis(struct mlx5_ib_dev *dev, mlx5_cmd_destroy_tis(dev->mdev, sq->tisn, to_mpd(pd)->uid); } -static void destroy_flow_rule_vport_sq(struct mlx5_ib_dev *dev, - struct mlx5_ib_sq *sq) +static void destroy_flow_rule_vport_sq(struct mlx5_ib_sq *sq) { if (sq->flow_rule) mlx5_del_flow_rules(sq->flow_rule); + sq->flow_rule = NULL; } static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev, @@ -1285,15 +1286,8 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev, if (err) goto err_umem; - err = create_flow_rule_vport_sq(dev, sq); - if (err) - goto err_flow; - return 0; -err_flow: - mlx5_core_destroy_sq_tracked(dev->mdev, &sq->base.mqp); - err_umem: ib_umem_release(sq->ubuffer.umem); sq->ubuffer.umem = NULL; @@ -1304,7 +1298,7 @@ err_umem: static void destroy_raw_packet_qp_sq(struct mlx5_ib_dev *dev, struct mlx5_ib_sq *sq) { - 
destroy_flow_rule_vport_sq(dev, sq); + destroy_flow_rule_vport_sq(sq); mlx5_core_destroy_sq_tracked(dev->mdev, &sq->base.mqp); ib_umem_release(sq->ubuffer.umem); } @@ -3269,6 +3263,8 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, } if (modify_sq) { + struct mlx5_flow_handle *flow_rule; + if (tx_affinity) { err = modify_raw_packet_tx_affinity(dev->mdev, sq, tx_affinity, @@ -3277,8 +3273,25 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, return err; } - return modify_raw_packet_qp_sq(dev->mdev, sq, sq_state, - raw_qp_param, qp->ibqp.pd); + flow_rule = create_flow_rule_vport_sq(dev, sq, + raw_qp_param->port); + if (IS_ERR(flow_rule)) + return err; + + err = modify_raw_packet_qp_sq(dev->mdev, sq, sq_state, + raw_qp_param, qp->ibqp.pd); + if (err) { + if (flow_rule) + mlx5_del_flow_rules(flow_rule); + return err; + } + + if (flow_rule) { + destroy_flow_rule_vport_sq(sq); + sq->flow_rule = flow_rule; + } + + return err; } return 0; @@ -3561,6 +3574,9 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, raw_qp_param.set_mask |= MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID; } + if (attr_mask & IB_QP_PORT) + raw_qp_param.port = attr->port_num; + if (attr_mask & IB_QP_RATE_LIMIT) { raw_qp_param.rl.rate = attr->rate_limit; -- cgit v1.2.3 From 35b0aa67b29867cdeaee32f920da3cb300dbb851 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Thu, 28 Mar 2019 15:27:39 +0200 Subject: RDMA/mlx5: Refactor netdev affinity code The design of representors is such that once an IB representor is created, the netdev of representor already exists, we can use that fact to simplify the netdev affinity code. 
Signed-off-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/ib_rep.c | 2 ++ drivers/infiniband/hw/mlx5/main.c | 47 +++++++++++++++++++++++++++++-------- 2 files changed, 39 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c index d3988f6ae2ae..7946cf26421b 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.c +++ b/drivers/infiniband/hw/mlx5/ib_rep.c @@ -71,6 +71,8 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) ibdev->is_rep = true; ibdev->port[0].rep = rep; + ibdev->port[0].roce.netdev = + mlx5_ib_get_rep_netdev(dev->priv.eswitch, rep->vport); ibdev->mdev = dev; ibdev->num_ports = num_ports; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 38c71565d598..0c3335d3fc3d 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -156,6 +156,34 @@ static int get_port_state(struct ib_device *ibdev, return ret; } +static struct mlx5_roce *mlx5_get_rep_roce(struct mlx5_ib_dev *dev, + struct net_device *ndev, + u8 *port_num) +{ + struct mlx5_eswitch *esw = dev->mdev->priv.eswitch; + struct net_device *rep_ndev; + struct mlx5_ib_port *port; + int i; + + for (i = 0; i < dev->num_ports; i++) { + port = &dev->port[i]; + if (!port->rep) + continue; + + read_lock(&port->roce.netdev_lock); + rep_ndev = mlx5_ib_get_rep_netdev(esw, + port->rep->vport); + if (rep_ndev == ndev) { + read_unlock(&port->roce.netdev_lock); + *port_num = i + 1; + return &port->roce; + } + read_unlock(&port->roce.netdev_lock); + } + + return NULL; +} + static int mlx5_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { @@ -172,22 +200,17 @@ static int mlx5_netdev_event(struct notifier_block *this, switch (event) { case NETDEV_REGISTER: + /* Should already be registered during the load */ + if (ibdev->is_rep) + break; write_lock(&roce->netdev_lock); - if 
(ibdev->is_rep) { - struct mlx5_eswitch *esw = ibdev->mdev->priv.eswitch; - struct mlx5_eswitch_rep *rep = ibdev->port[0].rep; - struct net_device *rep_ndev; - - rep_ndev = mlx5_ib_get_rep_netdev(esw, rep->vport); - if (rep_ndev == ndev) - roce->netdev = ndev; - } else if (ndev->dev.parent == &mdev->pdev->dev) { + if (ndev->dev.parent == &mdev->pdev->dev) roce->netdev = ndev; - } write_unlock(&roce->netdev_lock); break; case NETDEV_UNREGISTER: + /* In case of reps, ib device goes away before the netdevs */ write_lock(&roce->netdev_lock); if (roce->netdev == ndev) roce->netdev = NULL; @@ -205,6 +228,10 @@ static int mlx5_netdev_event(struct notifier_block *this, dev_put(lag_ndev); } + if (ibdev->is_rep) + roce = mlx5_get_rep_roce(ibdev, ndev, &port_num); + if (!roce) + return NOTIFY_DONE; if ((upper == ndev || (!upper && ndev == roce->netdev)) && ibdev->ib_active) { struct ib_event ibev = { }; -- cgit v1.2.3 From a989ea01cb10a12bcf339ddcbbea9c49e098609f Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Thu, 28 Mar 2019 15:27:40 +0200 Subject: RDMA/mlx5: Move SMI caps logic We store the SMI information in the core device's struct, make sure we set that information only once (and not per port), while here make the for loop based on the actual size of the array. 
Signed-off-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 0c3335d3fc3d..c8cbfe2e964a 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -4538,7 +4538,7 @@ static int set_has_smi_cap(struct mlx5_ib_dev *dev) int err; int port; - for (port = 1; port <= dev->num_ports; port++) { + for (port = 1; port <= ARRAY_SIZE(dev->mdev->port_caps); port++) { dev->mdev->port_caps[port - 1].has_smi = false; if (MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_IB) { @@ -4584,10 +4584,6 @@ static int get_port_caps(struct mlx5_ib_dev *dev, u8 port) if (!dprops) goto out; - err = set_has_smi_cap(dev); - if (err) - goto out; - err = mlx5_ib_query_device(&dev->ib_dev, dprops, &uhw); if (err) { mlx5_ib_warn(dev, "query_device failed %d\n", err); @@ -5890,6 +5886,10 @@ int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) if (err) return err; + err = set_has_smi_cap(dev); + if (err) + return err; + if (!mlx5_core_mp_enabled(mdev)) { for (i = 1; i <= dev->num_ports; i++) { err = get_port_caps(dev, i); -- cgit v1.2.3 From 26628e2d58c910fa724312c6bcc3f4d12c9e805e Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Thu, 28 Mar 2019 15:27:41 +0200 Subject: RDMA/mlx5: Move to single device multiport ports in switchdev mode Move from IB device (representor) per virtual function to single IB device with port per virtual function (port 1 represents the uplink). As the number of ports is a static property of an IB device, declare the IB device with as many ports as possible according to the PCI bus. 
Signed-off-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/ib_rep.c | 31 ++++++++++++++++++++++++++----- drivers/infiniband/hw/mlx5/main.c | 26 ++++++++++++++++++++++---- drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 + 3 files changed, 49 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c index 7946cf26421b..224ef6c88d17 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.c +++ b/drivers/infiniband/hw/mlx5/ib_rep.c @@ -46,17 +46,36 @@ static const struct mlx5_ib_profile vf_rep_profile = { NULL), }; +static int +mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) +{ + struct mlx5_ib_dev *ibdev; + int vport_index; + + ibdev = mlx5_ib_get_uplink_ibdev(dev->priv.eswitch); + vport_index = ibdev->free_port++; + + ibdev->port[vport_index].rep = rep; + write_lock(&ibdev->port[vport_index].roce.netdev_lock); + ibdev->port[vport_index].roce.netdev = + mlx5_ib_get_rep_netdev(dev->priv.eswitch, rep->vport); + write_unlock(&ibdev->port[vport_index].roce.netdev_lock); + + return 0; +} + static int mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) { + int num_ports = MLX5_TOTAL_VPORTS(dev); const struct mlx5_ib_profile *profile; struct mlx5_ib_dev *ibdev; - int num_ports = 1; + int vport_index; if (rep->vport == MLX5_VPORT_UPLINK) profile = &uplink_rep_profile; else - profile = &vf_rep_profile; + return mlx5_ib_set_vport_rep(dev, rep); ibdev = ib_alloc_device(mlx5_ib_dev, ib_dev); if (!ibdev) @@ -70,8 +89,9 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) } ibdev->is_rep = true; - ibdev->port[0].rep = rep; - ibdev->port[0].roce.netdev = + vport_index = ibdev->free_port++; + ibdev->port[vport_index].rep = rep; + ibdev->port[vport_index].roce.netdev = mlx5_ib_get_rep_netdev(dev->priv.eswitch, rep->vport); ibdev->mdev = dev; ibdev->num_ports = num_ports; @@ -89,7 +109,8 @@ 
mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep) { struct mlx5_ib_dev *dev; - if (!rep->rep_if[REP_IB].priv) + if (!rep->rep_if[REP_IB].priv || + rep->vport != MLX5_VPORT_UPLINK) return; dev = mlx5_ib_rep_to_dev(rep); diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index c8cbfe2e964a..91f481c4352b 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -506,9 +506,14 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, /* Possible bad flows are checked before filling out props so in case * of an error it will still be zeroed out. + * Use native port in case of reps */ - err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, - mdev_port_num); + if (dev->is_rep) + err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, + 1); + else + err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, + mdev_port_num); if (err) goto out; ext = MLX5_CAP_PCAM_FEATURE(dev->mdev, ptys_extended_ethernet); @@ -1432,7 +1437,9 @@ static int mlx5_ib_rep_query_port(struct ib_device *ibdev, u8 port, { int ret; - /* Only link layer == ethernet is valid for representors */ + /* Only link layer == ethernet is valid for representors + * and we always use port 1 + */ ret = mlx5_query_port_roce(ibdev, port, props); if (ret || !props) return ret; @@ -4569,7 +4576,7 @@ static void get_ext_port_caps(struct mlx5_ib_dev *dev) mlx5_query_ext_port_caps(dev, port); } -static int get_port_caps(struct mlx5_ib_dev *dev, u8 port) +static int __get_port_caps(struct mlx5_ib_dev *dev, u8 port) { struct ib_device_attr *dprops = NULL; struct ib_port_attr *pprops = NULL; @@ -4612,6 +4619,16 @@ out: return err; } +static int get_port_caps(struct mlx5_ib_dev *dev, u8 port) +{ + /* For representors use port 1, is this is the only native + * port + */ + if (dev->is_rep) + return __get_port_caps(dev, 1); + return __get_port_caps(dev, port); +} + static void destroy_umrc_res(struct mlx5_ib_dev 
*dev) { int err; @@ -6198,6 +6215,7 @@ static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev) port_num = mlx5_core_native_port_num(dev->mdev) - 1; + /* Register only for native ports */ return mlx5_add_netdev_notifier(dev, port_num); } diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 9445e7f2c8fd..4bab14e699ae 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -952,6 +952,7 @@ struct mlx5_ib_dev { u16 devx_whitelist_uid; struct mlx5_srq_table srq_table; struct mlx5_async_ctx async_ctx; + int free_port; }; static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq) -- cgit v1.2.3 From fb652d3299023c8fd1af13c9f897c3e3d8a424d3 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Thu, 28 Mar 2019 15:27:42 +0200 Subject: RDMA/mlx5: Remove VF representor profile Now that we have a single IB device with multiple ports we can remove the VF representor profile. Signed-off-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/ib_rep.c | 39 ------------------------------ drivers/infiniband/hw/mlx5/main.c | 46 +++++++++++++----------------------- drivers/infiniband/hw/mlx5/mlx5_ib.h | 17 ------------- 3 files changed, 16 insertions(+), 86 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c index 224ef6c88d17..cbcc40d776b9 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.c +++ b/drivers/infiniband/hw/mlx5/ib_rep.c @@ -7,45 +7,6 @@ #include "ib_rep.h" #include "srq.h" -static const struct mlx5_ib_profile vf_rep_profile = { - STAGE_CREATE(MLX5_IB_STAGE_INIT, - mlx5_ib_stage_init_init, - mlx5_ib_stage_init_cleanup), - STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB, - mlx5_ib_stage_rep_flow_db_init, - NULL), - STAGE_CREATE(MLX5_IB_STAGE_CAPS, - mlx5_ib_stage_caps_init, - NULL), - STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB, - mlx5_ib_stage_rep_non_default_cb, - NULL), - 
STAGE_CREATE(MLX5_IB_STAGE_ROCE, - mlx5_ib_stage_rep_roce_init, - mlx5_ib_stage_rep_roce_cleanup), - STAGE_CREATE(MLX5_IB_STAGE_SRQ, - mlx5_init_srq_table, - mlx5_cleanup_srq_table), - STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES, - mlx5_ib_stage_dev_res_init, - mlx5_ib_stage_dev_res_cleanup), - STAGE_CREATE(MLX5_IB_STAGE_COUNTERS, - mlx5_ib_stage_counters_init, - mlx5_ib_stage_counters_cleanup), - STAGE_CREATE(MLX5_IB_STAGE_BFREG, - mlx5_ib_stage_bfrag_init, - mlx5_ib_stage_bfrag_cleanup), - STAGE_CREATE(MLX5_IB_STAGE_PRE_IB_REG_UMR, - NULL, - mlx5_ib_stage_pre_ib_reg_umr_cleanup), - STAGE_CREATE(MLX5_IB_STAGE_IB_REG, - mlx5_ib_stage_ib_reg_init, - mlx5_ib_stage_ib_reg_cleanup), - STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR, - mlx5_ib_stage_post_ib_reg_umr_init, - NULL), -}; - static int mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) { diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 91f481c4352b..a3248f6419a8 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -5879,7 +5879,7 @@ static struct ib_counters *mlx5_ib_create_counters(struct ib_device *device, return &mcounters->ibcntrs; } -void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev) +static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev) { mlx5_ib_cleanup_multiport_master(dev); if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { @@ -5888,7 +5888,7 @@ void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev) } } -int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) +static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) { struct mlx5_core_dev *mdev = dev->mdev; int err; @@ -5961,20 +5961,6 @@ static int mlx5_ib_stage_flow_db_init(struct mlx5_ib_dev *dev) return 0; } -int mlx5_ib_stage_rep_flow_db_init(struct mlx5_ib_dev *dev) -{ - struct mlx5_ib_dev *nic_dev; - - nic_dev = mlx5_ib_get_uplink_ibdev(dev->mdev->priv.eswitch); - - if (!nic_dev) - return -EINVAL; - - dev->flow_db = 
nic_dev->flow_db; - - return 0; -} - static void mlx5_ib_stage_flow_db_cleanup(struct mlx5_ib_dev *dev) { kfree(dev->flow_db); @@ -6073,7 +6059,7 @@ static const struct ib_device_ops mlx5_ib_dev_dm_ops = { .reg_dm_mr = mlx5_ib_reg_dm_mr, }; -int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) +static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) { struct mlx5_core_dev *mdev = dev->mdev; int err; @@ -6179,7 +6165,7 @@ static const struct ib_device_ops mlx5_ib_dev_port_rep_ops = { .query_port = mlx5_ib_rep_query_port, }; -int mlx5_ib_stage_rep_non_default_cb(struct mlx5_ib_dev *dev) +static int mlx5_ib_stage_rep_non_default_cb(struct mlx5_ib_dev *dev) { ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_port_rep_ops); return 0; @@ -6226,7 +6212,7 @@ static void mlx5_ib_stage_common_roce_cleanup(struct mlx5_ib_dev *dev) mlx5_remove_netdev_notifier(dev, port_num); } -int mlx5_ib_stage_rep_roce_init(struct mlx5_ib_dev *dev) +static int mlx5_ib_stage_rep_roce_init(struct mlx5_ib_dev *dev) { struct mlx5_core_dev *mdev = dev->mdev; enum rdma_link_layer ll; @@ -6242,7 +6228,7 @@ int mlx5_ib_stage_rep_roce_init(struct mlx5_ib_dev *dev) return err; } -void mlx5_ib_stage_rep_roce_cleanup(struct mlx5_ib_dev *dev) +static void mlx5_ib_stage_rep_roce_cleanup(struct mlx5_ib_dev *dev) { mlx5_ib_stage_common_roce_cleanup(dev); } @@ -6289,12 +6275,12 @@ static void mlx5_ib_stage_roce_cleanup(struct mlx5_ib_dev *dev) } } -int mlx5_ib_stage_dev_res_init(struct mlx5_ib_dev *dev) +static int mlx5_ib_stage_dev_res_init(struct mlx5_ib_dev *dev) { return create_dev_resources(&dev->devr); } -void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev) +static void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev) { destroy_dev_resources(&dev->devr); } @@ -6316,7 +6302,7 @@ static const struct ib_device_ops mlx5_ib_dev_hw_stats_ops = { .get_hw_stats = mlx5_ib_get_hw_stats, }; -int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev) +static int mlx5_ib_stage_counters_init(struct 
mlx5_ib_dev *dev) { if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) { ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_hw_stats_ops); @@ -6327,7 +6313,7 @@ int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev) return 0; } -void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev) +static void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev) { if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) mlx5_ib_dealloc_counters(dev); @@ -6357,7 +6343,7 @@ static void mlx5_ib_stage_uar_cleanup(struct mlx5_ib_dev *dev) mlx5_put_uars_page(dev->mdev, dev->mdev->priv.uar); } -int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev) +static int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev) { int err; @@ -6372,13 +6358,13 @@ int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev) return err; } -void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev) +static void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev) { mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg); mlx5_free_bfreg(dev->mdev, &dev->bfreg); } -int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev) +static int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev) { const char *name; @@ -6390,17 +6376,17 @@ int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev) return ib_register_device(&dev->ib_dev, name); } -void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev) +static void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev) { destroy_umrc_res(dev); } -void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev) +static void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev) { ib_unregister_device(&dev->ib_dev); } -int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev) +static int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev) { return create_umr_res(dev); } diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 4bab14e699ae..f5d572d1a492 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1236,23 +1236,6 
@@ static inline void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ /* Needed for rep profile */ -int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev); -void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev); -int mlx5_ib_stage_rep_flow_db_init(struct mlx5_ib_dev *dev); -int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev); -int mlx5_ib_stage_rep_non_default_cb(struct mlx5_ib_dev *dev); -int mlx5_ib_stage_rep_roce_init(struct mlx5_ib_dev *dev); -void mlx5_ib_stage_rep_roce_cleanup(struct mlx5_ib_dev *dev); -int mlx5_ib_stage_dev_res_init(struct mlx5_ib_dev *dev); -void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev); -int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev); -void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev); -int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev); -void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev); -void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev); -int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev); -void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev); -int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev); void __mlx5_ib_remove(struct mlx5_ib_dev *dev, const struct mlx5_ib_profile *profile, int stage); -- cgit v1.2.3 From 1c00d7bc96c2a4f5a8c8353705dec93bb036ad78 Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Wed, 10 Apr 2019 05:10:07 -0400 Subject: RDMA/ocrdma: Remove use of idr use pci bdf instead Removing the use of IDR variable just to name the function ids. Using the PCI_FUNC(pdev->devfn) instead to create the device name, associated resources and to print driver info at various places. 
Reported-by: Matthew Wilcox Signed-off-by: Devesh Sharma Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/ocrdma/ocrdma_main.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index 52d10c86caf2..fc6c0962dea9 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -62,8 +62,6 @@ MODULE_DESCRIPTION(OCRDMA_ROCE_DRV_DESC " " OCRDMA_ROCE_DRV_VERSION); MODULE_AUTHOR("Emulex Corporation"); MODULE_LICENSE("Dual BSD/GPL"); -static DEFINE_IDR(ocrdma_dev_id); - void ocrdma_get_guid(struct ocrdma_dev *dev, u8 *guid) { u8 mac_addr[6]; @@ -316,13 +314,10 @@ static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info) dev->mbx_cmd = kzalloc(sizeof(struct ocrdma_mqe_emb_cmd), GFP_KERNEL); if (!dev->mbx_cmd) - goto idr_err; + goto init_err; memcpy(&dev->nic_info, dev_info, sizeof(*dev_info)); - dev->id = idr_alloc(&ocrdma_dev_id, NULL, 0, 0, GFP_KERNEL); - if (dev->id < 0) - goto idr_err; - + dev->id = PCI_FUNC(dev->nic_info.pdev->devfn); status = ocrdma_init_hw(dev); if (status) goto init_err; @@ -359,8 +354,6 @@ alloc_err: ocrdma_free_resources(dev); ocrdma_cleanup_hw(dev); init_err: - idr_remove(&ocrdma_dev_id, dev->id); -idr_err: kfree(dev->mbx_cmd); ib_dealloc_device(&dev->ibdev); pr_err("%s() leaving. 
ret=%d\n", __func__, status); @@ -370,7 +363,6 @@ idr_err: static void ocrdma_remove_free(struct ocrdma_dev *dev) { - idr_remove(&ocrdma_dev_id, dev->id); kfree(dev->mbx_cmd); ib_dealloc_device(&dev->ibdev); } @@ -475,7 +467,6 @@ static void __exit ocrdma_exit_module(void) { be_roce_unregister_driver(&ocrdma_drv); ocrdma_rem_debugfs(); - idr_destroy(&ocrdma_dev_id); } module_init(ocrdma_init_module); -- cgit v1.2.3 From 1db86318c4d1a0d1c8a19535290f71a03a2f13ad Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 12 Apr 2019 11:40:17 +0100 Subject: RDMA/mlx5: Check for error return in flow_rule rather than err Currently when the call to create_flow_rule_vport_sq fails, the error check is being performed on err rather than on the return pointer flow_rule. The return flow_rule maybe NULL (which is not considered an error) or an error code, so check for the error on flow_rule. Addresses-Coverity: ("Uninitialized scalar variable") Fixes: d5ed8ac34cef ("RDMA/mlx5: Move default representors SQ steering to rule to modify QP") Signed-off-by: Colin Ian King Acked-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index a970af0ffc3e..487dccbe1852 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -3276,7 +3276,7 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, flow_rule = create_flow_rule_vport_sq(dev, sq, raw_qp_param->port); if (IS_ERR(flow_rule)) - return err; + return PTR_ERR(flow_rule); err = modify_raw_packet_qp_sq(dev->mdev, sq, sq_state, raw_qp_param, qp->ibqp.pd); -- cgit v1.2.3 From a6d2a5a92e67d151c98886babdc86d530d27111c Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sat, 13 Apr 2019 17:00:26 +0100 Subject: RDMA/cxgb4: Fix null pointer dereference on alloc_skb failure Currently if alloc_skb fails to allocate the skb a null 
skb is passed to t4_set_arp_err_handler and this ends up dereferencing the null skb. Avoid the NULL pointer dereference by checking for a NULL skb and returning early. Addresses-Coverity: ("Dereference null return") Fixes: b38a0ad8ec11 ("RDMA/cxgb4: Set arp error handler for PASS_ACCEPT_RPL messages") Signed-off-by: Colin Ian King Acked-by: Potnuri Bharat Teja Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/cxgb4/cm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 1e68d87b663d..0f3b1193d5f8 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -460,6 +460,8 @@ static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp) skb_reset_transport_header(skb); } else { skb = alloc_skb(len, gfp); + if (!skb) + return NULL; } t4_set_arp_err_handler(skb, NULL, NULL); return skb; -- cgit v1.2.3 From ff5eefe6d3a3a2cd93b71165741ebdeda6d58e1d Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 16 Apr 2019 15:38:04 +0100 Subject: RDMA/cxgb4: Fix spelling mistake "immedate" -> "immediate" There is a spelling mistake in a module parameter description. Fix it. 
Signed-off-by: Colin Ian King Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/cxgb4/qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 9c8962d7bf97..e92b9544357a 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -57,7 +57,7 @@ MODULE_PARM_DESC(db_coalescing_threshold, static int max_fr_immd = T4_MAX_FR_IMMD; module_param(max_fr_immd, int, 0644); -MODULE_PARM_DESC(max_fr_immd, "fastreg threshold for using DSGL instead of immedate"); +MODULE_PARM_DESC(max_fr_immd, "fastreg threshold for using DSGL instead of immediate"); static int alloc_ird(struct c4iw_dev *dev, u32 ird) { -- cgit v1.2.3 From 2d959849775772d6a78ebedc68ba04ebd2904e7a Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Sat, 20 Apr 2019 15:55:12 +0800 Subject: infiniband/qib: Fix typo in comment Fix typo 'faspath' -> 'fastpath'. Signed-off-by: Chengguang Xu Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qib/qib_common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/qib/qib_common.h b/drivers/infiniband/hw/qib/qib_common.h index a4a1f56ce824..f91f23e02283 100644 --- a/drivers/infiniband/hw/qib/qib_common.h +++ b/drivers/infiniband/hw/qib/qib_common.h @@ -57,7 +57,7 @@ * QIB_VERBOSE_TRACING define as 1 if you want additional tracing in * fastpath code * QIB_TRACE_REGWRITES define as 1 if you want register writes to be - * traced in faspath code + * traced in fastpath code * _QIB_TRACING define as 0 if you want to remove all tracing in a * compilation unit */ -- cgit v1.2.3 From 3042492bd1f9a08e9cf4c1a4621e359fb0f9a126 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Mon, 15 Apr 2019 13:22:49 +0300 Subject: RDMA/core: Avoid freeing netdevs in disable_device() So we can use the disable_device() helper while changing the net namespace of the rdma device in a subsequent patch, move free_netdevs() out of it. 
Signed-off-by: Parav Pandit Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 88c4238bbee1..25f49b646007 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -1152,9 +1152,6 @@ static void disable_device(struct ib_device *device) * devices and before device is disabled. */ remove_compat_devs(device); - - /* Expedite removing unregistered pointers from the hash table */ - free_netdevs(device); } /* @@ -1297,6 +1294,10 @@ static void __ib_unregister_device(struct ib_device *ib_dev) goto out; disable_device(ib_dev); + + /* Expedite removing unregistered pointers from the hash table */ + free_netdevs(ib_dev); + ib_device_unregister_sysfs(ib_dev); device_del(&ib_dev->dev); ib_device_unregister_rdmacg(ib_dev); -- cgit v1.2.3 From decbc7a6b0073f55b200d80a3ecf5a5e205edd06 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Mon, 15 Apr 2019 13:22:50 +0300 Subject: RDMA/core: Introduce a helper function to change net namespace of rdma device Introduce a helper function that changes rdma device's net namespace which performs mini disable/enable sequence to have device visible only in assigned net namespace. Device unregistration, device rename and device change net namespace may be invoked concurrently. (a) device unregistration needs to wait if a device change (rename or net namespace change) operation is in progress. (b) device net namespace change should not proceed if the unregistration has started. (c) while one cpu is changing device net namespace, other cpu should not be able to rename or change net namespace. 
To address above concurrency, (a) Use unreg_mutex to synchronize between ib_unregister_device() and net namespace change operation (b) In cases where unregister_device() has started unregistration before change_netns got chance to acquire unreg_mutex, validate the refcount - if it dropped to zero, abort the net namespace change operation. Finally use the helper function to change net namespace of ib device to move the device back to init_net when such net is deleted. Signed-off-by: Parav Pandit Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 77 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 25f49b646007..7fe4f8b880ee 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -201,6 +201,9 @@ static struct notifier_block ibdev_lsm_nb = { .notifier_call = ib_security_change, }; +static int rdma_dev_change_netns(struct ib_device *device, struct net *cur_net, + struct net *net); + /* Pointer to the RCU head at the start of the ib_port_data array */ struct ib_port_data_rcu { struct rcu_head rcu_head; @@ -861,6 +864,8 @@ static int add_compat_devs(struct ib_device *device) unsigned long index; int ret = 0; + lockdep_assert_held(&devices_rwsem); + down_read(&rdma_nets_rwsem); xa_for_each (&rdma_nets, index, rnet) { ret = add_one_compat_dev(device, rnet); @@ -978,6 +983,11 @@ static void rdma_dev_exit_net(struct net *net) remove_one_compat_dev(dev, rnet->id); + /* + * If the real device is in the NS then move it back to init. + */ + rdma_dev_change_netns(dev, net, &init_net); + put_device(&dev->dev); down_read(&devices_rwsem); } @@ -1428,6 +1438,73 @@ void ib_unregister_device_queued(struct ib_device *ib_dev) } EXPORT_SYMBOL(ib_unregister_device_queued); +/* + * The caller must pass in a device that has the kref held and the refcount + * released. 
If the device is in cur_net and still registered then it is moved + * into net. + */ +static int rdma_dev_change_netns(struct ib_device *device, struct net *cur_net, + struct net *net) +{ + int ret2 = -EINVAL; + int ret; + + mutex_lock(&device->unregistration_lock); + + /* + * If a device not under ib_device_get() or the unregistration_lock + * the namespace can be changed, or it can be unregistered. Check + * again under the lock. + */ + if (refcount_read(&device->refcount) == 0 || + !net_eq(cur_net, read_pnet(&device->coredev.rdma_net))) { + ret = -ENODEV; + goto out; + } + + kobject_uevent(&device->dev.kobj, KOBJ_REMOVE); + disable_device(device); + + /* + * At this point no one can be using the device, so it is safe to + * change the namespace. + */ + write_pnet(&device->coredev.rdma_net, net); + + /* + * Currently rdma devices are system wide unique. So the device name + * is guaranteed free in the new namespace. Publish the new namespace + * at the sysfs level. + */ + down_read(&devices_rwsem); + ret = device_rename(&device->dev, dev_name(&device->dev)); + up_read(&devices_rwsem); + if (ret) { + dev_warn(&device->dev, + "%s: Couldn't rename device after namespace change\n", + __func__); + /* Try and put things back and re-enable the device */ + write_pnet(&device->coredev.rdma_net, cur_net); + } + + ret2 = enable_device_and_get(device); + if (ret2) + /* + * This shouldn't really happen, but if it does, let the user + * retry at later point. So don't disable the device. 
+ */ + dev_warn(&device->dev, + "%s: Couldn't re-enable device after namespace change\n", + __func__); + kobject_uevent(&device->dev.kobj, KOBJ_ADD); + ib_device_put(device); +out: + mutex_unlock(&device->unregistration_lock); + if (ret) + return ret; + return ret2; +} + static struct pernet_operations rdma_dev_net_ops = { .init = rdma_dev_init_net, .exit = rdma_dev_exit_net, -- cgit v1.2.3 From 2e5b8a01165e4fe57ec396961daae38713edce35 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Mon, 15 Apr 2019 13:22:51 +0300 Subject: RDMA/core: Add a netlink command to change net namespace of rdma device Provide an option to change the net namespace of a rdma device through a netlink command. When multiple rdma devices exists in a system, and when containers are used, this will limit rdma device visibility to a specified net namespace. An example command to change net namespace of mlx5_1 device to the previously created net namespace 'foo' is: $ ip netns add foo $ rdma dev set mlx5_1 netns foo Signed-off-by: Parav Pandit Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/core_priv.h | 2 ++ drivers/infiniband/core/device.c | 56 +++++++++++++++++++++++++++++++++---- drivers/infiniband/core/nldev.c | 13 ++++++++- include/uapi/rdma/rdma_netlink.h | 6 +++- 4 files changed, 70 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index 5b0ffbb6b3c9..d4dd360769cb 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -350,4 +350,6 @@ int ib_port_register_module_stat(struct ib_device *device, u8 port_num, const char *name); void ib_port_unregister_module_stat(struct kobject *kobj); +int ib_device_set_netns_put(struct sk_buff *skb, + struct ib_device *dev, u32 ns_fd); #endif /* _CORE_PRIV_H */ diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 7fe4f8b880ee..fcbf2d4c865d 100644 --- a/drivers/infiniband/core/device.c +++ 
b/drivers/infiniband/core/device.c @@ -1452,9 +1452,9 @@ static int rdma_dev_change_netns(struct ib_device *device, struct net *cur_net, mutex_lock(&device->unregistration_lock); /* - * If a device not under ib_device_get() or the unregistration_lock - * the namespace can be changed, or it can be unregistered. Check - * again under the lock. + * If a device not under ib_device_get() or if the unregistration_lock + * is not held, the namespace can be changed, or it can be unregistered. + * Check again under the lock. */ if (refcount_read(&device->refcount) == 0 || !net_eq(cur_net, read_pnet(&device->coredev.rdma_net))) { @@ -1471,12 +1471,12 @@ static int rdma_dev_change_netns(struct ib_device *device, struct net *cur_net, */ write_pnet(&device->coredev.rdma_net, net); + down_read(&devices_rwsem); /* * Currently rdma devices are system wide unique. So the device name * is guaranteed free in the new namespace. Publish the new namespace * at the sysfs level. */ - down_read(&devices_rwsem); ret = device_rename(&device->dev, dev_name(&device->dev)); up_read(&devices_rwsem); if (ret) { @@ -1488,7 +1488,7 @@ static int rdma_dev_change_netns(struct ib_device *device, struct net *cur_net, } ret2 = enable_device_and_get(device); - if (ret2) + if (ret2) { /* * This shouldn't really happen, but if it does, let the user * retry at later point. So don't disable the device. 
@@ -1496,7 +1496,9 @@ static int rdma_dev_change_netns(struct ib_device *device, struct net *cur_net, dev_warn(&device->dev, "%s: Couldn't re-enable device after namespace change\n", __func__); + } kobject_uevent(&device->dev.kobj, KOBJ_ADD); + ib_device_put(device); out: mutex_unlock(&device->unregistration_lock); @@ -1505,6 +1507,50 @@ out: return ret2; } +int ib_device_set_netns_put(struct sk_buff *skb, + struct ib_device *dev, u32 ns_fd) +{ + struct net *net; + int ret; + + net = get_net_ns_by_fd(ns_fd); + if (IS_ERR(net)) { + ret = PTR_ERR(net); + goto net_err; + } + + if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) { + ret = -EPERM; + goto ns_err; + } + + /* + * Currently supported only for those providers which support + * disassociation and don't do port specific sysfs init. Once a + * port_cleanup infrastructure is implemented, this limitation will be + * removed. + */ + if (!dev->ops.disassociate_ucontext || dev->ops.init_port || + ib_devices_shared_netns) { + ret = -EOPNOTSUPP; + goto ns_err; + } + + get_device(&dev->dev); + ib_device_put(dev); + ret = rdma_dev_change_netns(dev, current->nsproxy->net_ns, net); + put_device(&dev->dev); + + put_net(net); + return ret; + +ns_err: + put_net(net); +net_err: + ib_device_put(dev); + return ret; +} + static struct pernet_operations rdma_dev_net_ops = { .init = rdma_dev_init_net, .exit = rdma_dev_exit_net, diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 8cb3851d212e..bced945a456d 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -119,6 +119,7 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = { [RDMA_NLDEV_SYS_ATTR_NETNS_MODE] = { .type = NLA_U8 }, [RDMA_NLDEV_ATTR_DEV_PROTOCOL] = { .type = NLA_NUL_STRING, .len = RDMA_NLDEV_ATTR_ENTRY_STRLEN }, + [RDMA_NLDEV_NET_NS_FD] = { .type = NLA_U32 }, }; static int put_driver_name_print_type(struct sk_buff *msg, const char *name, @@ -691,9 +692,20 @@ static int 
nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, nla_strlcpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME], IB_DEVICE_NAME_MAX); err = ib_device_rename(device, name); + goto done; } + if (tb[RDMA_NLDEV_NET_NS_FD]) { + u32 ns_fd; + + ns_fd = nla_get_u32(tb[RDMA_NLDEV_NET_NS_FD]); + err = ib_device_set_netns_put(skb, device, ns_fd); + goto put_done; + } + +done: ib_device_put(device); +put_done: return err; } @@ -909,7 +921,6 @@ static int _nldev_res_get_dumpit(struct ib_device *device, nlmsg_cancel(skb, nlh); goto out; } - nlmsg_end(skb, nlh); idx++; diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index d49f491341f6..42a8bdc40a14 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -469,12 +469,16 @@ enum rdma_nldev_attr { * either shared or exclusive among multiple net namespaces. */ RDMA_NLDEV_SYS_ATTR_NETNS_MODE, /* u8 */ - /* * Device protocol, e.g. ib, iw, usnic, roce and opa */ RDMA_NLDEV_ATTR_DEV_PROTOCOL, /* string */ + /* + * File descriptor handle of the net namespace object + */ + RDMA_NLDEV_NET_NS_FD, /* u32 */ + /* * Always the end */ -- cgit v1.2.3 From 13a4376568f6e3a6df21e4c116b027101e28f954 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Thu, 28 Mar 2019 15:46:21 +0200 Subject: RDMA/mlx5: Access the prio bypass inside the FDB flow table namespace Now that we have a specific prio inside the FDB namespace allow retrieving it from the RDMA side. 
Signed-off-by: Mark Bloch Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 31 ++++++++++++++++++++----------- drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 + 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index a3248f6419a8..ecd5054fb888 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3809,11 +3809,16 @@ _get_flow_table(struct mlx5_ib_dev *dev, bool mcast) { struct mlx5_flow_namespace *ns = NULL; - struct mlx5_ib_flow_prio *prio; - int max_table_size; + struct mlx5_ib_flow_prio *prio = NULL; + int max_table_size = 0; u32 flags = 0; int priority; + if (mcast) + priority = MLX5_IB_FLOW_MCAST_PRIO; + else + priority = ib_prio_to_core_prio(fs_matcher->priority, false); + if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) { max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, log_max_ft_size)); @@ -3822,29 +3827,33 @@ _get_flow_table(struct mlx5_ib_dev *dev, if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, reformat_l3_tunnel_to_l2)) flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; - } else { /* Can only be MLX5_FLOW_NAMESPACE_EGRESS */ - max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, - log_max_ft_size)); + } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) { + max_table_size = BIT( + MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, log_max_ft_size)); if (MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat)) flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; + } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB) { + max_table_size = BIT( + MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, log_max_ft_size)); + priority = FDB_BYPASS_PATH; } if (max_table_size < MLX5_FS_MAX_ENTRIES) return ERR_PTR(-ENOMEM); - if (mcast) - priority = MLX5_IB_FLOW_MCAST_PRIO; - else - priority = ib_prio_to_core_prio(fs_matcher->priority, false); - ns = mlx5_get_flow_namespace(dev->mdev, 
fs_matcher->ns_type); if (!ns) return ERR_PTR(-ENOTSUPP); if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) prio = &dev->flow_db->prios[priority]; - else + else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) prio = &dev->flow_db->egress_prios[priority]; + else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB) + prio = &dev->flow_db->fdb; + + if (!prio) + return ERR_PTR(-EINVAL); if (prio->flow_table) return prio; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index f5d572d1a492..55b8bdb402b6 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -194,6 +194,7 @@ struct mlx5_ib_flow_db { struct mlx5_ib_flow_prio egress_prios[MLX5_IB_NUM_FLOW_FT]; struct mlx5_ib_flow_prio sniffer[MLX5_IB_NUM_SNIFFER_FTS]; struct mlx5_ib_flow_prio egress[MLX5_IB_NUM_EGRESS_FTS]; + struct mlx5_ib_flow_prio fdb; struct mlx5_flow_table *lag_demux_ft; /* Protect flow steering bypass flow tables * when add/del flow rules. -- cgit v1.2.3 From 3b70508a6bfbdc78b565e9da22fd98483263494e Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Thu, 28 Mar 2019 15:46:22 +0200 Subject: RDMA/mlx5: Create flow table with max size supported Instead of failing the request, just use the supported number of flow entries. 
Signed-off-by: Mark Bloch Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index ecd5054fb888..3b220fa78dac 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3233,12 +3233,11 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, if (!ns) return ERR_PTR(-ENOTSUPP); - if (num_entries > max_table_size) - return ERR_PTR(-ENOMEM); + max_table_size = min_t(int, num_entries, max_table_size); ft = prio->flow_table; if (!ft) - return _get_prio(ns, prio, priority, num_entries, num_groups, + return _get_prio(ns, prio, priority, max_table_size, num_groups, flags); return prio; @@ -3838,8 +3837,7 @@ _get_flow_table(struct mlx5_ib_dev *dev, priority = FDB_BYPASS_PATH; } - if (max_table_size < MLX5_FS_MAX_ENTRIES) - return ERR_PTR(-ENOMEM); + max_table_size = min_t(int, max_table_size, MLX5_FS_MAX_ENTRIES); ns = mlx5_get_flow_namespace(dev->mdev, fs_matcher->ns_type); if (!ns) @@ -3858,7 +3856,7 @@ _get_flow_table(struct mlx5_ib_dev *dev, if (prio->flow_table) return prio; - return _get_prio(ns, prio, priority, MLX5_FS_MAX_ENTRIES, + return _get_prio(ns, prio, priority, max_table_size, MLX5_FS_MAX_TYPES, flags); } -- cgit v1.2.3 From 52438be4411271c5e93da54da340c3566b0e4bef Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Thu, 28 Mar 2019 15:46:23 +0200 Subject: RDMA/mlx5: Allow inserting a steering rule to the FDB Allow this only via mlx5 raw create flow API, legacy verbs are not supported. To accommodate that, we add a new attribute to matcher creation to indicate the type of flow table to be used. 
MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE With this new attribute MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS is no longer needed, we keep it for compatibility but at most only a single attribute can be passed of the two. When inserting a flow rule to the FDB we require that a DEVX FT is provided as a destination, no other configuration is allowed. Signed-off-by: Mark Bloch Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/flow.c | 75 ++++++++++++++++++++++++++----- include/uapi/rdma/mlx5_user_ioctl_cmds.h | 1 + include/uapi/rdma/mlx5_user_ioctl_verbs.h | 1 + 3 files changed, 65 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c index 09f5bc6142c9..71a8d46a0827 100644 --- a/drivers/infiniband/hw/mlx5/flow.c +++ b/drivers/infiniband/hw/mlx5/flow.c @@ -29,6 +29,9 @@ mlx5_ib_ft_type_to_namespace(enum mlx5_ib_uapi_flow_table_type table_type, case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX: *namespace = MLX5_FLOW_NAMESPACE_EGRESS; break; + case MLX5_IB_UAPI_FLOW_TABLE_TYPE_FDB: + *namespace = MLX5_FLOW_NAMESPACE_FDB; + break; default: return -EINVAL; } @@ -93,6 +96,10 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( ((dest_devx && dest_qp) || (!dest_devx && !dest_qp))) return -EINVAL; + /* Allow only DEVX object as dest when inserting to FDB */ + if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB && !dest_devx) + return -EINVAL; + if (dest_devx) { devx_obj = uverbs_attr_get_obj( attrs, MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX); @@ -104,6 +111,10 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( */ if (!mlx5_ib_devx_is_flow_dest(devx_obj, &dest_id, &dest_type)) return -EINVAL; + /* Allow only flow table as dest when inserting to FDB */ + if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB && + dest_type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) + return -EINVAL; } else if (dest_qp) { struct mlx5_ib_qp *mqp; @@ -203,6 +214,54 @@ static int 
flow_matcher_cleanup(struct ib_uobject *uobject, return 0; } +static int mlx5_ib_matcher_ns(struct uverbs_attr_bundle *attrs, + struct mlx5_ib_flow_matcher *obj) +{ + enum mlx5_ib_uapi_flow_table_type ft_type = + MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX; + u32 flags; + int err; + + /* New users should use MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE and older + * users should switch to it. We leave this to not break userspace + */ + if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE) && + uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS)) + return -EINVAL; + + if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE)) { + err = uverbs_get_const(&ft_type, attrs, + MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE); + if (err) + return err; + + err = mlx5_ib_ft_type_to_namespace(ft_type, &obj->ns_type); + if (err) + return err; + + return 0; + } + + if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS)) { + err = uverbs_get_flags32(&flags, attrs, + MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS, + IB_FLOW_ATTR_FLAGS_EGRESS); + if (err) + return err; + + if (flags) { + mlx5_ib_ft_type_to_namespace( + MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX, + &obj->ns_type); + return 0; + } + } + + obj->ns_type = MLX5_FLOW_NAMESPACE_BYPASS; + + return 0; +} + static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)( struct uverbs_attr_bundle *attrs) { @@ -210,14 +269,12 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)( attrs, MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE); struct mlx5_ib_dev *dev = mlx5_udata_to_mdev(&attrs->driver_udata); struct mlx5_ib_flow_matcher *obj; - u32 flags; int err; obj = kzalloc(sizeof(struct mlx5_ib_flow_matcher), GFP_KERNEL); if (!obj) return -ENOMEM; - obj->ns_type = MLX5_FLOW_NAMESPACE_BYPASS; obj->mask_len = uverbs_attr_get_len( attrs, MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK); err = uverbs_copy_from(&obj->matcher_mask, @@ -243,19 +300,10 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)( if (err) goto end; - err = 
uverbs_get_flags32(&flags, attrs, - MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS, - IB_FLOW_ATTR_FLAGS_EGRESS); + err = mlx5_ib_matcher_ns(attrs, obj); if (err) goto end; - if (flags) { - err = mlx5_ib_ft_type_to_namespace( - MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX, &obj->ns_type); - if (err) - goto end; - } - uobj->object = obj; obj->mdev = dev->mdev; atomic_set(&obj->usecnt, 0); @@ -605,6 +653,9 @@ DECLARE_UVERBS_NAMED_METHOD( UA_MANDATORY), UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS, enum ib_flow_flags, + UA_OPTIONAL), + UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE, + enum mlx5_ib_uapi_flow_table_type, UA_OPTIONAL)); DECLARE_UVERBS_NAMED_METHOD_DESTROY( diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h index 8149d224030b..0d8f564ce60b 100644 --- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h +++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h @@ -144,6 +144,7 @@ enum mlx5_ib_flow_matcher_create_attrs { MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE, MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS, + MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE, }; enum mlx5_ib_flow_matcher_destroy_attrs { diff --git a/include/uapi/rdma/mlx5_user_ioctl_verbs.h b/include/uapi/rdma/mlx5_user_ioctl_verbs.h index 4a701033b93f..0a126a6b9337 100644 --- a/include/uapi/rdma/mlx5_user_ioctl_verbs.h +++ b/include/uapi/rdma/mlx5_user_ioctl_verbs.h @@ -42,6 +42,7 @@ enum mlx5_ib_uapi_flow_action_flags { enum mlx5_ib_uapi_flow_table_type { MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX = 0x0, MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX = 0x1, + MLX5_IB_UAPI_FLOW_TABLE_TYPE_FDB = 0x2, }; enum mlx5_ib_uapi_flow_action_packet_reformat_type { -- cgit v1.2.3 From 56e5acd405fadac9b1eeacaac084c945b3f23b37 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 28 Mar 2019 15:46:24 +0200 Subject: RDMA/mlx5: Add query e-switch vport context to devx white list Add MLX5_OP_QUERY_ESW_VPORT_CONTEXT to devx white list. 
It will be allowed only if HCA_CAP.eswitch_manager==1. Signed-off-by: Maor Gottlieb Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/devx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 9e35560665c5..4f199671b5a9 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -682,6 +682,7 @@ static bool devx_is_whitelist_cmd(void *in) switch (opcode) { case MLX5_CMD_OP_QUERY_HCA_CAP: case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT: + case MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT: return true; default: return false; @@ -719,6 +720,7 @@ static bool devx_is_general_cmd(void *in) switch (opcode) { case MLX5_CMD_OP_QUERY_HCA_CAP: case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT: + case MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT: case MLX5_CMD_OP_QUERY_VPORT_STATE: case MLX5_CMD_OP_QUERY_ADAPTER: case MLX5_CMD_OP_QUERY_ISSI: -- cgit v1.2.3 From 7f575103b04246246e76de4f182475174124dd03 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Thu, 28 Mar 2019 15:46:25 +0200 Subject: RDMA/mlx5: Allow DEVX and raw creation flow on reps Remove the limitations that were in place and provide support for DEVX and raw flow creation on reps. 
Signed-off-by: Mark Bloch Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/devx.c | 2 +- drivers/infiniband/hw/mlx5/flow.c | 8 +------- drivers/infiniband/hw/mlx5/main.c | 3 +++ 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 4f199671b5a9..d627f44bc84d 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -1906,7 +1906,7 @@ static bool devx_is_supported(struct ib_device *device) { struct mlx5_ib_dev *dev = to_mdev(device); - return !dev->is_rep && MLX5_CAP_GEN(dev->mdev, log_max_uctx); + return MLX5_CAP_GEN(dev->mdev, log_max_uctx); } const struct uapi_definition mlx5_ib_devx_defs[] = { diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c index 71a8d46a0827..1fc302d41a53 100644 --- a/drivers/infiniband/hw/mlx5/flow.c +++ b/drivers/infiniband/hw/mlx5/flow.c @@ -670,15 +670,9 @@ DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_FLOW_MATCHER, &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_CREATE), &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_DESTROY)); -static bool flow_is_supported(struct ib_device *device) -{ - return !to_mdev(device)->is_rep; -} - const struct uapi_definition mlx5_ib_flow_defs[] = { UAPI_DEF_CHAIN_OBJ_TREE_NAMED( - MLX5_IB_OBJECT_FLOW_MATCHER, - UAPI_DEF_IS_OBJ_SUPPORTED(flow_is_supported)), + MLX5_IB_OBJECT_FLOW_MATCHER), UAPI_DEF_CHAIN_OBJ_TREE( UVERBS_OBJECT_FLOW, &mlx5_ib_fs), diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 3b220fa78dac..c9b3a078d015 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -6572,6 +6572,9 @@ const struct mlx5_ib_profile uplink_rep_profile = { STAGE_CREATE(MLX5_IB_STAGE_PRE_IB_REG_UMR, NULL, mlx5_ib_stage_pre_ib_reg_umr_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_WHITELIST_UID, + mlx5_ib_stage_devx_init, + mlx5_ib_stage_devx_cleanup), 
STAGE_CREATE(MLX5_IB_STAGE_IB_REG, mlx5_ib_stage_ib_reg_init, mlx5_ib_stage_ib_reg_cleanup), -- cgit v1.2.3 From d3b5cc1cd996ce84d362b3c15f940346603741b9 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Thu, 28 Mar 2019 15:46:26 +0200 Subject: RDMA/mlx5: Initialize roce port info before multiport master init When working in multiport RoCE mode it is possible to attach a slave before the master. In that case the slave is waiting for a master to be attached. When the master is attached it goes over the list of waiting slaves, finds a slave that is compatible and tries to bind it to itself. The call stack is: mlx5_ib_init_multiport_master() -> mlx5_ib_bind_slave_port() In the bind function we will create a netdev notifier, but this is done before we initialize the RoCE structure (this is done at a later stage by the master in the ROCE stage). Once events are delivered to that notifier we will use mlx5_ib_get_native_port_mdev() to get the actual port and as the native port is zero we will access an invalid index in the port structure. Move the RoCE structure initialization to an earlier stage.
Fixes: 32f69e4be269 ("{net, IB}/mlx5: Manage port association for multiport RoCE") Signed-off-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index c9b3a078d015..795db12e7fa4 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -5904,6 +5904,9 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) for (i = 0; i < dev->num_ports; i++) { spin_lock_init(&dev->port[i].mp.mpi_lock); rwlock_init(&dev->port[i].roce.netdev_lock); + dev->port[i].roce.dev = dev; + dev->port[i].roce.native_port_num = i + 1; + dev->port[i].roce.last_port_state = IB_PORT_DOWN; } err = mlx5_ib_init_multiport_master(dev); @@ -6190,13 +6193,6 @@ static const struct ib_device_ops mlx5_ib_dev_common_roce_ops = { static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev) { u8 port_num; - int i; - - for (i = 0; i < dev->num_ports; i++) { - dev->port[i].roce.dev = dev; - dev->port[i].roce.native_port_num = i + 1; - dev->port[i].roce.last_port_state = IB_PORT_DOWN; - } dev->ib_dev.uverbs_ex_cmd_mask |= (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) | -- cgit v1.2.3 From 5fb58c9e2fb11edd12379b20be01ee54a7bac026 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Thu, 28 Mar 2019 15:46:27 +0200 Subject: RDMA/mlx5: Don't create IB representors when in multiport RoCE mode Switchdev mode and multiport RoCE mode aren't compatible at this point. Don't create IB reps when a user switches to switchdev mode and the driver operates in that mode.
Signed-off-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 795db12e7fa4..d40c39434637 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -6630,7 +6630,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) if (MLX5_ESWITCH_MANAGER(mdev) && mlx5_ib_eswitch_mode(mdev->priv.eswitch) == SRIOV_OFFLOADS) { - mlx5_ib_register_vport_reps(mdev); + if (!mlx5_core_mp_enabled(mdev)) + mlx5_ib_register_vport_reps(mdev); return mdev; } -- cgit v1.2.3 From 823b23da71132b80d9f41ab667c68b112455f3b6 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 10 Apr 2019 11:23:03 +0300 Subject: IB/core: Allow vlan link local address based RoCE GIDs IPv6 link local address for a VLAN netdevice has nothing to do with its resemblance with the default GID, because VLAN link local GID is in different layer 2 domain. Now that RoCE MAD packet processing and route resolution consider the right GID index, there is no need for an unnecessary check which prevents the addition of vlan based IPv6 link local GIDs. 
Signed-off-by: Parav Pandit Reviewed-by: Daniel Jurgens Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cache.c | 27 ++++----------------------- 1 file changed, 4 insertions(+), 23 deletions(-) diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 43c67e5f43c6..7499e7016e38 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -543,30 +543,11 @@ out_unlock: int ib_cache_gid_add(struct ib_device *ib_dev, u8 port, union ib_gid *gid, struct ib_gid_attr *attr) { - struct net_device *idev; - unsigned long mask; - int ret; - - idev = ib_device_get_netdev(ib_dev, port); - if (idev && attr->ndev != idev) { - union ib_gid default_gid; - - /* Adding default GIDs is not permitted */ - make_default_gid(idev, &default_gid); - if (!memcmp(gid, &default_gid, sizeof(*gid))) { - dev_put(idev); - return -EPERM; - } - } - if (idev) - dev_put(idev); - - mask = GID_ATTR_FIND_MASK_GID | - GID_ATTR_FIND_MASK_GID_TYPE | - GID_ATTR_FIND_MASK_NETDEV; + unsigned long mask = GID_ATTR_FIND_MASK_GID | + GID_ATTR_FIND_MASK_GID_TYPE | + GID_ATTR_FIND_MASK_NETDEV; - ret = __ib_cache_gid_add(ib_dev, port, gid, attr, mask, false); - return ret; + return __ib_cache_gid_add(ib_dev, port, gid, attr, mask, false); } static int -- cgit v1.2.3 From 5d7ed2f27bbd482fd29e6b2e204b1a1ee8a0b268 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 10 Apr 2019 11:23:04 +0300 Subject: RDMA/cma: Consider scope_id while binding to ipv6 ll address When two netdev have same link local addresses (such as vlan and non vlan), two rdma cm listen id should be able to bind to following different addresses. listener-1: addr=lla, scope_id=A, port=X listener-2: addr=lla, scope_id=B, port=X However while comparing the addresses only addr and port are considered, due to which 2nd listener fails to listen. In below example of two listeners, 2nd listener is failing with address in use error. 
$ rping -sv -a fe80::268a:7ff:feb3:d113%ens2f1 -p 4545& $ rping -sv -a fe80::268a:7ff:feb3:d113%ens2f1.200 -p 4545 rdma_bind_addr: Address already in use To overcome this, consider the scope_ids as well which forms the accurate IPv6 link local address. Signed-off-by: Parav Pandit Reviewed-by: Daniel Jurgens Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 7e139b3839dc..088b5495e199 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1177,18 +1177,31 @@ static inline bool cma_any_addr(const struct sockaddr *addr) return cma_zero_addr(addr) || cma_loopback_addr(addr); } -static int cma_addr_cmp(struct sockaddr *src, struct sockaddr *dst) +static int cma_addr_cmp(const struct sockaddr *src, const struct sockaddr *dst) { if (src->sa_family != dst->sa_family) return -1; switch (src->sa_family) { case AF_INET: - return ((struct sockaddr_in *) src)->sin_addr.s_addr != - ((struct sockaddr_in *) dst)->sin_addr.s_addr; - case AF_INET6: - return ipv6_addr_cmp(&((struct sockaddr_in6 *) src)->sin6_addr, - &((struct sockaddr_in6 *) dst)->sin6_addr); + return ((struct sockaddr_in *)src)->sin_addr.s_addr != + ((struct sockaddr_in *)dst)->sin_addr.s_addr; + case AF_INET6: { + struct sockaddr_in6 *src_addr6 = (struct sockaddr_in6 *)src; + struct sockaddr_in6 *dst_addr6 = (struct sockaddr_in6 *)dst; + bool link_local; + + if (ipv6_addr_cmp(&src_addr6->sin6_addr, + &dst_addr6->sin6_addr)) + return 1; + link_local = ipv6_addr_type(&dst_addr6->sin6_addr) & + IPV6_ADDR_LINKLOCAL; + /* Link local must match their scope_ids */ + return link_local ? 
(src_addr6->sin6_scope_id != + dst_addr6->sin6_scope_id) : + 0; + } + default: return ib_addr_cmp(&((struct sockaddr_ib *) src)->sib_addr, &((struct sockaddr_ib *) dst)->sib_addr); -- cgit v1.2.3 From 574258222281221444b561b05c3a5fa85947a80c Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 4 Apr 2019 09:56:38 +0300 Subject: RDMA/hns: Remove asynchronic QP destroy Verbs destroy callbacks are synchronous operations and can't be delayed. The expectation is that after driver returned from destroy function, the memory can be freed and user won't be able to access it again. Ditch workqueue implementation used in HNS driver. Fixes: d838c481e025 ("IB/hns: Fix the bug when destroy qp") Signed-off-by: Leon Romanovsky Acked-by: oulijun Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_common.h | 33 --- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 374 +--------------------------- drivers/infiniband/hw/hns/hns_roce_hw_v1.h | 12 - 3 files changed, 13 insertions(+), 406 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h index f4c92a7ac1ce..8e95a1aa1b4f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_common.h +++ b/drivers/infiniband/hw/hns/hns_roce_common.h @@ -57,32 +57,6 @@ #define roce_set_bit(origin, shift, val) \ roce_set_field((origin), (1ul << (shift)), (shift), (val)) -/* - * roce_hw_index_cmp_lt - Compare two hardware index values in hisilicon - * SOC, check if a is less than b. - * @a: hardware index value - * @b: hardware index value - * @bits: the number of bits of a and b, range: 0~31. - * - * Hardware index increases continuously till max value, and then restart - * from zero, again and again. Because the bits of reg field is often - * limited, the reg field can only hold the low bits of the hardware index - * in hisilicon SOC. 
- * In some scenes we need to compare two values(a,b) getted from two reg - * fields in this driver, for example: - * If a equals 0xfffe, b equals 0x1 and bits equals 16, we think b has - * incresed from 0xffff to 0x1 and a is less than b. - * If a equals 0xfffe, b equals 0x0xf001 and bits equals 16, we think a - * is bigger than b. - * - * Return true on a less than b, otherwise false. - */ -#define roce_hw_index_mask(bits) ((1ul << (bits)) - 1) -#define roce_hw_index_shift(bits) (32 - (bits)) -#define roce_hw_index_cmp_lt(a, b, bits) \ - ((int)((((a) - (b)) & roce_hw_index_mask(bits)) << \ - roce_hw_index_shift(bits)) < 0) - #define ROCEE_GLB_CFG_ROCEE_DB_SQ_MODE_S 3 #define ROCEE_GLB_CFG_ROCEE_DB_OTH_MODE_S 4 @@ -271,8 +245,6 @@ #define ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M \ (((1UL << 28) - 1) << ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) -#define ROCEE_SDB_PTR_CMP_BITS 28 - #define ROCEE_SDB_INV_CNT_SDB_INV_CNT_S 0 #define ROCEE_SDB_INV_CNT_SDB_INV_CNT_M \ (((1UL << 16) - 1) << ROCEE_SDB_INV_CNT_SDB_INV_CNT_S) @@ -353,13 +325,8 @@ #define ROCEE_CAEP_AE_MASK_REG 0x6C8 #define ROCEE_CAEP_AE_ST_REG 0x6CC -#define ROCEE_SDB_ISSUE_PTR_REG 0x758 -#define ROCEE_SDB_SEND_PTR_REG 0x75C #define ROCEE_CAEP_CQE_WCMD_EMPTY 0x850 #define ROCEE_SCAEP_WR_CQE_CNT 0x8D0 -#define ROCEE_SDB_INV_CNT_REG 0x9A4 -#define ROCEE_SDB_RETRY_CNT_REG 0x9AC -#define ROCEE_TSP_BP_ST_REG 0x9EC #define ROCEE_ECC_UCERR_ALM0_REG 0xB34 #define ROCEE_ECC_CERR_ALM0_REG 0xB40 diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 98c6a41edefd..26d4ed447bea 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -1511,38 +1511,6 @@ static int hns_roce_v1_reset(struct hns_roce_dev *hr_dev, bool dereset) return ret; } -static int hns_roce_des_qp_init(struct hns_roce_dev *hr_dev) -{ - struct device *dev = &hr_dev->pdev->dev; - struct hns_roce_v1_priv *priv; - struct hns_roce_des_qp *des_qp; - - priv = (struct 
hns_roce_v1_priv *)hr_dev->priv; - des_qp = &priv->des_qp; - - des_qp->requeue_flag = 1; - des_qp->qp_wq = create_singlethread_workqueue("hns_roce_destroy_qp"); - if (!des_qp->qp_wq) { - dev_err(dev, "Create destroy qp workqueue failed!\n"); - return -ENOMEM; - } - - return 0; -} - -static void hns_roce_des_qp_free(struct hns_roce_dev *hr_dev) -{ - struct hns_roce_v1_priv *priv; - struct hns_roce_des_qp *des_qp; - - priv = (struct hns_roce_v1_priv *)hr_dev->priv; - des_qp = &priv->des_qp; - - des_qp->requeue_flag = 0; - flush_workqueue(des_qp->qp_wq); - destroy_workqueue(des_qp->qp_wq); -} - static int hns_roce_v1_profile(struct hns_roce_dev *hr_dev) { int i = 0; @@ -1661,12 +1629,6 @@ static int hns_roce_v1_init(struct hns_roce_dev *hr_dev) goto error_failed_tptr_init; } - ret = hns_roce_des_qp_init(hr_dev); - if (ret) { - dev_err(dev, "des qp init failed!\n"); - goto error_failed_des_qp_init; - } - ret = hns_roce_free_mr_init(hr_dev); if (ret) { dev_err(dev, "free mr init failed!\n"); @@ -1678,9 +1640,6 @@ static int hns_roce_v1_init(struct hns_roce_dev *hr_dev) return 0; error_failed_free_mr_init: - hns_roce_des_qp_free(hr_dev); - -error_failed_des_qp_init: hns_roce_tptr_free(hr_dev); error_failed_tptr_init: @@ -1698,7 +1657,6 @@ static void hns_roce_v1_exit(struct hns_roce_dev *hr_dev) { hns_roce_port_enable(hr_dev, HNS_ROCE_PORT_DOWN); hns_roce_free_mr_free(hr_dev); - hns_roce_des_qp_free(hr_dev); hns_roce_tptr_free(hr_dev); hns_roce_bt_free(hr_dev); hns_roce_raq_free(hr_dev); @@ -3644,307 +3602,22 @@ static int hns_roce_v1_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, hns_roce_v1_q_qp(ibqp, qp_attr, qp_attr_mask, qp_init_attr); } -static void hns_roce_check_sdb_status(struct hns_roce_dev *hr_dev, - u32 *old_send, u32 *old_retry, - u32 *tsp_st, u32 *success_flags) -{ - __le32 *old_send_tmp, *old_retry_tmp; - u32 sdb_retry_cnt; - u32 sdb_send_ptr; - u32 cur_cnt, old_cnt; - __le32 tmp, tmp1; - u32 send_ptr; - - sdb_send_ptr = roce_read(hr_dev, 
ROCEE_SDB_SEND_PTR_REG); - sdb_retry_cnt = roce_read(hr_dev, ROCEE_SDB_RETRY_CNT_REG); - tmp = cpu_to_le32(sdb_send_ptr); - tmp1 = cpu_to_le32(sdb_retry_cnt); - cur_cnt = roce_get_field(tmp, ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, - ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) + - roce_get_field(tmp1, ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M, - ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S); - - old_send_tmp = (__le32 *)old_send; - old_retry_tmp = (__le32 *)old_retry; - if (!roce_get_bit(*tsp_st, ROCEE_CNT_CLR_CE_CNT_CLR_CE_S)) { - old_cnt = roce_get_field(*old_send_tmp, - ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, - ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) + - roce_get_field(*old_retry_tmp, - ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M, - ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S); - if (cur_cnt - old_cnt > SDB_ST_CMP_VAL) - *success_flags = 1; - } else { - old_cnt = roce_get_field(*old_send_tmp, - ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, - ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S); - if (cur_cnt - old_cnt > SDB_ST_CMP_VAL) { - *success_flags = 1; - } else { - send_ptr = roce_get_field(*old_send_tmp, - ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, - ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) + - roce_get_field(tmp1, - ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M, - ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S); - roce_set_field(*old_send_tmp, - ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, - ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S, - send_ptr); - } - } -} - -static int check_qp_db_process_status(struct hns_roce_dev *hr_dev, - struct hns_roce_qp *hr_qp, - u32 sdb_issue_ptr, - u32 *sdb_inv_cnt, - u32 *wait_stage) -{ - struct device *dev = &hr_dev->pdev->dev; - u32 sdb_send_ptr, old_send; - __le32 sdb_issue_ptr_tmp; - __le32 sdb_send_ptr_tmp; - u32 success_flags = 0; - unsigned long end; - u32 old_retry; - u32 inv_cnt; - u32 tsp_st; - __le32 tmp; - - if (*wait_stage > HNS_ROCE_V1_DB_STAGE2 || - *wait_stage < HNS_ROCE_V1_DB_STAGE1) { - dev_err(dev, "QP(0x%lx) db status wait stage(%d) error!\n", - hr_qp->qpn, *wait_stage); - return -EINVAL; - } - - /* Calculate the total timeout for the entire 
verification process */ - end = msecs_to_jiffies(HNS_ROCE_V1_CHECK_DB_TIMEOUT_MSECS) + jiffies; - - if (*wait_stage == HNS_ROCE_V1_DB_STAGE1) { - /* Query db process status, until hw process completely */ - sdb_send_ptr = roce_read(hr_dev, ROCEE_SDB_SEND_PTR_REG); - while (roce_hw_index_cmp_lt(sdb_send_ptr, sdb_issue_ptr, - ROCEE_SDB_PTR_CMP_BITS)) { - if (!time_before(jiffies, end)) { - dev_dbg(dev, "QP(0x%lx) db process stage1 timeout. issue 0x%x send 0x%x.\n", - hr_qp->qpn, sdb_issue_ptr, - sdb_send_ptr); - return 0; - } - - msleep(HNS_ROCE_V1_CHECK_DB_SLEEP_MSECS); - sdb_send_ptr = roce_read(hr_dev, - ROCEE_SDB_SEND_PTR_REG); - } - - sdb_send_ptr_tmp = cpu_to_le32(sdb_send_ptr); - sdb_issue_ptr_tmp = cpu_to_le32(sdb_issue_ptr); - if (roce_get_field(sdb_issue_ptr_tmp, - ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_M, - ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_S) == - roce_get_field(sdb_send_ptr_tmp, - ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, - ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S)) { - old_send = roce_read(hr_dev, ROCEE_SDB_SEND_PTR_REG); - old_retry = roce_read(hr_dev, ROCEE_SDB_RETRY_CNT_REG); - - do { - tsp_st = roce_read(hr_dev, ROCEE_TSP_BP_ST_REG); - tmp = cpu_to_le32(tsp_st); - if (roce_get_bit(tmp, - ROCEE_TSP_BP_ST_QH_FIFO_ENTRY_S) == 1) { - *wait_stage = HNS_ROCE_V1_DB_WAIT_OK; - return 0; - } - - if (!time_before(jiffies, end)) { - dev_dbg(dev, "QP(0x%lx) db process stage1 timeout when send ptr equals issue ptr.\n" - "issue 0x%x send 0x%x.\n", - hr_qp->qpn, - le32_to_cpu(sdb_issue_ptr_tmp), - le32_to_cpu(sdb_send_ptr_tmp)); - return 0; - } - - msleep(HNS_ROCE_V1_CHECK_DB_SLEEP_MSECS); - - hns_roce_check_sdb_status(hr_dev, &old_send, - &old_retry, &tsp_st, - &success_flags); - } while (!success_flags); - } - - *wait_stage = HNS_ROCE_V1_DB_STAGE2; - - /* Get list pointer */ - *sdb_inv_cnt = roce_read(hr_dev, ROCEE_SDB_INV_CNT_REG); - dev_dbg(dev, "QP(0x%lx) db process stage2. 
inv cnt = 0x%x.\n", - hr_qp->qpn, *sdb_inv_cnt); - } - - if (*wait_stage == HNS_ROCE_V1_DB_STAGE2) { - /* Query db's list status, until hw reversal */ - inv_cnt = roce_read(hr_dev, ROCEE_SDB_INV_CNT_REG); - while (roce_hw_index_cmp_lt(inv_cnt, - *sdb_inv_cnt + SDB_INV_CNT_OFFSET, - ROCEE_SDB_CNT_CMP_BITS)) { - if (!time_before(jiffies, end)) { - dev_dbg(dev, "QP(0x%lx) db process stage2 timeout. inv cnt 0x%x.\n", - hr_qp->qpn, inv_cnt); - return 0; - } - - msleep(HNS_ROCE_V1_CHECK_DB_SLEEP_MSECS); - inv_cnt = roce_read(hr_dev, ROCEE_SDB_INV_CNT_REG); - } - - *wait_stage = HNS_ROCE_V1_DB_WAIT_OK; - } - - return 0; -} - -static int check_qp_reset_state(struct hns_roce_dev *hr_dev, - struct hns_roce_qp *hr_qp, - struct hns_roce_qp_work *qp_work_entry, - int *is_timeout) -{ - struct device *dev = &hr_dev->pdev->dev; - u32 sdb_issue_ptr; - int ret; - - if (hr_qp->state != IB_QPS_RESET) { - /* Set qp to ERR, waiting for hw complete processing all dbs */ - ret = hns_roce_v1_modify_qp(&hr_qp->ibqp, NULL, 0, hr_qp->state, - IB_QPS_ERR); - if (ret) { - dev_err(dev, "Modify QP(0x%lx) to ERR failed!\n", - hr_qp->qpn); - return ret; - } - - /* Record issued doorbell */ - sdb_issue_ptr = roce_read(hr_dev, ROCEE_SDB_ISSUE_PTR_REG); - qp_work_entry->sdb_issue_ptr = sdb_issue_ptr; - qp_work_entry->db_wait_stage = HNS_ROCE_V1_DB_STAGE1; - - /* Query db process status, until hw process completely */ - ret = check_qp_db_process_status(hr_dev, hr_qp, sdb_issue_ptr, - &qp_work_entry->sdb_inv_cnt, - &qp_work_entry->db_wait_stage); - if (ret) { - dev_err(dev, "Check QP(0x%lx) db process status failed!\n", - hr_qp->qpn); - return ret; - } - - if (qp_work_entry->db_wait_stage != HNS_ROCE_V1_DB_WAIT_OK) { - qp_work_entry->sche_cnt = 0; - *is_timeout = 1; - return 0; - } - - /* Modify qp to reset before destroying qp */ - ret = hns_roce_v1_modify_qp(&hr_qp->ibqp, NULL, 0, hr_qp->state, - IB_QPS_RESET); - if (ret) { - dev_err(dev, "Modify QP(0x%lx) to RST failed!\n", - hr_qp->qpn); - return 
ret; - } - } - - return 0; -} - -static void hns_roce_v1_destroy_qp_work_fn(struct work_struct *work) -{ - struct hns_roce_qp_work *qp_work_entry; - struct hns_roce_v1_priv *priv; - struct hns_roce_dev *hr_dev; - struct hns_roce_qp *hr_qp; - struct device *dev; - unsigned long qpn; - int ret; - - qp_work_entry = container_of(work, struct hns_roce_qp_work, work); - hr_dev = to_hr_dev(qp_work_entry->ib_dev); - dev = &hr_dev->pdev->dev; - priv = (struct hns_roce_v1_priv *)hr_dev->priv; - hr_qp = qp_work_entry->qp; - qpn = hr_qp->qpn; - - dev_dbg(dev, "Schedule destroy QP(0x%lx) work.\n", qpn); - - qp_work_entry->sche_cnt++; - - /* Query db process status, until hw process completely */ - ret = check_qp_db_process_status(hr_dev, hr_qp, - qp_work_entry->sdb_issue_ptr, - &qp_work_entry->sdb_inv_cnt, - &qp_work_entry->db_wait_stage); - if (ret) { - dev_err(dev, "Check QP(0x%lx) db process status failed!\n", - qpn); - return; - } - - if (qp_work_entry->db_wait_stage != HNS_ROCE_V1_DB_WAIT_OK && - priv->des_qp.requeue_flag) { - queue_work(priv->des_qp.qp_wq, work); - return; - } - - /* Modify qp to reset before destroying qp */ - ret = hns_roce_v1_modify_qp(&hr_qp->ibqp, NULL, 0, hr_qp->state, - IB_QPS_RESET); - if (ret) { - dev_err(dev, "Modify QP(0x%lx) to RST failed!\n", qpn); - return; - } - - hns_roce_qp_remove(hr_dev, hr_qp); - hns_roce_qp_free(hr_dev, hr_qp); - - if (hr_qp->ibqp.qp_type == IB_QPT_RC) { - /* RC QP, release QPN */ - hns_roce_release_range_qp(hr_dev, qpn, 1); - kfree(hr_qp); - } else - kfree(hr_to_hr_sqp(hr_qp)); - - kfree(qp_work_entry); - - dev_dbg(dev, "Accomplished destroy QP(0x%lx) work.\n", qpn); -} - int hns_roce_v1_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); - struct device *dev = &hr_dev->pdev->dev; - struct hns_roce_qp_work qp_work_entry; - struct hns_roce_qp_work *qp_work; - struct hns_roce_v1_priv *priv; struct hns_roce_cq 
*send_cq, *recv_cq; - bool is_user = ibqp->uobject; - int is_timeout = 0; int ret; - ret = check_qp_reset_state(hr_dev, hr_qp, &qp_work_entry, &is_timeout); - if (ret) { - dev_err(dev, "QP reset state check failed(%d)!\n", ret); + ret = hns_roce_v1_modify_qp(ibqp, NULL, 0, hr_qp->state, IB_QPS_RESET); + if (ret) return ret; - } send_cq = to_hr_cq(hr_qp->ibqp.send_cq); recv_cq = to_hr_cq(hr_qp->ibqp.recv_cq); hns_roce_lock_cqs(send_cq, recv_cq); - if (!is_user) { + if (!udata) { __hns_roce_v1_cq_clean(recv_cq, hr_qp->qpn, hr_qp->ibqp.srq ? to_hr_srq(hr_qp->ibqp.srq) : NULL); if (send_cq != recv_cq) @@ -3952,18 +3625,16 @@ int hns_roce_v1_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) } hns_roce_unlock_cqs(send_cq, recv_cq); - if (!is_timeout) { - hns_roce_qp_remove(hr_dev, hr_qp); - hns_roce_qp_free(hr_dev, hr_qp); + hns_roce_qp_remove(hr_dev, hr_qp); + hns_roce_qp_free(hr_dev, hr_qp); - /* RC QP, release QPN */ - if (hr_qp->ibqp.qp_type == IB_QPT_RC) - hns_roce_release_range_qp(hr_dev, hr_qp->qpn, 1); - } + /* RC QP, release QPN */ + if (hr_qp->ibqp.qp_type == IB_QPT_RC) + hns_roce_release_range_qp(hr_dev, hr_qp->qpn, 1); hns_roce_mtt_cleanup(hr_dev, &hr_qp->mtt); - if (is_user) + if (udata) ib_umem_release(hr_qp->umem); else { kfree(hr_qp->sq.wrid); @@ -3972,29 +3643,10 @@ int hns_roce_v1_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf); } - if (!is_timeout) { - if (hr_qp->ibqp.qp_type == IB_QPT_RC) - kfree(hr_qp); - else - kfree(hr_to_hr_sqp(hr_qp)); - } else { - qp_work = kzalloc(sizeof(*qp_work), GFP_KERNEL); - if (!qp_work) - return -ENOMEM; - - INIT_WORK(&qp_work->work, hns_roce_v1_destroy_qp_work_fn); - qp_work->ib_dev = &hr_dev->ib_dev; - qp_work->qp = hr_qp; - qp_work->db_wait_stage = qp_work_entry.db_wait_stage; - qp_work->sdb_issue_ptr = qp_work_entry.sdb_issue_ptr; - qp_work->sdb_inv_cnt = qp_work_entry.sdb_inv_cnt; - qp_work->sche_cnt = qp_work_entry.sche_cnt; - - priv = (struct 
hns_roce_v1_priv *)hr_dev->priv; - queue_work(priv->des_qp.qp_wq, &qp_work->work); - dev_dbg(dev, "Begin destroy QP(0x%lx) work.\n", hr_qp->qpn); - } - + if (hr_qp->ibqp.qp_type == IB_QPT_RC) + kfree(hr_qp); + else + kfree(hr_to_hr_sqp(hr_qp)); return 0; } diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h index 1a2c38785c7f..52307b2c7100 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h @@ -110,11 +110,6 @@ #define HNS_ROCE_V1_EXT_ODB_ALFUL \ (HNS_ROCE_V1_EXT_ODB_DEPTH - HNS_ROCE_V1_DB_RSVD) -#define HNS_ROCE_V1_DB_WAIT_OK 0 -#define HNS_ROCE_V1_DB_STAGE1 1 -#define HNS_ROCE_V1_DB_STAGE2 2 -#define HNS_ROCE_V1_CHECK_DB_TIMEOUT_MSECS 10000 -#define HNS_ROCE_V1_CHECK_DB_SLEEP_MSECS 20 #define HNS_ROCE_V1_FREE_MR_TIMEOUT_MSECS 50000 #define HNS_ROCE_V1_RECREATE_LP_QP_TIMEOUT_MSECS 10000 #define HNS_ROCE_V1_FREE_MR_WAIT_VALUE 5 @@ -162,7 +157,6 @@ #define SQ_PSN_SHIFT 8 #define QKEY_VAL 0x80010000 #define SDB_INV_CNT_OFFSET 8 -#define SDB_ST_CMP_VAL 8 #define HNS_ROCE_CEQ_DEFAULT_INTERVAL 0x10 #define HNS_ROCE_CEQ_DEFAULT_BURST_NUM 0x10 @@ -1068,11 +1062,6 @@ struct hns_roce_qp_work { u32 sche_cnt; }; -struct hns_roce_des_qp { - struct workqueue_struct *qp_wq; - int requeue_flag; -}; - struct hns_roce_mr_free_work { struct work_struct work; struct ib_device *ib_dev; @@ -1100,7 +1089,6 @@ struct hns_roce_v1_priv { struct hns_roce_raq_table raq_table; struct hns_roce_bt_table bt_table; struct hns_roce_tptr_table tptr_table; - struct hns_roce_des_qp des_qp; struct hns_roce_free_mr free_mr; }; -- cgit v1.2.3 From 07c5ba912401b2ae3f13e3ce214158aec723c3fd Mon Sep 17 00:00:00 2001 From: Josh Collier Date: Thu, 11 Apr 2019 07:07:42 -0700 Subject: IB/hfi1: Add debugfs to control expansion ROM write protect Some kernels now enable CONFIG_IO_STRICT_DEVMEM which prevents multiple handles to PCI resource0. 
In order to continue to support expansion ROM updates while the driver is loaded, the driver must now provide an interface to control the expansion ROM write protection. This patch adds an exprom_wp debugfs interface that allows the hfi1_eprom user tool to disable the expansion ROM write protection by opening the file and writing a '1'. The write protection is released when writing a '0' or automatically re-enabled when the file handle is closed. The current implementation will only allow one handle to be opened at a time across all hfi1 devices. Reviewed-by: Dennis Dalessandro Signed-off-by: Josh Collier Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/debugfs.c | 74 ++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c index 057bb82c664f..15efb4a380b2 100644 --- a/drivers/infiniband/hw/hfi1/debugfs.c +++ b/drivers/infiniband/hw/hfi1/debugfs.c @@ -1080,6 +1080,77 @@ static int qsfp2_debugfs_release(struct inode *in, struct file *fp) return __qsfp_debugfs_release(in, fp, 1); } +#define EXPROM_WRITE_ENABLE BIT_ULL(14) + +static bool exprom_wp_disabled; + +static int exprom_wp_set(struct hfi1_devdata *dd, bool disable) +{ + u64 gpio_val = 0; + + if (disable) { + gpio_val = EXPROM_WRITE_ENABLE; + exprom_wp_disabled = true; + dd_dev_info(dd, "Disable Expansion ROM Write Protection\n"); + } else { + exprom_wp_disabled = false; + dd_dev_info(dd, "Enable Expansion ROM Write Protection\n"); + } + + write_csr(dd, ASIC_GPIO_OUT, gpio_val); + write_csr(dd, ASIC_GPIO_OE, gpio_val); + + return 0; +} + +static ssize_t exprom_wp_debugfs_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + return 0; +} + +static ssize_t exprom_wp_debugfs_write(struct file *file, + const char __user *buf, size_t count, + loff_t *ppos) +{ + struct hfi1_pportdata *ppd = private2ppd(file); + char cdata; + + if (count != 1) + 
return -EINVAL; + if (get_user(cdata, buf)) + return -EFAULT; + if (cdata == '0') + exprom_wp_set(ppd->dd, false); + else if (cdata == '1') + exprom_wp_set(ppd->dd, true); + else + return -EINVAL; + + return 1; +} + +static unsigned long exprom_in_use; + +static int exprom_wp_debugfs_open(struct inode *in, struct file *fp) +{ + if (test_and_set_bit(0, &exprom_in_use)) + return -EBUSY; + + return 0; +} + +static int exprom_wp_debugfs_release(struct inode *in, struct file *fp) +{ + struct hfi1_pportdata *ppd = private2ppd(fp); + + if (exprom_wp_disabled) + exprom_wp_set(ppd->dd, false); + clear_bit(0, &exprom_in_use); + + return 0; +} + #define DEBUGFS_OPS(nm, readroutine, writeroutine) \ { \ .name = nm, \ @@ -1119,6 +1190,9 @@ static const struct counter_info port_cntr_ops[] = { qsfp1_debugfs_open, qsfp1_debugfs_release), DEBUGFS_XOPS("qsfp2", qsfp2_debugfs_read, qsfp2_debugfs_write, qsfp2_debugfs_open, qsfp2_debugfs_release), + DEBUGFS_XOPS("exprom_wp", exprom_wp_debugfs_read, + exprom_wp_debugfs_write, exprom_wp_debugfs_open, + exprom_wp_debugfs_release), DEBUGFS_OPS("asic_flags", asic_flags_read, asic_flags_write), DEBUGFS_OPS("dc8051_memory", dc8051_memory_read, NULL), DEBUGFS_OPS("lcb", debugfs_lcb_read, debugfs_lcb_write), -- cgit v1.2.3 From ea752bc5e50a03e337dfa5c8940d357c62300f8a Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Thu, 11 Apr 2019 07:15:49 -0700 Subject: IB/{rdmavt, hfi1): Miscellaneous comment fixes This patch fixes miscellaneous comment errors. 
Reviewed-by: Mike Marciniszyn Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/ruc.c | 2 +- include/rdma/rdmavt_qp.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index 124a3ec1e15c..23ac6057b211 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -524,7 +524,7 @@ void _hfi1_do_send(struct work_struct *work) /** * hfi1_do_send - perform a send on a QP - * @work: contains a pointer to the QP + * @qp: a pointer to the QP * @in_thread: true if in a workqueue thread * * Process entries in the send work queue until credit or queue is diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index f0fbd4063fef..0ad89867b258 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -83,7 +83,6 @@ * RVT_S_WAIT_DMA - waiting for send DMA queue to drain before generating * next send completion entry not via send DMA * RVT_S_WAIT_PIO - waiting for a send buffer to be available - * RVT_S_WAIT_PIO_DRAIN - waiting for a qp to drain pio packets * RVT_S_WAIT_TX - waiting for a struct verbs_txreq to be available * RVT_S_WAIT_DMA_DESC - waiting for DMA descriptors to be available * RVT_S_WAIT_KMEM - waiting for kernel memory to be available -- cgit v1.2.3 From 62644c1d2bb7c8f7ec259e45a123dca6bbaa0f7b Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Thu, 11 Apr 2019 07:16:00 -0700 Subject: IB/hfi1: Make opfn.h self sufficient The opfn.h include file buildability depends on the including file having the correct includes. Fix by making opfn.h self sufficient.
Reviewed-by: Kaike Wan Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/opfn.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/opfn.h b/drivers/infiniband/hw/hfi1/opfn.h index 5f2011cabc25..62f93c1dc082 100644 --- a/drivers/infiniband/hw/hfi1/opfn.h +++ b/drivers/infiniband/hw/hfi1/opfn.h @@ -47,12 +47,14 @@ * for future transactions */ +#include +#include +#include + /* STL Verbs Extended */ #define IB_BTHE_E_SHIFT 24 #define HFI1_VERBS_E_ATOMIC_VADDR U64_MAX -struct ib_atomic_eth; - enum hfi1_opfn_codes { STL_VERBS_EXTD_NONE = 0, STL_VERBS_EXTD_TID_RDMA, -- cgit v1.2.3 From 715ab1a862c85b08a9881851c7b1fba84b0dc26b Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Thu, 11 Apr 2019 07:16:11 -0700 Subject: IB/rdmavt: Fix ab/ba include issues The current include file ordering for rdmavt headers has an ab/ba include issue that precludes using inlines from rdma_vt.h in rdmavt_qp.h. At the heart of the issue is that rdma_vt.h includes rdmavt_qp.h. Fix the ordering issue by adjusting rdma_vt.h to not require rdmavt_qp.h and move qp related inlines to rdmavt_qp.h. Additionally, promote rvt_mmap_info to rdma_vt.h since it is shared by rdmavt_cq.h and rdmavt_qp.h. Reviewed-by: Michael J.
Ruhl Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qib/qib_verbs.h | 2 +- drivers/infiniband/sw/rdmavt/qp.h | 2 +- drivers/infiniband/sw/rdmavt/rc.c | 2 +- drivers/infiniband/sw/rdmavt/trace_qp.h | 2 +- drivers/infiniband/sw/rdmavt/trace_rc.h | 2 +- drivers/infiniband/sw/rdmavt/trace_tx.h | 2 +- include/rdma/rdma_vt.h | 78 +++++++-------------------------- include/rdma/rdmavt_qp.h | 74 +++++++++++++++++++++++++------ 8 files changed, 82 insertions(+), 82 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index a4426c24b0d1..17bdf8acee2f 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -46,7 +46,7 @@ #include #include #include -#include +#include #include struct qib_ctxtdata; diff --git a/drivers/infiniband/sw/rdmavt/qp.h b/drivers/infiniband/sw/rdmavt/qp.h index 450b27ea1fa4..6db1619389b0 100644 --- a/drivers/infiniband/sw/rdmavt/qp.h +++ b/drivers/infiniband/sw/rdmavt/qp.h @@ -48,7 +48,7 @@ * */ -#include +#include int rvt_driver_qp_init(struct rvt_dev_info *rdi); void rvt_qp_exit(struct rvt_dev_info *rdi); diff --git a/drivers/infiniband/sw/rdmavt/rc.c b/drivers/infiniband/sw/rdmavt/rc.c index 8d71647820a8..09f0cf538be6 100644 --- a/drivers/infiniband/sw/rdmavt/rc.c +++ b/drivers/infiniband/sw/rdmavt/rc.c @@ -45,7 +45,7 @@ * */ -#include +#include #include /* diff --git a/drivers/infiniband/sw/rdmavt/trace_qp.h b/drivers/infiniband/sw/rdmavt/trace_qp.h index efc9d814b032..c32d21cc615e 100644 --- a/drivers/infiniband/sw/rdmavt/trace_qp.h +++ b/drivers/infiniband/sw/rdmavt/trace_qp.h @@ -51,7 +51,7 @@ #include #include -#include +#include #undef TRACE_SYSTEM #define TRACE_SYSTEM rvt_qp diff --git a/drivers/infiniband/sw/rdmavt/trace_rc.h b/drivers/infiniband/sw/rdmavt/trace_rc.h index 995276933a55..c47357af2099 100644 --- a/drivers/infiniband/sw/rdmavt/trace_rc.h +++ 
b/drivers/infiniband/sw/rdmavt/trace_rc.h @@ -51,7 +51,7 @@ #include #include -#include +#include #undef TRACE_SYSTEM #define TRACE_SYSTEM rvt_rc diff --git a/drivers/infiniband/sw/rdmavt/trace_tx.h b/drivers/infiniband/sw/rdmavt/trace_tx.h index d5df352eadb1..d963ca755828 100644 --- a/drivers/infiniband/sw/rdmavt/trace_tx.h +++ b/drivers/infiniband/sw/rdmavt/trace_tx.h @@ -51,7 +51,7 @@ #include #include -#include +#include #undef TRACE_SYSTEM #define TRACE_SYSTEM rvt_tx diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 4c257aff7d32..b9cd06db1a71 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -59,7 +59,6 @@ #include #include #include -#include #define RVT_MAX_PKEY_VALUES 16 @@ -72,6 +71,8 @@ struct trap_list { struct list_head list; }; +struct rvt_qp; +struct rvt_qpn_table; struct rvt_ibport { struct rvt_qp __rcu *qp[2]; struct ib_mad_agent *send_agent; /* agent for SMI (traps) */ @@ -206,6 +207,20 @@ struct rvt_ah { u8 log_pmtu; }; +/* + * This structure is used by rvt_mmap() to validate an offset + * when an mmap() request is made. The vm_area_struct then uses + * this as its vm_private_data. 
+ */ +struct rvt_mmap_info { + struct list_head pending_mmaps; + struct ib_ucontext *context; + void *obj; + __u64 offset; + struct kref ref; + u32 size; +}; + /* memory working set size */ struct rvt_wss { unsigned long *entries; @@ -501,16 +516,6 @@ static inline struct rvt_dev_info *ib_to_rvt(struct ib_device *ibdev) return container_of(ibdev, struct rvt_dev_info, ibdev); } -static inline struct rvt_srq *ibsrq_to_rvtsrq(struct ib_srq *ibsrq) -{ - return container_of(ibsrq, struct rvt_srq, ibsrq); -} - -static inline struct rvt_qp *ibqp_to_rvtqp(struct ib_qp *ibqp) -{ - return container_of(ibqp, struct rvt_qp, ibqp); -} - static inline unsigned rvt_get_npkeys(struct rvt_dev_info *rdi) { /* @@ -548,57 +553,6 @@ static inline u16 rvt_get_pkey(struct rvt_dev_info *rdi, return rdi->ports[port_index]->pkey_table[index]; } -/** - * rvt_lookup_qpn - return the QP with the given QPN - * @ibp: the ibport - * @qpn: the QP number to look up - * - * The caller must hold the rcu_read_lock(), and keep the lock until - * the returned qp is no longer in use. 
- */ -/* TODO: Remove this and put in rdmavt/qp.h when no longer needed by drivers */ -static inline struct rvt_qp *rvt_lookup_qpn(struct rvt_dev_info *rdi, - struct rvt_ibport *rvp, - u32 qpn) __must_hold(RCU) -{ - struct rvt_qp *qp = NULL; - - if (unlikely(qpn <= 1)) { - qp = rcu_dereference(rvp->qp[qpn]); - } else { - u32 n = hash_32(qpn, rdi->qp_dev->qp_table_bits); - - for (qp = rcu_dereference(rdi->qp_dev->qp_table[n]); qp; - qp = rcu_dereference(qp->next)) - if (qp->ibqp.qp_num == qpn) - break; - } - return qp; -} - -/** - * rvt_mod_retry_timer - mod a retry timer - * @qp - the QP - * @shift - timeout shift to wait for multiple packets - * Modify a potentially already running retry timer - */ -static inline void rvt_mod_retry_timer_ext(struct rvt_qp *qp, u8 shift) -{ - struct ib_qp *ibqp = &qp->ibqp; - struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device); - - lockdep_assert_held(&qp->s_lock); - qp->s_flags |= RVT_S_TIMER; - /* 4.096 usec. * (1 << qp->timeout) */ - mod_timer(&qp->s_timer, jiffies + rdi->busy_jiffies + - (qp->timeout_jiffies << shift)); -} - -static inline void rvt_mod_retry_timer(struct rvt_qp *qp) -{ - return rvt_mod_retry_timer_ext(qp, 0); -} - struct rvt_dev_info *rvt_alloc_device(size_t size, int nports); void rvt_dealloc_device(struct rvt_dev_info *rdi); int rvt_register_device(struct rvt_dev_info *rvd, u32 driver_id); diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index 0ad89867b258..a00c46a4e779 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -210,20 +210,6 @@ struct rvt_rq { spinlock_t lock ____cacheline_aligned_in_smp; }; -/* - * This structure is used by rvt_mmap() to validate an offset - * when an mmap() request is made. The vm_area_struct then uses - * this as its vm_private_data. 
- */ -struct rvt_mmap_info { - struct list_head pending_mmaps; - struct ib_ucontext *context; - void *obj; - __u64 offset; - struct kref ref; - unsigned size; -}; - /* * This structure holds the information that the send tasklet needs * to send a RDMA read response or atomic operation. @@ -398,6 +384,16 @@ struct rvt_srq { u32 limit; }; +static inline struct rvt_srq *ibsrq_to_rvtsrq(struct ib_srq *ibsrq) +{ + return container_of(ibsrq, struct rvt_srq, ibsrq); +} + +static inline struct rvt_qp *ibqp_to_rvtqp(struct ib_qp *ibqp) +{ + return container_of(ibqp, struct rvt_qp, ibqp); +} + #define RVT_QPN_MAX BIT(24) #define RVT_QPNMAP_ENTRIES (RVT_QPN_MAX / PAGE_SIZE / BITS_PER_BYTE) #define RVT_BITS_PER_PAGE (PAGE_SIZE * BITS_PER_BYTE) @@ -677,6 +673,56 @@ static inline unsigned long rvt_timeout_to_jiffies(u8 timeout) return usecs_to_jiffies(1U << timeout) * 4096UL / 1000UL; } +/** + * rvt_lookup_qpn - return the QP with the given QPN + * @ibp: the ibport + * @qpn: the QP number to look up + * + * The caller must hold the rcu_read_lock(), and keep the lock until + * the returned qp is no longer in use. + */ +static inline struct rvt_qp *rvt_lookup_qpn(struct rvt_dev_info *rdi, + struct rvt_ibport *rvp, + u32 qpn) __must_hold(RCU) +{ + struct rvt_qp *qp = NULL; + + if (unlikely(qpn <= 1)) { + qp = rcu_dereference(rvp->qp[qpn]); + } else { + u32 n = hash_32(qpn, rdi->qp_dev->qp_table_bits); + + for (qp = rcu_dereference(rdi->qp_dev->qp_table[n]); qp; + qp = rcu_dereference(qp->next)) + if (qp->ibqp.qp_num == qpn) + break; + } + return qp; +} + +/** + * rvt_mod_retry_timer - mod a retry timer + * @qp - the QP + * @shift - timeout shift to wait for multiple packets + * Modify a potentially already running retry timer + */ +static inline void rvt_mod_retry_timer_ext(struct rvt_qp *qp, u8 shift) +{ + struct ib_qp *ibqp = &qp->ibqp; + struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device); + + lockdep_assert_held(&qp->s_lock); + qp->s_flags |= RVT_S_TIMER; + /* 4.096 usec. 
* (1 << qp->timeout) */ + mod_timer(&qp->s_timer, jiffies + rdi->busy_jiffies + + (qp->timeout_jiffies << shift)); +} + +static inline void rvt_mod_retry_timer(struct rvt_qp *qp) +{ + return rvt_mod_retry_timer_ext(qp, 0); +} + extern const int ib_rvt_state_ops[]; struct rvt_dev_info; -- cgit v1.2.3 From 52cdbcc2b1822974f547c7b892e8e1e8109d185e Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Thu, 11 Apr 2019 07:16:23 -0700 Subject: IB/rdmavt: Use more efficient allowed_ops QP creation already records the allowed_ops. Take advantage of that single field to replace multiple qp_type specific tests. Reviewed-by: Michael J. Ruhl Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rdmavt/qp.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 90ed99f4b026..2460303053eb 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -624,10 +624,7 @@ static void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends) struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_last); rvt_put_swqe(wqe); - - if (qp->ibqp.qp_type == IB_QPT_UD || - qp->ibqp.qp_type == IB_QPT_SMI || - qp->ibqp.qp_type == IB_QPT_GSI) + if (qp->allowed_ops == IB_OPCODE_UD) atomic_dec(&ibah_to_rvtah( wqe->ud_wr.ah)->refcount); if (++qp->s_last >= qp->s_size) @@ -2015,8 +2012,7 @@ static int rvt_post_one_wr(struct rvt_qp *qp, * opportunity to adjust PSN values based on internal checks. 
*/ log_pmtu = qp->log_pmtu; - if (qp->ibqp.qp_type != IB_QPT_UC && - qp->ibqp.qp_type != IB_QPT_RC) { + if (qp->allowed_ops == IB_OPCODE_UD) { struct rvt_ah *ah = ibah_to_rvtah(wqe->ud_wr.ah); log_pmtu = ah->log_pmtu; @@ -2064,8 +2060,7 @@ static int rvt_post_one_wr(struct rvt_qp *qp, return 0; bail_inval_free_ref: - if (qp->ibqp.qp_type != IB_QPT_UC && - qp->ibqp.qp_type != IB_QPT_RC) + if (qp->allowed_ops == IB_OPCODE_UD) atomic_dec(&ibah_to_rvtah(ud_wr(wr)->ah)->refcount); bail_inval_free: /* release mr holds */ @@ -2689,9 +2684,7 @@ void rvt_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, /* See post_send() */ barrier(); rvt_put_swqe(wqe); - if (qp->ibqp.qp_type == IB_QPT_UD || - qp->ibqp.qp_type == IB_QPT_SMI || - qp->ibqp.qp_type == IB_QPT_GSI) + if (qp->allowed_ops == IB_OPCODE_UD) atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount); rvt_qp_swqe_complete(qp, -- cgit v1.2.3 From d40f69c9b9dff3e47d9647943db267b5792ae215 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Fri, 12 Apr 2019 06:41:42 -0700 Subject: IB/{rdmavt, qib, hfi1}: Use new routine to release reference counts The reference count adjustments on reference count completion are open coded throughout. Add a routine to do all reference count adjustments and use. Reviewed-by: Michael J. 
Ruhl Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/rc.c | 4 ++-- drivers/infiniband/hw/qib/qib_rc.c | 4 ++-- drivers/infiniband/sw/rdmavt/qp.c | 9 ++------- include/rdma/rdmavt_qp.h | 14 ++++++++++++++ 4 files changed, 20 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 5ba39a9f65ad..a922edcf23d6 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -1834,7 +1834,7 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_opa_header *opah) qp->s_last = s_last; /* see post_send() */ barrier(); - rvt_put_swqe(wqe); + rvt_put_qp_swqe(qp, wqe); rvt_qp_swqe_complete(qp, wqe, ib_hfi1_wc_opcode[wqe->wr.opcode], @@ -1882,7 +1882,7 @@ struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, u32 s_last; trdma_clean_swqe(qp, wqe); - rvt_put_swqe(wqe); + rvt_put_qp_swqe(qp, wqe); rvt_qp_wqe_unreserve(qp, wqe); s_last = qp->s_last; trace_hfi1_qp_send_completion(qp, wqe, s_last); diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c index 50dd9811b088..2ac4c67f5ba1 100644 --- a/drivers/infiniband/hw/qib/qib_rc.c +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -933,7 +933,7 @@ void qib_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr) qp->s_last = s_last; /* see post_send() */ barrier(); - rvt_put_swqe(wqe); + rvt_put_qp_swqe(qp, wqe); rvt_qp_swqe_complete(qp, wqe, ib_qib_wc_opcode[wqe->wr.opcode], @@ -975,7 +975,7 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) > 0) { u32 s_last; - rvt_put_swqe(wqe); + rvt_put_qp_swqe(qp, wqe); s_last = qp->s_last; if (++s_last >= qp->s_size) s_last = 0; diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 2460303053eb..31a2e65e4906 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -623,10 +623,7 
@@ static void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends) while (qp->s_last != qp->s_head) { struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_last); - rvt_put_swqe(wqe); - if (qp->allowed_ops == IB_OPCODE_UD) - atomic_dec(&ibah_to_rvtah( - wqe->ud_wr.ah)->refcount); + rvt_put_qp_swqe(qp, wqe); if (++qp->s_last >= qp->s_size) qp->s_last = 0; smp_wmb(); /* see qp_set_savail */ @@ -2683,9 +2680,7 @@ void rvt_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, qp->s_last = last; /* See post_send() */ barrier(); - rvt_put_swqe(wqe); - if (qp->allowed_ops == IB_OPCODE_UD) - atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount); + rvt_put_qp_swqe(qp, wqe); rvt_qp_swqe_complete(qp, wqe, diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index a00c46a4e779..68e38c20afc0 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -723,6 +723,20 @@ static inline void rvt_mod_retry_timer(struct rvt_qp *qp) return rvt_mod_retry_timer_ext(qp, 0); } +/** + * rvt_put_qp_swqe - drop refs held by swqe + * @qp: the send qp + * @wqe: the send wqe + * + * This drops any references held by the swqe + */ +static inline void rvt_put_qp_swqe(struct rvt_qp *qp, struct rvt_swqe *wqe) +{ + rvt_put_swqe(wqe); + if (qp->allowed_ops == IB_OPCODE_UD) + atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount); +} + extern const int ib_rvt_state_ops[]; struct rvt_dev_info; -- cgit v1.2.3 From a9c62e007878ba88b703369c1cd9e26682453665 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Thu, 11 Apr 2019 07:17:10 -0700 Subject: IB/hfi1: Add selected Rcv counters These counters are required for error analysis and debug. 
Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/chip.c | 3 +++ drivers/infiniband/hw/hfi1/chip.h | 3 +++ drivers/infiniband/hw/hfi1/chip_registers.h | 3 +++ 3 files changed, 9 insertions(+) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 229d5d4cafe8..ec2df39c2f60 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -4104,6 +4104,9 @@ def_access_ibp_counter(seq_naks); static struct cntr_entry dev_cntrs[DEV_CNTR_LAST] = { [C_RCV_OVF] = RXE32_DEV_CNTR_ELEM(RcvOverflow, RCV_BUF_OVFL_CNT, CNTR_SYNTH), +[C_RX_LEN_ERR] = RXE32_DEV_CNTR_ELEM(RxLenErr, RCV_LENGTH_ERR_CNT, CNTR_SYNTH), +[C_RX_ICRC_ERR] = RXE32_DEV_CNTR_ELEM(RxICrcErr, RCV_ICRC_ERR_CNT, CNTR_SYNTH), +[C_RX_EBP] = RXE32_DEV_CNTR_ELEM(RxEbpCnt, RCV_EBP_CNT, CNTR_SYNTH), [C_RX_TID_FULL] = RXE32_DEV_CNTR_ELEM(RxTIDFullEr, RCV_TID_FULL_ERR_CNT, CNTR_NORMAL), [C_RX_TID_INVALID] = RXE32_DEV_CNTR_ELEM(RxTIDInvalid, RCV_TID_VALID_ERR_CNT, diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index 6c27c1c6a868..4e6c3556ec48 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -858,6 +858,9 @@ static inline int idx_from_vl(int vl) /* Per device counter indexes */ enum { C_RCV_OVF = 0, + C_RX_LEN_ERR, + C_RX_ICRC_ERR, + C_RX_EBP, C_RX_TID_FULL, C_RX_TID_INVALID, C_RX_TID_FLGMS, diff --git a/drivers/infiniband/hw/hfi1/chip_registers.h b/drivers/infiniband/hw/hfi1/chip_registers.h index c0800ea5a3f8..ab3589d17aee 100644 --- a/drivers/infiniband/hw/hfi1/chip_registers.h +++ b/drivers/infiniband/hw/hfi1/chip_registers.h @@ -380,6 +380,9 @@ #define DC_LCB_PRF_TX_FLIT_CNT (DC_LCB_CSRS + 0x000000000418) #define DC_LCB_STS_LINK_TRANSFER_ACTIVE (DC_LCB_CSRS + 0x000000000468) #define DC_LCB_STS_ROUND_TRIP_LTP_CNT (DC_LCB_CSRS + 0x0000000004B0) +#define RCV_LENGTH_ERR_CNT 0 
+#define RCV_ICRC_ERR_CNT 6 +#define RCV_EBP_CNT 9 #define RCV_BUF_OVFL_CNT 10 #define RCV_CONTEXT_EGR_STALL 22 #define RCV_DATA_PKT_CNT 0 -- cgit v1.2.3 From 3c176c9d72446217f6451543452692141eb665dc Mon Sep 17 00:00:00 2001 From: John Fleck Date: Thu, 11 Apr 2019 07:17:21 -0700 Subject: IB/hfi1: Remove reference to RHF.VCRCErr The bit VCRCErr in the receive header flag is actually a reserved field. Remove bit operations on this field. Reviewed-by: Michael J. Ruhl Reviewed-by: Dennis Dalessandro Signed-off-by: John Fleck Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/common.h | 2 +- drivers/infiniband/hw/hfi1/driver.c | 5 ++--- drivers/infiniband/hw/hfi1/tid_rdma.c | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h index 7310a5dba420..d47da7b0438f 100644 --- a/drivers/infiniband/hw/hfi1/common.h +++ b/drivers/infiniband/hw/hfi1/common.h @@ -286,7 +286,7 @@ struct diag_pkt { #define RHF_TID_ERR (0x1ull << 59) #define RHF_LEN_ERR (0x1ull << 60) #define RHF_ECC_ERR (0x1ull << 61) -#define RHF_VCRC_ERR (0x1ull << 62) +#define RHF_RESERVED (0x1ull << 62) #define RHF_ICRC_ERR (0x1ull << 63) #define RHF_ERROR_SMASK 0xffe0000000000000ull /* bits 63:53 */ diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 129e48ec9ee0..01aa1f132f55 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -262,7 +262,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, hfi1_dbg_fault_suppress_err(verbs_dev)) return; - if (packet->rhf & (RHF_VCRC_ERR | RHF_ICRC_ERR)) + if (packet->rhf & RHF_ICRC_ERR) return; if (packet->etype == RHF_RCV_TYPE_BYPASS) { @@ -1581,7 +1581,7 @@ static void show_eflags_errs(struct hfi1_packet *packet) u32 rte = rhf_rcv_type_err(packet->rhf); dd_dev_err(rcd->dd, - "receive context %d: rhf 0x%016llx, errs [ 
%s%s%s%s%s%s%s%s] rte 0x%x\n", + "receive context %d: rhf 0x%016llx, errs [ %s%s%s%s%s%s%s] rte 0x%x\n", rcd->ctxt, packet->rhf, packet->rhf & RHF_K_HDR_LEN_ERR ? "k_hdr_len " : "", packet->rhf & RHF_DC_UNC_ERR ? "dc_unc " : "", @@ -1589,7 +1589,6 @@ static void show_eflags_errs(struct hfi1_packet *packet) packet->rhf & RHF_TID_ERR ? "tid " : "", packet->rhf & RHF_LEN_ERR ? "len " : "", packet->rhf & RHF_ECC_ERR ? "ecc " : "", - packet->rhf & RHF_VCRC_ERR ? "vcrc " : "", packet->rhf & RHF_ICRC_ERR ? "icrc " : "", rte); } diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c index eae6f05ca2fa..cf72bf39ff1e 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.c +++ b/drivers/infiniband/hw/hfi1/tid_rdma.c @@ -2920,7 +2920,7 @@ bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd, trace_hfi1_msg_handle_kdeth_eflags(NULL, "Kdeth error: rhf ", packet->rhf); - if (packet->rhf & (RHF_VCRC_ERR | RHF_ICRC_ERR)) + if (packet->rhf & RHF_ICRC_ERR) return ret; packet->ohdr = &hdr->u.oth; -- cgit v1.2.3 From ddcdc368b1033e19fd3a5f750752e10e28a87826 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 16 Apr 2019 14:07:29 +0300 Subject: RDMA/mlx5: Use get_zeroed_page() for clock_info get_zeroed_page() returns a virtual address for the page which is better than allocating a struct page and doing a permanent kmap on it. 
Cc: stable@vger.kernel.org Signed-off-by: Jason Gunthorpe Reviewed-by: Haggai Eran Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 5 ++-- .../net/ethernet/mellanox/mlx5/core/lib/clock.c | 30 ++++++++-------------- include/linux/mlx5/driver.h | 1 - 3 files changed, 14 insertions(+), 22 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index d3dd290ae1b1..da81402992bc 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2070,11 +2070,12 @@ static int mlx5_ib_mmap_clock_info_page(struct mlx5_ib_dev *dev, return -EPERM; vma->vm_flags &= ~VM_MAYWRITE; - if (!dev->mdev->clock_info_page) + if (!dev->mdev->clock_info) return -EOPNOTSUPP; return rdma_user_mmap_page(&context->ibucontext, vma, - dev->mdev->clock_info_page, PAGE_SIZE); + virt_to_page(dev->mdev->clock_info), + PAGE_SIZE); } static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index ca0ee9916e9e..0059b290e095 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -535,23 +535,16 @@ void mlx5_init_clock(struct mlx5_core_dev *mdev) do_div(ns, NSEC_PER_SEC / HZ); clock->overflow_period = ns; - mdev->clock_info_page = alloc_page(GFP_KERNEL); - if (mdev->clock_info_page) { - mdev->clock_info = kmap(mdev->clock_info_page); - if (!mdev->clock_info) { - __free_page(mdev->clock_info_page); - mlx5_core_warn(mdev, "failed to map clock page\n"); - } else { - mdev->clock_info->sign = 0; - mdev->clock_info->nsec = clock->tc.nsec; - mdev->clock_info->cycles = clock->tc.cycle_last; - mdev->clock_info->mask = clock->cycles.mask; - mdev->clock_info->mult = clock->nominal_c_mult; - mdev->clock_info->shift = clock->cycles.shift; - mdev->clock_info->frac = clock->tc.frac; - 
mdev->clock_info->overflow_period = - clock->overflow_period; - } + mdev->clock_info = + (struct mlx5_ib_clock_info *)get_zeroed_page(GFP_KERNEL); + if (mdev->clock_info) { + mdev->clock_info->nsec = clock->tc.nsec; + mdev->clock_info->cycles = clock->tc.cycle_last; + mdev->clock_info->mask = clock->cycles.mask; + mdev->clock_info->mult = clock->nominal_c_mult; + mdev->clock_info->shift = clock->cycles.shift; + mdev->clock_info->frac = clock->tc.frac; + mdev->clock_info->overflow_period = clock->overflow_period; } INIT_WORK(&clock->pps_info.out_work, mlx5_pps_out); @@ -599,8 +592,7 @@ void mlx5_cleanup_clock(struct mlx5_core_dev *mdev) cancel_delayed_work_sync(&clock->overflow_work); if (mdev->clock_info) { - kunmap(mdev->clock_info_page); - __free_page(mdev->clock_info_page); + free_page((unsigned long)mdev->clock_info); mdev->clock_info = NULL; } diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 0d0729648844..9ffc53acaec1 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -681,7 +681,6 @@ struct mlx5_core_dev { #endif struct mlx5_clock clock; struct mlx5_ib_clock_info *clock_info; - struct page *clock_info_page; struct mlx5_fw_tracer *tracer; }; -- cgit v1.2.3 From 4eb6ab13b99148b5bf9bfdae7977fe139b4452f8 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 16 Apr 2019 14:07:30 +0300 Subject: RDMA: Remove rdma_user_mmap_page Upon further research drivers that want this should simply call the core function vm_insert_page(). The VMA holds a reference on the page and it will be automatically freed when the last reference drops. No need for disassociate to sequence the cleanup. 
Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_main.c | 62 +++++++---------------------------- drivers/infiniband/hw/mlx5/main.c | 12 +++---- include/rdma/ib_verbs.h | 9 ----- 3 files changed, 17 insertions(+), 66 deletions(-) diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index db20b6e0f253..7e767b94074a 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -926,43 +926,32 @@ static const struct vm_operations_struct rdma_umap_ops = { .fault = rdma_umap_fault, }; -static struct rdma_umap_priv *rdma_user_mmap_pre(struct ib_ucontext *ucontext, - struct vm_area_struct *vma, - unsigned long size) +/* + * Map IO memory into a process. This is to be called by drivers as part of + * their mmap() functions if they wish to send something like PCI-E BAR memory + * to userspace. + */ +int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma, + unsigned long pfn, unsigned long size, pgprot_t prot) { struct ib_uverbs_file *ufile = ucontext->ufile; struct rdma_umap_priv *priv; if (!(vma->vm_flags & VM_SHARED)) - return ERR_PTR(-EINVAL); + return -EINVAL; if (vma->vm_end - vma->vm_start != size) - return ERR_PTR(-EINVAL); + return -EINVAL; /* Driver is using this wrong, must be called by ib_uverbs_mmap */ if (WARN_ON(!vma->vm_file || vma->vm_file->private_data != ufile)) - return ERR_PTR(-EINVAL); + return -EINVAL; lockdep_assert_held(&ufile->device->disassociate_srcu); priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) - return ERR_PTR(-ENOMEM); - return priv; -} - -/* - * Map IO memory into a process. This is to be called by drivers as part of - * their mmap() functions if they wish to send something like PCI-E BAR memory - * to userspace. 
- */ -int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma, - unsigned long pfn, unsigned long size, pgprot_t prot) -{ - struct rdma_umap_priv *priv = rdma_user_mmap_pre(ucontext, vma, size); - - if (IS_ERR(priv)) - return PTR_ERR(priv); + return -ENOMEM; vma->vm_page_prot = prot; if (io_remap_pfn_range(vma, vma->vm_start, pfn, size, prot)) { @@ -975,35 +964,6 @@ int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma, } EXPORT_SYMBOL(rdma_user_mmap_io); -/* - * The page case is here for a slightly different reason, the driver expects - * to be able to free the page it is sharing to user space when it destroys - * its ucontext, which means we need to zap the user space references. - * - * We could handle this differently by providing an API to allocate a shared - * page and then only freeing the shared page when the last ufile is - * destroyed. - */ -int rdma_user_mmap_page(struct ib_ucontext *ucontext, - struct vm_area_struct *vma, struct page *page, - unsigned long size) -{ - struct rdma_umap_priv *priv = rdma_user_mmap_pre(ucontext, vma, size); - - if (IS_ERR(priv)) - return PTR_ERR(priv); - - if (remap_pfn_range(vma, vma->vm_start, page_to_pfn(page), size, - vma->vm_page_prot)) { - kfree(priv); - return -EAGAIN; - } - - rdma_umap_priv_init(priv, vma); - return 0; -} -EXPORT_SYMBOL(rdma_user_mmap_page); - void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile) { struct rdma_umap_priv *priv, *next_priv; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index da81402992bc..239d70833afc 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2060,22 +2060,22 @@ static int mlx5_ib_mmap_clock_info_page(struct mlx5_ib_dev *dev, struct vm_area_struct *vma, struct mlx5_ib_ucontext *context) { - if (vma->vm_end - vma->vm_start != PAGE_SIZE) + if ((vma->vm_end - vma->vm_start != PAGE_SIZE) || + !(vma->vm_flags & VM_SHARED)) return -EINVAL; if 
(get_index(vma->vm_pgoff) != MLX5_IB_CLOCK_INFO_V1) return -EOPNOTSUPP; - if (vma->vm_flags & VM_WRITE) + if (vma->vm_flags & (VM_WRITE | VM_EXEC)) return -EPERM; - vma->vm_flags &= ~VM_MAYWRITE; + vma->vm_flags &= ~(VM_MAYWRITE | VM_MAYEXEC); if (!dev->mdev->clock_info) return -EOPNOTSUPP; - return rdma_user_mmap_page(&context->ibucontext, vma, - virt_to_page(dev->mdev->clock_info), - PAGE_SIZE); + return vm_insert_page(vma, vma->vm_start, + virt_to_page(dev->mdev->clock_info)); } static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd, diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 9b9e17bcc201..7ca908d5c0c3 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2705,9 +2705,6 @@ void ib_set_device_ops(struct ib_device *device, #if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma, unsigned long pfn, unsigned long size, pgprot_t prot); -int rdma_user_mmap_page(struct ib_ucontext *ucontext, - struct vm_area_struct *vma, struct page *page, - unsigned long size); #else static inline int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma, @@ -2716,12 +2713,6 @@ static inline int rdma_user_mmap_io(struct ib_ucontext *ucontext, { return -EINVAL; } -static inline int rdma_user_mmap_page(struct ib_ucontext *ucontext, - struct vm_area_struct *vma, struct page *page, - unsigned long size) -{ - return -EINVAL; -} #endif static inline int ib_copy_from_udata(void *dest, struct ib_udata *udata, size_t len) -- cgit v1.2.3 From 1f1d6abbf0bebe4b1fc33b80730a1b4bfbc8f7bb Mon Sep 17 00:00:00 2001 From: Ariel Levkovich Date: Sun, 31 Mar 2019 19:44:50 +0300 Subject: IB/mlx5: Expose TIR ICM address to user space This patch exposes the TIR ICM address of raw packet and RSS QPs to user space. 
In order to pass the new field, the patch extends the mlx5 specific QP creation response structure and fills it with the icm address returned by the FW command, if available. Signed-off-by: Ariel Levkovich Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 46 ++++++++++++++++++++++++++++++++++++----- include/uapi/rdma/mlx5-abi.h | 2 ++ 2 files changed, 43 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 19ae2b018f01..1af28bca9acd 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1402,7 +1402,8 @@ static void destroy_raw_packet_qp_tir(struct mlx5_ib_dev *dev, static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_rq *rq, u32 tdn, u32 *qp_flags_en, - struct ib_pd *pd) + struct ib_pd *pd, + u32 *out, int outlen) { u8 lb_flag = 0; u32 *in; @@ -1436,8 +1437,9 @@ static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev, MLX5_SET(tirc, tirc, self_lb_block, lb_flag); - err = mlx5_core_create_tir(dev->mdev, in, inlen, &rq->tirn); + err = mlx5_core_create_tir_out(dev->mdev, in, inlen, out, outlen); + rq->tirn = MLX5_GET(create_tir_out, out, tirn); if (!err && MLX5_GET(tirc, tirc, self_lb_block)) { err = mlx5_ib_enable_lb(dev, false, true); @@ -1463,6 +1465,7 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, int err; u32 tdn = mucontext->tdn; u16 uid = to_mpd(pd)->uid; + u32 out[MLX5_ST_SZ_DW(create_tir_out)] = {}; if (qp->sq.wqe_cnt) { err = create_raw_packet_qp_tis(dev, qp, sq, tdn, pd); @@ -1495,7 +1498,9 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, if (err) goto err_destroy_sq; - err = create_raw_packet_qp_tir(dev, rq, tdn, &qp->flags_en, pd); + err = create_raw_packet_qp_tir( + dev, rq, tdn, &qp->flags_en, pd, out, + MLX5_ST_SZ_BYTES(create_tir_out)); if (err) goto err_destroy_rq; @@ -1504,6 +1509,20 @@ static int 
create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, resp->comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_RQN; resp->tirn = rq->tirn; resp->comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_TIRN; + if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner)) { + resp->tir_icm_addr = MLX5_GET( + create_tir_out, out, icm_address_31_0); + resp->tir_icm_addr |= + (u64)MLX5_GET(create_tir_out, out, + icm_address_39_32) + << 32; + resp->tir_icm_addr |= + (u64)MLX5_GET(create_tir_out, out, + icm_address_63_40) + << 40; + resp->comp_mask |= + MLX5_IB_CREATE_QP_RESP_MASK_TIR_ICM_ADDR; + } } } @@ -1577,8 +1596,10 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, udata, struct mlx5_ib_ucontext, ibucontext); struct mlx5_ib_create_qp_resp resp = {}; int inlen; + int outlen; int err; u32 *in; + u32 *out; void *tirc; void *hfso; u32 selected_fields = 0; @@ -1658,10 +1679,12 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, } inlen = MLX5_ST_SZ_BYTES(create_tir_in); - in = kvzalloc(inlen, GFP_KERNEL); + outlen = MLX5_ST_SZ_BYTES(create_tir_out); + in = kvzalloc(inlen + outlen, GFP_KERNEL); if (!in) return -ENOMEM; + out = in + MLX5_ST_SZ_DW(create_tir_in); MLX5_SET(create_tir_in, in, uid, to_mpd(pd)->uid); tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); MLX5_SET(tirc, tirc, disp_type, @@ -1773,8 +1796,9 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, MLX5_SET(rx_hash_field_select, hfso, selected_fields, selected_fields); create_tir: - err = mlx5_core_create_tir(dev->mdev, in, inlen, &qp->rss_qp.tirn); + err = mlx5_core_create_tir_out(dev->mdev, in, inlen, out, outlen); + qp->rss_qp.tirn = MLX5_GET(create_tir_out, out, tirn); if (!err && MLX5_GET(tirc, tirc, self_lb_block)) { err = mlx5_ib_enable_lb(dev, false, true); @@ -1789,6 +1813,18 @@ create_tir: if (mucontext->devx_uid) { resp.comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_TIRN; resp.tirn = qp->rss_qp.tirn; + if 
(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner)) { + resp.tir_icm_addr = + MLX5_GET(create_tir_out, out, icm_address_31_0); + resp.tir_icm_addr |= (u64)MLX5_GET(create_tir_out, out, + icm_address_39_32) + << 32; + resp.tir_icm_addr |= (u64)MLX5_GET(create_tir_out, out, + icm_address_63_40) + << 40; + resp.comp_mask |= + MLX5_IB_CREATE_QP_RESP_MASK_TIR_ICM_ADDR; + } } err = ib_copy_to_udata(udata, &resp, min(udata->outlen, sizeof(resp))); diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index 87b3198f4b5d..0ef404f75f56 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -359,6 +359,7 @@ enum mlx5_ib_create_qp_resp_mask { MLX5_IB_CREATE_QP_RESP_MASK_TISN = 1UL << 1, MLX5_IB_CREATE_QP_RESP_MASK_RQN = 1UL << 2, MLX5_IB_CREATE_QP_RESP_MASK_SQN = 1UL << 3, + MLX5_IB_CREATE_QP_RESP_MASK_TIR_ICM_ADDR = 1UL << 4, }; struct mlx5_ib_create_qp_resp { @@ -370,6 +371,7 @@ struct mlx5_ib_create_qp_resp { __u32 rqn; __u32 sqn; __u32 reserved1; + __u64 tir_icm_addr; }; struct mlx5_ib_alloc_mw { -- cgit v1.2.3 From a7b36d5fa86c70fbde311659fb2a7b8a52d965a1 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 20 Feb 2019 16:21:07 -0800 Subject: ib/bnxt: Remove mention of idr_alloc from comment Signed-off-by: Matthew Wilcox Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 8cf255054fda..3fcc77c03903 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -3658,7 +3658,7 @@ int bnxt_re_alloc_ucontext(struct ib_ucontext *ctx, struct ib_udata *udata) resp.chip_id0 = chip_met_rev_num; /* Future extension of chip info */ resp.chip_id1 = 0; - /*Temp, Use idr_alloc instead */ + /*Temp, Use xa_alloc instead */ resp.dev_id = rdev->en_dev->pdev->devfn; resp.max_qp = rdev->qplib_ctx.qpc_count; resp.pg_size = 
PAGE_SIZE; -- cgit v1.2.3 From b9b0f34531e0f8ff7fd0b78adfbc0e8209900f83 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 20 Feb 2019 16:20:45 -0800 Subject: uverbs: Convert idr to XArray The word 'idr' is scattered throughout the API, so I haven't changed it, but the 'idr' variable is now an XArray. Signed-off-by: Matthew Wilcox Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/rdma_core.c | 76 ++++++++++++------------------------- drivers/infiniband/core/uverbs.h | 4 +- 2 files changed, 25 insertions(+), 55 deletions(-) diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index e9c905220abd..ccf4d069c25c 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -297,25 +297,13 @@ static struct ib_uobject *alloc_uobj(struct ib_uverbs_file *ufile, static int idr_add_uobj(struct ib_uobject *uobj) { - int ret; - - idr_preload(GFP_KERNEL); - spin_lock(&uobj->ufile->idr_lock); - - /* - * We start with allocating an idr pointing to NULL. This represents an - * object which isn't initialized yet. We'll replace it later on with - * the real object once we commit. - */ - ret = idr_alloc(&uobj->ufile->idr, NULL, 0, - min_t(unsigned long, U32_MAX - 1, INT_MAX), GFP_NOWAIT); - if (ret >= 0) - uobj->id = ret; - - spin_unlock(&uobj->ufile->idr_lock); - idr_preload_end(); - - return ret < 0 ? ret : 0; + /* + * We start with allocating an idr pointing to NULL. This represents an + * object which isn't initialized yet. We'll replace it later on with + * the real object once we commit. + */ + return xa_alloc(&uobj->ufile->idr, &uobj->id, NULL, xa_limit_32b, + GFP_KERNEL); } /* Returns the ib_uobject or an error. The caller should check for IS_ERR. 
*/ @@ -325,29 +313,20 @@ lookup_get_idr_uobject(const struct uverbs_api_object *obj, enum rdma_lookup_mode mode) { struct ib_uobject *uobj; - unsigned long idrno = id; if (id < 0 || id > ULONG_MAX) return ERR_PTR(-EINVAL); rcu_read_lock(); - /* object won't be released as we're protected in rcu */ - uobj = idr_find(&ufile->idr, idrno); - if (!uobj) { - uobj = ERR_PTR(-ENOENT); - goto free; - } - /* * The idr_find is guaranteed to return a pointer to something that * isn't freed yet, or NULL, as the free after idr_remove goes through * kfree_rcu(). However the object may still have been released and * kfree() could be called at any time. */ - if (!kref_get_unless_zero(&uobj->ref)) + uobj = xa_load(&ufile->idr, id); + if (!uobj || !kref_get_unless_zero(&uobj->ref)) uobj = ERR_PTR(-ENOENT); - -free: rcu_read_unlock(); return uobj; } @@ -400,7 +379,7 @@ struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_api_object *obj, struct ib_uobject *uobj; int ret; - if (IS_ERR(obj) && PTR_ERR(obj) == -ENOMSG) { + if (obj == ERR_PTR(-ENOMSG)) { /* must be UVERBS_IDR_ANY_OBJECT, see uapi_get_object() */ uobj = lookup_get_idr_uobject(NULL, ufile, id, mode); if (IS_ERR(uobj)) @@ -461,14 +440,12 @@ alloc_begin_idr_uobject(const struct uverbs_api_object *obj, ret = ib_rdmacg_try_charge(&uobj->cg_obj, uobj->context->device, RDMACG_RESOURCE_HCA_OBJECT); if (ret) - goto idr_remove; + goto remove; return uobj; -idr_remove: - spin_lock(&ufile->idr_lock); - idr_remove(&ufile->idr, uobj->id); - spin_unlock(&ufile->idr_lock); +remove: + xa_erase(&ufile->idr, uobj->id); uobj_put: uverbs_uobject_put(uobj); return ERR_PTR(ret); @@ -529,9 +506,7 @@ static void alloc_abort_idr_uobject(struct ib_uobject *uobj) ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device, RDMACG_RESOURCE_HCA_OBJECT); - spin_lock(&uobj->ufile->idr_lock); - idr_remove(&uobj->ufile->idr, uobj->id); - spin_unlock(&uobj->ufile->idr_lock); + xa_erase(&uobj->ufile->idr, uobj->id); } static int __must_check 
destroy_hw_idr_uobject(struct ib_uobject *uobj, @@ -562,9 +537,7 @@ static int __must_check destroy_hw_idr_uobject(struct ib_uobject *uobj, static void remove_handle_idr_uobject(struct ib_uobject *uobj) { - spin_lock(&uobj->ufile->idr_lock); - idr_remove(&uobj->ufile->idr, uobj->id); - spin_unlock(&uobj->ufile->idr_lock); + xa_erase(&uobj->ufile->idr, uobj->id); /* Matches the kref in alloc_commit_idr_uobject */ uverbs_uobject_put(uobj); } @@ -595,17 +568,17 @@ static void remove_handle_fd_uobject(struct ib_uobject *uobj) static int alloc_commit_idr_uobject(struct ib_uobject *uobj) { struct ib_uverbs_file *ufile = uobj->ufile; + void *old; - spin_lock(&ufile->idr_lock); /* * We already allocated this IDR with a NULL object, so * this shouldn't fail. * - * NOTE: Once we set the IDR we loose ownership of our kref on uobj. + * NOTE: Storing the uobj transfers our kref on uobj to the XArray. * It will be put by remove_commit_idr_uobject() */ - WARN_ON(idr_replace(&ufile->idr, uobj, uobj->id)); - spin_unlock(&ufile->idr_lock); + old = xa_store(&ufile->idr, uobj->id, uobj, GFP_KERNEL); + WARN_ON(old != NULL); return 0; } @@ -739,29 +712,28 @@ void rdma_lookup_put_uobject(struct ib_uobject *uobj, void setup_ufile_idr_uobject(struct ib_uverbs_file *ufile) { - spin_lock_init(&ufile->idr_lock); - idr_init(&ufile->idr); + xa_init_flags(&ufile->idr, XA_FLAGS_ALLOC); } void release_ufile_idr_uobject(struct ib_uverbs_file *ufile) { struct ib_uobject *entry; - int id; + unsigned long id; /* * At this point uverbs_cleanup_ufile() is guaranteed to have run, and - * there are no HW objects left, however the IDR is still populated + * there are no HW objects left, however the xarray is still populated * with anything that has not been cleaned up by userspace. Since the * kref on ufile is 0, nothing is allowed to call lookup_get. 
* * This is an optimized equivalent to remove_handle_idr_uobject */ - idr_for_each_entry(&ufile->idr, entry, id) { + xa_for_each(&ufile->idr, id, entry) { WARN_ON(entry->object); uverbs_uobject_put(entry); } - idr_destroy(&ufile->idr); + xa_destroy(&ufile->idr); } const struct uverbs_obj_type_class uverbs_idr_class = { diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index d2c29868172c..1e5aeb39f774 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -162,9 +162,7 @@ struct ib_uverbs_file { struct list_head umaps; struct page *disassociate_page; - struct idr idr; - /* spinlock protects write access to idr */ - spinlock_t idr_lock; + struct xarray idr; }; struct ib_uverbs_event { -- cgit v1.2.3 From 923abb9d797ba078f4e9eb3734dd71be5f567a2a Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Wed, 1 May 2019 13:48:13 +0300 Subject: RDMA/core: Introduce RDMA subsystem ibdev_* print functions Similarly to dev/netdev/etc printk helpers, add standard printk helpers for the RDMA subsystem. Example output: efa 0000:00:06.0 efa_0: Hello World! efa_0: Hello World! (no parent device set) (NULL ib_device): Hello World! 
(ibdev is NULL) Cc: Jason Baron Suggested-by: Jason Gunthorpe Suggested-by: Leon Romanovsky Signed-off-by: Gal Pressman Reviewed-by: Leon Romanovsky Reviewed-by: Shiraz Saleem Reviewed-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/core/device.c | 60 ++++++++++++++++++++++++++++++++++++++++ include/linux/dynamic_debug.h | 11 ++++++++ include/rdma/ib_verbs.h | 30 ++++++++++++++++++++ lib/dynamic_debug.c | 37 +++++++++++++++++++++++++ 4 files changed, 138 insertions(+) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index fcbf2d4c865d..76088655f06e 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -197,6 +197,66 @@ static int ib_security_change(struct notifier_block *nb, unsigned long event, static void ib_policy_change_task(struct work_struct *work); static DECLARE_WORK(ib_policy_change_work, ib_policy_change_task); +static void __ibdev_printk(const char *level, const struct ib_device *ibdev, + struct va_format *vaf) +{ + if (ibdev && ibdev->dev.parent) + dev_printk_emit(level[1] - '0', + ibdev->dev.parent, + "%s %s %s: %pV", + dev_driver_string(ibdev->dev.parent), + dev_name(ibdev->dev.parent), + dev_name(&ibdev->dev), + vaf); + else if (ibdev) + printk("%s%s: %pV", + level, dev_name(&ibdev->dev), vaf); + else + printk("%s(NULL ib_device): %pV", level, vaf); +} + +void ibdev_printk(const char *level, const struct ib_device *ibdev, + const char *format, ...) +{ + struct va_format vaf; + va_list args; + + va_start(args, format); + + vaf.fmt = format; + vaf.va = &args; + + __ibdev_printk(level, ibdev, &vaf); + + va_end(args); +} +EXPORT_SYMBOL(ibdev_printk); + +#define define_ibdev_printk_level(func, level) \ +void func(const struct ib_device *ibdev, const char *fmt, ...) 
\ +{ \ + struct va_format vaf; \ + va_list args; \ + \ + va_start(args, fmt); \ + \ + vaf.fmt = fmt; \ + vaf.va = &args; \ + \ + __ibdev_printk(level, ibdev, &vaf); \ + \ + va_end(args); \ +} \ +EXPORT_SYMBOL(func); + +define_ibdev_printk_level(ibdev_emerg, KERN_EMERG); +define_ibdev_printk_level(ibdev_alert, KERN_ALERT); +define_ibdev_printk_level(ibdev_crit, KERN_CRIT); +define_ibdev_printk_level(ibdev_err, KERN_ERR); +define_ibdev_printk_level(ibdev_warn, KERN_WARNING); +define_ibdev_printk_level(ibdev_notice, KERN_NOTICE); +define_ibdev_printk_level(ibdev_info, KERN_INFO); + static struct notifier_block ibdev_lsm_nb = { .notifier_call = ib_security_change, }; diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h index c2be029b9b53..6c809440f319 100644 --- a/include/linux/dynamic_debug.h +++ b/include/linux/dynamic_debug.h @@ -71,6 +71,13 @@ void __dynamic_netdev_dbg(struct _ddebug *descriptor, const struct net_device *dev, const char *fmt, ...); +struct ib_device; + +extern __printf(3, 4) +void __dynamic_ibdev_dbg(struct _ddebug *descriptor, + const struct ib_device *ibdev, + const char *fmt, ...); + #define DEFINE_DYNAMIC_DEBUG_METADATA(name, fmt) \ static struct _ddebug __aligned(8) \ __attribute__((section("__verbose"))) name = { \ @@ -154,6 +161,10 @@ void __dynamic_netdev_dbg(struct _ddebug *descriptor, _dynamic_func_call(fmt, __dynamic_netdev_dbg, \ dev, fmt, ##__VA_ARGS__) +#define dynamic_ibdev_dbg(dev, fmt, ...) \ + _dynamic_func_call(fmt, __dynamic_ibdev_dbg, \ + dev, fmt, ##__VA_ARGS__) + #define dynamic_hex_dump(prefix_str, prefix_type, rowsize, \ groupsize, buf, len, ascii) \ _dynamic_func_call_no_desc(__builtin_constant_p(prefix_str) ? 
prefix_str : "hexdump", \ diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 737ef5ed3930..de8724e5a727 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -74,6 +74,36 @@ extern struct workqueue_struct *ib_wq; extern struct workqueue_struct *ib_comp_wq; extern struct workqueue_struct *ib_comp_unbound_wq; +__printf(3, 4) __cold +void ibdev_printk(const char *level, const struct ib_device *ibdev, + const char *format, ...); +__printf(2, 3) __cold +void ibdev_emerg(const struct ib_device *ibdev, const char *format, ...); +__printf(2, 3) __cold +void ibdev_alert(const struct ib_device *ibdev, const char *format, ...); +__printf(2, 3) __cold +void ibdev_crit(const struct ib_device *ibdev, const char *format, ...); +__printf(2, 3) __cold +void ibdev_err(const struct ib_device *ibdev, const char *format, ...); +__printf(2, 3) __cold +void ibdev_warn(const struct ib_device *ibdev, const char *format, ...); +__printf(2, 3) __cold +void ibdev_notice(const struct ib_device *ibdev, const char *format, ...); +__printf(2, 3) __cold +void ibdev_info(const struct ib_device *ibdev, const char *format, ...); + +#if defined(CONFIG_DYNAMIC_DEBUG) +#define ibdev_dbg(__dev, format, args...) \ + dynamic_ibdev_dbg(__dev, format, ##args) +#elif defined(DEBUG) +#define ibdev_dbg(__dev, format, args...) \ + ibdev_printk(KERN_DEBUG, __dev, format, ##args) +#else +__printf(2, 3) __cold +static inline +void ibdev_dbg(const struct ib_device *ibdev, const char *format, ...) 
{} +#endif + union ib_gid { u8 raw[16]; struct { diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index 7bdf98c37e91..8a16c2d498e9 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -37,6 +37,8 @@ #include #include +#include + extern struct _ddebug __start___verbose[]; extern struct _ddebug __stop___verbose[]; @@ -636,6 +638,41 @@ EXPORT_SYMBOL(__dynamic_netdev_dbg); #endif +#if IS_ENABLED(CONFIG_INFINIBAND) + +void __dynamic_ibdev_dbg(struct _ddebug *descriptor, + const struct ib_device *ibdev, const char *fmt, ...) +{ + struct va_format vaf; + va_list args; + + va_start(args, fmt); + + vaf.fmt = fmt; + vaf.va = &args; + + if (ibdev && ibdev->dev.parent) { + char buf[PREFIX_SIZE]; + + dev_printk_emit(LOGLEVEL_DEBUG, ibdev->dev.parent, + "%s%s %s %s: %pV", + dynamic_emit_prefix(descriptor, buf), + dev_driver_string(ibdev->dev.parent), + dev_name(ibdev->dev.parent), + dev_name(&ibdev->dev), + &vaf); + } else if (ibdev) { + printk(KERN_DEBUG "%s: %pV", dev_name(&ibdev->dev), &vaf); + } else { + printk(KERN_DEBUG "(NULL ib_device): %pV", &vaf); + } + + va_end(args); +} +EXPORT_SYMBOL(__dynamic_ibdev_dbg); + +#endif + #define DDEBUG_STRING_SIZE 1024 static __initdata char ddebug_setup_string[DDEBUG_STRING_SIZE]; -- cgit v1.2.3 From 7872168a839144dbbfb33125262dab0673f9ddf5 Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Mon, 29 Apr 2019 16:32:04 -0500 Subject: RDMA/umem: Handle page combining avoidance correctly in ib_umem_add_sg_table() The flag update_cur_sg tracks whether contiguous pages from a new set of page_list pages can be merged into the SGE passed into ib_umem_add_sg_table(). If this flag is true, but the total segment length exceeds the max_seg_size supported by HW, we avoid combining to this SGE and move to a new SGE (x) and merge 'len' pages to it. However, if i < npages, the next iteration can incorrectly merge 'len' contiguous pages into x instead of into a new SGE since update_cur_sg is still true. 
Reset update_cur_sg to false always after the check to merge pages into the first SGE passed in to ib_umem_add_sg_table(). Also, prevent a new SGE's segment length from ever exceeding HW max_seg_sz. There is a crash on hfi1 as result of this where-in max_seg_sz is defaulting to 64K. Due to above bug, unfolding SGE's in __ib_umem_release points to a bad page ptr. TEST comp-wfr.perfnative.STL-22166-WDT _ perftest native 2-Write_4097QP_4MB STARTING at 1555387093 BUG: Bad page state in process ib_write_bw pfn:7ebca0 page:ffffcd675faf2800 count:0 mapcount:1 mapping:0000000000000000 index:0x1 flags: 0x17ffffc0000000() raw: 0017ffffc0000000 dead000000000100 dead000000000200 0000000000000000 raw: 0000000000000001 0000000000000000 0000000000000000 0000000000000000 page dumped because: nonzero mapcount CPU: 18 PID: 15853 Comm: ib_write_bw Tainted: G B 5.1.0-rc4 #1 Hardware name: Intel Corporation S2600CWR/S2600CW, BIOS SE5C610.86B.01.01.0014.121820151719 12/18/2015 Call Trace: dump_stack+0x5a/0x73 bad_page+0xf5/0x10f free_pcppages_bulk+0x62c/0x680 free_unref_page+0x54/0x70 __ib_umem_release+0x148/0x1a0 [ib_uverbs] ib_umem_release+0x22/0x80 [ib_uverbs] rvt_dereg_mr+0x67/0xb0 [rdmavt] ib_dereg_mr_user+0x37/0x60 [ib_core] destroy_hw_idr_uobject+0x1c/0x50 [ib_uverbs] uverbs_destroy_uobject+0x2e/0x180 [ib_uverbs] uobj_destroy+0x4d/0x60 [ib_uverbs] __uobj_get_destroy+0x33/0x50 [ib_uverbs] __uobj_perform_destroy+0xa/0x30 [ib_uverbs] ib_uverbs_dereg_mr+0x66/0x90 [ib_uverbs] ib_uverbs_write+0x3e1/0x500 [ib_uverbs] vfs_write+0xad/0x1b0 ksys_write+0x5a/0xd0 do_syscall_64+0x5b/0x180 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: d10bcf947a3e ("RDMA/umem: Combine contiguous PAGE_SIZE regions in SGEs") Tested-by: Mike Marciniszyn Reviewed-by: Michael J. 
Ruhl Signed-off-by: Shiraz Saleem Reviewed-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/umem.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 7e912a91ec8e..23f7512cc7a8 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -101,17 +101,21 @@ static struct scatterlist *ib_umem_add_sg_table(struct scatterlist *sg, * at i */ for (len = 0; i != npages && - first_pfn + len == page_to_pfn(page_list[i]); + first_pfn + len == page_to_pfn(page_list[i]) && + len < (max_seg_sz >> PAGE_SHIFT); len++) i++; /* Squash N contiguous pages from page_list into current sge */ - if (update_cur_sg && - ((max_seg_sz - sg->length) >= (len << PAGE_SHIFT))) { - sg_set_page(sg, sg_page(sg), - sg->length + (len << PAGE_SHIFT), 0); + if (update_cur_sg) { + if ((max_seg_sz - sg->length) >= (len << PAGE_SHIFT)) { + sg_set_page(sg, sg_page(sg), + sg->length + (len << PAGE_SHIFT), + 0); + update_cur_sg = false; + continue; + } update_cur_sg = false; - continue; } /* Squash N contiguous pages into next sge or first sge */ -- cgit v1.2.3 From f89adedaf3feb2e1a896b2f2387cdcb4e2b9c48b Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Tue, 30 Apr 2019 11:46:39 +0300 Subject: RDMA/uverbs: Initialize udata struct on destroy flows Cited commit introduced the udata parameter to different destroy flows but the uapi method definition does not have udata (i.e has_udata flag is not set). As a result, an uninitialized udata struct is being passed down to the driver callbacks. Fix that by clearing the driver udata even in cases where has_udata flag is not set. 
Fixes: c4367a26357b ("IB: Pass uverbs_attr_bundle down ib_x destroy path") Cc: Shamir Rabinovitch Co-developed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe Signed-off-by: Gal Pressman Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_ioctl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c index cfbef25b3a73..829b0c6944d8 100644 --- a/drivers/infiniband/core/uverbs_ioctl.c +++ b/drivers/infiniband/core/uverbs_ioctl.c @@ -453,6 +453,8 @@ static int ib_uverbs_run_method(struct bundle_priv *pbundle, uverbs_fill_udata(&pbundle->bundle, &pbundle->bundle.driver_udata, UVERBS_ATTR_UHW_IN, UVERBS_ATTR_UHW_OUT); + else + pbundle->bundle.driver_udata = (struct ib_udata){}; if (destroy_bkey != UVERBS_API_ATTR_BKEY_LEN) { struct uverbs_obj_attr *destroy_attr = -- cgit v1.2.3 From 4f33dd41b24c1e3494a6ea6c7839ea068343f809 Mon Sep 17 00:00:00 2001 From: Shamir Rabinovitch Date: Tue, 30 Apr 2019 17:23:21 +0300 Subject: RDMA/uverbs: Initialize uverbs_attr_bundle ucontext in ib_uverbs_get_context ib_uverbs_get_context does not have a uobject so it does not call the rdma_lookup_get_uobject which is used to set up the uverbs_attr_bundle ucontext. For ib_uverbs_get_context we need to set up this manually before we send the uverbs_attr_bundle down to the driver layer. 
This completes the change that was done in commit 70f06b26f07e ("IB: ucontext should be set properly for all cmd & ioctl paths") Signed-off-by: Shamir Rabinovitch Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_cmd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 04d08135b374..76ac113d1da5 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -230,6 +230,8 @@ static int ib_uverbs_get_context(struct uverbs_attr_bundle *attrs) goto err_alloc; } + attrs->context = ucontext; + ucontext->res.type = RDMA_RESTRACK_CTX; ucontext->device = ib_dev; ucontext->cg_obj = cg_obj; -- cgit v1.2.3 From 8f4426aa19fcdb9326ac44154a117b1a3a5ae126 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Wed, 1 May 2019 08:38:30 +0300 Subject: IB/mlx5: Add missing XRC options to QP optional params mask The QP transition optional parameters for the various transitions for XRC QPs are identical to those for RC QPs. Many of the XRC QP transition optional parameter bits are missing from the QP optional mask table. These omissions caused failures when doing XRC QP state transitions. For example, when trying to change the response timer of an XRC receive QP via the RTS2RTS transition, the new timer value was ignored because MLX5_QP_OPTPAR_RNR_TIMEOUT bit was missing from the optional params mask for XRC qps for the RTS2RTS transition. Fix this by adding the missing XRC optional parameters for all QP transitions to the opt_mask table. 
Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Fixes: a4774e9095de ("IB/mlx5: Fix opt param mask according to firmware spec") Signed-off-by: Jack Morgenstein Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index efe1f6f0c351..dac58c652876 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -3002,6 +3002,11 @@ static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_Q [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_PKEY_INDEX | MLX5_QP_OPTPAR_Q_KEY | MLX5_QP_OPTPAR_PRI_PORT, + [MLX5_QP_ST_XRC] = MLX5_QP_OPTPAR_RRE | + MLX5_QP_OPTPAR_RAE | + MLX5_QP_OPTPAR_RWE | + MLX5_QP_OPTPAR_PKEY_INDEX | + MLX5_QP_OPTPAR_PRI_PORT, }, [MLX5_QP_STATE_RTR] = { [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH | @@ -3035,6 +3040,12 @@ static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_Q MLX5_QP_OPTPAR_RWE | MLX5_QP_OPTPAR_PM_STATE, [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY, + [MLX5_QP_ST_XRC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH | + MLX5_QP_OPTPAR_RRE | + MLX5_QP_OPTPAR_RAE | + MLX5_QP_OPTPAR_RWE | + MLX5_QP_OPTPAR_PM_STATE | + MLX5_QP_OPTPAR_RNR_TIMEOUT, }, }, [MLX5_QP_STATE_RTS] = { @@ -3051,6 +3062,12 @@ static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_Q [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY | MLX5_QP_OPTPAR_SRQN | MLX5_QP_OPTPAR_CQN_RCV, + [MLX5_QP_ST_XRC] = MLX5_QP_OPTPAR_RRE | + MLX5_QP_OPTPAR_RAE | + MLX5_QP_OPTPAR_RWE | + MLX5_QP_OPTPAR_RNR_TIMEOUT | + MLX5_QP_OPTPAR_PM_STATE | + MLX5_QP_OPTPAR_ALT_ADDR_PATH, }, }, [MLX5_QP_STATE_SQER] = { @@ -3062,6 +3079,10 @@ static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_Q MLX5_QP_OPTPAR_RWE | MLX5_QP_OPTPAR_RAE | MLX5_QP_OPTPAR_RRE, + [MLX5_QP_ST_XRC] = MLX5_QP_OPTPAR_RNR_TIMEOUT | + MLX5_QP_OPTPAR_RWE | + 
MLX5_QP_OPTPAR_RAE | + MLX5_QP_OPTPAR_RRE, }, }, }; -- cgit v1.2.3 From 1a418f7764a00bc6ad8fd1b765b941c3a8389467 Mon Sep 17 00:00:00 2001 From: Artemy Kovalyov Date: Wed, 1 May 2019 08:39:48 +0300 Subject: IB/core: Set qp->real_qp before it may be accessed real_qp should be initialized before ib_destroy_qp() is called. ib_destroy_qp() may be called in the error flow if ib_create_qp_security() failed. Signed-off-by: Artemy Kovalyov Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/core_priv.h | 1 + drivers/infiniband/core/uverbs_cmd.c | 1 - drivers/infiniband/core/verbs.c | 1 - 3 files changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index d4dd360769cb..2764647056d8 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -304,6 +304,7 @@ static inline struct ib_qp *_ib_create_qp(struct ib_device *dev, qp->device = dev; qp->pd = pd; qp->uobject = uobj; + qp->real_qp = qp; /* * We don't track XRC QPs for now, because they don't have PD * and more importantly they are created internaly by driver, diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 76ac113d1da5..5a3a1780ceea 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1418,7 +1418,6 @@ static int create_qp(struct uverbs_attr_bundle *attrs, if (ret) goto err_cb; - qp->real_qp = qp; qp->pd = pd; qp->send_cq = attr.send_cq; qp->recv_cq = attr.recv_cq; diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 7313edc9f091..d607c319ad50 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1172,7 +1172,6 @@ struct ib_qp *ib_create_qp_user(struct ib_pd *pd, if (ret) goto err; - qp->real_qp = qp; qp->qp_type = qp_init_attr->qp_type; qp->rwq_ind_tbl = qp_init_attr->rwq_ind_tbl; -- cgit v1.2.3 From 
eb15c78b05bd9fbac45ee5b56aaf29b2570b5238 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 1 May 2019 08:46:55 +0300 Subject: RDMA/core: Do not invoke init_port on compat devices The driver interface cannot manipulate the sysfs of the compat device, only of the full device so we must avoid calling the driver sysfs APIs on compat devices. This prevents an oops: Call Trace: dump_stack+0x5a/0x73 kobject_init+0x74/0x80 kobject_init_and_add+0x35/0xb0 hfi1_create_port_files+0x6e/0x3c0 [hfi1] ib_setup_port_attrs+0x43b/0x560 [ib_core] add_one_compat_dev+0x16a/0x230 [ib_core] rdma_dev_init_net+0x110/0x160 [ib_core] ops_init+0x38/0xf0 setup_net+0xcf/0x1e0 copy_net_ns+0xb7/0x130 create_new_namespaces+0x11a/0x1b0 unshare_nsproxy_namespaces+0x55/0xa0 ksys_unshare+0x1a7/0x340 __x64_sys_unshare+0xe/0x20 do_syscall_64+0x5b/0x180 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: 5417783eabb2 ("RDMA/core: Support core port attributes in non init_net") Reported-by: Mike Marciniszyn Tested-by: Mike Marciniszyn Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/core_priv.h | 3 +-- drivers/infiniband/core/device.c | 2 +- drivers/infiniband/core/sysfs.c | 16 ++++++++-------- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index 2764647056d8..ff40a450b5d2 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -341,8 +341,7 @@ int roce_resolve_route_from_path(struct sa_path_rec *rec, struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr); void ib_free_port_attrs(struct ib_core_device *coredev); -int ib_setup_port_attrs(struct ib_core_device *coredev, - bool alloc_hw_stats); +int ib_setup_port_attrs(struct ib_core_device *coredev); int rdma_compatdev_set(u8 enable); diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 
76088655f06e..2123cc693a29 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -870,7 +870,7 @@ static int add_one_compat_dev(struct ib_device *device, ret = device_add(&cdev->dev); if (ret) goto add_err; - ret = ib_setup_port_attrs(cdev, false); + ret = ib_setup_port_attrs(cdev); if (ret) goto port_err; diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 2fe89754e592..7a599c5e455f 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -1015,10 +1015,10 @@ err_free_stats: return; } -static int add_port(struct ib_core_device *coredev, - int port_num, bool alloc_stats) +static int add_port(struct ib_core_device *coredev, int port_num) { struct ib_device *device = rdma_device_to_ibdev(&coredev->dev); + bool is_full_dev = &device->coredev == coredev; struct ib_port *p; struct ib_port_attr attr; int i; @@ -1057,7 +1057,7 @@ static int add_port(struct ib_core_device *coredev, goto err_put; } - if (device->ops.process_mad && alloc_stats) { + if (device->ops.process_mad && is_full_dev) { p->pma_table = get_counter_table(device, port_num); ret = sysfs_create_group(&p->kobj, p->pma_table); if (ret) @@ -1113,7 +1113,7 @@ static int add_port(struct ib_core_device *coredev, if (ret) goto err_free_pkey; - if (device->ops.init_port) { + if (device->ops.init_port && is_full_dev) { ret = device->ops.init_port(device, port_num, &p->kobj); if (ret) goto err_remove_pkey; @@ -1124,7 +1124,7 @@ static int add_port(struct ib_core_device *coredev, * port, so holder should be device. Therefore skip per port conunter * initialization. 
*/ - if (device->ops.alloc_hw_stats && port_num && alloc_stats) + if (device->ops.alloc_hw_stats && port_num && is_full_dev) setup_hw_stats(device, p, port_num); list_add_tail(&p->kobj.entry, &coredev->port_list); @@ -1308,7 +1308,7 @@ void ib_free_port_attrs(struct ib_core_device *coredev) kobject_put(coredev->ports_kobj); } -int ib_setup_port_attrs(struct ib_core_device *coredev, bool alloc_stats) +int ib_setup_port_attrs(struct ib_core_device *coredev) { struct ib_device *device = rdma_device_to_ibdev(&coredev->dev); unsigned int port; @@ -1320,7 +1320,7 @@ int ib_setup_port_attrs(struct ib_core_device *coredev, bool alloc_stats) return -ENOMEM; rdma_for_each_port (device, port) { - ret = add_port(coredev, port, alloc_stats); + ret = add_port(coredev, port); if (ret) goto err_put; } @@ -1336,7 +1336,7 @@ int ib_device_register_sysfs(struct ib_device *device) { int ret; - ret = ib_setup_port_attrs(&device->coredev, true); + ret = ib_setup_port_attrs(&device->coredev); if (ret) return ret; -- cgit v1.2.3 From dd05cb828d0ebecd3d772075fccb85ec3618bedf Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Mon, 29 Apr 2019 14:59:06 +0300 Subject: RDMA: Get rid of iw_cm_verbs Integrate iw_cm_verbs data members into ib_device_ops and ib_device structs, this is done to achieve the following: 1) Avoid memory related bugs during error unwind 2) Make the code cleaner 3) Reduce code duplication Signed-off-by: Kamal Heib Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 8 +++++++ drivers/infiniband/core/iwcm.c | 35 +++++++++++++++-------------- drivers/infiniband/hw/cxgb3/iwch_provider.c | 32 +++++++++----------------- drivers/infiniband/hw/cxgb4/provider.c | 33 ++++++++++----------------- drivers/infiniband/hw/i40iw/i40iw_verbs.c | 30 +++++++++---------------- drivers/infiniband/hw/nes/nes_verbs.c | 27 +++++++++------------- drivers/infiniband/hw/qedr/main.c | 25 +++++++++------------ include/rdma/ib_verbs.h | 23 +++++++++++++++---- include/rdma/iw_cm.h | 
25 --------------------- 9 files changed, 98 insertions(+), 140 deletions(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 2123cc693a29..9665c3796cfb 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -2370,6 +2370,14 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, get_vf_config); SET_DEVICE_OP(dev_ops, get_vf_stats); SET_DEVICE_OP(dev_ops, init_port); + SET_DEVICE_OP(dev_ops, iw_accept); + SET_DEVICE_OP(dev_ops, iw_add_ref); + SET_DEVICE_OP(dev_ops, iw_connect); + SET_DEVICE_OP(dev_ops, iw_create_listen); + SET_DEVICE_OP(dev_ops, iw_destroy_listen); + SET_DEVICE_OP(dev_ops, iw_get_qp); + SET_DEVICE_OP(dev_ops, iw_reject); + SET_DEVICE_OP(dev_ops, iw_rem_ref); SET_DEVICE_OP(dev_ops, map_mr_sg); SET_DEVICE_OP(dev_ops, map_phys_fmr); SET_DEVICE_OP(dev_ops, mmap); diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c index 732637c913d9..72141c5b7c95 100644 --- a/drivers/infiniband/core/iwcm.c +++ b/drivers/infiniband/core/iwcm.c @@ -394,7 +394,7 @@ static void destroy_cm_id(struct iw_cm_id *cm_id) cm_id_priv->state = IW_CM_STATE_DESTROYING; spin_unlock_irqrestore(&cm_id_priv->lock, flags); /* destroy the listening endpoint */ - cm_id->device->iwcm->destroy_listen(cm_id); + cm_id->device->ops.iw_destroy_listen(cm_id); spin_lock_irqsave(&cm_id_priv->lock, flags); break; case IW_CM_STATE_ESTABLISHED: @@ -417,7 +417,7 @@ static void destroy_cm_id(struct iw_cm_id *cm_id) */ cm_id_priv->state = IW_CM_STATE_DESTROYING; spin_unlock_irqrestore(&cm_id_priv->lock, flags); - cm_id->device->iwcm->reject(cm_id, NULL, 0); + cm_id->device->ops.iw_reject(cm_id, NULL, 0); spin_lock_irqsave(&cm_id_priv->lock, flags); break; case IW_CM_STATE_CONN_SENT: @@ -427,7 +427,7 @@ static void destroy_cm_id(struct iw_cm_id *cm_id) break; } if (cm_id_priv->qp) { - cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp); + 
cm_id_priv->id.device->ops.iw_rem_ref(cm_id_priv->qp); cm_id_priv->qp = NULL; } spin_unlock_irqrestore(&cm_id_priv->lock, flags); @@ -504,7 +504,7 @@ static void iw_cm_check_wildcard(struct sockaddr_storage *pm_addr, static int iw_cm_map(struct iw_cm_id *cm_id, bool active) { const char *devname = dev_name(&cm_id->device->dev); - const char *ifname = cm_id->device->iwcm->ifname; + const char *ifname = cm_id->device->iw_ifname; struct iwpm_dev_data pm_reg_msg = {}; struct iwpm_sa_data pm_msg; int status; @@ -526,7 +526,7 @@ static int iw_cm_map(struct iw_cm_id *cm_id, bool active) cm_id->mapped = true; pm_msg.loc_addr = cm_id->local_addr; pm_msg.rem_addr = cm_id->remote_addr; - pm_msg.flags = (cm_id->device->iwcm->driver_flags & IW_F_NO_PORT_MAP) ? + pm_msg.flags = (cm_id->device->iw_driver_flags & IW_F_NO_PORT_MAP) ? IWPM_FLAGS_NO_PORT_MAP : 0; if (active) status = iwpm_add_and_query_mapping(&pm_msg, @@ -577,7 +577,8 @@ int iw_cm_listen(struct iw_cm_id *cm_id, int backlog) spin_unlock_irqrestore(&cm_id_priv->lock, flags); ret = iw_cm_map(cm_id, false); if (!ret) - ret = cm_id->device->iwcm->create_listen(cm_id, backlog); + ret = cm_id->device->ops.iw_create_listen(cm_id, + backlog); if (ret) cm_id_priv->state = IW_CM_STATE_IDLE; spin_lock_irqsave(&cm_id_priv->lock, flags); @@ -617,7 +618,7 @@ int iw_cm_reject(struct iw_cm_id *cm_id, cm_id_priv->state = IW_CM_STATE_IDLE; spin_unlock_irqrestore(&cm_id_priv->lock, flags); - ret = cm_id->device->iwcm->reject(cm_id, private_data, + ret = cm_id->device->ops.iw_reject(cm_id, private_data, private_data_len); clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); @@ -653,25 +654,25 @@ int iw_cm_accept(struct iw_cm_id *cm_id, return -EINVAL; } /* Get the ib_qp given the QPN */ - qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn); + qp = cm_id->device->ops.iw_get_qp(cm_id->device, iw_param->qpn); if (!qp) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 
wake_up_all(&cm_id_priv->connect_wait); return -EINVAL; } - cm_id->device->iwcm->add_ref(qp); + cm_id->device->ops.iw_add_ref(qp); cm_id_priv->qp = qp; spin_unlock_irqrestore(&cm_id_priv->lock, flags); - ret = cm_id->device->iwcm->accept(cm_id, iw_param); + ret = cm_id->device->ops.iw_accept(cm_id, iw_param); if (ret) { /* An error on accept precludes provider events */ BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV); cm_id_priv->state = IW_CM_STATE_IDLE; spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id_priv->qp) { - cm_id->device->iwcm->rem_ref(qp); + cm_id->device->ops.iw_rem_ref(qp); cm_id_priv->qp = NULL; } spin_unlock_irqrestore(&cm_id_priv->lock, flags); @@ -712,25 +713,25 @@ int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param) } /* Get the ib_qp given the QPN */ - qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn); + qp = cm_id->device->ops.iw_get_qp(cm_id->device, iw_param->qpn); if (!qp) { ret = -EINVAL; goto err; } - cm_id->device->iwcm->add_ref(qp); + cm_id->device->ops.iw_add_ref(qp); cm_id_priv->qp = qp; cm_id_priv->state = IW_CM_STATE_CONN_SENT; spin_unlock_irqrestore(&cm_id_priv->lock, flags); ret = iw_cm_map(cm_id, true); if (!ret) - ret = cm_id->device->iwcm->connect(cm_id, iw_param); + ret = cm_id->device->ops.iw_connect(cm_id, iw_param); if (!ret) return 0; /* success */ spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id_priv->qp) { - cm_id->device->iwcm->rem_ref(qp); + cm_id->device->ops.iw_rem_ref(qp); cm_id_priv->qp = NULL; } cm_id_priv->state = IW_CM_STATE_IDLE; @@ -895,7 +896,7 @@ static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv, cm_id_priv->state = IW_CM_STATE_ESTABLISHED; } else { /* REJECTED or RESET */ - cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp); + cm_id_priv->id.device->ops.iw_rem_ref(cm_id_priv->qp); cm_id_priv->qp = NULL; cm_id_priv->state = IW_CM_STATE_IDLE; } @@ -946,7 +947,7 @@ static int cm_close_handler(struct iwcm_id_private *cm_id_priv, 
spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id_priv->qp) { - cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp); + cm_id_priv->id.device->ops.iw_rem_ref(cm_id_priv->qp); cm_id_priv->qp = NULL; } switch (cm_id_priv->state) { diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index 62b99d26f0d3..3a481dfb1607 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -1321,6 +1321,14 @@ static const struct ib_device_ops iwch_dev_ops = { .get_dma_mr = iwch_get_dma_mr, .get_hw_stats = iwch_get_mib, .get_port_immutable = iwch_port_immutable, + .iw_accept = iwch_accept_cr, + .iw_add_ref = iwch_qp_add_ref, + .iw_connect = iwch_connect, + .iw_create_listen = iwch_create_listen, + .iw_destroy_listen = iwch_destroy_listen, + .iw_get_qp = iwch_get_qp, + .iw_reject = iwch_reject_cr, + .iw_rem_ref = iwch_qp_rem_ref, .map_mr_sg = iwch_map_mr_sg, .mmap = iwch_mmap, .modify_qp = iwch_ib_modify_qp, @@ -1340,8 +1348,6 @@ static const struct ib_device_ops iwch_dev_ops = { int iwch_register_device(struct iwch_dev *dev) { - int ret; - pr_debug("%s iwch_dev %p\n", __func__, dev); memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid)); memcpy(&dev->ibdev.node_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6); @@ -1379,34 +1385,18 @@ int iwch_register_device(struct iwch_dev *dev) dev->ibdev.dev.parent = &dev->rdev.rnic_info.pdev->dev; dev->ibdev.uverbs_abi_ver = IWCH_UVERBS_ABI_VERSION; - dev->ibdev.iwcm = kzalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL); - if (!dev->ibdev.iwcm) - return -ENOMEM; - - dev->ibdev.iwcm->connect = iwch_connect; - dev->ibdev.iwcm->accept = iwch_accept_cr; - dev->ibdev.iwcm->reject = iwch_reject_cr; - dev->ibdev.iwcm->create_listen = iwch_create_listen; - dev->ibdev.iwcm->destroy_listen = iwch_destroy_listen; - dev->ibdev.iwcm->add_ref = iwch_qp_add_ref; - dev->ibdev.iwcm->rem_ref = iwch_qp_rem_ref; - dev->ibdev.iwcm->get_qp = iwch_get_qp; - 
memcpy(dev->ibdev.iwcm->ifname, dev->rdev.t3cdev_p->lldev->name, - sizeof(dev->ibdev.iwcm->ifname)); + memcpy(dev->ibdev.iw_ifname, dev->rdev.t3cdev_p->lldev->name, + sizeof(dev->ibdev.iw_ifname)); dev->ibdev.driver_id = RDMA_DRIVER_CXGB3; rdma_set_device_sysfs_group(&dev->ibdev, &iwch_attr_group); ib_set_device_ops(&dev->ibdev, &iwch_dev_ops); - ret = ib_register_device(&dev->ibdev, "cxgb3_%d"); - if (ret) - kfree(dev->ibdev.iwcm); - return ret; + return ib_register_device(&dev->ibdev, "cxgb3_%d"); } void iwch_unregister_device(struct iwch_dev *dev) { pr_debug("%s iwch_dev %p\n", __func__, dev); ib_unregister_device(&dev->ibdev); - kfree(dev->ibdev.iwcm); return; } diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 3c5197ee77f5..74b795642fca 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -510,6 +510,14 @@ static const struct ib_device_ops c4iw_dev_ops = { .get_dma_mr = c4iw_get_dma_mr, .get_hw_stats = c4iw_get_mib, .get_port_immutable = c4iw_port_immutable, + .iw_accept = c4iw_accept_cr, + .iw_add_ref = c4iw_qp_add_ref, + .iw_connect = c4iw_connect, + .iw_create_listen = c4iw_create_listen, + .iw_destroy_listen = c4iw_destroy_listen, + .iw_get_qp = c4iw_get_qp, + .iw_reject = c4iw_reject_cr, + .iw_rem_ref = c4iw_qp_rem_ref, .map_mr_sg = c4iw_map_mr_sg, .mmap = c4iw_mmap, .modify_qp = c4iw_ib_modify_qp, @@ -588,36 +596,20 @@ void c4iw_register_device(struct work_struct *work) dev->ibdev.dev.parent = &dev->rdev.lldi.pdev->dev; dev->ibdev.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION; - dev->ibdev.iwcm = kzalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL); - if (!dev->ibdev.iwcm) { - ret = -ENOMEM; - goto err_dealloc_ctx; - } - - dev->ibdev.iwcm->connect = c4iw_connect; - dev->ibdev.iwcm->accept = c4iw_accept_cr; - dev->ibdev.iwcm->reject = c4iw_reject_cr; - dev->ibdev.iwcm->create_listen = c4iw_create_listen; - dev->ibdev.iwcm->destroy_listen = c4iw_destroy_listen; - 
dev->ibdev.iwcm->add_ref = c4iw_qp_add_ref; - dev->ibdev.iwcm->rem_ref = c4iw_qp_rem_ref; - dev->ibdev.iwcm->get_qp = c4iw_get_qp; - memcpy(dev->ibdev.iwcm->ifname, dev->rdev.lldi.ports[0]->name, - sizeof(dev->ibdev.iwcm->ifname)); + memcpy(dev->ibdev.iw_ifname, dev->rdev.lldi.ports[0]->name, + sizeof(dev->ibdev.iw_ifname)); rdma_set_device_sysfs_group(&dev->ibdev, &c4iw_attr_group); dev->ibdev.driver_id = RDMA_DRIVER_CXGB4; ib_set_device_ops(&dev->ibdev, &c4iw_dev_ops); ret = set_netdevs(&dev->ibdev, &dev->rdev); if (ret) - goto err_kfree_iwcm; + goto err_dealloc_ctx; ret = ib_register_device(&dev->ibdev, "cxgb4_%d"); if (ret) - goto err_kfree_iwcm; + goto err_dealloc_ctx; return; -err_kfree_iwcm: - kfree(dev->ibdev.iwcm); err_dealloc_ctx: pr_err("%s - Failed registering iwarp device: %d\n", pci_name(ctx->lldi.pdev), ret); @@ -629,6 +621,5 @@ void c4iw_unregister_device(struct c4iw_dev *dev) { pr_debug("c4iw_dev %p\n", dev); ib_unregister_device(&dev->ibdev); - kfree(dev->ibdev.iwcm); return; } diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 7bf7fe854464..b8a1412253ae 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -2704,6 +2704,14 @@ static const struct ib_device_ops i40iw_dev_ops = { .get_dma_mr = i40iw_get_dma_mr, .get_hw_stats = i40iw_get_hw_stats, .get_port_immutable = i40iw_port_immutable, + .iw_accept = i40iw_accept, + .iw_add_ref = i40iw_add_ref, + .iw_connect = i40iw_connect, + .iw_create_listen = i40iw_create_listen, + .iw_destroy_listen = i40iw_destroy_listen, + .iw_get_qp = i40iw_get_qp, + .iw_reject = i40iw_reject, + .iw_rem_ref = i40iw_rem_ref, .map_mr_sg = i40iw_map_mr_sg, .mmap = i40iw_mmap, .modify_qp = i40iw_modify_qp, @@ -2767,22 +2775,8 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev iwibdev->ibdev.phys_port_cnt = 1; iwibdev->ibdev.num_comp_vectors = iwdev->ceqs_count; iwibdev->ibdev.dev.parent = 
&pcidev->dev; - iwibdev->ibdev.iwcm = kzalloc(sizeof(*iwibdev->ibdev.iwcm), GFP_KERNEL); - if (!iwibdev->ibdev.iwcm) { - ib_dealloc_device(&iwibdev->ibdev); - return NULL; - } - - iwibdev->ibdev.iwcm->add_ref = i40iw_add_ref; - iwibdev->ibdev.iwcm->rem_ref = i40iw_rem_ref; - iwibdev->ibdev.iwcm->get_qp = i40iw_get_qp; - iwibdev->ibdev.iwcm->connect = i40iw_connect; - iwibdev->ibdev.iwcm->accept = i40iw_accept; - iwibdev->ibdev.iwcm->reject = i40iw_reject; - iwibdev->ibdev.iwcm->create_listen = i40iw_create_listen; - iwibdev->ibdev.iwcm->destroy_listen = i40iw_destroy_listen; - memcpy(iwibdev->ibdev.iwcm->ifname, netdev->name, - sizeof(iwibdev->ibdev.iwcm->ifname)); + memcpy(iwibdev->ibdev.iw_ifname, netdev->name, + sizeof(iwibdev->ibdev.iw_ifname)); ib_set_device_ops(&iwibdev->ibdev, &i40iw_dev_ops); return iwibdev; @@ -2813,8 +2807,6 @@ void i40iw_destroy_rdma_device(struct i40iw_ib_device *iwibdev) return; ib_unregister_device(&iwibdev->ibdev); - kfree(iwibdev->ibdev.iwcm); - iwibdev->ibdev.iwcm = NULL; wait_event_timeout(iwibdev->iwdev->close_wq, !atomic64_read(&iwibdev->iwdev->use_count), I40IW_EVENT_TIMEOUT); @@ -2842,8 +2834,6 @@ int i40iw_register_rdma_device(struct i40iw_device *iwdev) return 0; error: - kfree(iwdev->iwibdev->ibdev.iwcm); - iwdev->iwibdev->ibdev.iwcm = NULL; ib_dealloc_device(&iwdev->iwibdev->ibdev); return ret; } diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index a3b5e8eecb98..49024326a518 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -3577,6 +3577,14 @@ static const struct ib_device_ops nes_dev_ops = { .get_dev_fw_str = get_dev_fw_str, .get_dma_mr = nes_get_dma_mr, .get_port_immutable = nes_port_immutable, + .iw_accept = nes_accept, + .iw_add_ref = nes_add_ref, + .iw_connect = nes_connect, + .iw_create_listen = nes_create_listen, + .iw_destroy_listen = nes_destroy_listen, + .iw_get_qp = nes_get_qp, + .iw_reject = nes_reject, + .iw_rem_ref = 
nes_rem_ref, .map_mr_sg = nes_map_mr_sg, .mmap = nes_mmap, .modify_qp = nes_modify_qp, @@ -3641,23 +3649,9 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev) nesibdev->ibdev.num_comp_vectors = 1; nesibdev->ibdev.dev.parent = &nesdev->pcidev->dev; - nesibdev->ibdev.iwcm = kzalloc(sizeof(*nesibdev->ibdev.iwcm), GFP_KERNEL); - if (nesibdev->ibdev.iwcm == NULL) { - ib_dealloc_device(&nesibdev->ibdev); - return NULL; - } - nesibdev->ibdev.iwcm->add_ref = nes_add_ref; - nesibdev->ibdev.iwcm->rem_ref = nes_rem_ref; - nesibdev->ibdev.iwcm->get_qp = nes_get_qp; - nesibdev->ibdev.iwcm->connect = nes_connect; - nesibdev->ibdev.iwcm->accept = nes_accept; - nesibdev->ibdev.iwcm->reject = nes_reject; - nesibdev->ibdev.iwcm->create_listen = nes_create_listen; - nesibdev->ibdev.iwcm->destroy_listen = nes_destroy_listen; - ib_set_device_ops(&nesibdev->ibdev, &nes_dev_ops); - memcpy(nesibdev->ibdev.iwcm->ifname, netdev->name, - sizeof(nesibdev->ibdev.iwcm->ifname)); + memcpy(nesibdev->ibdev.iw_ifname, netdev->name, + sizeof(nesibdev->ibdev.iw_ifname)); return nesibdev; } @@ -3718,7 +3712,6 @@ void nes_destroy_ofa_device(struct nes_ib_device *nesibdev) nes_unregister_ofa_device(nesibdev); - kfree(nesibdev->ibdev.iwcm); ib_dealloc_device(&nesibdev->ibdev); } diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index a0a49ed26860..083c2c00a8e9 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -148,6 +148,14 @@ static const struct attribute_group qedr_attr_group = { static const struct ib_device_ops qedr_iw_dev_ops = { .get_port_immutable = qedr_iw_port_immutable, + .iw_accept = qedr_iw_accept, + .iw_add_ref = qedr_iw_qp_add_ref, + .iw_connect = qedr_iw_connect, + .iw_create_listen = qedr_iw_create_listen, + .iw_destroy_listen = qedr_iw_destroy_listen, + .iw_get_qp = qedr_iw_get_qp, + .iw_reject = qedr_iw_reject, + .iw_rem_ref = qedr_iw_qp_rem_ref, .query_gid = qedr_iw_query_gid, }; @@ -157,21 
+165,8 @@ static int qedr_iw_register_device(struct qedr_dev *dev) ib_set_device_ops(&dev->ibdev, &qedr_iw_dev_ops); - dev->ibdev.iwcm = kzalloc(sizeof(*dev->ibdev.iwcm), GFP_KERNEL); - if (!dev->ibdev.iwcm) - return -ENOMEM; - - dev->ibdev.iwcm->connect = qedr_iw_connect; - dev->ibdev.iwcm->accept = qedr_iw_accept; - dev->ibdev.iwcm->reject = qedr_iw_reject; - dev->ibdev.iwcm->create_listen = qedr_iw_create_listen; - dev->ibdev.iwcm->destroy_listen = qedr_iw_destroy_listen; - dev->ibdev.iwcm->add_ref = qedr_iw_qp_add_ref; - dev->ibdev.iwcm->rem_ref = qedr_iw_qp_rem_ref; - dev->ibdev.iwcm->get_qp = qedr_iw_get_qp; - - memcpy(dev->ibdev.iwcm->ifname, - dev->ndev->name, sizeof(dev->ibdev.iwcm->ifname)); + memcpy(dev->ibdev.iw_ifname, + dev->ndev->name, sizeof(dev->ibdev.iw_ifname)); return 0; } diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index de8724e5a727..4312899231ca 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2221,8 +2221,6 @@ struct ib_cache { struct ib_event_handler event_handler; }; -struct iw_cm_verbs; - struct ib_port_immutable { int pkey_tbl_len; int gid_tbl_len; @@ -2304,6 +2302,8 @@ struct ib_counters_read_attr { }; struct uverbs_attr_bundle; +struct iw_cm_id; +struct iw_cm_conn_param; #define INIT_RDMA_OBJ_SIZE(ib_struct, drv_struct, member) \ .size_##ib_struct = \ @@ -2581,6 +2581,19 @@ struct ib_device_ops { */ void (*dealloc_driver)(struct ib_device *dev); + /* iWarp CM callbacks */ + void (*iw_add_ref)(struct ib_qp *qp); + void (*iw_rem_ref)(struct ib_qp *qp); + struct ib_qp *(*iw_get_qp)(struct ib_device *device, int qpn); + int (*iw_connect)(struct iw_cm_id *cm_id, + struct iw_cm_conn_param *conn_param); + int (*iw_accept)(struct iw_cm_id *cm_id, + struct iw_cm_conn_param *conn_param); + int (*iw_reject)(struct iw_cm_id *cm_id, const void *pdata, + u8 pdata_len); + int (*iw_create_listen)(struct iw_cm_id *cm_id, int backlog); + int (*iw_destroy_listen)(struct iw_cm_id *cm_id); + 
DECLARE_RDMA_OBJ_SIZE(ib_ah); DECLARE_RDMA_OBJ_SIZE(ib_pd); DECLARE_RDMA_OBJ_SIZE(ib_srq); @@ -2621,8 +2634,6 @@ struct ib_device { int num_comp_vectors; - struct iw_cm_verbs *iwcm; - struct module *owner; union { struct device dev; @@ -2675,6 +2686,10 @@ struct ib_device { struct mutex compat_devs_mutex; /* Maintains compat devices for each net namespace */ struct xarray compat_devs; + + /* Used by iWarp CM */ + char iw_ifname[IFNAMSIZ]; + u32 iw_driver_flags; }; struct ib_client { diff --git a/include/rdma/iw_cm.h b/include/rdma/iw_cm.h index 0e1f02815643..5aa8a9c76aa0 100644 --- a/include/rdma/iw_cm.h +++ b/include/rdma/iw_cm.h @@ -118,31 +118,6 @@ enum iw_flags { IW_F_NO_PORT_MAP = (1 << 0), }; -struct iw_cm_verbs { - void (*add_ref)(struct ib_qp *qp); - - void (*rem_ref)(struct ib_qp *qp); - - struct ib_qp * (*get_qp)(struct ib_device *device, - int qpn); - - int (*connect)(struct iw_cm_id *cm_id, - struct iw_cm_conn_param *conn_param); - - int (*accept)(struct iw_cm_id *cm_id, - struct iw_cm_conn_param *conn_param); - - int (*reject)(struct iw_cm_id *cm_id, - const void *pdata, u8 pdata_len); - - int (*create_listen)(struct iw_cm_id *cm_id, - int backlog); - - int (*destroy_listen)(struct iw_cm_id *cm_id); - char ifname[IFNAMSIZ]; - enum iw_flags driver_flags; -}; - /** * iw_create_cm_id - Create an IW CM identifier. * -- cgit v1.2.3 From 3bf3e2b881c1412d0329ce9376dfe1518489b8fc Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Thu, 2 May 2019 10:48:01 +0300 Subject: RDMA/rxe: Consider skb reserve space based on netdev of GID Always consider the skb reserve space based on netdevice of the GID attribute, regardless of vlan or non vlan netdevice. 
Fixes: 43c9fc509fa5 ("rdma_rxe: make rxe work over 802.1q VLAN devices") Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_net.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index f186b92ba45b..c44139788afc 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -481,8 +481,9 @@ struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av, if (unlikely(!skb)) goto out; - skb_reserve(skb, hdr_len + LL_RESERVED_SPACE(rxe->ndev)); + skb_reserve(skb, hdr_len + LL_RESERVED_SPACE(ndev)); + /* FIXME: hold reference to this netdev until life of this skb. */ skb->dev = ndev; if (av->network_type == RDMA_NETWORK_IPV4) skb->protocol = htons(ETH_P_IP); -- cgit v1.2.3 From 8f9748602491295b305960cd2cd4976ad15cb04f Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Thu, 2 May 2019 10:48:02 +0300 Subject: IB/cm: Reduce dependency on gid attribute ndev check The GID type to path record type conversion can be done directly based on the port type and the GID attribute type. There is no need to determine it indirectly through the GID attribute's ndev field.
Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cm.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 4df59f2b0f04..da10e6ccb43c 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -1985,11 +1985,12 @@ static int cm_req_handler(struct cm_work *work) grh = rdma_ah_read_grh(&cm_id_priv->av.ah_attr); gid_attr = grh->sgid_attr; - if (gid_attr && gid_attr->ndev) { + if (gid_attr && + rdma_protocol_roce(work->port->cm_dev->ib_device, + work->port->port_num)) { work->path[0].rec_type = sa_conv_gid_to_pathrec_type(gid_attr->gid_type); } else { - /* If no GID attribute or ndev is null, it is not RoCE. */ cm_path_set_rec_type(work->port->cm_dev->ib_device, work->port->port_num, &work->path[0], -- cgit v1.2.3 From a70c07397fd80dfeeaead15751ab71449b548826 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Thu, 2 May 2019 10:48:03 +0300 Subject: RDMA: Introduce and use GID attr helper to read RoCE L2 fields Instead of RoCE drivers figuring out vlan, smac fields while working on QP/AH, provide a helper routine to read the L2 fields such as vlan_id and source mac address. This moves logic from mlx5 driver to core for wider usage for RoCE ports. This is a preparation patch to allow detaching netdev in subsequent patch. 
Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cache.c | 55 ++++++++++++++++++++++++++++++ drivers/infiniband/hw/bnxt_re/ib_verbs.c | 18 ++++++---- drivers/infiniband/hw/hns/hns_roce_ah.c | 14 ++++---- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 7 ++-- drivers/infiniband/hw/mlx4/ah.c | 8 +++-- drivers/infiniband/hw/mlx4/qp.c | 6 ++-- drivers/infiniband/hw/mlx5/main.c | 42 ++++------------------- drivers/infiniband/hw/ocrdma/ocrdma_ah.c | 9 ++--- drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 7 ++-- drivers/infiniband/hw/qedr/qedr_roce_cm.c | 11 +++--- drivers/infiniband/hw/qedr/verbs.c | 5 ++- include/rdma/ib_cache.h | 3 ++ 12 files changed, 118 insertions(+), 67 deletions(-) diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 7499e7016e38..c164e377e563 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -1250,6 +1250,61 @@ struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr) return ndev; } +static int get_lower_dev_vlan(struct net_device *lower_dev, void *data) +{ + u16 *vlan_id = data; + + if (is_vlan_dev(lower_dev)) + *vlan_id = vlan_dev_vlan_id(lower_dev); + + /* We are interested only in first level vlan device, so + * always return 1 to stop iterating over next level devices. + */ + return 1; +} + +/** + * rdma_read_gid_l2_fields - Read the vlan ID and source MAC address + * of a GID entry. + * + * @attr: GID attribute pointer whose L2 fields to be read + * @vlan_id: Pointer to vlan id to fill up if the GID entry has + * vlan id. It is optional. + * @smac: Pointer to smac to fill up for a GID entry. It is optional. + * + * rdma_read_gid_l2_fields() returns 0 on success and returns vlan id + * (if gid entry has vlan) and source MAC, or returns error. 
+ */ +int rdma_read_gid_l2_fields(const struct ib_gid_attr *attr, + u16 *vlan_id, u8 *smac) +{ + struct net_device *ndev; + + ndev = attr->ndev; + if (!ndev) + return -EINVAL; + + if (smac) + ether_addr_copy(smac, ndev->dev_addr); + if (vlan_id) { + *vlan_id = 0xffff; + if (is_vlan_dev(ndev)) { + *vlan_id = vlan_dev_vlan_id(ndev); + } else { + /* If the netdev is upper device and if it's lower + * device is vlan device, consider vlan id of the + * the lower vlan device for this gid entry. + */ + rcu_read_lock(); + netdev_walk_all_lower_dev_rcu(attr->ndev, + get_lower_dev_vlan, vlan_id); + rcu_read_unlock(); + } + } + return 0; +} +EXPORT_SYMBOL(rdma_read_gid_l2_fields); + static int config_non_roce_gid_cache(struct ib_device *device, u8 port, int gid_tbl_len) { diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 3fcc77c03903..cde789cb691b 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -360,8 +360,9 @@ int bnxt_re_add_gid(const struct ib_gid_attr *attr, void **context) struct bnxt_re_dev *rdev = to_bnxt_re_dev(attr->device, ibdev); struct bnxt_qplib_sgid_tbl *sgid_tbl = &rdev->qplib_res.sgid_tbl; - if ((attr->ndev) && is_vlan_dev(attr->ndev)) - vlan_id = vlan_dev_vlan_id(attr->ndev); + rc = rdma_read_gid_l2_fields(attr, &vlan_id, NULL); + if (rc) + return rc; rc = bnxt_qplib_add_sgid(sgid_tbl, (struct bnxt_qplib_gid *)&attr->gid, rdev->qplib_res.netdev->dev_addr, @@ -1637,8 +1638,11 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr, qp_attr->ah_attr.roce.dmac); sgid_attr = qp_attr->ah_attr.grh.sgid_attr; - memcpy(qp->qplib_qp.smac, sgid_attr->ndev->dev_addr, - ETH_ALEN); + rc = rdma_read_gid_l2_fields(sgid_attr, NULL, + &qp->qplib_qp.smac[0]); + if (rc) + return rc; + nw_type = rdma_gid_attr_network_type(sgid_attr); switch (nw_type) { case RDMA_NETWORK_IPV4: @@ -1857,8 +1861,10 @@ static int bnxt_re_build_qp1_send_v2(struct bnxt_re_qp 
*qp, memset(&qp->qp1_hdr, 0, sizeof(qp->qp1_hdr)); - if (is_vlan_dev(sgid_attr->ndev)) - vlan_id = vlan_dev_vlan_id(sgid_attr->ndev); + rc = rdma_read_gid_l2_fields(sgid_attr, &vlan_id, NULL); + if (rc) + return rc; + /* Get network header type for this GID */ nw_type = rdma_gid_attr_network_type(sgid_attr); switch (nw_type) { diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c index d9498313ea46..cdd2ac24fc2a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ah.c +++ b/drivers/infiniband/hw/hns/hns_roce_ah.c @@ -49,20 +49,22 @@ int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr, u16 vlan_tag = 0xffff; const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr); bool vlan_en = false; + int ret; + + gid_attr = ah_attr->grh.sgid_attr; + ret = rdma_read_gid_l2_fields(gid_attr, &vlan_tag, NULL); + if (ret) + return ret; /* Get mac address */ memcpy(ah->av.mac, ah_attr->roce.dmac, ETH_ALEN); - gid_attr = ah_attr->grh.sgid_attr; - if (is_vlan_dev(gid_attr->ndev)) { - vlan_tag = vlan_dev_vlan_id(gid_attr->ndev); + if (vlan_tag < VLAN_CFI_MASK) { vlan_en = true; - } - - if (vlan_tag < 0x1000) vlan_tag |= (rdma_ah_get_sl(ah_attr) & HNS_ROCE_VLAN_SL_BIT_MASK) << HNS_ROCE_VLAN_SL_SHIFT; + } ah->av.port_pd = cpu_to_le32(to_hr_pd(ibah->pd)->pdn | (rdma_ah_get_port_num(ah_attr) << diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index f155d2d0b8cd..b5392cb5b20f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -3984,10 +3985,12 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, if (is_roce_protocol) { gid_attr = attr->ah_attr.grh.sgid_attr; - vlan = rdma_vlan_dev_vlan_id(gid_attr->ndev); + ret = rdma_read_gid_l2_fields(gid_attr, &vlan, NULL); + if (ret) + goto out; } - if (is_vlan_dev(gid_attr->ndev)) { + if (vlan < VLAN_CFI_MASK) 
{ roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RQ_VLAN_EN_S, 1); roce_set_bit(qpc_mask->byte_76_srqn_op_en, diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c index b53772ab2401..02a169f8027b 100644 --- a/drivers/infiniband/hw/mlx4/ah.c +++ b/drivers/infiniband/hw/mlx4/ah.c @@ -99,9 +99,11 @@ static int create_iboe_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr) */ gid_attr = ah_attr->grh.sgid_attr; if (gid_attr) { - if (is_vlan_dev(gid_attr->ndev)) - vlan_tag = vlan_dev_vlan_id(gid_attr->ndev); - memcpy(ah->av.eth.s_mac, gid_attr->ndev->dev_addr, ETH_ALEN); + ret = rdma_read_gid_l2_fields(gid_attr, &vlan_tag, + &ah->av.eth.s_mac[0]); + if (ret) + return ret; + ret = mlx4_ib_gid_index_to_real_index(ibdev, gid_attr); if (ret < 0) return ret; diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 364e16b5f8e1..bb1c6eb31b32 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -2248,8 +2248,10 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type, if (is_eth) { gid_attr = attr->ah_attr.grh.sgid_attr; - vlan = rdma_vlan_dev_vlan_id(gid_attr->ndev); - memcpy(smac, gid_attr->ndev->dev_addr, ETH_ALEN); + err = rdma_read_gid_l2_fields(gid_attr, &vlan, + &smac[0]); + if (err) + goto out; } if (mlx4_set_path(dev, attr, attr_mask, qp, &context->pri_path, diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 6135a0b285de..5ac24bce6e77 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -574,52 +574,22 @@ out: return err; } -struct mlx5_ib_vlan_info { - u16 vlan_id; - bool vlan; -}; - -static int get_lower_dev_vlan(struct net_device *lower_dev, void *data) -{ - struct mlx5_ib_vlan_info *vlan_info = data; - - if (is_vlan_dev(lower_dev)) { - vlan_info->vlan = true; - vlan_info->vlan_id = vlan_dev_vlan_id(lower_dev); - } - /* We are interested only in first level vlan device, so 
- * always return 1 to stop iterating over next level devices. - */ - return 1; -} - static int set_roce_addr(struct mlx5_ib_dev *dev, u8 port_num, unsigned int index, const union ib_gid *gid, const struct ib_gid_attr *attr) { enum ib_gid_type gid_type = IB_GID_TYPE_IB; - struct mlx5_ib_vlan_info vlan_info = { }; + u16 vlan_id = 0xffff; u8 roce_version = 0; u8 roce_l3_type = 0; u8 mac[ETH_ALEN]; + int ret; if (gid) { gid_type = attr->gid_type; - ether_addr_copy(mac, attr->ndev->dev_addr); - - if (is_vlan_dev(attr->ndev)) { - vlan_info.vlan = true; - vlan_info.vlan_id = vlan_dev_vlan_id(attr->ndev); - } else { - /* If the netdev is upper device and if it's lower - * lower device is vlan device, consider vlan id of - * the lower vlan device for this gid entry. - */ - rcu_read_lock(); - netdev_walk_all_lower_dev_rcu(attr->ndev, - get_lower_dev_vlan, &vlan_info); - rcu_read_unlock(); - } + ret = rdma_read_gid_l2_fields(attr, &vlan_id, &mac[0]); + if (ret) + return ret; } switch (gid_type) { @@ -640,7 +610,7 @@ static int set_roce_addr(struct mlx5_ib_dev *dev, u8 port_num, return mlx5_core_roce_gid_set(dev->mdev, index, roce_version, roce_l3_type, gid->raw, mac, - vlan_info.vlan, vlan_info.vlan_id, + vlan_id < VLAN_CFI_MASK, vlan_id, port_num); } diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index a17747cb086a..1d4ea135c28f 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -175,14 +175,15 @@ int ocrdma_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr, u32 flags, if (atomic_cmpxchg(&dev->update_sl, 1, 0)) ocrdma_init_service_level(dev); + sgid_attr = attr->grh.sgid_attr; + status = rdma_read_gid_l2_fields(sgid_attr, &vlan_tag, NULL); + if (status) + return status; + status = ocrdma_alloc_av(dev, ah); if (status) goto av_err; - sgid_attr = attr->grh.sgid_attr; - if (is_vlan_dev(sgid_attr->ndev)) - vlan_tag = vlan_dev_vlan_id(sgid_attr->ndev); - /* Get network 
header type for this GID */ ah->hdr_type = rdma_gid_attr_network_type(sgid_attr); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index 5d96b5a94583..32674b291f60 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -2496,7 +2496,7 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp, int status; struct rdma_ah_attr *ah_attr = &attrs->ah_attr; const struct ib_gid_attr *sgid_attr; - u32 vlan_id = 0xFFFF; + u16 vlan_id = 0xFFFF; u8 mac_addr[6], hdr_type; union { struct sockaddr _sockaddr; @@ -2526,8 +2526,9 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp, sizeof(cmd->params.dgid)); sgid_attr = ah_attr->grh.sgid_attr; - vlan_id = rdma_vlan_dev_vlan_id(sgid_attr->ndev); - memcpy(mac_addr, sgid_attr->ndev->dev_addr, ETH_ALEN); + status = rdma_read_gid_l2_fields(sgid_attr, &vlan_id, &mac_addr[0]); + if (status) + return status; qp->sgid_idx = grh->sgid_index; memcpy(&cmd->params.sgid[0], &sgid_attr->gid.raw[0], diff --git a/drivers/infiniband/hw/qedr/qedr_roce_cm.c b/drivers/infiniband/hw/qedr/qedr_roce_cm.c index e1ac2fd60bb1..f5542d703ef9 100644 --- a/drivers/infiniband/hw/qedr/qedr_roce_cm.c +++ b/drivers/infiniband/hw/qedr/qedr_roce_cm.c @@ -397,14 +397,17 @@ static inline int qedr_gsi_build_header(struct qedr_dev *dev, bool has_udp = false; int i; - send_size = 0; - for (i = 0; i < swr->num_sge; ++i) - send_size += swr->sg_list[i].length; + rc = rdma_read_gid_l2_fields(sgid_attr, &vlan_id, NULL); + if (rc) + return rc; - vlan_id = rdma_vlan_dev_vlan_id(sgid_attr->ndev); if (vlan_id < VLAN_CFI_MASK) has_vlan = true; + send_size = 0; + for (i = 0; i < swr->num_sge; ++i) + send_size += swr->sg_list[i].length; + has_udp = (sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP); if (!has_udp) { /* RoCE v1 */ diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 5e92b6229da2..e52d8761d681 100644 --- 
a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -1050,10 +1050,13 @@ static inline int get_gid_info_from_table(struct ib_qp *ibqp, enum rdma_network_type nw_type; const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr); u32 ipv4_addr; + int ret; int i; gid_attr = grh->sgid_attr; - qp_params->vlan_id = rdma_vlan_dev_vlan_id(gid_attr->ndev); + ret = rdma_read_gid_l2_fields(gid_attr, &qp_params->vlan_id, NULL); + if (ret) + return ret; nw_type = rdma_gid_attr_network_type(gid_attr); switch (nw_type) { diff --git a/include/rdma/ib_cache.h b/include/rdma/ib_cache.h index 62e990b620aa..730a65ad8c74 100644 --- a/include/rdma/ib_cache.h +++ b/include/rdma/ib_cache.h @@ -54,6 +54,9 @@ const struct ib_gid_attr *rdma_find_gid_by_filter( void *), void *context); +int rdma_read_gid_l2_fields(const struct ib_gid_attr *attr, + u16 *vlan_id, u8 *smac); + /** * ib_get_cached_pkey - Returns a cached PKey table entry * @device: The device to query. -- cgit v1.2.3 From adb4a57a7a1d0bf4bebc96266f30f04330e741de Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Thu, 2 May 2019 10:48:04 +0300 Subject: RDMA/cma: Use rdma_read_gid_attr_ndev_rcu to access netdev To access the netdevice of the GID attribute, use an existing API rdma_read_gid_attr_ndev_rcu(). This further reduces dependency on open access to netdevice of GID attribute. 
Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/addr.c | 1 + drivers/infiniband/core/cache.c | 1 + drivers/infiniband/core/cma.c | 12 ++++++++++-- include/rdma/ib_cache.h | 1 + 4 files changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 0dce94e3c495..2b791ce7597f 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index c164e377e563..a53c7713d77a 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -1249,6 +1249,7 @@ struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr) read_unlock_irqrestore(&table->rwlock, flags); return ndev; } +EXPORT_SYMBOL(rdma_read_gid_attr_ndev_rcu); static int get_lower_dev_vlan(struct net_device *lower_dev, void *data) { diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 088b5495e199..19f1730a4f24 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1486,6 +1486,7 @@ static struct net_device * roce_get_net_dev_by_cm_event(const struct ib_cm_event *ib_event) { const struct ib_gid_attr *sgid_attr = NULL; + struct net_device *ndev; if (ib_event->event == IB_CM_REQ_RECEIVED) sgid_attr = ib_event->param.req_rcvd.ppath_sgid_attr; @@ -1494,8 +1495,15 @@ roce_get_net_dev_by_cm_event(const struct ib_cm_event *ib_event) if (!sgid_attr) return NULL; - dev_hold(sgid_attr->ndev); - return sgid_attr->ndev; + + rcu_read_lock(); + ndev = rdma_read_gid_attr_ndev_rcu(sgid_attr); + if (IS_ERR(ndev)) + ndev = NULL; + else + dev_hold(ndev); + rcu_read_unlock(); + return ndev; } static struct net_device *cma_get_net_dev(const struct ib_cm_event *ib_event, diff --git a/include/rdma/ib_cache.h b/include/rdma/ib_cache.h 
index 730a65ad8c74..870b5e6c06db 100644 --- a/include/rdma/ib_cache.h +++ b/include/rdma/ib_cache.h @@ -56,6 +56,7 @@ const struct ib_gid_attr *rdma_find_gid_by_filter( int rdma_read_gid_l2_fields(const struct ib_gid_attr *attr, u16 *vlan_id, u8 *smac); +struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr); /** * ib_get_cached_pkey - Returns a cached PKey table entry -- cgit v1.2.3 From dab2175800ef0b45d5e70a7337bd6cb311292ebe Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Thu, 2 May 2019 10:48:05 +0300 Subject: RDMA/rxe: Use rdma_read_gid_attr_ndev_rcu to access netdev Use rdma_read_gid_attr_ndev_rcu() to access netdevice attached to GID entry under rcu lock. This ensures that while working on the netdevice of the GID, it doesn't get freed. Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_net.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index c44139788afc..5a3474f9351b 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -458,7 +458,7 @@ struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av, int paylen, struct rxe_pkt_info *pkt) { unsigned int hdr_len; - struct sk_buff *skb; + struct sk_buff *skb = NULL; struct net_device *ndev; const struct ib_gid_attr *attr; const int port_num = 1; @@ -466,7 +466,6 @@ struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av, attr = rdma_get_gid_attr(&rxe->ib_dev, port_num, av->grh.sgid_index); if (IS_ERR(attr)) return NULL; - ndev = attr->ndev; if (av->network_type == RDMA_NETWORK_IPV4) hdr_len = ETH_HLEN + sizeof(struct udphdr) + @@ -475,16 +474,26 @@ struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av, hdr_len = ETH_HLEN + sizeof(struct udphdr) + sizeof(struct ipv6hdr); + rcu_read_lock(); + ndev = 
rdma_read_gid_attr_ndev_rcu(attr); + if (IS_ERR(ndev)) { + rcu_read_unlock(); + goto out; + } skb = alloc_skb(paylen + hdr_len + LL_RESERVED_SPACE(ndev), GFP_ATOMIC); - if (unlikely(!skb)) + if (unlikely(!skb)) { + rcu_read_unlock(); goto out; + } skb_reserve(skb, hdr_len + LL_RESERVED_SPACE(ndev)); /* FIXME: hold reference to this netdev until life of this skb. */ skb->dev = ndev; + rcu_read_unlock(); + if (av->network_type == RDMA_NETWORK_IPV4) skb->protocol = htons(ETH_P_IP); else -- cgit v1.2.3 From 5102eca9039ba1df6d2e8756b0c030676021f56b Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Thu, 2 May 2019 10:48:06 +0300 Subject: net/smc: Use rdma_read_gid_l2_fields to L2 fields Use core provided API to fill the source MAC address and use rdma_read_gid_attr_ndev_rcu() to get stable netdev. This is preparation patch to allow gid attribute to become NULL when associated net device is removed. Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- net/smc/smc_ib.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 53f429c04843..d14ca4af6f94 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -146,18 +146,13 @@ out: static int smc_ib_fill_mac(struct smc_ib_device *smcibdev, u8 ibport) { const struct ib_gid_attr *attr; - int rc = 0; + int rc; attr = rdma_get_gid_attr(smcibdev->ibdev, ibport, 0); if (IS_ERR(attr)) return -ENODEV; - if (attr->ndev) - memcpy(smcibdev->mac[ibport - 1], attr->ndev->dev_addr, - ETH_ALEN); - else - rc = -ENODEV; - + rc = rdma_read_gid_l2_fields(attr, NULL, smcibdev->mac[ibport - 1]); rdma_put_gid_attr(attr); return rc; } @@ -185,6 +180,7 @@ int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport, unsigned short vlan_id, u8 gid[], u8 *sgid_index) { const struct ib_gid_attr *attr; + const struct net_device *ndev; int i; for (i = 0; i < smcibdev->pattr[ibport - 1].gid_tbl_len; i++) { @@ -192,11 +188,14 @@ int 
smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport, if (IS_ERR(attr)) continue; - if (attr->ndev && + rcu_read_lock(); + ndev = rdma_read_gid_attr_ndev_rcu(attr); + if (!IS_ERR(ndev) && ((!vlan_id && !is_vlan_dev(attr->ndev)) || (vlan_id && is_vlan_dev(attr->ndev) && vlan_dev_vlan_id(attr->ndev) == vlan_id)) && attr->gid_type == IB_GID_TYPE_ROCE) { + rcu_read_unlock(); if (gid) memcpy(gid, &attr->gid, SMC_GID_SIZE); if (sgid_index) @@ -204,6 +203,7 @@ int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport, rdma_put_gid_attr(attr); return 0; } + rcu_read_unlock(); rdma_put_gid_attr(attr); } return -ENODEV; -- cgit v1.2.3 From 943bd984b108b3bb778790c2da4ae8d186b547e6 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Thu, 2 May 2019 10:48:07 +0300 Subject: RDMA/core: Allow detaching gid attribute netdevice for RoCE When there is active traffic through a GID, a QP/AH holds reference to this GID entry. RoCE GID entry holds reference to its attached netdevice. Due to this when netdevice is deleted by admin user, its refcount is not dropped. Therefore, while deleting RoCE GID, wait for all GID attribute's netdev users to finish accessing netdev in rcu context. Once all users done accessing it, release the netdev refcount. 
Signed-off-by: Huy Nguyen Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cache.c | 73 ++++++++++++++++++++++++++++++++++------- drivers/infiniband/core/sysfs.c | 13 +++++--- include/rdma/ib_verbs.h | 2 +- 3 files changed, 71 insertions(+), 17 deletions(-) diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index a53c7713d77a..099d922ae7bd 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -78,11 +78,22 @@ enum gid_table_entry_state { GID_TABLE_ENTRY_PENDING_DEL = 3, }; +struct roce_gid_ndev_storage { + struct rcu_head rcu_head; + struct net_device *ndev; +}; + struct ib_gid_table_entry { struct kref kref; struct work_struct del_work; struct ib_gid_attr attr; void *context; + /* Store the ndev pointer to release reference later on in + * call_rcu context because by that time gid_table_entry + * and attr might be already freed. So keep a copy of it. + * ndev_storage is freed by rcu callback. + */ + struct roce_gid_ndev_storage *ndev_storage; enum gid_table_entry_state state; }; @@ -206,6 +217,20 @@ static void schedule_free_gid(struct kref *kref) queue_work(ib_wq, &entry->del_work); } +static void put_gid_ndev(struct rcu_head *head) +{ + struct roce_gid_ndev_storage *storage = + container_of(head, struct roce_gid_ndev_storage, rcu_head); + + WARN_ON(!storage->ndev); + /* At this point its safe to release netdev reference, + * as all callers working on gid_attr->ndev are done + * using this netdev. 
+ */ + dev_put(storage->ndev); + kfree(storage); +} + static void free_gid_entry_locked(struct ib_gid_table_entry *entry) { struct ib_device *device = entry->attr.device; @@ -228,8 +253,8 @@ static void free_gid_entry_locked(struct ib_gid_table_entry *entry) /* Now this index is ready to be allocated */ write_unlock_irq(&table->rwlock); - if (entry->attr.ndev) - dev_put(entry->attr.ndev); + if (entry->ndev_storage) + call_rcu(&entry->ndev_storage->rcu_head, put_gid_ndev); kfree(entry); } @@ -266,14 +291,25 @@ static struct ib_gid_table_entry * alloc_gid_entry(const struct ib_gid_attr *attr) { struct ib_gid_table_entry *entry; + struct net_device *ndev; entry = kzalloc(sizeof(*entry), GFP_KERNEL); if (!entry) return NULL; + + ndev = rcu_dereference_protected(attr->ndev, 1); + if (ndev) { + entry->ndev_storage = kzalloc(sizeof(*entry->ndev_storage), + GFP_KERNEL); + if (!entry->ndev_storage) { + kfree(entry); + return NULL; + } + dev_hold(ndev); + entry->ndev_storage->ndev = ndev; + } kref_init(&entry->kref); memcpy(&entry->attr, attr, sizeof(*attr)); - if (entry->attr.ndev) - dev_hold(entry->attr.ndev); INIT_WORK(&entry->del_work, free_gid_work); entry->state = GID_TABLE_ENTRY_INVALID; return entry; @@ -343,6 +379,7 @@ static int add_roce_gid(struct ib_gid_table_entry *entry) static void del_gid(struct ib_device *ib_dev, u8 port, struct ib_gid_table *table, int ix) { + struct roce_gid_ndev_storage *ndev_storage; struct ib_gid_table_entry *entry; lockdep_assert_held(&table->lock); @@ -360,6 +397,13 @@ static void del_gid(struct ib_device *ib_dev, u8 port, table->data_vec[ix] = NULL; write_unlock_irq(&table->rwlock); + ndev_storage = entry->ndev_storage; + if (ndev_storage) { + entry->ndev_storage = NULL; + rcu_assign_pointer(entry->attr.ndev, NULL); + call_rcu(&ndev_storage->rcu_head, put_gid_ndev); + } + if (rdma_cap_roce_gid_table(ib_dev, port)) ib_dev->ops.del_gid(&entry->attr, &entry->context); @@ -1244,8 +1288,12 @@ struct net_device 
*rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr) read_lock_irqsave(&table->rwlock, flags); valid = is_gid_entry_valid(table->data_vec[attr->index]); - if (valid && attr->ndev && (READ_ONCE(attr->ndev->flags) & IFF_UP)) - ndev = attr->ndev; + if (valid) { + ndev = rcu_dereference(attr->ndev); + if (!ndev || + (ndev && ((READ_ONCE(ndev->flags) & IFF_UP) == 0))) + ndev = ERR_PTR(-ENODEV); + } read_unlock_irqrestore(&table->rwlock, flags); return ndev; } @@ -1281,10 +1329,12 @@ int rdma_read_gid_l2_fields(const struct ib_gid_attr *attr, { struct net_device *ndev; - ndev = attr->ndev; - if (!ndev) - return -EINVAL; - + rcu_read_lock(); + ndev = rcu_dereference(attr->ndev); + if (!ndev) { + rcu_read_unlock(); + return -ENODEV; + } if (smac) ether_addr_copy(smac, ndev->dev_addr); if (vlan_id) { @@ -1296,12 +1346,11 @@ int rdma_read_gid_l2_fields(const struct ib_gid_attr *attr, * device is vlan device, consider vlan id of the * the lower vlan device for this gid entry. */ - rcu_read_lock(); netdev_walk_all_lower_dev_rcu(attr->ndev, get_lower_dev_vlan, vlan_id); - rcu_read_unlock(); } } + rcu_read_unlock(); return 0; } EXPORT_SYMBOL(rdma_read_gid_l2_fields); diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 7a599c5e455f..8c7adc27aeea 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -349,10 +349,15 @@ static struct attribute *port_default_attrs[] = { static size_t print_ndev(const struct ib_gid_attr *gid_attr, char *buf) { - if (!gid_attr->ndev) - return -EINVAL; - - return sprintf(buf, "%s\n", gid_attr->ndev->name); + struct net_device *ndev; + size_t ret = -EINVAL; + + rcu_read_lock(); + ndev = rcu_dereference(gid_attr->ndev); + if (ndev) + ret = sprintf(buf, "%s\n", ndev->name); + rcu_read_unlock(); + return ret; } static size_t print_gid_type(const struct ib_gid_attr *gid_attr, char *buf) diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 4312899231ca..8f8965f8ffdb 
100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -124,7 +124,7 @@ enum ib_gid_type { #define ROCE_V2_UDP_DPORT 4791 struct ib_gid_attr { - struct net_device *ndev; + struct net_device __rcu *ndev; struct ib_device *device; union ib_gid gid; enum ib_gid_type gid_type; -- cgit v1.2.3 From 3a4ef2e2b5cf9a34bcc66c0d33f7eba180a14535 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 16 Apr 2019 15:13:10 +0300 Subject: RDMA/rdmavt: Catch use-after-free access of AH structures Prior to commit d345691471b4 ("RDMA: Handle AH allocations by IB/core"), the AH destroy path in rdmavt returned an -EBUSY warning to the application and potentially leaked the kernel memory of the AH structure. After that commit, the AH structure is always freed, but such an early return in driver code can potentially cause a use-after-free error. Add a warning to catch such a situation and help driver developers fix the AH release path. Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rdmavt/ah.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c index e6f7e4689d4d..0e147b32cbe9 100644 --- a/drivers/infiniband/sw/rdmavt/ah.c +++ b/drivers/infiniband/sw/rdmavt/ah.c @@ -141,8 +141,7 @@ void rvt_destroy_ah(struct ib_ah *ibah, u32 destroy_flags) struct rvt_ah *ah = ibah_to_rvtah(ibah); unsigned long flags; - if (atomic_read(&ah->refcount) != 0) - return; + WARN_ON_ONCE(atomic_read(&ah->refcount)); spin_lock_irqsave(&dev->n_ahs_lock, flags); dev->n_ahs_allocated--; -- cgit v1.2.3 From 3b113a1ec3d4ac7e1e621b77650ac05491f5924a Mon Sep 17 00:00:00 2001 From: Ariel Levkovich Date: Sun, 5 May 2019 17:07:11 +0300 Subject: IB/mlx5: Support device memory type attribute This patch introduces a new mlx5_ib driver attribute to the DM allocation method - the DM type.
In order to allow addition of new types in downstream patches this patch also refactors the allocation, deallocation and registration handlers to consider the requested type and perform the necessary actions according to it. Since not all future device memory types will be such that are mapped to user memory, the mandatory page index output attribute is modified to be optional. Signed-off-by: Ariel Levkovich Reviewed-by: Eli Cohen Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/cmd.c | 30 +++---- drivers/infiniband/hw/mlx5/cmd.h | 4 +- drivers/infiniband/hw/mlx5/main.c | 135 +++++++++++++++++++----------- drivers/infiniband/hw/mlx5/mlx5_ib.h | 23 +++-- drivers/infiniband/hw/mlx5/mr.c | 32 ++++--- include/uapi/rdma/mlx5_user_ioctl_cmds.h | 1 + include/uapi/rdma/mlx5_user_ioctl_verbs.h | 4 + 7 files changed, 145 insertions(+), 84 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c index be95ac5aeb30..f0e9c7609083 100644 --- a/drivers/infiniband/hw/mlx5/cmd.c +++ b/drivers/infiniband/hw/mlx5/cmd.c @@ -82,10 +82,10 @@ int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *dev, return mlx5_cmd_exec(dev, in, in_size, out, sizeof(out)); } -int mlx5_cmd_alloc_memic(struct mlx5_memic *memic, phys_addr_t *addr, - u64 length, u32 alignment) +int mlx5_cmd_alloc_memic(struct mlx5_dm *dm, phys_addr_t *addr, + u64 length, u32 alignment) { - struct mlx5_core_dev *dev = memic->dev; + struct mlx5_core_dev *dev = dm->dev; u64 num_memic_hw_pages = MLX5_CAP_DEV_MEM(dev, memic_bar_size) >> PAGE_SHIFT; u64 hw_start_addr = MLX5_CAP64_DEV_MEM(dev, memic_bar_start_addr); @@ -115,17 +115,17 @@ int mlx5_cmd_alloc_memic(struct mlx5_memic *memic, phys_addr_t *addr, mlx5_alignment); while (page_idx < num_memic_hw_pages) { - spin_lock(&memic->memic_lock); - page_idx = bitmap_find_next_zero_area(memic->memic_alloc_pages, + spin_lock(&dm->lock); + page_idx = 
bitmap_find_next_zero_area(dm->memic_alloc_pages, num_memic_hw_pages, page_idx, num_pages, 0); if (page_idx < num_memic_hw_pages) - bitmap_set(memic->memic_alloc_pages, + bitmap_set(dm->memic_alloc_pages, page_idx, num_pages); - spin_unlock(&memic->memic_lock); + spin_unlock(&dm->lock); if (page_idx >= num_memic_hw_pages) break; @@ -135,10 +135,10 @@ int mlx5_cmd_alloc_memic(struct mlx5_memic *memic, phys_addr_t *addr, ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); if (ret) { - spin_lock(&memic->memic_lock); - bitmap_clear(memic->memic_alloc_pages, + spin_lock(&dm->lock); + bitmap_clear(dm->memic_alloc_pages, page_idx, num_pages); - spin_unlock(&memic->memic_lock); + spin_unlock(&dm->lock); if (ret == -EAGAIN) { page_idx++; @@ -157,9 +157,9 @@ int mlx5_cmd_alloc_memic(struct mlx5_memic *memic, phys_addr_t *addr, return -ENOMEM; } -int mlx5_cmd_dealloc_memic(struct mlx5_memic *memic, u64 addr, u64 length) +int mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, u64 addr, u64 length) { - struct mlx5_core_dev *dev = memic->dev; + struct mlx5_core_dev *dev = dm->dev; u64 hw_start_addr = MLX5_CAP64_DEV_MEM(dev, memic_bar_start_addr); u32 num_pages = DIV_ROUND_UP(length, PAGE_SIZE); u32 out[MLX5_ST_SZ_DW(dealloc_memic_out)] = {0}; @@ -177,10 +177,10 @@ int mlx5_cmd_dealloc_memic(struct mlx5_memic *memic, u64 addr, u64 length) err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); if (!err) { - spin_lock(&memic->memic_lock); - bitmap_clear(memic->memic_alloc_pages, + spin_lock(&dm->lock); + bitmap_clear(dm->memic_alloc_pages, start_page_idx, num_pages); - spin_unlock(&memic->memic_lock); + spin_unlock(&dm->lock); } return err; diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h index 923a7b93f507..80a644bea6c7 100644 --- a/drivers/infiniband/hw/mlx5/cmd.h +++ b/drivers/infiniband/hw/mlx5/cmd.h @@ -44,9 +44,9 @@ int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point, int mlx5_cmd_query_ext_ppcnt_counters(struct 
mlx5_core_dev *dev, void *out); int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *mdev, void *in, int in_size); -int mlx5_cmd_alloc_memic(struct mlx5_memic *memic, phys_addr_t *addr, +int mlx5_cmd_alloc_memic(struct mlx5_dm *dm, phys_addr_t *addr, u64 length, u32 alignment); -int mlx5_cmd_dealloc_memic(struct mlx5_memic *memic, u64 addr, u64 length); +int mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, u64 addr, u64 length); void mlx5_cmd_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn, u16 uid); void mlx5_cmd_destroy_tir(struct mlx5_core_dev *dev, u32 tirn, u16 uid); void mlx5_cmd_destroy_tis(struct mlx5_core_dev *dev, u32 tisn, u16 uid); diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 5ac24bce6e77..97f32a03e456 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2264,58 +2264,90 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm return 0; } -struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev, - struct ib_ucontext *context, - struct ib_dm_alloc_attr *attr, - struct uverbs_attr_bundle *attrs) +static int handle_alloc_dm_memic(struct ib_ucontext *ctx, + struct mlx5_ib_dm *dm, + struct ib_dm_alloc_attr *attr, + struct uverbs_attr_bundle *attrs) { - u64 act_size = roundup(attr->length, MLX5_MEMIC_BASE_SIZE); - struct mlx5_memic *memic = &to_mdev(ibdev)->memic; - phys_addr_t memic_addr; - struct mlx5_ib_dm *dm; + struct mlx5_dm *dm_db = &to_mdev(ctx->device)->dm; u64 start_offset; u32 page_idx; int err; - dm = kzalloc(sizeof(*dm), GFP_KERNEL); - if (!dm) - return ERR_PTR(-ENOMEM); - - mlx5_ib_dbg(to_mdev(ibdev), "alloc_memic req: user_length=0x%llx act_length=0x%llx log_alignment=%d\n", - attr->length, act_size, attr->alignment); + dm->size = roundup(attr->length, MLX5_MEMIC_BASE_SIZE); - err = mlx5_cmd_alloc_memic(memic, &memic_addr, - act_size, attr->alignment); + err = mlx5_cmd_alloc_memic(dm_db, &dm->dev_addr, + dm->size, attr->alignment); if (err) - 
goto err_free; + return err; - start_offset = memic_addr & ~PAGE_MASK; - page_idx = (memic_addr - memic->dev->bar_addr - - MLX5_CAP64_DEV_MEM(memic->dev, memic_bar_start_addr)) >> + page_idx = (dm->dev_addr - pci_resource_start(dm_db->dev->pdev, 0) - + MLX5_CAP64_DEV_MEM(dm_db->dev, memic_bar_start_addr)) >> PAGE_SHIFT; err = uverbs_copy_to(attrs, - MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET, - &start_offset, sizeof(start_offset)); + MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX, + &page_idx, sizeof(page_idx)); if (err) goto err_dealloc; + start_offset = dm->dev_addr & ~PAGE_MASK; err = uverbs_copy_to(attrs, - MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX, - &page_idx, sizeof(page_idx)); + MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET, + &start_offset, sizeof(start_offset)); if (err) goto err_dealloc; - bitmap_set(to_mucontext(context)->dm_pages, page_idx, - DIV_ROUND_UP(act_size, PAGE_SIZE)); + bitmap_set(to_mucontext(ctx)->dm_pages, page_idx, + DIV_ROUND_UP(dm->size, PAGE_SIZE)); + + return 0; + +err_dealloc: + mlx5_cmd_dealloc_memic(dm_db, dm->dev_addr, dm->size); + + return err; +} + +struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev, + struct ib_ucontext *context, + struct ib_dm_alloc_attr *attr, + struct uverbs_attr_bundle *attrs) +{ + struct mlx5_ib_dm *dm; + enum mlx5_ib_uapi_dm_type type; + int err; + + err = uverbs_get_const_default(&type, attrs, + MLX5_IB_ATTR_ALLOC_DM_REQ_TYPE, + MLX5_IB_UAPI_DM_TYPE_MEMIC); + if (err) + return ERR_PTR(err); + + mlx5_ib_dbg(to_mdev(ibdev), "alloc_dm req: dm_type=%d user_length=0x%llx log_alignment=%d\n", + type, attr->length, attr->alignment); + + dm = kzalloc(sizeof(*dm), GFP_KERNEL); + if (!dm) + return ERR_PTR(-ENOMEM); + + dm->type = type; + + switch (type) { + case MLX5_IB_UAPI_DM_TYPE_MEMIC: + err = handle_alloc_dm_memic(context, dm, + attr, + attrs); + break; + default: + err = -EOPNOTSUPP; + } - dm->dev_addr = memic_addr; + if (err) + goto err_free; return &dm->ibdm; -err_dealloc: - mlx5_cmd_dealloc_memic(memic, memic_addr, - 
act_size); err_free: kfree(dm); return ERR_PTR(err); @@ -2323,25 +2355,31 @@ err_free: int mlx5_ib_dealloc_dm(struct ib_dm *ibdm, struct uverbs_attr_bundle *attrs) { - struct mlx5_memic *memic = &to_mdev(ibdm->device)->memic; + struct mlx5_dm *dm_db = &to_mdev(ibdm->device)->dm; struct mlx5_ib_dm *dm = to_mdm(ibdm); - u64 act_size = roundup(dm->ibdm.length, MLX5_MEMIC_BASE_SIZE); u32 page_idx; int ret; - ret = mlx5_cmd_dealloc_memic(memic, dm->dev_addr, act_size); - if (ret) - return ret; + switch (dm->type) { + case MLX5_IB_UAPI_DM_TYPE_MEMIC: + ret = mlx5_cmd_dealloc_memic(dm_db, dm->dev_addr, dm->size); + if (ret) + return ret; - page_idx = (dm->dev_addr - memic->dev->bar_addr - - MLX5_CAP64_DEV_MEM(memic->dev, memic_bar_start_addr)) >> - PAGE_SHIFT; - bitmap_clear(rdma_udata_to_drv_context( - &attrs->driver_udata, - struct mlx5_ib_ucontext, - ibucontext)->dm_pages, - page_idx, - DIV_ROUND_UP(act_size, PAGE_SIZE)); + page_idx = (dm->dev_addr - + pci_resource_start(dm_db->dev->pdev, 0) - + MLX5_CAP64_DEV_MEM(dm_db->dev, + memic_bar_start_addr)) >> + PAGE_SHIFT; + bitmap_clear(rdma_udata_to_drv_context(&attrs->driver_udata, + struct mlx5_ib_ucontext, + ibucontext) + ->dm_pages, + page_idx, DIV_ROUND_UP(dm->size, PAGE_SIZE)); + break; + default: + return -EOPNOTSUPP; + } kfree(dm); @@ -5768,7 +5806,10 @@ ADD_UVERBS_ATTRIBUTES_SIMPLE( UA_MANDATORY), UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX, UVERBS_ATTR_TYPE(u16), - UA_MANDATORY)); + UA_OPTIONAL), + UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_ALLOC_DM_REQ_TYPE, + enum mlx5_ib_uapi_dm_type, + UA_OPTIONAL)); ADD_UVERBS_ATTRIBUTES_SIMPLE( mlx5_ib_flow_action, @@ -5916,8 +5957,8 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) INIT_LIST_HEAD(&dev->qp_list); spin_lock_init(&dev->reset_flow_resource_lock); - spin_lock_init(&dev->memic.memic_lock); - dev->memic.dev = mdev; + spin_lock_init(&dev->dm.lock); + dev->dm.dev = mdev; if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { err = 
init_srcu_struct(&dev->mr_srcu); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 55b8bdb402b6..714c360dc9fb 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -48,6 +48,7 @@ #include #include #include +#include #include "srq.h" @@ -554,15 +555,17 @@ enum mlx5_ib_mtt_access_flags { struct mlx5_ib_dm { struct ib_dm ibdm; phys_addr_t dev_addr; + u32 type; + size_t size; }; #define MLX5_IB_MTT_PRESENT (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE) -#define MLX5_IB_DM_ALLOWED_ACCESS (IB_ACCESS_LOCAL_WRITE |\ - IB_ACCESS_REMOTE_WRITE |\ - IB_ACCESS_REMOTE_READ |\ - IB_ACCESS_REMOTE_ATOMIC |\ - IB_ZERO_BASED) +#define MLX5_IB_DM_MEMIC_ALLOWED_ACCESS (IB_ACCESS_LOCAL_WRITE |\ + IB_ACCESS_REMOTE_WRITE |\ + IB_ACCESS_REMOTE_READ |\ + IB_ACCESS_REMOTE_ATOMIC |\ + IB_ZERO_BASED) struct mlx5_ib_mr { struct ib_mr ibmr; @@ -843,9 +846,13 @@ struct mlx5_ib_flow_action { }; }; -struct mlx5_memic { +struct mlx5_dm { struct mlx5_core_dev *dev; - spinlock_t memic_lock; + /* This lock is used to protect the access to the shared + * allocation map when concurrent requests by different + * processes are handled. 
+ */ + spinlock_t lock; DECLARE_BITMAP(memic_alloc_pages, MLX5_MAX_MEMIC_PAGES); }; @@ -949,7 +956,7 @@ struct mlx5_ib_dev { u8 umr_fence; struct list_head ib_dev_list; u64 sys_image_guid; - struct mlx5_memic memic; + struct mlx5_dm dm; u16 devx_whitelist_uid; struct mlx5_srq_table srq_table; struct mlx5_async_ctx async_ctx; diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 4381cddab97b..ba35d68e7499 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1159,8 +1159,8 @@ static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, mr->access_flags = access_flags; } -static struct ib_mr *mlx5_ib_get_memic_mr(struct ib_pd *pd, u64 memic_addr, - u64 length, int acc) +static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr, + u64 length, int acc, int mode) { struct mlx5_ib_dev *dev = to_mdev(pd->device); int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); @@ -1182,9 +1182,8 @@ static struct ib_mr *mlx5_ib_get_memic_mr(struct ib_pd *pd, u64 memic_addr, mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); - MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MEMIC & 0x3); - MLX5_SET(mkc, mkc, access_mode_4_2, - (MLX5_MKC_ACCESS_MODE_MEMIC >> 2) & 0x7); + MLX5_SET(mkc, mkc, access_mode_1_0, mode & 0x3); + MLX5_SET(mkc, mkc, access_mode_4_2, (mode >> 2) & 0x7); MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC)); MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE)); MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ)); @@ -1194,7 +1193,7 @@ static struct ib_mr *mlx5_ib_get_memic_mr(struct ib_pd *pd, u64 memic_addr, MLX5_SET64(mkc, mkc, len, length); MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); MLX5_SET(mkc, mkc, qpn, 0xffffff); - MLX5_SET64(mkc, mkc, start_addr, memic_addr - dev->mdev->bar_addr); + MLX5_SET64(mkc, mkc, start_addr, start_addr); err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen); if (err) @@ -1236,15 +1235,24 @@ struct ib_mr 
*mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm, struct uverbs_attr_bundle *attrs) { struct mlx5_ib_dm *mdm = to_mdm(dm); - u64 memic_addr; + struct mlx5_core_dev *dev = to_mdev(dm->device)->mdev; + u64 start_addr = mdm->dev_addr + attr->offset; + int mode; - if (attr->access_flags & ~MLX5_IB_DM_ALLOWED_ACCESS) - return ERR_PTR(-EINVAL); + switch (mdm->type) { + case MLX5_IB_UAPI_DM_TYPE_MEMIC: + if (attr->access_flags & ~MLX5_IB_DM_MEMIC_ALLOWED_ACCESS) + return ERR_PTR(-EINVAL); - memic_addr = mdm->dev_addr + attr->offset; + mode = MLX5_MKC_ACCESS_MODE_MEMIC; + start_addr -= pci_resource_start(dev->pdev, 0); + break; + default: + return ERR_PTR(-EINVAL); + } - return mlx5_ib_get_memic_mr(pd, memic_addr, attr->length, - attr->access_flags); + return mlx5_ib_get_dm_mr(pd, start_addr, attr->length, + attr->access_flags, mode); } struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h index 0d8f564ce60b..d404c951954c 100644 --- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h +++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h @@ -44,6 +44,7 @@ enum mlx5_ib_create_flow_action_attrs { enum mlx5_ib_alloc_dm_attrs { MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET = (1U << UVERBS_ID_NS_SHIFT), MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX, + MLX5_IB_ATTR_ALLOC_DM_REQ_TYPE, }; enum mlx5_ib_devx_methods { diff --git a/include/uapi/rdma/mlx5_user_ioctl_verbs.h b/include/uapi/rdma/mlx5_user_ioctl_verbs.h index 0a126a6b9337..c291fb2f8446 100644 --- a/include/uapi/rdma/mlx5_user_ioctl_verbs.h +++ b/include/uapi/rdma/mlx5_user_ioctl_verbs.h @@ -57,5 +57,9 @@ struct mlx5_ib_uapi_devx_async_cmd_hdr { __u8 out_data[]; }; +enum mlx5_ib_uapi_dm_type { + MLX5_IB_UAPI_DM_TYPE_MEMIC, +}; + #endif -- cgit v1.2.3 From 4056b12efd43248d8331b6ed93df5ea5250106a9 Mon Sep 17 00:00:00 2001 From: Ariel Levkovich Date: Sun, 5 May 2019 17:07:12 +0300 Subject: IB/mlx5: Warn on allocated MEMIC buffers 
during cleanup Adding a warning on allocated MEMIC buffers that weren't freed prior to driver tear down. Signed-off-by: Ariel Levkovich Reviewed-by: Eli Cohen Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 97f32a03e456..803aea335953 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -5907,6 +5907,8 @@ static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev) srcu_barrier(&dev->mr_srcu); cleanup_srcu_struct(&dev->mr_srcu); } + + WARN_ON(!bitmap_empty(dev->dm.memic_alloc_pages, MLX5_MAX_MEMIC_PAGES)); } static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) -- cgit v1.2.3 From 25c13324d03d004f9e8071bf5bf5d5c6fdace71e Mon Sep 17 00:00:00 2001 From: Ariel Levkovich Date: Sun, 5 May 2019 17:07:13 +0300 Subject: IB/mlx5: Add steering SW ICM device memory type This patch adds support for allocating, deallocating and registering a new device memory type, STEERING_SW_ICM. This memory can be allocated and used by a privileged user for direct rule insertion and management of the device's steering tables. The type is provided by the user via the dedicated attribute in the alloc_dm ioctl command. 
Signed-off-by: Ariel Levkovich Reviewed-by: Eli Cohen Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/cmd.c | 127 +++++++++++++++++++++++++- drivers/infiniband/hw/mlx5/cmd.h | 6 +- drivers/infiniband/hw/mlx5/main.c | 142 ++++++++++++++++++++++++++++-- drivers/infiniband/hw/mlx5/mlx5_ib.h | 17 ++++ drivers/infiniband/hw/mlx5/mr.c | 7 ++ include/uapi/rdma/mlx5_user_ioctl_verbs.h | 2 + 6 files changed, 292 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c index f0e9c7609083..e3ec79b8f7f5 100644 --- a/drivers/infiniband/hw/mlx5/cmd.c +++ b/drivers/infiniband/hw/mlx5/cmd.c @@ -157,7 +157,7 @@ int mlx5_cmd_alloc_memic(struct mlx5_dm *dm, phys_addr_t *addr, return -ENOMEM; } -int mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, u64 addr, u64 length) +int mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length) { struct mlx5_core_dev *dev = dm->dev; u64 hw_start_addr = MLX5_CAP64_DEV_MEM(dev, memic_bar_start_addr); @@ -186,6 +186,131 @@ int mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, u64 addr, u64 length) return err; } +int mlx5_cmd_alloc_sw_icm(struct mlx5_dm *dm, int type, u64 length, + u16 uid, phys_addr_t *addr, u32 *obj_id) +{ + struct mlx5_core_dev *dev = dm->dev; + u32 num_blocks = DIV_ROUND_UP(length, MLX5_SW_ICM_BLOCK_SIZE(dev)); + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {}; + u32 in[MLX5_ST_SZ_DW(create_sw_icm_in)] = {}; + unsigned long *block_map; + u64 icm_start_addr; + u32 log_icm_size; + u32 max_blocks; + u64 block_idx; + void *sw_icm; + int ret; + + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, + MLX5_CMD_OP_CREATE_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_SW_ICM); + MLX5_SET(general_obj_in_cmd_hdr, in, uid, uid); + + switch (type) { + case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM: + icm_start_addr = MLX5_CAP64_DEV_MEM(dev, + steering_sw_icm_start_address); + log_icm_size = 
MLX5_CAP_DEV_MEM(dev, log_steering_sw_icm_size); + block_map = dm->steering_sw_icm_alloc_blocks; + break; + case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM: + icm_start_addr = MLX5_CAP64_DEV_MEM(dev, + header_modify_sw_icm_start_address); + log_icm_size = MLX5_CAP_DEV_MEM(dev, + log_header_modify_sw_icm_size); + block_map = dm->header_modify_sw_icm_alloc_blocks; + break; + default: + return -EINVAL; + } + + max_blocks = BIT(log_icm_size - MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)); + spin_lock(&dm->lock); + block_idx = bitmap_find_next_zero_area(block_map, + max_blocks, + 0, + num_blocks, 0); + + if (block_idx < max_blocks) + bitmap_set(block_map, + block_idx, num_blocks); + + spin_unlock(&dm->lock); + + if (block_idx >= max_blocks) + return -ENOMEM; + + sw_icm = MLX5_ADDR_OF(create_sw_icm_in, in, sw_icm); + icm_start_addr += block_idx << MLX5_LOG_SW_ICM_BLOCK_SIZE(dev); + MLX5_SET64(sw_icm, sw_icm, sw_icm_start_addr, + icm_start_addr); + MLX5_SET(sw_icm, sw_icm, log_sw_icm_size, ilog2(length)); + + ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (ret) { + spin_lock(&dm->lock); + bitmap_clear(block_map, + block_idx, num_blocks); + spin_unlock(&dm->lock); + + return ret; + } + + *addr = icm_start_addr; + *obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + + return 0; +} + +int mlx5_cmd_dealloc_sw_icm(struct mlx5_dm *dm, int type, u64 length, + u16 uid, phys_addr_t addr, u32 obj_id) +{ + struct mlx5_core_dev *dev = dm->dev; + u32 num_blocks = DIV_ROUND_UP(length, MLX5_SW_ICM_BLOCK_SIZE(dev)); + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {}; + u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {}; + unsigned long *block_map; + u64 start_idx; + int err; + + switch (type) { + case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM: + start_idx = + (addr - MLX5_CAP64_DEV_MEM( + dev, steering_sw_icm_start_address)) >> + MLX5_LOG_SW_ICM_BLOCK_SIZE(dev); + block_map = dm->steering_sw_icm_alloc_blocks; + break; + case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM: + 
start_idx = + (addr - + MLX5_CAP64_DEV_MEM( + dev, header_modify_sw_icm_start_address)) >> + MLX5_LOG_SW_ICM_BLOCK_SIZE(dev); + block_map = dm->header_modify_sw_icm_alloc_blocks; + break; + default: + return -EINVAL; + } + + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, + MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_SW_ICM); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, obj_id); + MLX5_SET(general_obj_in_cmd_hdr, in, uid, uid); + + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (err) + return err; + + spin_lock(&dm->lock); + bitmap_clear(block_map, + start_idx, num_blocks); + spin_unlock(&dm->lock); + + return 0; +} + int mlx5_cmd_query_ext_ppcnt_counters(struct mlx5_core_dev *dev, void *out) { u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {}; diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h index 80a644bea6c7..0572dcba6eae 100644 --- a/drivers/infiniband/hw/mlx5/cmd.h +++ b/drivers/infiniband/hw/mlx5/cmd.h @@ -46,7 +46,7 @@ int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *mdev, void *in, int in_size); int mlx5_cmd_alloc_memic(struct mlx5_dm *dm, phys_addr_t *addr, u64 length, u32 alignment); -int mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, u64 addr, u64 length); +int mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length); void mlx5_cmd_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn, u16 uid); void mlx5_cmd_destroy_tir(struct mlx5_core_dev *dev, u32 tirn, u16 uid); void mlx5_cmd_destroy_tis(struct mlx5_core_dev *dev, u32 tisn, u16 uid); @@ -65,4 +65,8 @@ int mlx5_cmd_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id, u16 uid); int mlx5_cmd_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb, u16 opmod, u8 port); +int mlx5_cmd_alloc_sw_icm(struct mlx5_dm *dm, int type, u64 length, + u16 uid, phys_addr_t *addr, u32 *obj_id); +int mlx5_cmd_dealloc_sw_icm(struct mlx5_dm *dm, int type, u64 length, + u16 uid, phys_addr_t addr, u32 
obj_id); #endif /* MLX5_IB_CMD_H */ diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 803aea335953..f9def2dfdba3 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2264,6 +2264,28 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm return 0; } +static inline int check_dm_type_support(struct mlx5_ib_dev *dev, + u32 type) +{ + switch (type) { + case MLX5_IB_UAPI_DM_TYPE_MEMIC: + if (!MLX5_CAP_DEV_MEM(dev->mdev, memic)) + return -EOPNOTSUPP; + break; + case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM: + if (!capable(CAP_SYS_RAWIO) || + !capable(CAP_NET_RAW)) + return -EPERM; + + if (!(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner) || + MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, sw_owner))) + return -EOPNOTSUPP; + break; + } + + return 0; +} + static int handle_alloc_dm_memic(struct ib_ucontext *ctx, struct mlx5_ib_dm *dm, struct ib_dm_alloc_attr *attr, @@ -2309,6 +2331,40 @@ err_dealloc: return err; } +static int handle_alloc_dm_sw_icm(struct ib_ucontext *ctx, + struct mlx5_ib_dm *dm, + struct ib_dm_alloc_attr *attr, + struct uverbs_attr_bundle *attrs, + int type) +{ + struct mlx5_dm *dm_db = &to_mdev(ctx->device)->dm; + u64 act_size; + int err; + + /* Allocation size must a multiple of the basic block size + * and a power of 2. 
+ */ + act_size = roundup(attr->length, MLX5_SW_ICM_BLOCK_SIZE(dm_db->dev)); + act_size = roundup_pow_of_two(act_size); + + dm->size = act_size; + err = mlx5_cmd_alloc_sw_icm(dm_db, type, act_size, + to_mucontext(ctx)->devx_uid, &dm->dev_addr, + &dm->icm_dm.obj_id); + if (err) + return err; + + err = uverbs_copy_to(attrs, + MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET, + &dm->dev_addr, sizeof(dm->dev_addr)); + if (err) + mlx5_cmd_dealloc_sw_icm(dm_db, type, dm->size, + to_mucontext(ctx)->devx_uid, + dm->dev_addr, dm->icm_dm.obj_id); + + return err; +} + struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev, struct ib_ucontext *context, struct ib_dm_alloc_attr *attr, @@ -2327,6 +2383,10 @@ struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev, mlx5_ib_dbg(to_mdev(ibdev), "alloc_dm req: dm_type=%d user_length=0x%llx log_alignment=%d\n", type, attr->length, attr->alignment); + err = check_dm_type_support(to_mdev(ibdev), type); + if (err) + return ERR_PTR(err); + dm = kzalloc(sizeof(*dm), GFP_KERNEL); if (!dm) return ERR_PTR(-ENOMEM); @@ -2339,6 +2399,10 @@ struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev, attr, attrs); break; + case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM: + case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM: + err = handle_alloc_dm_sw_icm(context, dm, attr, attrs, type); + break; default: err = -EOPNOTSUPP; } @@ -2355,6 +2419,8 @@ err_free: int mlx5_ib_dealloc_dm(struct ib_dm *ibdm, struct uverbs_attr_bundle *attrs) { + struct mlx5_ib_ucontext *ctx = rdma_udata_to_drv_context( + &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext); struct mlx5_dm *dm_db = &to_mdev(ibdm->device)->dm; struct mlx5_ib_dm *dm = to_mdm(ibdm); u32 page_idx; @@ -2371,11 +2437,16 @@ int mlx5_ib_dealloc_dm(struct ib_dm *ibdm, struct uverbs_attr_bundle *attrs) MLX5_CAP64_DEV_MEM(dm_db->dev, memic_bar_start_addr)) >> PAGE_SHIFT; - bitmap_clear(rdma_udata_to_drv_context(&attrs->driver_udata, - struct mlx5_ib_ucontext, - ibucontext) - ->dm_pages, - page_idx, 
DIV_ROUND_UP(dm->size, PAGE_SIZE)); + bitmap_clear(ctx->dm_pages, page_idx, + DIV_ROUND_UP(dm->size, PAGE_SIZE)); + break; + case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM: + case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM: + ret = mlx5_cmd_dealloc_sw_icm(dm_db, dm->type, dm->size, + ctx->devx_uid, dm->dev_addr, + dm->icm_dm.obj_id); + if (ret) + return ret; break; default: return -EOPNOTSUPP; @@ -5902,6 +5973,8 @@ static struct ib_counters *mlx5_ib_create_counters(struct ib_device *device, static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev) { + struct mlx5_core_dev *mdev = dev->mdev; + mlx5_ib_cleanup_multiport_master(dev); if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { srcu_barrier(&dev->mr_srcu); @@ -5909,11 +5982,29 @@ static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev) } WARN_ON(!bitmap_empty(dev->dm.memic_alloc_pages, MLX5_MAX_MEMIC_PAGES)); + + WARN_ON(dev->dm.steering_sw_icm_alloc_blocks && + !bitmap_empty( + dev->dm.steering_sw_icm_alloc_blocks, + BIT(MLX5_CAP_DEV_MEM(mdev, log_steering_sw_icm_size) - + MLX5_LOG_SW_ICM_BLOCK_SIZE(mdev)))); + + kfree(dev->dm.steering_sw_icm_alloc_blocks); + + WARN_ON(dev->dm.header_modify_sw_icm_alloc_blocks && + !bitmap_empty(dev->dm.header_modify_sw_icm_alloc_blocks, + BIT(MLX5_CAP_DEV_MEM( + mdev, log_header_modify_sw_icm_size) - + MLX5_LOG_SW_ICM_BLOCK_SIZE(mdev)))); + + kfree(dev->dm.header_modify_sw_icm_alloc_blocks); } static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) { struct mlx5_core_dev *mdev = dev->mdev; + u64 header_modify_icm_blocks = 0; + u64 steering_icm_blocks = 0; int err; int i; @@ -5959,16 +6050,51 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) INIT_LIST_HEAD(&dev->qp_list); spin_lock_init(&dev->reset_flow_resource_lock); + if (MLX5_CAP_GEN_64(mdev, general_obj_types) & + MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM) { + if (MLX5_CAP64_DEV_MEM(mdev, steering_sw_icm_start_address)) { + steering_icm_blocks = + BIT(MLX5_CAP_DEV_MEM(mdev, + log_steering_sw_icm_size) - 
+ MLX5_LOG_SW_ICM_BLOCK_SIZE(mdev)); + + dev->dm.steering_sw_icm_alloc_blocks = + kcalloc(BITS_TO_LONGS(steering_icm_blocks), + sizeof(unsigned long), GFP_KERNEL); + if (!dev->dm.steering_sw_icm_alloc_blocks) + goto err_mp; + } + + if (MLX5_CAP64_DEV_MEM(mdev, + header_modify_sw_icm_start_address)) { + header_modify_icm_blocks = BIT( + MLX5_CAP_DEV_MEM( + mdev, log_header_modify_sw_icm_size) - + MLX5_LOG_SW_ICM_BLOCK_SIZE(mdev)); + + dev->dm.header_modify_sw_icm_alloc_blocks = + kcalloc(BITS_TO_LONGS(header_modify_icm_blocks), + sizeof(unsigned long), GFP_KERNEL); + if (!dev->dm.header_modify_sw_icm_alloc_blocks) + goto err_dm; + } + } + spin_lock_init(&dev->dm.lock); dev->dm.dev = mdev; if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { err = init_srcu_struct(&dev->mr_srcu); if (err) - goto err_mp; + goto err_dm; } return 0; + +err_dm: + kfree(dev->dm.steering_sw_icm_alloc_blocks); + kfree(dev->dm.header_modify_sw_icm_alloc_blocks); + err_mp: mlx5_ib_cleanup_multiport_master(dev); @@ -6151,7 +6277,9 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_xrc_ops); } - if (MLX5_CAP_DEV_MEM(mdev, memic)) + if (MLX5_CAP_DEV_MEM(mdev, memic) || + MLX5_CAP_GEN_64(dev->mdev, general_obj_types) & + MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM) ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_dm_ops); if (mlx5_accel_ipsec_device_caps(dev->mdev) & diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 714c360dc9fb..40eb8be482e4 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -118,6 +118,10 @@ enum { MLX5_MEMIC_BASE_SIZE = 1 << MLX5_MEMIC_BASE_ALIGN, }; +#define MLX5_LOG_SW_ICM_BLOCK_SIZE(dev) \ + (MLX5_CAP_DEV_MEM(dev, log_sw_icm_alloc_granularity)) +#define MLX5_SW_ICM_BLOCK_SIZE(dev) (1 << MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)) + struct mlx5_ib_ucontext { struct ib_ucontext ibucontext; struct list_head db_page_list; @@ -557,6 +561,12 @@ struct 
mlx5_ib_dm { phys_addr_t dev_addr; u32 type; size_t size; + union { + struct { + u32 obj_id; + } icm_dm; + /* other dm types specific params should be added here */ + }; }; #define MLX5_IB_MTT_PRESENT (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE) @@ -567,6 +577,11 @@ struct mlx5_ib_dm { IB_ACCESS_REMOTE_ATOMIC |\ IB_ZERO_BASED) +#define MLX5_IB_DM_SW_ICM_ALLOWED_ACCESS (IB_ACCESS_LOCAL_WRITE |\ + IB_ACCESS_REMOTE_WRITE |\ + IB_ACCESS_REMOTE_READ |\ + IB_ZERO_BASED) + struct mlx5_ib_mr { struct ib_mr ibmr; void *descs; @@ -854,6 +869,8 @@ struct mlx5_dm { */ spinlock_t lock; DECLARE_BITMAP(memic_alloc_pages, MLX5_MAX_MEMIC_PAGES); + unsigned long *steering_sw_icm_alloc_blocks; + unsigned long *header_modify_sw_icm_alloc_blocks; }; struct mlx5_read_counters_attr { diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index ba35d68e7499..5f09699fab98 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1247,6 +1247,13 @@ struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm, mode = MLX5_MKC_ACCESS_MODE_MEMIC; start_addr -= pci_resource_start(dev->pdev, 0); break; + case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM: + case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM: + if (attr->access_flags & ~MLX5_IB_DM_SW_ICM_ALLOWED_ACCESS) + return ERR_PTR(-EINVAL); + + mode = MLX5_MKC_ACCESS_MODE_SW_ICM; + break; default: return ERR_PTR(-EINVAL); } diff --git a/include/uapi/rdma/mlx5_user_ioctl_verbs.h b/include/uapi/rdma/mlx5_user_ioctl_verbs.h index c291fb2f8446..a8f34c237458 100644 --- a/include/uapi/rdma/mlx5_user_ioctl_verbs.h +++ b/include/uapi/rdma/mlx5_user_ioctl_verbs.h @@ -59,6 +59,8 @@ struct mlx5_ib_uapi_devx_async_cmd_hdr { enum mlx5_ib_uapi_dm_type { MLX5_IB_UAPI_DM_TYPE_MEMIC, + MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM, + MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM, }; #endif -- cgit v1.2.3 From 33cde96fb5d7ae36207541c8a832d7fae3cadbde Mon Sep 17 00:00:00 2001 From: Ariel Levkovich Date: Sun, 5 May 2019 17:07:14 
+0300 Subject: IB/mlx5: Device resource control for privileged DEVX user For DEVX users who have SYS_RAWIO capability, we set the internal device resources capability when creating the UCTX. This will allow the device to restrict the allocation of internal device resources such as SW ICM memory to privileged DEVX users only. Signed-off-by: Ariel Levkovich Reviewed-by: Eli Cohen Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/devx.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index d627f44bc84d..169ffffcf5ed 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -85,6 +85,10 @@ int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user) if (is_user && capable(CAP_NET_RAW) && (MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RAW_TX)) cap |= MLX5_UCTX_CAP_RAW_TX; + if (is_user && capable(CAP_SYS_RAWIO) && + (MLX5_CAP_GEN(dev->mdev, uctx_cap) & + MLX5_UCTX_CAP_INTERNAL_DEV_RES)) + cap |= MLX5_UCTX_CAP_INTERNAL_DEV_RES; MLX5_SET(create_uctx_in, in, opcode, MLX5_CMD_OP_CREATE_UCTX); MLX5_SET(uctx, uctx, cap, cap); -- cgit v1.2.3 From 10bf13c334504a1fea54b731217ade6814b79f65 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 6 May 2019 10:45:56 +0300 Subject: RDMA/mlx5: Remove MAYEXEC flag MAYEXEC flag was mistakenly added in the commit cited in the fixes line. 
Fixes: 4eb6ab13b991 ("RDMA: Remove rdma_user_mmap_page") Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index f9def2dfdba3..687f99172037 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2073,7 +2073,7 @@ static int mlx5_ib_mmap_clock_info_page(struct mlx5_ib_dev *dev, if (vma->vm_flags & (VM_WRITE | VM_EXEC)) return -EPERM; - vma->vm_flags &= ~(VM_MAYWRITE | VM_MAYEXEC); + vma->vm_flags &= ~VM_MAYWRITE; if (!dev->mdev->clock_info) return -EOPNOTSUPP; -- cgit v1.2.3 From 4c4b1996b5db688e2dcb8242b0a3bf7b1e845e42 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 18 Mar 2019 09:55:09 -0700 Subject: IB/hfi1: Fix WQ_MEM_RECLAIM warning The work_item cancels that occur when a QP is destroyed can elicit the following trace: workqueue: WQ_MEM_RECLAIM ipoib_wq:ipoib_cm_tx_reap [ib_ipoib] is flushing !WQ_MEM_RECLAIM hfi0_0:_hfi1_do_send [hfi1] WARNING: CPU: 7 PID: 1403 at kernel/workqueue.c:2486 check_flush_dependency+0xb1/0x100 Call Trace: __flush_work.isra.29+0x8c/0x1a0 ? __switch_to_asm+0x40/0x70 __cancel_work_timer+0x103/0x190 ? schedule+0x32/0x80 iowait_cancel_work+0x15/0x30 [hfi1] rvt_reset_qp+0x1f8/0x3e0 [rdmavt] rvt_destroy_qp+0x65/0x1f0 [rdmavt] ? _cond_resched+0x15/0x30 ib_destroy_qp+0xe9/0x230 [ib_core] ipoib_cm_tx_reap+0x21c/0x560 [ib_ipoib] process_one_work+0x171/0x370 worker_thread+0x49/0x3f0 kthread+0xf8/0x130 ? max_active_store+0x80/0x80 ? kthread_bind+0x10/0x10 ret_from_fork+0x35/0x40 Since QP destruction frees memory, hfi1_wq should have the WQ_MEM_RECLAIM. The hfi1_wq does not allocate memory with GFP_KERNEL or otherwise become entangled with memory reclaim, so this flag is appropriate. Fixes: 0a226edd203f ("staging/rdma/hfi1: Use parallel workqueue for SDMA engines") Reviewed-by: Michael J. 
Ruhl Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/init.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index e4c2ae4f1cb3..71cb9525c074 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -805,7 +805,8 @@ static int create_workqueues(struct hfi1_devdata *dd) ppd->hfi1_wq = alloc_workqueue( "hfi%d_%d", - WQ_SYSFS | WQ_HIGHPRI | WQ_CPU_INTENSIVE, + WQ_SYSFS | WQ_HIGHPRI | WQ_CPU_INTENSIVE | + WQ_MEM_RECLAIM, HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES, dd->unit, pidx); if (!ppd->hfi1_wq) -- cgit v1.2.3 From 4a35339958f16d42a4ca06a8da9d4b5ab39ee8ea Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Mon, 6 May 2019 08:53:32 -0500 Subject: RDMA/umem: Add API to find best driver supported page size in an MR This helper iterates through the SG list to find the best page size to use from a bitmap of HW supported page sizes. Drivers that support multiple page sizes, but not mixed sizes in an MR can use this API. 
Suggested-by: Jason Gunthorpe Signed-off-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/umem.c | 51 ++++++++++++++++++++++++++++++++++++++++++ include/rdma/ib_umem.h | 9 ++++++++ include/rdma/ib_verbs.h | 24 ++++++++++++++++++++ 3 files changed, 84 insertions(+) diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 23f7512cc7a8..145c31c530ae 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -130,6 +130,57 @@ static struct scatterlist *ib_umem_add_sg_table(struct scatterlist *sg, return sg; } +/** + * ib_umem_find_best_pgsz - Find best HW page size to use for this MR + * + * @umem: umem struct + * @pgsz_bitmap: bitmap of HW supported page sizes + * @virt: IOVA + * + * This helper is intended for HW that support multiple page + * sizes but can do only a single page size in an MR. + * + * Returns 0 if the umem requires page sizes not supported by + * the driver to be mapped. Drivers always supporting PAGE_SIZE + * or smaller will never see a 0 result. + */ +unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem, + unsigned long pgsz_bitmap, + unsigned long virt) +{ + struct scatterlist *sg; + unsigned int best_pg_bit; + unsigned long va, pgoff; + dma_addr_t mask; + int i; + + /* At minimum, drivers must support PAGE_SIZE or smaller */ + if (WARN_ON(!(pgsz_bitmap & GENMASK(PAGE_SHIFT, 0)))) + return 0; + + va = virt; + /* max page size not to exceed MR length */ + mask = roundup_pow_of_two(umem->length); + /* offset into first SGL */ + pgoff = umem->address & ~PAGE_MASK; + + for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) { + /* Walk SGL and reduce max page size if VA/PA bits differ + * for any address. 
+ */ + mask |= (sg_dma_address(sg) + pgoff) ^ va; + if (i && i != (umem->nmap - 1)) + /* restrict by length as well for interior SGEs */ + mask |= sg_dma_len(sg); + va += sg_dma_len(sg) - pgoff; + pgoff = 0; + } + best_pg_bit = rdma_find_pg_bit(mask, pgsz_bitmap); + + return BIT_ULL(best_pg_bit); +} +EXPORT_SYMBOL(ib_umem_find_best_pgsz); + /** * ib_umem_get - Pin and DMA map userspace memory. * diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h index b13a2e9a50d4..917b687010f0 100644 --- a/include/rdma/ib_umem.h +++ b/include/rdma/ib_umem.h @@ -87,6 +87,9 @@ void ib_umem_release(struct ib_umem *umem); int ib_umem_page_count(struct ib_umem *umem); int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset, size_t length); +unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem, + unsigned long pgsz_bitmap, + unsigned long virt); #else /* CONFIG_INFINIBAND_USER_MEM */ @@ -104,6 +107,12 @@ static inline int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offs size_t length) { return -EINVAL; } +static inline int ib_umem_find_best_pgsz(struct ib_umem *umem, + unsigned long pgsz_bitmap, + unsigned long virt) { + return -EINVAL; +} + #endif /* CONFIG_INFINIBAND_USER_MEM */ #endif /* IB_UMEM_H */ diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 8f8965f8ffdb..03b07ec6a34b 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -3250,6 +3250,30 @@ static inline bool rdma_cap_read_inv(struct ib_device *dev, u32 port_num) return rdma_protocol_iwarp(dev, port_num); } +/** + * rdma_find_pg_bit - Find page bit given address and HW supported page sizes + * + * @addr: address + * @pgsz_bitmap: bitmap of HW supported page sizes + */ +static inline unsigned int rdma_find_pg_bit(unsigned long addr, + unsigned long pgsz_bitmap) +{ + unsigned long align; + unsigned long pgsz; + + align = addr & -addr; + + /* Find page bit such that addr is aligned to the highest supported + * HW page size + */ + pgsz = pgsz_bitmap & 
~(-align << 1); + if (!pgsz) + return __ffs(pgsz_bitmap); + + return __fls(pgsz); +} + int ib_set_vf_link_state(struct ib_device *device, int vf, u8 port, int state); int ib_get_vf_config(struct ib_device *device, int vf, u8 port, -- cgit v1.2.3 From a808273a495c657e33281b181fd7fcc2bb28f662 Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Mon, 6 May 2019 08:53:33 -0500 Subject: RDMA/verbs: Add a DMA iterator to return aligned contiguous memory blocks This helper iterates over a DMA-mapped SGL and returns contiguous memory blocks aligned to a HW supported page size. Suggested-by: Jason Gunthorpe Signed-off-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/verbs.c | 34 +++++++++++++++++++++++++++++ include/rdma/ib_verbs.h | 47 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index d607c319ad50..a4e97dd3c4b6 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -2710,3 +2710,37 @@ int rdma_init_netdev(struct ib_device *device, u8 port_num, netdev, params.param); } EXPORT_SYMBOL(rdma_init_netdev); + +void __rdma_block_iter_start(struct ib_block_iter *biter, + struct scatterlist *sglist, unsigned int nents, + unsigned long pgsz) +{ + memset(biter, 0, sizeof(struct ib_block_iter)); + biter->__sg = sglist; + biter->__sg_nents = nents; + + /* Driver provides best block size to use */ + biter->__pg_bit = __fls(pgsz); +} +EXPORT_SYMBOL(__rdma_block_iter_start); + +bool __rdma_block_iter_next(struct ib_block_iter *biter) +{ + unsigned int block_offset; + + if (!biter->__sg_nents || !biter->__sg) + return false; + + biter->__dma_addr = sg_dma_address(biter->__sg) + biter->__sg_advance; + block_offset = biter->__dma_addr & (BIT_ULL(biter->__pg_bit) - 1); + biter->__sg_advance += BIT_ULL(biter->__pg_bit) - block_offset; + + if (biter->__sg_advance >= sg_dma_len(biter->__sg)) { + biter->__sg_advance = 0; + 
biter->__sg = sg_next(biter->__sg); + biter->__sg_nents--; + } + + return true; +} +EXPORT_SYMBOL(__rdma_block_iter_next); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 03b07ec6a34b..deb67b21ccb9 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2726,6 +2726,21 @@ struct ib_client { u8 no_kverbs_req:1; }; +/* + * IB block DMA iterator + * + * Iterates the DMA-mapped SGL in contiguous memory blocks aligned + * to a HW supported page size. + */ +struct ib_block_iter { + /* internal states */ + struct scatterlist *__sg; /* sg holding the current aligned block */ + dma_addr_t __dma_addr; /* unaligned DMA address of this block */ + unsigned int __sg_nents; /* number of SG entries */ + unsigned int __sg_advance; /* number of bytes to advance in sg in next step */ + unsigned int __pg_bit; /* alignment of current block */ +}; + struct ib_device *_ib_alloc_device(size_t size); #define ib_alloc_device(drv_struct, member) \ container_of(_ib_alloc_device(sizeof(struct drv_struct) + \ @@ -2746,6 +2761,38 @@ void ib_unregister_device_queued(struct ib_device *ib_dev); int ib_register_client (struct ib_client *client); void ib_unregister_client(struct ib_client *client); +void __rdma_block_iter_start(struct ib_block_iter *biter, + struct scatterlist *sglist, + unsigned int nents, + unsigned long pgsz); +bool __rdma_block_iter_next(struct ib_block_iter *biter); + +/** + * rdma_block_iter_dma_address - get the aligned dma address of the current + * block held by the block iterator. 
+ * @biter: block iterator holding the memory block + */ +static inline dma_addr_t +rdma_block_iter_dma_address(struct ib_block_iter *biter) +{ + return biter->__dma_addr & ~(BIT_ULL(biter->__pg_bit) - 1); +} + +/** + * rdma_for_each_block - iterate over contiguous memory blocks of the sg list + * @sglist: sglist to iterate over + * @biter: block iterator holding the memory block + * @nents: maximum number of sg entries to iterate over + * @pgsz: best HW supported page size to use + * + * Callers may use rdma_block_iter_dma_address() to get each + * blocks aligned DMA address. + */ +#define rdma_for_each_block(sglist, biter, nents, pgsz) \ + for (__rdma_block_iter_start(biter, sglist, nents, \ + pgsz); \ + __rdma_block_iter_next(biter);) + /** * ib_get_client_data - Get IB client context * @device:Device to get context for -- cgit v1.2.3 From eb52c0333f06b88bca5bac0dc0aeca729de6eb11 Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Mon, 6 May 2019 08:53:34 -0500 Subject: RDMA/i40iw: Use core helpers to get aligned DMA address within a supported page size Call the core helpers to retrieve the HW aligned address to use for the MR, within a supported i40iw page size. Remove code in i40iw to determine when MR is backed by 2M huge pages which involves checking the umem->hugetlb flag and VMA inspection. The new DMA iterator will return the 2M aligned address if the MR is backed by 2M pages. Fixes: f26c7c83395b ("i40iw: Add 2MB page support") Reviewed-by: Michael J. 
Ruhl Signed-off-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/i40iw/i40iw_verbs.c | 46 +++++-------------------------- drivers/infiniband/hw/i40iw/i40iw_verbs.h | 3 +- 2 files changed, 8 insertions(+), 41 deletions(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index b8a1412253ae..5689d742bafb 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -1338,52 +1338,21 @@ static void i40iw_copy_user_pgaddrs(struct i40iw_mr *iwmr, struct i40iw_pbl *iwpbl = &iwmr->iwpbl; struct i40iw_pble_alloc *palloc = &iwpbl->pble_alloc; struct i40iw_pble_info *pinfo; - struct sg_dma_page_iter sg_iter; - u64 pg_addr = 0; + struct ib_block_iter biter; u32 idx = 0; - bool first_pg = true; pinfo = (level == I40IW_LEVEL_1) ? NULL : palloc->level2.leaf; if (iwmr->type == IW_MEMREG_TYPE_QP) iwpbl->qp_mr.sq_page = sg_page(region->sg_head.sgl); - for_each_sg_dma_page (region->sg_head.sgl, &sg_iter, region->nmap, 0) { - pg_addr = sg_page_iter_dma_address(&sg_iter); - if (first_pg) - *pbl = cpu_to_le64(pg_addr & iwmr->page_msk); - else if (!(pg_addr & ~iwmr->page_msk)) - *pbl = cpu_to_le64(pg_addr); - else - continue; - - first_pg = false; + rdma_for_each_block(region->sg_head.sgl, &biter, region->nmap, + iwmr->page_size) { + *pbl = rdma_block_iter_dma_address(&biter); pbl = i40iw_next_pbl_addr(pbl, &pinfo, &idx); } } -/** - * i40iw_set_hugetlb_params - set MR pg size and mask to huge pg values. 
- * @addr: virtual address - * @iwmr: mr pointer for this memory registration - */ -static void i40iw_set_hugetlb_values(u64 addr, struct i40iw_mr *iwmr) -{ - struct vm_area_struct *vma; - struct hstate *h; - - down_read(&current->mm->mmap_sem); - vma = find_vma(current->mm, addr); - if (vma && is_vm_hugetlb_page(vma)) { - h = hstate_vma(vma); - if (huge_page_size(h) == 0x200000) { - iwmr->page_size = huge_page_size(h); - iwmr->page_msk = huge_page_mask(h); - } - } - up_read(&current->mm->mmap_sem); -} - /** * i40iw_check_mem_contiguous - check if pbls stored in arr are contiguous * @arr: lvl1 pbl array @@ -1839,10 +1808,9 @@ static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd, iwmr->ibmr.device = pd->device; iwmr->page_size = PAGE_SIZE; - iwmr->page_msk = PAGE_MASK; - - if (region->hugetlb && (req.reg_type == IW_MEMREG_TYPE_MEM)) - i40iw_set_hugetlb_values(start, iwmr); + if (req.reg_type == IW_MEMREG_TYPE_MEM) + iwmr->page_size = ib_umem_find_best_pgsz(region, SZ_4K | SZ_2M, + virt); region_length = region->length + (start & (iwmr->page_size - 1)); pg_shift = ffs(iwmr->page_size) - 1; diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.h b/drivers/infiniband/hw/i40iw/i40iw_verbs.h index 76cf173377ab..3a413752ccc3 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.h +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.h @@ -94,8 +94,7 @@ struct i40iw_mr { struct ib_umem *region; u16 type; u32 page_cnt; - u32 page_size; - u64 page_msk; + u64 page_size; u32 npages; u32 stag; u64 length; -- cgit v1.2.3 From d85582517e9103604991ac5265855b48ccfd54d8 Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Mon, 6 May 2019 08:53:35 -0500 Subject: RDMA/bnxt_re: Use core helpers to get aligned DMA address Call the core helpers to retrieve the HW aligned address to use for the MR, within a supported bnxt_re page size. Remove checking the umem->hugetlb flag as it is no longer required. The new DMA block iterator will return the 2M aligned address if the MR is backed by 2M huge pages. 
Acked-by: Selvin Xavier Signed-off-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index cde789cb691b..2c3685faa57a 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -3507,17 +3507,12 @@ static int fill_umem_pbl_tbl(struct ib_umem *umem, u64 *pbl_tbl_orig, int page_shift) { u64 *pbl_tbl = pbl_tbl_orig; - u64 paddr; - u64 page_mask = (1ULL << page_shift) - 1; - struct sg_dma_page_iter sg_iter; + u64 page_size = BIT_ULL(page_shift); + struct ib_block_iter biter; + + rdma_for_each_block(umem->sg_head.sgl, &biter, umem->nmap, page_size) + *pbl_tbl++ = rdma_block_iter_dma_address(&biter); - for_each_sg_dma_page (umem->sg_head.sgl, &sg_iter, umem->nmap, 0) { - paddr = sg_page_iter_dma_address(&sg_iter); - if (pbl_tbl == pbl_tbl_orig) - *pbl_tbl++ = paddr & ~page_mask; - else if ((paddr & page_mask) == 0) - *pbl_tbl++ = paddr; - } return pbl_tbl - pbl_tbl_orig; } @@ -3579,7 +3574,9 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length, goto free_umem; } - page_shift = PAGE_SHIFT; + page_shift = __ffs(ib_umem_find_best_pgsz(umem, + BNXT_RE_PAGE_SIZE_4K | BNXT_RE_PAGE_SIZE_2M, + virt_addr)); if (!bnxt_re_page_size_ok(page_shift)) { dev_err(rdev_to_dev(rdev), "umem page size unsupported!"); @@ -3587,17 +3584,13 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length, goto fail; } - if (!umem->hugetlb && length > BNXT_RE_MAX_MR_SIZE_LOW) { + if (page_shift == BNXT_RE_PAGE_SHIFT_4K && + length > BNXT_RE_MAX_MR_SIZE_LOW) { dev_err(rdev_to_dev(rdev), "Requested MR Sz:%llu Max sup:%llu", length, (u64)BNXT_RE_MAX_MR_SIZE_LOW); rc = -EINVAL; goto fail; } - if (umem->hugetlb && length > BNXT_RE_PAGE_SIZE_2M) { - page_shift = BNXT_RE_PAGE_SHIFT_2M; - 
dev_warn(rdev_to_dev(rdev), "umem hugetlb set page_size %x", - 1 << page_shift); - } /* Map umem buf ptrs to the PBL */ umem_pgs = fill_umem_pbl_tbl(umem, pbl_tbl, page_shift); -- cgit v1.2.3 From db6c6774af0d4861a7c5181ecc3c9ac320de46d9 Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Mon, 6 May 2019 08:53:36 -0500 Subject: RDMA/umem: Remove hugetlb flag The drivers i40iw and bnxt_re no longer depend on the hugetlb flag. So remove this flag from the ib_umem structure. Reviewed-by: Michael J. Ruhl Signed-off-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/umem.c | 26 +------------------------- drivers/infiniband/core/umem_odp.c | 3 --- include/rdma/ib_umem.h | 1 - 3 files changed, 1 insertion(+), 29 deletions(-) diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 145c31c530ae..0a23048db523 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -37,7 +37,6 @@ #include #include #include -#include #include #include #include @@ -199,14 +198,12 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr, struct ib_ucontext *context; struct ib_umem *umem; struct page **page_list; - struct vm_area_struct **vma_list; unsigned long lock_limit; unsigned long new_pinned; unsigned long cur_base; struct mm_struct *mm; unsigned long npages; int ret; - int i; unsigned long dma_attrs = 0; struct scatterlist *sg; unsigned int gup_flags = FOLL_WRITE; @@ -264,23 +261,12 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr, return umem; } - /* We assume the memory is from hugetlb until proved otherwise */ - umem->hugetlb = 1; - page_list = (struct page **) __get_free_page(GFP_KERNEL); if (!page_list) { ret = -ENOMEM; goto umem_kfree; } - /* - * if we can't alloc the vma_list, it's not so bad; - * just assume the memory is not hugetlb memory - */ - vma_list = (struct vm_area_struct **) __get_free_page(GFP_KERNEL); - if (!vma_list) - umem->hugetlb = 0; - npages = 
ib_umem_num_pages(umem); if (npages == 0 || npages > UINT_MAX) { ret = -EINVAL; @@ -312,7 +298,7 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr, ret = get_user_pages_longterm(cur_base, min_t(unsigned long, npages, PAGE_SIZE / sizeof (struct page *)), - gup_flags, page_list, vma_list); + gup_flags, page_list, NULL); if (ret < 0) { up_read(&mm->mmap_sem); goto umem_release; @@ -325,14 +311,6 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr, dma_get_max_seg_size(context->device->dma_device), &umem->sg_nents); - /* Continue to hold the mmap_sem as vma_list access - * needs to be protected. - */ - for (i = 0; i < ret && umem->hugetlb; i++) { - if (vma_list && !is_vm_hugetlb_page(vma_list[i])) - umem->hugetlb = 0; - } - up_read(&mm->mmap_sem); } @@ -357,8 +335,6 @@ umem_release: vma: atomic64_sub(ib_umem_num_pages(umem), &mm->pinned_vm); out: - if (vma_list) - free_page((unsigned long) vma_list); free_page((unsigned long) page_list); umem_kfree: if (ret) { diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index 97219143f16f..c7226cf52acc 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -417,9 +417,6 @@ int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access) h = hstate_vma(vma); umem->page_shift = huge_page_shift(h); up_read(&mm->mmap_sem); - umem->hugetlb = 1; - } else { - umem->hugetlb = 0; } mutex_init(&umem_odp->umem_mutex); diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h index 917b687010f0..040d853077c6 100644 --- a/include/rdma/ib_umem.h +++ b/include/rdma/ib_umem.h @@ -48,7 +48,6 @@ struct ib_umem { unsigned long address; int page_shift; u32 writable : 1; - u32 hugetlb : 1; u32 is_odp : 1; struct work_struct work; struct sg_table sg_head; -- cgit v1.2.3 From f95be3d28d891b0c0f339a504e3aa8e382bbd9a6 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Sun, 5 May 2019 20:59:21 +0300 Subject: RDMA: Add EFA related 
definitions Add EFA driver ID to the IOCTL interface uapi. This patch also adds unspecified node/transport type that will be used by EFA (usnic is left unchanged as it's already part of our ABI). Signed-off-by: Gal Pressman Reviewed-by: Shiraz Saleem Reviewed-by: Steve Wise Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/sysfs.c | 1 + drivers/infiniband/core/verbs.c | 2 ++ include/rdma/ib_verbs.h | 4 +++- include/uapi/rdma/rdma_user_ioctl_cmds.h | 1 + 4 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 8c7adc27aeea..c78d0c9646ae 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -1201,6 +1201,7 @@ static ssize_t node_type_show(struct device *device, case RDMA_NODE_RNIC: return sprintf(buf, "%d: RNIC\n", dev->node_type); case RDMA_NODE_USNIC: return sprintf(buf, "%d: usNIC\n", dev->node_type); case RDMA_NODE_USNIC_UDP: return sprintf(buf, "%d: usNIC UDP\n", dev->node_type); + case RDMA_NODE_UNSPECIFIED: return sprintf(buf, "%d: unspecified\n", dev->node_type); case RDMA_NODE_IB_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type); case RDMA_NODE_IB_ROUTER: return sprintf(buf, "%d: router\n", dev->node_type); default: return sprintf(buf, "%d: \n", dev->node_type); diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index a4e97dd3c4b6..e666a1f7608d 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -218,6 +218,8 @@ rdma_node_get_transport(enum rdma_node_type node_type) return RDMA_TRANSPORT_USNIC_UDP; if (node_type == RDMA_NODE_RNIC) return RDMA_TRANSPORT_IWARP; + if (node_type == RDMA_NODE_UNSPECIFIED) + return RDMA_TRANSPORT_UNSPECIFIED; return RDMA_TRANSPORT_IB; } diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index deb67b21ccb9..0742095355f2 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -140,6 +140,7 @@ 
enum rdma_node_type { RDMA_NODE_RNIC, RDMA_NODE_USNIC, RDMA_NODE_USNIC_UDP, + RDMA_NODE_UNSPECIFIED, }; enum { @@ -151,7 +152,8 @@ enum rdma_transport_type { RDMA_TRANSPORT_IB, RDMA_TRANSPORT_IWARP, RDMA_TRANSPORT_USNIC, - RDMA_TRANSPORT_USNIC_UDP + RDMA_TRANSPORT_USNIC_UDP, + RDMA_TRANSPORT_UNSPECIFIED, }; enum rdma_protocol_type { diff --git a/include/uapi/rdma/rdma_user_ioctl_cmds.h b/include/uapi/rdma/rdma_user_ioctl_cmds.h index 06c34d99be85..26213f49f5c8 100644 --- a/include/uapi/rdma/rdma_user_ioctl_cmds.h +++ b/include/uapi/rdma/rdma_user_ioctl_cmds.h @@ -102,6 +102,7 @@ enum rdma_driver_id { RDMA_DRIVER_RXE, RDMA_DRIVER_HFI1, RDMA_DRIVER_QIB, + RDMA_DRIVER_EFA, }; #endif -- cgit v1.2.3 From 01edac3aa2b9002860f405f1af23536386d45db0 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Sun, 5 May 2019 20:59:22 +0300 Subject: RDMA/efa: Add EFA device definitions EFA PCIe device implements a single Admin Queue (AQ) and Admin Completion Queue (ACQ) pair to initialize and communicate configuration with the device. Through this pair, we run set/get commands for querying and configuring the device, create/modify/destroy queues, and IB specific commands like Address Handler (AH), Memory Registration (MR) and Protection Domains (PD). In addition to admin (AQ/ACQ), we have data path queues that get classified as Queue Pairs (QP) and Completion Queues (CQ). 
Signed-off-by: Gal Pressman Reviewed-by: Shiraz Saleem Reviewed-by: Steve Wise Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/efa/efa_admin_cmds_defs.h | 794 ++++++++++++++++++++++++ drivers/infiniband/hw/efa/efa_admin_defs.h | 136 ++++ drivers/infiniband/hw/efa/efa_common_defs.h | 18 + drivers/infiniband/hw/efa/efa_regs_defs.h | 113 ++++ 4 files changed, 1061 insertions(+) create mode 100644 drivers/infiniband/hw/efa/efa_admin_cmds_defs.h create mode 100644 drivers/infiniband/hw/efa/efa_admin_defs.h create mode 100644 drivers/infiniband/hw/efa/efa_common_defs.h create mode 100644 drivers/infiniband/hw/efa/efa_regs_defs.h diff --git a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h new file mode 100644 index 000000000000..2be0469d545f --- /dev/null +++ b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h @@ -0,0 +1,794 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ +/* + * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. 
+ */ + +#ifndef _EFA_ADMIN_CMDS_H_ +#define _EFA_ADMIN_CMDS_H_ + +#define EFA_ADMIN_API_VERSION_MAJOR 0 +#define EFA_ADMIN_API_VERSION_MINOR 1 + +/* EFA admin queue opcodes */ +enum efa_admin_aq_opcode { + EFA_ADMIN_CREATE_QP = 1, + EFA_ADMIN_MODIFY_QP = 2, + EFA_ADMIN_QUERY_QP = 3, + EFA_ADMIN_DESTROY_QP = 4, + EFA_ADMIN_CREATE_AH = 5, + EFA_ADMIN_DESTROY_AH = 6, + EFA_ADMIN_REG_MR = 7, + EFA_ADMIN_DEREG_MR = 8, + EFA_ADMIN_CREATE_CQ = 9, + EFA_ADMIN_DESTROY_CQ = 10, + EFA_ADMIN_GET_FEATURE = 11, + EFA_ADMIN_SET_FEATURE = 12, + EFA_ADMIN_GET_STATS = 13, + EFA_ADMIN_ALLOC_PD = 14, + EFA_ADMIN_DEALLOC_PD = 15, + EFA_ADMIN_ALLOC_UAR = 16, + EFA_ADMIN_DEALLOC_UAR = 17, + EFA_ADMIN_MAX_OPCODE = 17, +}; + +enum efa_admin_aq_feature_id { + EFA_ADMIN_DEVICE_ATTR = 1, + EFA_ADMIN_AENQ_CONFIG = 2, + EFA_ADMIN_NETWORK_ATTR = 3, + EFA_ADMIN_QUEUE_ATTR = 4, + EFA_ADMIN_HW_HINTS = 5, + EFA_ADMIN_FEATURES_OPCODE_NUM = 8, +}; + +/* QP transport type */ +enum efa_admin_qp_type { + /* Unreliable Datagram */ + EFA_ADMIN_QP_TYPE_UD = 1, + /* Scalable Reliable Datagram */ + EFA_ADMIN_QP_TYPE_SRD = 2, +}; + +/* QP state */ +enum efa_admin_qp_state { + EFA_ADMIN_QP_STATE_RESET = 0, + EFA_ADMIN_QP_STATE_INIT = 1, + EFA_ADMIN_QP_STATE_RTR = 2, + EFA_ADMIN_QP_STATE_RTS = 3, + EFA_ADMIN_QP_STATE_SQD = 4, + EFA_ADMIN_QP_STATE_SQE = 5, + EFA_ADMIN_QP_STATE_ERR = 6, +}; + +enum efa_admin_get_stats_type { + EFA_ADMIN_GET_STATS_TYPE_BASIC = 0, +}; + +enum efa_admin_get_stats_scope { + EFA_ADMIN_GET_STATS_SCOPE_ALL = 0, + EFA_ADMIN_GET_STATS_SCOPE_QUEUE = 1, +}; + +enum efa_admin_modify_qp_mask_bits { + EFA_ADMIN_QP_STATE_BIT = 0, + EFA_ADMIN_CUR_QP_STATE_BIT = 1, + EFA_ADMIN_QKEY_BIT = 2, + EFA_ADMIN_SQ_PSN_BIT = 3, + EFA_ADMIN_SQ_DRAINED_ASYNC_NOTIFY_BIT = 4, +}; + +/* + * QP allocation sizes, converted by fabric QueuePair (QP) create command + * from QP capabilities. 
+ */ +struct efa_admin_qp_alloc_size { + /* Send descriptor ring size in bytes */ + u32 send_queue_ring_size; + + /* Max number of WQEs that can be outstanding on send queue. */ + u32 send_queue_depth; + + /* + * Recv descriptor ring size in bytes, sufficient for user-provided + * number of WQEs + */ + u32 recv_queue_ring_size; + + /* Max number of WQEs that can be outstanding on recv queue */ + u32 recv_queue_depth; +}; + +struct efa_admin_create_qp_cmd { + /* Common Admin Queue descriptor */ + struct efa_admin_aq_common_desc aq_common_desc; + + /* Protection Domain associated with this QP */ + u16 pd; + + /* QP type */ + u8 qp_type; + + /* + * 0 : sq_virt - If set, SQ ring base address is + * virtual (IOVA returned by MR registration) + * 1 : rq_virt - If set, RQ ring base address is + * virtual (IOVA returned by MR registration) + * 7:2 : reserved - MBZ + */ + u8 flags; + + /* + * Send queue (SQ) ring base physical address. This field is not + * used if this is a Low Latency Queue(LLQ). + */ + u64 sq_base_addr; + + /* Receive queue (RQ) ring base address. 
*/ + u64 rq_base_addr; + + /* Index of CQ to be associated with Send Queue completions */ + u32 send_cq_idx; + + /* Index of CQ to be associated with Recv Queue completions */ + u32 recv_cq_idx; + + /* + * Memory registration key for the SQ ring, used only when not in + * LLQ mode and base address is virtual + */ + u32 sq_l_key; + + /* + * Memory registration key for the RQ ring, used only when base + * address is virtual + */ + u32 rq_l_key; + + /* Requested QP allocation sizes */ + struct efa_admin_qp_alloc_size qp_alloc_size; + + /* UAR number */ + u16 uar; + + /* MBZ */ + u16 reserved; + + /* MBZ */ + u32 reserved2; +}; + +struct efa_admin_create_qp_resp { + /* Common Admin Queue completion descriptor */ + struct efa_admin_acq_common_desc acq_common_desc; + + /* Opaque handle to be used for subsequent operations on the QP */ + u32 qp_handle; + + /* QP number in the given EFA virtual device */ + u16 qp_num; + + /* MBZ */ + u16 reserved; + + /* Index of sub-CQ for Send Queue completions */ + u16 send_sub_cq_idx; + + /* Index of sub-CQ for Receive Queue completions */ + u16 recv_sub_cq_idx; + + /* SQ doorbell address, as offset to PCIe DB BAR */ + u32 sq_db_offset; + + /* RQ doorbell address, as offset to PCIe DB BAR */ + u32 rq_db_offset; + + /* + * low latency send queue ring base address as an offset to PCIe + * MMIO LLQ_MEM BAR + */ + u32 llq_descriptors_offset; +}; + +struct efa_admin_modify_qp_cmd { + /* Common Admin Queue descriptor */ + struct efa_admin_aq_common_desc aq_common_desc; + + /* + * Mask indicating which fields should be updated, see enum + * efa_admin_modify_qp_mask_bits + */ + u32 modify_mask; + + /* QP handle returned by create_qp command */ + u32 qp_handle; + + /* QP state */ + u32 qp_state; + + /* Override current QP state (before applying the transition) */ + u32 cur_qp_state; + + /* QKey */ + u32 qkey; + + /* SQ PSN */ + u32 sq_psn; + + /* Enable async notification when SQ is drained */ + u8 sq_drained_async_notify; + + /* MBZ */ + u8
reserved1; + + /* MBZ */ + u16 reserved2; +}; + +struct efa_admin_modify_qp_resp { + /* Common Admin Queue completion descriptor */ + struct efa_admin_acq_common_desc acq_common_desc; +}; + +struct efa_admin_query_qp_cmd { + /* Common Admin Queue descriptor */ + struct efa_admin_aq_common_desc aq_common_desc; + + /* QP handle returned by create_qp command */ + u32 qp_handle; +}; + +struct efa_admin_query_qp_resp { + /* Common Admin Queue completion descriptor */ + struct efa_admin_acq_common_desc acq_common_desc; + + /* QP state */ + u32 qp_state; + + /* QKey */ + u32 qkey; + + /* SQ PSN */ + u32 sq_psn; + + /* Indicates that draining is in progress */ + u8 sq_draining; + + /* MBZ */ + u8 reserved1; + + /* MBZ */ + u16 reserved2; +}; + +struct efa_admin_destroy_qp_cmd { + /* Common Admin Queue descriptor */ + struct efa_admin_aq_common_desc aq_common_desc; + + /* QP handle returned by create_qp command */ + u32 qp_handle; +}; + +struct efa_admin_destroy_qp_resp { + /* Common Admin Queue completion descriptor */ + struct efa_admin_acq_common_desc acq_common_desc; +}; + +/* + * Create Address Handle command parameters. 
Must not be called more than + * once for the same destination + */ +struct efa_admin_create_ah_cmd { + /* Common Admin Queue descriptor */ + struct efa_admin_aq_common_desc aq_common_desc; + + /* Destination address in network byte order */ + u8 dest_addr[16]; + + /* PD number */ + u16 pd; + + u16 reserved; +}; + +struct efa_admin_create_ah_resp { + /* Common Admin Queue completion descriptor */ + struct efa_admin_acq_common_desc acq_common_desc; + + /* Target interface address handle (opaque) */ + u16 ah; + + u16 reserved; +}; + +struct efa_admin_destroy_ah_cmd { + /* Common Admin Queue descriptor */ + struct efa_admin_aq_common_desc aq_common_desc; + + /* Target interface address handle (opaque) */ + u16 ah; + + /* PD number */ + u16 pd; +}; + +struct efa_admin_destroy_ah_resp { + /* Common Admin Queue completion descriptor */ + struct efa_admin_acq_common_desc acq_common_desc; +}; + +/* + * Registration of MemoryRegion, required for QP working with Virtual + * Addresses. In standard verbs semantics, region length is limited to 2GB + * space, but EFA offers larger MR support for large memory space, to ease + * on users working with very large datasets (i.e. full GPU memory mapping). + */ +struct efa_admin_reg_mr_cmd { + /* Common Admin Queue descriptor */ + struct efa_admin_aq_common_desc aq_common_desc; + + /* Protection Domain */ + u16 pd; + + /* MBZ */ + u16 reserved16_w1; + + /* Physical Buffer List, each element is page-aligned. */ + union { + /* + * Inline array of guest-physical page addresses of user + * memory pages (optimization for short region + * registrations) + */ + u64 inline_pbl_array[4]; + + /* points to PBL (direct or indirect, chained if needed) */ + struct efa_admin_ctrl_buff_info pbl; + } pbl; + + /* Memory region length, in bytes. */ + u64 mr_length; + + /* + * flags and page size + * 4:0 : phys_page_size_shift - page size is (1 << + * phys_page_size_shift). 
Page size is used for + * building the Virtual to Physical address mapping + * 6:5 : reserved - MBZ + * 7 : mem_addr_phy_mode_en - Enable bit for physical + * memory registration (no translation), can be used + * only by privileged clients. If set, PBL must + * contain a single entry. + */ + u8 flags; + + /* + * permissions + * 0 : local_write_enable - Write permissions: value + * of 1 needed for RQ buffers and for RDMA write + * 7:1 : reserved1 - remote access flags, etc + */ + u8 permissions; + + u16 reserved16_w5; + + /* number of pages in PBL (redundant, could be calculated) */ + u32 page_num; + + /* + * IO Virtual Address associated with this MR. If + * mem_addr_phy_mode_en is set, contains the physical address of + * the region. + */ + u64 iova; +}; + +struct efa_admin_reg_mr_resp { + /* Common Admin Queue completion descriptor */ + struct efa_admin_acq_common_desc acq_common_desc; + + /* + * L_Key, to be used in conjunction with local buffer references in + * SQ and RQ WQE, or with virtual RQ/CQ rings + */ + u32 l_key; + + /* + * R_Key, to be used in RDMA messages to refer to remotely accessed + * memory region + */ + u32 r_key; +}; + +struct efa_admin_dereg_mr_cmd { + /* Common Admin Queue descriptor */ + struct efa_admin_aq_common_desc aq_common_desc; + + /* L_Key, memory region's l_key */ + u32 l_key; +}; + +struct efa_admin_dereg_mr_resp { + /* Common Admin Queue completion descriptor */ + struct efa_admin_acq_common_desc acq_common_desc; +}; + +struct efa_admin_create_cq_cmd { + struct efa_admin_aq_common_desc aq_common_desc; + + /* + * 4:0 : reserved5 + * 5 : interrupt_mode_enabled - if set, cq operates + * in interrupt mode (i.e. CQ events and MSI-X are + * generated), otherwise - polling + * 6 : virt - If set, ring base address is virtual + * (IOVA returned by MR registration) + * 7 : reserved6 + */ + u8 cq_caps_1; + + /* + * 4:0 : cq_entry_size_words - size of CQ entry in + * 32-bit words, valid values: 4, 8. 
+ * 7:5 : reserved7 + */ + u8 cq_caps_2; + + /* completion queue depth in # of entries. must be power of 2 */ + u16 cq_depth; + + /* msix vector assigned to this cq */ + u32 msix_vector_idx; + + /* + * CQ ring base address, virtual or physical depending on 'virt' + * flag + */ + struct efa_common_mem_addr cq_ba; + + /* + * Memory registration key for the ring, used only when base + * address is virtual + */ + u32 l_key; + + /* + * number of sub cqs - must be equal to sub_cqs_per_cq of queue + * attributes. + */ + u16 num_sub_cqs; + + /* UAR number */ + u16 uar; +}; + +struct efa_admin_create_cq_resp { + struct efa_admin_acq_common_desc acq_common_desc; + + u16 cq_idx; + + /* actual cq depth in number of entries */ + u16 cq_actual_depth; +}; + +struct efa_admin_destroy_cq_cmd { + struct efa_admin_aq_common_desc aq_common_desc; + + u16 cq_idx; + + u16 reserved1; +}; + +struct efa_admin_destroy_cq_resp { + struct efa_admin_acq_common_desc acq_common_desc; +}; + +/* + * EFA AQ Get Statistics command. 
Extended statistics are placed in control + * buffer pointed to by AQ entry + */ +struct efa_admin_aq_get_stats_cmd { + struct efa_admin_aq_common_desc aq_common_descriptor; + + union { + /* command specific inline data */ + u32 inline_data_w1[3]; + + struct efa_admin_ctrl_buff_info control_buffer; + } u; + + /* stats type as defined in enum efa_admin_get_stats_type */ + u8 type; + + /* stats scope defined in enum efa_admin_get_stats_scope */ + u8 scope; + + u16 scope_modifier; +}; + +struct efa_admin_basic_stats { + u64 tx_bytes; + + u64 tx_pkts; + + u64 rx_bytes; + + u64 rx_pkts; + + u64 rx_drops; +}; + +struct efa_admin_acq_get_stats_resp { + struct efa_admin_acq_common_desc acq_common_desc; + + struct efa_admin_basic_stats basic_stats; +}; + +struct efa_admin_get_set_feature_common_desc { + /* + * 1:0 : select - 0x1 - current value; 0x3 - default + * value + * 7:3 : reserved3 + */ + u8 flags; + + /* as appears in efa_admin_aq_feature_id */ + u8 feature_id; + + /* MBZ */ + u16 reserved16; +}; + +struct efa_admin_feature_device_attr_desc { + /* Bitmap of efa_admin_aq_feature_id */ + u64 supported_features; + + /* Bitmap of supported page sizes in MR registrations */ + u64 page_size_cap; + + u32 fw_version; + + u32 admin_api_version; + + u32 device_version; + + /* BAR used for SQ and RQ doorbells */ + u16 db_bar; + + /* Indicates how many bits are used for physical address access */ + u8 phys_addr_width; + + /* Indicates how many bits are used for virtual address access */ + u8 virt_addr_width; +}; + +struct efa_admin_feature_queue_attr_desc { + /* The maximum number of queue pairs supported */ + u32 max_qp; + + u32 max_sq_depth; + + /* max send wr used in inline-buf */ + u32 inline_buf_size; + + u32 max_rq_depth; + + /* The maximum number of completion queues supported per VF */ + u32 max_cq; + + u32 max_cq_depth; + + /* Number of sub-CQs to be created for each CQ */ + u16 sub_cqs_per_cq; + + u16 reserved; + + /* + * Maximum number of SGEs (buffs) allowed for a single send
work + * queue element (WQE) + */ + u16 max_wr_send_sges; + + /* Maximum number of SGEs allowed for a single recv WQE */ + u16 max_wr_recv_sges; + + /* The maximum number of memory regions supported */ + u32 max_mr; + + /* The maximum number of pages that can be registered */ + u32 max_mr_pages; + + /* The maximum number of protection domains supported */ + u32 max_pd; + + /* The maximum number of address handles supported */ + u32 max_ah; + + /* The maximum size of LLQ in bytes */ + u32 max_llq_size; +}; + +struct efa_admin_feature_aenq_desc { + /* bitmask for AENQ groups the device can report */ + u32 supported_groups; + + /* bitmask for AENQ groups to report */ + u32 enabled_groups; +}; + +struct efa_admin_feature_network_attr_desc { + /* Raw address data in network byte order */ + u8 addr[16]; + + u32 mtu; +}; + +/* + * When hint value is 0, hints capabilities are not supported or driver + * should use its own predefined value + */ +struct efa_admin_hw_hints { + /* value in ms */ + u16 mmio_read_timeout; + + /* value in ms */ + u16 driver_watchdog_timeout; + + /* value in ms */ + u16 admin_completion_timeout; + + /* poll interval in ms */ + u16 poll_interval; +}; + +struct efa_admin_get_feature_cmd { + struct efa_admin_aq_common_desc aq_common_descriptor; + + struct efa_admin_ctrl_buff_info control_buffer; + + struct efa_admin_get_set_feature_common_desc feature_common; + + u32 raw[11]; +}; + +struct efa_admin_get_feature_resp { + struct efa_admin_acq_common_desc acq_common_desc; + + union { + u32 raw[14]; + + struct efa_admin_feature_device_attr_desc device_attr; + + struct efa_admin_feature_aenq_desc aenq; + + struct efa_admin_feature_network_attr_desc network_attr; + + struct efa_admin_feature_queue_attr_desc queue_attr; + + struct efa_admin_hw_hints hw_hints; + } u; +}; + +struct efa_admin_set_feature_cmd { + struct efa_admin_aq_common_desc aq_common_descriptor; + + struct efa_admin_ctrl_buff_info control_buffer; + + struct efa_admin_get_set_feature_common_desc
feature_common; + + union { + u32 raw[11]; + + /* AENQ configuration */ + struct efa_admin_feature_aenq_desc aenq; + } u; +}; + +struct efa_admin_set_feature_resp { + struct efa_admin_acq_common_desc acq_common_desc; + + union { + u32 raw[14]; + } u; +}; + +struct efa_admin_alloc_pd_cmd { + struct efa_admin_aq_common_desc aq_common_descriptor; +}; + +struct efa_admin_alloc_pd_resp { + struct efa_admin_acq_common_desc acq_common_desc; + + /* PD number */ + u16 pd; + + /* MBZ */ + u16 reserved; +}; + +struct efa_admin_dealloc_pd_cmd { + struct efa_admin_aq_common_desc aq_common_descriptor; + + /* PD number */ + u16 pd; + + /* MBZ */ + u16 reserved; +}; + +struct efa_admin_dealloc_pd_resp { + struct efa_admin_acq_common_desc acq_common_desc; +}; + +struct efa_admin_alloc_uar_cmd { + struct efa_admin_aq_common_desc aq_common_descriptor; +}; + +struct efa_admin_alloc_uar_resp { + struct efa_admin_acq_common_desc acq_common_desc; + + /* UAR number */ + u16 uar; + + /* MBZ */ + u16 reserved; +}; + +struct efa_admin_dealloc_uar_cmd { + struct efa_admin_aq_common_desc aq_common_descriptor; + + /* UAR number */ + u16 uar; + + /* MBZ */ + u16 reserved; +}; + +struct efa_admin_dealloc_uar_resp { + struct efa_admin_acq_common_desc acq_common_desc; +}; + +/* asynchronous event notification groups */ +enum efa_admin_aenq_group { + EFA_ADMIN_FATAL_ERROR = 1, + EFA_ADMIN_WARNING = 2, + EFA_ADMIN_NOTIFICATION = 3, + EFA_ADMIN_KEEP_ALIVE = 4, + EFA_ADMIN_AENQ_GROUPS_NUM = 5, +}; + +enum efa_admin_aenq_notification_syndrom { + EFA_ADMIN_SUSPEND = 0, + EFA_ADMIN_RESUME = 1, + EFA_ADMIN_UPDATE_HINTS = 2, +}; + +struct efa_admin_mmio_req_read_less_resp { + u16 req_id; + + u16 reg_off; + + /* value is valid when poll is cleared */ + u32 reg_val; +}; + +/* create_qp_cmd */ +#define EFA_ADMIN_CREATE_QP_CMD_SQ_VIRT_MASK BIT(0) +#define EFA_ADMIN_CREATE_QP_CMD_RQ_VIRT_SHIFT 1 +#define EFA_ADMIN_CREATE_QP_CMD_RQ_VIRT_MASK BIT(1) + +/* reg_mr_cmd */ +#define 
EFA_ADMIN_REG_MR_CMD_PHYS_PAGE_SIZE_SHIFT_MASK GENMASK(4, 0) +#define EFA_ADMIN_REG_MR_CMD_MEM_ADDR_PHY_MODE_EN_SHIFT 7 +#define EFA_ADMIN_REG_MR_CMD_MEM_ADDR_PHY_MODE_EN_MASK BIT(7) +#define EFA_ADMIN_REG_MR_CMD_LOCAL_WRITE_ENABLE_MASK BIT(0) + +/* create_cq_cmd */ +#define EFA_ADMIN_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED_SHIFT 5 +#define EFA_ADMIN_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED_MASK BIT(5) +#define EFA_ADMIN_CREATE_CQ_CMD_VIRT_SHIFT 6 +#define EFA_ADMIN_CREATE_CQ_CMD_VIRT_MASK BIT(6) +#define EFA_ADMIN_CREATE_CQ_CMD_CQ_ENTRY_SIZE_WORDS_MASK GENMASK(4, 0) + +/* get_set_feature_common_desc */ +#define EFA_ADMIN_GET_SET_FEATURE_COMMON_DESC_SELECT_MASK GENMASK(1, 0) + +#endif /* _EFA_ADMIN_CMDS_H_ */ diff --git a/drivers/infiniband/hw/efa/efa_admin_defs.h b/drivers/infiniband/hw/efa/efa_admin_defs.h new file mode 100644 index 000000000000..c8e0c8b905be --- /dev/null +++ b/drivers/infiniband/hw/efa/efa_admin_defs.h @@ -0,0 +1,136 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ +/* + * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. + */ + +#ifndef _EFA_ADMIN_H_ +#define _EFA_ADMIN_H_ + +enum efa_admin_aq_completion_status { + EFA_ADMIN_SUCCESS = 0, + EFA_ADMIN_RESOURCE_ALLOCATION_FAILURE = 1, + EFA_ADMIN_BAD_OPCODE = 2, + EFA_ADMIN_UNSUPPORTED_OPCODE = 3, + EFA_ADMIN_MALFORMED_REQUEST = 4, + /* Additional status is provided in ACQ entry extended_status */ + EFA_ADMIN_ILLEGAL_PARAMETER = 5, + EFA_ADMIN_UNKNOWN_ERROR = 6, + EFA_ADMIN_RESOURCE_BUSY = 7, +}; + +struct efa_admin_aq_common_desc { + /* + * 11:0 : command_id + * 15:12 : reserved12 + */ + u16 command_id; + + /* as appears in efa_admin_aq_opcode */ + u8 opcode; + + /* + * 0 : phase + * 1 : ctrl_data - control buffer address valid + * 2 : ctrl_data_indirect - control buffer address + * points to list of pages with addresses of control + * buffers + * 7:3 : reserved3 + */ + u8 flags; +}; + +/* + * used in efa_admin_aq_entry. 
Can point directly to control data, or to a + * page list chunk. Used also at the end of indirect mode page list chunks, + * for chaining. + */ +struct efa_admin_ctrl_buff_info { + u32 length; + + struct efa_common_mem_addr address; +}; + +struct efa_admin_aq_entry { + struct efa_admin_aq_common_desc aq_common_descriptor; + + union { + u32 inline_data_w1[3]; + + struct efa_admin_ctrl_buff_info control_buffer; + } u; + + u32 inline_data_w4[12]; +}; + +struct efa_admin_acq_common_desc { + /* + * command identifier to associate it with the aq descriptor + * 11:0 : command_id + * 15:12 : reserved12 + */ + u16 command; + + u8 status; + + /* + * 0 : phase + * 7:1 : reserved1 + */ + u8 flags; + + u16 extended_status; + + /* + * indicates to the driver which AQ entry has been consumed by the + * device and could be reused + */ + u16 sq_head_indx; +}; + +struct efa_admin_acq_entry { + struct efa_admin_acq_common_desc acq_common_descriptor; + + u32 response_specific_data[14]; +}; + +struct efa_admin_aenq_common_desc { + u16 group; + + u16 syndrom; + + /* + * 0 : phase + * 7:1 : reserved - MBZ + */ + u8 flags; + + u8 reserved1[3]; + + u32 timestamp_low; + + u32 timestamp_high; +}; + +struct efa_admin_aenq_entry { + struct efa_admin_aenq_common_desc aenq_common_desc; + + /* command specific inline data */ + u32 inline_data_w4[12]; +}; + +/* aq_common_desc */ +#define EFA_ADMIN_AQ_COMMON_DESC_COMMAND_ID_MASK GENMASK(11, 0) +#define EFA_ADMIN_AQ_COMMON_DESC_PHASE_MASK BIT(0) +#define EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_SHIFT 1 +#define EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_MASK BIT(1) +#define EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_SHIFT 2 +#define EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK BIT(2) + +/* acq_common_desc */ +#define EFA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID_MASK GENMASK(11, 0) +#define EFA_ADMIN_ACQ_COMMON_DESC_PHASE_MASK BIT(0) + +/* aenq_common_desc */ +#define EFA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK BIT(0) + +#endif /* _EFA_ADMIN_H_ */ diff --git 
a/drivers/infiniband/hw/efa/efa_common_defs.h b/drivers/infiniband/hw/efa/efa_common_defs.h new file mode 100644 index 000000000000..c559ec08898e --- /dev/null +++ b/drivers/infiniband/hw/efa/efa_common_defs.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ +/* + * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. + */ + +#ifndef _EFA_COMMON_H_ +#define _EFA_COMMON_H_ + +#define EFA_COMMON_SPEC_VERSION_MAJOR 2 +#define EFA_COMMON_SPEC_VERSION_MINOR 0 + +struct efa_common_mem_addr { + u32 mem_addr_low; + + u32 mem_addr_high; +}; + +#endif /* _EFA_COMMON_H_ */ diff --git a/drivers/infiniband/hw/efa/efa_regs_defs.h b/drivers/infiniband/hw/efa/efa_regs_defs.h new file mode 100644 index 000000000000..bb9cad3d6a15 --- /dev/null +++ b/drivers/infiniband/hw/efa/efa_regs_defs.h @@ -0,0 +1,113 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ +/* + * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. + */ + +#ifndef _EFA_REGS_H_ +#define _EFA_REGS_H_ + +enum efa_regs_reset_reason_types { + EFA_REGS_RESET_NORMAL = 0, + /* Keep alive timeout */ + EFA_REGS_RESET_KEEP_ALIVE_TO = 1, + EFA_REGS_RESET_ADMIN_TO = 2, + EFA_REGS_RESET_INIT_ERR = 3, + EFA_REGS_RESET_DRIVER_INVALID_STATE = 4, + EFA_REGS_RESET_OS_TRIGGER = 5, + EFA_REGS_RESET_SHUTDOWN = 6, + EFA_REGS_RESET_USER_TRIGGER = 7, + EFA_REGS_RESET_GENERIC = 8, +}; + +/* efa_registers offsets */ + +/* 0 base */ +#define EFA_REGS_VERSION_OFF 0x0 +#define EFA_REGS_CONTROLLER_VERSION_OFF 0x4 +#define EFA_REGS_CAPS_OFF 0x8 +#define EFA_REGS_AQ_BASE_LO_OFF 0x10 +#define EFA_REGS_AQ_BASE_HI_OFF 0x14 +#define EFA_REGS_AQ_CAPS_OFF 0x18 +#define EFA_REGS_ACQ_BASE_LO_OFF 0x20 +#define EFA_REGS_ACQ_BASE_HI_OFF 0x24 +#define EFA_REGS_ACQ_CAPS_OFF 0x28 +#define EFA_REGS_AQ_PROD_DB_OFF 0x2c +#define EFA_REGS_AENQ_CAPS_OFF 0x34 +#define EFA_REGS_AENQ_BASE_LO_OFF 0x38 +#define EFA_REGS_AENQ_BASE_HI_OFF 0x3c +#define EFA_REGS_AENQ_CONS_DB_OFF 0x40 +#define 
EFA_REGS_INTR_MASK_OFF 0x4c +#define EFA_REGS_DEV_CTL_OFF 0x54 +#define EFA_REGS_DEV_STS_OFF 0x58 +#define EFA_REGS_MMIO_REG_READ_OFF 0x5c +#define EFA_REGS_MMIO_RESP_LO_OFF 0x60 +#define EFA_REGS_MMIO_RESP_HI_OFF 0x64 + +/* version register */ +#define EFA_REGS_VERSION_MINOR_VERSION_MASK 0xff +#define EFA_REGS_VERSION_MAJOR_VERSION_SHIFT 8 +#define EFA_REGS_VERSION_MAJOR_VERSION_MASK 0xff00 + +/* controller_version register */ +#define EFA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION_MASK 0xff +#define EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_SHIFT 8 +#define EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK 0xff00 +#define EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT 16 +#define EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK 0xff0000 +#define EFA_REGS_CONTROLLER_VERSION_IMPL_ID_SHIFT 24 +#define EFA_REGS_CONTROLLER_VERSION_IMPL_ID_MASK 0xff000000 + +/* caps register */ +#define EFA_REGS_CAPS_CONTIGUOUS_QUEUE_REQUIRED_MASK 0x1 +#define EFA_REGS_CAPS_RESET_TIMEOUT_SHIFT 1 +#define EFA_REGS_CAPS_RESET_TIMEOUT_MASK 0x3e +#define EFA_REGS_CAPS_DMA_ADDR_WIDTH_SHIFT 8 +#define EFA_REGS_CAPS_DMA_ADDR_WIDTH_MASK 0xff00 +#define EFA_REGS_CAPS_ADMIN_CMD_TO_SHIFT 16 +#define EFA_REGS_CAPS_ADMIN_CMD_TO_MASK 0xf0000 + +/* aq_caps register */ +#define EFA_REGS_AQ_CAPS_AQ_DEPTH_MASK 0xffff +#define EFA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_SHIFT 16 +#define EFA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_MASK 0xffff0000 + +/* acq_caps register */ +#define EFA_REGS_ACQ_CAPS_ACQ_DEPTH_MASK 0xffff +#define EFA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_SHIFT 16 +#define EFA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_MASK 0xff0000 +#define EFA_REGS_ACQ_CAPS_ACQ_MSIX_VECTOR_SHIFT 24 +#define EFA_REGS_ACQ_CAPS_ACQ_MSIX_VECTOR_MASK 0xff000000 + +/* aenq_caps register */ +#define EFA_REGS_AENQ_CAPS_AENQ_DEPTH_MASK 0xffff +#define EFA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_SHIFT 16 +#define EFA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_MASK 0xff0000 +#define EFA_REGS_AENQ_CAPS_AENQ_MSIX_VECTOR_SHIFT 24 +#define EFA_REGS_AENQ_CAPS_AENQ_MSIX_VECTOR_MASK 0xff000000 + 
+/* dev_ctl register */ +#define EFA_REGS_DEV_CTL_DEV_RESET_MASK 0x1 +#define EFA_REGS_DEV_CTL_AQ_RESTART_SHIFT 1 +#define EFA_REGS_DEV_CTL_AQ_RESTART_MASK 0x2 +#define EFA_REGS_DEV_CTL_RESET_REASON_SHIFT 28 +#define EFA_REGS_DEV_CTL_RESET_REASON_MASK 0xf0000000 + +/* dev_sts register */ +#define EFA_REGS_DEV_STS_READY_MASK 0x1 +#define EFA_REGS_DEV_STS_AQ_RESTART_IN_PROGRESS_SHIFT 1 +#define EFA_REGS_DEV_STS_AQ_RESTART_IN_PROGRESS_MASK 0x2 +#define EFA_REGS_DEV_STS_AQ_RESTART_FINISHED_SHIFT 2 +#define EFA_REGS_DEV_STS_AQ_RESTART_FINISHED_MASK 0x4 +#define EFA_REGS_DEV_STS_RESET_IN_PROGRESS_SHIFT 3 +#define EFA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK 0x8 +#define EFA_REGS_DEV_STS_RESET_FINISHED_SHIFT 4 +#define EFA_REGS_DEV_STS_RESET_FINISHED_MASK 0x10 +#define EFA_REGS_DEV_STS_FATAL_ERROR_SHIFT 5 +#define EFA_REGS_DEV_STS_FATAL_ERROR_MASK 0x20 + +/* mmio_reg_read register */ +#define EFA_REGS_MMIO_REG_READ_REQ_ID_MASK 0xffff +#define EFA_REGS_MMIO_REG_READ_REG_OFF_SHIFT 16 +#define EFA_REGS_MMIO_REG_READ_REG_OFF_MASK 0xffff0000 + +#endif /* _EFA_REGS_H_ */ -- cgit v1.2.3 From 853f56523565c7d3526799d3e2dc503128c336ec Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Sun, 5 May 2019 20:59:23 +0300 Subject: RDMA/efa: Add the efa.h header file Add EFA driver generic header file defining driver's device independent internal data structures and definitions. Signed-off-by: Gal Pressman Reviewed-by: Steve Wise Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/efa/efa.h | 163 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 163 insertions(+) create mode 100644 drivers/infiniband/hw/efa/efa.h diff --git a/drivers/infiniband/hw/efa/efa.h b/drivers/infiniband/hw/efa/efa.h new file mode 100644 index 000000000000..9e3cc3239c13 --- /dev/null +++ b/drivers/infiniband/hw/efa/efa.h @@ -0,0 +1,163 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ +/* + * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. 
+ */ + +#ifndef _EFA_H_ +#define _EFA_H_ + +#include +#include +#include +#include +#include + +#include +#include + +#include "efa_com_cmd.h" + +#define DRV_MODULE_NAME "efa" +#define DEVICE_NAME "Elastic Fabric Adapter (EFA)" + +#define EFA_IRQNAME_SIZE 40 + +/* 1 for AENQ + ADMIN */ +#define EFA_NUM_MSIX_VEC 1 +#define EFA_MGMNT_MSIX_VEC_IDX 0 + +struct efa_irq { + irq_handler_t handler; + void *data; + int cpu; + u32 vector; + cpumask_t affinity_hint_mask; + char name[EFA_IRQNAME_SIZE]; +}; + +struct efa_sw_stats { + atomic64_t alloc_pd_err; + atomic64_t create_qp_err; + atomic64_t create_cq_err; + atomic64_t reg_mr_err; + atomic64_t alloc_ucontext_err; + atomic64_t create_ah_err; +}; + +/* Don't use anything other than atomic64 */ +struct efa_stats { + struct efa_sw_stats sw_stats; + atomic64_t keep_alive_rcvd; +}; + +struct efa_dev { + struct ib_device ibdev; + struct efa_com_dev edev; + struct pci_dev *pdev; + struct efa_com_get_device_attr_result dev_attr; + + u64 reg_bar_addr; + u64 reg_bar_len; + u64 mem_bar_addr; + u64 mem_bar_len; + u64 db_bar_addr; + u64 db_bar_len; + u8 addr[EFA_GID_SIZE]; + u32 mtu; + + int admin_msix_vector_idx; + struct efa_irq admin_irq; + + struct efa_stats stats; +}; + +struct efa_ucontext { + struct ib_ucontext ibucontext; + struct xarray mmap_xa; + u32 mmap_xa_page; + u16 uarn; +}; + +struct efa_pd { + struct ib_pd ibpd; + u16 pdn; +}; + +struct efa_mr { + struct ib_mr ibmr; + struct ib_umem *umem; +}; + +struct efa_cq { + struct ib_cq ibcq; + struct efa_ucontext *ucontext; + dma_addr_t dma_addr; + void *cpu_addr; + size_t size; + u16 cq_idx; +}; + +struct efa_qp { + struct ib_qp ibqp; + dma_addr_t rq_dma_addr; + void *rq_cpu_addr; + size_t rq_size; + enum ib_qp_state state; + u32 qp_handle; + u32 max_send_wr; + u32 max_recv_wr; + u32 max_send_sge; + u32 max_recv_sge; + u32 max_inline_data; +}; + +struct efa_ah { + struct ib_ah ibah; + u16 ah; + /* dest_addr */ + u8 id[EFA_GID_SIZE]; +}; + +int efa_query_device(struct 
ib_device *ibdev, + struct ib_device_attr *props, + struct ib_udata *udata); +int efa_query_port(struct ib_device *ibdev, u8 port, + struct ib_port_attr *props); +int efa_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, + int qp_attr_mask, + struct ib_qp_init_attr *qp_init_attr); +int efa_query_gid(struct ib_device *ibdev, u8 port, int index, + union ib_gid *gid); +int efa_query_pkey(struct ib_device *ibdev, u8 port, u16 index, + u16 *pkey); +int efa_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata); +void efa_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata); +int efa_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata); +struct ib_qp *efa_create_qp(struct ib_pd *ibpd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata); +int efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata); +struct ib_cq *efa_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, + struct ib_udata *udata); +struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, + u64 virt_addr, int access_flags, + struct ib_udata *udata); +int efa_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata); +int efa_get_port_immutable(struct ib_device *ibdev, u8 port_num, + struct ib_port_immutable *immutable); +int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata); +void efa_dealloc_ucontext(struct ib_ucontext *ibucontext); +int efa_mmap(struct ib_ucontext *ibucontext, + struct vm_area_struct *vma); +int efa_create_ah(struct ib_ah *ibah, + struct rdma_ah_attr *ah_attr, + u32 flags, + struct ib_udata *udata); +void efa_destroy_ah(struct ib_ah *ibah, u32 flags); +int efa_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, + int qp_attr_mask, struct ib_udata *udata); +enum rdma_link_layer efa_port_link_layer(struct ib_device *ibdev, + u8 port_num); + +#endif /* _EFA_H_ */ -- cgit v1.2.3 From 43eaa49d511cf2f5a9fa56316f02cb80ecf6578b Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Sun, 5 May 2019 20:59:24 +0300 Subject: 
RDMA/efa: Add the efa_com.h file A helper header file for EFA admin queue, admin queue completion, asynchronous notification queue, and various hardware configuration data structures and functions. Signed-off-by: Gal Pressman Reviewed-by: Steve Wise Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/efa/efa_com.h | 144 ++++++++++++++++++++++++++++++++++++ 1 file changed, 144 insertions(+) create mode 100644 drivers/infiniband/hw/efa/efa_com.h diff --git a/drivers/infiniband/hw/efa/efa_com.h b/drivers/infiniband/hw/efa/efa_com.h new file mode 100644 index 000000000000..84d96724a74b --- /dev/null +++ b/drivers/infiniband/hw/efa/efa_com.h @@ -0,0 +1,144 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ +/* + * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. + */ + +#ifndef _EFA_COM_H_ +#define _EFA_COM_H_ + +#include +#include +#include +#include +#include + +#include + +#include "efa_common_defs.h" +#include "efa_admin_defs.h" +#include "efa_admin_cmds_defs.h" +#include "efa_regs_defs.h" + +#define EFA_MAX_HANDLERS 256 + +struct efa_com_admin_cq { + struct efa_admin_acq_entry *entries; + dma_addr_t dma_addr; + spinlock_t lock; /* Protects ACQ */ + + u16 cc; /* consumer counter */ + u8 phase; +}; + +struct efa_com_admin_sq { + struct efa_admin_aq_entry *entries; + dma_addr_t dma_addr; + spinlock_t lock; /* Protects ASQ */ + + u32 __iomem *db_addr; + + u16 cc; /* consumer counter */ + u16 pc; /* producer counter */ + u8 phase; + +}; + +/* Don't use anything other than atomic64 */ +struct efa_com_stats_admin { + atomic64_t aborted_cmd; + atomic64_t submitted_cmd; + atomic64_t completed_cmd; + atomic64_t no_completion; +}; + +enum { + EFA_AQ_STATE_RUNNING_BIT = 0, + EFA_AQ_STATE_POLLING_BIT = 1, +}; + +struct efa_com_admin_queue { + void *dmadev; + void *efa_dev; + struct efa_comp_ctx *comp_ctx; + u32 completion_timeout; /* usecs */ + u16 poll_interval; /* msecs */ + u16 depth; + struct efa_com_admin_cq cq; + struct 
efa_com_admin_sq sq; + u16 msix_vector_idx; + + unsigned long state; + + /* Count the number of available admin commands */ + struct semaphore avail_cmds; + + struct efa_com_stats_admin stats; + + spinlock_t comp_ctx_lock; /* Protects completion context pool */ + u32 *comp_ctx_pool; + u16 comp_ctx_pool_next; +}; + +struct efa_aenq_handlers; + +struct efa_com_aenq { + struct efa_admin_aenq_entry *entries; + struct efa_aenq_handlers *aenq_handlers; + dma_addr_t dma_addr; + u32 cc; /* consumer counter */ + u16 msix_vector_idx; + u16 depth; + u8 phase; +}; + +struct efa_com_mmio_read { + struct efa_admin_mmio_req_read_less_resp *read_resp; + dma_addr_t read_resp_dma_addr; + u16 seq_num; + u16 mmio_read_timeout; /* usecs */ + /* serializes mmio reads */ + spinlock_t lock; +}; + +struct efa_com_dev { + struct efa_com_admin_queue aq; + struct efa_com_aenq aenq; + u8 __iomem *reg_bar; + void *dmadev; + void *efa_dev; + u32 supported_features; + u32 dma_addr_bits; + + struct efa_com_mmio_read mmio_read; +}; + +typedef void (*efa_aenq_handler)(void *data, + struct efa_admin_aenq_entry *aenq_e); + +/* Holds aenq handlers. 
Indexed by AENQ event group */ +struct efa_aenq_handlers { + efa_aenq_handler handlers[EFA_MAX_HANDLERS]; + efa_aenq_handler unimplemented_handler; +}; + +int efa_com_admin_init(struct efa_com_dev *edev, + struct efa_aenq_handlers *aenq_handlers); +void efa_com_admin_destroy(struct efa_com_dev *edev); +int efa_com_dev_reset(struct efa_com_dev *edev, + enum efa_regs_reset_reason_types reset_reason); +void efa_com_set_admin_polling_mode(struct efa_com_dev *edev, bool polling); +void efa_com_admin_q_comp_intr_handler(struct efa_com_dev *edev); +int efa_com_mmio_reg_read_init(struct efa_com_dev *edev); +void efa_com_mmio_reg_read_destroy(struct efa_com_dev *edev); + +int efa_com_validate_version(struct efa_com_dev *edev); +int efa_com_get_dma_width(struct efa_com_dev *edev); + +int efa_com_cmd_exec(struct efa_com_admin_queue *aq, + struct efa_admin_aq_entry *cmd, + size_t cmd_size, + struct efa_admin_acq_entry *comp, + size_t comp_size); +void efa_com_aenq_intr_handler(struct efa_com_dev *edev, void *data); + +#endif /* _EFA_COM_H_ */ -- cgit v1.2.3 From cd9b3d597054ce5f8e5e19359d7762e253c9019f Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Sun, 5 May 2019 20:59:25 +0300 Subject: RDMA/efa: Add the com service API definitions Header file for the various commands that can be sent through the admin queue. This includes queue create/modify/destroy, setting up and removing protection domains and address handles, memory registration, etc.
Signed-off-by: Gal Pressman Reviewed-by: Steve Wise Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/efa/efa_com_cmd.h | 270 ++++++++++++++++++++++++++++++++ 1 file changed, 270 insertions(+) create mode 100644 drivers/infiniband/hw/efa/efa_com_cmd.h diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.h b/drivers/infiniband/hw/efa/efa_com_cmd.h new file mode 100644 index 000000000000..a1174380462c --- /dev/null +++ b/drivers/infiniband/hw/efa/efa_com_cmd.h @@ -0,0 +1,270 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ +/* + * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. + */ + +#ifndef _EFA_COM_CMD_H_ +#define _EFA_COM_CMD_H_ + +#include "efa_com.h" + +#define EFA_GID_SIZE 16 + +struct efa_com_create_qp_params { + u64 rq_base_addr; + u32 send_cq_idx; + u32 recv_cq_idx; + /* + * Send descriptor ring size in bytes, + * sufficient for user-provided number of WQEs and SGL size + */ + u32 sq_ring_size_in_bytes; + /* Max number of WQEs that will be posted on send queue */ + u32 sq_depth; + /* Recv descriptor ring size in bytes */ + u32 rq_ring_size_in_bytes; + u32 rq_depth; + u16 pd; + u16 uarn; + u8 qp_type; +}; + +struct efa_com_create_qp_result { + u32 qp_handle; + u32 qp_num; + u32 sq_db_offset; + u32 rq_db_offset; + u32 llq_descriptors_offset; + u16 send_sub_cq_idx; + u16 recv_sub_cq_idx; +}; + +struct efa_com_modify_qp_params { + u32 modify_mask; + u32 qp_handle; + u32 qp_state; + u32 cur_qp_state; + u32 qkey; + u32 sq_psn; + u8 sq_drained_async_notify; +}; + +struct efa_com_query_qp_params { + u32 qp_handle; +}; + +struct efa_com_query_qp_result { + u32 qp_state; + u32 qkey; + u32 sq_draining; + u32 sq_psn; +}; + +struct efa_com_destroy_qp_params { + u32 qp_handle; +}; + +struct efa_com_create_cq_params { + /* cq physical base address in OS memory */ + dma_addr_t dma_addr; + /* completion queue depth in # of entries */ + u16 cq_depth; + u16 num_sub_cqs; + u16 uarn; + u8 entry_size_in_bytes; +}; + +struct 
efa_com_create_cq_result { + /* cq identifier */ + u16 cq_idx; + /* actual cq depth in # of entries */ + u16 actual_depth; +}; + +struct efa_com_destroy_cq_params { + u16 cq_idx; +}; + +struct efa_com_create_ah_params { + u16 pdn; + /* Destination address in network byte order */ + u8 dest_addr[EFA_GID_SIZE]; +}; + +struct efa_com_create_ah_result { + u16 ah; +}; + +struct efa_com_destroy_ah_params { + u16 ah; + u16 pdn; +}; + +struct efa_com_get_network_attr_result { + u8 addr[EFA_GID_SIZE]; + u32 mtu; +}; + +struct efa_com_get_device_attr_result { + u64 page_size_cap; + u64 max_mr_pages; + u32 fw_version; + u32 admin_api_version; + u32 device_version; + u32 supported_features; + u32 phys_addr_width; + u32 virt_addr_width; + u32 max_qp; + u32 max_sq_depth; /* wqes */ + u32 max_rq_depth; /* wqes */ + u32 max_cq; + u32 max_cq_depth; /* cqes */ + u32 inline_buf_size; + u32 max_mr; + u32 max_pd; + u32 max_ah; + u32 max_llq_size; + u16 sub_cqs_per_cq; + u16 max_sq_sge; + u16 max_rq_sge; + u8 db_bar; +}; + +struct efa_com_get_hw_hints_result { + u16 mmio_read_timeout; + u16 driver_watchdog_timeout; + u16 admin_completion_timeout; + u16 poll_interval; + u32 reserved[4]; +}; + +struct efa_com_mem_addr { + u32 mem_addr_low; + u32 mem_addr_high; +}; + +/* Used at indirect mode page list chunks for chaining */ +struct efa_com_ctrl_buff_info { + /* indicates length of the buffer pointed by control_buffer_address. */ + u32 length; + /* points to control buffer (direct or indirect) */ + struct efa_com_mem_addr address; +}; + +struct efa_com_reg_mr_params { + /* Memory region length, in bytes. */ + u64 mr_length_in_bytes; + /* IO Virtual Address associated with this MR. */ + u64 iova; + /* words 8:15: Physical Buffer List, each element is page-aligned. 
*/ + union { + /* + * Inline array of physical addresses of app pages + * (optimization for short region reservations) + */ + u64 inline_pbl_array[4]; + /* + * Describes the next physically contiguous chunk of indirect + * page list. A page list contains physical addresses of command + * data pages. Data pages are 4KB; page list chunks are + * variable-sized. + */ + struct efa_com_ctrl_buff_info pbl; + } pbl; + /* number of pages in PBL (redundant, could be calculated) */ + u32 page_num; + /* Protection Domain */ + u16 pd; + /* + * phys_page_size_shift - page size is (1 << phys_page_size_shift) + * Page size is used for building the Virtual to Physical + * address mapping + */ + u8 page_shift; + /* + * permissions + * 0: local_write_enable - Write permissions: value of 1 needed + * for RQ buffers and for RDMA write:1: reserved1 - remote + * access flags, etc + */ + u8 permissions; + u8 inline_pbl; + u8 indirect; +}; + +struct efa_com_reg_mr_result { + /* + * To be used in conjunction with local buffers references in SQ and + * RQ WQE + */ + u32 l_key; + /* + * To be used in incoming RDMA semantics messages to refer to remotely + * accessed memory region + */ + u32 r_key; +}; + +struct efa_com_dereg_mr_params { + u32 l_key; +}; + +struct efa_com_alloc_pd_result { + u16 pdn; +}; + +struct efa_com_dealloc_pd_params { + u16 pdn; +}; + +struct efa_com_alloc_uar_result { + u16 uarn; +}; + +struct efa_com_dealloc_uar_params { + u16 uarn; +}; + +void efa_com_set_dma_addr(dma_addr_t addr, u32 *addr_high, u32 *addr_low); +int efa_com_create_qp(struct efa_com_dev *edev, + struct efa_com_create_qp_params *params, + struct efa_com_create_qp_result *res); +int efa_com_modify_qp(struct efa_com_dev *edev, + struct efa_com_modify_qp_params *params); +int efa_com_query_qp(struct efa_com_dev *edev, + struct efa_com_query_qp_params *params, + struct efa_com_query_qp_result *result); +int efa_com_destroy_qp(struct efa_com_dev *edev, + struct efa_com_destroy_qp_params *params); +int 
efa_com_create_cq(struct efa_com_dev *edev, + struct efa_com_create_cq_params *params, + struct efa_com_create_cq_result *result); +int efa_com_destroy_cq(struct efa_com_dev *edev, + struct efa_com_destroy_cq_params *params); +int efa_com_register_mr(struct efa_com_dev *edev, + struct efa_com_reg_mr_params *params, + struct efa_com_reg_mr_result *result); +int efa_com_dereg_mr(struct efa_com_dev *edev, + struct efa_com_dereg_mr_params *params); +int efa_com_create_ah(struct efa_com_dev *edev, + struct efa_com_create_ah_params *params, + struct efa_com_create_ah_result *result); +int efa_com_destroy_ah(struct efa_com_dev *edev, + struct efa_com_destroy_ah_params *params); +int efa_com_get_network_attr(struct efa_com_dev *edev, + struct efa_com_get_network_attr_result *result); +int efa_com_get_device_attr(struct efa_com_dev *edev, + struct efa_com_get_device_attr_result *result); +int efa_com_get_hw_hints(struct efa_com_dev *edev, + struct efa_com_get_hw_hints_result *result); +int efa_com_set_aenq_config(struct efa_com_dev *edev, u32 groups); +int efa_com_alloc_pd(struct efa_com_dev *edev, + struct efa_com_alloc_pd_result *result); +int efa_com_dealloc_pd(struct efa_com_dev *edev, + struct efa_com_dealloc_pd_params *params); +int efa_com_alloc_uar(struct efa_com_dev *edev, + struct efa_com_alloc_uar_result *result); +int efa_com_dealloc_uar(struct efa_com_dev *edev, + struct efa_com_dealloc_uar_params *params); + +#endif /* _EFA_COM_CMD_H_ */ -- cgit v1.2.3 From 2ce62149afaece4baee21ad2b925ae1d089b4e3b Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Sun, 5 May 2019 20:59:26 +0300 Subject: RDMA/efa: Add the ABI definitions Add the EFA ABI file exposed to userspace. 
Signed-off-by: Gal Pressman Reviewed-by: Steve Wise Signed-off-by: Jason Gunthorpe --- include/uapi/rdma/efa-abi.h | 101 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 include/uapi/rdma/efa-abi.h diff --git a/include/uapi/rdma/efa-abi.h b/include/uapi/rdma/efa-abi.h new file mode 100644 index 000000000000..9599a2a62be8 --- /dev/null +++ b/include/uapi/rdma/efa-abi.h @@ -0,0 +1,101 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */ +/* + * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. + */ + +#ifndef EFA_ABI_USER_H +#define EFA_ABI_USER_H + +#include + +/* + * Increment this value if any changes that break userspace ABI + * compatibility are made. + */ +#define EFA_UVERBS_ABI_VERSION 1 + +/* + * Keep structs aligned to 8 bytes. + * Keep reserved fields as arrays of __u8 named reserved_XXX where XXX is the + * hex bit offset of the field. + */ + +enum efa_ibv_user_cmds_supp_udata { + EFA_USER_CMDS_SUPP_UDATA_QUERY_DEVICE = 1 << 0, + EFA_USER_CMDS_SUPP_UDATA_CREATE_AH = 1 << 1, +}; + +struct efa_ibv_alloc_ucontext_resp { + __u32 comp_mask; + __u32 cmds_supp_udata_mask; + __u16 sub_cqs_per_cq; + __u16 inline_buf_size; + __u32 max_llq_size; /* bytes */ +}; + +struct efa_ibv_alloc_pd_resp { + __u32 comp_mask; + __u16 pdn; + __u8 reserved_30[2]; +}; + +struct efa_ibv_create_cq { + __u32 comp_mask; + __u32 cq_entry_size; + __u16 num_sub_cqs; + __u8 reserved_50[6]; +}; + +struct efa_ibv_create_cq_resp { + __u32 comp_mask; + __u8 reserved_20[4]; + __aligned_u64 q_mmap_key; + __aligned_u64 q_mmap_size; + __u16 cq_idx; + __u8 reserved_d0[6]; +}; + +enum { + EFA_QP_DRIVER_TYPE_SRD = 0, +}; + +struct efa_ibv_create_qp { + __u32 comp_mask; + __u32 rq_ring_size; /* bytes */ + __u32 sq_ring_size; /* bytes */ + __u32 driver_qp_type; +}; + +struct efa_ibv_create_qp_resp { + __u32 comp_mask; + /* the offset inside the page of the rq db */ + __u32 rq_db_offset; + /* the 
offset inside the page of the sq db */ + __u32 sq_db_offset; + /* the offset inside the page of descriptors buffer */ + __u32 llq_desc_offset; + __aligned_u64 rq_mmap_key; + __aligned_u64 rq_mmap_size; + __aligned_u64 rq_db_mmap_key; + __aligned_u64 sq_db_mmap_key; + __aligned_u64 llq_desc_mmap_key; + __u16 send_sub_cq_idx; + __u16 recv_sub_cq_idx; + __u8 reserved_1e0[4]; +}; + +struct efa_ibv_create_ah_resp { + __u32 comp_mask; + __u16 efa_address_handle; + __u8 reserved_30[2]; +}; + +struct efa_ibv_ex_query_device_resp { + __u32 comp_mask; + __u32 max_sq_wr; + __u32 max_rq_wr; + __u16 max_sq_sge; + __u16 max_rq_sge; +}; + +#endif /* EFA_ABI_USER_H */ -- cgit v1.2.3 From 0420e542569b2e56df29d243d09f5974bb6594a4 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Sun, 5 May 2019 20:59:27 +0300 Subject: RDMA/efa: Implement functions that submit and complete admin commands Add admin commands submissions/completions implementation. Signed-off-by: Gal Pressman Reviewed-by: Steve Wise Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/efa/efa_com.c | 1160 +++++++++++++++++++++++++++++++++++ 1 file changed, 1160 insertions(+) create mode 100644 drivers/infiniband/hw/efa/efa_com.c diff --git a/drivers/infiniband/hw/efa/efa_com.c b/drivers/infiniband/hw/efa/efa_com.c new file mode 100644 index 000000000000..a5c788741a04 --- /dev/null +++ b/drivers/infiniband/hw/efa/efa_com.c @@ -0,0 +1,1160 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +/* + * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. 
+ */ + +#include "efa_com.h" +#include "efa_regs_defs.h" + +#define ADMIN_CMD_TIMEOUT_US 30000000 /* usecs */ + +#define EFA_REG_READ_TIMEOUT_US 50000 /* usecs */ +#define EFA_MMIO_READ_INVALID 0xffffffff + +#define EFA_POLL_INTERVAL_MS 100 /* msecs */ + +#define EFA_ASYNC_QUEUE_DEPTH 16 +#define EFA_ADMIN_QUEUE_DEPTH 32 + +#define MIN_EFA_VER\ + ((EFA_ADMIN_API_VERSION_MAJOR << EFA_REGS_VERSION_MAJOR_VERSION_SHIFT) | \ + (EFA_ADMIN_API_VERSION_MINOR & EFA_REGS_VERSION_MINOR_VERSION_MASK)) + +#define EFA_CTRL_MAJOR 0 +#define EFA_CTRL_MINOR 0 +#define EFA_CTRL_SUB_MINOR 1 + +#define MIN_EFA_CTRL_VER \ + (((EFA_CTRL_MAJOR) << \ + (EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT)) | \ + ((EFA_CTRL_MINOR) << \ + (EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_SHIFT)) | \ + (EFA_CTRL_SUB_MINOR)) + +#define EFA_DMA_ADDR_TO_UINT32_LOW(x) ((u32)((u64)(x))) +#define EFA_DMA_ADDR_TO_UINT32_HIGH(x) ((u32)(((u64)(x)) >> 32)) + +#define EFA_REGS_ADMIN_INTR_MASK 1 + +enum efa_cmd_status { + EFA_CMD_SUBMITTED, + EFA_CMD_COMPLETED, + /* Abort - canceled by the driver */ + EFA_CMD_ABORTED, +}; + +struct efa_comp_ctx { + struct completion wait_event; + struct efa_admin_acq_entry *user_cqe; + u32 comp_size; + enum efa_cmd_status status; + /* status from the device */ + u8 comp_status; + u8 cmd_opcode; + u8 occupied; +}; + +static const char *efa_com_cmd_str(u8 cmd) +{ +#define EFA_CMD_STR_CASE(_cmd) case EFA_ADMIN_##_cmd: return #_cmd + + switch (cmd) { + EFA_CMD_STR_CASE(CREATE_QP); + EFA_CMD_STR_CASE(MODIFY_QP); + EFA_CMD_STR_CASE(QUERY_QP); + EFA_CMD_STR_CASE(DESTROY_QP); + EFA_CMD_STR_CASE(CREATE_AH); + EFA_CMD_STR_CASE(DESTROY_AH); + EFA_CMD_STR_CASE(REG_MR); + EFA_CMD_STR_CASE(DEREG_MR); + EFA_CMD_STR_CASE(CREATE_CQ); + EFA_CMD_STR_CASE(DESTROY_CQ); + EFA_CMD_STR_CASE(GET_FEATURE); + EFA_CMD_STR_CASE(SET_FEATURE); + EFA_CMD_STR_CASE(GET_STATS); + EFA_CMD_STR_CASE(ALLOC_PD); + EFA_CMD_STR_CASE(DEALLOC_PD); + EFA_CMD_STR_CASE(ALLOC_UAR); + EFA_CMD_STR_CASE(DEALLOC_UAR); + default: 
return "unknown command opcode"; + } +#undef EFA_CMD_STR_CASE +} + +static u32 efa_com_reg_read32(struct efa_com_dev *edev, u16 offset) +{ + struct efa_com_mmio_read *mmio_read = &edev->mmio_read; + struct efa_admin_mmio_req_read_less_resp *read_resp; + unsigned long exp_time; + u32 mmio_read_reg; + u32 err; + + read_resp = mmio_read->read_resp; + + spin_lock(&mmio_read->lock); + mmio_read->seq_num++; + + /* trash DMA req_id to identify when hardware is done */ + read_resp->req_id = mmio_read->seq_num + 0x9aL; + mmio_read_reg = (offset << EFA_REGS_MMIO_REG_READ_REG_OFF_SHIFT) & + EFA_REGS_MMIO_REG_READ_REG_OFF_MASK; + mmio_read_reg |= mmio_read->seq_num & + EFA_REGS_MMIO_REG_READ_REQ_ID_MASK; + + writel(mmio_read_reg, edev->reg_bar + EFA_REGS_MMIO_REG_READ_OFF); + + exp_time = jiffies + usecs_to_jiffies(mmio_read->mmio_read_timeout); + do { + if (READ_ONCE(read_resp->req_id) == mmio_read->seq_num) + break; + udelay(1); + } while (time_is_after_jiffies(exp_time)); + + if (read_resp->req_id != mmio_read->seq_num) { + ibdev_err(edev->efa_dev, + "Reading register timed out. 
expected: req id[%u] offset[%#x] actual: req id[%u] offset[%#x]\n", + mmio_read->seq_num, offset, read_resp->req_id, + read_resp->reg_off); + err = EFA_MMIO_READ_INVALID; + goto out; + } + + if (read_resp->reg_off != offset) { + ibdev_err(edev->efa_dev, + "Reading register failed: wrong offset provided\n"); + err = EFA_MMIO_READ_INVALID; + goto out; + } + + err = read_resp->reg_val; +out: + spin_unlock(&mmio_read->lock); + return err; +} + +static int efa_com_admin_init_sq(struct efa_com_dev *edev) +{ + struct efa_com_admin_queue *aq = &edev->aq; + struct efa_com_admin_sq *sq = &aq->sq; + u16 size = aq->depth * sizeof(*sq->entries); + u32 addr_high; + u32 addr_low; + u32 aq_caps; + + sq->entries = + dma_alloc_coherent(aq->dmadev, size, &sq->dma_addr, GFP_KERNEL); + if (!sq->entries) + return -ENOMEM; + + spin_lock_init(&sq->lock); + + sq->cc = 0; + sq->pc = 0; + sq->phase = 1; + + sq->db_addr = (u32 __iomem *)(edev->reg_bar + EFA_REGS_AQ_PROD_DB_OFF); + + addr_high = EFA_DMA_ADDR_TO_UINT32_HIGH(sq->dma_addr); + addr_low = EFA_DMA_ADDR_TO_UINT32_LOW(sq->dma_addr); + + writel(addr_low, edev->reg_bar + EFA_REGS_AQ_BASE_LO_OFF); + writel(addr_high, edev->reg_bar + EFA_REGS_AQ_BASE_HI_OFF); + + aq_caps = aq->depth & EFA_REGS_AQ_CAPS_AQ_DEPTH_MASK; + aq_caps |= (sizeof(struct efa_admin_aq_entry) << + EFA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_SHIFT) & + EFA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_MASK; + + writel(aq_caps, edev->reg_bar + EFA_REGS_AQ_CAPS_OFF); + + return 0; +} + +static int efa_com_admin_init_cq(struct efa_com_dev *edev) +{ + struct efa_com_admin_queue *aq = &edev->aq; + struct efa_com_admin_cq *cq = &aq->cq; + u16 size = aq->depth * sizeof(*cq->entries); + u32 addr_high; + u32 addr_low; + u32 acq_caps; + + cq->entries = + dma_alloc_coherent(aq->dmadev, size, &cq->dma_addr, GFP_KERNEL); + if (!cq->entries) + return -ENOMEM; + + spin_lock_init(&cq->lock); + + cq->cc = 0; + cq->phase = 1; + + addr_high = EFA_DMA_ADDR_TO_UINT32_HIGH(cq->dma_addr); + addr_low = 
EFA_DMA_ADDR_TO_UINT32_LOW(cq->dma_addr); + + writel(addr_low, edev->reg_bar + EFA_REGS_ACQ_BASE_LO_OFF); + writel(addr_high, edev->reg_bar + EFA_REGS_ACQ_BASE_HI_OFF); + + acq_caps = aq->depth & EFA_REGS_ACQ_CAPS_ACQ_DEPTH_MASK; + acq_caps |= (sizeof(struct efa_admin_acq_entry) << + EFA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_SHIFT) & + EFA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_MASK; + acq_caps |= (aq->msix_vector_idx << + EFA_REGS_ACQ_CAPS_ACQ_MSIX_VECTOR_SHIFT) & + EFA_REGS_ACQ_CAPS_ACQ_MSIX_VECTOR_MASK; + + writel(acq_caps, edev->reg_bar + EFA_REGS_ACQ_CAPS_OFF); + + return 0; +} + +static int efa_com_admin_init_aenq(struct efa_com_dev *edev, + struct efa_aenq_handlers *aenq_handlers) +{ + struct efa_com_aenq *aenq = &edev->aenq; + u32 addr_low, addr_high, aenq_caps; + u16 size; + + if (!aenq_handlers) { + ibdev_err(edev->efa_dev, "aenq handlers pointer is NULL\n"); + return -EINVAL; + } + + size = EFA_ASYNC_QUEUE_DEPTH * sizeof(*aenq->entries); + aenq->entries = dma_alloc_coherent(edev->dmadev, size, &aenq->dma_addr, + GFP_KERNEL); + if (!aenq->entries) + return -ENOMEM; + + aenq->aenq_handlers = aenq_handlers; + aenq->depth = EFA_ASYNC_QUEUE_DEPTH; + aenq->cc = 0; + aenq->phase = 1; + + addr_low = EFA_DMA_ADDR_TO_UINT32_LOW(aenq->dma_addr); + addr_high = EFA_DMA_ADDR_TO_UINT32_HIGH(aenq->dma_addr); + + writel(addr_low, edev->reg_bar + EFA_REGS_AENQ_BASE_LO_OFF); + writel(addr_high, edev->reg_bar + EFA_REGS_AENQ_BASE_HI_OFF); + + aenq_caps = aenq->depth & EFA_REGS_AENQ_CAPS_AENQ_DEPTH_MASK; + aenq_caps |= (sizeof(struct efa_admin_aenq_entry) << + EFA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_SHIFT) & + EFA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_MASK; + aenq_caps |= (aenq->msix_vector_idx + << EFA_REGS_AENQ_CAPS_AENQ_MSIX_VECTOR_SHIFT) & + EFA_REGS_AENQ_CAPS_AENQ_MSIX_VECTOR_MASK; + writel(aenq_caps, edev->reg_bar + EFA_REGS_AENQ_CAPS_OFF); + + /* + * Init cons_db to mark that all entries in the queue + * are initially available + */ + writel(edev->aenq.cc, edev->reg_bar + 
EFA_REGS_AENQ_CONS_DB_OFF); + + return 0; +} + +/* ID to be used with efa_com_get_comp_ctx */ +static u16 efa_com_alloc_ctx_id(struct efa_com_admin_queue *aq) +{ + u16 ctx_id; + + spin_lock(&aq->comp_ctx_lock); + ctx_id = aq->comp_ctx_pool[aq->comp_ctx_pool_next]; + aq->comp_ctx_pool_next++; + spin_unlock(&aq->comp_ctx_lock); + + return ctx_id; +} + +static void efa_com_dealloc_ctx_id(struct efa_com_admin_queue *aq, + u16 ctx_id) +{ + spin_lock(&aq->comp_ctx_lock); + aq->comp_ctx_pool_next--; + aq->comp_ctx_pool[aq->comp_ctx_pool_next] = ctx_id; + spin_unlock(&aq->comp_ctx_lock); +} + +static inline void efa_com_put_comp_ctx(struct efa_com_admin_queue *aq, + struct efa_comp_ctx *comp_ctx) +{ + u16 comp_id = comp_ctx->user_cqe->acq_common_descriptor.command & + EFA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID_MASK; + + ibdev_dbg(aq->efa_dev, "Putting completion command_id %d\n", comp_id); + comp_ctx->occupied = 0; + efa_com_dealloc_ctx_id(aq, comp_id); +} + +static struct efa_comp_ctx *efa_com_get_comp_ctx(struct efa_com_admin_queue *aq, + u16 command_id, bool capture) +{ + if (command_id >= aq->depth) { + ibdev_err(aq->efa_dev, + "command id is larger than the queue size. 
cmd_id: %u queue size %d\n", + command_id, aq->depth); + return NULL; + } + + if (aq->comp_ctx[command_id].occupied && capture) { + ibdev_err(aq->efa_dev, "Completion context is occupied\n"); + return NULL; + } + + if (capture) { + aq->comp_ctx[command_id].occupied = 1; + ibdev_dbg(aq->efa_dev, "Taking completion ctxt command_id %d\n", + command_id); + } + + return &aq->comp_ctx[command_id]; +} + +static struct efa_comp_ctx *__efa_com_submit_admin_cmd(struct efa_com_admin_queue *aq, + struct efa_admin_aq_entry *cmd, + size_t cmd_size_in_bytes, + struct efa_admin_acq_entry *comp, + size_t comp_size_in_bytes) +{ + struct efa_comp_ctx *comp_ctx; + u16 queue_size_mask; + u16 ctx_id; + u16 pi; + + queue_size_mask = aq->depth - 1; + pi = aq->sq.pc & queue_size_mask; + + ctx_id = efa_com_alloc_ctx_id(aq); + + cmd->aq_common_descriptor.flags |= aq->sq.phase & + EFA_ADMIN_AQ_COMMON_DESC_PHASE_MASK; + + cmd->aq_common_descriptor.command_id |= ctx_id & + EFA_ADMIN_AQ_COMMON_DESC_COMMAND_ID_MASK; + + comp_ctx = efa_com_get_comp_ctx(aq, ctx_id, true); + if (!comp_ctx) { + efa_com_dealloc_ctx_id(aq, ctx_id); + return ERR_PTR(-EINVAL); + } + + comp_ctx->status = EFA_CMD_SUBMITTED; + comp_ctx->comp_size = comp_size_in_bytes; + comp_ctx->user_cqe = comp; + comp_ctx->cmd_opcode = cmd->aq_common_descriptor.opcode; + + reinit_completion(&comp_ctx->wait_event); + + memcpy(&aq->sq.entries[pi], cmd, cmd_size_in_bytes); + + aq->sq.pc++; + atomic64_inc(&aq->stats.submitted_cmd); + + if ((aq->sq.pc & queue_size_mask) == 0) + aq->sq.phase = !aq->sq.phase; + + /* barrier not needed in case of writel */ + writel(aq->sq.pc, aq->sq.db_addr); + + return comp_ctx; +} + +static inline int efa_com_init_comp_ctxt(struct efa_com_admin_queue *aq) +{ + size_t pool_size = aq->depth * sizeof(*aq->comp_ctx_pool); + size_t size = aq->depth * sizeof(struct efa_comp_ctx); + struct efa_comp_ctx *comp_ctx; + u16 i; + + aq->comp_ctx = devm_kzalloc(aq->dmadev, size, GFP_KERNEL); + aq->comp_ctx_pool = 
devm_kzalloc(aq->dmadev, pool_size, GFP_KERNEL); + if (!aq->comp_ctx || !aq->comp_ctx_pool) { + devm_kfree(aq->dmadev, aq->comp_ctx_pool); + devm_kfree(aq->dmadev, aq->comp_ctx); + return -ENOMEM; + } + + for (i = 0; i < aq->depth; i++) { + comp_ctx = efa_com_get_comp_ctx(aq, i, false); + if (comp_ctx) + init_completion(&comp_ctx->wait_event); + + aq->comp_ctx_pool[i] = i; + } + + spin_lock_init(&aq->comp_ctx_lock); + + aq->comp_ctx_pool_next = 0; + + return 0; +} + +static struct efa_comp_ctx *efa_com_submit_admin_cmd(struct efa_com_admin_queue *aq, + struct efa_admin_aq_entry *cmd, + size_t cmd_size_in_bytes, + struct efa_admin_acq_entry *comp, + size_t comp_size_in_bytes) +{ + struct efa_comp_ctx *comp_ctx; + + spin_lock(&aq->sq.lock); + if (!test_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state)) { + ibdev_err(aq->efa_dev, "Admin queue is closed\n"); + spin_unlock(&aq->sq.lock); + return ERR_PTR(-ENODEV); + } + + comp_ctx = __efa_com_submit_admin_cmd(aq, cmd, cmd_size_in_bytes, comp, + comp_size_in_bytes); + spin_unlock(&aq->sq.lock); + if (IS_ERR(comp_ctx)) + clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state); + + return comp_ctx; +} + +static void efa_com_handle_single_admin_completion(struct efa_com_admin_queue *aq, + struct efa_admin_acq_entry *cqe) +{ + struct efa_comp_ctx *comp_ctx; + u16 cmd_id; + + cmd_id = cqe->acq_common_descriptor.command & + EFA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID_MASK; + + comp_ctx = efa_com_get_comp_ctx(aq, cmd_id, false); + if (!comp_ctx) { + ibdev_err(aq->efa_dev, + "comp_ctx is NULL. 
Changing the admin queue running state\n"); + clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state); + return; + } + + comp_ctx->status = EFA_CMD_COMPLETED; + comp_ctx->comp_status = cqe->acq_common_descriptor.status; + if (comp_ctx->user_cqe) + memcpy(comp_ctx->user_cqe, cqe, comp_ctx->comp_size); + + if (!test_bit(EFA_AQ_STATE_POLLING_BIT, &aq->state)) + complete(&comp_ctx->wait_event); +} + +static void efa_com_handle_admin_completion(struct efa_com_admin_queue *aq) +{ + struct efa_admin_acq_entry *cqe; + u16 queue_size_mask; + u16 comp_num = 0; + u8 phase; + u16 ci; + + queue_size_mask = aq->depth - 1; + + ci = aq->cq.cc & queue_size_mask; + phase = aq->cq.phase; + + cqe = &aq->cq.entries[ci]; + + /* Go over all the completions */ + while ((READ_ONCE(cqe->acq_common_descriptor.flags) & + EFA_ADMIN_ACQ_COMMON_DESC_PHASE_MASK) == phase) { + /* + * Do not read the rest of the completion entry before the + * phase bit was validated + */ + dma_rmb(); + efa_com_handle_single_admin_completion(aq, cqe); + + ci++; + comp_num++; + if (ci == aq->depth) { + ci = 0; + phase = !phase; + } + + cqe = &aq->cq.entries[ci]; + } + + aq->cq.cc += comp_num; + aq->cq.phase = phase; + aq->sq.cc += comp_num; + atomic64_add(comp_num, &aq->stats.completed_cmd); +} + +static int efa_com_comp_status_to_errno(u8 comp_status) +{ + switch (comp_status) { + case EFA_ADMIN_SUCCESS: + return 0; + case EFA_ADMIN_RESOURCE_ALLOCATION_FAILURE: + return -ENOMEM; + case EFA_ADMIN_UNSUPPORTED_OPCODE: + return -EOPNOTSUPP; + case EFA_ADMIN_BAD_OPCODE: + case EFA_ADMIN_MALFORMED_REQUEST: + case EFA_ADMIN_ILLEGAL_PARAMETER: + case EFA_ADMIN_UNKNOWN_ERROR: + return -EINVAL; + default: + return -EINVAL; + } +} + +static int efa_com_wait_and_process_admin_cq_polling(struct efa_comp_ctx *comp_ctx, + struct efa_com_admin_queue *aq) +{ + unsigned long timeout; + unsigned long flags; + int err; + + timeout = jiffies + usecs_to_jiffies(aq->completion_timeout); + + while (1) { + spin_lock_irqsave(&aq->cq.lock, flags); + 
efa_com_handle_admin_completion(aq); + spin_unlock_irqrestore(&aq->cq.lock, flags); + + if (comp_ctx->status != EFA_CMD_SUBMITTED) + break; + + if (time_is_before_jiffies(timeout)) { + ibdev_err(aq->efa_dev, + "Wait for completion (polling) timeout\n"); + /* EFA didn't have any completion */ + atomic64_inc(&aq->stats.no_completion); + + clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state); + err = -ETIME; + goto out; + } + + msleep(aq->poll_interval); + } + + if (comp_ctx->status == EFA_CMD_ABORTED) { + ibdev_err(aq->efa_dev, "Command was aborted\n"); + atomic64_inc(&aq->stats.aborted_cmd); + err = -ENODEV; + goto out; + } + + WARN_ONCE(comp_ctx->status != EFA_CMD_COMPLETED, + "Invalid completion status %d\n", comp_ctx->status); + + err = efa_com_comp_status_to_errno(comp_ctx->comp_status); +out: + efa_com_put_comp_ctx(aq, comp_ctx); + return err; +} + +static int efa_com_wait_and_process_admin_cq_interrupts(struct efa_comp_ctx *comp_ctx, + struct efa_com_admin_queue *aq) +{ + unsigned long flags; + int err; + + wait_for_completion_timeout(&comp_ctx->wait_event, + usecs_to_jiffies(aq->completion_timeout)); + + /* + * In case the command wasn't completed find out the root cause. + * There might be 2 kinds of errors + * 1) No completion (timeout reached) + * 2) There is completion but the device didn't get any msi-x interrupt. 
+ */ + if (comp_ctx->status == EFA_CMD_SUBMITTED) { + spin_lock_irqsave(&aq->cq.lock, flags); + efa_com_handle_admin_completion(aq); + spin_unlock_irqrestore(&aq->cq.lock, flags); + + atomic64_inc(&aq->stats.no_completion); + + if (comp_ctx->status == EFA_CMD_COMPLETED) + ibdev_err(aq->efa_dev, + "The device sent a completion but the driver didn't receive any MSI-X interrupt for admin cmd %s(%d) status %d (ctx: 0x%p, sq producer: %d, sq consumer: %d, cq consumer: %d)\n", + efa_com_cmd_str(comp_ctx->cmd_opcode), + comp_ctx->cmd_opcode, comp_ctx->status, + comp_ctx, aq->sq.pc, aq->sq.cc, aq->cq.cc); + else + ibdev_err(aq->efa_dev, + "The device didn't send any completion for admin cmd %s(%d) status %d (ctx 0x%p, sq producer: %d, sq consumer: %d, cq consumer: %d)\n", + efa_com_cmd_str(comp_ctx->cmd_opcode), + comp_ctx->cmd_opcode, comp_ctx->status, + comp_ctx, aq->sq.pc, aq->sq.cc, aq->cq.cc); + + clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state); + err = -ETIME; + goto out; + } + + err = efa_com_comp_status_to_errno(comp_ctx->comp_status); +out: + efa_com_put_comp_ctx(aq, comp_ctx); + return err; +} + +/* + * There are two types to wait for completion. + * Polling mode - wait until the completion is available. + * Async mode - wait on wait queue until the completion is ready + * (or the timeout expired). + * It is expected that the IRQ called efa_com_handle_admin_completion + * to mark the completions. + */ +static int efa_com_wait_and_process_admin_cq(struct efa_comp_ctx *comp_ctx, + struct efa_com_admin_queue *aq) +{ + if (test_bit(EFA_AQ_STATE_POLLING_BIT, &aq->state)) + return efa_com_wait_and_process_admin_cq_polling(comp_ctx, aq); + + return efa_com_wait_and_process_admin_cq_interrupts(comp_ctx, aq); +} + +/** + * efa_com_cmd_exec - Execute admin command + * @aq: admin queue. + * @cmd: the admin command to execute. + * @cmd_size: the command size. + * @comp: command completion return entry. + * @comp_size: command completion size. 
+ * Submit an admin command, then block until the device returns a completion
+ * for it (or the wait fails).
+ * The completion is copied into comp.
+ *
+ * Return: 0 on success, negative value on failure.
+ */
+int efa_com_cmd_exec(struct efa_com_admin_queue *aq,
+		     struct efa_admin_aq_entry *cmd,
+		     size_t cmd_size,
+		     struct efa_admin_acq_entry *comp,
+		     size_t comp_size)
+{
+	struct efa_comp_ctx *ctx;
+	int rc;
+
+	might_sleep();
+
+	/* Take a command slot; sleeps here while the queue is full */
+	down(&aq->avail_cmds);
+
+	ibdev_dbg(aq->efa_dev, "%s (opcode %d)\n",
+		  efa_com_cmd_str(cmd->aq_common_descriptor.opcode),
+		  cmd->aq_common_descriptor.opcode);
+
+	ctx = efa_com_submit_admin_cmd(aq, cmd, cmd_size, comp, comp_size);
+	if (IS_ERR(ctx)) {
+		ibdev_err(aq->efa_dev,
+			  "Failed to submit command %s (opcode %u) err %ld\n",
+			  efa_com_cmd_str(cmd->aq_common_descriptor.opcode),
+			  cmd->aq_common_descriptor.opcode, PTR_ERR(ctx));
+
+		rc = PTR_ERR(ctx);
+		goto out_release_slot;
+	}
+
+	rc = efa_com_wait_and_process_admin_cq(ctx, aq);
+	if (rc)
+		ibdev_err(aq->efa_dev,
+			  "Failed to process command %s (opcode %u) comp_status %d err %d\n",
+			  efa_com_cmd_str(cmd->aq_common_descriptor.opcode),
+			  cmd->aq_common_descriptor.opcode,
+			  ctx->comp_status, rc);
+
+out_release_slot:
+	/* Give the slot back on every path taken after down() */
+	up(&aq->avail_cmds);
+
+	return rc;
+}
+
+/**
+ * efa_com_abort_admin_commands - Abort all the outstanding admin commands.
+ * @edev: EFA communication layer struct
+ *
+ * This method aborts all the outstanding admin commands.
+ * The caller should then call efa_com_wait_for_abort_completion to make sure
+ * all the commands were completed.
+ */ +static void efa_com_abort_admin_commands(struct efa_com_dev *edev) +{ + struct efa_com_admin_queue *aq = &edev->aq; + struct efa_comp_ctx *comp_ctx; + unsigned long flags; + u16 i; + + spin_lock(&aq->sq.lock); + spin_lock_irqsave(&aq->cq.lock, flags); + for (i = 0; i < aq->depth; i++) { + comp_ctx = efa_com_get_comp_ctx(aq, i, false); + if (!comp_ctx) + break; + + comp_ctx->status = EFA_CMD_ABORTED; + + complete(&comp_ctx->wait_event); + } + spin_unlock_irqrestore(&aq->cq.lock, flags); + spin_unlock(&aq->sq.lock); +} + +/** + * efa_com_wait_for_abort_completion - Wait for admin commands abort. + * @edev: EFA communication layer struct + * + * This method wait until all the outstanding admin commands will be completed. + */ +static void efa_com_wait_for_abort_completion(struct efa_com_dev *edev) +{ + struct efa_com_admin_queue *aq = &edev->aq; + int i; + + /* all mine */ + for (i = 0; i < aq->depth; i++) + down(&aq->avail_cmds); + + /* let it go */ + for (i = 0; i < aq->depth; i++) + up(&aq->avail_cmds); +} + +static void efa_com_admin_flush(struct efa_com_dev *edev) +{ + struct efa_com_admin_queue *aq = &edev->aq; + + clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state); + + efa_com_abort_admin_commands(edev); + efa_com_wait_for_abort_completion(edev); +} + +/** + * efa_com_admin_destroy - Destroy the admin and the async events queues. 
+ * @edev: EFA communication layer struct + */ +void efa_com_admin_destroy(struct efa_com_dev *edev) +{ + struct efa_com_admin_queue *aq = &edev->aq; + struct efa_com_aenq *aenq = &edev->aenq; + struct efa_com_admin_cq *cq = &aq->cq; + struct efa_com_admin_sq *sq = &aq->sq; + u16 size; + + efa_com_admin_flush(edev); + + devm_kfree(edev->dmadev, aq->comp_ctx_pool); + devm_kfree(edev->dmadev, aq->comp_ctx); + + size = aq->depth * sizeof(*sq->entries); + dma_free_coherent(edev->dmadev, size, sq->entries, sq->dma_addr); + + size = aq->depth * sizeof(*cq->entries); + dma_free_coherent(edev->dmadev, size, cq->entries, cq->dma_addr); + + size = aenq->depth * sizeof(*aenq->entries); + dma_free_coherent(edev->dmadev, size, aenq->entries, aenq->dma_addr); +} + +/** + * efa_com_set_admin_polling_mode - Set the admin completion queue polling mode + * @edev: EFA communication layer struct + * @polling: Enable/Disable polling mode + * + * Set the admin completion mode. + */ +void efa_com_set_admin_polling_mode(struct efa_com_dev *edev, bool polling) +{ + u32 mask_value = 0; + + if (polling) + mask_value = EFA_REGS_ADMIN_INTR_MASK; + + writel(mask_value, edev->reg_bar + EFA_REGS_INTR_MASK_OFF); + if (polling) + set_bit(EFA_AQ_STATE_POLLING_BIT, &edev->aq.state); + else + clear_bit(EFA_AQ_STATE_POLLING_BIT, &edev->aq.state); +} + +static void efa_com_stats_init(struct efa_com_dev *edev) +{ + atomic64_t *s = (atomic64_t *)&edev->aq.stats; + int i; + + for (i = 0; i < sizeof(edev->aq.stats) / sizeof(*s); i++, s++) + atomic64_set(s, 0); +} + +/** + * efa_com_admin_init - Init the admin and the async queues + * @edev: EFA communication layer struct + * @aenq_handlers: Those handlers to be called upon event. + * + * Initialize the admin submission and completion queues. + * Initialize the asynchronous events notification queues. + * + * @return - 0 on success, negative value on failure. 
+ */ +int efa_com_admin_init(struct efa_com_dev *edev, + struct efa_aenq_handlers *aenq_handlers) +{ + struct efa_com_admin_queue *aq = &edev->aq; + u32 timeout; + u32 dev_sts; + u32 cap; + int err; + + dev_sts = efa_com_reg_read32(edev, EFA_REGS_DEV_STS_OFF); + if (!(dev_sts & EFA_REGS_DEV_STS_READY_MASK)) { + ibdev_err(edev->efa_dev, + "Device isn't ready, abort com init %#x\n", dev_sts); + return -ENODEV; + } + + aq->depth = EFA_ADMIN_QUEUE_DEPTH; + + aq->dmadev = edev->dmadev; + aq->efa_dev = edev->efa_dev; + set_bit(EFA_AQ_STATE_POLLING_BIT, &aq->state); + + sema_init(&aq->avail_cmds, aq->depth); + + efa_com_stats_init(edev); + + err = efa_com_init_comp_ctxt(aq); + if (err) + return err; + + err = efa_com_admin_init_sq(edev); + if (err) + goto err_destroy_comp_ctxt; + + err = efa_com_admin_init_cq(edev); + if (err) + goto err_destroy_sq; + + efa_com_set_admin_polling_mode(edev, false); + + err = efa_com_admin_init_aenq(edev, aenq_handlers); + if (err) + goto err_destroy_cq; + + cap = efa_com_reg_read32(edev, EFA_REGS_CAPS_OFF); + timeout = (cap & EFA_REGS_CAPS_ADMIN_CMD_TO_MASK) >> + EFA_REGS_CAPS_ADMIN_CMD_TO_SHIFT; + if (timeout) + /* the resolution of timeout reg is 100ms */ + aq->completion_timeout = timeout * 100000; + else + aq->completion_timeout = ADMIN_CMD_TIMEOUT_US; + + aq->poll_interval = EFA_POLL_INTERVAL_MS; + + set_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state); + + return 0; + +err_destroy_cq: + dma_free_coherent(edev->dmadev, aq->depth * sizeof(*aq->cq.entries), + aq->cq.entries, aq->cq.dma_addr); +err_destroy_sq: + dma_free_coherent(edev->dmadev, aq->depth * sizeof(*aq->sq.entries), + aq->sq.entries, aq->sq.dma_addr); +err_destroy_comp_ctxt: + devm_kfree(edev->dmadev, aq->comp_ctx); + + return err; +} + +/** + * efa_com_admin_q_comp_intr_handler - admin queue interrupt handler + * @edev: EFA communication layer struct + * + * This method goes over the admin completion queue and wakes up + * all the pending threads that wait on the commands wait 
event. + * + * @note: Should be called after MSI-X interrupt. + */ +void efa_com_admin_q_comp_intr_handler(struct efa_com_dev *edev) +{ + unsigned long flags; + + spin_lock_irqsave(&edev->aq.cq.lock, flags); + efa_com_handle_admin_completion(&edev->aq); + spin_unlock_irqrestore(&edev->aq.cq.lock, flags); +} + +/* + * efa_handle_specific_aenq_event: + * return the handler that is relevant to the specific event group + */ +static efa_aenq_handler efa_com_get_specific_aenq_cb(struct efa_com_dev *edev, + u16 group) +{ + struct efa_aenq_handlers *aenq_handlers = edev->aenq.aenq_handlers; + + if (group < EFA_MAX_HANDLERS && aenq_handlers->handlers[group]) + return aenq_handlers->handlers[group]; + + return aenq_handlers->unimplemented_handler; +} + +/** + * efa_com_aenq_intr_handler - AENQ interrupt handler + * @edev: EFA communication layer struct + * @data: Data of interrupt handler. + * + * Go over the async event notification queue and call the proper aenq handler. + */ +void efa_com_aenq_intr_handler(struct efa_com_dev *edev, void *data) +{ + struct efa_admin_aenq_common_desc *aenq_common; + struct efa_com_aenq *aenq = &edev->aenq; + struct efa_admin_aenq_entry *aenq_e; + efa_aenq_handler handler_cb; + u32 processed = 0; + u8 phase; + u32 ci; + + ci = aenq->cc & (aenq->depth - 1); + phase = aenq->phase; + aenq_e = &aenq->entries[ci]; /* Get first entry */ + aenq_common = &aenq_e->aenq_common_desc; + + /* Go over all the events */ + while ((READ_ONCE(aenq_common->flags) & + EFA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK) == phase) { + /* + * Do not read the rest of the completion entry before the + * phase bit was validated + */ + dma_rmb(); + + /* Handle specific event*/ + handler_cb = efa_com_get_specific_aenq_cb(edev, + aenq_common->group); + handler_cb(data, aenq_e); /* call the actual event handler*/ + + /* Get next event entry */ + ci++; + processed++; + + if (ci == aenq->depth) { + ci = 0; + phase = !phase; + } + aenq_e = &aenq->entries[ci]; + aenq_common = 
&aenq_e->aenq_common_desc; + } + + aenq->cc += processed; + aenq->phase = phase; + + /* Don't update aenq doorbell if there weren't any processed events */ + if (!processed) + return; + + /* barrier not needed in case of writel */ + writel(aenq->cc, edev->reg_bar + EFA_REGS_AENQ_CONS_DB_OFF); +} + +static void efa_com_mmio_reg_read_resp_addr_init(struct efa_com_dev *edev) +{ + struct efa_com_mmio_read *mmio_read = &edev->mmio_read; + u32 addr_high; + u32 addr_low; + + /* dma_addr_bits is unknown at this point */ + addr_high = (mmio_read->read_resp_dma_addr >> 32) & GENMASK(31, 0); + addr_low = mmio_read->read_resp_dma_addr & GENMASK(31, 0); + + writel(addr_high, edev->reg_bar + EFA_REGS_MMIO_RESP_HI_OFF); + writel(addr_low, edev->reg_bar + EFA_REGS_MMIO_RESP_LO_OFF); +} + +int efa_com_mmio_reg_read_init(struct efa_com_dev *edev) +{ + struct efa_com_mmio_read *mmio_read = &edev->mmio_read; + + spin_lock_init(&mmio_read->lock); + mmio_read->read_resp = + dma_alloc_coherent(edev->dmadev, sizeof(*mmio_read->read_resp), + &mmio_read->read_resp_dma_addr, GFP_KERNEL); + if (!mmio_read->read_resp) + return -ENOMEM; + + efa_com_mmio_reg_read_resp_addr_init(edev); + + mmio_read->read_resp->req_id = 0; + mmio_read->seq_num = 0; + mmio_read->mmio_read_timeout = EFA_REG_READ_TIMEOUT_US; + + return 0; +} + +void efa_com_mmio_reg_read_destroy(struct efa_com_dev *edev) +{ + struct efa_com_mmio_read *mmio_read = &edev->mmio_read; + + dma_free_coherent(edev->dmadev, sizeof(*mmio_read->read_resp), + mmio_read->read_resp, mmio_read->read_resp_dma_addr); +} + +int efa_com_validate_version(struct efa_com_dev *edev) +{ + u32 ctrl_ver_masked; + u32 ctrl_ver; + u32 ver; + + /* + * Make sure the EFA version and the controller version are at least + * as the driver expects + */ + ver = efa_com_reg_read32(edev, EFA_REGS_VERSION_OFF); + ctrl_ver = efa_com_reg_read32(edev, + EFA_REGS_CONTROLLER_VERSION_OFF); + + ibdev_dbg(edev->efa_dev, "efa device version: %d.%d\n", + (ver & 
EFA_REGS_VERSION_MAJOR_VERSION_MASK) >> + EFA_REGS_VERSION_MAJOR_VERSION_SHIFT, + ver & EFA_REGS_VERSION_MINOR_VERSION_MASK); + + if (ver < MIN_EFA_VER) { + ibdev_err(edev->efa_dev, + "EFA version is lower than the minimal version the driver supports\n"); + return -EOPNOTSUPP; + } + + ibdev_dbg(edev->efa_dev, + "efa controller version: %d.%d.%d implementation version %d\n", + (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK) >> + EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT, + (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK) >> + EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_SHIFT, + (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION_MASK), + (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_IMPL_ID_MASK) >> + EFA_REGS_CONTROLLER_VERSION_IMPL_ID_SHIFT); + + ctrl_ver_masked = + (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK) | + (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK) | + (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION_MASK); + + /* Validate the ctrl version without the implementation ID */ + if (ctrl_ver_masked < MIN_EFA_CTRL_VER) { + ibdev_err(edev->efa_dev, + "EFA ctrl version is lower than the minimal ctrl version the driver supports\n"); + return -EOPNOTSUPP; + } + + return 0; +} + +/** + * efa_com_get_dma_width - Retrieve physical dma address width the device + * supports. + * @edev: EFA communication layer struct + * + * Retrieve the maximum physical address bits the device can handle. + * + * @return: > 0 on Success and negative value otherwise. 
+ */ +int efa_com_get_dma_width(struct efa_com_dev *edev) +{ + u32 caps = efa_com_reg_read32(edev, EFA_REGS_CAPS_OFF); + int width; + + width = (caps & EFA_REGS_CAPS_DMA_ADDR_WIDTH_MASK) >> + EFA_REGS_CAPS_DMA_ADDR_WIDTH_SHIFT; + + ibdev_dbg(edev->efa_dev, "DMA width: %d\n", width); + + if (width < 32 || width > 64) { + ibdev_err(edev->efa_dev, "DMA width illegal value: %d\n", width); + return -EINVAL; + } + + edev->dma_addr_bits = width; + + return width; +} + +static int wait_for_reset_state(struct efa_com_dev *edev, u32 timeout, + u16 exp_state) +{ + u32 val, i; + + for (i = 0; i < timeout; i++) { + val = efa_com_reg_read32(edev, EFA_REGS_DEV_STS_OFF); + + if ((val & EFA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK) == + exp_state) + return 0; + + ibdev_dbg(edev->efa_dev, "Reset indication val %d\n", val); + msleep(EFA_POLL_INTERVAL_MS); + } + + return -ETIME; +} + +/** + * efa_com_dev_reset - Perform device FLR to the device. + * @edev: EFA communication layer struct + * @reset_reason: Specify what is the trigger for the reset in case of an error. + * + * @return - 0 on success, negative value on failure. 
+ */ +int efa_com_dev_reset(struct efa_com_dev *edev, + enum efa_regs_reset_reason_types reset_reason) +{ + u32 stat, timeout, cap, reset_val; + int err; + + stat = efa_com_reg_read32(edev, EFA_REGS_DEV_STS_OFF); + cap = efa_com_reg_read32(edev, EFA_REGS_CAPS_OFF); + + if (!(stat & EFA_REGS_DEV_STS_READY_MASK)) { + ibdev_err(edev->efa_dev, + "Device isn't ready, can't reset device\n"); + return -EINVAL; + } + + timeout = (cap & EFA_REGS_CAPS_RESET_TIMEOUT_MASK) >> + EFA_REGS_CAPS_RESET_TIMEOUT_SHIFT; + if (!timeout) { + ibdev_err(edev->efa_dev, "Invalid timeout value\n"); + return -EINVAL; + } + + /* start reset */ + reset_val = EFA_REGS_DEV_CTL_DEV_RESET_MASK; + reset_val |= (reset_reason << EFA_REGS_DEV_CTL_RESET_REASON_SHIFT) & + EFA_REGS_DEV_CTL_RESET_REASON_MASK; + writel(reset_val, edev->reg_bar + EFA_REGS_DEV_CTL_OFF); + + /* reset clears the mmio readless address, restore it */ + efa_com_mmio_reg_read_resp_addr_init(edev); + + err = wait_for_reset_state(edev, timeout, + EFA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK); + if (err) { + ibdev_err(edev->efa_dev, "Reset indication didn't turn on\n"); + return err; + } + + /* reset done */ + writel(0, edev->reg_bar + EFA_REGS_DEV_CTL_OFF); + err = wait_for_reset_state(edev, timeout, 0); + if (err) { + ibdev_err(edev->efa_dev, "Reset indication didn't turn off\n"); + return err; + } + + timeout = (cap & EFA_REGS_CAPS_ADMIN_CMD_TO_MASK) >> + EFA_REGS_CAPS_ADMIN_CMD_TO_SHIFT; + if (timeout) + /* the resolution of timeout reg is 100ms */ + edev->aq.completion_timeout = timeout * 100000; + else + edev->aq.completion_timeout = ADMIN_CMD_TIMEOUT_US; + + return 0; +} -- cgit v1.2.3 From e9c6c537308895e8b2b98ad15e82938935bdf1cf Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Sun, 5 May 2019 20:59:28 +0300 Subject: RDMA/efa: Add common command handlers Add the EFA common commands implementation. 
Signed-off-by: Gal Pressman Reviewed-by: Steve Wise Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/efa/efa_com_cmd.c | 692 ++++++++++++++++++++++++++++++++ 1 file changed, 692 insertions(+) create mode 100644 drivers/infiniband/hw/efa/efa_com_cmd.c diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.c b/drivers/infiniband/hw/efa/efa_com_cmd.c new file mode 100644 index 000000000000..14227725521c --- /dev/null +++ b/drivers/infiniband/hw/efa/efa_com_cmd.c @@ -0,0 +1,692 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +/* + * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. + */ + +#include "efa.h" +#include "efa_com.h" +#include "efa_com_cmd.h" + +void efa_com_set_dma_addr(dma_addr_t addr, u32 *addr_high, u32 *addr_low) +{ + *addr_low = lower_32_bits(addr); + *addr_high = upper_32_bits(addr); +} + +int efa_com_create_qp(struct efa_com_dev *edev, + struct efa_com_create_qp_params *params, + struct efa_com_create_qp_result *res) +{ + struct efa_admin_create_qp_cmd create_qp_cmd = {}; + struct efa_admin_create_qp_resp cmd_completion; + struct efa_com_admin_queue *aq = &edev->aq; + int err; + + create_qp_cmd.aq_common_desc.opcode = EFA_ADMIN_CREATE_QP; + + create_qp_cmd.pd = params->pd; + create_qp_cmd.qp_type = params->qp_type; + create_qp_cmd.rq_base_addr = params->rq_base_addr; + create_qp_cmd.send_cq_idx = params->send_cq_idx; + create_qp_cmd.recv_cq_idx = params->recv_cq_idx; + create_qp_cmd.qp_alloc_size.send_queue_ring_size = + params->sq_ring_size_in_bytes; + create_qp_cmd.qp_alloc_size.send_queue_depth = + params->sq_depth; + create_qp_cmd.qp_alloc_size.recv_queue_ring_size = + params->rq_ring_size_in_bytes; + create_qp_cmd.qp_alloc_size.recv_queue_depth = + params->rq_depth; + create_qp_cmd.uar = params->uarn; + + err = efa_com_cmd_exec(aq, + (struct efa_admin_aq_entry *)&create_qp_cmd, + sizeof(create_qp_cmd), + (struct efa_admin_acq_entry *)&cmd_completion, + sizeof(cmd_completion)); + if (err) { + 
ibdev_err(edev->efa_dev, "Failed to create qp [%d]\n", err); + return err; + } + + res->qp_handle = cmd_completion.qp_handle; + res->qp_num = cmd_completion.qp_num; + res->sq_db_offset = cmd_completion.sq_db_offset; + res->rq_db_offset = cmd_completion.rq_db_offset; + res->llq_descriptors_offset = cmd_completion.llq_descriptors_offset; + res->send_sub_cq_idx = cmd_completion.send_sub_cq_idx; + res->recv_sub_cq_idx = cmd_completion.recv_sub_cq_idx; + + return err; +} + +int efa_com_modify_qp(struct efa_com_dev *edev, + struct efa_com_modify_qp_params *params) +{ + struct efa_com_admin_queue *aq = &edev->aq; + struct efa_admin_modify_qp_cmd cmd = {}; + struct efa_admin_modify_qp_resp resp; + int err; + + cmd.aq_common_desc.opcode = EFA_ADMIN_MODIFY_QP; + cmd.modify_mask = params->modify_mask; + cmd.qp_handle = params->qp_handle; + cmd.qp_state = params->qp_state; + cmd.cur_qp_state = params->cur_qp_state; + cmd.qkey = params->qkey; + cmd.sq_psn = params->sq_psn; + cmd.sq_drained_async_notify = params->sq_drained_async_notify; + + err = efa_com_cmd_exec(aq, + (struct efa_admin_aq_entry *)&cmd, + sizeof(cmd), + (struct efa_admin_acq_entry *)&resp, + sizeof(resp)); + if (err) { + ibdev_err(edev->efa_dev, + "Failed to modify qp-%u modify_mask[%#x] [%d]\n", + cmd.qp_handle, cmd.modify_mask, err); + return err; + } + + return 0; +} + +int efa_com_query_qp(struct efa_com_dev *edev, + struct efa_com_query_qp_params *params, + struct efa_com_query_qp_result *result) +{ + struct efa_com_admin_queue *aq = &edev->aq; + struct efa_admin_query_qp_cmd cmd = {}; + struct efa_admin_query_qp_resp resp; + int err; + + cmd.aq_common_desc.opcode = EFA_ADMIN_QUERY_QP; + cmd.qp_handle = params->qp_handle; + + err = efa_com_cmd_exec(aq, + (struct efa_admin_aq_entry *)&cmd, + sizeof(cmd), + (struct efa_admin_acq_entry *)&resp, + sizeof(resp)); + if (err) { + ibdev_err(edev->efa_dev, "Failed to query qp-%u [%d]\n", + cmd.qp_handle, err); + return err; + } + + result->qp_state = resp.qp_state; 
+ result->qkey = resp.qkey; + result->sq_draining = resp.sq_draining; + result->sq_psn = resp.sq_psn; + + return 0; +} + +int efa_com_destroy_qp(struct efa_com_dev *edev, + struct efa_com_destroy_qp_params *params) +{ + struct efa_admin_destroy_qp_resp cmd_completion; + struct efa_admin_destroy_qp_cmd qp_cmd = {}; + struct efa_com_admin_queue *aq = &edev->aq; + int err; + + qp_cmd.aq_common_desc.opcode = EFA_ADMIN_DESTROY_QP; + qp_cmd.qp_handle = params->qp_handle; + + err = efa_com_cmd_exec(aq, + (struct efa_admin_aq_entry *)&qp_cmd, + sizeof(qp_cmd), + (struct efa_admin_acq_entry *)&cmd_completion, + sizeof(cmd_completion)); + if (err) + ibdev_err(edev->efa_dev, "Failed to destroy qp-%u [%d]\n", + qp_cmd.qp_handle, err); + + return 0; +} + +int efa_com_create_cq(struct efa_com_dev *edev, + struct efa_com_create_cq_params *params, + struct efa_com_create_cq_result *result) +{ + struct efa_admin_create_cq_resp cmd_completion; + struct efa_admin_create_cq_cmd create_cmd = {}; + struct efa_com_admin_queue *aq = &edev->aq; + int err; + + create_cmd.aq_common_desc.opcode = EFA_ADMIN_CREATE_CQ; + create_cmd.cq_caps_2 = (params->entry_size_in_bytes / 4) & + EFA_ADMIN_CREATE_CQ_CMD_CQ_ENTRY_SIZE_WORDS_MASK; + create_cmd.cq_depth = params->cq_depth; + create_cmd.num_sub_cqs = params->num_sub_cqs; + create_cmd.uar = params->uarn; + + efa_com_set_dma_addr(params->dma_addr, + &create_cmd.cq_ba.mem_addr_high, + &create_cmd.cq_ba.mem_addr_low); + + err = efa_com_cmd_exec(aq, + (struct efa_admin_aq_entry *)&create_cmd, + sizeof(create_cmd), + (struct efa_admin_acq_entry *)&cmd_completion, + sizeof(cmd_completion)); + if (err) { + ibdev_err(edev->efa_dev, "Failed to create cq[%d]\n", err); + return err; + } + + result->cq_idx = cmd_completion.cq_idx; + result->actual_depth = params->cq_depth; + + return err; +} + +int efa_com_destroy_cq(struct efa_com_dev *edev, + struct efa_com_destroy_cq_params *params) +{ + struct efa_admin_destroy_cq_cmd destroy_cmd = {}; + struct 
efa_admin_destroy_cq_resp destroy_resp; + struct efa_com_admin_queue *aq = &edev->aq; + int err; + + destroy_cmd.cq_idx = params->cq_idx; + destroy_cmd.aq_common_desc.opcode = EFA_ADMIN_DESTROY_CQ; + + err = efa_com_cmd_exec(aq, + (struct efa_admin_aq_entry *)&destroy_cmd, + sizeof(destroy_cmd), + (struct efa_admin_acq_entry *)&destroy_resp, + sizeof(destroy_resp)); + + if (err) + ibdev_err(edev->efa_dev, "Failed to destroy CQ-%u [%d]\n", + params->cq_idx, err); + + return 0; +} + +int efa_com_register_mr(struct efa_com_dev *edev, + struct efa_com_reg_mr_params *params, + struct efa_com_reg_mr_result *result) +{ + struct efa_admin_reg_mr_resp cmd_completion; + struct efa_com_admin_queue *aq = &edev->aq; + struct efa_admin_reg_mr_cmd mr_cmd = {}; + int err; + + mr_cmd.aq_common_desc.opcode = EFA_ADMIN_REG_MR; + mr_cmd.pd = params->pd; + mr_cmd.mr_length = params->mr_length_in_bytes; + mr_cmd.flags |= params->page_shift & + EFA_ADMIN_REG_MR_CMD_PHYS_PAGE_SIZE_SHIFT_MASK; + mr_cmd.iova = params->iova; + mr_cmd.permissions |= params->permissions & + EFA_ADMIN_REG_MR_CMD_LOCAL_WRITE_ENABLE_MASK; + + if (params->inline_pbl) { + memcpy(mr_cmd.pbl.inline_pbl_array, + params->pbl.inline_pbl_array, + sizeof(mr_cmd.pbl.inline_pbl_array)); + } else { + mr_cmd.pbl.pbl.length = params->pbl.pbl.length; + mr_cmd.pbl.pbl.address.mem_addr_low = + params->pbl.pbl.address.mem_addr_low; + mr_cmd.pbl.pbl.address.mem_addr_high = + params->pbl.pbl.address.mem_addr_high; + mr_cmd.aq_common_desc.flags |= + EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_MASK; + if (params->indirect) + mr_cmd.aq_common_desc.flags |= + EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK; + } + + err = efa_com_cmd_exec(aq, + (struct efa_admin_aq_entry *)&mr_cmd, + sizeof(mr_cmd), + (struct efa_admin_acq_entry *)&cmd_completion, + sizeof(cmd_completion)); + if (err) { + ibdev_err(edev->efa_dev, "Failed to register mr [%d]\n", err); + return err; + } + + result->l_key = cmd_completion.l_key; + result->r_key = 
cmd_completion.r_key; + + return 0; +} + +int efa_com_dereg_mr(struct efa_com_dev *edev, + struct efa_com_dereg_mr_params *params) +{ + struct efa_admin_dereg_mr_resp cmd_completion; + struct efa_com_admin_queue *aq = &edev->aq; + struct efa_admin_dereg_mr_cmd mr_cmd = {}; + int err; + + mr_cmd.aq_common_desc.opcode = EFA_ADMIN_DEREG_MR; + mr_cmd.l_key = params->l_key; + + err = efa_com_cmd_exec(aq, + (struct efa_admin_aq_entry *)&mr_cmd, + sizeof(mr_cmd), + (struct efa_admin_acq_entry *)&cmd_completion, + sizeof(cmd_completion)); + if (err) + ibdev_err(edev->efa_dev, + "Failed to de-register mr(lkey-%u) [%d]\n", + mr_cmd.l_key, err); + + return 0; +} + +int efa_com_create_ah(struct efa_com_dev *edev, + struct efa_com_create_ah_params *params, + struct efa_com_create_ah_result *result) +{ + struct efa_admin_create_ah_resp cmd_completion; + struct efa_com_admin_queue *aq = &edev->aq; + struct efa_admin_create_ah_cmd ah_cmd = {}; + int err; + + ah_cmd.aq_common_desc.opcode = EFA_ADMIN_CREATE_AH; + + memcpy(ah_cmd.dest_addr, params->dest_addr, sizeof(ah_cmd.dest_addr)); + ah_cmd.pd = params->pdn; + + err = efa_com_cmd_exec(aq, + (struct efa_admin_aq_entry *)&ah_cmd, + sizeof(ah_cmd), + (struct efa_admin_acq_entry *)&cmd_completion, + sizeof(cmd_completion)); + if (err) { + ibdev_err(edev->efa_dev, "Failed to create ah [%d]\n", err); + return err; + } + + result->ah = cmd_completion.ah; + + return 0; +} + +int efa_com_destroy_ah(struct efa_com_dev *edev, + struct efa_com_destroy_ah_params *params) +{ + struct efa_admin_destroy_ah_resp cmd_completion; + struct efa_admin_destroy_ah_cmd ah_cmd = {}; + struct efa_com_admin_queue *aq = &edev->aq; + int err; + + ah_cmd.aq_common_desc.opcode = EFA_ADMIN_DESTROY_AH; + ah_cmd.ah = params->ah; + ah_cmd.pd = params->pdn; + + err = efa_com_cmd_exec(aq, + (struct efa_admin_aq_entry *)&ah_cmd, + sizeof(ah_cmd), + (struct efa_admin_acq_entry *)&cmd_completion, + sizeof(cmd_completion)); + if (err) + ibdev_err(edev->efa_dev, "Failed 
to destroy ah-%d pd-%d [%d]\n", + ah_cmd.ah, ah_cmd.pd, err); + + return 0; +} + +static bool +efa_com_check_supported_feature_id(struct efa_com_dev *edev, + enum efa_admin_aq_feature_id feature_id) +{ + u32 feature_mask = 1 << feature_id; + + /* Device attributes is always supported */ + if (feature_id != EFA_ADMIN_DEVICE_ATTR && + !(edev->supported_features & feature_mask)) + return false; + + return true; +} + +static int efa_com_get_feature_ex(struct efa_com_dev *edev, + struct efa_admin_get_feature_resp *get_resp, + enum efa_admin_aq_feature_id feature_id, + dma_addr_t control_buf_dma_addr, + u32 control_buff_size) +{ + struct efa_admin_get_feature_cmd get_cmd = {}; + struct efa_com_admin_queue *aq; + int err; + + if (!efa_com_check_supported_feature_id(edev, feature_id)) { + ibdev_err(edev->efa_dev, "Feature %d isn't supported\n", + feature_id); + return -EOPNOTSUPP; + } + + aq = &edev->aq; + + get_cmd.aq_common_descriptor.opcode = EFA_ADMIN_GET_FEATURE; + + if (control_buff_size) + get_cmd.aq_common_descriptor.flags = + EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK; + + + efa_com_set_dma_addr(control_buf_dma_addr, + &get_cmd.control_buffer.address.mem_addr_high, + &get_cmd.control_buffer.address.mem_addr_low); + + get_cmd.control_buffer.length = control_buff_size; + get_cmd.feature_common.feature_id = feature_id; + err = efa_com_cmd_exec(aq, + (struct efa_admin_aq_entry *) + &get_cmd, + sizeof(get_cmd), + (struct efa_admin_acq_entry *) + get_resp, + sizeof(*get_resp)); + + if (err) + ibdev_err(edev->efa_dev, + "Failed to submit get_feature command %d [%d]\n", + feature_id, err); + + return 0; +} + +static int efa_com_get_feature(struct efa_com_dev *edev, + struct efa_admin_get_feature_resp *get_resp, + enum efa_admin_aq_feature_id feature_id) +{ + return efa_com_get_feature_ex(edev, get_resp, feature_id, 0, 0); +} + +int efa_com_get_network_attr(struct efa_com_dev *edev, + struct efa_com_get_network_attr_result *result) +{ + struct 
efa_admin_get_feature_resp resp; + int err; + + err = efa_com_get_feature(edev, &resp, + EFA_ADMIN_NETWORK_ATTR); + if (err) { + ibdev_err(edev->efa_dev, + "Failed to get network attributes %d\n", err); + return err; + } + + memcpy(result->addr, resp.u.network_attr.addr, + sizeof(resp.u.network_attr.addr)); + result->mtu = resp.u.network_attr.mtu; + + return 0; +} + +int efa_com_get_device_attr(struct efa_com_dev *edev, + struct efa_com_get_device_attr_result *result) +{ + struct efa_admin_get_feature_resp resp; + int err; + + err = efa_com_get_feature(edev, &resp, EFA_ADMIN_DEVICE_ATTR); + if (err) { + ibdev_err(edev->efa_dev, "Failed to get device attributes %d\n", + err); + return err; + } + + result->page_size_cap = resp.u.device_attr.page_size_cap; + result->fw_version = resp.u.device_attr.fw_version; + result->admin_api_version = resp.u.device_attr.admin_api_version; + result->device_version = resp.u.device_attr.device_version; + result->supported_features = resp.u.device_attr.supported_features; + result->phys_addr_width = resp.u.device_attr.phys_addr_width; + result->virt_addr_width = resp.u.device_attr.virt_addr_width; + result->db_bar = resp.u.device_attr.db_bar; + + if (result->admin_api_version < 1) { + ibdev_err(edev->efa_dev, + "Failed to get device attr api version [%u < 1]\n", + result->admin_api_version); + return -EINVAL; + } + + edev->supported_features = resp.u.device_attr.supported_features; + err = efa_com_get_feature(edev, &resp, + EFA_ADMIN_QUEUE_ATTR); + if (err) { + ibdev_err(edev->efa_dev, + "Failed to get network attributes %d\n", err); + return err; + } + + result->max_qp = resp.u.queue_attr.max_qp; + result->max_sq_depth = resp.u.queue_attr.max_sq_depth; + result->max_rq_depth = resp.u.queue_attr.max_rq_depth; + result->max_cq = resp.u.queue_attr.max_cq; + result->max_cq_depth = resp.u.queue_attr.max_cq_depth; + result->inline_buf_size = resp.u.queue_attr.inline_buf_size; + result->max_sq_sge = resp.u.queue_attr.max_wr_send_sges; + 
result->max_rq_sge = resp.u.queue_attr.max_wr_recv_sges; + result->max_mr = resp.u.queue_attr.max_mr; + result->max_mr_pages = resp.u.queue_attr.max_mr_pages; + result->max_pd = resp.u.queue_attr.max_pd; + result->max_ah = resp.u.queue_attr.max_ah; + result->max_llq_size = resp.u.queue_attr.max_llq_size; + result->sub_cqs_per_cq = resp.u.queue_attr.sub_cqs_per_cq; + + return 0; +} + +int efa_com_get_hw_hints(struct efa_com_dev *edev, + struct efa_com_get_hw_hints_result *result) +{ + struct efa_admin_get_feature_resp resp; + int err; + + err = efa_com_get_feature(edev, &resp, EFA_ADMIN_HW_HINTS); + if (err) { + ibdev_err(edev->efa_dev, "Failed to get hw hints %d\n", err); + return err; + } + + result->admin_completion_timeout = resp.u.hw_hints.admin_completion_timeout; + result->driver_watchdog_timeout = resp.u.hw_hints.driver_watchdog_timeout; + result->mmio_read_timeout = resp.u.hw_hints.mmio_read_timeout; + result->poll_interval = resp.u.hw_hints.poll_interval; + + return 0; +} + +static int efa_com_set_feature_ex(struct efa_com_dev *edev, + struct efa_admin_set_feature_resp *set_resp, + struct efa_admin_set_feature_cmd *set_cmd, + enum efa_admin_aq_feature_id feature_id, + dma_addr_t control_buf_dma_addr, + u32 control_buff_size) +{ + struct efa_com_admin_queue *aq; + int err; + + if (!efa_com_check_supported_feature_id(edev, feature_id)) { + ibdev_err(edev->efa_dev, "Feature %d isn't supported\n", + feature_id); + return -EOPNOTSUPP; + } + + aq = &edev->aq; + + set_cmd->aq_common_descriptor.opcode = EFA_ADMIN_SET_FEATURE; + if (control_buff_size) { + set_cmd->aq_common_descriptor.flags = + EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK; + efa_com_set_dma_addr(control_buf_dma_addr, + &set_cmd->control_buffer.address.mem_addr_high, + &set_cmd->control_buffer.address.mem_addr_low); + } + + set_cmd->control_buffer.length = control_buff_size; + set_cmd->feature_common.feature_id = feature_id; + err = efa_com_cmd_exec(aq, + (struct efa_admin_aq_entry *)set_cmd, + 
sizeof(*set_cmd), + (struct efa_admin_acq_entry *)set_resp, + sizeof(*set_resp)); + + if (err) + ibdev_err(edev->efa_dev, + "Failed to submit set_feature command %d error: %d\n", + feature_id, err); + + return 0; +} + +static int efa_com_set_feature(struct efa_com_dev *edev, + struct efa_admin_set_feature_resp *set_resp, + struct efa_admin_set_feature_cmd *set_cmd, + enum efa_admin_aq_feature_id feature_id) +{ + return efa_com_set_feature_ex(edev, set_resp, set_cmd, feature_id, + 0, 0); +} + +int efa_com_set_aenq_config(struct efa_com_dev *edev, u32 groups) +{ + struct efa_admin_get_feature_resp get_resp; + struct efa_admin_set_feature_resp set_resp; + struct efa_admin_set_feature_cmd cmd = {}; + int err; + + ibdev_dbg(edev->efa_dev, "Configuring aenq with groups[%#x]\n", groups); + + err = efa_com_get_feature(edev, &get_resp, EFA_ADMIN_AENQ_CONFIG); + if (err) { + ibdev_err(edev->efa_dev, "Failed to get aenq attributes: %d\n", + err); + return err; + } + + ibdev_dbg(edev->efa_dev, + "Get aenq groups: supported[%#x] enabled[%#x]\n", + get_resp.u.aenq.supported_groups, + get_resp.u.aenq.enabled_groups); + + if ((get_resp.u.aenq.supported_groups & groups) != groups) { + ibdev_err(edev->efa_dev, + "Trying to set unsupported aenq groups[%#x] supported[%#x]\n", + groups, get_resp.u.aenq.supported_groups); + return -EOPNOTSUPP; + } + + cmd.u.aenq.enabled_groups = groups; + err = efa_com_set_feature(edev, &set_resp, &cmd, + EFA_ADMIN_AENQ_CONFIG); + if (err) { + ibdev_err(edev->efa_dev, "Failed to set aenq attributes: %d\n", + err); + return err; + } + + return 0; +} + +int efa_com_alloc_pd(struct efa_com_dev *edev, + struct efa_com_alloc_pd_result *result) +{ + struct efa_com_admin_queue *aq = &edev->aq; + struct efa_admin_alloc_pd_cmd cmd = {}; + struct efa_admin_alloc_pd_resp resp; + int err; + + cmd.aq_common_descriptor.opcode = EFA_ADMIN_ALLOC_PD; + + err = efa_com_cmd_exec(aq, + (struct efa_admin_aq_entry *)&cmd, + sizeof(cmd), + (struct efa_admin_acq_entry *)&resp, 
+ sizeof(resp)); + if (err) { + ibdev_err(edev->efa_dev, "Failed to allocate pd[%d]\n", err); + return err; + } + + result->pdn = resp.pd; + + return 0; +} + +int efa_com_dealloc_pd(struct efa_com_dev *edev, + struct efa_com_dealloc_pd_params *params) +{ + struct efa_com_admin_queue *aq = &edev->aq; + struct efa_admin_dealloc_pd_cmd cmd = {}; + struct efa_admin_dealloc_pd_resp resp; + int err; + + cmd.aq_common_descriptor.opcode = EFA_ADMIN_DEALLOC_PD; + cmd.pd = params->pdn; + + err = efa_com_cmd_exec(aq, + (struct efa_admin_aq_entry *)&cmd, + sizeof(cmd), + (struct efa_admin_acq_entry *)&resp, + sizeof(resp)); + if (err) { + ibdev_err(edev->efa_dev, "Failed to deallocate pd-%u [%d]\n", + cmd.pd, err); + return err; + } + + return 0; +} + +int efa_com_alloc_uar(struct efa_com_dev *edev, + struct efa_com_alloc_uar_result *result) +{ + struct efa_com_admin_queue *aq = &edev->aq; + struct efa_admin_alloc_uar_cmd cmd = {}; + struct efa_admin_alloc_uar_resp resp; + int err; + + cmd.aq_common_descriptor.opcode = EFA_ADMIN_ALLOC_UAR; + + err = efa_com_cmd_exec(aq, + (struct efa_admin_aq_entry *)&cmd, + sizeof(cmd), + (struct efa_admin_acq_entry *)&resp, + sizeof(resp)); + if (err) { + ibdev_err(edev->efa_dev, "Failed to allocate uar[%d]\n", err); + return err; + } + + result->uarn = resp.uar; + + return 0; +} + +int efa_com_dealloc_uar(struct efa_com_dev *edev, + struct efa_com_dealloc_uar_params *params) +{ + struct efa_com_admin_queue *aq = &edev->aq; + struct efa_admin_dealloc_uar_cmd cmd = {}; + struct efa_admin_dealloc_uar_resp resp; + int err; + + cmd.aq_common_descriptor.opcode = EFA_ADMIN_DEALLOC_UAR; + cmd.uar = params->uarn; + + err = efa_com_cmd_exec(aq, + (struct efa_admin_aq_entry *)&cmd, + sizeof(cmd), + (struct efa_admin_acq_entry *)&resp, + sizeof(resp)); + if (err) { + ibdev_err(edev->efa_dev, "Failed to deallocate uar-%u [%d]\n", + cmd.uar, err); + return err; + } + + return 0; +} -- cgit v1.2.3 From 40909f664d279765af430acc5db348a0b71c9b0a Mon Sep 17 
00:00:00 2001 From: Gal Pressman Date: Sun, 5 May 2019 20:59:29 +0300 Subject: RDMA/efa: Add EFA verbs implementation Add a file that implements the EFA verbs. Signed-off-by: Gal Pressman Reviewed-by: Shiraz Saleem Reviewed-by: Steve Wise Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/efa/efa_verbs.c | 1825 +++++++++++++++++++++++++++++++++ 1 file changed, 1825 insertions(+) create mode 100644 drivers/infiniband/hw/efa/efa_verbs.c diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c new file mode 100644 index 000000000000..6d6886c9009f --- /dev/null +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -0,0 +1,1825 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. + */ + +#include + +#include +#include +#include +#include +#include + +#include "efa.h" + +#define EFA_MMAP_FLAG_SHIFT 56 +#define EFA_MMAP_PAGE_MASK GENMASK(EFA_MMAP_FLAG_SHIFT - 1, 0) +#define EFA_MMAP_INVALID U64_MAX + +enum { + EFA_MMAP_DMA_PAGE = 0, + EFA_MMAP_IO_WC, + EFA_MMAP_IO_NC, +}; + +#define EFA_AENQ_ENABLED_GROUPS \ + (BIT(EFA_ADMIN_FATAL_ERROR) | BIT(EFA_ADMIN_WARNING) | \ + BIT(EFA_ADMIN_NOTIFICATION) | BIT(EFA_ADMIN_KEEP_ALIVE)) + +struct efa_mmap_entry { + void *obj; + u64 address; + u64 length; + u32 mmap_page; + u8 mmap_flag; +}; + +static inline u64 get_mmap_key(const struct efa_mmap_entry *efa) +{ + return ((u64)efa->mmap_flag << EFA_MMAP_FLAG_SHIFT) | + ((u64)efa->mmap_page << PAGE_SHIFT); +} + +#define EFA_CHUNK_PAYLOAD_SHIFT 12 +#define EFA_CHUNK_PAYLOAD_SIZE BIT(EFA_CHUNK_PAYLOAD_SHIFT) +#define EFA_CHUNK_PAYLOAD_PTR_SIZE 8 + +#define EFA_CHUNK_SHIFT 12 +#define EFA_CHUNK_SIZE BIT(EFA_CHUNK_SHIFT) +#define EFA_CHUNK_PTR_SIZE sizeof(struct efa_com_ctrl_buff_info) + +#define EFA_PTRS_PER_CHUNK \ + ((EFA_CHUNK_SIZE - EFA_CHUNK_PTR_SIZE) / EFA_CHUNK_PAYLOAD_PTR_SIZE) + +#define EFA_CHUNK_USED_SIZE \ + ((EFA_PTRS_PER_CHUNK * EFA_CHUNK_PAYLOAD_PTR_SIZE) + 
EFA_CHUNK_PTR_SIZE) + +#define EFA_SUPPORTED_ACCESS_FLAGS IB_ACCESS_LOCAL_WRITE + +struct pbl_chunk { + dma_addr_t dma_addr; + u64 *buf; + u32 length; +}; + +struct pbl_chunk_list { + struct pbl_chunk *chunks; + unsigned int size; +}; + +struct pbl_context { + union { + struct { + dma_addr_t dma_addr; + } continuous; + struct { + u32 pbl_buf_size_in_pages; + struct scatterlist *sgl; + int sg_dma_cnt; + struct pbl_chunk_list chunk_list; + } indirect; + } phys; + u64 *pbl_buf; + u32 pbl_buf_size_in_bytes; + u8 physically_continuous; +}; + +static inline struct efa_dev *to_edev(struct ib_device *ibdev) +{ + return container_of(ibdev, struct efa_dev, ibdev); +} + +static inline struct efa_ucontext *to_eucontext(struct ib_ucontext *ibucontext) +{ + return container_of(ibucontext, struct efa_ucontext, ibucontext); +} + +static inline struct efa_pd *to_epd(struct ib_pd *ibpd) +{ + return container_of(ibpd, struct efa_pd, ibpd); +} + +static inline struct efa_mr *to_emr(struct ib_mr *ibmr) +{ + return container_of(ibmr, struct efa_mr, ibmr); +} + +static inline struct efa_qp *to_eqp(struct ib_qp *ibqp) +{ + return container_of(ibqp, struct efa_qp, ibqp); +} + +static inline struct efa_cq *to_ecq(struct ib_cq *ibcq) +{ + return container_of(ibcq, struct efa_cq, ibcq); +} + +static inline struct efa_ah *to_eah(struct ib_ah *ibah) +{ + return container_of(ibah, struct efa_ah, ibah); +} + +#define field_avail(x, fld, sz) (offsetof(typeof(x), fld) + \ + sizeof(((typeof(x) *)0)->fld) <= (sz)) + +#define is_reserved_cleared(reserved) \ + !memchr_inv(reserved, 0, sizeof(reserved)) + +static void *efa_zalloc_mapped(struct efa_dev *dev, dma_addr_t *dma_addr, + size_t size, enum dma_data_direction dir) +{ + void *addr; + + addr = alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO); + if (!addr) + return NULL; + + *dma_addr = dma_map_single(&dev->pdev->dev, addr, size, dir); + if (dma_mapping_error(&dev->pdev->dev, *dma_addr)) { + ibdev_err(&dev->ibdev, "Failed to map DMA address\n"); + 
free_pages_exact(addr, size); + return NULL; + } + + return addr; +} + +/* + * This is only called when the ucontext is destroyed and there can be no + * concurrent query via mmap or allocate on the xarray, thus we can be sure no + * other thread is using the entry pointer. We also know that all the BAR + * pages have either been zap'd or munmaped at this point. Normal pages are + * refcounted and will be freed at the proper time. + */ +static void mmap_entries_remove_free(struct efa_dev *dev, + struct efa_ucontext *ucontext) +{ + struct efa_mmap_entry *entry; + unsigned long mmap_page; + + xa_for_each(&ucontext->mmap_xa, mmap_page, entry) { + xa_erase(&ucontext->mmap_xa, mmap_page); + + ibdev_dbg( + &dev->ibdev, + "mmap: obj[0x%p] key[%#llx] addr[%#llx] len[%#llx] removed\n", + entry->obj, get_mmap_key(entry), entry->address, + entry->length); + if (entry->mmap_flag == EFA_MMAP_DMA_PAGE) + /* DMA mapping is already gone, now free the pages */ + free_pages_exact(phys_to_virt(entry->address), + entry->length); + kfree(entry); + } +} + +static struct efa_mmap_entry *mmap_entry_get(struct efa_dev *dev, + struct efa_ucontext *ucontext, + u64 key, u64 len) +{ + struct efa_mmap_entry *entry; + u64 mmap_page; + + mmap_page = (key & EFA_MMAP_PAGE_MASK) >> PAGE_SHIFT; + if (mmap_page > U32_MAX) + return NULL; + + entry = xa_load(&ucontext->mmap_xa, mmap_page); + if (!entry || get_mmap_key(entry) != key || entry->length != len) + return NULL; + + ibdev_dbg(&dev->ibdev, + "mmap: obj[0x%p] key[%#llx] addr[%#llx] len[%#llx] removed\n", + entry->obj, key, entry->address, entry->length); + + return entry; +} + +/* + * Note this locking scheme cannot support removal of entries, except during + * ucontext destruction when the core code guarentees no concurrency. 
+ */ +static u64 mmap_entry_insert(struct efa_dev *dev, struct efa_ucontext *ucontext, + void *obj, u64 address, u64 length, u8 mmap_flag) +{ + struct efa_mmap_entry *entry; + int err; + + entry = kmalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return EFA_MMAP_INVALID; + + entry->obj = obj; + entry->address = address; + entry->length = length; + entry->mmap_flag = mmap_flag; + + xa_lock(&ucontext->mmap_xa); + entry->mmap_page = ucontext->mmap_xa_page; + ucontext->mmap_xa_page += DIV_ROUND_UP(length, PAGE_SIZE); + err = __xa_insert(&ucontext->mmap_xa, entry->mmap_page, entry, + GFP_KERNEL); + xa_unlock(&ucontext->mmap_xa); + if (err){ + kfree(entry); + return EFA_MMAP_INVALID; + } + + ibdev_dbg( + &dev->ibdev, + "mmap: obj[0x%p] addr[%#llx], len[%#llx], key[%#llx] inserted\n", + entry->obj, entry->address, entry->length, get_mmap_key(entry)); + + return get_mmap_key(entry); +} + +int efa_query_device(struct ib_device *ibdev, + struct ib_device_attr *props, + struct ib_udata *udata) +{ + struct efa_com_get_device_attr_result *dev_attr; + struct efa_ibv_ex_query_device_resp resp = {}; + struct efa_dev *dev = to_edev(ibdev); + int err; + + if (udata && udata->inlen && + !ib_is_udata_cleared(udata, 0, udata->inlen)) { + ibdev_dbg(ibdev, + "Incompatible ABI params, udata not cleared\n"); + return -EINVAL; + } + + dev_attr = &dev->dev_attr; + + memset(props, 0, sizeof(*props)); + props->max_mr_size = dev_attr->max_mr_pages * PAGE_SIZE; + props->page_size_cap = dev_attr->page_size_cap; + props->vendor_id = dev->pdev->vendor; + props->vendor_part_id = dev->pdev->device; + props->hw_ver = dev->pdev->subsystem_device; + props->max_qp = dev_attr->max_qp; + props->max_cq = dev_attr->max_cq; + props->max_pd = dev_attr->max_pd; + props->max_mr = dev_attr->max_mr; + props->max_ah = dev_attr->max_ah; + props->max_cqe = dev_attr->max_cq_depth; + props->max_qp_wr = min_t(u32, dev_attr->max_sq_depth, + dev_attr->max_rq_depth); + props->max_send_sge = dev_attr->max_sq_sge; + 
props->max_recv_sge = dev_attr->max_rq_sge; + + if (udata && udata->outlen) { + resp.max_sq_sge = dev_attr->max_sq_sge; + resp.max_rq_sge = dev_attr->max_rq_sge; + resp.max_sq_wr = dev_attr->max_sq_depth; + resp.max_rq_wr = dev_attr->max_rq_depth; + + err = ib_copy_to_udata(udata, &resp, + min(sizeof(resp), udata->outlen)); + if (err) { + ibdev_dbg(ibdev, + "Failed to copy udata for query_device\n"); + return err; + } + } + + return 0; +} + +int efa_query_port(struct ib_device *ibdev, u8 port, + struct ib_port_attr *props) +{ + struct efa_dev *dev = to_edev(ibdev); + + props->lmc = 1; + + props->state = IB_PORT_ACTIVE; + props->phys_state = 5; + props->gid_tbl_len = 1; + props->pkey_tbl_len = 1; + props->active_speed = IB_SPEED_EDR; + props->active_width = IB_WIDTH_4X; + props->max_mtu = ib_mtu_int_to_enum(dev->mtu); + props->active_mtu = ib_mtu_int_to_enum(dev->mtu); + props->max_msg_sz = dev->mtu; + props->max_vl_num = 1; + + return 0; +} + +int efa_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, + int qp_attr_mask, + struct ib_qp_init_attr *qp_init_attr) +{ + struct efa_dev *dev = to_edev(ibqp->device); + struct efa_com_query_qp_params params = {}; + struct efa_com_query_qp_result result; + struct efa_qp *qp = to_eqp(ibqp); + int err; + +#define EFA_QUERY_QP_SUPP_MASK \ + (IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT | \ + IB_QP_QKEY | IB_QP_SQ_PSN | IB_QP_CAP) + + if (qp_attr_mask & ~EFA_QUERY_QP_SUPP_MASK) { + ibdev_dbg(&dev->ibdev, + "Unsupported qp_attr_mask[%#x] supported[%#x]\n", + qp_attr_mask, EFA_QUERY_QP_SUPP_MASK); + return -EOPNOTSUPP; + } + + memset(qp_attr, 0, sizeof(*qp_attr)); + memset(qp_init_attr, 0, sizeof(*qp_init_attr)); + + params.qp_handle = qp->qp_handle; + err = efa_com_query_qp(&dev->edev, ¶ms, &result); + if (err) + return err; + + qp_attr->qp_state = result.qp_state; + qp_attr->qkey = result.qkey; + qp_attr->sq_psn = result.sq_psn; + qp_attr->sq_draining = result.sq_draining; + qp_attr->port_num = 1; + + 
qp_attr->cap.max_send_wr = qp->max_send_wr; + qp_attr->cap.max_recv_wr = qp->max_recv_wr; + qp_attr->cap.max_send_sge = qp->max_send_sge; + qp_attr->cap.max_recv_sge = qp->max_recv_sge; + qp_attr->cap.max_inline_data = qp->max_inline_data; + + qp_init_attr->qp_type = ibqp->qp_type; + qp_init_attr->recv_cq = ibqp->recv_cq; + qp_init_attr->send_cq = ibqp->send_cq; + qp_init_attr->qp_context = ibqp->qp_context; + qp_init_attr->cap = qp_attr->cap; + + return 0; +} + +int efa_query_gid(struct ib_device *ibdev, u8 port, int index, + union ib_gid *gid) +{ + struct efa_dev *dev = to_edev(ibdev); + + memcpy(gid->raw, dev->addr, sizeof(dev->addr)); + + return 0; +} + +int efa_query_pkey(struct ib_device *ibdev, u8 port, u16 index, + u16 *pkey) +{ + if (index > 0) + return -EINVAL; + + *pkey = 0xffff; + return 0; +} + +static int efa_pd_dealloc(struct efa_dev *dev, u16 pdn) +{ + struct efa_com_dealloc_pd_params params = { + .pdn = pdn, + }; + + return efa_com_dealloc_pd(&dev->edev, ¶ms); +} + +int efa_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) +{ + struct efa_dev *dev = to_edev(ibpd->device); + struct efa_ibv_alloc_pd_resp resp = {}; + struct efa_com_alloc_pd_result result; + struct efa_pd *pd = to_epd(ibpd); + int err; + + if (udata->inlen && + !ib_is_udata_cleared(udata, 0, udata->inlen)) { + ibdev_dbg(&dev->ibdev, + "Incompatible ABI params, udata not cleared\n"); + err = -EINVAL; + goto err_out; + } + + err = efa_com_alloc_pd(&dev->edev, &result); + if (err) + goto err_out; + + pd->pdn = result.pdn; + resp.pdn = result.pdn; + + if (udata->outlen) { + err = ib_copy_to_udata(udata, &resp, + min(sizeof(resp), udata->outlen)); + if (err) { + ibdev_dbg(&dev->ibdev, + "Failed to copy udata for alloc_pd\n"); + goto err_dealloc_pd; + } + } + + ibdev_dbg(&dev->ibdev, "Allocated pd[%d]\n", pd->pdn); + + return 0; + +err_dealloc_pd: + efa_pd_dealloc(dev, result.pdn); +err_out: + atomic64_inc(&dev->stats.sw_stats.alloc_pd_err); + return err; +} + +void efa_dealloc_pd(struct 
ib_pd *ibpd, struct ib_udata *udata) +{ + struct efa_dev *dev = to_edev(ibpd->device); + struct efa_pd *pd = to_epd(ibpd); + + if (udata->inlen && + !ib_is_udata_cleared(udata, 0, udata->inlen)) { + ibdev_dbg(&dev->ibdev, "Incompatible ABI params\n"); + return; + } + + ibdev_dbg(&dev->ibdev, "Dealloc pd[%d]\n", pd->pdn); + efa_pd_dealloc(dev, pd->pdn); +} + +static int efa_destroy_qp_handle(struct efa_dev *dev, u32 qp_handle) +{ + struct efa_com_destroy_qp_params params = { .qp_handle = qp_handle }; + + return efa_com_destroy_qp(&dev->edev, ¶ms); +} + +int efa_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) +{ + struct efa_dev *dev = to_edev(ibqp->pd->device); + struct efa_qp *qp = to_eqp(ibqp); + int err; + + if (udata->inlen && + !ib_is_udata_cleared(udata, 0, udata->inlen)) { + ibdev_dbg(&dev->ibdev, "Incompatible ABI params\n"); + return -EINVAL; + } + + ibdev_dbg(&dev->ibdev, "Destroy qp[%u]\n", ibqp->qp_num); + err = efa_destroy_qp_handle(dev, qp->qp_handle); + if (err) + return err; + + if (qp->rq_cpu_addr) { + ibdev_dbg(&dev->ibdev, + "qp->cpu_addr[0x%p] freed: size[%lu], dma[%pad]\n", + qp->rq_cpu_addr, qp->rq_size, + &qp->rq_dma_addr); + dma_unmap_single(&dev->pdev->dev, qp->rq_dma_addr, qp->rq_size, + DMA_TO_DEVICE); + } + + kfree(qp); + return 0; +} + +static int qp_mmap_entries_setup(struct efa_qp *qp, + struct efa_dev *dev, + struct efa_ucontext *ucontext, + struct efa_com_create_qp_params *params, + struct efa_ibv_create_qp_resp *resp) +{ + /* + * Once an entry is inserted it might be mmapped, hence cannot be + * cleaned up until dealloc_ucontext. 
+ */ + resp->sq_db_mmap_key = + mmap_entry_insert(dev, ucontext, qp, + dev->db_bar_addr + resp->sq_db_offset, + PAGE_SIZE, EFA_MMAP_IO_NC); + if (resp->sq_db_mmap_key == EFA_MMAP_INVALID) + return -ENOMEM; + + resp->sq_db_offset &= ~PAGE_MASK; + + resp->llq_desc_mmap_key = + mmap_entry_insert(dev, ucontext, qp, + dev->mem_bar_addr + resp->llq_desc_offset, + PAGE_ALIGN(params->sq_ring_size_in_bytes + + (resp->llq_desc_offset & ~PAGE_MASK)), + EFA_MMAP_IO_WC); + if (resp->llq_desc_mmap_key == EFA_MMAP_INVALID) + return -ENOMEM; + + resp->llq_desc_offset &= ~PAGE_MASK; + + if (qp->rq_size) { + resp->rq_db_mmap_key = + mmap_entry_insert(dev, ucontext, qp, + dev->db_bar_addr + resp->rq_db_offset, + PAGE_SIZE, EFA_MMAP_IO_NC); + if (resp->rq_db_mmap_key == EFA_MMAP_INVALID) + return -ENOMEM; + + resp->rq_db_offset &= ~PAGE_MASK; + + resp->rq_mmap_key = + mmap_entry_insert(dev, ucontext, qp, + virt_to_phys(qp->rq_cpu_addr), + qp->rq_size, EFA_MMAP_DMA_PAGE); + if (resp->rq_mmap_key == EFA_MMAP_INVALID) + return -ENOMEM; + + resp->rq_mmap_size = qp->rq_size; + } + + return 0; +} + +static int efa_qp_validate_cap(struct efa_dev *dev, + struct ib_qp_init_attr *init_attr) +{ + if (init_attr->cap.max_send_wr > dev->dev_attr.max_sq_depth) { + ibdev_dbg(&dev->ibdev, + "qp: requested send wr[%u] exceeds the max[%u]\n", + init_attr->cap.max_send_wr, + dev->dev_attr.max_sq_depth); + return -EINVAL; + } + if (init_attr->cap.max_recv_wr > dev->dev_attr.max_rq_depth) { + ibdev_dbg(&dev->ibdev, + "qp: requested receive wr[%u] exceeds the max[%u]\n", + init_attr->cap.max_recv_wr, + dev->dev_attr.max_rq_depth); + return -EINVAL; + } + if (init_attr->cap.max_send_sge > dev->dev_attr.max_sq_sge) { + ibdev_dbg(&dev->ibdev, + "qp: requested sge send[%u] exceeds the max[%u]\n", + init_attr->cap.max_send_sge, dev->dev_attr.max_sq_sge); + return -EINVAL; + } + if (init_attr->cap.max_recv_sge > dev->dev_attr.max_rq_sge) { + ibdev_dbg(&dev->ibdev, + "qp: requested sge recv[%u] exceeds the 
max[%u]\n", + init_attr->cap.max_recv_sge, dev->dev_attr.max_rq_sge); + return -EINVAL; + } + if (init_attr->cap.max_inline_data > dev->dev_attr.inline_buf_size) { + ibdev_dbg(&dev->ibdev, + "qp: requested inline data[%u] exceeds the max[%u]\n", + init_attr->cap.max_inline_data, + dev->dev_attr.inline_buf_size); + return -EINVAL; + } + + return 0; +} + +static int efa_qp_validate_attr(struct efa_dev *dev, + struct ib_qp_init_attr *init_attr) +{ + if (init_attr->qp_type != IB_QPT_DRIVER && + init_attr->qp_type != IB_QPT_UD) { + ibdev_dbg(&dev->ibdev, + "Unsupported qp type %d\n", init_attr->qp_type); + return -EOPNOTSUPP; + } + + if (init_attr->srq) { + ibdev_dbg(&dev->ibdev, "SRQ is not supported\n"); + return -EOPNOTSUPP; + } + + if (init_attr->create_flags) { + ibdev_dbg(&dev->ibdev, "Unsupported create flags\n"); + return -EOPNOTSUPP; + } + + return 0; +} + +struct ib_qp *efa_create_qp(struct ib_pd *ibpd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata) +{ + struct efa_com_create_qp_params create_qp_params = {}; + struct efa_com_create_qp_result create_qp_resp; + struct efa_dev *dev = to_edev(ibpd->device); + struct efa_ibv_create_qp_resp resp = {}; + struct efa_ibv_create_qp cmd = {}; + bool rq_entry_inserted = false; + struct efa_ucontext *ucontext; + struct efa_qp *qp; + int err; + + ucontext = rdma_udata_to_drv_context(udata, struct efa_ucontext, + ibucontext); + + err = efa_qp_validate_cap(dev, init_attr); + if (err) + goto err_out; + + err = efa_qp_validate_attr(dev, init_attr); + if (err) + goto err_out; + + if (!field_avail(cmd, driver_qp_type, udata->inlen)) { + ibdev_dbg(&dev->ibdev, + "Incompatible ABI params, no input udata\n"); + err = -EINVAL; + goto err_out; + } + + if (udata->inlen > sizeof(cmd) && + !ib_is_udata_cleared(udata, sizeof(cmd), + udata->inlen - sizeof(cmd))) { + ibdev_dbg(&dev->ibdev, + "Incompatible ABI params, unknown fields in udata\n"); + err = -EINVAL; + goto err_out; + } + + err = ib_copy_from_udata(&cmd, udata, + 
min(sizeof(cmd), udata->inlen)); + if (err) { + ibdev_dbg(&dev->ibdev, + "Cannot copy udata for create_qp\n"); + goto err_out; + } + + if (cmd.comp_mask) { + ibdev_dbg(&dev->ibdev, + "Incompatible ABI params, unknown fields in udata\n"); + err = -EINVAL; + goto err_out; + } + + qp = kzalloc(sizeof(*qp), GFP_KERNEL); + if (!qp) { + err = -ENOMEM; + goto err_out; + } + + create_qp_params.uarn = ucontext->uarn; + create_qp_params.pd = to_epd(ibpd)->pdn; + + if (init_attr->qp_type == IB_QPT_UD) { + create_qp_params.qp_type = EFA_ADMIN_QP_TYPE_UD; + } else if (cmd.driver_qp_type == EFA_QP_DRIVER_TYPE_SRD) { + create_qp_params.qp_type = EFA_ADMIN_QP_TYPE_SRD; + } else { + ibdev_dbg(&dev->ibdev, + "Unsupported qp type %d driver qp type %d\n", + init_attr->qp_type, cmd.driver_qp_type); + err = -EOPNOTSUPP; + goto err_free_qp; + } + + ibdev_dbg(&dev->ibdev, "Create QP: qp type %d driver qp type %#x\n", + init_attr->qp_type, cmd.driver_qp_type); + create_qp_params.send_cq_idx = to_ecq(init_attr->send_cq)->cq_idx; + create_qp_params.recv_cq_idx = to_ecq(init_attr->recv_cq)->cq_idx; + create_qp_params.sq_depth = init_attr->cap.max_send_wr; + create_qp_params.sq_ring_size_in_bytes = cmd.sq_ring_size; + + create_qp_params.rq_depth = init_attr->cap.max_recv_wr; + create_qp_params.rq_ring_size_in_bytes = cmd.rq_ring_size; + qp->rq_size = PAGE_ALIGN(create_qp_params.rq_ring_size_in_bytes); + if (qp->rq_size) { + qp->rq_cpu_addr = efa_zalloc_mapped(dev, &qp->rq_dma_addr, + qp->rq_size, DMA_TO_DEVICE); + if (!qp->rq_cpu_addr) { + err = -ENOMEM; + goto err_free_qp; + } + + ibdev_dbg(&dev->ibdev, + "qp->cpu_addr[0x%p] allocated: size[%lu], dma[%pad]\n", + qp->rq_cpu_addr, qp->rq_size, &qp->rq_dma_addr); + create_qp_params.rq_base_addr = qp->rq_dma_addr; + } + + err = efa_com_create_qp(&dev->edev, &create_qp_params, + &create_qp_resp); + if (err) + goto err_free_mapped; + + resp.sq_db_offset = create_qp_resp.sq_db_offset; + resp.rq_db_offset = create_qp_resp.rq_db_offset; + 
resp.llq_desc_offset = create_qp_resp.llq_descriptors_offset; + resp.send_sub_cq_idx = create_qp_resp.send_sub_cq_idx; + resp.recv_sub_cq_idx = create_qp_resp.recv_sub_cq_idx; + + err = qp_mmap_entries_setup(qp, dev, ucontext, &create_qp_params, + &resp); + if (err) + goto err_destroy_qp; + + rq_entry_inserted = true; + qp->qp_handle = create_qp_resp.qp_handle; + qp->ibqp.qp_num = create_qp_resp.qp_num; + qp->ibqp.qp_type = init_attr->qp_type; + qp->max_send_wr = init_attr->cap.max_send_wr; + qp->max_recv_wr = init_attr->cap.max_recv_wr; + qp->max_send_sge = init_attr->cap.max_send_sge; + qp->max_recv_sge = init_attr->cap.max_recv_sge; + qp->max_inline_data = init_attr->cap.max_inline_data; + + if (udata->outlen) { + err = ib_copy_to_udata(udata, &resp, + min(sizeof(resp), udata->outlen)); + if (err) { + ibdev_dbg(&dev->ibdev, + "Failed to copy udata for qp[%u]\n", + create_qp_resp.qp_num); + goto err_destroy_qp; + } + } + + ibdev_dbg(&dev->ibdev, "Created qp[%d]\n", qp->ibqp.qp_num); + + return &qp->ibqp; + +err_destroy_qp: + efa_destroy_qp_handle(dev, create_qp_resp.qp_handle); +err_free_mapped: + if (qp->rq_size) { + dma_unmap_single(&dev->pdev->dev, qp->rq_dma_addr, qp->rq_size, + DMA_TO_DEVICE); + if (!rq_entry_inserted) + free_pages_exact(qp->rq_cpu_addr, qp->rq_size); + } +err_free_qp: + kfree(qp); +err_out: + atomic64_inc(&dev->stats.sw_stats.create_qp_err); + return ERR_PTR(err); +} + +static int efa_modify_qp_validate(struct efa_dev *dev, struct efa_qp *qp, + struct ib_qp_attr *qp_attr, int qp_attr_mask, + enum ib_qp_state cur_state, + enum ib_qp_state new_state) +{ +#define EFA_MODIFY_QP_SUPP_MASK \ + (IB_QP_STATE | IB_QP_CUR_STATE | IB_QP_EN_SQD_ASYNC_NOTIFY | \ + IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_QKEY | IB_QP_SQ_PSN) + + if (qp_attr_mask & ~EFA_MODIFY_QP_SUPP_MASK) { + ibdev_dbg(&dev->ibdev, + "Unsupported qp_attr_mask[%#x] supported[%#x]\n", + qp_attr_mask, EFA_MODIFY_QP_SUPP_MASK); + return -EOPNOTSUPP; + } + + if (!ib_modify_qp_is_ok(cur_state, 
new_state, IB_QPT_UD, + qp_attr_mask)) { + ibdev_dbg(&dev->ibdev, "Invalid modify QP parameters\n"); + return -EINVAL; + } + + if ((qp_attr_mask & IB_QP_PORT) && qp_attr->port_num != 1) { + ibdev_dbg(&dev->ibdev, "Can't change port num\n"); + return -EOPNOTSUPP; + } + + if ((qp_attr_mask & IB_QP_PKEY_INDEX) && qp_attr->pkey_index) { + ibdev_dbg(&dev->ibdev, "Can't change pkey index\n"); + return -EOPNOTSUPP; + } + + return 0; +} + +int efa_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, + int qp_attr_mask, struct ib_udata *udata) +{ + struct efa_dev *dev = to_edev(ibqp->device); + struct efa_com_modify_qp_params params = {}; + struct efa_qp *qp = to_eqp(ibqp); + enum ib_qp_state cur_state; + enum ib_qp_state new_state; + int err; + + if (udata->inlen && + !ib_is_udata_cleared(udata, 0, udata->inlen)) { + ibdev_dbg(&dev->ibdev, + "Incompatible ABI params, udata not cleared\n"); + return -EINVAL; + } + + cur_state = qp_attr_mask & IB_QP_CUR_STATE ? qp_attr->cur_qp_state : + qp->state; + new_state = qp_attr_mask & IB_QP_STATE ? 
qp_attr->qp_state : cur_state; + + err = efa_modify_qp_validate(dev, qp, qp_attr, qp_attr_mask, cur_state, + new_state); + if (err) + return err; + + params.qp_handle = qp->qp_handle; + + if (qp_attr_mask & IB_QP_STATE) { + params.modify_mask |= BIT(EFA_ADMIN_QP_STATE_BIT) | + BIT(EFA_ADMIN_CUR_QP_STATE_BIT); + params.cur_qp_state = qp_attr->cur_qp_state; + params.qp_state = qp_attr->qp_state; + } + + if (qp_attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) { + params.modify_mask |= + BIT(EFA_ADMIN_SQ_DRAINED_ASYNC_NOTIFY_BIT); + params.sq_drained_async_notify = qp_attr->en_sqd_async_notify; + } + + if (qp_attr_mask & IB_QP_QKEY) { + params.modify_mask |= BIT(EFA_ADMIN_QKEY_BIT); + params.qkey = qp_attr->qkey; + } + + if (qp_attr_mask & IB_QP_SQ_PSN) { + params.modify_mask |= BIT(EFA_ADMIN_SQ_PSN_BIT); + params.sq_psn = qp_attr->sq_psn; + } + + err = efa_com_modify_qp(&dev->edev, ¶ms); + if (err) + return err; + + qp->state = new_state; + + return 0; +} + +static int efa_destroy_cq_idx(struct efa_dev *dev, int cq_idx) +{ + struct efa_com_destroy_cq_params params = { .cq_idx = cq_idx }; + + return efa_com_destroy_cq(&dev->edev, ¶ms); +} + +int efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) +{ + struct efa_dev *dev = to_edev(ibcq->device); + struct efa_cq *cq = to_ecq(ibcq); + int err; + + if (udata->inlen && + !ib_is_udata_cleared(udata, 0, udata->inlen)) { + ibdev_dbg(&dev->ibdev, "Incompatible ABI params\n"); + return -EINVAL; + } + + ibdev_dbg(&dev->ibdev, + "Destroy cq[%d] virt[0x%p] freed: size[%lu], dma[%pad]\n", + cq->cq_idx, cq->cpu_addr, cq->size, &cq->dma_addr); + + err = efa_destroy_cq_idx(dev, cq->cq_idx); + if (err) + return err; + + dma_unmap_single(&dev->pdev->dev, cq->dma_addr, cq->size, + DMA_FROM_DEVICE); + + kfree(cq); + return 0; +} + +static int cq_mmap_entries_setup(struct efa_dev *dev, struct efa_cq *cq, + struct efa_ibv_create_cq_resp *resp) +{ + resp->q_mmap_size = cq->size; + resp->q_mmap_key = mmap_entry_insert(dev, cq->ucontext, cq, + 
virt_to_phys(cq->cpu_addr), + cq->size, EFA_MMAP_DMA_PAGE); + if (resp->q_mmap_key == EFA_MMAP_INVALID) + return -ENOMEM; + + return 0; +} + +static struct ib_cq *do_create_cq(struct ib_device *ibdev, int entries, + int vector, struct ib_ucontext *ibucontext, + struct ib_udata *udata) +{ + struct efa_ibv_create_cq_resp resp = {}; + struct efa_com_create_cq_params params; + struct efa_com_create_cq_result result; + struct efa_dev *dev = to_edev(ibdev); + struct efa_ibv_create_cq cmd = {}; + bool cq_entry_inserted = false; + struct efa_cq *cq; + int err; + + ibdev_dbg(ibdev, "create_cq entries %d\n", entries); + + if (entries < 1 || entries > dev->dev_attr.max_cq_depth) { + ibdev_dbg(ibdev, + "cq: requested entries[%u] non-positive or greater than max[%u]\n", + entries, dev->dev_attr.max_cq_depth); + err = -EINVAL; + goto err_out; + } + + if (!field_avail(cmd, num_sub_cqs, udata->inlen)) { + ibdev_dbg(ibdev, + "Incompatible ABI params, no input udata\n"); + err = -EINVAL; + goto err_out; + } + + if (udata->inlen > sizeof(cmd) && + !ib_is_udata_cleared(udata, sizeof(cmd), + udata->inlen - sizeof(cmd))) { + ibdev_dbg(ibdev, + "Incompatible ABI params, unknown fields in udata\n"); + err = -EINVAL; + goto err_out; + } + + err = ib_copy_from_udata(&cmd, udata, + min(sizeof(cmd), udata->inlen)); + if (err) { + ibdev_dbg(ibdev, "Cannot copy udata for create_cq\n"); + goto err_out; + } + + if (cmd.comp_mask || !is_reserved_cleared(cmd.reserved_50)) { + ibdev_dbg(ibdev, + "Incompatible ABI params, unknown fields in udata\n"); + err = -EINVAL; + goto err_out; + } + + if (!cmd.cq_entry_size) { + ibdev_dbg(ibdev, + "Invalid entry size [%u]\n", cmd.cq_entry_size); + err = -EINVAL; + goto err_out; + } + + if (cmd.num_sub_cqs != dev->dev_attr.sub_cqs_per_cq) { + ibdev_dbg(ibdev, + "Invalid number of sub cqs[%u] expected[%u]\n", + cmd.num_sub_cqs, dev->dev_attr.sub_cqs_per_cq); + err = -EINVAL; + goto err_out; + } + + cq = kzalloc(sizeof(*cq), GFP_KERNEL); + if (!cq) { + err = 
-ENOMEM; + goto err_out; + } + + cq->ucontext = to_eucontext(ibucontext); + cq->size = PAGE_ALIGN(cmd.cq_entry_size * entries * cmd.num_sub_cqs); + cq->cpu_addr = efa_zalloc_mapped(dev, &cq->dma_addr, cq->size, + DMA_FROM_DEVICE); + if (!cq->cpu_addr) { + err = -ENOMEM; + goto err_free_cq; + } + + params.uarn = cq->ucontext->uarn; + params.cq_depth = entries; + params.dma_addr = cq->dma_addr; + params.entry_size_in_bytes = cmd.cq_entry_size; + params.num_sub_cqs = cmd.num_sub_cqs; + err = efa_com_create_cq(&dev->edev, ¶ms, &result); + if (err) + goto err_free_mapped; + + resp.cq_idx = result.cq_idx; + cq->cq_idx = result.cq_idx; + cq->ibcq.cqe = result.actual_depth; + WARN_ON_ONCE(entries != result.actual_depth); + + err = cq_mmap_entries_setup(dev, cq, &resp); + if (err) { + ibdev_dbg(ibdev, + "Could not setup cq[%u] mmap entries\n", cq->cq_idx); + goto err_destroy_cq; + } + + cq_entry_inserted = true; + + if (udata->outlen) { + err = ib_copy_to_udata(udata, &resp, + min(sizeof(resp), udata->outlen)); + if (err) { + ibdev_dbg(ibdev, + "Failed to copy udata for create_cq\n"); + goto err_destroy_cq; + } + } + + ibdev_dbg(ibdev, + "Created cq[%d], cq depth[%u]. 
dma[%pad] virt[0x%p]\n", + cq->cq_idx, result.actual_depth, &cq->dma_addr, cq->cpu_addr); + + return &cq->ibcq; + +err_destroy_cq: + efa_destroy_cq_idx(dev, cq->cq_idx); +err_free_mapped: + dma_unmap_single(&dev->pdev->dev, cq->dma_addr, cq->size, + DMA_FROM_DEVICE); + if (!cq_entry_inserted) + free_pages_exact(cq->cpu_addr, cq->size); +err_free_cq: + kfree(cq); +err_out: + atomic64_inc(&dev->stats.sw_stats.create_cq_err); + return ERR_PTR(err); +} + +struct ib_cq *efa_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, + struct ib_udata *udata) +{ + struct efa_ucontext *ucontext = rdma_udata_to_drv_context(udata, + struct efa_ucontext, + ibucontext); + + return do_create_cq(ibdev, attr->cqe, attr->comp_vector, + &ucontext->ibucontext, udata); +} + +static int umem_to_page_list(struct efa_dev *dev, + struct ib_umem *umem, + u64 *page_list, + u32 hp_cnt, + u8 hp_shift) +{ + u32 pages_in_hp = BIT(hp_shift - PAGE_SHIFT); + struct sg_dma_page_iter sg_iter; + unsigned int page_idx = 0; + unsigned int hp_idx = 0; + + ibdev_dbg(&dev->ibdev, "hp_cnt[%u], pages_in_hp[%u]\n", + hp_cnt, pages_in_hp); + + for_each_sg_dma_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) { + if (page_idx % pages_in_hp == 0) { + page_list[hp_idx] = sg_page_iter_dma_address(&sg_iter); + hp_idx++; + } + + page_idx++; + } + + return 0; +} + +static struct scatterlist *efa_vmalloc_buf_to_sg(u64 *buf, int page_cnt) +{ + struct scatterlist *sglist; + struct page *pg; + int i; + + sglist = kcalloc(page_cnt, sizeof(*sglist), GFP_KERNEL); + if (!sglist) + return NULL; + sg_init_table(sglist, page_cnt); + for (i = 0; i < page_cnt; i++) { + pg = vmalloc_to_page(buf); + if (!pg) + goto err; + sg_set_page(&sglist[i], pg, PAGE_SIZE, 0); + buf += PAGE_SIZE / sizeof(*buf); + } + return sglist; + +err: + kfree(sglist); + return NULL; +} + +/* + * create a chunk list of physical pages dma addresses from the supplied + * scatter gather list + */ +static int pbl_chunk_list_create(struct efa_dev 
*dev, struct pbl_context *pbl) +{ + unsigned int entry, payloads_in_sg, chunk_list_size, chunk_idx, payload_idx; + struct pbl_chunk_list *chunk_list = &pbl->phys.indirect.chunk_list; + int page_cnt = pbl->phys.indirect.pbl_buf_size_in_pages; + struct scatterlist *pages_sgl = pbl->phys.indirect.sgl; + int sg_dma_cnt = pbl->phys.indirect.sg_dma_cnt; + struct efa_com_ctrl_buff_info *ctrl_buf; + u64 *cur_chunk_buf, *prev_chunk_buf; + struct scatterlist *sg; + dma_addr_t dma_addr; + int i; + + /* allocate a chunk list that consists of 4KB chunks */ + chunk_list_size = DIV_ROUND_UP(page_cnt, EFA_PTRS_PER_CHUNK); + + chunk_list->size = chunk_list_size; + chunk_list->chunks = kcalloc(chunk_list_size, + sizeof(*chunk_list->chunks), + GFP_KERNEL); + if (!chunk_list->chunks) + return -ENOMEM; + + ibdev_dbg(&dev->ibdev, + "chunk_list_size[%u] - pages[%u]\n", chunk_list_size, + page_cnt); + + /* allocate chunk buffers: */ + for (i = 0; i < chunk_list_size; i++) { + chunk_list->chunks[i].buf = kzalloc(EFA_CHUNK_SIZE, GFP_KERNEL); + if (!chunk_list->chunks[i].buf) + goto chunk_list_dealloc; + + chunk_list->chunks[i].length = EFA_CHUNK_USED_SIZE; + } + chunk_list->chunks[chunk_list_size - 1].length = + ((page_cnt % EFA_PTRS_PER_CHUNK) * EFA_CHUNK_PAYLOAD_PTR_SIZE) + + EFA_CHUNK_PTR_SIZE; + + /* fill the dma addresses of sg list pages to chunks: */ + chunk_idx = 0; + payload_idx = 0; + cur_chunk_buf = chunk_list->chunks[0].buf; + for_each_sg(pages_sgl, sg, sg_dma_cnt, entry) { + payloads_in_sg = sg_dma_len(sg) >> EFA_CHUNK_PAYLOAD_SHIFT; + for (i = 0; i < payloads_in_sg; i++) { + cur_chunk_buf[payload_idx++] = + (sg_dma_address(sg) & ~(EFA_CHUNK_PAYLOAD_SIZE - 1)) + + (EFA_CHUNK_PAYLOAD_SIZE * i); + + if (payload_idx == EFA_PTRS_PER_CHUNK) { + chunk_idx++; + cur_chunk_buf = chunk_list->chunks[chunk_idx].buf; + payload_idx = 0; + } + } + } + + /* map chunks to dma and fill chunks next ptrs */ + for (i = chunk_list_size - 1; i >= 0; i--) { + dma_addr = dma_map_single(&dev->pdev->dev, 
+ chunk_list->chunks[i].buf, + chunk_list->chunks[i].length, + DMA_TO_DEVICE); + if (dma_mapping_error(&dev->pdev->dev, dma_addr)) { + ibdev_err(&dev->ibdev, + "chunk[%u] dma_map_failed\n", i); + goto chunk_list_unmap; + } + + chunk_list->chunks[i].dma_addr = dma_addr; + ibdev_dbg(&dev->ibdev, + "chunk[%u] mapped at [%pad]\n", i, &dma_addr); + + if (!i) + break; + + prev_chunk_buf = chunk_list->chunks[i - 1].buf; + + ctrl_buf = (struct efa_com_ctrl_buff_info *) + &prev_chunk_buf[EFA_PTRS_PER_CHUNK]; + ctrl_buf->length = chunk_list->chunks[i].length; + + efa_com_set_dma_addr(dma_addr, + &ctrl_buf->address.mem_addr_high, + &ctrl_buf->address.mem_addr_low); + } + + return 0; + +chunk_list_unmap: + for (; i < chunk_list_size; i++) { + dma_unmap_single(&dev->pdev->dev, chunk_list->chunks[i].dma_addr, + chunk_list->chunks[i].length, DMA_TO_DEVICE); + } +chunk_list_dealloc: + for (i = 0; i < chunk_list_size; i++) + kfree(chunk_list->chunks[i].buf); + + kfree(chunk_list->chunks); + return -ENOMEM; +} + +static void pbl_chunk_list_destroy(struct efa_dev *dev, struct pbl_context *pbl) +{ + struct pbl_chunk_list *chunk_list = &pbl->phys.indirect.chunk_list; + int i; + + for (i = 0; i < chunk_list->size; i++) { + dma_unmap_single(&dev->pdev->dev, chunk_list->chunks[i].dma_addr, + chunk_list->chunks[i].length, DMA_TO_DEVICE); + kfree(chunk_list->chunks[i].buf); + } + + kfree(chunk_list->chunks); +} + +/* initialize pbl continuous mode: map pbl buffer to a dma address. 
*/ +static int pbl_continuous_initialize(struct efa_dev *dev, + struct pbl_context *pbl) +{ + dma_addr_t dma_addr; + + dma_addr = dma_map_single(&dev->pdev->dev, pbl->pbl_buf, + pbl->pbl_buf_size_in_bytes, DMA_TO_DEVICE); + if (dma_mapping_error(&dev->pdev->dev, dma_addr)) { + ibdev_err(&dev->ibdev, "Unable to map pbl to DMA address\n"); + return -ENOMEM; + } + + pbl->phys.continuous.dma_addr = dma_addr; + ibdev_dbg(&dev->ibdev, + "pbl continuous - dma_addr = %pad, size[%u]\n", + &dma_addr, pbl->pbl_buf_size_in_bytes); + + return 0; +} + +/* + * initialize pbl indirect mode: + * create a chunk list out of the dma addresses of the physical pages of + * pbl buffer. + */ +static int pbl_indirect_initialize(struct efa_dev *dev, struct pbl_context *pbl) +{ + u32 size_in_pages = DIV_ROUND_UP(pbl->pbl_buf_size_in_bytes, PAGE_SIZE); + struct scatterlist *sgl; + int sg_dma_cnt, err; + + BUILD_BUG_ON(EFA_CHUNK_PAYLOAD_SIZE > PAGE_SIZE); + sgl = efa_vmalloc_buf_to_sg(pbl->pbl_buf, size_in_pages); + if (!sgl) + return -ENOMEM; + + sg_dma_cnt = dma_map_sg(&dev->pdev->dev, sgl, size_in_pages, DMA_TO_DEVICE); + if (!sg_dma_cnt) { + err = -EINVAL; + goto err_map; + } + + pbl->phys.indirect.pbl_buf_size_in_pages = size_in_pages; + pbl->phys.indirect.sgl = sgl; + pbl->phys.indirect.sg_dma_cnt = sg_dma_cnt; + err = pbl_chunk_list_create(dev, pbl); + if (err) { + ibdev_dbg(&dev->ibdev, + "chunk_list creation failed[%d]\n", err); + goto err_chunk; + } + + ibdev_dbg(&dev->ibdev, + "pbl indirect - size[%u], chunks[%u]\n", + pbl->pbl_buf_size_in_bytes, + pbl->phys.indirect.chunk_list.size); + + return 0; + +err_chunk: + dma_unmap_sg(&dev->pdev->dev, sgl, size_in_pages, DMA_TO_DEVICE); +err_map: + kfree(sgl); + return err; +} + +static void pbl_indirect_terminate(struct efa_dev *dev, struct pbl_context *pbl) +{ + pbl_chunk_list_destroy(dev, pbl); + dma_unmap_sg(&dev->pdev->dev, pbl->phys.indirect.sgl, + pbl->phys.indirect.pbl_buf_size_in_pages, DMA_TO_DEVICE); + 
kfree(pbl->phys.indirect.sgl); +} + +/* create a page buffer list from a mapped user memory region */ +static int pbl_create(struct efa_dev *dev, + struct pbl_context *pbl, + struct ib_umem *umem, + int hp_cnt, + u8 hp_shift) +{ + int err; + + pbl->pbl_buf_size_in_bytes = hp_cnt * EFA_CHUNK_PAYLOAD_PTR_SIZE; + pbl->pbl_buf = kzalloc(pbl->pbl_buf_size_in_bytes, + GFP_KERNEL | __GFP_NOWARN); + if (pbl->pbl_buf) { + pbl->physically_continuous = 1; + err = umem_to_page_list(dev, umem, pbl->pbl_buf, hp_cnt, + hp_shift); + if (err) + goto err_continuous; + err = pbl_continuous_initialize(dev, pbl); + if (err) + goto err_continuous; + } else { + pbl->physically_continuous = 0; + pbl->pbl_buf = vzalloc(pbl->pbl_buf_size_in_bytes); + if (!pbl->pbl_buf) + return -ENOMEM; + + err = umem_to_page_list(dev, umem, pbl->pbl_buf, hp_cnt, + hp_shift); + if (err) + goto err_indirect; + err = pbl_indirect_initialize(dev, pbl); + if (err) + goto err_indirect; + } + + ibdev_dbg(&dev->ibdev, + "user_pbl_created: user_pages[%u], continuous[%u]\n", + hp_cnt, pbl->physically_continuous); + + return 0; + +err_continuous: + kfree(pbl->pbl_buf); + return err; +err_indirect: + vfree(pbl->pbl_buf); + return err; +} + +static void pbl_destroy(struct efa_dev *dev, struct pbl_context *pbl) +{ + if (pbl->physically_continuous) { + dma_unmap_single(&dev->pdev->dev, pbl->phys.continuous.dma_addr, + pbl->pbl_buf_size_in_bytes, DMA_TO_DEVICE); + kfree(pbl->pbl_buf); + } else { + pbl_indirect_terminate(dev, pbl); + vfree(pbl->pbl_buf); + } +} + +static int efa_create_inline_pbl(struct efa_dev *dev, struct efa_mr *mr, + struct efa_com_reg_mr_params *params) +{ + int err; + + params->inline_pbl = 1; + err = umem_to_page_list(dev, mr->umem, params->pbl.inline_pbl_array, + params->page_num, params->page_shift); + if (err) + return err; + + ibdev_dbg(&dev->ibdev, + "inline_pbl_array - pages[%u]\n", params->page_num); + + return 0; +} + +static int efa_create_pbl(struct efa_dev *dev, + struct pbl_context *pbl, 
+ struct efa_mr *mr, + struct efa_com_reg_mr_params *params) +{ + int err; + + err = pbl_create(dev, pbl, mr->umem, params->page_num, + params->page_shift); + if (err) { + ibdev_dbg(&dev->ibdev, "Failed to create pbl[%d]\n", err); + return err; + } + + params->inline_pbl = 0; + params->indirect = !pbl->physically_continuous; + if (pbl->physically_continuous) { + params->pbl.pbl.length = pbl->pbl_buf_size_in_bytes; + + efa_com_set_dma_addr(pbl->phys.continuous.dma_addr, + &params->pbl.pbl.address.mem_addr_high, + &params->pbl.pbl.address.mem_addr_low); + } else { + params->pbl.pbl.length = + pbl->phys.indirect.chunk_list.chunks[0].length; + + efa_com_set_dma_addr(pbl->phys.indirect.chunk_list.chunks[0].dma_addr, + &params->pbl.pbl.address.mem_addr_high, + &params->pbl.pbl.address.mem_addr_low); + } + + return 0; +} + +static void efa_cont_pages(struct ib_umem *umem, u64 addr, + unsigned long max_page_shift, + int *count, u8 *shift, u32 *ncont) +{ + struct scatterlist *sg; + u64 base = ~0, p = 0; + unsigned long tmp; + unsigned long m; + u64 len, pfn; + int i = 0; + int entry; + + addr = addr >> PAGE_SHIFT; + tmp = (unsigned long)addr; + m = find_first_bit(&tmp, BITS_PER_LONG); + if (max_page_shift) + m = min_t(unsigned long, max_page_shift - PAGE_SHIFT, m); + + for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { + len = DIV_ROUND_UP(sg_dma_len(sg), PAGE_SIZE); + pfn = sg_dma_address(sg) >> PAGE_SHIFT; + if (base + p != pfn) { + /* + * If either the offset or the new + * base are unaligned update m + */ + tmp = (unsigned long)(pfn | p); + if (!IS_ALIGNED(tmp, 1 << m)) + m = find_first_bit(&tmp, BITS_PER_LONG); + + base = pfn; + p = 0; + } + + p += len; + i += len; + } + + if (i) { + m = min_t(unsigned long, ilog2(roundup_pow_of_two(i)), m); + *ncont = DIV_ROUND_UP(i, (1 << m)); + } else { + m = 0; + *ncont = 0; + } + + *shift = PAGE_SHIFT + m; + *count = i; +} + +struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, + u64 virt_addr, int access_flags, + struct 
ib_udata *udata) +{ + struct efa_dev *dev = to_edev(ibpd->device); + struct efa_com_reg_mr_params params = {}; + struct efa_com_reg_mr_result result = {}; + unsigned long max_page_shift; + struct pbl_context pbl; + struct efa_mr *mr; + int inline_size; + int npages; + int err; + + /* + * Every driver-specific input byte must be zero, so check the whole + * inlen range (as efa_dereg_mr and efa_create_ah do) rather than + * only the first sizeof(udata->inlen) bytes. + */ + if (udata->inlen && + !ib_is_udata_cleared(udata, 0, udata->inlen)) { + ibdev_dbg(&dev->ibdev, + "Incompatible ABI params, udata not cleared\n"); + err = -EINVAL; + goto err_out; + } + + if (access_flags & ~EFA_SUPPORTED_ACCESS_FLAGS) { + ibdev_dbg(&dev->ibdev, + "Unsupported access flags[%#x], supported[%#x]\n", + access_flags, EFA_SUPPORTED_ACCESS_FLAGS); + err = -EOPNOTSUPP; + goto err_out; + } + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) { + err = -ENOMEM; + goto err_out; + } + + mr->umem = ib_umem_get(udata, start, length, access_flags, 0); + if (IS_ERR(mr->umem)) { + err = PTR_ERR(mr->umem); + ibdev_dbg(&dev->ibdev, + "Failed to pin and map user space memory[%d]\n", err); + goto err_free; + } + + params.pd = to_epd(ibpd)->pdn; + params.iova = virt_addr; + params.mr_length_in_bytes = length; + params.permissions = access_flags & 0x1; + max_page_shift = fls64(dev->dev_attr.page_size_cap); + + efa_cont_pages(mr->umem, start, max_page_shift, &npages, + &params.page_shift, &params.page_num); + ibdev_dbg(&dev->ibdev, + "start %#llx length %#llx npages %d params.page_shift %u params.page_num %u\n", + start, length, npages, params.page_shift, params.page_num); + + inline_size = ARRAY_SIZE(params.pbl.inline_pbl_array); + if (params.page_num <= inline_size) { + err = efa_create_inline_pbl(dev, mr, &params); + if (err) + goto err_unmap; + + err = efa_com_register_mr(&dev->edev, &params, &result); + if (err) + goto err_unmap; + } else { + err = efa_create_pbl(dev, &pbl, mr, &params); + if (err) + goto err_unmap; + + err = efa_com_register_mr(&dev->edev, &params, &result); + pbl_destroy(dev, &pbl); + + if (err) + goto err_unmap; + } + + mr->ibmr.lkey = result.l_key; + mr->ibmr.rkey = result.r_key; + 
mr->ibmr.length = length; + ibdev_dbg(&dev->ibdev, "Registered mr[%d]\n", mr->ibmr.lkey); + + return &mr->ibmr; + +err_unmap: + ib_umem_release(mr->umem); +err_free: + kfree(mr); +err_out: + atomic64_inc(&dev->stats.sw_stats.reg_mr_err); + return ERR_PTR(err); +} + +int efa_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) +{ + struct efa_dev *dev = to_edev(ibmr->device); + struct efa_com_dereg_mr_params params; + struct efa_mr *mr = to_emr(ibmr); + int err; + + if (udata->inlen && + !ib_is_udata_cleared(udata, 0, udata->inlen)) { + ibdev_dbg(&dev->ibdev, "Incompatible ABI params\n"); + return -EINVAL; + } + + ibdev_dbg(&dev->ibdev, "Deregister mr[%d]\n", ibmr->lkey); + + if (mr->umem) { + params.l_key = mr->ibmr.lkey; + err = efa_com_dereg_mr(&dev->edev, &params); + if (err) + return err; + ib_umem_release(mr->umem); + } + + kfree(mr); + + return 0; +} + +int efa_get_port_immutable(struct ib_device *ibdev, u8 port_num, + struct ib_port_immutable *immutable) +{ + struct ib_port_attr attr; + int err; + + err = ib_query_port(ibdev, port_num, &attr); + if (err) { + ibdev_dbg(ibdev, "Couldn't query port err[%d]\n", err); + return err; + } + + immutable->pkey_tbl_len = attr.pkey_tbl_len; + immutable->gid_tbl_len = attr.gid_tbl_len; + + return 0; +} + +static int efa_dealloc_uar(struct efa_dev *dev, u16 uarn) +{ + struct efa_com_dealloc_uar_params params = { + .uarn = uarn, + }; + + return efa_com_dealloc_uar(&dev->edev, &params); +} + +int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata) +{ + struct efa_ucontext *ucontext = to_eucontext(ibucontext); + struct efa_dev *dev = to_edev(ibucontext->device); + struct efa_ibv_alloc_ucontext_resp resp = {}; + struct efa_com_alloc_uar_result result; + int err; + + /* + * it's fine if the driver does not know all request fields, + * we will ack input fields in our response. 
+ */ + + err = efa_com_alloc_uar(&dev->edev, &result); + if (err) + goto err_out; + + ucontext->uarn = result.uarn; + xa_init(&ucontext->mmap_xa); + + resp.cmds_supp_udata_mask |= EFA_USER_CMDS_SUPP_UDATA_QUERY_DEVICE; + resp.cmds_supp_udata_mask |= EFA_USER_CMDS_SUPP_UDATA_CREATE_AH; + resp.sub_cqs_per_cq = dev->dev_attr.sub_cqs_per_cq; + resp.inline_buf_size = dev->dev_attr.inline_buf_size; + resp.max_llq_size = dev->dev_attr.max_llq_size; + + if (udata && udata->outlen) { + err = ib_copy_to_udata(udata, &resp, + min(sizeof(resp), udata->outlen)); + if (err) + goto err_dealloc_uar; + } + + return 0; + +err_dealloc_uar: + efa_dealloc_uar(dev, result.uarn); +err_out: + atomic64_inc(&dev->stats.sw_stats.alloc_ucontext_err); + return err; +} + +void efa_dealloc_ucontext(struct ib_ucontext *ibucontext) +{ + struct efa_ucontext *ucontext = to_eucontext(ibucontext); + struct efa_dev *dev = to_edev(ibucontext->device); + + mmap_entries_remove_free(dev, ucontext); + efa_dealloc_uar(dev, ucontext->uarn); +} + +static int __efa_mmap(struct efa_dev *dev, struct efa_ucontext *ucontext, + struct vm_area_struct *vma, u64 key, u64 length) +{ + struct efa_mmap_entry *entry; + unsigned long va; + u64 pfn; + int err; + + entry = mmap_entry_get(dev, ucontext, key, length); + if (!entry) { + ibdev_dbg(&dev->ibdev, "key[%#llx] does not have valid entry\n", + key); + return -EINVAL; + } + + ibdev_dbg(&dev->ibdev, + "Mapping address[%#llx], length[%#llx], mmap_flag[%d]\n", + entry->address, length, entry->mmap_flag); + + pfn = entry->address >> PAGE_SHIFT; + switch (entry->mmap_flag) { + case EFA_MMAP_IO_NC: + err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn, length, + pgprot_noncached(vma->vm_page_prot)); + break; + case EFA_MMAP_IO_WC: + err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn, length, + pgprot_writecombine(vma->vm_page_prot)); + break; + case EFA_MMAP_DMA_PAGE: + for (va = vma->vm_start; va < vma->vm_end; + va += PAGE_SIZE, pfn++) { + err = vm_insert_page(vma, 
va, pfn_to_page(pfn)); + if (err) + break; + } + break; + default: + err = -EINVAL; + } + + if (err) + ibdev_dbg( + &dev->ibdev, + "Couldn't mmap address[%#llx] length[%#llx] mmap_flag[%d] err[%d]\n", + entry->address, length, entry->mmap_flag, err); + + return err; +} + +int efa_mmap(struct ib_ucontext *ibucontext, + struct vm_area_struct *vma) +{ + struct efa_ucontext *ucontext = to_eucontext(ibucontext); + struct efa_dev *dev = to_edev(ibucontext->device); + u64 length = vma->vm_end - vma->vm_start; + u64 key = vma->vm_pgoff << PAGE_SHIFT; + + ibdev_dbg(&dev->ibdev, + "start %#lx, end %#lx, length = %#llx, key = %#llx\n", + vma->vm_start, vma->vm_end, length, key); + + if (length % PAGE_SIZE != 0 || !(vma->vm_flags & VM_SHARED)) { + ibdev_dbg(&dev->ibdev, + "length[%#llx] is not page size aligned[%#lx] or VM_SHARED is not set [%#lx]\n", + length, PAGE_SIZE, vma->vm_flags); + return -EINVAL; + } + + if (vma->vm_flags & VM_EXEC) { + ibdev_dbg(&dev->ibdev, "Mapping executable pages is not permitted\n"); + return -EPERM; + } + vma->vm_flags &= ~VM_MAYEXEC; + + return __efa_mmap(dev, ucontext, vma, key, length); +} + +static int efa_ah_destroy(struct efa_dev *dev, struct efa_ah *ah) +{ + struct efa_com_destroy_ah_params params = { + .ah = ah->ah, + .pdn = to_epd(ah->ibah.pd)->pdn, + }; + + return efa_com_destroy_ah(&dev->edev, &params); +} + +int efa_create_ah(struct ib_ah *ibah, + struct rdma_ah_attr *ah_attr, + u32 flags, + struct ib_udata *udata) +{ + struct efa_dev *dev = to_edev(ibah->device); + struct efa_com_create_ah_params params = {}; + struct efa_ibv_create_ah_resp resp = {}; + struct efa_com_create_ah_result result; + struct efa_ah *ah = to_eah(ibah); + int err; + + if (!(flags & RDMA_CREATE_AH_SLEEPABLE)) { + ibdev_dbg(&dev->ibdev, + "Create address handle is not supported in atomic context\n"); + err = -EOPNOTSUPP; + goto err_out; + } + + if (udata->inlen && + !ib_is_udata_cleared(udata, 0, udata->inlen)) { + ibdev_dbg(&dev->ibdev, "Incompatible ABI 
params\n"); + err = -EINVAL; + goto err_out; + } + + memcpy(params.dest_addr, ah_attr->grh.dgid.raw, + sizeof(params.dest_addr)); + params.pdn = to_epd(ibah->pd)->pdn; + err = efa_com_create_ah(&dev->edev, &params, &result); + if (err) + goto err_out; + + memcpy(ah->id, ah_attr->grh.dgid.raw, sizeof(ah->id)); + ah->ah = result.ah; + + resp.efa_address_handle = result.ah; + + if (udata->outlen) { + err = ib_copy_to_udata(udata, &resp, + min(sizeof(resp), udata->outlen)); + if (err) { + ibdev_dbg(&dev->ibdev, + "Failed to copy udata for create_ah response\n"); + goto err_destroy_ah; + } + } + ibdev_dbg(&dev->ibdev, "Created ah[%d]\n", ah->ah); + + return 0; + +err_destroy_ah: + efa_ah_destroy(dev, ah); +err_out: + atomic64_inc(&dev->stats.sw_stats.create_ah_err); + return err; +} + +void efa_destroy_ah(struct ib_ah *ibah, u32 flags) +{ + struct efa_dev *dev = to_edev(ibah->pd->device); + struct efa_ah *ah = to_eah(ibah); + + ibdev_dbg(&dev->ibdev, "Destroy ah[%d]\n", ah->ah); + + if (!(flags & RDMA_DESTROY_AH_SLEEPABLE)) { + ibdev_dbg(&dev->ibdev, + "Destroy address handle is not supported in atomic context\n"); + return; + } + + efa_ah_destroy(dev, ah); +} + +enum rdma_link_layer efa_port_link_layer(struct ib_device *ibdev, + u8 port_num) +{ + return IB_LINK_LAYER_UNSPECIFIED; +} + -- cgit v1.2.3 From b7f5e880f3774ff1934ce272e1c3b673a3a9c414 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Sun, 5 May 2019 20:59:30 +0300 Subject: RDMA/efa: Add the efa module Add the main EFA module file which takes care of device probe/initialization/registration/etc. 
Signed-off-by: Gal Pressman Reviewed-by: Shiraz Saleem Reviewed-by: Steve Wise Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/efa/efa_main.c | 533 +++++++++++++++++++++++++++++++++++ 1 file changed, 533 insertions(+) create mode 100644 drivers/infiniband/hw/efa/efa_main.c diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c new file mode 100644 index 000000000000..db974caf1eb1 --- /dev/null +++ b/drivers/infiniband/hw/efa/efa_main.c @@ -0,0 +1,533 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +/* + * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. + */ + +#include +#include + +#include + +#include "efa.h" + +#define PCI_DEV_ID_EFA_VF 0xefa0 + +static const struct pci_device_id efa_pci_tbl[] = { + { PCI_VDEVICE(AMAZON, PCI_DEV_ID_EFA_VF) }, + { } +}; + +MODULE_AUTHOR("Amazon.com, Inc. or its affiliates"); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_DESCRIPTION(DEVICE_NAME); +MODULE_DEVICE_TABLE(pci, efa_pci_tbl); + +#define EFA_REG_BAR 0 +#define EFA_MEM_BAR 2 +#define EFA_BASE_BAR_MASK (BIT(EFA_REG_BAR) | BIT(EFA_MEM_BAR)) + +#define EFA_AENQ_ENABLED_GROUPS \ + (BIT(EFA_ADMIN_FATAL_ERROR) | BIT(EFA_ADMIN_WARNING) | \ + BIT(EFA_ADMIN_NOTIFICATION) | BIT(EFA_ADMIN_KEEP_ALIVE)) + +static void efa_update_network_attr(struct efa_dev *dev, + struct efa_com_get_network_attr_result *network_attr) +{ + memcpy(dev->addr, network_attr->addr, sizeof(network_attr->addr)); + dev->mtu = network_attr->mtu; + + dev_dbg(&dev->pdev->dev, "Full address %pI6\n", dev->addr); +} + +/* This handler will called for unknown event group or unimplemented handlers */ +static void unimplemented_aenq_handler(void *data, + struct efa_admin_aenq_entry *aenq_e) +{ + struct efa_dev *dev = (struct efa_dev *)data; + + ibdev_err(&dev->ibdev, + "Unknown event was received or event with unimplemented handler\n"); +} + +static void efa_keep_alive(void *data, struct efa_admin_aenq_entry *aenq_e) +{ + struct efa_dev *dev = 
(struct efa_dev *)data; + + atomic64_inc(&dev->stats.keep_alive_rcvd); +} + +static struct efa_aenq_handlers aenq_handlers = { + .handlers = { + [EFA_ADMIN_KEEP_ALIVE] = efa_keep_alive, + }, + .unimplemented_handler = unimplemented_aenq_handler +}; + +static void efa_release_bars(struct efa_dev *dev, int bars_mask) +{ + struct pci_dev *pdev = dev->pdev; + int release_bars; + + release_bars = pci_select_bars(pdev, IORESOURCE_MEM) & bars_mask; + pci_release_selected_regions(pdev, release_bars); +} + +static irqreturn_t efa_intr_msix_mgmnt(int irq, void *data) +{ + struct efa_dev *dev = data; + + efa_com_admin_q_comp_intr_handler(&dev->edev); + efa_com_aenq_intr_handler(&dev->edev, data); + + return IRQ_HANDLED; +} + +static int efa_request_mgmnt_irq(struct efa_dev *dev) +{ + struct efa_irq *irq; + int err; + + irq = &dev->admin_irq; + err = request_irq(irq->vector, irq->handler, 0, irq->name, + irq->data); + if (err) { + dev_err(&dev->pdev->dev, "Failed to request admin irq (%d)\n", + err); + return err; + } + + dev_dbg(&dev->pdev->dev, "Set affinity hint of mgmnt irq to %*pbl (irq vector: %d)\n", + nr_cpumask_bits, &irq->affinity_hint_mask, irq->vector); + irq_set_affinity_hint(irq->vector, &irq->affinity_hint_mask); + + return err; +} + +static void efa_setup_mgmnt_irq(struct efa_dev *dev) +{ + u32 cpu; + + snprintf(dev->admin_irq.name, EFA_IRQNAME_SIZE, + "efa-mgmnt@pci:%s", pci_name(dev->pdev)); + dev->admin_irq.handler = efa_intr_msix_mgmnt; + dev->admin_irq.data = dev; + dev->admin_irq.vector = + pci_irq_vector(dev->pdev, dev->admin_msix_vector_idx); + cpu = cpumask_first(cpu_online_mask); + dev->admin_irq.cpu = cpu; + cpumask_set_cpu(cpu, + &dev->admin_irq.affinity_hint_mask); + dev_info(&dev->pdev->dev, "Setup irq:0x%p vector:%d name:%s\n", + &dev->admin_irq, + dev->admin_irq.vector, + dev->admin_irq.name); +} + +static void efa_free_mgmnt_irq(struct efa_dev *dev) +{ + struct efa_irq *irq; + + irq = &dev->admin_irq; + irq_set_affinity_hint(irq->vector, NULL); 
+ free_irq(irq->vector, irq->data); +} + +static int efa_set_mgmnt_irq(struct efa_dev *dev) +{ + efa_setup_mgmnt_irq(dev); + + return efa_request_mgmnt_irq(dev); +} + +static int efa_request_doorbell_bar(struct efa_dev *dev) +{ + u8 db_bar_idx = dev->dev_attr.db_bar; + struct pci_dev *pdev = dev->pdev; + int bars; + int err; + + if (!(BIT(db_bar_idx) & EFA_BASE_BAR_MASK)) { + bars = pci_select_bars(pdev, IORESOURCE_MEM) & BIT(db_bar_idx); + + err = pci_request_selected_regions(pdev, bars, DRV_MODULE_NAME); + if (err) { + dev_err(&dev->pdev->dev, + "pci_request_selected_regions for bar %d failed %d\n", + db_bar_idx, err); + return err; + } + } + + dev->db_bar_addr = pci_resource_start(dev->pdev, db_bar_idx); + dev->db_bar_len = pci_resource_len(dev->pdev, db_bar_idx); + + return 0; +} + +static void efa_release_doorbell_bar(struct efa_dev *dev) +{ + if (!(BIT(dev->dev_attr.db_bar) & EFA_BASE_BAR_MASK)) + efa_release_bars(dev, BIT(dev->dev_attr.db_bar)); +} + +static void efa_update_hw_hints(struct efa_dev *dev, + struct efa_com_get_hw_hints_result *hw_hints) +{ + struct efa_com_dev *edev = &dev->edev; + + if (hw_hints->mmio_read_timeout) + edev->mmio_read.mmio_read_timeout = + hw_hints->mmio_read_timeout * 1000; + + if (hw_hints->poll_interval) + edev->aq.poll_interval = hw_hints->poll_interval; + + if (hw_hints->admin_completion_timeout) + edev->aq.completion_timeout = + hw_hints->admin_completion_timeout; +} + +static void efa_stats_init(struct efa_dev *dev) +{ + atomic64_t *s = (atomic64_t *)&dev->stats; + int i; + + for (i = 0; i < sizeof(dev->stats) / sizeof(*s); i++, s++) + atomic64_set(s, 0); +} + +static const struct ib_device_ops efa_dev_ops = { + .alloc_pd = efa_alloc_pd, + .alloc_ucontext = efa_alloc_ucontext, + .create_ah = efa_create_ah, + .create_cq = efa_create_cq, + .create_qp = efa_create_qp, + .dealloc_pd = efa_dealloc_pd, + .dealloc_ucontext = efa_dealloc_ucontext, + .dereg_mr = efa_dereg_mr, + .destroy_ah = efa_destroy_ah, + .destroy_cq = 
efa_destroy_cq, + .destroy_qp = efa_destroy_qp, + .get_link_layer = efa_port_link_layer, + .get_port_immutable = efa_get_port_immutable, + .mmap = efa_mmap, + .modify_qp = efa_modify_qp, + .query_device = efa_query_device, + .query_gid = efa_query_gid, + .query_pkey = efa_query_pkey, + .query_port = efa_query_port, + .query_qp = efa_query_qp, + .reg_user_mr = efa_reg_mr, + + INIT_RDMA_OBJ_SIZE(ib_ah, efa_ah, ibah), + INIT_RDMA_OBJ_SIZE(ib_pd, efa_pd, ibpd), + INIT_RDMA_OBJ_SIZE(ib_ucontext, efa_ucontext, ibucontext), +}; + +static int efa_ib_device_add(struct efa_dev *dev) +{ + struct efa_com_get_network_attr_result network_attr; + struct efa_com_get_hw_hints_result hw_hints; + struct pci_dev *pdev = dev->pdev; + int err; + + efa_stats_init(dev); + + err = efa_com_get_device_attr(&dev->edev, &dev->dev_attr); + if (err) + return err; + + dev_dbg(&dev->pdev->dev, "Doorbells bar (%d)\n", dev->dev_attr.db_bar); + err = efa_request_doorbell_bar(dev); + if (err) + return err; + + err = efa_com_get_network_attr(&dev->edev, &network_attr); + if (err) + goto err_release_doorbell_bar; + + efa_update_network_attr(dev, &network_attr); + + err = efa_com_get_hw_hints(&dev->edev, &hw_hints); + if (err) + goto err_release_doorbell_bar; + + efa_update_hw_hints(dev, &hw_hints); + + /* Try to enable all the available aenq groups */ + err = efa_com_set_aenq_config(&dev->edev, EFA_AENQ_ENABLED_GROUPS); + if (err) + goto err_release_doorbell_bar; + + dev->ibdev.owner = THIS_MODULE; + dev->ibdev.node_type = RDMA_NODE_UNSPECIFIED; + dev->ibdev.phys_port_cnt = 1; + dev->ibdev.num_comp_vectors = 1; + dev->ibdev.dev.parent = &pdev->dev; + dev->ibdev.uverbs_abi_ver = EFA_UVERBS_ABI_VERSION; + + dev->ibdev.uverbs_cmd_mask = + (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | + (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | + (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | + (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_REG_MR) | + (1ull << 
IB_USER_VERBS_CMD_DEREG_MR) | + (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | + (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | + (1ull << IB_USER_VERBS_CMD_CREATE_QP) | + (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | + (1ull << IB_USER_VERBS_CMD_QUERY_QP) | + (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | + (1ull << IB_USER_VERBS_CMD_CREATE_AH) | + (1ull << IB_USER_VERBS_CMD_DESTROY_AH); + + dev->ibdev.uverbs_ex_cmd_mask = + (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE); + + dev->ibdev.driver_id = RDMA_DRIVER_EFA; + ib_set_device_ops(&dev->ibdev, &efa_dev_ops); + + err = ib_register_device(&dev->ibdev, "efa_%d"); + if (err) + goto err_release_doorbell_bar; + + ibdev_info(&dev->ibdev, "IB device registered\n"); + + return 0; + +err_release_doorbell_bar: + efa_release_doorbell_bar(dev); + return err; +} + +static void efa_ib_device_remove(struct efa_dev *dev) +{ + efa_com_dev_reset(&dev->edev, EFA_REGS_RESET_NORMAL); + ibdev_info(&dev->ibdev, "Unregister ib device\n"); + ib_unregister_device(&dev->ibdev); + efa_release_doorbell_bar(dev); +} + +static void efa_disable_msix(struct efa_dev *dev) +{ + pci_free_irq_vectors(dev->pdev); +} + +static int efa_enable_msix(struct efa_dev *dev) +{ + int msix_vecs, irq_num; + + /* Reserve the max msix vectors we might need */ + msix_vecs = EFA_NUM_MSIX_VEC; + dev_dbg(&dev->pdev->dev, "Trying to enable MSI-X, vectors %d\n", + msix_vecs); + + dev->admin_msix_vector_idx = EFA_MGMNT_MSIX_VEC_IDX; + irq_num = pci_alloc_irq_vectors(dev->pdev, msix_vecs, + msix_vecs, PCI_IRQ_MSIX); + + if (irq_num < 0) { + dev_err(&dev->pdev->dev, "Failed to enable MSI-X. 
irq_num %d\n", + irq_num); + return -ENOSPC; + } + + if (irq_num != msix_vecs) { + dev_err(&dev->pdev->dev, + "Allocated %d MSI-X (out of %d requested)\n", + irq_num, msix_vecs); + return -ENOSPC; + } + + return 0; +} + +static int efa_device_init(struct efa_com_dev *edev, struct pci_dev *pdev) +{ + int dma_width; + int err; + + err = efa_com_dev_reset(edev, EFA_REGS_RESET_NORMAL); + if (err) + return err; + + err = efa_com_validate_version(edev); + if (err) + return err; + + dma_width = efa_com_get_dma_width(edev); + if (dma_width < 0) { + err = dma_width; + return err; + } + + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(dma_width)); + if (err) { + dev_err(&pdev->dev, "pci_set_dma_mask failed %d\n", err); + return err; + } + + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(dma_width)); + if (err) { + dev_err(&pdev->dev, + "err_pci_set_consistent_dma_mask failed %d\n", + err); + return err; + } + + return 0; +} + +static struct efa_dev *efa_probe_device(struct pci_dev *pdev) +{ + struct efa_com_dev *edev; + struct efa_dev *dev; + int bars; + int err; + + err = pci_enable_device_mem(pdev); + if (err) { + dev_err(&pdev->dev, "pci_enable_device_mem() failed!\n"); + return ERR_PTR(err); + } + + pci_set_master(pdev); + + dev = ib_alloc_device(efa_dev, ibdev); + if (!dev) { + dev_err(&pdev->dev, "Device alloc failed\n"); + err = -ENOMEM; + goto err_disable_device; + } + + pci_set_drvdata(pdev, dev); + edev = &dev->edev; + edev->efa_dev = dev; + edev->dmadev = &pdev->dev; + dev->pdev = pdev; + + bars = pci_select_bars(pdev, IORESOURCE_MEM) & EFA_BASE_BAR_MASK; + err = pci_request_selected_regions(pdev, bars, DRV_MODULE_NAME); + if (err) { + dev_err(&pdev->dev, "pci_request_selected_regions failed %d\n", + err); + goto err_ibdev_destroy; + } + + dev->reg_bar_addr = pci_resource_start(pdev, EFA_REG_BAR); + dev->reg_bar_len = pci_resource_len(pdev, EFA_REG_BAR); + dev->mem_bar_addr = pci_resource_start(pdev, EFA_MEM_BAR); + dev->mem_bar_len = pci_resource_len(pdev, 
EFA_MEM_BAR); + + edev->reg_bar = devm_ioremap(&pdev->dev, + dev->reg_bar_addr, + dev->reg_bar_len); + if (!edev->reg_bar) { + dev_err(&pdev->dev, "Failed to remap register bar\n"); + err = -EFAULT; + goto err_release_bars; + } + + err = efa_com_mmio_reg_read_init(edev); + if (err) { + dev_err(&pdev->dev, "Failed to init readless MMIO\n"); + goto err_iounmap; + } + + err = efa_device_init(edev, pdev); + if (err) { + dev_err(&pdev->dev, "EFA device init failed\n"); + if (err == -ETIME) + err = -EPROBE_DEFER; + goto err_reg_read_destroy; + } + + err = efa_enable_msix(dev); + if (err) + goto err_reg_read_destroy; + + edev->aq.msix_vector_idx = dev->admin_msix_vector_idx; + edev->aenq.msix_vector_idx = dev->admin_msix_vector_idx; + + err = efa_set_mgmnt_irq(dev); + if (err) + goto err_disable_msix; + + err = efa_com_admin_init(edev, &aenq_handlers); + if (err) + goto err_free_mgmnt_irq; + + return dev; + +err_free_mgmnt_irq: + efa_free_mgmnt_irq(dev); +err_disable_msix: + efa_disable_msix(dev); +err_reg_read_destroy: + efa_com_mmio_reg_read_destroy(edev); +err_iounmap: + devm_iounmap(&pdev->dev, edev->reg_bar); +err_release_bars: + efa_release_bars(dev, EFA_BASE_BAR_MASK); +err_ibdev_destroy: + ib_dealloc_device(&dev->ibdev); +err_disable_device: + pci_disable_device(pdev); + return ERR_PTR(err); +} + +static void efa_remove_device(struct pci_dev *pdev) +{ + struct efa_dev *dev = pci_get_drvdata(pdev); + struct efa_com_dev *edev; + + edev = &dev->edev; + efa_com_admin_destroy(edev); + efa_free_mgmnt_irq(dev); + efa_disable_msix(dev); + efa_com_mmio_reg_read_destroy(edev); + devm_iounmap(&pdev->dev, edev->reg_bar); + efa_release_bars(dev, EFA_BASE_BAR_MASK); + ib_dealloc_device(&dev->ibdev); + pci_disable_device(pdev); +} + +static int efa_probe(struct pci_dev *pdev, const struct pci_device_id *ent) +{ + struct efa_dev *dev; + int err; + + dev = efa_probe_device(pdev); + if (IS_ERR(dev)) + return PTR_ERR(dev); + + err = efa_ib_device_add(dev); + if (err) + goto 
err_remove_device; + + return 0; + +err_remove_device: + efa_remove_device(pdev); + return err; +} + +static void efa_remove(struct pci_dev *pdev) +{ + struct efa_dev *dev = pci_get_drvdata(pdev); + + efa_ib_device_remove(dev); + efa_remove_device(pdev); +} + +static struct pci_driver efa_pci_driver = { + .name = DRV_MODULE_NAME, + .id_table = efa_pci_tbl, + .probe = efa_probe, + .remove = efa_remove, +}; + +module_pci_driver(efa_pci_driver); -- cgit v1.2.3 From f23afd75fc998ce002400b7687f942ce5207909a Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Sun, 5 May 2019 20:59:31 +0300 Subject: RDMA/efa: Add driver to Kconfig/Makefile Add EFA Makefile and Kconfig. Signed-off-by: Gal Pressman Reviewed-by: Steve Wise Signed-off-by: Jason Gunthorpe --- MAINTAINERS | 9 +++++++++ drivers/infiniband/Kconfig | 1 + drivers/infiniband/hw/Makefile | 1 + drivers/infiniband/hw/efa/Kconfig | 15 +++++++++++++++ drivers/infiniband/hw/efa/Makefile | 9 +++++++++ 5 files changed, 35 insertions(+) create mode 100644 drivers/infiniband/hw/efa/Kconfig create mode 100644 drivers/infiniband/hw/efa/Makefile diff --git a/MAINTAINERS b/MAINTAINERS index da2cd7265fb2..a868d8ce1437 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -744,6 +744,15 @@ S: Supported F: Documentation/networking/device_drivers/amazon/ena.txt F: drivers/net/ethernet/amazon/ +AMAZON RDMA EFA DRIVER +M: Gal Pressman +R: Yossi Leybovich +L: linux-rdma@vger.kernel.org +Q: https://patchwork.kernel.org/project/linux-rdma/list/ +S: Supported +F: drivers/infiniband/hw/efa/ +F: include/uapi/rdma/efa-abi.h + AMD CRYPTOGRAPHIC COPROCESSOR (CCP) DRIVER M: Tom Lendacky M: Gary Hook diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index a1fb840de45d..e549be36dffe 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -94,6 +94,7 @@ source "drivers/infiniband/hw/mthca/Kconfig" source "drivers/infiniband/hw/qib/Kconfig" source "drivers/infiniband/hw/cxgb3/Kconfig" source 
"drivers/infiniband/hw/cxgb4/Kconfig" +source "drivers/infiniband/hw/efa/Kconfig" source "drivers/infiniband/hw/i40iw/Kconfig" source "drivers/infiniband/hw/mlx4/Kconfig" source "drivers/infiniband/hw/mlx5/Kconfig" diff --git a/drivers/infiniband/hw/Makefile b/drivers/infiniband/hw/Makefile index e4f31c1be8f7..77094be1b262 100644 --- a/drivers/infiniband/hw/Makefile +++ b/drivers/infiniband/hw/Makefile @@ -3,6 +3,7 @@ obj-$(CONFIG_INFINIBAND_MTHCA) += mthca/ obj-$(CONFIG_INFINIBAND_QIB) += qib/ obj-$(CONFIG_INFINIBAND_CXGB3) += cxgb3/ obj-$(CONFIG_INFINIBAND_CXGB4) += cxgb4/ +obj-$(CONFIG_INFINIBAND_EFA) += efa/ obj-$(CONFIG_INFINIBAND_I40IW) += i40iw/ obj-$(CONFIG_MLX4_INFINIBAND) += mlx4/ obj-$(CONFIG_MLX5_INFINIBAND) += mlx5/ diff --git a/drivers/infiniband/hw/efa/Kconfig b/drivers/infiniband/hw/efa/Kconfig new file mode 100644 index 000000000000..457e18ba1d57 --- /dev/null +++ b/drivers/infiniband/hw/efa/Kconfig @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +# Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. +# +# Amazon fabric device configuration +# + +config INFINIBAND_EFA + tristate "Amazon Elastic Fabric Adapter (EFA) support" + depends on PCI_MSI && 64BIT && !CPU_BIG_ENDIAN + depends on INFINIBAND_USER_ACCESS + help + This driver supports Amazon Elastic Fabric Adapter (EFA). + + To compile this driver as a module, choose M here. + The module will be called efa. diff --git a/drivers/infiniband/hw/efa/Makefile b/drivers/infiniband/hw/efa/Makefile new file mode 100644 index 000000000000..6e83083af0bc --- /dev/null +++ b/drivers/infiniband/hw/efa/Makefile @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +# Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. +# +# Makefile for Amazon Elastic Fabric Adapter (EFA) device driver. 
+# + +obj-$(CONFIG_INFINIBAND_EFA) += efa.o + +efa-y := efa_com_cmd.o efa_com.o efa_main.o efa_verbs.o -- cgit v1.2.3 From d2c4ada1ed883ea1310112965f2f1d713a470699 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Mon, 6 May 2019 18:02:56 +0300 Subject: lib/scatterlist: Remove leftover from sg_page_iter comment Commit d901b2760dc6 ("lib/scatterlist: Provide a DMA page iterator") added the sg DMA iterator but a leftover remained in the sg_page_iter documentation as you cannot get the page dma address (only the page itself), fix it. Signed-off-by: Gal Pressman Reviewed-by: Mukesh Ojha Signed-off-by: Jason Gunthorpe --- include/linux/scatterlist.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index b4be960c7e5d..30a9a55c28ba 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -340,11 +340,11 @@ int sg_alloc_table_chained(struct sg_table *table, int nents, * sg page iterator * * Iterates over sg entries page-by-page. On each successful iteration, you - * can call sg_page_iter_page(@piter) to get the current page and its dma - * address. @piter->sg will point to the sg holding this page and - * @piter->sg_pgoffset to the page's page offset within the sg. The iteration - * will stop either when a maximum number of sg entries was reached or a - * terminating sg (sg_last(sg) == true) was reached. + * can call sg_page_iter_page(@piter) to get the current page. + * @piter->sg will point to the sg holding this page and @piter->sg_pgoffset to + * the page's page offset within the sg. The iteration will stop either when a + * maximum number of sg entries was reached or a terminating sg + * (sg_last(sg) == true) was reached. 
*/ struct sg_page_iter { struct scatterlist *sg; /* sg holding the page */ -- cgit v1.2.3 From e7a5b4aafd82771f8924905c208d5d236ddcb671 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 5 May 2019 19:33:20 +0300 Subject: RDMA/device: Don't fire uevent before device is fully initialized When the refcount is 0 the device is invisible to netlink. However in the patch below the refcount = 1 was moved to after the device_add(). This creates a race where userspace can issue a netlink query after the device_add() event and not see the device as visible. Ensure that no uevent is fired before device is fully registered. Fixes: d79af7242bb2 ("RDMA/device: Expose ib_device_try_get(()") Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 9665c3796cfb..78dc07c6ac4b 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -1303,6 +1303,11 @@ int ib_register_device(struct ib_device *device, const char *name) ib_device_register_rdmacg(device); + /* + * Ensure that ADD uevent is not fired because it + * is too early and device is not initialized yet. 
+ */ + dev_set_uevent_suppress(&device->dev, true); ret = device_add(&device->dev); if (ret) goto cg_cleanup; @@ -1315,6 +1320,9 @@ int ib_register_device(struct ib_device *device, const char *name) } ret = enable_device_and_get(device); + dev_set_uevent_suppress(&device->dev, false); + /* Mark for userspace that device is ready */ + kobject_uevent(&device->dev.kobj, KOBJ_ADD); if (ret) { void (*dealloc_fn)(struct ib_device *); @@ -1343,6 +1351,7 @@ int ib_register_device(struct ib_device *device, const char *name) dev_cleanup: device_del(&device->dev); cg_cleanup: + dev_set_uevent_suppress(&device->dev, false); ib_device_unregister_rdmacg(device); ib_cache_cleanup_one(device); return ret; -- cgit v1.2.3 From ba7d8117f3cca8eb70d579fde3f9ec8cd6a28f39 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Thu, 11 Apr 2019 07:22:35 -0700 Subject: IB/core, ipoib: Do not overreact to SM LID change event When IPoIB receives an SM LID change event, it reacts by flushing its path record cache and rejoining multicast groups. This is the same behavior it performs when it receives a reregistration event. This behavior is unnecessary as an SM may have database backup or synchronization mechanisms which permit the SM location or LID to change without loss of multicast membership and without impact to path records. Both opensm and the OPA FM issue reregistration events if a new SM is started (or restarted with a new config) or an SM event occurs which results in loss of multicast membership records by the SM (such as opensm failover) or the SM encounters new nodes with Active ports (such as after joining 2 fabrics by connecting switches via ISLs). Hence this event can be depended on as the trigger for IPoIB cache and multicast flushing. It appears that some drivers, such as qib, and hfi1 issue the IB_EVENT_SM_CHANGE but other drivers such as mlx4 and mlx5 do not. 
Empirical testing on Mellanox EDR using ibv_asyncwatch has confirmed that Mellanox EDR HCAs do not generate SM change events and that opensm does generate reregistration. An SM LID change event is generated by the mentioned drivers to reflect that sm_lid and/or sm_sl in the local port info has changed. The intent of this event is to permit applications and ULPs which have a local copy of this information (or an address handle using it) to update their information. The intent is that the reregistration event (caused by the SM via a bit in Set(PortInfo)) be used to inform nodes that they need to rejoin multicast groups, resubscribe for notices and potentially update path records. When an SM migrates or fails over, a SM LID change event can occur. In response IPoIB discards path records and multicast membership and loses connectivity until these records are restored via SA requests. In very large fabrics, it may take minutes for the SM to be ready and for the SA responses to be supplied. This can result in undesirable and unnecessary IPoIB connectivity impacts. It also can result in an unnecessary storm of SA queries from all nodes in a cluster potentially followed by yet another storm if the SM issues the reregistration request. The fact that Mellanox HCAs do not even generate this event is further evidence that on modern IB fabrics there will be no ill side effects from the proposed changes below to reduce the reaction by 3 kernel components to this event. So these changes should be benign for Mellanox IB fabrics and will benefit OPA fabrics while also making ib_core and ULP behavior "correct" as intended by the IBTA spec and kernel RDMA event APIs. Address these issues by removing IB_EVENT_SM_CHANGE handling from ipoib. IPoIB does not locally store sm_lid nor sm_sl, so it does not need to do anything on SM LID change. IPoIB makes use of other ib_core components to issue SA requests for it and those components correctly track SM LID and SM LID changes. 
Also in ib_core multicast handling, remove the test for IB_EVENT_SM_CHANGE. This code is moving all multicast groups to the error state, which will trigger rejoins. This code is used by IPoIB as well as the connection manager and other clients of multicast groups. This kernel module centralizes group membership status and joins since a node can only join a given group once but multiple ULPs or applications may want to join the same group. It makes use of the sa_query.c component in ib_core, which correctly tracks SM LID and SL. This component does not track SM LID nor SL itself and hence need not react to their changes. Similarly in the ib_core cache code remove the handling for the IB_EVENT_SM_CHANGE in this function. The ib_cache_update function which is ultimately called is updating local copies of the pkey table, gid table and lmc. It does not update nor retain sm_lid nor sm_sl. As such it does not need to be called on an SM LID change. It technically also does not need to be called on a reregistration. The LID_CHANGE, PKEY_CHANGE, GID_CHANGE and port state change events (PORT_ERR, PORT_ACTIVE) should be sufficient triggers. It is worth noting that the alternative of simply having the hfi1 and qib drivers not generate the SM LID change event was explored. While this would duplicate what Mellanox drivers do now, it is not the correct behavior and removes the ability for an SM to migrate without requiring reregistration. Since both opensm and OPA SM have mechanisms to back up or synchronize registration information, it is desirable to let them perform SM migrations (with LID or SL changes) without requiring reregistration when they deem it appropriate. 
Suggested-by: Todd Rimmer Tested-by: Michael Brooks Reviewed-by: Mike Marciniszyn Reviewed-by: Todd Rimmer Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cache.c | 1 - drivers/infiniband/core/multicast.c | 1 - drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 3 +-- 3 files changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 099d922ae7bd..18e476b3ced0 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -1478,7 +1478,6 @@ static void ib_cache_event(struct ib_event_handler *handler, event->event == IB_EVENT_PORT_ACTIVE || event->event == IB_EVENT_LID_CHANGE || event->event == IB_EVENT_PKEY_CHANGE || - event->event == IB_EVENT_SM_CHANGE || event->event == IB_EVENT_CLIENT_REREGISTER || event->event == IB_EVENT_GID_CHANGE) { work = kmalloc(sizeof *work, GFP_ATOMIC); diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c index d50ff70bb24b..cd338ddc4a39 100644 --- a/drivers/infiniband/core/multicast.c +++ b/drivers/infiniband/core/multicast.c @@ -804,7 +804,6 @@ static void mcast_event_handler(struct ib_event_handler *handler, switch (event->event) { case IB_EVENT_PORT_ERR: case IB_EVENT_LID_CHANGE: - case IB_EVENT_SM_CHANGE: case IB_EVENT_CLIENT_REREGISTER: mcast_groups_event(&dev->port[index], MCAST_GROUP_ERROR); break; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index 1e88213459f2..ba09068f6200 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -279,8 +279,7 @@ void ipoib_event(struct ib_event_handler *handler, ipoib_dbg(priv, "Event %d on device %s port %d\n", record->event, dev_name(&record->device->dev), record->element.port_num); - if (record->event == IB_EVENT_SM_CHANGE || - record->event == IB_EVENT_CLIENT_REREGISTER) { + if (record->event == IB_EVENT_CLIENT_REREGISTER) { 
queue_work(ipoib_workqueue, &priv->flush_light); } else if (record->event == IB_EVENT_PORT_ERR || record->event == IB_EVENT_PORT_ACTIVE || -- cgit v1.2.3 From b79656ed44c6865e17bcd93472ec39488bcc4984 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 6 May 2019 14:23:04 +0300 Subject: RDMA/ipoib: Allow user space differentiate between valid dev_port Systemd triggers the following warning during IPoIB device load: mlx5_core 0000:00:0c.0 ib0: "systemd-udevd" wants to know my dev_id. Should it look at dev_port instead? See Documentation/ABI/testing/sysfs-class-net for more info. This is caused by user space attempting to differentiate between old systems without dev_port and new systems with dev_port. If dev_port is zero, systemd will try to read dev_id instead. There is no need to print a warning in such a case, because it is a valid situation and it is needed to ensure systemd compatibility with old kernels. Link: https://github.com/systemd/systemd/blob/master/src/udev/udev-builtin-net_id.c#L358 Cc: # 4.19 Fixes: f6350da41dc7 ("IB/ipoib: Log sysfs 'dev_id' accesses from userspace") Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 48eda16db1a7..9b5e11d3fb85 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -2402,7 +2402,18 @@ static ssize_t dev_id_show(struct device *dev, { struct net_device *ndev = to_net_dev(dev); - if (ndev->dev_id == ndev->dev_port) + /* + * ndev->dev_port will be equal to 0 in old kernel prior to commit + * 9b8b2a323008 ("IB/ipoib: Use dev_port to expose network interface + * port numbers") Zero was chosen as special case for user space + * applications to fallback and query dev_id to check if it has + * different value or not. 
+ * + * Don't print warning in such scenario. + * + * https://github.com/systemd/systemd/blob/master/src/udev/udev-builtin-net_id.c#L358 + */ + if (ndev->dev_port && ndev->dev_id == ndev->dev_port) netdev_info_once(ndev, "\"%s\" wants to know my dev_id. Should it look at dev_port instead? See Documentation/ABI/testing/sysfs-class-net for more info.\n", current->comm); -- cgit v1.2.3