From 3ae15e1623b9d32eb410c2a23d90e47b16e6acd0 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Wed, 30 Apr 2008 19:52:55 -0700 Subject: IB/mlx4: Fix off-by-one errors in calls to mlx4_ib_free_cq_buf() When I merged bbf8eed1 ("IB/mlx4: Add support for resizing CQs") I changed things around so that mlx4_ib_alloc_cq_buf() and mlx4_ib_free_cq_buf() were used everywhere they could be. However, I screwed up the number of entries passed into mlx4_ib_alloc_cq_buf() in a couple places -- the function bumps the number of entries internally, so the caller shouldn't add 1 as well. Passing a too-big value for the number of entries to mlx4_ib_free_cq_buf() can cause the cleanup to go off the end of an array and corrupt allocator state in interesting ways. Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/cq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index 2f199c5c4a72..4521319b1406 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -246,7 +246,7 @@ err_mtt: if (context) ib_umem_release(cq->umem); else - mlx4_ib_free_cq_buf(dev, &cq->buf, entries); + mlx4_ib_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe); err_db: if (!context) @@ -434,7 +434,7 @@ int mlx4_ib_destroy_cq(struct ib_cq *cq) mlx4_ib_db_unmap_user(to_mucontext(cq->uobject->context), &mcq->db); ib_umem_release(mcq->umem); } else { - mlx4_ib_free_cq_buf(dev, &mcq->buf, cq->cqe + 1); + mlx4_ib_free_cq_buf(dev, &mcq->buf, cq->cqe); mlx4_db_free(dev->dev, &mcq->db); } -- cgit v1.2.3 From c8286944b802c5ce4063ec3c334b38c6757a9434 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Fri, 2 May 2008 11:17:41 -0500 Subject: RDMA/cxgb3: QP flush fixes - Flush the QP only after the HW disables the connection. Currently we flush the QP when transitioning to CLOSING. This exposes a race condition where the HW can complete a RECV WR, for instance, -and- the SW can flush that same WR. - Only call CQ event handlers on flush IFF we actually flushed something. 
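The second point is the crux of the change: the flush helpers now return how many entries they actually inserted into the CQ, and the caller raises the completion upcall only when that count is non-zero. A minimal, self-contained sketch of the pattern follows (stand-in names and types, not the cxgb3 interfaces):

#include <stdio.h>

struct cq { int cqe_count; };            /* stand-in completion queue */

/* pretend to flush 'count' pending receive WRs into the CQ; report how many */
static int flush_rq(struct cq *cq, int count)
{
	cq->cqe_count += count;
	return count;
}

static void comp_handler(struct cq *cq)
{
	printf("notify consumer: %d flushed entries\n", cq->cqe_count);
}

int main(void)
{
	struct cq cq = { 0 };
	int flushed = flush_rq(&cq, 0);  /* nothing was outstanding */

	if (flushed)                     /* upcall only if something was flushed */
		comp_handler(&cq);
	return 0;
}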
Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb3/cxio_hal.c | 13 ++++++++++--- drivers/infiniband/hw/cxgb3/cxio_hal.h | 4 ++-- drivers/infiniband/hw/cxgb3/iwch_qp.c | 13 ++++++++----- 3 files changed, 20 insertions(+), 10 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c index ed2ee4ba4b7c..5fd8506a8657 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_hal.c +++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c @@ -359,9 +359,10 @@ static void insert_recv_cqe(struct t3_wq *wq, struct t3_cq *cq) cq->sw_wptr++; } -void cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count) +int cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count) { u32 ptr; + int flushed = 0; PDBG("%s wq %p cq %p\n", __func__, wq, cq); @@ -369,8 +370,11 @@ void cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count) PDBG("%s rq_rptr %u rq_wptr %u skip count %u\n", __func__, wq->rq_rptr, wq->rq_wptr, count); ptr = wq->rq_rptr + count; - while (ptr++ != wq->rq_wptr) + while (ptr++ != wq->rq_wptr) { insert_recv_cqe(wq, cq); + flushed++; + } + return flushed; } static void insert_sq_cqe(struct t3_wq *wq, struct t3_cq *cq, @@ -394,9 +398,10 @@ static void insert_sq_cqe(struct t3_wq *wq, struct t3_cq *cq, cq->sw_wptr++; } -void cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count) +int cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count) { __u32 ptr; + int flushed = 0; struct t3_swsq *sqp = wq->sq + Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2); ptr = wq->sq_rptr + count; @@ -405,7 +410,9 @@ void cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count) insert_sq_cqe(wq, cq, sqp); sqp++; ptr++; + flushed++; } + return flushed; } /* diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.h b/drivers/infiniband/hw/cxgb3/cxio_hal.h index 2bcff7f5046e..69ab08ebc680 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_hal.h +++ b/drivers/infiniband/hw/cxgb3/cxio_hal.h @@ -173,8 +173,8 @@ u32 cxio_hal_get_pdid(struct cxio_hal_resource *rscp); void cxio_hal_put_pdid(struct cxio_hal_resource *rscp, u32 pdid); int __init cxio_hal_init(void); void __exit cxio_hal_exit(void); -void cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count); -void cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count); +int cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count); +int cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count); void cxio_count_rcqes(struct t3_cq *cq, struct t3_wq *wq, int *count); void cxio_count_scqes(struct t3_cq *cq, struct t3_wq *wq, int *count); void cxio_flush_hw_cq(struct t3_cq *cq); diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c index 9b4be889c58e..79dbe5beae52 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_qp.c +++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c @@ -655,6 +655,7 @@ static void __flush_qp(struct iwch_qp *qhp, unsigned long *flag) { struct iwch_cq *rchp, *schp; int count; + int flushed; rchp = get_chp(qhp->rhp, qhp->attr.rcq); schp = get_chp(qhp->rhp, qhp->attr.scq); @@ -669,20 +670,22 @@ static void __flush_qp(struct iwch_qp *qhp, unsigned long *flag) spin_lock(&qhp->lock); cxio_flush_hw_cq(&rchp->cq); cxio_count_rcqes(&rchp->cq, &qhp->wq, &count); - cxio_flush_rq(&qhp->wq, &rchp->cq, count); + flushed = cxio_flush_rq(&qhp->wq, &rchp->cq, count); spin_unlock(&qhp->lock); spin_unlock_irqrestore(&rchp->lock, *flag); - (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); + if (flushed) + 
(*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); /* locking heirarchy: cq lock first, then qp lock. */ spin_lock_irqsave(&schp->lock, *flag); spin_lock(&qhp->lock); cxio_flush_hw_cq(&schp->cq); cxio_count_scqes(&schp->cq, &qhp->wq, &count); - cxio_flush_sq(&qhp->wq, &schp->cq, count); + flushed = cxio_flush_sq(&qhp->wq, &schp->cq, count); spin_unlock(&qhp->lock); spin_unlock_irqrestore(&schp->lock, *flag); - (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context); + if (flushed) + (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context); /* deref */ if (atomic_dec_and_test(&qhp->refcnt)) @@ -880,7 +883,6 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp, ep = qhp->ep; get_ep(&ep->com); } - flush_qp(qhp, &flag); break; case IWCH_QP_STATE_TERMINATE: qhp->attr.state = IWCH_QP_STATE_TERMINATE; @@ -911,6 +913,7 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp, } switch (attrs->next_state) { case IWCH_QP_STATE_IDLE: + flush_qp(qhp, &flag); qhp->attr.state = IWCH_QP_STATE_IDLE; qhp->attr.llp_stream_handle = NULL; put_ep(&qhp->ep->com); -- cgit v1.2.3 From c4d49776e8f5bf2d900d2b6d4855c1670a535ac5 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Fri, 2 May 2008 10:57:09 -0700 Subject: RDMA/cxgb3: Silently ignore close reply after abort. Remove bad BUG_ON() that can trigger in correct operation from close_con_rpl(). It is possible to get a close_rpl message on a dead connection. The sequence is: - host refs ep for close exchange - host posts close_req - hw posts PEER_ABORT from incoming RST - host marks ep DEAD - host posts ABORT_RPL and releases ep resources - hw posts CLOSE_RPL - host derefs ep and ep freed. Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb3/iwch_cm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c index d44a6df9ad8c..32e290e5661c 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c @@ -1650,8 +1650,8 @@ static int close_con_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) release = 1; break; case ABORTING: - break; case DEAD: + break; default: BUG_ON(1); break; -- cgit v1.2.3 From 77a8d5741f3ee2c79554382179cca7b5893d6ae9 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Fri, 2 May 2008 10:57:09 -0700 Subject: RDMA/cxgb3: Bump up the MPA connection setup timeout. Testing on large clusters shows its way too short at 10 secs. 
Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb3/iwch_cm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c index 32e290e5661c..c325c44807e8 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c @@ -67,10 +67,10 @@ int peer2peer = 0; module_param(peer2peer, int, 0644); MODULE_PARM_DESC(peer2peer, "Support peer2peer ULPs (default=0)"); -static int ep_timeout_secs = 10; +static int ep_timeout_secs = 60; module_param(ep_timeout_secs, int, 0644); MODULE_PARM_DESC(ep_timeout_secs, "CM Endpoint operation timeout " - "in seconds (default=10)"); + "in seconds (default=60)"); static int mpa_rev = 1; module_param(mpa_rev, int, 0644); -- cgit v1.2.3 From cf04690885972eaba830ee761de545a6956197e6 Mon Sep 17 00:00:00 2001 From: Stefan Roscher Date: Mon, 5 May 2008 15:51:49 -0700 Subject: IB/ehca: Fix function return types Also remove duplicate assignment of local_ca_ack_delay and change min_t check for local_ca_ack_delay to u8 instead of int. Signed-off-by: Stefan Roscher Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ehca/ehca_hca.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c index 2515cbde7e65..bc3b37d2070f 100644 --- a/drivers/infiniband/hw/ehca/ehca_hca.c +++ b/drivers/infiniband/hw/ehca/ehca_hca.c @@ -101,7 +101,6 @@ int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props) props->max_ee = limit_uint(rblock->max_rd_ee_context); props->max_rdd = limit_uint(rblock->max_rd_domain); props->max_fmr = limit_uint(rblock->max_mr); - props->local_ca_ack_delay = limit_uint(rblock->local_ca_ack_delay); props->max_qp_rd_atom = limit_uint(rblock->max_rr_qp); props->max_ee_rd_atom = limit_uint(rblock->max_rr_ee_context); props->max_res_rd_atom = limit_uint(rblock->max_rr_hca); @@ -115,7 +114,7 @@ int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props) } props->max_pkeys = 16; - props->local_ca_ack_delay = limit_uint(rblock->local_ca_ack_delay); + props->local_ca_ack_delay = min_t(u8, rblock->local_ca_ack_delay, 255); props->max_raw_ipv6_qp = limit_uint(rblock->max_raw_ipv6_qp); props->max_raw_ethy_qp = limit_uint(rblock->max_raw_ethy_qp); props->max_mcast_grp = limit_uint(rblock->max_mcast_grp); @@ -136,7 +135,7 @@ query_device1: return ret; } -static int map_mtu(struct ehca_shca *shca, u32 fw_mtu) +static enum ib_mtu map_mtu(struct ehca_shca *shca, u32 fw_mtu) { switch (fw_mtu) { case 0x1: @@ -156,7 +155,7 @@ static int map_mtu(struct ehca_shca *shca, u32 fw_mtu) } } -static int map_number_of_vls(struct ehca_shca *shca, u32 vl_cap) +static u8 map_number_of_vls(struct ehca_shca *shca, u32 vl_cap) { switch (vl_cap) { case 0x1: -- cgit v1.2.3 From 0e9913362a967377eb886bbdf305ec58aa07a878 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Tue, 6 May 2008 15:03:38 -0700 Subject: RDMA/cxgb3: Don't add PBL memory to gen_pool in chunks Current iw_cxgb3 code adds PBL memory to the driver's gen_pool in 2 MB chunks. This limits the largest single allocation that can be done to the same size, which means that with 4 KB pages, each of which takes 8 bytes of PBL memory, the largest memory region that can be allocated is 1 GB (256K PBL entries * 4 KB/entry). 
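For concreteness, the arithmetic behind that limit can be checked with a throwaway user-space calculation (assumed sizes only, not driver code):

#include <stdio.h>

int main(void)
{
	unsigned long chunk     = 2UL << 20;       /* 2 MB gen_pool chunk  */
	unsigned long pbl_entry = 8;               /* bytes per PBL entry  */
	unsigned long page_size = 4096;            /* 4 KB pages           */

	unsigned long entries = chunk / pbl_entry; /* 256K PBL entries     */
	unsigned long max_reg = entries * page_size;

	printf("largest region: %lu MB\n", max_reg >> 20);  /* prints 1024 */
	return 0;
}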
Remove this limit by adding all the PBL memory in a single gen_pool chunk, if possible. Add code that falls back to smaller chunks if gen_pool_add() fails, which can happen if there is not sufficient contiguous lowmem for the internal gen_pool bitmap. Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb3/cxio_resource.c | 36 ++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 8 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.c b/drivers/infiniband/hw/cxgb3/cxio_resource.c index 45ed4f25ef78..bd233c087653 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_resource.c +++ b/drivers/infiniband/hw/cxgb3/cxio_resource.c @@ -250,7 +250,6 @@ void cxio_hal_destroy_resource(struct cxio_hal_resource *rscp) */ #define MIN_PBL_SHIFT 8 /* 256B == min PBL size (32 entries) */ -#define PBL_CHUNK 2*1024*1024 u32 cxio_hal_pblpool_alloc(struct cxio_rdev *rdev_p, int size) { @@ -267,14 +266,35 @@ void cxio_hal_pblpool_free(struct cxio_rdev *rdev_p, u32 addr, int size) int cxio_hal_pblpool_create(struct cxio_rdev *rdev_p) { - unsigned long i; + unsigned pbl_start, pbl_chunk; + rdev_p->pbl_pool = gen_pool_create(MIN_PBL_SHIFT, -1); - if (rdev_p->pbl_pool) - for (i = rdev_p->rnic_info.pbl_base; - i <= rdev_p->rnic_info.pbl_top - PBL_CHUNK + 1; - i += PBL_CHUNK) - gen_pool_add(rdev_p->pbl_pool, i, PBL_CHUNK, -1); - return rdev_p->pbl_pool ? 0 : -ENOMEM; + if (!rdev_p->pbl_pool) + return -ENOMEM; + + pbl_start = rdev_p->rnic_info.pbl_base; + pbl_chunk = rdev_p->rnic_info.pbl_top - pbl_start + 1; + + while (pbl_start < rdev_p->rnic_info.pbl_top) { + pbl_chunk = min(rdev_p->rnic_info.pbl_top - pbl_start + 1, + pbl_chunk); + if (gen_pool_add(rdev_p->pbl_pool, pbl_start, pbl_chunk, -1)) { + PDBG("%s failed to add PBL chunk (%x/%x)\n", + __func__, pbl_start, pbl_chunk); + if (pbl_chunk <= 1024 << MIN_PBL_SHIFT) { + printk(KERN_WARNING MOD "%s: Failed to add all PBL chunks (%x/%x)\n", + __func__, pbl_start, rdev_p->rnic_info.pbl_top - pbl_start); + return 0; + } + pbl_chunk >>= 1; + } else { + PDBG("%s added PBL chunk (%x/%x)\n", + __func__, pbl_start, pbl_chunk); + pbl_start += pbl_chunk; + } + } + + return 0; } void cxio_hal_pblpool_destroy(struct cxio_rdev *rdev_p) -- cgit v1.2.3 From 273748cc908a901d082b4da5a16b2541c9d78a02 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Tue, 6 May 2008 15:56:22 -0700 Subject: RDMA/cxgb3: Fix severe limit on userspace memory registration size Currently, iw_cxgb3 is severely limited on the amount of userspace memory that can be registered in in a single memory region, which causes big problems for applications that expect to be able to register 100s of MB. The problem is that the driver uses a single kmalloc()ed buffer to hold the physical buffer list (PBL) for the entire memory region during registration, which means that 8 bytes of contiguous memory are required for each page of memory being registered. For example, a 64 MB registration will require 128 KB of contiguous memory with 4 KB pages, and it unlikely that such an allocation will succeed on a busy system. This is purely a driver problem: the temporary page list buffer is not needed by the hardware, so we can fix this by writing the PBL to the hardware in page-sized chunks rather than all at once. 
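In outline, the chunked scheme looks like the sketch below (made-up helper names and a user-space stub for the adapter write; the actual interfaces appear in the patch that follows):

#include <stdint.h>
#include <stdio.h>

#define PBL_CHUNK_ENTRIES (4096 / sizeof(uint64_t))   /* one page of entries */

/* stub standing in for writing 'n' PBL entries at 'offset' in adapter memory */
static int write_pbl_chunk(const uint64_t *entries, size_t n, size_t offset)
{
	(void)entries;
	printf("wrote %zu entries at offset %zu\n", n, offset);
	return 0;
}

static int register_region(const uint64_t *pages, size_t npages)
{
	size_t done = 0;

	while (done < npages) {            /* copy the PBL one page at a time */
		size_t n = npages - done;

		if (n > PBL_CHUNK_ENTRIES)
			n = PBL_CHUNK_ENTRIES;
		if (write_pbl_chunk(pages + done, n, done))
			return -1;
		done += n;
	}
	/* ...then allocate the STag and enable the memory region... */
	return 0;
}

int main(void)
{
	uint64_t pages[1200] = { 0 };      /* > 512 entries forces several chunks */

	return register_region(pages, 1200);
}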
We do this by splitting the memory registration operation up into several steps: - Allocate PBL space in adapter memory for the full registration - Copy PBL to adapter memory in chunks - Allocate STag and enable memory region This also allows several other cleanups to the __cxio_tpt_op() interface and related parts of the driver. This change leaves the reregister memory region and memory window operations broken, but they already didn't work due to other longstanding bugs, so fixing them will be left to a later patch. Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb3/cxio_hal.c | 90 ++++++++++++++--------------- drivers/infiniband/hw/cxgb3/cxio_hal.h | 8 +-- drivers/infiniband/hw/cxgb3/iwch_mem.c | 75 +++++++++++++++--------- drivers/infiniband/hw/cxgb3/iwch_provider.c | 68 +++++++++++++++++----- drivers/infiniband/hw/cxgb3/iwch_provider.h | 8 +-- 5 files changed, 155 insertions(+), 94 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c index 5fd8506a8657..ebf9d3043f80 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_hal.c +++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c @@ -588,7 +588,7 @@ static int cxio_hal_destroy_ctrl_qp(struct cxio_rdev *rdev_p) * caller aquires the ctrl_qp lock before the call */ static int cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr, - u32 len, void *data, int completion) + u32 len, void *data) { u32 i, nr_wqe, copy_len; u8 *copy_data; @@ -624,7 +624,7 @@ static int cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr, flag = 0; if (i == (nr_wqe - 1)) { /* last WQE */ - flag = completion ? T3_COMPLETION_FLAG : 0; + flag = T3_COMPLETION_FLAG; if (len % 32) utx_len = len / 32 + 1; else @@ -683,21 +683,20 @@ static int cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr, return 0; } -/* IN: stag key, pdid, perm, zbva, to, len, page_size, pbl, and pbl_size - * OUT: stag index, actual pbl_size, pbl_addr allocated. 
+/* IN: stag key, pdid, perm, zbva, to, len, page_size, pbl_size and pbl_addr + * OUT: stag index * TBD: shared memory region support */ static int __cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry, u32 *stag, u8 stag_state, u32 pdid, enum tpt_mem_type type, enum tpt_mem_perm perm, - u32 zbva, u64 to, u32 len, u8 page_size, __be64 *pbl, - u32 *pbl_size, u32 *pbl_addr) + u32 zbva, u64 to, u32 len, u8 page_size, + u32 pbl_size, u32 pbl_addr) { int err; struct tpt_entry tpt; u32 stag_idx; u32 wptr; - int rereg = (*stag != T3_STAG_UNSET); stag_state = stag_state > 0; stag_idx = (*stag) >> 8; @@ -711,30 +710,8 @@ static int __cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry, PDBG("%s stag_state 0x%0x type 0x%0x pdid 0x%0x, stag_idx 0x%x\n", __func__, stag_state, type, pdid, stag_idx); - if (reset_tpt_entry) - cxio_hal_pblpool_free(rdev_p, *pbl_addr, *pbl_size << 3); - else if (!rereg) { - *pbl_addr = cxio_hal_pblpool_alloc(rdev_p, *pbl_size << 3); - if (!*pbl_addr) { - return -ENOMEM; - } - } - mutex_lock(&rdev_p->ctrl_qp.lock); - /* write PBL first if any - update pbl only if pbl list exist */ - if (pbl) { - - PDBG("%s *pdb_addr 0x%x, pbl_base 0x%x, pbl_size %d\n", - __func__, *pbl_addr, rdev_p->rnic_info.pbl_base, - *pbl_size); - err = cxio_hal_ctrl_qp_write_mem(rdev_p, - (*pbl_addr >> 5), - (*pbl_size << 3), pbl, 0); - if (err) - goto ret; - } - /* write TPT entry */ if (reset_tpt_entry) memset(&tpt, 0, sizeof(tpt)); @@ -749,23 +726,23 @@ static int __cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry, V_TPT_ADDR_TYPE((zbva ? TPT_ZBTO : TPT_VATO)) | V_TPT_PAGE_SIZE(page_size)); tpt.rsvd_pbl_addr = reset_tpt_entry ? 0 : - cpu_to_be32(V_TPT_PBL_ADDR(PBL_OFF(rdev_p, *pbl_addr)>>3)); + cpu_to_be32(V_TPT_PBL_ADDR(PBL_OFF(rdev_p, pbl_addr)>>3)); tpt.len = cpu_to_be32(len); tpt.va_hi = cpu_to_be32((u32) (to >> 32)); tpt.va_low_or_fbo = cpu_to_be32((u32) (to & 0xFFFFFFFFULL)); tpt.rsvd_bind_cnt_or_pstag = 0; tpt.rsvd_pbl_size = reset_tpt_entry ? 
0 : - cpu_to_be32(V_TPT_PBL_SIZE((*pbl_size) >> 2)); + cpu_to_be32(V_TPT_PBL_SIZE(pbl_size >> 2)); } err = cxio_hal_ctrl_qp_write_mem(rdev_p, stag_idx + (rdev_p->rnic_info.tpt_base >> 5), - sizeof(tpt), &tpt, 1); + sizeof(tpt), &tpt); /* release the stag index to free pool */ if (reset_tpt_entry) cxio_hal_put_stag(rdev_p->rscp, stag_idx); -ret: + wptr = rdev_p->ctrl_qp.wptr; mutex_unlock(&rdev_p->ctrl_qp.lock); if (!err) @@ -776,44 +753,67 @@ ret: return err; } +int cxio_write_pbl(struct cxio_rdev *rdev_p, __be64 *pbl, + u32 pbl_addr, u32 pbl_size) +{ + u32 wptr; + int err; + + PDBG("%s *pdb_addr 0x%x, pbl_base 0x%x, pbl_size %d\n", + __func__, pbl_addr, rdev_p->rnic_info.pbl_base, + pbl_size); + + mutex_lock(&rdev_p->ctrl_qp.lock); + err = cxio_hal_ctrl_qp_write_mem(rdev_p, pbl_addr >> 5, pbl_size << 3, + pbl); + wptr = rdev_p->ctrl_qp.wptr; + mutex_unlock(&rdev_p->ctrl_qp.lock); + if (err) + return err; + + if (wait_event_interruptible(rdev_p->ctrl_qp.waitq, + SEQ32_GE(rdev_p->ctrl_qp.rptr, + wptr))) + return -ERESTARTSYS; + + return 0; +} + int cxio_register_phys_mem(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid, enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len, - u8 page_size, __be64 *pbl, u32 *pbl_size, - u32 *pbl_addr) + u8 page_size, u32 pbl_size, u32 pbl_addr) { *stag = T3_STAG_UNSET; return __cxio_tpt_op(rdev_p, 0, stag, 1, pdid, TPT_NON_SHARED_MR, perm, - zbva, to, len, page_size, pbl, pbl_size, pbl_addr); + zbva, to, len, page_size, pbl_size, pbl_addr); } int cxio_reregister_phys_mem(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid, enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len, - u8 page_size, __be64 *pbl, u32 *pbl_size, - u32 *pbl_addr) + u8 page_size, u32 pbl_size, u32 pbl_addr) { return __cxio_tpt_op(rdev_p, 0, stag, 1, pdid, TPT_NON_SHARED_MR, perm, - zbva, to, len, page_size, pbl, pbl_size, pbl_addr); + zbva, to, len, page_size, pbl_size, pbl_addr); } int cxio_dereg_mem(struct cxio_rdev *rdev_p, u32 stag, u32 pbl_size, u32 pbl_addr) { - return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0, NULL, - &pbl_size, &pbl_addr); + return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0, + pbl_size, pbl_addr); } int cxio_allocate_window(struct cxio_rdev *rdev_p, u32 * stag, u32 pdid) { - u32 pbl_size = 0; *stag = T3_STAG_UNSET; return __cxio_tpt_op(rdev_p, 0, stag, 0, pdid, TPT_MW, 0, 0, 0ULL, 0, 0, - NULL, &pbl_size, NULL); + 0, 0); } int cxio_deallocate_window(struct cxio_rdev *rdev_p, u32 stag) { - return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0, NULL, - NULL, NULL); + return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0, + 0, 0); } int cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr) diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.h b/drivers/infiniband/hw/cxgb3/cxio_hal.h index 69ab08ebc680..6e128f6bab05 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_hal.h +++ b/drivers/infiniband/hw/cxgb3/cxio_hal.h @@ -154,14 +154,14 @@ int cxio_create_qp(struct cxio_rdev *rdev, u32 kernel_domain, struct t3_wq *wq, int cxio_destroy_qp(struct cxio_rdev *rdev, struct t3_wq *wq, struct cxio_ucontext *uctx); int cxio_peek_cq(struct t3_wq *wr, struct t3_cq *cq, int opcode); +int cxio_write_pbl(struct cxio_rdev *rdev_p, __be64 *pbl, + u32 pbl_addr, u32 pbl_size); int cxio_register_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid, enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len, - u8 page_size, __be64 *pbl, u32 *pbl_size, - u32 *pbl_addr); + u8 page_size, u32 pbl_size, u32 pbl_addr); int 
cxio_reregister_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid, enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len, - u8 page_size, __be64 *pbl, u32 *pbl_size, - u32 *pbl_addr); + u8 page_size, u32 pbl_size, u32 pbl_addr); int cxio_dereg_mem(struct cxio_rdev *rdev, u32 stag, u32 pbl_size, u32 pbl_addr); int cxio_allocate_window(struct cxio_rdev *rdev, u32 * stag, u32 pdid); diff --git a/drivers/infiniband/hw/cxgb3/iwch_mem.c b/drivers/infiniband/hw/cxgb3/iwch_mem.c index 58c3d61bcd14..ec49a5cbdebb 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_mem.c +++ b/drivers/infiniband/hw/cxgb3/iwch_mem.c @@ -35,17 +35,26 @@ #include #include "cxio_hal.h" +#include "cxio_resource.h" #include "iwch.h" #include "iwch_provider.h" -int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php, - struct iwch_mr *mhp, - int shift, - __be64 *page_list) +static void iwch_finish_mem_reg(struct iwch_mr *mhp, u32 stag) { - u32 stag; u32 mmid; + mhp->attr.state = 1; + mhp->attr.stag = stag; + mmid = stag >> 8; + mhp->ibmr.rkey = mhp->ibmr.lkey = stag; + insert_handle(mhp->rhp, &mhp->rhp->mmidr, mhp, mmid); + PDBG("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp); +} + +int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php, + struct iwch_mr *mhp, int shift) +{ + u32 stag; if (cxio_register_phys_mem(&rhp->rdev, &stag, mhp->attr.pdid, @@ -53,28 +62,21 @@ int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php, mhp->attr.zbva, mhp->attr.va_fbo, mhp->attr.len, - shift-12, - page_list, - &mhp->attr.pbl_size, &mhp->attr.pbl_addr)) + shift - 12, + mhp->attr.pbl_size, mhp->attr.pbl_addr)) return -ENOMEM; - mhp->attr.state = 1; - mhp->attr.stag = stag; - mmid = stag >> 8; - mhp->ibmr.rkey = mhp->ibmr.lkey = stag; - insert_handle(rhp, &rhp->mmidr, mhp, mmid); - PDBG("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp); + + iwch_finish_mem_reg(mhp, stag); + return 0; } int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php, struct iwch_mr *mhp, int shift, - __be64 *page_list, int npages) { u32 stag; - u32 mmid; - /* We could support this... 
*/ if (npages > mhp->attr.pbl_size) @@ -87,19 +89,40 @@ int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php, mhp->attr.zbva, mhp->attr.va_fbo, mhp->attr.len, - shift-12, - page_list, - &mhp->attr.pbl_size, &mhp->attr.pbl_addr)) + shift - 12, + mhp->attr.pbl_size, mhp->attr.pbl_addr)) return -ENOMEM; - mhp->attr.state = 1; - mhp->attr.stag = stag; - mmid = stag >> 8; - mhp->ibmr.rkey = mhp->ibmr.lkey = stag; - insert_handle(rhp, &rhp->mmidr, mhp, mmid); - PDBG("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp); + + iwch_finish_mem_reg(mhp, stag); + + return 0; +} + +int iwch_alloc_pbl(struct iwch_mr *mhp, int npages) +{ + mhp->attr.pbl_addr = cxio_hal_pblpool_alloc(&mhp->rhp->rdev, + npages << 3); + + if (!mhp->attr.pbl_addr) + return -ENOMEM; + + mhp->attr.pbl_size = npages; + return 0; } +void iwch_free_pbl(struct iwch_mr *mhp) +{ + cxio_hal_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr, + mhp->attr.pbl_size << 3); +} + +int iwch_write_pbl(struct iwch_mr *mhp, __be64 *pages, int npages, int offset) +{ + return cxio_write_pbl(&mhp->rhp->rdev, pages, + mhp->attr.pbl_addr + (offset << 3), npages); +} + int build_phys_page_list(struct ib_phys_buf *buffer_list, int num_phys_buf, u64 *iova_start, diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index d07d3a377b5f..8934178a23ee 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -442,6 +442,7 @@ static int iwch_dereg_mr(struct ib_mr *ib_mr) mmid = mhp->attr.stag >> 8; cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, mhp->attr.pbl_addr); + iwch_free_pbl(mhp); remove_handle(rhp, &rhp->mmidr, mmid); if (mhp->kva) kfree((void *) (unsigned long) mhp->kva); @@ -475,6 +476,8 @@ static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd, if (!mhp) return ERR_PTR(-ENOMEM); + mhp->rhp = rhp; + /* First check that we have enough alignment */ if ((*iova_start & ~PAGE_MASK) != (buffer_list[0].addr & ~PAGE_MASK)) { ret = -EINVAL; @@ -492,7 +495,17 @@ static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd, if (ret) goto err; - mhp->rhp = rhp; + ret = iwch_alloc_pbl(mhp, npages); + if (ret) { + kfree(page_list); + goto err_pbl; + } + + ret = iwch_write_pbl(mhp, page_list, npages, 0); + kfree(page_list); + if (ret) + goto err_pbl; + mhp->attr.pdid = php->pdid; mhp->attr.zbva = 0; @@ -502,12 +515,15 @@ static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd, mhp->attr.len = (u32) total_size; mhp->attr.pbl_size = npages; - ret = iwch_register_mem(rhp, php, mhp, shift, page_list); - kfree(page_list); - if (ret) { - goto err; - } + ret = iwch_register_mem(rhp, php, mhp, shift); + if (ret) + goto err_pbl; + return &mhp->ibmr; + +err_pbl: + iwch_free_pbl(mhp); + err: kfree(mhp); return ERR_PTR(ret); @@ -560,7 +576,7 @@ static int iwch_reregister_phys_mem(struct ib_mr *mr, return ret; } - ret = iwch_reregister_mem(rhp, php, &mh, shift, page_list, npages); + ret = iwch_reregister_mem(rhp, php, &mh, shift, npages); kfree(page_list); if (ret) { return ret; @@ -602,6 +618,8 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (!mhp) return ERR_PTR(-ENOMEM); + mhp->rhp = rhp; + mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc, 0); if (IS_ERR(mhp->umem)) { err = PTR_ERR(mhp->umem); @@ -615,10 +633,14 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, list_for_each_entry(chunk, &mhp->umem->chunk_list, list) n += chunk->nents; - pages = kmalloc(n * 
sizeof(u64), GFP_KERNEL); + err = iwch_alloc_pbl(mhp, n); + if (err) + goto err; + + pages = (__be64 *) __get_free_page(GFP_KERNEL); if (!pages) { err = -ENOMEM; - goto err; + goto err_pbl; } i = n = 0; @@ -630,25 +652,38 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, pages[i++] = cpu_to_be64(sg_dma_address( &chunk->page_list[j]) + mhp->umem->page_size * k); + if (i == PAGE_SIZE / sizeof *pages) { + err = iwch_write_pbl(mhp, pages, i, n); + if (err) + goto pbl_done; + n += i; + i = 0; + } } } - mhp->rhp = rhp; + if (i) + err = iwch_write_pbl(mhp, pages, i, n); + +pbl_done: + free_page((unsigned long) pages); + if (err) + goto err_pbl; + mhp->attr.pdid = php->pdid; mhp->attr.zbva = 0; mhp->attr.perms = iwch_ib_to_tpt_access(acc); mhp->attr.va_fbo = virt; mhp->attr.page_size = shift - 12; mhp->attr.len = (u32) length; - mhp->attr.pbl_size = i; - err = iwch_register_mem(rhp, php, mhp, shift, pages); - kfree(pages); + + err = iwch_register_mem(rhp, php, mhp, shift); if (err) - goto err; + goto err_pbl; if (udata && !t3a_device(rhp)) { uresp.pbl_addr = (mhp->attr.pbl_addr - - rhp->rdev.rnic_info.pbl_base) >> 3; + rhp->rdev.rnic_info.pbl_base) >> 3; PDBG("%s user resp pbl_addr 0x%x\n", __func__, uresp.pbl_addr); @@ -661,6 +696,9 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, return &mhp->ibmr; +err_pbl: + iwch_free_pbl(mhp); + err: ib_umem_release(mhp->umem); kfree(mhp); diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h index db5100d27ca2..836163fc5429 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.h +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.h @@ -340,14 +340,14 @@ int iwch_quiesce_qps(struct iwch_cq *chp); int iwch_resume_qps(struct iwch_cq *chp); void stop_read_rep_timer(struct iwch_qp *qhp); int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php, - struct iwch_mr *mhp, - int shift, - __be64 *page_list); + struct iwch_mr *mhp, int shift); int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php, struct iwch_mr *mhp, int shift, - __be64 *page_list, int npages); +int iwch_alloc_pbl(struct iwch_mr *mhp, int npages); +void iwch_free_pbl(struct iwch_mr *mhp); +int iwch_write_pbl(struct iwch_mr *mhp, __be64 *pages, int npages, int offset); int build_phys_page_list(struct ib_phys_buf *buffer_list, int num_phys_buf, u64 *iova_start, -- cgit v1.2.3 From 5f51efc195dfb860c60fafb4e47fe4b7cad2626d Mon Sep 17 00:00:00 2001 From: Michael Albaugh Date: Wed, 7 May 2008 10:56:47 -0700 Subject: IB/ipath: Only warn about prototype chip during init We warn about prototype chips, but the function that checks for support is also called as a result of a get_portinfo request, which can clutter the logs. Restrict warning to only appear during initialization. Signed-off-by: Michael Albaugh Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_iba7220.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/ipath/ipath_iba7220.c b/drivers/infiniband/hw/ipath/ipath_iba7220.c index e3ec0d1bdf50..5f693de66542 100644 --- a/drivers/infiniband/hw/ipath/ipath_iba7220.c +++ b/drivers/infiniband/hw/ipath/ipath_iba7220.c @@ -870,8 +870,9 @@ static int ipath_7220_boardname(struct ipath_devdata *dd, char *name, "revision %u.%u!\n", dd->ipath_majrev, dd->ipath_minrev); ret = 1; - } else if (dd->ipath_minrev == 1) { - /* Rev1 chips are prototype. 
Complain, but allow use */ + } else if (dd->ipath_minrev == 1 && + !(dd->ipath_flags & IPATH_INITTED)) { + /* Rev1 chips are prototype. Complain at init, but allow use */ ipath_dev_err(dd, "Unsupported hardware " "revision %u.%u, Contact support@qlogic.com\n", dd->ipath_majrev, dd->ipath_minrev); -- cgit v1.2.3 From 6e87d1500713767866db0668bbcec75719576f3c Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Wed, 7 May 2008 10:57:14 -0700 Subject: IB/ipath: Only increment SSN if WQE is put on send queue If a send work request has immediate errors and is not put on the send queue, we shouldn't update any of the QP state. The increment of the SSN wasn't obeying this. Signed-off-by: Ralph Campbell Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index e63927cce5b5..5015cd2e57bd 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -396,7 +396,6 @@ static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr) wqe = get_swqe_ptr(qp, qp->s_head); wqe->wr = *wr; - wqe->ssn = qp->s_ssn++; wqe->length = 0; if (wr->num_sge) { acc = wr->opcode >= IB_WR_RDMA_READ ? @@ -422,6 +421,7 @@ static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr) goto bail_inval; } else if (wqe->length > to_idev(qp->ibqp.device)->dd->ipath_ibmtu) goto bail_inval; + wqe->ssn = qp->s_ssn++; qp->s_head = next; ret = 0; -- cgit v1.2.3 From b4d390d8d219452e5d4257c87134a6934d7fabeb Mon Sep 17 00:00:00 2001 From: Dave Olson Date: Wed, 7 May 2008 10:57:48 -0700 Subject: IB/ipath: Fix bug that can leave sends disabled after freeze recovery The semantics of cancel_sends changed, but the code using it was missed. Don't leave sends and pioavail updates disabled, and add a comment as to why the force update is needed. Signed-off-by: Dave Olson Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_intr.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c index 1b58f4737c71..45c4c068ab1e 100644 --- a/drivers/infiniband/hw/ipath/ipath_intr.c +++ b/drivers/infiniband/hw/ipath/ipath_intr.c @@ -933,11 +933,15 @@ void ipath_clear_freeze(struct ipath_devdata *dd) * therefore would not be sent, and eventually * might cause the process to run out of bufs */ - ipath_cancel_sends(dd, 0); + ipath_cancel_sends(dd, 1); ipath_write_kreg(dd, dd->ipath_kregs->kr_control, dd->ipath_control); - /* ensure pio avail updates continue */ + /* + * ensure pio avail updates continue (because the update + * won't have happened from cancel_sends because we were + * still in freeze + */ ipath_force_pio_avail_update(dd); /* -- cgit v1.2.3 From 2bfc8e9edf200aeeca18ee44bcbf6bce65438a42 Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Wed, 7 May 2008 10:58:50 -0700 Subject: IB/ipath: Return the correct opcode for RDMA WRITE with immediate This patch fixes a bug in the RC responder which generates a completion entry with the wrong opcode when an RDMA WRITE with immediate is received. 
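The gist of the fix, condensed (stand-in enums rather than the real IB verbs definitions; the actual change is in the diff that follows):

#include <stdio.h>

enum recv_opcode { OP_SEND_LAST, OP_RDMA_WRITE_LAST_WITH_IMM,
		   OP_RDMA_WRITE_ONLY_WITH_IMM };
enum wc_opcode   { WC_RECV, WC_RECV_RDMA_WITH_IMM };

static enum wc_opcode completion_opcode(enum recv_opcode op)
{
	if (op == OP_RDMA_WRITE_LAST_WITH_IMM ||
	    op == OP_RDMA_WRITE_ONLY_WITH_IMM)
		return WC_RECV_RDMA_WITH_IMM;   /* data arrived via RDMA WRITE */
	return WC_RECV;                         /* ordinary SEND               */
}

int main(void)
{
	printf("%d\n", completion_opcode(OP_RDMA_WRITE_ONLY_WITH_IMM));
	return 0;
}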
Signed-off-by: Ralph Campbell Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_rc.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c index c405dfba5531..08b11b567614 100644 --- a/drivers/infiniband/hw/ipath/ipath_rc.c +++ b/drivers/infiniband/hw/ipath/ipath_rc.c @@ -1746,7 +1746,11 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, qp->r_wrid_valid = 0; wc.wr_id = qp->r_wr_id; wc.status = IB_WC_SUCCESS; - wc.opcode = IB_WC_RECV; + if (opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE) || + opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE)) + wc.opcode = IB_WC_RECV_RDMA_WITH_IMM; + else + wc.opcode = IB_WC_RECV; wc.vendor_err = 0; wc.qp = &qp->ibqp; wc.src_qp = qp->remote_qpn; -- cgit v1.2.3 From 2889d1ef1240591fa4c72a6753e0a8d1c6e18140 Mon Sep 17 00:00:00 2001 From: Michael Albaugh Date: Wed, 7 May 2008 10:59:23 -0700 Subject: IB/ipath: Fix count of packets received by kernel The loop in ipath_kreceive() that processes packets increments the loop-index 'i' once too often, because the exit condition does not depend on it, and is checked after the increment. By adding a check for !last to the iterator in the for loop, we correct that in a way that is not so likely to be re-broken by changes in the loop body. Signed-off-by: Michael Albaugh Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_driver.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c index acf30c06a0c0..f81dd4acdc60 100644 --- a/drivers/infiniband/hw/ipath/ipath_driver.c +++ b/drivers/infiniband/hw/ipath/ipath_driver.c @@ -1197,7 +1197,7 @@ void ipath_kreceive(struct ipath_portdata *pd) } reloop: - for (last = 0, i = 1; !last; i++) { + for (last = 0, i = 1; !last; i += !last) { hdr = dd->ipath_f_get_msgheader(dd, rhf_addr); eflags = ipath_hdrget_err_flags(rhf_addr); etype = ipath_hdrget_rcv_type(rhf_addr); -- cgit v1.2.3 From e2ab41cae418108f376ad1634d7507f56379f7a2 Mon Sep 17 00:00:00 2001 From: Dave Olson Date: Wed, 7 May 2008 11:00:15 -0700 Subject: IB/ipath: Need to always request and handle PIO avail interrupts Now that we always use PIO for vl15 on 7220, we could get stuck forever if we happened to run out of PIO buffers from the verbs code, because the setup code wouldn't run; the interrupt was also ignored if SDMA was supported. We also have to reduce the pio update threshold if we have fewer kernel buffers than the existing threshold. Clean up the initialization a bit to get ordering safer and more sensible, and use the existing ipath_chg_kernavail call to do init, rather than doing it separately. Drop unnecessary clearing of pio buffer on pio parity error. Drop incorrect updating of pioavailshadow when exitting freeze mode (software state may not match chip state if buffer has been allocated and not yet written). If we couldn't get a kernel buffer for a while, make sure we are in sync with hardware, mainly to handle the exitting freeze case. 
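One piece worth calling out is the lazy shadow reset: buffers that are not on the kernel-available list get their busy bit forced on in the software shadow while the generation bits are left alone, so allocation state stays consistent after leaving freeze mode. A compressed user-space illustration of that idea (the two-bits-per-buffer layout here is a stand-in, not the real INFINIPATH register format):

#include <stdint.h>
#include <stdio.h>

/* Stand-in layout: 2 bits per buffer in 'shadow' (bit 2*i = generation,
 * bit 2*i+1 = busy); 'kernel_owned' has 1 bit per buffer. */
static uint64_t busy_out_shadow(uint64_t shadow, uint32_t kernel_owned,
				unsigned int nbufs)
{
	unsigned int i;

	for (i = 0; i < nbufs; i++)
		if (!(kernel_owned & (1u << i)))
			shadow |= 1ULL << (2 * i + 1);   /* force BUSY         */
	return shadow;                                   /* GEN bits untouched */
}

int main(void)
{
	/* 4 buffers; generation bits of buffers 1 and 3 set; kernel owns 0 and 1 */
	uint64_t shadow = (1ULL << 2) | (1ULL << 6);

	printf("%#llx\n", (unsigned long long)busy_out_shadow(shadow, 0x3, 4));
	return 0;
}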
Signed-off-by: Dave Olson Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_driver.c | 128 +++++++++++++++++++++++--- drivers/infiniband/hw/ipath/ipath_file_ops.c | 72 ++++++--------- drivers/infiniband/hw/ipath/ipath_iba7220.c | 21 ++--- drivers/infiniband/hw/ipath/ipath_init_chip.c | 95 +++++++++---------- drivers/infiniband/hw/ipath/ipath_intr.c | 82 +++-------------- drivers/infiniband/hw/ipath/ipath_kernel.h | 8 +- drivers/infiniband/hw/ipath/ipath_ruc.c | 7 +- drivers/infiniband/hw/ipath/ipath_sdma.c | 13 ++- 8 files changed, 224 insertions(+), 202 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c index f81dd4acdc60..2036d38fac47 100644 --- a/drivers/infiniband/hw/ipath/ipath_driver.c +++ b/drivers/infiniband/hw/ipath/ipath_driver.c @@ -1428,6 +1428,40 @@ static void ipath_update_pio_bufs(struct ipath_devdata *dd) spin_unlock_irqrestore(&ipath_pioavail_lock, flags); } +/* + * used to force update of pioavailshadow if we can't get a pio buffer. + * Needed primarily due to exitting freeze mode after recovering + * from errors. Done lazily, because it's safer (known to not + * be writing pio buffers). + */ +static void ipath_reset_availshadow(struct ipath_devdata *dd) +{ + int i, im; + unsigned long flags; + + spin_lock_irqsave(&ipath_pioavail_lock, flags); + for (i = 0; i < dd->ipath_pioavregs; i++) { + u64 val, oldval; + /* deal with 6110 chip bug on high register #s */ + im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ? + i ^ 1 : i; + val = le64_to_cpu(dd->ipath_pioavailregs_dma[im]); + /* + * busy out the buffers not in the kernel avail list, + * without changing the generation bits. + */ + oldval = dd->ipath_pioavailshadow[i]; + dd->ipath_pioavailshadow[i] = val | + ((~dd->ipath_pioavailkernel[i] << + INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT) & + 0xaaaaaaaaaaaaaaaaULL); /* All BUSY bits in qword */ + if (oldval != dd->ipath_pioavailshadow[i]) + ipath_dbg("shadow[%d] was %Lx, now %lx\n", + i, oldval, dd->ipath_pioavailshadow[i]); + } + spin_unlock_irqrestore(&ipath_pioavail_lock, flags); +} + /** * ipath_setrcvhdrsize - set the receive header size * @dd: the infinipath device @@ -1482,9 +1516,12 @@ static noinline void no_pio_bufs(struct ipath_devdata *dd) */ ipath_stats.sps_nopiobufs++; if (!(++dd->ipath_consec_nopiobuf % 100000)) { - ipath_dbg("%u pio sends with no bufavail; dmacopy: " - "%llx %llx %llx %llx; shadow: %lx %lx %lx %lx\n", + ipath_force_pio_avail_update(dd); /* at start */ + ipath_dbg("%u tries no piobufavail ts%lx; dmacopy: " + "%llx %llx %llx %llx\n" + "ipath shadow: %lx %lx %lx %lx\n", dd->ipath_consec_nopiobuf, + (unsigned long)get_cycles(), (unsigned long long) le64_to_cpu(dma[0]), (unsigned long long) le64_to_cpu(dma[1]), (unsigned long long) le64_to_cpu(dma[2]), @@ -1496,14 +1533,17 @@ static noinline void no_pio_bufs(struct ipath_devdata *dd) */ if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) > (sizeof(shadow[0]) * 4 * 4)) - ipath_dbg("2nd group: dmacopy: %llx %llx " - "%llx %llx; shadow: %lx %lx %lx %lx\n", + ipath_dbg("2nd group: dmacopy: " + "%llx %llx %llx %llx\n" + "ipath shadow: %lx %lx %lx %lx\n", (unsigned long long)le64_to_cpu(dma[4]), (unsigned long long)le64_to_cpu(dma[5]), (unsigned long long)le64_to_cpu(dma[6]), (unsigned long long)le64_to_cpu(dma[7]), - shadow[4], shadow[5], shadow[6], - shadow[7]); + shadow[4], shadow[5], shadow[6], shadow[7]); + + /* at end, so update likely happened */ + ipath_reset_availshadow(dd); } } @@ 
-1652,19 +1692,46 @@ void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start, unsigned len, int avail) { unsigned long flags; - unsigned end; + unsigned end, cnt = 0, next; /* There are two bits per send buffer (busy and generation) */ start *= 2; - len *= 2; - end = start + len; + end = start + len * 2; - /* Set or clear the generation bits. */ spin_lock_irqsave(&ipath_pioavail_lock, flags); + /* Set or clear the busy bit in the shadow. */ while (start < end) { if (avail) { - __clear_bit(start + INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT, - dd->ipath_pioavailshadow); + unsigned long dma; + int i, im; + /* + * the BUSY bit will never be set, because we disarm + * the user buffers before we hand them back to the + * kernel. We do have to make sure the generation + * bit is set correctly in shadow, since it could + * have changed many times while allocated to user. + * We can't use the bitmap functions on the full + * dma array because it is always little-endian, so + * we have to flip to host-order first. + * BITS_PER_LONG is slightly wrong, since it's + * always 64 bits per register in chip... + * We only work on 64 bit kernels, so that's OK. + */ + /* deal with 6110 chip bug on high register #s */ + i = start / BITS_PER_LONG; + im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ? + i ^ 1 : i; + __clear_bit(INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT + + start, dd->ipath_pioavailshadow); + dma = (unsigned long) le64_to_cpu( + dd->ipath_pioavailregs_dma[im]); + if (test_bit((INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT + + start) % BITS_PER_LONG, &dma)) + __set_bit(INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT + + start, dd->ipath_pioavailshadow); + else + __clear_bit(INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT + + start, dd->ipath_pioavailshadow); __set_bit(start, dd->ipath_pioavailkernel); } else { __set_bit(start + INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT, @@ -1673,7 +1740,44 @@ void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start, } start += 2; } + + if (dd->ipath_pioupd_thresh) { + end = 2 * (dd->ipath_piobcnt2k + dd->ipath_piobcnt4k); + next = find_first_bit(dd->ipath_pioavailkernel, end); + while (next < end) { + cnt++; + next = find_next_bit(dd->ipath_pioavailkernel, end, + next + 1); + } + } spin_unlock_irqrestore(&ipath_pioavail_lock, flags); + + /* + * When moving buffers from kernel to user, if number assigned to + * the user is less than the pio update threshold, and threshold + * is supported (cnt was computed > 0), drop the update threshold + * so we update at least once per allocated number of buffers. + * In any case, if the kernel buffers are less than the threshold, + * drop the threshold. We don't bother increasing it, having once + * decreased it, since it would typically just cycle back and forth. + * If we don't decrease below buffers in use, we can wait a long + * time for an update, until some other context uses PIO buffers. 
+ */ + if (!avail && len < cnt) + cnt = len; + if (cnt < dd->ipath_pioupd_thresh) { + dd->ipath_pioupd_thresh = cnt; + ipath_dbg("Decreased pio update threshold to %u\n", + dd->ipath_pioupd_thresh); + spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); + dd->ipath_sendctrl &= ~(INFINIPATH_S_UPDTHRESH_MASK + << INFINIPATH_S_UPDTHRESH_SHIFT); + dd->ipath_sendctrl |= dd->ipath_pioupd_thresh + << INFINIPATH_S_UPDTHRESH_SHIFT; + ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, + dd->ipath_sendctrl); + spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); + } } /** diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c index 8b1752202e78..3295177c937e 100644 --- a/drivers/infiniband/hw/ipath/ipath_file_ops.c +++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c @@ -173,47 +173,25 @@ static int ipath_get_base_info(struct file *fp, (void *) dd->ipath_statusp - (void *) dd->ipath_pioavailregs_dma; if (!shared) { - kinfo->spi_piocnt = dd->ipath_pbufsport; + kinfo->spi_piocnt = pd->port_piocnt; kinfo->spi_piobufbase = (u64) pd->port_piobufs; kinfo->__spi_uregbase = (u64) dd->ipath_uregbase + dd->ipath_ureg_align * pd->port_port; } else if (master) { - kinfo->spi_piocnt = (dd->ipath_pbufsport / subport_cnt) + - (dd->ipath_pbufsport % subport_cnt); + kinfo->spi_piocnt = (pd->port_piocnt / subport_cnt) + + (pd->port_piocnt % subport_cnt); /* Master's PIO buffers are after all the slave's */ kinfo->spi_piobufbase = (u64) pd->port_piobufs + dd->ipath_palign * - (dd->ipath_pbufsport - kinfo->spi_piocnt); + (pd->port_piocnt - kinfo->spi_piocnt); } else { unsigned slave = subport_fp(fp) - 1; - kinfo->spi_piocnt = dd->ipath_pbufsport / subport_cnt; + kinfo->spi_piocnt = pd->port_piocnt / subport_cnt; kinfo->spi_piobufbase = (u64) pd->port_piobufs + dd->ipath_palign * kinfo->spi_piocnt * slave; } - /* - * Set the PIO avail update threshold to no larger - * than the number of buffers per process. Note that - * we decrease it here, but won't ever increase it. 
- */ - if (dd->ipath_pioupd_thresh && - kinfo->spi_piocnt < dd->ipath_pioupd_thresh) { - unsigned long flags; - - dd->ipath_pioupd_thresh = kinfo->spi_piocnt; - ipath_dbg("Decreased pio update threshold to %u\n", - dd->ipath_pioupd_thresh); - spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); - dd->ipath_sendctrl &= ~(INFINIPATH_S_UPDTHRESH_MASK - << INFINIPATH_S_UPDTHRESH_SHIFT); - dd->ipath_sendctrl |= dd->ipath_pioupd_thresh - << INFINIPATH_S_UPDTHRESH_SHIFT; - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, - dd->ipath_sendctrl); - spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); - } - if (shared) { kinfo->spi_port_uregbase = (u64) dd->ipath_uregbase + dd->ipath_ureg_align * pd->port_port; @@ -1309,19 +1287,19 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma) ureg = dd->ipath_uregbase + dd->ipath_ureg_align * pd->port_port; if (!pd->port_subport_cnt) { /* port is not shared */ - piocnt = dd->ipath_pbufsport; + piocnt = pd->port_piocnt; piobufs = pd->port_piobufs; } else if (!subport_fp(fp)) { /* caller is the master */ - piocnt = (dd->ipath_pbufsport / pd->port_subport_cnt) + - (dd->ipath_pbufsport % pd->port_subport_cnt); + piocnt = (pd->port_piocnt / pd->port_subport_cnt) + + (pd->port_piocnt % pd->port_subport_cnt); piobufs = pd->port_piobufs + - dd->ipath_palign * (dd->ipath_pbufsport - piocnt); + dd->ipath_palign * (pd->port_piocnt - piocnt); } else { unsigned slave = subport_fp(fp) - 1; /* caller is a slave */ - piocnt = dd->ipath_pbufsport / pd->port_subport_cnt; + piocnt = pd->port_piocnt / pd->port_subport_cnt; piobufs = pd->port_piobufs + dd->ipath_palign * piocnt * slave; } @@ -1633,9 +1611,6 @@ static int try_alloc_port(struct ipath_devdata *dd, int port, port_fp(fp) = pd; pd->port_pid = current->pid; strncpy(pd->port_comm, current->comm, sizeof(pd->port_comm)); - ipath_chg_pioavailkernel(dd, - dd->ipath_pbufsport * (pd->port_port - 1), - dd->ipath_pbufsport, 0); ipath_stats.sps_ports++; ret = 0; } else @@ -1938,11 +1913,25 @@ static int ipath_do_user_init(struct file *fp, /* for now we do nothing with rcvhdrcnt: uinfo->spu_rcvhdrcnt */ + /* some ports may get extra buffers, calculate that here */ + if (pd->port_port <= dd->ipath_ports_extrabuf) + pd->port_piocnt = dd->ipath_pbufsport + 1; + else + pd->port_piocnt = dd->ipath_pbufsport; + /* for right now, kernel piobufs are at end, so port 1 is at 0 */ + if (pd->port_port <= dd->ipath_ports_extrabuf) + pd->port_pio_base = (dd->ipath_pbufsport + 1) + * (pd->port_port - 1); + else + pd->port_pio_base = dd->ipath_ports_extrabuf + + dd->ipath_pbufsport * (pd->port_port - 1); pd->port_piobufs = dd->ipath_piobufbase + - dd->ipath_pbufsport * (pd->port_port - 1) * dd->ipath_palign; - ipath_cdbg(VERBOSE, "Set base of piobufs for port %u to 0x%x\n", - pd->port_port, pd->port_piobufs); + pd->port_pio_base * dd->ipath_palign; + ipath_cdbg(VERBOSE, "piobuf base for port %u is 0x%x, piocnt %u," + " first pio %u\n", pd->port_port, pd->port_piobufs, + pd->port_piocnt, pd->port_pio_base); + ipath_chg_pioavailkernel(dd, pd->port_pio_base, pd->port_piocnt, 0); /* * Now allocate the rcvhdr Q and eager TIDs; skip the TID @@ -2107,7 +2096,6 @@ static int ipath_close(struct inode *in, struct file *fp) } if (dd->ipath_kregbase) { - int i; /* atomically clear receive enable port and intr avail. 
*/ clear_bit(dd->ipath_r_portenable_shift + port, &dd->ipath_rcvctrl); @@ -2136,9 +2124,9 @@ static int ipath_close(struct inode *in, struct file *fp) ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdraddr, pd->port_port, dd->ipath_dummy_hdrq_phys); - i = dd->ipath_pbufsport * (port - 1); - ipath_disarm_piobufs(dd, i, dd->ipath_pbufsport); - ipath_chg_pioavailkernel(dd, i, dd->ipath_pbufsport, 1); + ipath_disarm_piobufs(dd, pd->port_pio_base, pd->port_piocnt); + ipath_chg_pioavailkernel(dd, pd->port_pio_base, + pd->port_piocnt, 1); dd->ipath_f_clear_tids(dd, pd->port_port); diff --git a/drivers/infiniband/hw/ipath/ipath_iba7220.c b/drivers/infiniband/hw/ipath/ipath_iba7220.c index 5f693de66542..8eee7830f042 100644 --- a/drivers/infiniband/hw/ipath/ipath_iba7220.c +++ b/drivers/infiniband/hw/ipath/ipath_iba7220.c @@ -595,7 +595,7 @@ static void ipath_7220_txe_recover(struct ipath_devdata *dd) dev_info(&dd->pcidev->dev, "Recovering from TXE PIO parity error\n"); - ipath_disarm_senderrbufs(dd, 1); + ipath_disarm_senderrbufs(dd); } @@ -675,10 +675,8 @@ static void ipath_7220_handle_hwerrors(struct ipath_devdata *dd, char *msg, ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control); if ((ctrl & INFINIPATH_C_FREEZEMODE) && !ipath_diag_inuse) { /* - * Parity errors in send memory are recoverable, - * just cancel the send (if indicated in * sendbuffererror), - * count the occurrence, unfreeze (if no other handled - * hardware error bits are set), and continue. + * Parity errors in send memory are recoverable by h/w + * just do housekeeping, exit freeze mode and continue. */ if (hwerrs & ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) @@ -687,13 +685,6 @@ static void ipath_7220_handle_hwerrors(struct ipath_devdata *dd, char *msg, hwerrs &= ~((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT); - if (!hwerrs) { - /* else leave in freeze mode */ - ipath_write_kreg(dd, - dd->ipath_kregs->kr_control, - dd->ipath_control); - goto bail; - } } if (hwerrs) { /* @@ -723,8 +714,8 @@ static void ipath_7220_handle_hwerrors(struct ipath_devdata *dd, char *msg, *dd->ipath_statusp |= IPATH_STATUS_HWERROR; dd->ipath_flags &= ~IPATH_INITTED; } else { - ipath_dbg("Clearing freezemode on ignored hardware " - "error\n"); + ipath_dbg("Clearing freezemode on ignored or " + "recovered hardware error\n"); ipath_clear_freeze(dd); } } @@ -1967,7 +1958,7 @@ static void ipath_7220_config_ports(struct ipath_devdata *dd, ushort cfgports) dd->ipath_rcvctrl); dd->ipath_p0_rcvegrcnt = 2048; /* always */ if (dd->ipath_flags & IPATH_HAS_SEND_DMA) - dd->ipath_pioreserved = 1; /* reserve a buffer */ + dd->ipath_pioreserved = 3; /* kpiobufs used for PIO */ } diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c index 27dd89476660..3e5baa43fc82 100644 --- a/drivers/infiniband/hw/ipath/ipath_init_chip.c +++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c @@ -41,7 +41,7 @@ /* * min buffers we want to have per port, after driver */ -#define IPATH_MIN_USER_PORT_BUFCNT 8 +#define IPATH_MIN_USER_PORT_BUFCNT 7 /* * Number of ports we are configured to use (to allow for more pio @@ -54,13 +54,9 @@ MODULE_PARM_DESC(cfgports, "Set max number of ports to use"); /* * Number of buffers reserved for driver (verbs and layered drivers.) - * Reserved at end of buffer list. Initialized based on - * number of PIO buffers if not set via module interface. 
+ * Initialized based on number of PIO buffers if not set via module interface. * The problem with this is that it's global, but we'll use different - * numbers for different chip types. So the default value is not - * very useful. I've redefined it for the 1.3 release so that it's - * zero unless set by the user to something else, in which case we - * try to respect it. + * numbers for different chip types. */ static ushort ipath_kpiobufs; @@ -546,9 +542,12 @@ static void enable_chip(struct ipath_devdata *dd, int reinit) pioavail = dd->ipath_pioavailregs_dma[i ^ 1]; else pioavail = dd->ipath_pioavailregs_dma[i]; - dd->ipath_pioavailshadow[i] = le64_to_cpu(pioavail) | - (~dd->ipath_pioavailkernel[i] << - INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT); + /* + * don't need to worry about ipath_pioavailkernel here + * because we will call ipath_chg_pioavailkernel() later + * in initialization, to busy out buffers as needed + */ + dd->ipath_pioavailshadow[i] = le64_to_cpu(pioavail); } /* can get counters, stats, etc. */ dd->ipath_flags |= IPATH_PRESENT; @@ -708,12 +707,11 @@ static void verify_interrupt(unsigned long opaque) int ipath_init_chip(struct ipath_devdata *dd, int reinit) { int ret = 0; - u32 val32, kpiobufs; + u32 kpiobufs, defkbufs; u32 piobufs, uports; u64 val; struct ipath_portdata *pd; gfp_t gfp_flags = GFP_USER | __GFP_COMP; - unsigned long flags; ret = init_housekeeping(dd, reinit); if (ret) @@ -753,69 +751,52 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit) dd->ipath_pioavregs = ALIGN(piobufs, sizeof(u64) * BITS_PER_BYTE / 2) / (sizeof(u64) * BITS_PER_BYTE / 2); uports = dd->ipath_cfgports ? dd->ipath_cfgports - 1 : 0; - if (ipath_kpiobufs == 0) { - /* not set by user (this is default) */ - if (piobufs > 144) - kpiobufs = 32; - else - kpiobufs = 16; - } + if (piobufs > 144) + defkbufs = 32 + dd->ipath_pioreserved; else - kpiobufs = ipath_kpiobufs; + defkbufs = 16 + dd->ipath_pioreserved; - if (kpiobufs + (uports * IPATH_MIN_USER_PORT_BUFCNT) > piobufs) { + if (ipath_kpiobufs && (ipath_kpiobufs + + (uports * IPATH_MIN_USER_PORT_BUFCNT)) > piobufs) { int i = (int) piobufs - (int) (uports * IPATH_MIN_USER_PORT_BUFCNT); if (i < 1) i = 1; dev_info(&dd->pcidev->dev, "Allocating %d PIO bufs of " "%d for kernel leaves too few for %d user ports " - "(%d each); using %u\n", kpiobufs, + "(%d each); using %u\n", ipath_kpiobufs, piobufs, uports, IPATH_MIN_USER_PORT_BUFCNT, i); /* * shouldn't change ipath_kpiobufs, because could be * different for different devices... */ kpiobufs = i; - } + } else if (ipath_kpiobufs) + kpiobufs = ipath_kpiobufs; + else + kpiobufs = defkbufs; dd->ipath_lastport_piobuf = piobufs - kpiobufs; dd->ipath_pbufsport = uports ? 
dd->ipath_lastport_piobuf / uports : 0; - val32 = dd->ipath_lastport_piobuf - (dd->ipath_pbufsport * uports); - if (val32 > 0) { - ipath_dbg("allocating %u pbufs/port leaves %u unused, " - "add to kernel\n", dd->ipath_pbufsport, val32); - dd->ipath_lastport_piobuf -= val32; - kpiobufs += val32; - ipath_dbg("%u pbufs/port leaves %u unused, add to kernel\n", - dd->ipath_pbufsport, val32); - } + /* if not an even divisor, some user ports get extra buffers */ + dd->ipath_ports_extrabuf = dd->ipath_lastport_piobuf - + (dd->ipath_pbufsport * uports); + if (dd->ipath_ports_extrabuf) + ipath_dbg("%u pbufs/port leaves some unused, add 1 buffer to " + "ports <= %u\n", dd->ipath_pbufsport, + dd->ipath_ports_extrabuf); dd->ipath_lastpioindex = 0; dd->ipath_lastpioindexl = dd->ipath_piobcnt2k; - ipath_chg_pioavailkernel(dd, 0, piobufs, 1); + /* ipath_pioavailshadow initialized earlier */ ipath_cdbg(VERBOSE, "%d PIO bufs for kernel out of %d total %u " "each for %u user ports\n", kpiobufs, piobufs, dd->ipath_pbufsport, uports); - if (dd->ipath_pioupd_thresh) { - if (dd->ipath_pbufsport < dd->ipath_pioupd_thresh) - dd->ipath_pioupd_thresh = dd->ipath_pbufsport; - if (kpiobufs < dd->ipath_pioupd_thresh) - dd->ipath_pioupd_thresh = kpiobufs; - } - ret = dd->ipath_f_early_init(dd); if (ret) { ipath_dev_err(dd, "Early initialization failure\n"); goto done; } - /* - * Cancel any possible active sends from early driver load. - * Follows early_init because some chips have to initialize - * PIO buffers in early_init to avoid false parity errors. - */ - ipath_cancel_sends(dd, 0); - /* * Early_init sets rcvhdrentsize and rcvhdrsize, so this must be * done after early_init. @@ -836,6 +817,7 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit) ipath_write_kreg(dd, dd->ipath_kregs->kr_sendpioavailaddr, dd->ipath_pioavailregs_phys); + /* * this is to detect s/w errors, which the h/w works around by * ignoring the low 6 bits of address, if it wasn't aligned. @@ -862,12 +844,6 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit) ~0ULL&~INFINIPATH_HWE_MEMBISTFAILED); ipath_write_kreg(dd, dd->ipath_kregs->kr_control, 0ULL); - spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); - dd->ipath_sendctrl = INFINIPATH_S_PIOENABLE; - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl); - ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); - spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); - /* * before error clears, since we expect serdes pll errors during * this, the first time after reset @@ -940,6 +916,19 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit) else enable_chip(dd, reinit); + /* after enable_chip, so pioavailshadow setup */ + ipath_chg_pioavailkernel(dd, 0, piobufs, 1); + + /* + * Cancel any possible active sends from early driver load. + * Follows early_init because some chips have to initialize + * PIO buffers in early_init to avoid false parity errors. + * After enable and ipath_chg_pioavailkernel so we can safely + * enable pioavail updates and PIOENABLE; packets are now + * ready to go out. + */ + ipath_cancel_sends(dd, 1); + if (!reinit) { /* * Used when we close a port, for DMA already in flight diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c index 45c4c068ab1e..26900b3b7a4e 100644 --- a/drivers/infiniband/hw/ipath/ipath_intr.c +++ b/drivers/infiniband/hw/ipath/ipath_intr.c @@ -38,42 +38,12 @@ #include "ipath_verbs.h" #include "ipath_common.h" -/* - * clear (write) a pio buffer, to clear a parity error. 
This routine - * should only be called when in freeze mode, and the buffer should be - * canceled afterwards. - */ -static void ipath_clrpiobuf(struct ipath_devdata *dd, u32 pnum) -{ - u32 __iomem *pbuf; - u32 dwcnt; /* dword count to write */ - if (pnum < dd->ipath_piobcnt2k) { - pbuf = (u32 __iomem *) (dd->ipath_pio2kbase + pnum * - dd->ipath_palign); - dwcnt = dd->ipath_piosize2k >> 2; - } - else { - pbuf = (u32 __iomem *) (dd->ipath_pio4kbase + - (pnum - dd->ipath_piobcnt2k) * dd->ipath_4kalign); - dwcnt = dd->ipath_piosize4k >> 2; - } - dev_info(&dd->pcidev->dev, - "Rewrite PIO buffer %u, to recover from parity error\n", - pnum); - - /* no flush required, since already in freeze */ - writel(dwcnt + 1, pbuf); - while (--dwcnt) - writel(0, pbuf++); -} /* * Called when we might have an error that is specific to a particular * PIO buffer, and may need to cancel that buffer, so it can be re-used. - * If rewrite is true, and bits are set in the sendbufferror registers, - * we'll write to the buffer, for error recovery on parity errors. */ -void ipath_disarm_senderrbufs(struct ipath_devdata *dd, int rewrite) +void ipath_disarm_senderrbufs(struct ipath_devdata *dd) { u32 piobcnt; unsigned long sbuf[4]; @@ -109,11 +79,8 @@ void ipath_disarm_senderrbufs(struct ipath_devdata *dd, int rewrite) } for (i = 0; i < piobcnt; i++) - if (test_bit(i, sbuf)) { - if (rewrite) - ipath_clrpiobuf(dd, i); + if (test_bit(i, sbuf)) ipath_disarm_piobufs(dd, i, 1); - } /* ignore armlaunch errs for a bit */ dd->ipath_lastcancel = jiffies+3; } @@ -164,7 +131,7 @@ static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs) { u64 ignore_this_time = 0; - ipath_disarm_senderrbufs(dd, 0); + ipath_disarm_senderrbufs(dd); if ((errs & E_SUM_LINK_PKTERRS) && !(dd->ipath_flags & IPATH_LINKACTIVE)) { /* @@ -909,8 +876,8 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) * processes (causing armlaunch), send errors due to going into freeze mode, * etc., and try to avoid causing extra interrupts while doing so. * Forcibly update the in-memory pioavail register copies after cleanup - * because the chip won't do it for anything changing while in freeze mode - * (we don't want to wait for the next pio buffer state change). + * because the chip won't do it while in freeze mode (the register values + * themselves are kept correct). 
* Make sure that we don't lose any important interrupts by using the chip * feature that says that writing 0 to a bit in *clear that is set in * *status will cause an interrupt to be generated again (if allowed by @@ -918,47 +885,22 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) */ void ipath_clear_freeze(struct ipath_devdata *dd) { - int i, im; - u64 val; - /* disable error interrupts, to avoid confusion */ ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, 0ULL); /* also disable interrupts; errormask is sometimes overwriten */ ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL); - /* - * clear all sends, because they have may been - * completed by usercode while in freeze mode, and - * therefore would not be sent, and eventually - * might cause the process to run out of bufs - */ ipath_cancel_sends(dd, 1); + + /* clear the freeze, and be sure chip saw it */ ipath_write_kreg(dd, dd->ipath_kregs->kr_control, dd->ipath_control); + ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); - /* - * ensure pio avail updates continue (because the update - * won't have happened from cancel_sends because we were - * still in freeze - */ + /* force in-memory update now we are out of freeze */ ipath_force_pio_avail_update(dd); - /* - * We just enabled pioavailupdate, so dma copy is almost certainly - * not yet right, so read the registers directly. Similar to init - */ - for (i = 0; i < dd->ipath_pioavregs; i++) { - /* deal with 6110 chip bug */ - im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ? - i ^ 1 : i; - val = ipath_read_kreg64(dd, (0x1000 / sizeof(u64)) + im); - dd->ipath_pioavailregs_dma[i] = cpu_to_le64(val); - dd->ipath_pioavailshadow[i] = val | - (~dd->ipath_pioavailkernel[i] << - INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT); - } - /* * force new interrupt if any hwerr, error or interrupt bits are * still set, and clear "safe" send packet errors related to freeze @@ -1316,10 +1258,8 @@ irqreturn_t ipath_intr(int irq, void *data) ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); - if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA)) - handle_layer_pioavail(dd); - else - ipath_dbg("unexpected BUFAVAIL intr\n"); + /* always process; sdma verbs uses PIO for acks and VL15 */ + handle_layer_pioavail(dd); } ret = IRQ_HANDLED; diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h index 202337ae90dc..02b24a340599 100644 --- a/drivers/infiniband/hw/ipath/ipath_kernel.h +++ b/drivers/infiniband/hw/ipath/ipath_kernel.h @@ -117,6 +117,10 @@ struct ipath_portdata { u16 port_subport_cnt; /* non-zero if port is being shared. 
*/ u16 port_subport_id; + /* number of pio bufs for this port (all procs, if shared) */ + u32 port_piocnt; + /* first pio buffer for this port */ + u32 port_pio_base; /* chip offset of PIO buffers for this port */ u32 port_piobufs; /* how many alloc_pages() chunks in port_rcvegrbuf_pages */ @@ -384,6 +388,8 @@ struct ipath_devdata { u32 ipath_lastrpkts; /* pio bufs allocated per port */ u32 ipath_pbufsport; + /* if remainder on bufs/port, ports < extrabuf get 1 extra */ + u32 ipath_ports_extrabuf; u32 ipath_pioupd_thresh; /* update threshold, some chips */ /* * number of ports configured as max; zero is set to number chip @@ -1011,7 +1017,7 @@ void ipath_get_eeprom_info(struct ipath_devdata *); int ipath_update_eeprom_log(struct ipath_devdata *dd); void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr); u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg); -void ipath_disarm_senderrbufs(struct ipath_devdata *, int); +void ipath_disarm_senderrbufs(struct ipath_devdata *); void ipath_force_pio_avail_update(struct ipath_devdata *); void signal_ib_event(struct ipath_devdata *dd, enum ib_event_type ev); diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c index 8ac5c1d82ccd..9e3fe61cbd08 100644 --- a/drivers/infiniband/hw/ipath/ipath_ruc.c +++ b/drivers/infiniband/hw/ipath/ipath_ruc.c @@ -481,9 +481,10 @@ done: wake_up(&qp->wait); } -static void want_buffer(struct ipath_devdata *dd) +static void want_buffer(struct ipath_devdata *dd, struct ipath_qp *qp) { - if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA)) { + if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA) || + qp->ibqp.qp_type == IB_QPT_SMI) { unsigned long flags; spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); @@ -519,7 +520,7 @@ static void ipath_no_bufs_available(struct ipath_qp *qp, spin_lock_irqsave(&dev->pending_lock, flags); list_add_tail(&qp->piowait, &dev->piowait); spin_unlock_irqrestore(&dev->pending_lock, flags); - want_buffer(dev->dd); + want_buffer(dev->dd, qp); dev->n_piowait++; } diff --git a/drivers/infiniband/hw/ipath/ipath_sdma.c b/drivers/infiniband/hw/ipath/ipath_sdma.c index 1974df7a9f78..0d07682c7310 100644 --- a/drivers/infiniband/hw/ipath/ipath_sdma.c +++ b/drivers/infiniband/hw/ipath/ipath_sdma.c @@ -449,16 +449,19 @@ int setup_sdma(struct ipath_devdata *dd) ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmaheadaddr, dd->ipath_sdma_head_phys); - /* Reserve all the former "kernel" piobufs */ - n = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k - dd->ipath_pioreserved; - for (i = dd->ipath_lastport_piobuf; i < n; ++i) { + /* + * Reserve all the former "kernel" piobufs, using high number range + * so we get as many 4K buffers as possible + */ + n = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k; + i = dd->ipath_lastport_piobuf + dd->ipath_pioreserved; + ipath_chg_pioavailkernel(dd, i, n - i , 0); + for (; i < n; ++i) { unsigned word = i / 64; unsigned bit = i & 63; BUG_ON(word >= 3); senddmabufmask[word] |= 1ULL << bit; } - ipath_chg_pioavailkernel(dd, dd->ipath_lastport_piobuf, - n - dd->ipath_lastport_piobuf, 0); ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask0, senddmabufmask[0]); ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask1, -- cgit v1.2.3 From ab69b3cf1219e0d07bb4ea373f36b1de38af531c Mon Sep 17 00:00:00 2001 From: John Gregor Date: Wed, 7 May 2008 11:01:10 -0700 Subject: IB/ipath: Fix SDMA error recovery in absence of link status change What's fixed: in ipath_cancel_sends() We need to unconditionally set ABORTING. 
So, swap the tests so the set_bit() isn't shadowed by the &&. If we've disarmed the piobufs, then we need to unconditionally set DISARMED. So, move it out from the overly protective if at the bottom. in sdma_abort_task() Abort_task was written knowing that the SDMA engine would always be reset (and restarted) on error. A recent change broke that fundamental assumption by taking the restart portion and making it conditional on a link status change. But, SDMA can go boom without a link status change in some conditions. Signed-off-by: John Gregor Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_driver.c | 8 +++++--- drivers/infiniband/hw/ipath/ipath_sdma.c | 31 +++++++++++++++++++++++------- 2 files changed, 29 insertions(+), 10 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c index 2036d38fac47..ce7b7c34360e 100644 --- a/drivers/infiniband/hw/ipath/ipath_driver.c +++ b/drivers/infiniband/hw/ipath/ipath_driver.c @@ -1898,8 +1898,8 @@ void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl) spin_lock_irqsave(&dd->ipath_sdma_lock, flags); skip_cancel = - !test_bit(IPATH_SDMA_DISABLED, statp) && - test_and_set_bit(IPATH_SDMA_ABORTING, statp); + test_and_set_bit(IPATH_SDMA_ABORTING, statp) + && !test_bit(IPATH_SDMA_DISABLED, statp); spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); if (skip_cancel) goto bail; @@ -1930,6 +1930,9 @@ void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl) ipath_disarm_piobufs(dd, 0, dd->ipath_piobcnt2k + dd->ipath_piobcnt4k); + if (dd->ipath_flags & IPATH_HAS_SEND_DMA) + set_bit(IPATH_SDMA_DISARMED, &dd->ipath_sdma_status); + if (restore_sendctrl) { /* else done by caller later if needed */ spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); @@ -1949,7 +1952,6 @@ void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl) /* only wait so long for intr */ dd->ipath_sdma_abort_intr_timeout = jiffies + HZ; dd->ipath_sdma_reset_wait = 200; - __set_bit(IPATH_SDMA_DISARMED, &dd->ipath_sdma_status); if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status)) tasklet_hi_schedule(&dd->ipath_sdma_abort_task); spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); diff --git a/drivers/infiniband/hw/ipath/ipath_sdma.c b/drivers/infiniband/hw/ipath/ipath_sdma.c index 0d07682c7310..3697449c1ba4 100644 --- a/drivers/infiniband/hw/ipath/ipath_sdma.c +++ b/drivers/infiniband/hw/ipath/ipath_sdma.c @@ -308,13 +308,15 @@ static void sdma_abort_task(unsigned long opaque) spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); /* - * Don't restart sdma here. Wait until link is up to ACTIVE. - * VL15 MADs used to bring the link up use PIO, and multiple - * link transitions otherwise cause the sdma engine to be + * Don't restart sdma here (with the exception + * below). Wait until link is up to ACTIVE. VL15 MADs + * used to bring the link up use PIO, and multiple link + * transitions otherwise cause the sdma engine to be * stopped and started multiple times. - * The disable is done here, including the shadow, so the - * state is kept consistent. - * See ipath_restart_sdma() for the actual starting of sdma. + * The disable is done here, including the shadow, + * so the state is kept consistent. + * See ipath_restart_sdma() for the actual starting + * of sdma. 
*/ spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); dd->ipath_sendctrl &= ~INFINIPATH_S_SDMAENABLE; @@ -326,6 +328,13 @@ static void sdma_abort_task(unsigned long opaque) /* make sure I see next message */ dd->ipath_sdma_abort_jiffies = 0; + /* + * Not everything that takes SDMA offline is a link + * status change. If the link was up, restart SDMA. + */ + if (dd->ipath_flags & IPATH_LINKACTIVE) + ipath_restart_sdma(dd); + goto done; } @@ -427,7 +436,12 @@ int setup_sdma(struct ipath_devdata *dd) goto done; } - dd->ipath_sdma_status = 0; + /* + * Set initial status as if we had been up, then gone down. + * This lets initial start on transition to ACTIVE be the + * same as restart after link flap. + */ + dd->ipath_sdma_status = IPATH_SDMA_ABORT_ABORTED; dd->ipath_sdma_abort_jiffies = 0; dd->ipath_sdma_generation = 0; dd->ipath_sdma_descq_tail = 0; @@ -618,6 +632,9 @@ void ipath_restart_sdma(struct ipath_devdata *dd) ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); + /* notify upper layers */ + ipath_ib_piobufavail(dd->verbs_dev); + bail: return; } -- cgit v1.2.3 From 12137c593d127c6c1a3eb050674da047682badaf Mon Sep 17 00:00:00 2001 From: Stefan Roscher Date: Wed, 7 May 2008 11:35:06 -0700 Subject: IB/ehca: Wait for async events to finish before destroying QP This is necessary because, in a multicore environment, a race between uverbs async handler and destroy QP could occur. Signed-off-by: Stefan Roscher Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ehca/ehca_classes.h | 2 ++ drivers/infiniband/hw/ehca/ehca_irq.c | 4 ++++ drivers/infiniband/hw/ehca/ehca_qp.c | 5 +++++ 3 files changed, 11 insertions(+) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h index 00bab60f6de4..1e9e99a13933 100644 --- a/drivers/infiniband/hw/ehca/ehca_classes.h +++ b/drivers/infiniband/hw/ehca/ehca_classes.h @@ -192,6 +192,8 @@ struct ehca_qp { int mtu_shift; u32 message_count; u32 packet_count; + atomic_t nr_events; /* events seen */ + wait_queue_head_t wait_completion; }; #define IS_SRQ(qp) (qp->ext_type == EQPT_SRQ) diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c index ca5eb0cb628c..ce1ab0571be3 100644 --- a/drivers/infiniband/hw/ehca/ehca_irq.c +++ b/drivers/infiniband/hw/ehca/ehca_irq.c @@ -204,6 +204,8 @@ static void qp_event_callback(struct ehca_shca *shca, u64 eqe, read_lock(&ehca_qp_idr_lock); qp = idr_find(&ehca_qp_idr, token); + if (qp) + atomic_inc(&qp->nr_events); read_unlock(&ehca_qp_idr_lock); if (!qp) @@ -223,6 +225,8 @@ static void qp_event_callback(struct ehca_shca *shca, u64 eqe, if (fatal && qp->ext_type == EQPT_SRQBASE) dispatch_qp_event(shca, qp, IB_EVENT_QP_LAST_WQE_REACHED); + if (atomic_dec_and_test(&qp->nr_events)) + wake_up(&qp->wait_completion); return; } diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c index 18fba92fa7ae..3f59587338ea 100644 --- a/drivers/infiniband/hw/ehca/ehca_qp.c +++ b/drivers/infiniband/hw/ehca/ehca_qp.c @@ -566,6 +566,8 @@ static struct ehca_qp *internal_create_qp( return ERR_PTR(-ENOMEM); } + atomic_set(&my_qp->nr_events, 0); + init_waitqueue_head(&my_qp->wait_completion); spin_lock_init(&my_qp->spinlock_s); spin_lock_init(&my_qp->spinlock_r); my_qp->qp_type = qp_type; @@ -1934,6 +1936,9 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, idr_remove(&ehca_qp_idr, my_qp->token); 
write_unlock_irqrestore(&ehca_qp_idr_lock, flags); + /* now wait until all pending events have completed */ + wait_event(my_qp->wait_completion, !atomic_read(&my_qp->nr_events)); + h_ret = hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp); if (h_ret != H_SUCCESS) { ehca_err(dev, "hipz_h_destroy_qp() failed h_ret=%li " -- cgit v1.2.3 From dd37818dbdf8e51d0288c0197c351c005ffcdbdb Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Tue, 13 May 2008 11:27:25 -0700 Subject: RDMA/nes: Fix up nes_lro_max_aggr module parameter Fix some bugs with the max_aggr module parameter added with LRO support: - The module parameter value was ignored and not actually used to set lro_mgr.max_aggr. - MODULE_PARM_DESC had a typo "_mro_" instead of "_lro_" so it didn't end up describing the actual module parameter. - The nes_lro_max_aggr variable was declared as unsigned, but the module_param line said "int" instead of "uint" for the type. - The default value for the parameter was stuck in the permissions field of module_param, which led to nonsensical permissions for the file under /sys/module/iw_nes/param. - The parameter was used in only one file but defined in another, which led to the variable being global for no good reason. Move everything related to the parameter to the file nes_hw.c where it is actually used. Signed-off-by: Roland Dreier --- drivers/infiniband/hw/nes/nes.c | 4 ---- drivers/infiniband/hw/nes/nes.h | 1 - drivers/infiniband/hw/nes/nes_hw.c | 6 +++++- 3 files changed, 5 insertions(+), 6 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c index 9f7364a9096d..a4e9269a29bd 100644 --- a/drivers/infiniband/hw/nes/nes.c +++ b/drivers/infiniband/hw/nes/nes.c @@ -91,10 +91,6 @@ unsigned int nes_debug_level = 0; module_param_named(debug_level, nes_debug_level, uint, 0644); MODULE_PARM_DESC(debug_level, "Enable debug output level"); -unsigned int nes_lro_max_aggr = NES_LRO_MAX_AGGR; -module_param(nes_lro_max_aggr, int, NES_LRO_MAX_AGGR); -MODULE_PARM_DESC(nes_mro_max_aggr, " nic LRO MAX packet aggregation"); - LIST_HEAD(nes_adapter_list); static LIST_HEAD(nes_dev_list); diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h index 1f9f7bf73862..61b46e9c7d2d 100644 --- a/drivers/infiniband/hw/nes/nes.h +++ b/drivers/infiniband/hw/nes/nes.h @@ -173,7 +173,6 @@ extern int disable_mpa_crc; extern unsigned int send_first; extern unsigned int nes_drv_opt; extern unsigned int nes_debug_level; -extern unsigned int nes_lro_max_aggr; extern struct list_head nes_adapter_list; diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c index 8dc70f9bad2f..d3278f111ca7 100644 --- a/drivers/infiniband/hw/nes/nes_hw.c +++ b/drivers/infiniband/hw/nes/nes_hw.c @@ -42,6 +42,10 @@ #include "nes.h" +static unsigned int nes_lro_max_aggr = NES_LRO_MAX_AGGR; +module_param(nes_lro_max_aggr, uint, 0444); +MODULE_PARM_DESC(nes_lro_max_aggr, "NIC LRO max packet aggregation"); + static u32 crit_err_count; u32 int_mod_timer_init; u32 int_mod_cq_depth_256; @@ -1738,7 +1742,7 @@ int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev) jumbomode = 1; nes_nic_init_timer_defaults(nesdev, jumbomode); } - nesvnic->lro_mgr.max_aggr = NES_LRO_MAX_AGGR; + nesvnic->lro_mgr.max_aggr = nes_lro_max_aggr; nesvnic->lro_mgr.max_desc = NES_MAX_LRO_DESCRIPTORS; nesvnic->lro_mgr.lro_arr = nesvnic->lro_desc; nesvnic->lro_mgr.get_skb_header = nes_lro_get_skb_hdr; -- cgit v1.2.3
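The nes patch above amounts to standard module-parameter hygiene. The sketch below restates that pattern as a minimal, buildable out-of-tree module; the module name, variable name, and default value are illustrative only and are not part of the nes driver. The points it demonstrates: keep the variable static in the one file that uses it, make the module_param() type token ("uint") match the variable's C type, and pass a sysfs permission mask, not a default value, as the third argument.

#include <linux/module.h>
#include <linux/moduleparam.h>

#define EXAMPLE_LRO_MAX_AGGR 64 /* hypothetical default, not the nes value */

/* static: only this file uses the parameter, so it need not be global */
static unsigned int example_lro_max_aggr = EXAMPLE_LRO_MAX_AGGR;
/* "uint" matches unsigned int; 0444 exposes it read-only via sysfs */
module_param(example_lro_max_aggr, uint, 0444);
MODULE_PARM_DESC(example_lro_max_aggr, "NIC LRO max packet aggregation");

static int __init example_param_init(void)
{
        pr_info("example: lro_max_aggr=%u\n", example_lro_max_aggr);
        return 0;
}

static void __exit example_param_exit(void)
{
}

module_init(example_param_init);
module_exit(example_param_exit);
MODULE_LICENSE("GPL");

Declared this way, the value shows up read-only under /sys/module/<module>/parameters/ and the MODULE_PARM_DESC text is attached to a parameter name that actually exists.

From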
53dc1ca194c062aa9771e194047f27ec1ca592df Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Tue, 13 May 2008 11:40:25 -0700 Subject: IB/ipath: Fix RC and UC error handling When errors are detected in RC, the QP should transition to the IB_QPS_ERR state, not the IB_QPS_SQE state. Also, when the error is on the responder side, the receive work completion error was incorrect (remote vs. local). Signed-off-by: Ralph Campbell Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_qp.c | 54 ++-------- drivers/infiniband/hw/ipath/ipath_rc.c | 127 ++++++++--------------- drivers/infiniband/hw/ipath/ipath_ruc.c | 165 ++++++++++++++---------------- drivers/infiniband/hw/ipath/ipath_verbs.c | 4 +- drivers/infiniband/hw/ipath/ipath_verbs.h | 6 +- 5 files changed, 132 insertions(+), 224 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c index dd5b6e9d57c2..6f98632877eb 100644 --- a/drivers/infiniband/hw/ipath/ipath_qp.c +++ b/drivers/infiniband/hw/ipath/ipath_qp.c @@ -374,13 +374,14 @@ static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type) } /** - * ipath_error_qp - put a QP into an error state - * @qp: the QP to put into an error state + * ipath_error_qp - put a QP into the error state + * @qp: the QP to put into the error state * @err: the receive completion error to signal if a RWQE is active * * Flushes both send and receive work queues. * Returns true if last WQE event should be generated. * The QP s_lock should be held and interrupts disabled. + * If we are already in error state, just return. */ int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err) @@ -389,8 +390,10 @@ int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err) struct ib_wc wc; int ret = 0; - ipath_dbg("QP%d/%d in error state (%d)\n", - qp->ibqp.qp_num, qp->remote_qpn, err); + if (qp->state == IB_QPS_ERR) + goto bail; + + qp->state = IB_QPS_ERR; spin_lock(&dev->pending_lock); if (!list_empty(&qp->timerwait)) @@ -460,6 +463,7 @@ int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err) } else if (qp->ibqp.event_handler) ret = 1; +bail: return ret; } @@ -1025,48 +1029,6 @@ bail: return ret; } -/** - * ipath_sqerror_qp - put a QP's send queue into an error state - * @qp: QP who's send queue will be put into an error state - * @wc: the WC responsible for putting the QP in this state - * - * Flushes the send work queue. - * The QP s_lock should be held and interrupts disabled. 
- */ - -void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc) -{ - struct ipath_ibdev *dev = to_idev(qp->ibqp.device); - struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last); - - ipath_dbg("Send queue error on QP%d/%d: err: %d\n", - qp->ibqp.qp_num, qp->remote_qpn, wc->status); - - spin_lock(&dev->pending_lock); - if (!list_empty(&qp->timerwait)) - list_del_init(&qp->timerwait); - if (!list_empty(&qp->piowait)) - list_del_init(&qp->piowait); - spin_unlock(&dev->pending_lock); - - ipath_cq_enter(to_icq(qp->ibqp.send_cq), wc, 1); - if (++qp->s_last >= qp->s_size) - qp->s_last = 0; - - wc->status = IB_WC_WR_FLUSH_ERR; - - while (qp->s_last != qp->s_head) { - wqe = get_swqe_ptr(qp, qp->s_last); - wc->wr_id = wqe->wr.wr_id; - wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; - ipath_cq_enter(to_icq(qp->ibqp.send_cq), wc, 1); - if (++qp->s_last >= qp->s_size) - qp->s_last = 0; - } - qp->s_cur = qp->s_tail = qp->s_head; - qp->state = IB_QPS_SQE; -} - /** * ipath_get_credit - flush the send work queue of a QP * @qp: the qp who's send work queue to flush diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c index 08b11b567614..b4b26c3aa613 100644 --- a/drivers/infiniband/hw/ipath/ipath_rc.c +++ b/drivers/infiniband/hw/ipath/ipath_rc.c @@ -771,27 +771,14 @@ done: * * The QP s_lock should be held and interrupts disabled. */ -void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc) +void ipath_restart_rc(struct ipath_qp *qp, u32 psn) { struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last); struct ipath_ibdev *dev; if (qp->s_retry == 0) { - wc->wr_id = wqe->wr.wr_id; - wc->status = IB_WC_RETRY_EXC_ERR; - wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; - wc->vendor_err = 0; - wc->byte_len = 0; - wc->qp = &qp->ibqp; - wc->imm_data = 0; - wc->src_qp = qp->remote_qpn; - wc->wc_flags = 0; - wc->pkey_index = 0; - wc->slid = qp->remote_ah_attr.dlid; - wc->sl = qp->remote_ah_attr.sl; - wc->dlid_path_bits = 0; - wc->port_num = 0; - ipath_sqerror_qp(qp, wc); + ipath_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR); + ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR); goto bail; } qp->s_retry--; @@ -804,6 +791,8 @@ void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc) spin_lock(&dev->pending_lock); if (!list_empty(&qp->timerwait)) list_del_init(&qp->timerwait); + if (!list_empty(&qp->piowait)) + list_del_init(&qp->piowait); spin_unlock(&dev->pending_lock); if (wqe->wr.opcode == IB_WR_RDMA_READ) @@ -845,6 +834,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode, { struct ipath_ibdev *dev = to_idev(qp->ibqp.device); struct ib_wc wc; + enum ib_wc_status status; struct ipath_swqe *wqe; int ret = 0; u32 ack_psn; @@ -909,7 +899,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode, */ update_last_psn(qp, wqe->psn - 1); /* Retry this request. */ - ipath_restart_rc(qp, wqe->psn, &wc); + ipath_restart_rc(qp, wqe->psn); /* * No need to process the ACK/NAK since we are * restarting an earlier request. @@ -937,20 +927,15 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode, /* Post a send completion queue entry if requested. 
*/ if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) || (wqe->wr.send_flags & IB_SEND_SIGNALED)) { + memset(&wc, 0, sizeof wc); wc.wr_id = wqe->wr.wr_id; wc.status = IB_WC_SUCCESS; wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; - wc.vendor_err = 0; wc.byte_len = wqe->length; - wc.imm_data = 0; wc.qp = &qp->ibqp; wc.src_qp = qp->remote_qpn; - wc.wc_flags = 0; - wc.pkey_index = 0; wc.slid = qp->remote_ah_attr.dlid; wc.sl = qp->remote_ah_attr.sl; - wc.dlid_path_bits = 0; - wc.port_num = 0; ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0); } qp->s_retry = qp->s_retry_cnt; @@ -1012,7 +997,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode, if (qp->s_last == qp->s_tail) goto bail; if (qp->s_rnr_retry == 0) { - wc.status = IB_WC_RNR_RETRY_EXC_ERR; + status = IB_WC_RNR_RETRY_EXC_ERR; goto class_b; } if (qp->s_rnr_retry_cnt < 7) @@ -1050,37 +1035,25 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode, * RDMA READ response which terminates the RDMA * READ. */ - ipath_restart_rc(qp, psn, &wc); + ipath_restart_rc(qp, psn); break; case 1: /* Invalid Request */ - wc.status = IB_WC_REM_INV_REQ_ERR; + status = IB_WC_REM_INV_REQ_ERR; dev->n_other_naks++; goto class_b; case 2: /* Remote Access Error */ - wc.status = IB_WC_REM_ACCESS_ERR; + status = IB_WC_REM_ACCESS_ERR; dev->n_other_naks++; goto class_b; case 3: /* Remote Operation Error */ - wc.status = IB_WC_REM_OP_ERR; + status = IB_WC_REM_OP_ERR; dev->n_other_naks++; class_b: - wc.wr_id = wqe->wr.wr_id; - wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; - wc.vendor_err = 0; - wc.byte_len = 0; - wc.qp = &qp->ibqp; - wc.imm_data = 0; - wc.src_qp = qp->remote_qpn; - wc.wc_flags = 0; - wc.pkey_index = 0; - wc.slid = qp->remote_ah_attr.dlid; - wc.sl = qp->remote_ah_attr.sl; - wc.dlid_path_bits = 0; - wc.port_num = 0; - ipath_sqerror_qp(qp, &wc); + ipath_send_complete(qp, wqe, status); + ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR); break; default: @@ -1126,8 +1099,8 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, int header_in_data) { struct ipath_swqe *wqe; + enum ib_wc_status status; unsigned long flags; - struct ib_wc wc; int diff; u32 pad; u32 aeth; @@ -1159,6 +1132,7 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, if (unlikely(qp->s_last == qp->s_tail)) goto ack_done; wqe = get_swqe_ptr(qp, qp->s_last); + status = IB_WC_SUCCESS; switch (opcode) { case OP(ACKNOWLEDGE): @@ -1200,7 +1174,7 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, /* no AETH, no ACK */ if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) { dev->n_rdma_seq++; - ipath_restart_rc(qp, qp->s_last_psn + 1, &wc); + ipath_restart_rc(qp, qp->s_last_psn + 1); goto ack_done; } if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) @@ -1261,7 +1235,7 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, /* ACKs READ req. 
*/ if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) { dev->n_rdma_seq++; - ipath_restart_rc(qp, qp->s_last_psn + 1, &wc); + ipath_restart_rc(qp, qp->s_last_psn + 1); goto ack_done; } if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) @@ -1291,31 +1265,16 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, goto ack_done; } -ack_done: - spin_unlock_irqrestore(&qp->s_lock, flags); - goto bail; - ack_op_err: - wc.status = IB_WC_LOC_QP_OP_ERR; + status = IB_WC_LOC_QP_OP_ERR; goto ack_err; ack_len_err: - wc.status = IB_WC_LOC_LEN_ERR; + status = IB_WC_LOC_LEN_ERR; ack_err: - wc.wr_id = wqe->wr.wr_id; - wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; - wc.vendor_err = 0; - wc.byte_len = 0; - wc.imm_data = 0; - wc.qp = &qp->ibqp; - wc.src_qp = qp->remote_qpn; - wc.wc_flags = 0; - wc.pkey_index = 0; - wc.slid = qp->remote_ah_attr.dlid; - wc.sl = qp->remote_ah_attr.sl; - wc.dlid_path_bits = 0; - wc.port_num = 0; - ipath_sqerror_qp(qp, &wc); + ipath_send_complete(qp, wqe, status); + ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR); +ack_done: spin_unlock_irqrestore(&qp->s_lock, flags); bail: return; @@ -1523,13 +1482,12 @@ send_ack: return 0; } -static void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err) +void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err) { unsigned long flags; int lastwqe; spin_lock_irqsave(&qp->s_lock, flags); - qp->state = IB_QPS_ERR; lastwqe = ipath_error_qp(qp, err); spin_unlock_irqrestore(&qp->s_lock, flags); @@ -1643,11 +1601,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, opcode == OP(SEND_LAST) || opcode == OP(SEND_LAST_WITH_IMMEDIATE)) break; - nack_inv: - ipath_rc_error(qp, IB_WC_REM_INV_REQ_ERR); - qp->r_nak_state = IB_NAK_INVALID_REQUEST; - qp->r_ack_psn = qp->r_psn; - goto send_ack; + goto nack_inv; case OP(RDMA_WRITE_FIRST): case OP(RDMA_WRITE_MIDDLE): @@ -1673,18 +1627,13 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, break; } - wc.imm_data = 0; - wc.wc_flags = 0; + memset(&wc, 0, sizeof wc); /* OK, process the packet. */ switch (opcode) { case OP(SEND_FIRST): - if (!ipath_get_rwqe(qp, 0)) { - rnr_nak: - qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer; - qp->r_ack_psn = qp->r_psn; - goto send_ack; - } + if (!ipath_get_rwqe(qp, 0)) + goto rnr_nak; qp->r_rcv_len = 0; /* FALLTHROUGH */ case OP(SEND_MIDDLE): @@ -1751,14 +1700,10 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, wc.opcode = IB_WC_RECV_RDMA_WITH_IMM; else wc.opcode = IB_WC_RECV; - wc.vendor_err = 0; wc.qp = &qp->ibqp; wc.src_qp = qp->remote_qpn; - wc.pkey_index = 0; wc.slid = qp->remote_ah_attr.dlid; wc.sl = qp->remote_ah_attr.sl; - wc.dlid_path_bits = 0; - wc.port_num = 0; /* Signal completion event if the solicited bit is set. 
*/ ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, (ohdr->bth[0] & @@ -1951,11 +1896,21 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, goto send_ack; goto done; +rnr_nak: + qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer; + qp->r_ack_psn = qp->r_psn; + goto send_ack; + +nack_inv: + ipath_rc_error(qp, IB_WC_LOC_QP_OP_ERR); + qp->r_nak_state = IB_NAK_INVALID_REQUEST; + qp->r_ack_psn = qp->r_psn; + goto send_ack; + nack_acc: - ipath_rc_error(qp, IB_WC_REM_ACCESS_ERR); + ipath_rc_error(qp, IB_WC_LOC_PROT_ERR); qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR; qp->r_ack_psn = qp->r_psn; - send_ack: send_rc_ack(qp); diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c index 9e3fe61cbd08..c716a03dd399 100644 --- a/drivers/infiniband/hw/ipath/ipath_ruc.c +++ b/drivers/infiniband/hw/ipath/ipath_ruc.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. + * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -140,20 +140,11 @@ int ipath_init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe, goto bail; bad_lkey: + memset(&wc, 0, sizeof(wc)); wc.wr_id = wqe->wr_id; wc.status = IB_WC_LOC_PROT_ERR; wc.opcode = IB_WC_RECV; - wc.vendor_err = 0; - wc.byte_len = 0; - wc.imm_data = 0; wc.qp = &qp->ibqp; - wc.src_qp = 0; - wc.wc_flags = 0; - wc.pkey_index = 0; - wc.slid = 0; - wc.sl = 0; - wc.dlid_path_bits = 0; - wc.port_num = 0; /* Signal solicited completion event. */ ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1); ret = 0; @@ -270,6 +261,7 @@ static void ipath_ruc_loopback(struct ipath_qp *sqp) struct ib_wc wc; u64 sdata; atomic64_t *maddr; + enum ib_wc_status send_status; qp = ipath_lookup_qpn(&dev->qp_table, sqp->remote_qpn); if (!qp) { @@ -300,8 +292,8 @@ again: wqe = get_swqe_ptr(sqp, sqp->s_last); spin_unlock_irqrestore(&sqp->s_lock, flags); - wc.wc_flags = 0; - wc.imm_data = 0; + memset(&wc, 0, sizeof wc); + send_status = IB_WC_SUCCESS; sqp->s_sge.sge = wqe->sg_list[0]; sqp->s_sge.sg_list = wqe->sg_list + 1; @@ -313,75 +305,33 @@ again: wc.imm_data = wqe->wr.ex.imm_data; /* FALLTHROUGH */ case IB_WR_SEND: - if (!ipath_get_rwqe(qp, 0)) { - rnr_nak: - /* Handle RNR NAK */ - if (qp->ibqp.qp_type == IB_QPT_UC) - goto send_comp; - if (sqp->s_rnr_retry == 0) { - wc.status = IB_WC_RNR_RETRY_EXC_ERR; - goto err; - } - if (sqp->s_rnr_retry_cnt < 7) - sqp->s_rnr_retry--; - dev->n_rnr_naks++; - sqp->s_rnr_timeout = - ib_ipath_rnr_table[qp->r_min_rnr_timer]; - ipath_insert_rnr_queue(sqp); - goto done; - } + if (!ipath_get_rwqe(qp, 0)) + goto rnr_nak; break; case IB_WR_RDMA_WRITE_WITH_IMM: - if (unlikely(!(qp->qp_access_flags & - IB_ACCESS_REMOTE_WRITE))) { - wc.status = IB_WC_REM_INV_REQ_ERR; - goto err; - } + if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE))) + goto inv_err; wc.wc_flags = IB_WC_WITH_IMM; wc.imm_data = wqe->wr.ex.imm_data; if (!ipath_get_rwqe(qp, 1)) goto rnr_nak; /* FALLTHROUGH */ case IB_WR_RDMA_WRITE: - if (unlikely(!(qp->qp_access_flags & - IB_ACCESS_REMOTE_WRITE))) { - wc.status = IB_WC_REM_INV_REQ_ERR; - goto err; - } + if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE))) + goto inv_err; if (wqe->length == 0) break; if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, wqe->length, wqe->wr.wr.rdma.remote_addr, wqe->wr.wr.rdma.rkey, - IB_ACCESS_REMOTE_WRITE))) { - acc_err: - wc.status = IB_WC_REM_ACCESS_ERR; - err: - 
wc.wr_id = wqe->wr.wr_id; - wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; - wc.vendor_err = 0; - wc.byte_len = 0; - wc.qp = &sqp->ibqp; - wc.src_qp = sqp->remote_qpn; - wc.pkey_index = 0; - wc.slid = sqp->remote_ah_attr.dlid; - wc.sl = sqp->remote_ah_attr.sl; - wc.dlid_path_bits = 0; - wc.port_num = 0; - spin_lock_irqsave(&sqp->s_lock, flags); - ipath_sqerror_qp(sqp, &wc); - spin_unlock_irqrestore(&sqp->s_lock, flags); - goto done; - } + IB_ACCESS_REMOTE_WRITE))) + goto acc_err; break; case IB_WR_RDMA_READ: - if (unlikely(!(qp->qp_access_flags & - IB_ACCESS_REMOTE_READ))) { - wc.status = IB_WC_REM_INV_REQ_ERR; - goto err; - } + if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ))) + goto inv_err; if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length, wqe->wr.wr.rdma.remote_addr, wqe->wr.wr.rdma.rkey, @@ -394,11 +344,8 @@ again: case IB_WR_ATOMIC_CMP_AND_SWP: case IB_WR_ATOMIC_FETCH_AND_ADD: - if (unlikely(!(qp->qp_access_flags & - IB_ACCESS_REMOTE_ATOMIC))) { - wc.status = IB_WC_REM_INV_REQ_ERR; - goto err; - } + if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) + goto inv_err; if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64), wqe->wr.wr.atomic.remote_addr, wqe->wr.wr.atomic.rkey, @@ -415,7 +362,8 @@ again: goto send_comp; default: - goto done; + send_status = IB_WC_LOC_QP_OP_ERR; + goto serr; } sge = &sqp->s_sge.sge; @@ -458,14 +406,11 @@ again: wc.opcode = IB_WC_RECV; wc.wr_id = qp->r_wr_id; wc.status = IB_WC_SUCCESS; - wc.vendor_err = 0; wc.byte_len = wqe->length; wc.qp = &qp->ibqp; wc.src_qp = qp->remote_qpn; - wc.pkey_index = 0; wc.slid = qp->remote_ah_attr.dlid; wc.sl = qp->remote_ah_attr.sl; - wc.dlid_path_bits = 0; wc.port_num = 1; /* Signal completion event if the solicited bit is set. */ ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, @@ -473,9 +418,63 @@ again: send_comp: sqp->s_rnr_retry = sqp->s_rnr_retry_cnt; - ipath_send_complete(sqp, wqe, IB_WC_SUCCESS); + ipath_send_complete(sqp, wqe, send_status); goto again; +rnr_nak: + /* Handle RNR NAK */ + if (qp->ibqp.qp_type == IB_QPT_UC) + goto send_comp; + /* + * Note: we don't need the s_lock held since the BUSY flag + * makes this single threaded. 
+ */ + if (sqp->s_rnr_retry == 0) { + send_status = IB_WC_RNR_RETRY_EXC_ERR; + goto serr; + } + if (sqp->s_rnr_retry_cnt < 7) + sqp->s_rnr_retry--; + spin_lock_irqsave(&sqp->s_lock, flags); + if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_RECV_OK)) + goto unlock; + dev->n_rnr_naks++; + sqp->s_rnr_timeout = ib_ipath_rnr_table[qp->r_min_rnr_timer]; + ipath_insert_rnr_queue(sqp); + goto unlock; + +inv_err: + send_status = IB_WC_REM_INV_REQ_ERR; + wc.status = IB_WC_LOC_QP_OP_ERR; + goto err; + +acc_err: + send_status = IB_WC_REM_ACCESS_ERR; + wc.status = IB_WC_LOC_PROT_ERR; +err: + /* responder goes to error state */ + ipath_rc_error(qp, wc.status); + +serr: + spin_lock_irqsave(&sqp->s_lock, flags); + ipath_send_complete(sqp, wqe, send_status); + if (sqp->ibqp.qp_type == IB_QPT_RC) { + int lastwqe = ipath_error_qp(sqp, IB_WC_WR_FLUSH_ERR); + + sqp->s_flags &= ~IPATH_S_BUSY; + spin_unlock_irqrestore(&sqp->s_lock, flags); + if (lastwqe) { + struct ib_event ev; + + ev.device = sqp->ibqp.device; + ev.element.qp = &sqp->ibqp; + ev.event = IB_EVENT_QP_LAST_WQE_REACHED; + sqp->ibqp.event_handler(&ev, sqp->ibqp.qp_context); + } + goto done; + } +unlock: + spin_unlock_irqrestore(&sqp->s_lock, flags); done: if (atomic_dec_and_test(&qp->refcount)) wake_up(&qp->wait); @@ -651,21 +650,15 @@ void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe, status != IB_WC_SUCCESS) { struct ib_wc wc; + memset(&wc, 0, sizeof wc); wc.wr_id = wqe->wr.wr_id; wc.status = status; wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; - wc.vendor_err = 0; - wc.byte_len = wqe->length; - wc.imm_data = 0; wc.qp = &qp->ibqp; - wc.src_qp = 0; - wc.wc_flags = 0; - wc.pkey_index = 0; - wc.slid = 0; - wc.sl = 0; - wc.dlid_path_bits = 0; - wc.port_num = 0; - ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0); + if (status == IB_WC_SUCCESS) + wc.byte_len = wqe->length; + ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, + status != IB_WC_SUCCESS); } spin_lock_irqsave(&qp->s_lock, flags); diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index 5015cd2e57bd..22bb42dc8f73 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -744,12 +744,10 @@ static void ipath_ib_timer(struct ipath_ibdev *dev) /* XXX What if timer fires again while this is running? 
*/ for (qp = resend; qp != NULL; qp = qp->timer_next) { - struct ib_wc wc; - spin_lock_irqsave(&qp->s_lock, flags); if (qp->s_last != qp->s_tail && qp->state == IB_QPS_RTS) { dev->n_timeouts++; - ipath_restart_rc(qp, qp->s_last_psn + 1, &wc); + ipath_restart_rc(qp, qp->s_last_psn + 1); } spin_unlock_irqrestore(&qp->s_lock, flags); diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h index 6514aa8306cd..4c7c2aa8e19d 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.h +++ b/drivers/infiniband/hw/ipath/ipath_verbs.h @@ -710,8 +710,6 @@ void ipath_free_all_qps(struct ipath_qp_table *qpt); int ipath_init_qp_table(struct ipath_ibdev *idev, int size); -void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc); - void ipath_get_credit(struct ipath_qp *qp, u32 aeth); unsigned ipath_ib_rate_to_mult(enum ib_rate rate); @@ -729,7 +727,9 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, int has_grh, void *data, u32 tlen, struct ipath_qp *qp); -void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc); +void ipath_restart_rc(struct ipath_qp *qp, u32 psn); + +void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err); int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr); -- cgit v1.2.3 From e509be898d8937634437caa474b57ac12795e5bc Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Tue, 13 May 2008 11:41:29 -0700 Subject: IB/ipath: Fix many locking issues when switching to error state The send DMA hardware queue voided a number of prior assumptions about when a send is complete which led to completions being generated out of order. There were also a number of locking issues when switching the QP to the error or reset states, and we implement the IB_QPS_SQD state. Signed-off-by: Ralph Campbell Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_qp.c | 183 +++++++++++++------------- drivers/infiniband/hw/ipath/ipath_rc.c | 151 +++++++++++++-------- drivers/infiniband/hw/ipath/ipath_ruc.c | 168 +++++++++++++++-------- drivers/infiniband/hw/ipath/ipath_uc.c | 57 +++++--- drivers/infiniband/hw/ipath/ipath_ud.c | 66 +++++++--- drivers/infiniband/hw/ipath/ipath_user_sdma.h | 2 - drivers/infiniband/hw/ipath/ipath_verbs.c | 174 ++++++++++++++++-------- drivers/infiniband/hw/ipath/ipath_verbs.h | 57 ++++++-- 8 files changed, 554 insertions(+), 304 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c index 6f98632877eb..4715911101e4 100644 --- a/drivers/infiniband/hw/ipath/ipath_qp.c +++ b/drivers/infiniband/hw/ipath/ipath_qp.c @@ -242,7 +242,6 @@ static void ipath_free_qp(struct ipath_qp_table *qpt, struct ipath_qp *qp) { struct ipath_qp *q, **qpp; unsigned long flags; - int fnd = 0; spin_lock_irqsave(&qpt->lock, flags); @@ -253,51 +252,40 @@ static void ipath_free_qp(struct ipath_qp_table *qpt, struct ipath_qp *qp) *qpp = qp->next; qp->next = NULL; atomic_dec(&qp->refcount); - fnd = 1; break; } } spin_unlock_irqrestore(&qpt->lock, flags); - - if (!fnd) - return; - - free_qpn(qpt, qp->ibqp.qp_num); - - wait_event(qp->wait, !atomic_read(&qp->refcount)); } /** - * ipath_free_all_qps - remove all QPs from the table + * ipath_free_all_qps - check for QPs still in use * @qpt: the QP table to empty + * + * There should not be any QPs still in use. + * Free memory for table. 
*/ -void ipath_free_all_qps(struct ipath_qp_table *qpt) +unsigned ipath_free_all_qps(struct ipath_qp_table *qpt) { unsigned long flags; - struct ipath_qp *qp, *nqp; - u32 n; + struct ipath_qp *qp; + u32 n, qp_inuse = 0; + spin_lock_irqsave(&qpt->lock, flags); for (n = 0; n < qpt->max; n++) { - spin_lock_irqsave(&qpt->lock, flags); qp = qpt->table[n]; qpt->table[n] = NULL; - spin_unlock_irqrestore(&qpt->lock, flags); - - while (qp) { - nqp = qp->next; - free_qpn(qpt, qp->ibqp.qp_num); - if (!atomic_dec_and_test(&qp->refcount) || - !ipath_destroy_qp(&qp->ibqp)) - ipath_dbg("QP memory leak!\n"); - qp = nqp; - } + + for (; qp; qp = qp->next) + qp_inuse++; } + spin_unlock_irqrestore(&qpt->lock, flags); - for (n = 0; n < ARRAY_SIZE(qpt->map); n++) { + for (n = 0; n < ARRAY_SIZE(qpt->map); n++) if (qpt->map[n].page) - free_page((unsigned long)qpt->map[n].page); - } + free_page((unsigned long) qpt->map[n].page); + return qp_inuse; } /** @@ -336,11 +324,12 @@ static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type) qp->remote_qpn = 0; qp->qkey = 0; qp->qp_access_flags = 0; - qp->s_busy = 0; + atomic_set(&qp->s_dma_busy, 0); qp->s_flags &= IPATH_S_SIGNAL_REQ_WR; qp->s_hdrwords = 0; qp->s_wqe = NULL; qp->s_pkt_delay = 0; + qp->s_draining = 0; qp->s_psn = 0; qp->r_psn = 0; qp->r_msn = 0; @@ -353,7 +342,8 @@ static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type) } qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE; qp->r_nak_state = 0; - qp->r_wrid_valid = 0; + qp->r_aflags = 0; + qp->r_flags = 0; qp->s_rnr_timeout = 0; qp->s_head = 0; qp->s_tail = 0; @@ -361,7 +351,6 @@ static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type) qp->s_last = 0; qp->s_ssn = 1; qp->s_lsn = 0; - qp->s_wait_credit = 0; memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue)); qp->r_head_ack_queue = 0; qp->s_tail_ack_queue = 0; @@ -370,7 +359,6 @@ static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type) qp->r_rq.wq->head = 0; qp->r_rq.wq->tail = 0; } - qp->r_reuse_sge = 0; } /** @@ -402,39 +390,21 @@ int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err) list_del_init(&qp->piowait); spin_unlock(&dev->pending_lock); - wc.vendor_err = 0; - wc.byte_len = 0; - wc.imm_data = 0; + /* Schedule the sending tasklet to drain the send work queue. 
*/ + if (qp->s_last != qp->s_head) + ipath_schedule_send(qp); + + memset(&wc, 0, sizeof(wc)); wc.qp = &qp->ibqp; - wc.src_qp = 0; - wc.wc_flags = 0; - wc.pkey_index = 0; - wc.slid = 0; - wc.sl = 0; - wc.dlid_path_bits = 0; - wc.port_num = 0; - if (qp->r_wrid_valid) { - qp->r_wrid_valid = 0; + wc.opcode = IB_WC_RECV; + + if (test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags)) { wc.wr_id = qp->r_wr_id; - wc.opcode = IB_WC_RECV; wc.status = err; ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1); } wc.status = IB_WC_WR_FLUSH_ERR; - while (qp->s_last != qp->s_head) { - struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last); - - wc.wr_id = wqe->wr.wr_id; - wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; - if (++qp->s_last >= qp->s_size) - qp->s_last = 0; - ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 1); - } - qp->s_cur = qp->s_tail = qp->s_head; - qp->s_hdrwords = 0; - qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE; - if (qp->r_rq.wq) { struct ipath_rwq *wq; u32 head; @@ -450,7 +420,6 @@ int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err) tail = wq->tail; if (tail >= qp->r_rq.size) tail = 0; - wc.opcode = IB_WC_RECV; while (tail != head) { wc.wr_id = get_rwqe_ptr(&qp->r_rq, tail)->wr_id; if (++tail >= qp->r_rq.size) @@ -482,11 +451,10 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, struct ipath_ibdev *dev = to_idev(ibqp->device); struct ipath_qp *qp = to_iqp(ibqp); enum ib_qp_state cur_state, new_state; - unsigned long flags; int lastwqe = 0; int ret; - spin_lock_irqsave(&qp->s_lock, flags); + spin_lock_irq(&qp->s_lock); cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state; @@ -539,16 +507,42 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, switch (new_state) { case IB_QPS_RESET: + if (qp->state != IB_QPS_RESET) { + qp->state = IB_QPS_RESET; + spin_lock(&dev->pending_lock); + if (!list_empty(&qp->timerwait)) + list_del_init(&qp->timerwait); + if (!list_empty(&qp->piowait)) + list_del_init(&qp->piowait); + spin_unlock(&dev->pending_lock); + qp->s_flags &= ~IPATH_S_ANY_WAIT; + spin_unlock_irq(&qp->s_lock); + /* Stop the sending tasklet */ + tasklet_kill(&qp->s_task); + wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy)); + spin_lock_irq(&qp->s_lock); + } ipath_reset_qp(qp, ibqp->qp_type); break; + case IB_QPS_SQD: + qp->s_draining = qp->s_last != qp->s_cur; + qp->state = new_state; + break; + + case IB_QPS_SQE: + if (qp->ibqp.qp_type == IB_QPT_RC) + goto inval; + qp->state = new_state; + break; + case IB_QPS_ERR: lastwqe = ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR); break; default: + qp->state = new_state; break; - } if (attr_mask & IB_QP_PKEY_INDEX) @@ -601,8 +595,7 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) qp->s_max_rd_atomic = attr->max_rd_atomic; - qp->state = new_state; - spin_unlock_irqrestore(&qp->s_lock, flags); + spin_unlock_irq(&qp->s_lock); if (lastwqe) { struct ib_event ev; @@ -616,7 +609,7 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, goto bail; inval: - spin_unlock_irqrestore(&qp->s_lock, flags); + spin_unlock_irq(&qp->s_lock); ret = -EINVAL; bail: @@ -647,7 +640,7 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, attr->pkey_index = qp->s_pkey_index; attr->alt_pkey_index = 0; attr->en_sqd_async_notify = 0; - attr->sq_draining = 0; + attr->sq_draining = qp->s_draining; attr->max_rd_atomic = qp->s_max_rd_atomic; attr->max_dest_rd_atomic = qp->r_max_rd_atomic; attr->min_rnr_timer = qp->r_min_rnr_timer; @@ -837,6 
+830,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, spin_lock_init(&qp->r_rq.lock); atomic_set(&qp->refcount, 0); init_waitqueue_head(&qp->wait); + init_waitqueue_head(&qp->wait_dma); tasklet_init(&qp->s_task, ipath_do_send, (unsigned long)qp); INIT_LIST_HEAD(&qp->piowait); INIT_LIST_HEAD(&qp->timerwait); @@ -930,6 +924,7 @@ bail_ip: else vfree(qp->r_rq.wq); ipath_free_qp(&dev->qp_table, qp); + free_qpn(&dev->qp_table, qp->ibqp.qp_num); bail_qp: kfree(qp); bail_swq: @@ -951,41 +946,44 @@ int ipath_destroy_qp(struct ib_qp *ibqp) { struct ipath_qp *qp = to_iqp(ibqp); struct ipath_ibdev *dev = to_idev(ibqp->device); - unsigned long flags; - spin_lock_irqsave(&qp->s_lock, flags); - qp->state = IB_QPS_ERR; - spin_unlock_irqrestore(&qp->s_lock, flags); - spin_lock(&dev->n_qps_lock); - dev->n_qps_allocated--; - spin_unlock(&dev->n_qps_lock); + /* Make sure HW and driver activity is stopped. */ + spin_lock_irq(&qp->s_lock); + if (qp->state != IB_QPS_RESET) { + qp->state = IB_QPS_RESET; + spin_lock(&dev->pending_lock); + if (!list_empty(&qp->timerwait)) + list_del_init(&qp->timerwait); + if (!list_empty(&qp->piowait)) + list_del_init(&qp->piowait); + spin_unlock(&dev->pending_lock); + qp->s_flags &= ~IPATH_S_ANY_WAIT; + spin_unlock_irq(&qp->s_lock); + /* Stop the sending tasklet */ + tasklet_kill(&qp->s_task); + wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy)); + } else + spin_unlock_irq(&qp->s_lock); - /* Stop the sending tasklet. */ - tasklet_kill(&qp->s_task); + ipath_free_qp(&dev->qp_table, qp); if (qp->s_tx) { atomic_dec(&qp->refcount); if (qp->s_tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF) kfree(qp->s_tx->txreq.map_addr); + spin_lock_irq(&dev->pending_lock); + list_add(&qp->s_tx->txreq.list, &dev->txreq_free); + spin_unlock_irq(&dev->pending_lock); + qp->s_tx = NULL; } - /* Make sure the QP isn't on the timeout list. */ - spin_lock_irqsave(&dev->pending_lock, flags); - if (!list_empty(&qp->timerwait)) - list_del_init(&qp->timerwait); - if (!list_empty(&qp->piowait)) - list_del_init(&qp->piowait); - if (qp->s_tx) - list_add(&qp->s_tx->txreq.list, &dev->txreq_free); - spin_unlock_irqrestore(&dev->pending_lock, flags); + wait_event(qp->wait, !atomic_read(&qp->refcount)); - /* - * Make sure that the QP is not in the QPN table so receive - * interrupts will discard packets for this QP. XXX Also remove QP - * from multicast table. - */ - if (atomic_read(&qp->refcount) != 0) - ipath_free_qp(&dev->qp_table, qp); + /* all user's cleaned up, mark it available */ + free_qpn(&dev->qp_table, qp->ibqp.qp_num); + spin_lock(&dev->n_qps_lock); + dev->n_qps_allocated--; + spin_unlock(&dev->n_qps_lock); if (qp->ip) kref_put(&qp->ip->ref, ipath_release_mmap_info); @@ -1055,9 +1053,10 @@ void ipath_get_credit(struct ipath_qp *qp, u32 aeth) } /* Restart sending if it was blocked due to lack of credits. */ - if (qp->s_cur != qp->s_head && + if ((qp->s_flags & IPATH_S_WAIT_SSN_CREDIT) && + qp->s_cur != qp->s_head && (qp->s_lsn == (u32) -1 || ipath_cmp24(get_swqe_ptr(qp, qp->s_cur)->ssn, qp->s_lsn + 1) <= 0)) - tasklet_hi_schedule(&qp->s_task); + ipath_schedule_send(qp); } diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c index b4b26c3aa613..5b5276a270bc 100644 --- a/drivers/infiniband/hw/ipath/ipath_rc.c +++ b/drivers/infiniband/hw/ipath/ipath_rc.c @@ -92,6 +92,10 @@ static int ipath_make_rc_ack(struct ipath_ibdev *dev, struct ipath_qp *qp, u32 bth0; u32 bth2; + /* Don't send an ACK if we aren't supposed to. 
*/ + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) + goto bail; + /* header size in 32-bit words LRH+BTH = (8+12)/4. */ hwords = 5; @@ -238,14 +242,25 @@ int ipath_make_rc_req(struct ipath_qp *qp) ipath_make_rc_ack(dev, qp, ohdr, pmtu)) goto done; - if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) || - qp->s_rnr_timeout || qp->s_wait_credit) - goto bail; + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) { + if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND)) + goto bail; + /* We are in the error state, flush the work request. */ + if (qp->s_last == qp->s_head) + goto bail; + /* If DMAs are in progress, we can't flush immediately. */ + if (atomic_read(&qp->s_dma_busy)) { + qp->s_flags |= IPATH_S_WAIT_DMA; + goto bail; + } + wqe = get_swqe_ptr(qp, qp->s_last); + ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR); + goto done; + } - /* Limit the number of packets sent without an ACK. */ - if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT) > 0) { - qp->s_wait_credit = 1; - dev->n_rc_stalls++; + /* Leave BUSY set until RNR timeout. */ + if (qp->s_rnr_timeout) { + qp->s_flags |= IPATH_S_WAITING; goto bail; } @@ -257,6 +272,9 @@ int ipath_make_rc_req(struct ipath_qp *qp) wqe = get_swqe_ptr(qp, qp->s_cur); switch (qp->s_state) { default: + if (!(ib_ipath_state_ops[qp->state] & + IPATH_PROCESS_NEXT_SEND_OK)) + goto bail; /* * Resend an old request or start a new one. * @@ -294,8 +312,10 @@ int ipath_make_rc_req(struct ipath_qp *qp) case IB_WR_SEND_WITH_IMM: /* If no credit, return. */ if (qp->s_lsn != (u32) -1 && - ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) + ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) { + qp->s_flags |= IPATH_S_WAIT_SSN_CREDIT; goto bail; + } wqe->lpsn = wqe->psn; if (len > pmtu) { wqe->lpsn += (len - 1) / pmtu; @@ -325,8 +345,10 @@ int ipath_make_rc_req(struct ipath_qp *qp) case IB_WR_RDMA_WRITE_WITH_IMM: /* If no credit, return. */ if (qp->s_lsn != (u32) -1 && - ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) + ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) { + qp->s_flags |= IPATH_S_WAIT_SSN_CREDIT; goto bail; + } ohdr->u.rc.reth.vaddr = cpu_to_be64(wqe->wr.wr.rdma.remote_addr); ohdr->u.rc.reth.rkey = @@ -570,7 +592,11 @@ int ipath_make_rc_req(struct ipath_qp *qp) ipath_make_ruc_header(dev, qp, ohdr, bth0 | (qp->s_state << 24), bth2); done: ret = 1; + goto unlock; + bail: + qp->s_flags &= ~IPATH_S_BUSY; +unlock: spin_unlock_irqrestore(&qp->s_lock, flags); return ret; } @@ -606,7 +632,11 @@ static void send_rc_ack(struct ipath_qp *qp) spin_unlock_irqrestore(&qp->s_lock, flags); + /* Don't try to send ACKs if the link isn't ACTIVE */ dd = dev->dd; + if (!(dd->ipath_flags & IPATH_LINKACTIVE)) + goto done; + piobuf = ipath_getpiobuf(dd, 0, NULL); if (!piobuf) { /* @@ -668,15 +698,16 @@ static void send_rc_ack(struct ipath_qp *qp) goto done; queue_ack: - dev->n_rc_qacks++; - qp->s_flags |= IPATH_S_ACK_PENDING; - qp->s_nak_state = qp->r_nak_state; - qp->s_ack_psn = qp->r_ack_psn; + if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK) { + dev->n_rc_qacks++; + qp->s_flags |= IPATH_S_ACK_PENDING; + qp->s_nak_state = qp->r_nak_state; + qp->s_ack_psn = qp->r_ack_psn; + + /* Schedule the send tasklet. */ + ipath_schedule_send(qp); + } spin_unlock_irqrestore(&qp->s_lock, flags); - - /* Call ipath_do_rc_send() in another thread. */ - tasklet_hi_schedule(&qp->s_task); - done: return; } @@ -735,7 +766,7 @@ static void reset_psn(struct ipath_qp *qp, u32 psn) /* * Set the state to restart in the middle of a request. 
* Don't change the s_sge, s_cur_sge, or s_cur_size. - * See ipath_do_rc_send(). + * See ipath_make_rc_req(). */ switch (opcode) { case IB_WR_SEND: @@ -801,7 +832,7 @@ void ipath_restart_rc(struct ipath_qp *qp, u32 psn) dev->n_rc_resends += (qp->s_psn - psn) & IPATH_PSN_MASK; reset_psn(qp, psn); - tasklet_hi_schedule(&qp->s_task); + ipath_schedule_send(qp); bail: return; @@ -809,13 +840,7 @@ bail: static inline void update_last_psn(struct ipath_qp *qp, u32 psn) { - if (qp->s_last_psn != psn) { - qp->s_last_psn = psn; - if (qp->s_wait_credit) { - qp->s_wait_credit = 0; - tasklet_hi_schedule(&qp->s_task); - } - } + qp->s_last_psn = psn; } /** @@ -915,14 +940,10 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode, wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) { qp->s_num_rd_atomic--; /* Restart sending task if fence is complete */ - if ((qp->s_flags & IPATH_S_FENCE_PENDING) && - !qp->s_num_rd_atomic) { - qp->s_flags &= ~IPATH_S_FENCE_PENDING; - tasklet_hi_schedule(&qp->s_task); - } else if (qp->s_flags & IPATH_S_RDMAR_PENDING) { - qp->s_flags &= ~IPATH_S_RDMAR_PENDING; - tasklet_hi_schedule(&qp->s_task); - } + if (((qp->s_flags & IPATH_S_FENCE_PENDING) && + !qp->s_num_rd_atomic) || + qp->s_flags & IPATH_S_RDMAR_PENDING) + ipath_schedule_send(qp); } /* Post a send completion queue entry if requested. */ if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) || @@ -956,6 +977,8 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode, } else { if (++qp->s_last >= qp->s_size) qp->s_last = 0; + if (qp->state == IB_QPS_SQD && qp->s_last == qp->s_cur) + qp->s_draining = 0; if (qp->s_last == qp->s_tail) break; wqe = get_swqe_ptr(qp, qp->s_last); @@ -979,7 +1002,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode, */ if (ipath_cmp24(qp->s_psn, psn) <= 0) { reset_psn(qp, psn + 1); - tasklet_hi_schedule(&qp->s_task); + ipath_schedule_send(qp); } } else if (ipath_cmp24(qp->s_psn, psn) <= 0) { qp->s_state = OP(SEND_LAST); @@ -1018,6 +1041,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode, ib_ipath_rnr_table[(aeth >> IPATH_AETH_CREDIT_SHIFT) & IPATH_AETH_CREDIT_MASK]; ipath_insert_rnr_queue(qp); + ipath_schedule_send(qp); goto bail; case 3: /* NAK */ @@ -1108,6 +1132,10 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, spin_lock_irqsave(&qp->s_lock, flags); + /* Double check we can process this now that we hold the s_lock. */ + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) + goto ack_done; + /* Ignore invalid responses. */ if (ipath_cmp24(psn, qp->s_next_psn) >= 0) goto ack_done; @@ -1343,7 +1371,12 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, psn &= IPATH_PSN_MASK; e = NULL; old_req = 1; + spin_lock_irqsave(&qp->s_lock, flags); + /* Double check we can process this now that we hold the s_lock. 
*/ + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) + goto unlock_done; + for (i = qp->r_head_ack_queue; ; i = prev) { if (i == qp->s_tail_ack_queue) old_req = 0; @@ -1471,7 +1504,7 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, break; } qp->r_nak_state = 0; - tasklet_hi_schedule(&qp->s_task); + ipath_schedule_send(qp); unlock_done: spin_unlock_irqrestore(&qp->s_lock, flags); @@ -1503,18 +1536,15 @@ void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err) static inline void ipath_update_ack_queue(struct ipath_qp *qp, unsigned n) { - unsigned long flags; unsigned next; next = n + 1; if (next > IPATH_MAX_RDMA_ATOMIC) next = 0; - spin_lock_irqsave(&qp->s_lock, flags); if (n == qp->s_tail_ack_queue) { qp->s_tail_ack_queue = next; qp->s_ack_state = OP(ACKNOWLEDGE); } - spin_unlock_irqrestore(&qp->s_lock, flags); } /** @@ -1543,6 +1573,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, int diff; struct ib_reth *reth; int header_in_data; + unsigned long flags; /* Validate the SLID. See Ch. 9.6.1.5 */ if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid)) @@ -1690,9 +1721,8 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, goto nack_inv; ipath_copy_sge(&qp->r_sge, data, tlen); qp->r_msn++; - if (!qp->r_wrid_valid) + if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags)) break; - qp->r_wrid_valid = 0; wc.wr_id = qp->r_wr_id; wc.status = IB_WC_SUCCESS; if (opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE) || @@ -1764,9 +1794,13 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, next = qp->r_head_ack_queue + 1; if (next > IPATH_MAX_RDMA_ATOMIC) next = 0; + spin_lock_irqsave(&qp->s_lock, flags); + /* Double check we can process this while holding the s_lock. */ + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) + goto unlock; if (unlikely(next == qp->s_tail_ack_queue)) { if (!qp->s_ack_queue[next].sent) - goto nack_inv; + goto nack_inv_unlck; ipath_update_ack_queue(qp, next); } e = &qp->s_ack_queue[qp->r_head_ack_queue]; @@ -1787,7 +1821,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, ok = ipath_rkey_ok(qp, &e->rdma_sge, len, vaddr, rkey, IB_ACCESS_REMOTE_READ); if (unlikely(!ok)) - goto nack_acc; + goto nack_acc_unlck; /* * Update the next expected PSN. We add 1 later * below, so only add the remainder here. @@ -1814,13 +1848,12 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, qp->r_psn++; qp->r_state = opcode; qp->r_nak_state = 0; - barrier(); qp->r_head_ack_queue = next; - /* Call ipath_do_rc_send() in another thread. */ - tasklet_hi_schedule(&qp->s_task); + /* Schedule the send tasklet. */ + ipath_schedule_send(qp); - goto done; + goto unlock; } case OP(COMPARE_SWAP): @@ -1839,9 +1872,13 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, next = qp->r_head_ack_queue + 1; if (next > IPATH_MAX_RDMA_ATOMIC) next = 0; + spin_lock_irqsave(&qp->s_lock, flags); + /* Double check we can process this while holding the s_lock. 
*/ + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) + goto unlock; if (unlikely(next == qp->s_tail_ack_queue)) { if (!qp->s_ack_queue[next].sent) - goto nack_inv; + goto nack_inv_unlck; ipath_update_ack_queue(qp, next); } if (!header_in_data) @@ -1851,13 +1888,13 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, vaddr = ((u64) be32_to_cpu(ateth->vaddr[0]) << 32) | be32_to_cpu(ateth->vaddr[1]); if (unlikely(vaddr & (sizeof(u64) - 1))) - goto nack_inv; + goto nack_inv_unlck; rkey = be32_to_cpu(ateth->rkey); /* Check rkey & NAK */ if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64), vaddr, rkey, IB_ACCESS_REMOTE_ATOMIC))) - goto nack_acc; + goto nack_acc_unlck; /* Perform atomic OP and save result. */ maddr = (atomic64_t *) qp->r_sge.sge.vaddr; sdata = be64_to_cpu(ateth->swap_data); @@ -1874,13 +1911,12 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, qp->r_psn++; qp->r_state = opcode; qp->r_nak_state = 0; - barrier(); qp->r_head_ack_queue = next; - /* Call ipath_do_rc_send() in another thread. */ - tasklet_hi_schedule(&qp->s_task); + /* Schedule the send tasklet. */ + ipath_schedule_send(qp); - goto done; + goto unlock; } default: @@ -1901,19 +1937,26 @@ rnr_nak: qp->r_ack_psn = qp->r_psn; goto send_ack; +nack_inv_unlck: + spin_unlock_irqrestore(&qp->s_lock, flags); nack_inv: ipath_rc_error(qp, IB_WC_LOC_QP_OP_ERR); qp->r_nak_state = IB_NAK_INVALID_REQUEST; qp->r_ack_psn = qp->r_psn; goto send_ack; +nack_acc_unlck: + spin_unlock_irqrestore(&qp->s_lock, flags); nack_acc: ipath_rc_error(qp, IB_WC_LOC_PROT_ERR); qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR; qp->r_ack_psn = qp->r_psn; send_ack: send_rc_ack(qp); + goto done; +unlock: + spin_unlock_irqrestore(&qp->s_lock, flags); done: return; } diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c index c716a03dd399..a4b5521567fe 100644 --- a/drivers/infiniband/hw/ipath/ipath_ruc.c +++ b/drivers/infiniband/hw/ipath/ipath_ruc.c @@ -78,6 +78,7 @@ const u32 ib_ipath_rnr_table[32] = { * ipath_insert_rnr_queue - put QP on the RNR timeout list for the device * @qp: the QP * + * Called with the QP s_lock held and interrupts disabled. * XXX Use a simple list for now. We might need a priority * queue if we have lots of QPs waiting for RNR timeouts * but that should be rare. @@ -85,9 +86,9 @@ const u32 ib_ipath_rnr_table[32] = { void ipath_insert_rnr_queue(struct ipath_qp *qp) { struct ipath_ibdev *dev = to_idev(qp->ibqp.device); - unsigned long flags; - spin_lock_irqsave(&dev->pending_lock, flags); + /* We already did a spin_lock_irqsave(), so just use spin_lock */ + spin_lock(&dev->pending_lock); if (list_empty(&dev->rnrwait)) list_add(&qp->timerwait, &dev->rnrwait); else { @@ -109,7 +110,7 @@ void ipath_insert_rnr_queue(struct ipath_qp *qp) nqp->s_rnr_timeout -= qp->s_rnr_timeout; list_add(&qp->timerwait, l); } - spin_unlock_irqrestore(&dev->pending_lock, flags); + spin_unlock(&dev->pending_lock); } /** @@ -185,6 +186,11 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only) } spin_lock_irqsave(&rq->lock, flags); + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) { + ret = 0; + goto unlock; + } + wq = rq->wq; tail = wq->tail; /* Validate tail before using it since it is user writable. 
*/ @@ -192,9 +198,8 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only) tail = 0; do { if (unlikely(tail == wq->head)) { - spin_unlock_irqrestore(&rq->lock, flags); ret = 0; - goto bail; + goto unlock; } /* Make sure entry is read after head index is read. */ smp_rmb(); @@ -207,7 +212,7 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only) wq->tail = tail; ret = 1; - qp->r_wrid_valid = 1; + set_bit(IPATH_R_WRID_VALID, &qp->r_aflags); if (handler) { u32 n; @@ -234,8 +239,8 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only) goto bail; } } +unlock: spin_unlock_irqrestore(&rq->lock, flags); - bail: return ret; } @@ -263,35 +268,59 @@ static void ipath_ruc_loopback(struct ipath_qp *sqp) atomic64_t *maddr; enum ib_wc_status send_status; + /* + * Note that we check the responder QP state after + * checking the requester's state. + */ qp = ipath_lookup_qpn(&dev->qp_table, sqp->remote_qpn); - if (!qp) { - dev->n_pkt_drops++; - return; - } -again: spin_lock_irqsave(&sqp->s_lock, flags); - if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_SEND_OK) || - sqp->s_rnr_timeout) { - spin_unlock_irqrestore(&sqp->s_lock, flags); - goto done; - } + /* Return if we are already busy processing a work request. */ + if ((sqp->s_flags & (IPATH_S_BUSY | IPATH_S_ANY_WAIT)) || + !(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_OR_FLUSH_SEND)) + goto unlock; - /* Get the next send request. */ - if (sqp->s_last == sqp->s_head) { - /* Send work queue is empty. */ - spin_unlock_irqrestore(&sqp->s_lock, flags); - goto done; + sqp->s_flags |= IPATH_S_BUSY; + +again: + if (sqp->s_last == sqp->s_head) + goto clr_busy; + wqe = get_swqe_ptr(sqp, sqp->s_last); + + /* Return if it is not OK to start a new work reqeust. */ + if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_NEXT_SEND_OK)) { + if (!(ib_ipath_state_ops[sqp->state] & IPATH_FLUSH_SEND)) + goto clr_busy; + /* We are in the error state, flush the work request. */ + send_status = IB_WC_WR_FLUSH_ERR; + goto flush_send; } /* * We can rely on the entry not changing without the s_lock * being held until we update s_last. + * We increment s_cur to indicate s_last is in progress. */ - wqe = get_swqe_ptr(sqp, sqp->s_last); + if (sqp->s_last == sqp->s_cur) { + if (++sqp->s_cur >= sqp->s_size) + sqp->s_cur = 0; + } spin_unlock_irqrestore(&sqp->s_lock, flags); + if (!qp || !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) { + dev->n_pkt_drops++; + /* + * For RC, the requester would timeout and retry so + * shortcut the timeouts and just signal too many retries. 
+ */ + if (sqp->ibqp.qp_type == IB_QPT_RC) + send_status = IB_WC_RETRY_EXC_ERR; + else + send_status = IB_WC_SUCCESS; + goto serr; + } + memset(&wc, 0, sizeof wc); send_status = IB_WC_SUCCESS; @@ -396,8 +425,7 @@ again: sqp->s_len -= len; } - if (wqe->wr.opcode == IB_WR_RDMA_WRITE || - wqe->wr.opcode == IB_WR_RDMA_READ) + if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags)) goto send_comp; if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM) @@ -417,6 +445,8 @@ again: wqe->wr.send_flags & IB_SEND_SOLICITED); send_comp: + spin_lock_irqsave(&sqp->s_lock, flags); +flush_send: sqp->s_rnr_retry = sqp->s_rnr_retry_cnt; ipath_send_complete(sqp, wqe, send_status); goto again; @@ -437,11 +467,12 @@ rnr_nak: sqp->s_rnr_retry--; spin_lock_irqsave(&sqp->s_lock, flags); if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_RECV_OK)) - goto unlock; + goto clr_busy; + sqp->s_flags |= IPATH_S_WAITING; dev->n_rnr_naks++; sqp->s_rnr_timeout = ib_ipath_rnr_table[qp->r_min_rnr_timer]; ipath_insert_rnr_queue(sqp); - goto unlock; + goto clr_busy; inv_err: send_status = IB_WC_REM_INV_REQ_ERR; @@ -473,17 +504,19 @@ serr: } goto done; } +clr_busy: + sqp->s_flags &= ~IPATH_S_BUSY; unlock: spin_unlock_irqrestore(&sqp->s_lock, flags); done: - if (atomic_dec_and_test(&qp->refcount)) + if (qp && atomic_dec_and_test(&qp->refcount)) wake_up(&qp->wait); } static void want_buffer(struct ipath_devdata *dd, struct ipath_qp *qp) { if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA) || - qp->ibqp.qp_type == IB_QPT_SMI) { + qp->ibqp.qp_type == IB_QPT_SMI) { unsigned long flags; spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); @@ -501,26 +534,36 @@ static void want_buffer(struct ipath_devdata *dd, struct ipath_qp *qp) * @dev: the device we ran out of buffers on * * Called when we run out of PIO buffers. + * If we are now in the error state, return zero to flush the + * send work request. */ -static void ipath_no_bufs_available(struct ipath_qp *qp, +static int ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev) { unsigned long flags; + int ret = 1; /* * Note that as soon as want_buffer() is called and * possibly before it returns, ipath_ib_piobufavail() - * could be called. If we are still in the tasklet function, - * tasklet_hi_schedule() will not call us until the next time - * tasklet_hi_schedule() is called. - * We leave the busy flag set so that another post send doesn't - * try to put the same QP on the piowait list again. + * could be called. Therefore, put QP on the piowait list before + * enabling the PIO avail interrupt. 
*/ - spin_lock_irqsave(&dev->pending_lock, flags); - list_add_tail(&qp->piowait, &dev->piowait); - spin_unlock_irqrestore(&dev->pending_lock, flags); - want_buffer(dev->dd, qp); - dev->n_piowait++; + spin_lock_irqsave(&qp->s_lock, flags); + if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) { + dev->n_piowait++; + qp->s_flags |= IPATH_S_WAITING; + qp->s_flags &= ~IPATH_S_BUSY; + spin_lock(&dev->pending_lock); + if (list_empty(&qp->piowait)) + list_add_tail(&qp->piowait, &dev->piowait); + spin_unlock(&dev->pending_lock); + } else + ret = 0; + spin_unlock_irqrestore(&qp->s_lock, flags); + if (ret) + want_buffer(dev->dd, qp); + return ret; } /** @@ -596,15 +639,13 @@ void ipath_do_send(unsigned long data) struct ipath_qp *qp = (struct ipath_qp *)data; struct ipath_ibdev *dev = to_idev(qp->ibqp.device); int (*make_req)(struct ipath_qp *qp); - - if (test_and_set_bit(IPATH_S_BUSY, &qp->s_busy)) - goto bail; + unsigned long flags; if ((qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) && qp->remote_ah_attr.dlid == dev->dd->ipath_lid) { ipath_ruc_loopback(qp); - goto clear; + goto bail; } if (qp->ibqp.qp_type == IB_QPT_RC) @@ -614,6 +655,19 @@ void ipath_do_send(unsigned long data) else make_req = ipath_make_ud_req; + spin_lock_irqsave(&qp->s_lock, flags); + + /* Return if we are already busy processing a work request. */ + if ((qp->s_flags & (IPATH_S_BUSY | IPATH_S_ANY_WAIT)) || + !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_OR_FLUSH_SEND)) { + spin_unlock_irqrestore(&qp->s_lock, flags); + goto bail; + } + + qp->s_flags |= IPATH_S_BUSY; + + spin_unlock_irqrestore(&qp->s_lock, flags); + again: /* Check for a constructed packet to be sent. */ if (qp->s_hdrwords != 0) { @@ -623,8 +677,8 @@ again: */ if (ipath_verbs_send(qp, &qp->s_hdr, qp->s_hdrwords, qp->s_cur_sge, qp->s_cur_size)) { - ipath_no_bufs_available(qp, dev); - goto bail; + if (ipath_no_bufs_available(qp, dev)) + goto bail; } dev->n_unicast_xmit++; /* Record that we sent the packet and s_hdr is empty. */ @@ -633,16 +687,20 @@ again: if (make_req(qp)) goto again; -clear: - clear_bit(IPATH_S_BUSY, &qp->s_busy); + bail:; } +/* + * This should be called with s_lock held. + */ void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe, enum ib_wc_status status) { - unsigned long flags; - u32 last; + u32 old_last, last; + + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_OR_FLUSH_SEND)) + return; /* See ch. 11.2.4.1 and 10.7.3.1 */ if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) || @@ -661,10 +719,14 @@ void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe, status != IB_WC_SUCCESS); } - spin_lock_irqsave(&qp->s_lock, flags); - last = qp->s_last; + old_last = last = qp->s_last; if (++last >= qp->s_size) last = 0; qp->s_last = last; - spin_unlock_irqrestore(&qp->s_lock, flags); + if (qp->s_cur == old_last) + qp->s_cur = last; + if (qp->s_tail == old_last) + qp->s_tail = last; + if (qp->state == IB_QPS_SQD && last == qp->s_cur) + qp->s_draining = 0; } diff --git a/drivers/infiniband/hw/ipath/ipath_uc.c b/drivers/infiniband/hw/ipath/ipath_uc.c index bfe8926b5514..7fd18e833907 100644 --- a/drivers/infiniband/hw/ipath/ipath_uc.c +++ b/drivers/infiniband/hw/ipath/ipath_uc.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. + * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. 
* * This software is available to you under a choice of one of two @@ -47,14 +47,30 @@ int ipath_make_uc_req(struct ipath_qp *qp) { struct ipath_other_headers *ohdr; struct ipath_swqe *wqe; + unsigned long flags; u32 hwords; u32 bth0; u32 len; u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu); int ret = 0; - if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) + spin_lock_irqsave(&qp->s_lock, flags); + + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) { + if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND)) + goto bail; + /* We are in the error state, flush the work request. */ + if (qp->s_last == qp->s_head) + goto bail; + /* If DMAs are in progress, we can't flush immediately. */ + if (atomic_read(&qp->s_dma_busy)) { + qp->s_flags |= IPATH_S_WAIT_DMA; + goto bail; + } + wqe = get_swqe_ptr(qp, qp->s_last); + ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR); goto done; + } ohdr = &qp->s_hdr.u.oth; if (qp->remote_ah_attr.ah_flags & IB_AH_GRH) @@ -69,9 +85,12 @@ int ipath_make_uc_req(struct ipath_qp *qp) qp->s_wqe = NULL; switch (qp->s_state) { default: + if (!(ib_ipath_state_ops[qp->state] & + IPATH_PROCESS_NEXT_SEND_OK)) + goto bail; /* Check if send work queue is empty. */ if (qp->s_cur == qp->s_head) - goto done; + goto bail; /* * Start a new request. */ @@ -134,7 +153,7 @@ int ipath_make_uc_req(struct ipath_qp *qp) break; default: - goto done; + goto bail; } break; @@ -194,9 +213,14 @@ int ipath_make_uc_req(struct ipath_qp *qp) ipath_make_ruc_header(to_idev(qp->ibqp.device), qp, ohdr, bth0 | (qp->s_state << 24), qp->s_next_psn++ & IPATH_PSN_MASK); +done: ret = 1; + goto unlock; -done: +bail: + qp->s_flags &= ~IPATH_S_BUSY; +unlock: + spin_unlock_irqrestore(&qp->s_lock, flags); return ret; } @@ -258,8 +282,7 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, */ opcode = be32_to_cpu(ohdr->bth[0]) >> 24; - wc.imm_data = 0; - wc.wc_flags = 0; + memset(&wc, 0, sizeof wc); /* Compare the PSN verses the expected PSN. */ if (unlikely(ipath_cmp24(psn, qp->r_psn) != 0)) { @@ -322,8 +345,8 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, case OP(SEND_ONLY): case OP(SEND_ONLY_WITH_IMMEDIATE): send_first: - if (qp->r_reuse_sge) { - qp->r_reuse_sge = 0; + if (qp->r_flags & IPATH_R_REUSE_SGE) { + qp->r_flags &= ~IPATH_R_REUSE_SGE; qp->r_sge = qp->s_rdma_read_sge; } else if (!ipath_get_rwqe(qp, 0)) { dev->n_pkt_drops++; @@ -340,13 +363,13 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, case OP(SEND_MIDDLE): /* Check for invalid length PMTU or posted rwqe len. */ if (unlikely(tlen != (hdrsize + pmtu + 4))) { - qp->r_reuse_sge = 1; + qp->r_flags |= IPATH_R_REUSE_SGE; dev->n_pkt_drops++; goto done; } qp->r_rcv_len += pmtu; if (unlikely(qp->r_rcv_len > qp->r_len)) { - qp->r_reuse_sge = 1; + qp->r_flags |= IPATH_R_REUSE_SGE; dev->n_pkt_drops++; goto done; } @@ -372,7 +395,7 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, /* Check for invalid length. 
*/ /* XXX LAST len should be >= 1 */ if (unlikely(tlen < (hdrsize + pad + 4))) { - qp->r_reuse_sge = 1; + qp->r_flags |= IPATH_R_REUSE_SGE; dev->n_pkt_drops++; goto done; } @@ -380,7 +403,7 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, tlen -= (hdrsize + pad + 4); wc.byte_len = tlen + qp->r_rcv_len; if (unlikely(wc.byte_len > qp->r_len)) { - qp->r_reuse_sge = 1; + qp->r_flags |= IPATH_R_REUSE_SGE; dev->n_pkt_drops++; goto done; } @@ -390,14 +413,10 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, wc.wr_id = qp->r_wr_id; wc.status = IB_WC_SUCCESS; wc.opcode = IB_WC_RECV; - wc.vendor_err = 0; wc.qp = &qp->ibqp; wc.src_qp = qp->remote_qpn; - wc.pkey_index = 0; wc.slid = qp->remote_ah_attr.dlid; wc.sl = qp->remote_ah_attr.sl; - wc.dlid_path_bits = 0; - wc.port_num = 0; /* Signal completion event if the solicited bit is set. */ ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, (ohdr->bth[0] & @@ -488,8 +507,8 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, dev->n_pkt_drops++; goto done; } - if (qp->r_reuse_sge) - qp->r_reuse_sge = 0; + if (qp->r_flags & IPATH_R_REUSE_SGE) + qp->r_flags &= ~IPATH_R_REUSE_SGE; else if (!ipath_get_rwqe(qp, 1)) { dev->n_pkt_drops++; goto done; diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c index 8b6a261c89e3..77ca8ca74e78 100644 --- a/drivers/infiniband/hw/ipath/ipath_ud.c +++ b/drivers/infiniband/hw/ipath/ipath_ud.c @@ -65,9 +65,9 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe) u32 length; qp = ipath_lookup_qpn(&dev->qp_table, swqe->wr.wr.ud.remote_qpn); - if (!qp) { + if (!qp || !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) { dev->n_pkt_drops++; - goto send_comp; + goto done; } rsge.sg_list = NULL; @@ -91,14 +91,12 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe) * present on the wire. */ length = swqe->length; + memset(&wc, 0, sizeof wc); wc.byte_len = length + sizeof(struct ib_grh); if (swqe->wr.opcode == IB_WR_SEND_WITH_IMM) { wc.wc_flags = IB_WC_WITH_IMM; wc.imm_data = swqe->wr.ex.imm_data; - } else { - wc.wc_flags = 0; - wc.imm_data = 0; } /* @@ -229,7 +227,6 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe) } wc.status = IB_WC_SUCCESS; wc.opcode = IB_WC_RECV; - wc.vendor_err = 0; wc.qp = &qp->ibqp; wc.src_qp = sqp->ibqp.qp_num; /* XXX do we know which pkey matched? Only needed for GSI. */ @@ -248,8 +245,7 @@ drop: kfree(rsge.sg_list); if (atomic_dec_and_test(&qp->refcount)) wake_up(&qp->wait); -send_comp: - ipath_send_complete(sqp, swqe, IB_WC_SUCCESS); +done:; } /** @@ -264,6 +260,7 @@ int ipath_make_ud_req(struct ipath_qp *qp) struct ipath_other_headers *ohdr; struct ib_ah_attr *ah_attr; struct ipath_swqe *wqe; + unsigned long flags; u32 nwords; u32 extra_bytes; u32 bth0; @@ -271,13 +268,30 @@ int ipath_make_ud_req(struct ipath_qp *qp) u16 lid; int ret = 0; - if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK))) - goto bail; + spin_lock_irqsave(&qp->s_lock, flags); + + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_NEXT_SEND_OK)) { + if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND)) + goto bail; + /* We are in the error state, flush the work request. */ + if (qp->s_last == qp->s_head) + goto bail; + /* If DMAs are in progress, we can't flush immediately. 
*/ + if (atomic_read(&qp->s_dma_busy)) { + qp->s_flags |= IPATH_S_WAIT_DMA; + goto bail; + } + wqe = get_swqe_ptr(qp, qp->s_last); + ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR); + goto done; + } if (qp->s_cur == qp->s_head) goto bail; wqe = get_swqe_ptr(qp, qp->s_cur); + if (++qp->s_cur >= qp->s_size) + qp->s_cur = 0; /* Construct the header. */ ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr; @@ -288,10 +302,23 @@ int ipath_make_ud_req(struct ipath_qp *qp) dev->n_unicast_xmit++; } else { dev->n_unicast_xmit++; - lid = ah_attr->dlid & - ~((1 << dev->dd->ipath_lmc) - 1); + lid = ah_attr->dlid & ~((1 << dev->dd->ipath_lmc) - 1); if (unlikely(lid == dev->dd->ipath_lid)) { + /* + * If DMAs are in progress, we can't generate + * a completion for the loopback packet since + * it would be out of order. + * XXX Instead of waiting, we could queue a + * zero length descriptor so we get a callback. + */ + if (atomic_read(&qp->s_dma_busy)) { + qp->s_flags |= IPATH_S_WAIT_DMA; + goto bail; + } + spin_unlock_irqrestore(&qp->s_lock, flags); ipath_ud_loopback(qp, wqe); + spin_lock_irqsave(&qp->s_lock, flags); + ipath_send_complete(qp, wqe, IB_WC_SUCCESS); goto done; } } @@ -368,11 +395,13 @@ int ipath_make_ud_req(struct ipath_qp *qp) ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num); done: - if (++qp->s_cur >= qp->s_size) - qp->s_cur = 0; ret = 1; + goto unlock; bail: + qp->s_flags &= ~IPATH_S_BUSY; +unlock: + spin_unlock_irqrestore(&qp->s_lock, flags); return ret; } @@ -506,8 +535,8 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, /* * Get the next work request entry to find where to put the data. */ - if (qp->r_reuse_sge) - qp->r_reuse_sge = 0; + if (qp->r_flags & IPATH_R_REUSE_SGE) + qp->r_flags &= ~IPATH_R_REUSE_SGE; else if (!ipath_get_rwqe(qp, 0)) { /* * Count VL15 packets dropped due to no receive buffer. @@ -523,7 +552,7 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, } /* Silently drop packets which are too big. 
*/ if (wc.byte_len > qp->r_len) { - qp->r_reuse_sge = 1; + qp->r_flags |= IPATH_R_REUSE_SGE; dev->n_pkt_drops++; goto bail; } @@ -535,7 +564,8 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, ipath_skip_sge(&qp->r_sge, sizeof(struct ib_grh)); ipath_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh)); - qp->r_wrid_valid = 0; + if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags)) + goto bail; wc.wr_id = qp->r_wr_id; wc.status = IB_WC_SUCCESS; wc.opcode = IB_WC_RECV; diff --git a/drivers/infiniband/hw/ipath/ipath_user_sdma.h b/drivers/infiniband/hw/ipath/ipath_user_sdma.h index e70946c1428c..fc76316c4a58 100644 --- a/drivers/infiniband/hw/ipath/ipath_user_sdma.h +++ b/drivers/infiniband/hw/ipath/ipath_user_sdma.h @@ -45,8 +45,6 @@ int ipath_user_sdma_writev(struct ipath_devdata *dd, int ipath_user_sdma_make_progress(struct ipath_devdata *dd, struct ipath_user_sdma_queue *pq); -int ipath_user_sdma_pkt_sent(const struct ipath_user_sdma_queue *pq, - u32 counter); void ipath_user_sdma_queue_drain(struct ipath_devdata *dd, struct ipath_user_sdma_queue *pq); diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index 22bb42dc8f73..e0ec540042bf 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -111,16 +111,24 @@ static unsigned int ib_ipath_disable_sma; module_param_named(disable_sma, ib_ipath_disable_sma, uint, S_IWUSR | S_IRUGO); MODULE_PARM_DESC(disable_sma, "Disable the SMA"); +/* + * Note that it is OK to post send work requests in the SQE and ERR + * states; ipath_do_send() will process them and generate error + * completions as per IB 1.2 C10-96. + */ const int ib_ipath_state_ops[IB_QPS_ERR + 1] = { [IB_QPS_RESET] = 0, [IB_QPS_INIT] = IPATH_POST_RECV_OK, [IB_QPS_RTR] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK, [IB_QPS_RTS] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK | - IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK, + IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK | + IPATH_PROCESS_NEXT_SEND_OK, [IB_QPS_SQD] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK | - IPATH_POST_SEND_OK, - [IB_QPS_SQE] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK, - [IB_QPS_ERR] = 0, + IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK, + [IB_QPS_SQE] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK | + IPATH_POST_SEND_OK | IPATH_FLUSH_SEND, + [IB_QPS_ERR] = IPATH_POST_RECV_OK | IPATH_FLUSH_RECV | + IPATH_POST_SEND_OK | IPATH_FLUSH_SEND, }; struct ipath_ucontext { @@ -230,18 +238,6 @@ void ipath_skip_sge(struct ipath_sge_state *ss, u32 length) } } -static void ipath_flush_wqe(struct ipath_qp *qp, struct ib_send_wr *wr) -{ - struct ib_wc wc; - - memset(&wc, 0, sizeof(wc)); - wc.wr_id = wr->wr_id; - wc.status = IB_WC_WR_FLUSH_ERR; - wc.opcode = ib_ipath_wc_opcode[wr->opcode]; - wc.qp = &qp->ibqp; - ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 1); -} - /* * Count the number of DMA descriptors needed to send length bytes of data. * Don't modify the ipath_sge_state to get the count. @@ -347,14 +343,8 @@ static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr) spin_lock_irqsave(&qp->s_lock, flags); /* Check that state is OK to post send. */ - if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK))) { - if (qp->state != IB_QPS_SQE && qp->state != IB_QPS_ERR) - goto bail_inval; - /* C10-96 says generate a flushed completion entry. 
*/ - ipath_flush_wqe(qp, wr); - ret = 0; - goto bail; - } + if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK))) + goto bail_inval; /* IB spec says that num_sge == 0 is OK. */ if (wr->num_sge > qp->s_max_sge) @@ -677,6 +667,7 @@ bail:; static void ipath_ib_timer(struct ipath_ibdev *dev) { struct ipath_qp *resend = NULL; + struct ipath_qp *rnr = NULL; struct list_head *last; struct ipath_qp *qp; unsigned long flags; @@ -703,7 +694,9 @@ static void ipath_ib_timer(struct ipath_ibdev *dev) if (--qp->s_rnr_timeout == 0) { do { list_del_init(&qp->timerwait); - tasklet_hi_schedule(&qp->s_task); + qp->timer_next = rnr; + rnr = qp; + atomic_inc(&qp->refcount); if (list_empty(last)) break; qp = list_entry(last->next, struct ipath_qp, @@ -743,14 +736,31 @@ static void ipath_ib_timer(struct ipath_ibdev *dev) spin_unlock_irqrestore(&dev->pending_lock, flags); /* XXX What if timer fires again while this is running? */ - for (qp = resend; qp != NULL; qp = qp->timer_next) { + while (resend != NULL) { + qp = resend; + resend = qp->timer_next; + spin_lock_irqsave(&qp->s_lock, flags); - if (qp->s_last != qp->s_tail && qp->state == IB_QPS_RTS) { + if (qp->s_last != qp->s_tail && + ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) { dev->n_timeouts++; ipath_restart_rc(qp, qp->s_last_psn + 1); } spin_unlock_irqrestore(&qp->s_lock, flags); + /* Notify ipath_destroy_qp() if it is waiting. */ + if (atomic_dec_and_test(&qp->refcount)) + wake_up(&qp->wait); + } + while (rnr != NULL) { + qp = rnr; + rnr = qp->timer_next; + + spin_lock_irqsave(&qp->s_lock, flags); + if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) + ipath_schedule_send(qp); + spin_unlock_irqrestore(&qp->s_lock, flags); + /* Notify ipath_destroy_qp() if it is waiting. */ if (atomic_dec_and_test(&qp->refcount)) wake_up(&qp->wait); @@ -1010,13 +1020,24 @@ static void sdma_complete(void *cookie, int status) struct ipath_verbs_txreq *tx = cookie; struct ipath_qp *qp = tx->qp; struct ipath_ibdev *dev = to_idev(qp->ibqp.device); + unsigned int flags; + enum ib_wc_status ibs = status == IPATH_SDMA_TXREQ_S_OK ? + IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR; - /* Generate a completion queue entry if needed */ - if (qp->ibqp.qp_type != IB_QPT_RC && tx->wqe) { - enum ib_wc_status ibs = status == IPATH_SDMA_TXREQ_S_OK ? - IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR; - + if (atomic_dec_and_test(&qp->s_dma_busy)) { + spin_lock_irqsave(&qp->s_lock, flags); + if (tx->wqe) + ipath_send_complete(qp, tx->wqe, ibs); + if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND && + qp->s_last != qp->s_head) || + (qp->s_flags & IPATH_S_WAIT_DMA)) + ipath_schedule_send(qp); + spin_unlock_irqrestore(&qp->s_lock, flags); + wake_up(&qp->wait_dma); + } else if (tx->wqe) { + spin_lock_irqsave(&qp->s_lock, flags); ipath_send_complete(qp, tx->wqe, ibs); + spin_unlock_irqrestore(&qp->s_lock, flags); } if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF) @@ -1027,6 +1048,21 @@ static void sdma_complete(void *cookie, int status) wake_up(&qp->wait); } +static void decrement_dma_busy(struct ipath_qp *qp) +{ + unsigned int flags; + + if (atomic_dec_and_test(&qp->s_dma_busy)) { + spin_lock_irqsave(&qp->s_lock, flags); + if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND && + qp->s_last != qp->s_head) || + (qp->s_flags & IPATH_S_WAIT_DMA)) + ipath_schedule_send(qp); + spin_unlock_irqrestore(&qp->s_lock, flags); + wake_up(&qp->wait_dma); + } +} + /* * Compute the number of clock cycles of delay before sending the next packet. 
* The multipliers reflect the number of clocks for the fastest rate so @@ -1065,9 +1101,12 @@ static int ipath_verbs_send_dma(struct ipath_qp *qp, if (tx) { qp->s_tx = NULL; /* resend previously constructed packet */ + atomic_inc(&qp->s_dma_busy); ret = ipath_sdma_verbs_send(dd, tx->ss, tx->len, tx); - if (ret) + if (ret) { qp->s_tx = tx; + decrement_dma_busy(qp); + } goto bail; } @@ -1118,12 +1157,14 @@ static int ipath_verbs_send_dma(struct ipath_qp *qp, tx->txreq.sg_count = ndesc; tx->map_len = (hdrwords + 2) << 2; tx->txreq.map_addr = &tx->hdr; + atomic_inc(&qp->s_dma_busy); ret = ipath_sdma_verbs_send(dd, ss, dwords, tx); if (ret) { /* save ss and length in dwords */ tx->ss = ss; tx->len = dwords; qp->s_tx = tx; + decrement_dma_busy(qp); } goto bail; } @@ -1144,6 +1185,7 @@ static int ipath_verbs_send_dma(struct ipath_qp *qp, memcpy(piobuf, hdr, hdrwords << 2); ipath_copy_from_sge(piobuf + hdrwords, ss, len); + atomic_inc(&qp->s_dma_busy); ret = ipath_sdma_verbs_send(dd, NULL, 0, tx); /* * If we couldn't queue the DMA request, save the info @@ -1154,6 +1196,7 @@ static int ipath_verbs_send_dma(struct ipath_qp *qp, tx->ss = NULL; tx->len = 0; qp->s_tx = tx; + decrement_dma_busy(qp); } dev->n_unaligned++; goto bail; @@ -1177,6 +1220,7 @@ static int ipath_verbs_send_pio(struct ipath_qp *qp, unsigned flush_wc; u32 control; int ret; + unsigned int flags; piobuf = ipath_getpiobuf(dd, plen, NULL); if (unlikely(piobuf == NULL)) { @@ -1247,8 +1291,11 @@ static int ipath_verbs_send_pio(struct ipath_qp *qp, } copy_io(piobuf, ss, len, flush_wc); done: - if (qp->s_wqe) + if (qp->s_wqe) { + spin_lock_irqsave(&qp->s_lock, flags); ipath_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS); + spin_unlock_irqrestore(&qp->s_lock, flags); + } ret = 0; bail: return ret; @@ -1281,19 +1328,12 @@ int ipath_verbs_send(struct ipath_qp *qp, struct ipath_ib_header *hdr, * can defer SDMA restart until link goes ACTIVE without * worrying about just how we got there. */ - if (qp->ibqp.qp_type == IB_QPT_SMI) + if (qp->ibqp.qp_type == IB_QPT_SMI || + !(dd->ipath_flags & IPATH_HAS_SEND_DMA)) ret = ipath_verbs_send_pio(qp, hdr, hdrwords, ss, len, plen, dwords); - /* All non-VL15 packets are dropped if link is not ACTIVE */ - else if (!(dd->ipath_flags & IPATH_LINKACTIVE)) { - if (qp->s_wqe) - ipath_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS); - ret = 0; - } else if (dd->ipath_flags & IPATH_HAS_SEND_DMA) - ret = ipath_verbs_send_dma(qp, hdr, hdrwords, ss, len, - plen, dwords); else - ret = ipath_verbs_send_pio(qp, hdr, hdrwords, ss, len, + ret = ipath_verbs_send_dma(qp, hdr, hdrwords, ss, len, plen, dwords); return ret; @@ -1401,27 +1441,46 @@ bail: * This is called from ipath_intr() at interrupt level when a PIO buffer is * available after ipath_verbs_send() returned an error that no buffers were * available. Return 1 if we consumed all the PIO buffers and we still have - * QPs waiting for buffers (for now, just do a tasklet_hi_schedule and + * QPs waiting for buffers (for now, just restart the send tasklet and * return zero). 
*/ int ipath_ib_piobufavail(struct ipath_ibdev *dev) { + struct list_head *list; + struct ipath_qp *qplist; struct ipath_qp *qp; unsigned long flags; if (dev == NULL) goto bail; + list = &dev->piowait; + qplist = NULL; + spin_lock_irqsave(&dev->pending_lock, flags); - while (!list_empty(&dev->piowait)) { - qp = list_entry(dev->piowait.next, struct ipath_qp, - piowait); + while (!list_empty(list)) { + qp = list_entry(list->next, struct ipath_qp, piowait); list_del_init(&qp->piowait); - clear_bit(IPATH_S_BUSY, &qp->s_busy); - tasklet_hi_schedule(&qp->s_task); + qp->pio_next = qplist; + qplist = qp; + atomic_inc(&qp->refcount); } spin_unlock_irqrestore(&dev->pending_lock, flags); + while (qplist != NULL) { + qp = qplist; + qplist = qp->pio_next; + + spin_lock_irqsave(&qp->s_lock, flags); + if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) + ipath_schedule_send(qp); + spin_unlock_irqrestore(&qp->s_lock, flags); + + /* Notify ipath_destroy_qp() if it is waiting. */ + if (atomic_dec_and_test(&qp->refcount)) + wake_up(&qp->wait); + } + bail: return 0; } @@ -2143,11 +2202,12 @@ bail: void ipath_unregister_ib_device(struct ipath_ibdev *dev) { struct ib_device *ibdev = &dev->ibdev; - - disable_timer(dev->dd); + u32 qps_inuse; ib_unregister_device(ibdev); + disable_timer(dev->dd); + if (!list_empty(&dev->pending[0]) || !list_empty(&dev->pending[1]) || !list_empty(&dev->pending[2])) @@ -2162,7 +2222,10 @@ void ipath_unregister_ib_device(struct ipath_ibdev *dev) * Note that ipath_unregister_ib_device() can be called before all * the QPs are destroyed! */ - ipath_free_all_qps(&dev->qp_table); + qps_inuse = ipath_free_all_qps(&dev->qp_table); + if (qps_inuse) + ipath_dev_err(dev->dd, "QP memory leak! %u still in use\n", + qps_inuse); kfree(dev->qp_table.table); kfree(dev->lk_table.table); kfree(dev->txreq_bufs); @@ -2213,17 +2276,14 @@ static ssize_t show_stats(struct device *device, struct device_attribute *attr, "RC OTH NAKs %d\n" "RC timeouts %d\n" "RC RDMA dup %d\n" - "RC stalls %d\n" "piobuf wait %d\n" - "no piobuf %d\n" "unaligned %d\n" "PKT drops %d\n" "WQE errs %d\n", dev->n_rc_resends, dev->n_rc_qacks, dev->n_rc_acks, dev->n_seq_naks, dev->n_rdma_seq, dev->n_rnr_naks, dev->n_other_naks, dev->n_timeouts, - dev->n_rdma_dup_busy, dev->n_rc_stalls, dev->n_piowait, - dev->n_no_piobuf, dev->n_unaligned, + dev->n_rdma_dup_busy, dev->n_piowait, dev->n_unaligned, dev->n_pkt_drops, dev->n_wqe_errs); for (i = 0; i < ARRAY_SIZE(dev->opstats); i++) { const struct ipath_opcode_stats *si = &dev->opstats[i]; diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h index 4c7c2aa8e19d..deee02ca7ca4 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.h +++ b/drivers/infiniband/hw/ipath/ipath_verbs.h @@ -74,6 +74,11 @@ #define IPATH_POST_RECV_OK 0x02 #define IPATH_PROCESS_RECV_OK 0x04 #define IPATH_PROCESS_SEND_OK 0x08 +#define IPATH_PROCESS_NEXT_SEND_OK 0x10 +#define IPATH_FLUSH_SEND 0x20 +#define IPATH_FLUSH_RECV 0x40 +#define IPATH_PROCESS_OR_FLUSH_SEND \ + (IPATH_PROCESS_SEND_OK | IPATH_FLUSH_SEND) /* IB Performance Manager status values */ #define IB_PMA_SAMPLE_STATUS_DONE 0x00 @@ -353,12 +358,14 @@ struct ipath_qp { struct ib_qp ibqp; struct ipath_qp *next; /* link list for QPN hash table */ struct ipath_qp *timer_next; /* link list for ipath_ib_timer() */ + struct ipath_qp *pio_next; /* link for ipath_ib_piobufavail() */ struct list_head piowait; /* link for wait PIO buf */ struct list_head timerwait; /* link for waiting for timeouts */ struct ib_ah_attr 
remote_ah_attr; struct ipath_ib_header s_hdr; /* next packet header to send */ atomic_t refcount; wait_queue_head_t wait; + wait_queue_head_t wait_dma; struct tasklet_struct s_task; struct ipath_mmap_info *ip; struct ipath_sge_state *s_cur_sge; @@ -369,7 +376,7 @@ struct ipath_qp { struct ipath_sge_state s_rdma_read_sge; struct ipath_sge_state r_sge; /* current receive data */ spinlock_t s_lock; - unsigned long s_busy; + atomic_t s_dma_busy; u16 s_pkt_delay; u16 s_hdrwords; /* size of s_hdr in 32 bit words */ u32 s_cur_size; /* size of send packet in bytes */ @@ -383,6 +390,7 @@ struct ipath_qp { u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */ u32 r_ack_psn; /* PSN for next ACK or atomic ACK */ u64 r_wr_id; /* ID for current receive WQE */ + unsigned long r_aflags; u32 r_len; /* total length of r_sge */ u32 r_rcv_len; /* receive data len processed */ u32 r_psn; /* expected rcv packet sequence number */ @@ -394,8 +402,7 @@ struct ipath_qp { u8 r_state; /* opcode of last packet received */ u8 r_nak_state; /* non-zero if NAK is pending */ u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */ - u8 r_reuse_sge; /* for UC receive errors */ - u8 r_wrid_valid; /* r_wrid set but CQ entry not yet made */ + u8 r_flags; u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */ u8 r_head_ack_queue; /* index into s_ack_queue[] */ u8 qp_access_flags; @@ -404,13 +411,13 @@ struct ipath_qp { u8 s_rnr_retry_cnt; u8 s_retry; /* requester retry counter */ u8 s_rnr_retry; /* requester RNR retry counter */ - u8 s_wait_credit; /* limit number of unacked packets sent */ u8 s_pkey_index; /* PKEY index to use */ u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */ u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */ u8 s_tail_ack_queue; /* index into s_ack_queue[] */ u8 s_flags; u8 s_dmult; + u8 s_draining; u8 timeout; /* Timeout for this QP */ enum ib_mtu path_mtu; u32 remote_qpn; @@ -428,16 +435,39 @@ struct ipath_qp { struct ipath_sge r_sg_list[0]; /* verified SGEs */ }; -/* Bit definition for s_busy. */ -#define IPATH_S_BUSY 0 +/* + * Atomic bit definitions for r_aflags. + */ +#define IPATH_R_WRID_VALID 0 + +/* + * Bit definitions for r_flags. + */ +#define IPATH_R_REUSE_SGE 0x01 /* * Bit definitions for s_flags. + * + * IPATH_S_FENCE_PENDING - waiting for all prior RDMA read or atomic SWQEs + * before processing the next SWQE + * IPATH_S_RDMAR_PENDING - waiting for any RDMA read or atomic SWQEs + * before processing the next SWQE + * IPATH_S_WAITING - waiting for RNR timeout or send buffer available. + * IPATH_S_WAIT_SSN_CREDIT - waiting for RC credits to process next SWQE + * IPATH_S_WAIT_DMA - waiting for send DMA queue to drain before generating + * next send completion entry not via send DMA. 
*/ #define IPATH_S_SIGNAL_REQ_WR 0x01 #define IPATH_S_FENCE_PENDING 0x02 #define IPATH_S_RDMAR_PENDING 0x04 #define IPATH_S_ACK_PENDING 0x08 +#define IPATH_S_BUSY 0x10 +#define IPATH_S_WAITING 0x20 +#define IPATH_S_WAIT_SSN_CREDIT 0x40 +#define IPATH_S_WAIT_DMA 0x80 + +#define IPATH_S_ANY_WAIT (IPATH_S_FENCE_PENDING | IPATH_S_RDMAR_PENDING | \ + IPATH_S_WAITING | IPATH_S_WAIT_SSN_CREDIT | IPATH_S_WAIT_DMA) #define IPATH_PSN_CREDIT 512 @@ -573,13 +603,11 @@ struct ipath_ibdev { u32 n_rnr_naks; u32 n_other_naks; u32 n_timeouts; - u32 n_rc_stalls; u32 n_pkt_drops; u32 n_vl15_dropped; u32 n_wqe_errs; u32 n_rdma_dup_busy; u32 n_piowait; - u32 n_no_piobuf; u32 n_unaligned; u32 port_cap_flags; u32 pma_sample_start; @@ -657,6 +685,17 @@ static inline struct ipath_ibdev *to_idev(struct ib_device *ibdev) return container_of(ibdev, struct ipath_ibdev, ibdev); } +/* + * This must be called with s_lock held. + */ +static inline void ipath_schedule_send(struct ipath_qp *qp) +{ + if (qp->s_flags & IPATH_S_ANY_WAIT) + qp->s_flags &= ~IPATH_S_ANY_WAIT; + if (!(qp->s_flags & IPATH_S_BUSY)) + tasklet_hi_schedule(&qp->s_task); +} + int ipath_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, @@ -706,7 +745,7 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_qp_init_attr *init_attr); -void ipath_free_all_qps(struct ipath_qp_table *qpt); +unsigned ipath_free_all_qps(struct ipath_qp_table *qpt); int ipath_init_qp_table(struct ipath_ibdev *idev, int size); -- cgit v1.2.3 From 74116f580b7279543340dd716a2af642f5c1c2c7 Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Tue, 13 May 2008 11:42:20 -0700 Subject: IB/ipath: Fix RDMA read response sequence checking If an out of sequence RDMA read response middle or last packet is received, we should only resend the RDMA read request on the first out of sequence packet and drop subsequent out of sequence packets otherwise, we get "too many retries". Signed-off-by: Ralph Campbell Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_rc.c | 7 +++++++ drivers/infiniband/hw/ipath/ipath_verbs.h | 1 + 2 files changed, 8 insertions(+) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c index 5b5276a270bc..108df667d2ee 100644 --- a/drivers/infiniband/hw/ipath/ipath_rc.c +++ b/drivers/infiniband/hw/ipath/ipath_rc.c @@ -1189,6 +1189,7 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, wqe = get_swqe_ptr(qp, qp->s_last); if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) goto ack_op_err; + qp->r_flags &= ~IPATH_R_RDMAR_SEQ; /* * If this is a response to a resent RDMA read, we * have to be careful to copy the data to the right @@ -1202,6 +1203,9 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, /* no AETH, no ACK */ if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) { dev->n_rdma_seq++; + if (qp->r_flags & IPATH_R_RDMAR_SEQ) + goto ack_done; + qp->r_flags |= IPATH_R_RDMAR_SEQ; ipath_restart_rc(qp, qp->s_last_psn + 1); goto ack_done; } @@ -1263,6 +1267,9 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, /* ACKs READ req. 
*/ if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) { dev->n_rdma_seq++; + if (qp->r_flags & IPATH_R_RDMAR_SEQ) + goto ack_done; + qp->r_flags |= IPATH_R_RDMAR_SEQ; ipath_restart_rc(qp, qp->s_last_psn + 1); goto ack_done; } diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h index deee02ca7ca4..9d12ae8a778e 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.h +++ b/drivers/infiniband/hw/ipath/ipath_verbs.h @@ -444,6 +444,7 @@ struct ipath_qp { * Bit definitions for r_flags. */ #define IPATH_R_REUSE_SGE 0x01 +#define IPATH_R_RDMAR_SEQ 0x02 /* * Bit definitions for s_flags. -- cgit v1.2.3 From 40d97692fbfe52ef68fa771d8121394b2210fd67 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 13 May 2008 11:45:32 -0700 Subject: IB/ipath: Make ipath_portdata work with struct pid * not pid_t The official reason is "with the presence of pid namespaces in the kernel using pid_t-s inside one is no longer safe." But the reason I fix this right now is the following: About a month ago (when 2.6.25 was not yet released) there still was a one last caller of a to-be-deprecated-soon function find_pid() - the kill_proc() function, which in turn was only used by nfs callback code. During the last merge window, this last caller was finally eliminated by some NFS patch(es) and I was about to finally kill this kill_proc() and find_pid(), but found, that I was late and the kill_proc is now called from the ipath driver since commit 58411d1c ("IB/ipath: Head of Line blocking vs forward progress of user apps"). So here's a patch that fixes this code to use struct pid * and (!) the kill_pid routine. Signed-off-by: Pavel Emelyanov Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_driver.c | 18 ++++++++++-------- drivers/infiniband/hw/ipath/ipath_file_ops.c | 19 +++++++++++-------- drivers/infiniband/hw/ipath/ipath_kernel.h | 4 ++-- 3 files changed, 23 insertions(+), 18 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c index ce7b7c34360e..258e66cf3546 100644 --- a/drivers/infiniband/hw/ipath/ipath_driver.c +++ b/drivers/infiniband/hw/ipath/ipath_driver.c @@ -2616,7 +2616,7 @@ int ipath_reset_device(int unit) ipath_dbg("unit %u port %d is in use " "(PID %u cmd %s), can't reset\n", unit, i, - dd->ipath_pd[i]->port_pid, + pid_nr(dd->ipath_pd[i]->port_pid), dd->ipath_pd[i]->port_comm); ret = -EBUSY; goto bail; @@ -2654,19 +2654,21 @@ bail: static int ipath_signal_procs(struct ipath_devdata *dd, int sig) { int i, sub, any = 0; - pid_t pid; + struct pid *pid; if (!dd->ipath_pd) return 0; for (i = 1; i < dd->ipath_cfgports; i++) { - if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt || - !dd->ipath_pd[i]->port_pid) + if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt) continue; pid = dd->ipath_pd[i]->port_pid; + if (!pid) + continue; + dev_info(&dd->pcidev->dev, "context %d in use " "(PID %u), sending signal %d\n", - i, pid, sig); - kill_proc(pid, sig, 1); + i, pid_nr(pid), sig); + kill_pid(pid, sig, 1); any++; for (sub = 0; sub < INFINIPATH_MAX_SUBPORT; sub++) { pid = dd->ipath_pd[i]->port_subpid[sub]; @@ -2674,8 +2676,8 @@ static int ipath_signal_procs(struct ipath_devdata *dd, int sig) continue; dev_info(&dd->pcidev->dev, "sub-context " "%d:%d in use (PID %u), sending " - "signal %d\n", i, sub, pid, sig); - kill_proc(pid, sig, 1); + "signal %d\n", i, sub, pid_nr(pid), sig); + kill_pid(pid, sig, 1); any++; } } diff --git 
a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c index 3295177c937e..b472b15637f0 100644 --- a/drivers/infiniband/hw/ipath/ipath_file_ops.c +++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c @@ -555,7 +555,7 @@ static int ipath_tid_free(struct ipath_portdata *pd, unsigned subport, p = dd->ipath_pageshadow[porttid + tid]; dd->ipath_pageshadow[porttid + tid] = NULL; ipath_cdbg(VERBOSE, "PID %u freeing TID %u\n", - pd->port_pid, tid); + pid_nr(pd->port_pid), tid); dd->ipath_f_put_tid(dd, &tidbase[tid], RCVHQ_RCV_TYPE_EXPECTED, dd->ipath_tidinvalid); @@ -1609,7 +1609,7 @@ static int try_alloc_port(struct ipath_devdata *dd, int port, port); pd->port_cnt = 1; port_fp(fp) = pd; - pd->port_pid = current->pid; + pd->port_pid = get_pid(task_pid(current)); strncpy(pd->port_comm, current->comm, sizeof(pd->port_comm)); ipath_stats.sps_ports++; ret = 0; @@ -1793,14 +1793,15 @@ static int find_shared_port(struct file *fp, } port_fp(fp) = pd; subport_fp(fp) = pd->port_cnt++; - pd->port_subpid[subport_fp(fp)] = current->pid; + pd->port_subpid[subport_fp(fp)] = + get_pid(task_pid(current)); tidcursor_fp(fp) = 0; pd->active_slaves |= 1 << subport_fp(fp); ipath_cdbg(PROC, "%s[%u] %u sharing %s[%u] unit:port %u:%u\n", current->comm, current->pid, subport_fp(fp), - pd->port_comm, pd->port_pid, + pd->port_comm, pid_nr(pd->port_pid), dd->ipath_unit, pd->port_port); ret = 1; goto done; @@ -2066,7 +2067,8 @@ static int ipath_close(struct inode *in, struct file *fp) * the slave(s) don't wait for receive data forever. */ pd->active_slaves &= ~(1 << fd->subport); - pd->port_subpid[fd->subport] = 0; + put_pid(pd->port_subpid[fd->subport]); + pd->port_subpid[fd->subport] = NULL; mutex_unlock(&ipath_mutex); goto bail; } @@ -2074,7 +2076,7 @@ static int ipath_close(struct inode *in, struct file *fp) if (pd->port_hdrqfull) { ipath_cdbg(PROC, "%s[%u] had %u rcvhdrqfull errors " - "during run\n", pd->port_comm, pd->port_pid, + "during run\n", pd->port_comm, pid_nr(pd->port_pid), pd->port_hdrqfull); pd->port_hdrqfull = 0; } @@ -2134,11 +2136,12 @@ static int ipath_close(struct inode *in, struct file *fp) unlock_expected_tids(pd); ipath_stats.sps_ports--; ipath_cdbg(PROC, "%s[%u] closed port %u:%u\n", - pd->port_comm, pd->port_pid, + pd->port_comm, pid_nr(pd->port_pid), dd->ipath_unit, port); } - pd->port_pid = 0; + put_pid(pd->port_pid); + pd->port_pid = NULL; dd->ipath_pd[pd->port_port] = NULL; /* before releasing mutex */ mutex_unlock(&ipath_mutex); ipath_free_pddata(dd, pd); /* after releasing the mutex */ diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h index 02b24a340599..20975875a8d1 100644 --- a/drivers/infiniband/hw/ipath/ipath_kernel.h +++ b/drivers/infiniband/hw/ipath/ipath_kernel.h @@ -159,8 +159,8 @@ struct ipath_portdata { /* saved total number of polled urgent packets for poll edge trigger */ u32 port_urgent_poll; /* pid of process using this port */ - pid_t port_pid; - pid_t port_subpid[INFINIPATH_MAX_SUBPORT]; + struct pid *port_pid; + struct pid *port_subpid[INFINIPATH_MAX_SUBPORT]; /* same size as task_struct .comm[] */ char port_comm[16]; /* pkeys set by this use of this port */ -- cgit v1.2.3 From f018c7e177a50390f6fcb137f1a28a6027d8ba50 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Tue, 13 May 2008 11:51:23 -0700 Subject: IB/ipath: Change ipath_devdata.ipath_sdma_status to be unsigned long Andrew Morton pointed out that bitops should take an unsigned long * arg. 
However, the ipath driver was doing bitops on struct ipath_devdata.ipath_sdma_status, which is u64. Change this member to unsigned long to avoid tons of warnings when x86 fixes the bitops to take unsigned long * instead of void *. Also, change the IPATH_SDMA_RUNNING and IPATH_SDMA_SHUTDOWN bit numbers to 30 and 31 (instead of 62 and 63) so that we're not setting another booby trap for someone who tries to make ipath work on a 32-bit architecture. Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_driver.c | 2 +- drivers/infiniband/hw/ipath/ipath_kernel.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c index 258e66cf3546..daad09a45910 100644 --- a/drivers/infiniband/hw/ipath/ipath_driver.c +++ b/drivers/infiniband/hw/ipath/ipath_driver.c @@ -1894,7 +1894,7 @@ void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl) */ if (dd->ipath_flags & IPATH_HAS_SEND_DMA) { int skip_cancel; - u64 *statp = &dd->ipath_sdma_status; + unsigned long *statp = &dd->ipath_sdma_status; spin_lock_irqsave(&dd->ipath_sdma_lock, flags); skip_cancel = diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h index 20975875a8d1..59a8b254b97f 100644 --- a/drivers/infiniband/hw/ipath/ipath_kernel.h +++ b/drivers/infiniband/hw/ipath/ipath_kernel.h @@ -483,7 +483,7 @@ struct ipath_devdata { /* SendDMA related entries */ spinlock_t ipath_sdma_lock; - u64 ipath_sdma_status; + unsigned long ipath_sdma_status; unsigned long ipath_sdma_abort_jiffies; unsigned long ipath_sdma_abort_intr_timeout; unsigned long ipath_sdma_buf_jiffies; @@ -822,8 +822,8 @@ struct ipath_devdata { #define IPATH_SDMA_DISARMED 1 #define IPATH_SDMA_DISABLED 2 #define IPATH_SDMA_LAYERBUF 3 -#define IPATH_SDMA_RUNNING 62 -#define IPATH_SDMA_SHUTDOWN 63 +#define IPATH_SDMA_RUNNING 30 +#define IPATH_SDMA_SHUTDOWN 31 /* bit combinations that correspond to abort states */ #define IPATH_SDMA_ABORT_NONE 0 -- cgit v1.2.3 From a58e58fafdff4c25949221e46132e86f709d0b79 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Tue, 13 May 2008 11:52:55 -0700 Subject: RDMA/cxgb3: Wrap the software send queue pointer as needed on flush cxio_flush_sq() was failing to wrap around the software send queue causing garbage completion entries on a flush operation. Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb3/cxio_hal.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c index ebf9d3043f80..3f441fc57c17 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_hal.c +++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c @@ -405,11 +405,11 @@ int cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count) struct t3_swsq *sqp = wq->sq + Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2); ptr = wq->sq_rptr + count; - sqp += count; + sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2); while (ptr != wq->sq_wptr) { insert_sq_cqe(wq, cq, sqp); - sqp++; ptr++; + sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2); flushed++; } return flushed; -- cgit v1.2.3
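Editor's note on the last fix above: the cxio_flush_sq() bug is the classic free-running ring-pointer mistake, where a raw element pointer is advanced past the end of a power-of-two queue instead of re-deriving the array index from the pointer on each step. The sketch below is illustrative only and is not the driver code: PTR2IDX() stands in for the driver's Q_PTR2IDX() macro, and struct entry, flush_entry(), and flush_sq() are hypothetical placeholders that just show the wrap-safe walk.

```c
/*
 * Minimal sketch of the wrap-safe flush walk (assumed names, not cxio_hal.c).
 * A software queue of (1 << SIZE_LOG2) entries is indexed by free-running
 * read/write pointers; the index is recomputed from the pointer every
 * iteration so the walk stays inside the ring across a wrap boundary.
 */
#include <stdio.h>

#define SIZE_LOG2	3				/* 8-entry queue */
#define PTR2IDX(ptr)	((ptr) & ((1u << SIZE_LOG2) - 1))

struct entry { unsigned id; };

static void flush_entry(struct entry *e)
{
	printf("flushing wr %u\n", e->id);
}

static int flush_sq(struct entry *sq, unsigned rptr, unsigned wptr, int count)
{
	unsigned ptr = rptr + count;			/* skip in-flight WRs */
	int flushed = 0;

	/*
	 * Recompute the index from ptr each time instead of walking a raw
	 * struct pointer (the unwrapped sqp++ was the bug fixed above).
	 */
	while (ptr != wptr) {
		flush_entry(&sq[PTR2IDX(ptr)]);
		ptr++;
		flushed++;
	}
	return flushed;
}

int main(void)
{
	struct entry sq[1 << SIZE_LOG2];
	unsigned i;

	for (i = 0; i < (1u << SIZE_LOG2); i++)
		sq[i].id = i;

	/* rptr/wptr deliberately straddle a wrap: flushes entries 7, 0, 1, 2 */
	return !(flush_sq(sq, 6, 11, 1) == 4);
}
```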