From cc7fb05946fb1cd2fd0582f9e39f759e20dfeefa Mon Sep 17 00:00:00 2001
From: Mitko Haralanov
Date: Tue, 22 Feb 2011 16:56:37 -0800
Subject: IB/qib: Return correct MAD when setting link width to 255

Fix a bug which causes the driver to return incorrect MADs as a
response to Set(PortInfo) which sets the link width to 0xFF or the
link speed to 0xF.

Signed-off-by: Mitko Haralanov
Signed-off-by: Mike Marciniszyn
Signed-off-by: Roland Dreier
---
 drivers/infiniband/hw/qib/qib_mad.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c
index 5ad224e4a38b..4b9e11cc561b 100644
--- a/drivers/infiniband/hw/qib/qib_mad.c
+++ b/drivers/infiniband/hw/qib/qib_mad.c
@@ -705,7 +705,7 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
 	lwe = pip->link_width_enabled;
 	if (lwe) {
 		if (lwe == 0xFF)
-			lwe = ppd->link_width_supported;
+			set_link_width_enabled(ppd, ppd->link_width_supported);
 		else if (lwe >= 16 || (lwe & ~ppd->link_width_supported))
 			smp->status |= IB_SMP_INVALID_FIELD;
 		else if (lwe != ppd->link_width_enabled)
@@ -720,7 +720,8 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
 		 * speeds.
 		 */
 		if (lse == 15)
-			lse = ppd->link_speed_supported;
+			set_link_speed_enabled(ppd,
+					       ppd->link_speed_supported);
 		else if (lse >= 8 || (lse & ~ppd->link_speed_supported))
 			smp->status |= IB_SMP_INVALID_FIELD;
 		else if (lse != ppd->link_speed_enabled)
@@ -849,7 +850,7 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
 	if (clientrereg)
 		pip->clientrereg_resv_subnetto |= 0x80;

-	goto done;
+	goto get_only;

 err:
 	smp->status |= IB_SMP_INVALID_FIELD;
--
cgit v1.2.3

From b52fe09e3309c3d7069cd0e5a3bdb5b4ba45e01f Mon Sep 17 00:00:00 2001
From: Steve Wise
Date: Fri, 11 Mar 2011 22:30:01 +0000
Subject: RDMA/cxgb4: Turn on delayed ACK

Set the default to on.

Signed-off-by: Steve Wise
Signed-off-by: Roland Dreier
---
 drivers/infiniband/hw/cxgb4/cm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 8b00e6c46f01..65d3fe6cfd5c 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -61,9 +61,9 @@ static char *states[] = {
 	NULL,
 };

-static int dack_mode;
+static int dack_mode = 1;
 module_param(dack_mode, int, 0644);
-MODULE_PARM_DESC(dack_mode, "Delayed ack mode (default=0)");
+MODULE_PARM_DESC(dack_mode, "Delayed ack mode (default=1)");

 int c4iw_max_read_depth = 8;
 module_param(c4iw_max_read_depth, int, 0644);
--
cgit v1.2.3

From 294281373999e7fff393c04eb16092a8f00ad5aa Mon Sep 17 00:00:00 2001
From: Steve Wise
Date: Fri, 11 Mar 2011 22:30:32 +0000
Subject: RDMA/cxgb4: Remove db_drop_task

Unloading iw_cxgb4 can crash because the unload code tries to cancel
db_drop_task, which is never initialized.  So remove this dead code.
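
For reference, a minimal sketch of the bug class (a hypothetical driver,
not the actual cxgb4 code): canceling a delayed_work that was never
initialized operates on timer and work state that does not exist.

	#include <linux/workqueue.h>
	#include <linux/slab.h>

	/* hypothetical device struct; in iw_cxgb4 the field was db_drop_task */
	struct foo_dev {
		struct delayed_work db_drop_task;	/* never INIT_DELAYED_WORK()'d */
	};

	static void foo_remove(struct foo_dev *dev)
	{
		/*
		 * cancel_delayed_work_sync() inspects the timer and
		 * work_struct state that INIT_DELAYED_WORK() sets up;
		 * calling it on a never-initialized field operates on
		 * garbage and can oops, hence removing the dead field.
		 */
		cancel_delayed_work_sync(&dev->db_drop_task);	/* potential crash */
		kfree(dev);
	}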
Signed-off-by: Steve Wise
Signed-off-by: Roland Dreier
---
 drivers/infiniband/hw/cxgb4/device.c   | 1 -
 drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 1 -
 2 files changed, 2 deletions(-)

diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index 54fbc1118abe..1a65ee6235bf 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -368,7 +368,6 @@ static void c4iw_rdev_close(struct c4iw_rdev *rdev)
 static void c4iw_remove(struct c4iw_dev *dev)
 {
 	PDBG("%s c4iw_dev %p\n", __func__, dev);
-	cancel_delayed_work_sync(&dev->db_drop_task);
 	list_del(&dev->entry);
 	if (dev->registered)
 		c4iw_unregister_device(dev);
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index 2fe19ec9ba60..9f6166f59268 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -176,7 +176,6 @@ struct c4iw_dev {
 	struct idr mmidr;
 	spinlock_t lock;
 	struct list_head entry;
-	struct delayed_work db_drop_task;
 	struct dentry *debugfs_root;
 	u8 registered;
 };
--
cgit v1.2.3

From ffc3f7487ff0b32500b319c770475383f6f6efab Mon Sep 17 00:00:00 2001
From: Steve Wise
Date: Fri, 11 Mar 2011 22:30:42 +0000
Subject: RDMA/cxgb4: Do CIDX_INC updates every 1/16 CQ depth CQE reaps

This avoids the CIDX_INC overflow issue with T4A2 when running kernel
RDMA applications.

Signed-off-by: Steve Wise
Signed-off-by: Roland Dreier
---
 drivers/infiniband/hw/cxgb4/t4.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
index 70004425d695..24af12fc8228 100644
--- a/drivers/infiniband/hw/cxgb4/t4.h
+++ b/drivers/infiniband/hw/cxgb4/t4.h
@@ -507,8 +507,14 @@ static inline void t4_swcq_consume(struct t4_cq *cq)
 static inline void t4_hwcq_consume(struct t4_cq *cq)
 {
 	cq->bits_type_ts = cq->queue[cq->cidx].bits_type_ts;
-	if (++cq->cidx_inc == cq->size)
+	if (++cq->cidx_inc == (cq->size >> 4)) {
+		u32 val;
+
+		val = SEINTARM(0) | CIDXINC(cq->cidx_inc) | TIMERREG(7) |
+		      INGRESSQID(cq->cqid);
+		writel(val, cq->gts);
 		cq->cidx_inc = 0;
+	}
 	if (++cq->cidx == cq->size) {
 		cq->cidx = 0;
 		cq->gen ^= 1;
--
cgit v1.2.3

From a9c7719800ac513b2df14e267d062ec84dc9313e Mon Sep 17 00:00:00 2001
From: Steve Wise
Date: Fri, 11 Mar 2011 22:30:11 +0000
Subject: RDMA/cxgb4: Enable on-chip SQ support by default

Signed-off-by: Steve Wise
Signed-off-by: Roland Dreier
---
 drivers/infiniband/hw/cxgb4/qp.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index 4f0be25cab1a..70a5a3c646da 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -31,9 +31,9 @@
  */

 #include "iw_cxgb4.h"

-static int ocqp_support;
+static int ocqp_support = 1;
 module_param(ocqp_support, int, 0644);
-MODULE_PARM_DESC(ocqp_support, "Support on-chip SQs (default=0)");
+MODULE_PARM_DESC(ocqp_support, "Support on-chip SQs (default=1)");

 static void set_state(struct c4iw_qp *qhp, enum c4iw_qp_state state)
 {
--
cgit v1.2.3

From b48f3b9c10d731160f0af5c3028ad57d9c66673b Mon Sep 17 00:00:00 2001
From: Steve Wise
Date: Fri, 11 Mar 2011 22:30:21 +0000
Subject: RDMA/cxgb4: Use ULP_MODE_TCPDDP

Set the ULP mode for initial RDMA connection setup to the proper DDP
mode.  This avoids wasting some HW resources while in streaming mode.
Signed-off-by: Steve Wise
Signed-off-by: Roland Dreier
---
 drivers/infiniband/hw/cxgb4/cm.c | 2 ++
 drivers/net/cxgb4/t4_msg.h       | 1 +
 2 files changed, 3 insertions(+)

diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 65d3fe6cfd5c..b4d9e4caf3c9 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -482,6 +482,7 @@ static int send_connect(struct c4iw_ep *ep)
 	       TX_CHAN(ep->tx_chan) |
 	       SMAC_SEL(ep->smac_idx) |
 	       DSCP(ep->tos) |
+	       ULP_MODE(ULP_MODE_TCPDDP) |
 	       RCV_BUFSIZ(rcv_win>>10);
 	opt2 = RX_CHANNEL(0) |
 	       RSS_QUEUE_VALID | RSS_QUEUE(ep->rss_qid);
@@ -1274,6 +1275,7 @@ static void accept_cr(struct c4iw_ep *ep, __be32 peer_ip, struct sk_buff *skb,
 	       TX_CHAN(ep->tx_chan) |
 	       SMAC_SEL(ep->smac_idx) |
 	       DSCP(ep->tos) |
+	       ULP_MODE(ULP_MODE_TCPDDP) |
 	       RCV_BUFSIZ(rcv_win>>10);
 	opt2 = RX_CHANNEL(0) |
 	       RSS_QUEUE_VALID | RSS_QUEUE(ep->rss_qid);
diff --git a/drivers/net/cxgb4/t4_msg.h b/drivers/net/cxgb4/t4_msg.h
index a550d0c706f3..eb71b8250b91 100644
--- a/drivers/net/cxgb4/t4_msg.h
+++ b/drivers/net/cxgb4/t4_msg.h
@@ -123,6 +123,7 @@ enum {
 	ULP_MODE_NONE   = 0,
 	ULP_MODE_ISCSI  = 2,
 	ULP_MODE_RDMA   = 4,
+	ULP_MODE_TCPDDP = 5,
 	ULP_MODE_FCOE   = 6,
 };
--
cgit v1.2.3

From 767fbe8151d1a7cc8a69e52e354e4220a5e804fb Mon Sep 17 00:00:00 2001
From: Steve Wise
Date: Fri, 11 Mar 2011 22:30:53 +0000
Subject: RDMA/cxgb4: Dispatch FATAL event on EEH errors

This at least kicks the user mode applications that are watching for
device events.

Signed-off-by: Steve Wise
Signed-off-by: Roland Dreier
---
 drivers/infiniband/hw/cxgb4/device.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index 1a65ee6235bf..fadb326e41e0 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -522,8 +522,16 @@ static int c4iw_uld_state_change(void *handle, enum cxgb4_state new_state)
 	case CXGB4_STATE_START_RECOVERY:
 		printk(KERN_INFO MOD "%s: Fatal Error\n",
 		       pci_name(dev->rdev.lldi.pdev));
-		if (dev->registered)
+		dev->rdev.flags |= T4_FATAL_ERROR;
+		if (dev->registered) {
+			struct ib_event event;
+
+			memset(&event, 0, sizeof event);
+			event.event = IB_EVENT_DEVICE_FATAL;
+			event.device = &dev->ibdev;
+			ib_dispatch_event(&event);
 			c4iw_unregister_device(dev);
+		}
 		break;
 	case CXGB4_STATE_DETACH:
 		printk(KERN_INFO MOD "%s: Detach\n",
--
cgit v1.2.3

From db5d040d7b2d15539d2c84932f93621d9bd482f7 Mon Sep 17 00:00:00 2001
From: Steve Wise
Date: Fri, 11 Mar 2011 22:29:50 +0000
Subject: RDMA/cxgb4: Debugfs dump_qp() updates

- Show whether the SQ is in onchip memory or not.
- Dump both SQ and RQ QIDs.
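
With these changes, a connected QP entry in the debugfs dump looks
roughly like the line below (the values are hypothetical; the onchip
field prints the masked T4_SQ_ONCHIP flag, so any non-zero value means
an on-chip SQ):

	qp sq id 1026 rq id 1027 state 5 onchip 1 ep tid 54 state 8 192.168.1.2:33333->192.168.1.9:7174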
Signed-off-by: Steve Wise
Signed-off-by: Roland Dreier
---
 drivers/infiniband/hw/cxgb4/device.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index fadb326e41e0..e29172c2afcb 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -87,17 +87,22 @@ static int dump_qp(int id, void *p, void *data)
 		return 1;

 	if (qp->ep)
-		cc = snprintf(qpd->buf + qpd->pos, space, "qp id %u state %u "
+		cc = snprintf(qpd->buf + qpd->pos, space,
+			      "qp sq id %u rq id %u state %u onchip %u "
 			      "ep tid %u state %u %pI4:%u->%pI4:%u\n",
-			      qp->wq.sq.qid, (int)qp->attr.state,
+			      qp->wq.sq.qid, qp->wq.rq.qid, (int)qp->attr.state,
+			      qp->wq.sq.flags & T4_SQ_ONCHIP,
 			      qp->ep->hwtid, (int)qp->ep->com.state,
 			      &qp->ep->com.local_addr.sin_addr.s_addr,
 			      ntohs(qp->ep->com.local_addr.sin_port),
 			      &qp->ep->com.remote_addr.sin_addr.s_addr,
 			      ntohs(qp->ep->com.remote_addr.sin_port));
 	else
-		cc = snprintf(qpd->buf + qpd->pos, space, "qp id %u state %u\n",
-			      qp->wq.sq.qid, (int)qp->attr.state);
+		cc = snprintf(qpd->buf + qpd->pos, space,
+			      "qp sq id %u rq id %u state %u onchip %u\n",
+			      qp->wq.sq.qid, qp->wq.rq.qid,
+			      (int)qp->attr.state,
+			      qp->wq.sq.flags & T4_SQ_ONCHIP);
 	if (cc < space)
 		qpd->pos += cc;
 	return 0;
--
cgit v1.2.3

From 4634b7945cf0d6a66036ad10c3d658ae4eb39fa0 Mon Sep 17 00:00:00 2001
From: Mitko Haralanov
Date: Mon, 28 Feb 2011 13:39:49 +0000
Subject: IB/qib: Set default LE2 value for active cables to 0

For active and far-EQ cables use an LE2 value of 0 for improved SI
(signal integrity).

Signed-off-by: Mitko Haralanov
Signed-off-by: Mike Marciniszyn
Signed-off-by: Roland Dreier
---
 drivers/infiniband/hw/qib/qib_iba7322.c | 13 ++++++++++---
 drivers/infiniband/hw/qib/qib_qsfp.h    |  2 ++
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
index b01809a82cb0..4a2d21e15a70 100644
--- a/drivers/infiniband/hw/qib/qib_iba7322.c
+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
@@ -5582,9 +5582,16 @@ static void qsfp_7322_event(struct work_struct *work)
 	 * even on failure to read cable information.  We don't
 	 * get here for QME, so IS_QME check not needed here.
 	 */
-	le2 = (!ret && qd->cache.atten[1] >= qib_long_atten &&
-	       !ppd->dd->cspec->r1 && QSFP_IS_CU(qd->cache.tech)) ?
-	       LE2_5m : LE2_DEFAULT;
+	if (!ret && !ppd->dd->cspec->r1) {
+		if (QSFP_IS_ACTIVE_FAR(qd->cache.tech))
+			le2 = LE2_QME;
+		else if (qd->cache.atten[1] >= qib_long_atten &&
+			 QSFP_IS_CU(qd->cache.tech))
+			le2 = LE2_5m;
+		else
+			le2 = LE2_DEFAULT;
+	} else
+		le2 = LE2_DEFAULT;
 	ibsd_wr_allchans(ppd, 13, (le2 << 7), BMASK(9, 7));
 	init_txdds_table(ppd, 0);
 }
diff --git a/drivers/infiniband/hw/qib/qib_qsfp.h b/drivers/infiniband/hw/qib/qib_qsfp.h
index 19b527bafd57..c109bbdc90ac 100644
--- a/drivers/infiniband/hw/qib/qib_qsfp.h
+++ b/drivers/infiniband/hw/qib/qib_qsfp.h
@@ -79,6 +79,8 @@ extern const char *const qib_qsfp_devtech[16];
 /* Active Equalization includes fiber, copper full EQ, and copper near Eq */
 #define QSFP_IS_ACTIVE(tech) ((0xA2FF >> ((tech) >> 4)) & 1)
+/* Active Equalization includes fiber, copper full EQ, and copper far Eq */
+#define QSFP_IS_ACTIVE_FAR(tech) ((0x32FF >> ((tech) >> 4)) & 1)
 /* Attenuation should be valid for copper other than full/near Eq */
 #define QSFP_HAS_ATTEN(tech) ((0x4D00 >> ((tech) >> 4)) & 1)
 /* Length is only valid if technology is "copper" */
--
cgit v1.2.3

From 36b87b419c7616a8aaa9889566d9a9b50689dee1 Mon Sep 17 00:00:00 2001
From: Mitko Haralanov
Date: Fri, 4 Mar 2011 18:53:17 +0000
Subject: IB/qib: Fix M_Key field in SubnGet and SubnGetResp MADs

Set the M_Key field in SubnGet and SubnGetResp MADs based on correctly
interpreting the protection level specified in the M_KeyProtBits field.

Signed-off-by: Mitko Haralanov
Signed-off-by: Mike Marciniszyn
Signed-off-by: Roland Dreier
---
 drivers/infiniband/hw/qib/qib_mad.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c
index 4b9e11cc561b..8fd3df5bf04d 100644
--- a/drivers/infiniband/hw/qib/qib_mad.c
+++ b/drivers/infiniband/hw/qib/qib_mad.c
@@ -464,8 +464,9 @@ static int subn_get_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
 	memset(smp->data, 0, sizeof(smp->data));

 	/* Only return the mkey if the protection field allows it. */
-	if (smp->method == IB_MGMT_METHOD_SET || ibp->mkey == smp->mkey ||
-	    ibp->mkeyprot == 0)
+	if (!(smp->method == IB_MGMT_METHOD_GET &&
+	      ibp->mkey != smp->mkey &&
+	      ibp->mkeyprot == 1))
 		pip->mkey = ibp->mkey;
 	pip->gid_prefix = ibp->gid_prefix;
 	lid = ppd->lid;
--
cgit v1.2.3

From 2a543904ddcb463db9d56d1efcb2f80884ea55f3 Mon Sep 17 00:00:00 2001
From: Nicolas Kaiser
Date: Tue, 26 Oct 2010 15:47:52 +0000
Subject: IB/ipath: Don't reset disabled devices

The comment a few lines above states that disabled devices must not be
reset.

Signed-off-by: Nicolas Kaiser
---
 drivers/infiniband/hw/ipath/ipath_sysfs.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/infiniband/hw/ipath/ipath_sysfs.c b/drivers/infiniband/hw/ipath/ipath_sysfs.c
index b8cb2f145ae4..8991677e9a08 100644
--- a/drivers/infiniband/hw/ipath/ipath_sysfs.c
+++ b/drivers/infiniband/hw/ipath/ipath_sysfs.c
@@ -557,6 +557,7 @@ static ssize_t store_reset(struct device *dev,
 		dev_info(dev,"Unit %d is disabled, can't reset\n",
 			 dd->ipath_unit);
 		ret = -EINVAL;
+		goto bail;
 	}
 	ret = ipath_reset_device(dd->ipath_unit);
 bail:
--
cgit v1.2.3

From 25ae21a10112875763c18b385624df713a288a05 Mon Sep 17 00:00:00 2001
From: Sean Hefty
Date: Wed, 23 Feb 2011 08:11:32 -0800
Subject: RDMA/cma: Fix crash in request handlers

Doug Ledford and Red Hat reported a crash when running the rdma_cm on
a real-time OS.
The crash has the following call trace:

	cm_process_work
	   cma_req_handler
	      cma_disable_callback
	      rdma_create_id
	         kzalloc
	         init_completion
	      cma_get_net_info
	      cma_save_net_info
	      cma_any_addr
	         cma_zero_addr
	      rdma_translate_ip
	         rdma_copy_addr
	      cma_acquire_dev
	         rdma_addr_get_sgid
	         ib_find_cached_gid
	         cma_attach_to_dev
	      ucma_event_handler
	         kzalloc
	         ib_copy_ah_attr_to_user
	      cma_comp

	[ preempted ]

	cma_write
	   copy_from_user
	   ucma_destroy_id
	      copy_from_user
	      _ucma_find_context
	      ucma_put_ctx
	      ucma_free_ctx
	         rdma_destroy_id
	            cma_exch
	            cma_cancel_operation
	            rdma_node_get_transport

	rt_mutex_slowunlock
	bad_area_nosemaphore
	oops_enter

They were able to reproduce the crash multiple times with the
following details:

Crash seems to always happen on the:
	mutex_unlock(&conn_id->handler_mutex);
as conn_id looks to have been freed during this code path.

An examination of the code shows that a race exists in the request
handlers.  When a new connection request is received, the rdma_cm
allocates a new connection identifier.  This identifier has a single
reference count on it.  If a user calls rdma_destroy_id() from another
thread after receiving a callback, rdma_destroy_id will proceed to
destroy the id and free the associated memory.  However, the request
handlers may still be in the process of running.  When control returns
to the request handlers, they can attempt to access the newly created
identifiers.

Fix this by holding a reference on the newly created rdma_cm_id until
the request handler is through accessing it.

Signed-off-by: Sean Hefty
Acked-by: Doug Ledford
Cc:
Signed-off-by: Roland Dreier
---
 drivers/infiniband/core/cma.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 6884da24fde1..e450c5a87727 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -1210,6 +1210,11 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
 	cm_id->context = conn_id;
 	cm_id->cm_handler = cma_ib_handler;

+	/*
+	 * Protect against the user destroying conn_id from another thread
+	 * until we're done accessing it.
+	 */
+	atomic_inc(&conn_id->refcount);
 	ret = conn_id->id.event_handler(&conn_id->id, &event);
 	if (!ret) {
 		/*
@@ -1222,8 +1227,10 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
 		ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
 		mutex_unlock(&lock);
 		mutex_unlock(&conn_id->handler_mutex);
+		cma_deref_id(conn_id);
 		goto out;
 	}
+	cma_deref_id(conn_id);

 	/* Destroy the CM ID by returning a non-zero value. */
 	conn_id->cm_id.ib = NULL;
@@ -1425,17 +1432,25 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
 	event.param.conn.private_data_len = iw_event->private_data_len;
 	event.param.conn.initiator_depth = attr.max_qp_init_rd_atom;
 	event.param.conn.responder_resources = attr.max_qp_rd_atom;
+
+	/*
+	 * Protect against the user destroying conn_id from another thread
+	 * until we're done accessing it.
+	 */
+	atomic_inc(&conn_id->refcount);
 	ret = conn_id->id.event_handler(&conn_id->id, &event);
 	if (ret) {
 		/* User wants to destroy the CM ID */
 		conn_id->cm_id.iw = NULL;
 		cma_exch(conn_id, CMA_DESTROYING);
 		mutex_unlock(&conn_id->handler_mutex);
+		cma_deref_id(conn_id);
 		rdma_destroy_id(&conn_id->id);
 		goto out;
 	}

 	mutex_unlock(&conn_id->handler_mutex);
+	cma_deref_id(conn_id);

 out:
 	if (dev)
--
cgit v1.2.3

From 29963437a48475036353b95ab142bf199adb909e Mon Sep 17 00:00:00 2001
From: Sean Hefty
Date: Wed, 23 Feb 2011 08:17:40 -0800
Subject: IB/cm: Bump reference count on cm_id before invoking callback

When processing a SIDR REQ, the ib_cm allocates a new cm_id.  The
refcount of the cm_id is initialized to 1.  However, cm_process_work
will decrement the refcount after invoking all callbacks.  The result
is that the cm_id will end up with refcount set to 0 by the end of the
sidr req handler.

If a user tries to destroy the cm_id, the destruction will proceed
under the incorrect assumption that no other threads are referencing
the cm_id.  This can lead to a crash when the cm callback thread tries
to access the cm_id.

This problem was noticed as part of a larger investigation of kernel
crashes in the rdma_cm when running on a real-time OS.

Signed-off-by: Sean Hefty
Acked-by: Doug Ledford
Cc:
Signed-off-by: Roland Dreier
---
 drivers/infiniband/core/cm.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 64e0903091a8..1d9616be4192 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -2989,6 +2989,7 @@ static int cm_sidr_req_handler(struct cm_work *work)
 		goto out; /* No match. */
 	}
 	atomic_inc(&cur_cm_id_priv->refcount);
+	atomic_inc(&cm_id_priv->refcount);
 	spin_unlock_irq(&cm.lock);

 	cm_id_priv->id.cm_handler = cur_cm_id_priv->id.cm_handler;
--
cgit v1.2.3

From 8d8ac86564b616bc17054cbb6e727588da64c86b Mon Sep 17 00:00:00 2001
From: Sean Hefty
Date: Thu, 3 Mar 2011 23:31:06 +0000
Subject: IB/cm: Cancel pending LAP message when exiting IB_CM_ESTABLISHED state

This problem was reported by Moni Shoua and Amir Vadai: when
destroying a cm_id from a work queue context while the lap_state of
the cm_id is IB_CM_LAP_SENT, we need to release the reference on this
id that was taken when the LAP message was sent.  Otherwise, if the
expected APR message gets lost, the reference is only released after a
long timeout, and during all that time the work handler thread is not
available to process other work.

It turns out that we need to cancel any pending LAP messages whenever
we transition out of the IB_CM_ESTABLISHED state.  This occurs when
disconnecting, that is, when sending or receiving a DREQ.  It can also
happen in a corner case where we receive a REJ message after sending
an RTU, followed by a LAP.  Add checks and cancel any outstanding LAP
messages in these three cases.

Canceling the LAP when sending a DREQ fixes the destroy problem
reported by Moni.  When a cm_id is destroyed in the IB_CM_ESTABLISHED
state, it sends a DREQ to the remote side to notify the peer that the
connection is going away.
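
The first two hunks below repeat the same check-and-cancel sequence;
as a sketch, that idiom could be factored into a helper like the
following (hypothetical, not part of this patch; names taken from the
diff):

	static void cm_cancel_pending_lap(struct cm_id_private *cm_id_priv)
	{
		/*
		 * A LAP is outstanding if we sent one (LAP_SENT) or the
		 * peer MRA'd it (MRA_LAP_RCVD) and no APR has arrived
		 * yet, so the MAD send must be canceled explicitly.
		 */
		if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT ||
		    cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
			ib_cancel_mad(cm_id_priv->av.port->mad_agent,
				      cm_id_priv->msg);
	}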
Signed-off-by: Sean Hefty
Signed-off-by: Roland Dreier
---
 drivers/infiniband/core/cm.c | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 1d9616be4192..f804e28e1ebb 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -1988,6 +1988,10 @@ int ib_send_cm_dreq(struct ib_cm_id *cm_id,
 		goto out;
 	}

+	if (cm_id->lap_state == IB_CM_LAP_SENT ||
+	    cm_id->lap_state == IB_CM_MRA_LAP_RCVD)
+		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+
 	ret = cm_alloc_msg(cm_id_priv, &msg);
 	if (ret) {
 		cm_enter_timewait(cm_id_priv);
@@ -2129,6 +2133,10 @@ static int cm_dreq_handler(struct cm_work *work)
 		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
 		break;
 	case IB_CM_ESTABLISHED:
+		if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT ||
+		    cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
+			ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+		break;
 	case IB_CM_MRA_REP_RCVD:
 		break;
 	case IB_CM_TIMEWAIT:
@@ -2349,9 +2357,18 @@ static int cm_rej_handler(struct cm_work *work)
 		/* fall through */
 	case IB_CM_REP_RCVD:
 	case IB_CM_MRA_REP_SENT:
-	case IB_CM_ESTABLISHED:
 		cm_enter_timewait(cm_id_priv);
 		break;
+	case IB_CM_ESTABLISHED:
+		if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT ||
+		    cm_id_priv->id.lap_state == IB_CM_LAP_SENT) {
+			if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT)
+				ib_cancel_mad(cm_id_priv->av.port->mad_agent,
+					      cm_id_priv->msg);
+			cm_enter_timewait(cm_id_priv);
+			break;
+		}
+		/* fall through */
 	default:
 		spin_unlock_irq(&cm_id_priv->lock);
 		ret = -EINVAL;
--
cgit v1.2.3

From a396d43a35fb91f2d4920a4700d25ecc5ec92404 Mon Sep 17 00:00:00 2001
From: Sean Hefty
Date: Wed, 23 Feb 2011 09:05:39 -0800
Subject: RDMA/cma: Replace global lock in rdma_destroy_id() with id-specific one

rdma_destroy_id currently uses the global rdma cm 'lock' to test if an
rdma_cm_id has been bound to a device.  This prevents an active
address resolution callback handler from assigning a device to the
rdma_cm_id after rdma_destroy_id checks for one.

Instead, we can replace the use of the global lock around the check of
the rdma_cm_id device pointer by setting the id state to destroying,
then flushing all active callbacks.  The latter is accomplished by
acquiring and releasing the handler_mutex.  Any active handler will
complete first, and any newly scheduled handlers will find the
rdma_cm_id in an invalid state.

In addition to optimizing the current locking scheme, the use of the
rdma_cm_id mutex is a more intuitive synchronization mechanism than
that of the global lock.  These changes are based on feedback from
Doug Ledford while he was trying to debug a crash in the rdma cm
destroy path.
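
As a simplified sketch of that flush (hypothetical helper; the diff
below inlines the same two lines directly in rdma_destroy_id):

	static void cma_flush_handlers(struct rdma_id_private *id_priv)
	{
		/*
		 * Any handler already running holds handler_mutex, so
		 * taking it here blocks until that handler returns; any
		 * handler that starts afterwards observes CMA_DESTROYING
		 * and bails out without touching the id.
		 */
		mutex_lock(&id_priv->handler_mutex);
		mutex_unlock(&id_priv->handler_mutex);
	}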
Signed-off-by: Sean Hefty
Signed-off-by: Roland Dreier
---
 drivers/infiniband/core/cma.c | 43 ++++++++++++++++---------------------------
 1 file changed, 16 insertions(+), 27 deletions(-)

diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index e450c5a87727..5ed9d25d021a 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -308,11 +308,13 @@ static inline void release_mc(struct kref *kref)
 	kfree(mc);
 }

-static void cma_detach_from_dev(struct rdma_id_private *id_priv)
+static void cma_release_dev(struct rdma_id_private *id_priv)
 {
+	mutex_lock(&lock);
 	list_del(&id_priv->list);
 	cma_deref_dev(id_priv->cma_dev);
 	id_priv->cma_dev = NULL;
+	mutex_unlock(&lock);
 }

 static int cma_set_qkey(struct rdma_id_private *id_priv)
@@ -373,6 +375,7 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv)
 	enum rdma_link_layer dev_ll = dev_addr->dev_type == ARPHRD_INFINIBAND ?
 		IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET;

+	mutex_lock(&lock);
 	iboe_addr_get_sgid(dev_addr, &iboe_gid);
 	memcpy(&gid, dev_addr->src_dev_addr +
 	       rdma_addr_gid_offset(dev_addr), sizeof gid);
@@ -398,6 +401,7 @@ out:
 	if (!ret)
 		cma_attach_to_dev(id_priv, cma_dev);

+	mutex_unlock(&lock);
 	return ret;
 }

@@ -904,9 +908,14 @@ void rdma_destroy_id(struct rdma_cm_id *id)
 	state = cma_exch(id_priv, CMA_DESTROYING);
 	cma_cancel_operation(id_priv, state);

-	mutex_lock(&lock);
+	/*
+	 * Wait for any active callback to finish.  New callbacks will find
+	 * the id_priv state set to destroying and abort.
+	 */
+	mutex_lock(&id_priv->handler_mutex);
+	mutex_unlock(&id_priv->handler_mutex);
+
 	if (id_priv->cma_dev) {
-		mutex_unlock(&lock);
 		switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
 		case RDMA_TRANSPORT_IB:
 			if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
@@ -920,10 +929,8 @@ void rdma_destroy_id(struct rdma_cm_id *id)
 			break;
 		}
 		cma_leave_mc_groups(id_priv);
-		mutex_lock(&lock);
-		cma_detach_from_dev(id_priv);
+		cma_release_dev(id_priv);
 	}
-	mutex_unlock(&lock);

 	cma_release_port(id_priv);
 	cma_deref_id(id_priv);
@@ -1200,9 +1207,7 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
 	}

 	mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
-	mutex_lock(&lock);
 	ret = cma_acquire_dev(conn_id);
-	mutex_unlock(&lock);
 	if (ret)
 		goto release_conn_id;

@@ -1401,9 +1406,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
 		goto out;
 	}

-	mutex_lock(&lock);
 	ret = cma_acquire_dev(conn_id);
-	mutex_unlock(&lock);
 	if (ret) {
 		mutex_unlock(&conn_id->handler_mutex);
 		rdma_destroy_id(new_cm_id);
@@ -1966,20 +1969,11 @@ static void addr_handler(int status, struct sockaddr *src_addr,
 	memset(&event, 0, sizeof event);
 	mutex_lock(&id_priv->handler_mutex);
-
-	/*
-	 * Grab mutex to block rdma_destroy_id() from removing the device while
-	 * we're trying to acquire it.
-	 */
-	mutex_lock(&lock);
-
-	if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_RESOLVED)) {
-		mutex_unlock(&lock);
+	if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_RESOLVED))
 		goto out;
-	}

 	if (!status && !id_priv->cma_dev)
 		status = cma_acquire_dev(id_priv);
-	mutex_unlock(&lock);

 	if (status) {
 		if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED,
 				   CMA_ADDR_BOUND))
@@ -2280,9 +2274,7 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
 		if (ret)
 			goto err1;

-		mutex_lock(&lock);
 		ret = cma_acquire_dev(id_priv);
-		mutex_unlock(&lock);
 		if (ret)
 			goto err1;
 	}
@@ -2294,11 +2286,8 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
 	return 0;

 err2:
-	if (id_priv->cma_dev) {
-		mutex_lock(&lock);
-		cma_detach_from_dev(id_priv);
-		mutex_unlock(&lock);
-	}
+	if (id_priv->cma_dev)
+		cma_release_dev(id_priv);
 err1:
 	cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_IDLE);
 	return ret;
--
cgit v1.2.3