[ewg] [GIT PULL ofed-1.5] RDMA/cxgb3: Pull in recent bug fixes.

Steve Wise swise at opengridcomputing.com
Mon Aug 24 14:23:53 PDT 2009


Vlad, please pull these recent iw_cxgb3 bug fixes from:

ssh://vlad@sofa.openfabrics.org/~swise/scm/ofed_kernel ofed_1_5

Thanks,

Steve.



-------- Original Message --------
Subject: 	[PATCH] RDMA/cxgb3: Pull in recent bug fixes.
Date: 	Mon, 24 Aug 2009 16:18:20 -0500
From: 	Steve Wise <swise at opengridcomputing.com>
To: 	swise at opengridcomputing.com



Signed-off-by: Steve Wise <swise at opengridcomputing.com>
---

 kernel_patches/fixes/iw_cxgb3_0300_memleak.patch   |   30 +++
 kernel_patches/fixes/iw_cxgb3_0310_iochannel.patch |   60 ++++++
 .../fixes/iw_cxgb3_0320_portevents.patch           |  167 +++++++++++++++
 .../iw_cxgb3_0330_dontfreeendpointsearly.patch     |  218 ++++++++++++++++++++
 .../fixes/iw_cxgb3_0340_wakeupwaitersonclose.patch |   59 +++++
 5 files changed, 534 insertions(+), 0 deletions(-)
 create mode 100644 kernel_patches/fixes/iw_cxgb3_0300_memleak.patch
 create mode 100644 kernel_patches/fixes/iw_cxgb3_0310_iochannel.patch
 create mode 100644 kernel_patches/fixes/iw_cxgb3_0320_portevents.patch
 create mode 100644 kernel_patches/fixes/iw_cxgb3_0330_dontfreeendpointsearly.patch
 create mode 100644 kernel_patches/fixes/iw_cxgb3_0340_wakeupwaitersonclose.patch

diff --git a/kernel_patches/fixes/iw_cxgb3_0300_memleak.patch b/kernel_patches/fixes/iw_cxgb3_0300_memleak.patch
new file mode 100644
index 0000000..321ad73
--- /dev/null
+++ b/kernel_patches/fixes/iw_cxgb3_0300_memleak.patch
@@ -0,0 +1,30 @@
+commit 80f9bf7345b367848e85d0ae3ab648a5d5b4382c
+Author: Steve Wise <swise at opengridcomputing.com>
+Date:   Wed Aug 5 13:03:58 2009 -0700
+
+    RDMA/cxgb3: iwch_unregister_device leaks memory
+    
+    The memory allocated for the iwcm struct is never freed.
+    
+    Signed-off-by: Steve Wise <swise at opengridcomputing.com>
+    Signed-off-by: Roland Dreier <rolandd at cisco.com>
+
+diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
+index e2a6321..72aa57c 100644
+--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
++++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
+@@ -1418,6 +1418,7 @@ int iwch_register_device(struct iwch_dev *dev)
+ bail2:
+ 	ib_unregister_device(&dev->ibdev);
+ bail1:
++	kfree(dev->ibdev.iwcm);
+ 	return ret;
+ }
+ 
+@@ -1430,5 +1431,6 @@ void iwch_unregister_device(struct iwch_dev *dev)
+ 		device_remove_file(&dev->ibdev.dev,
+ 				   iwch_class_attributes[i]);
+ 	ib_unregister_device(&dev->ibdev);
++	kfree(dev->ibdev.iwcm);
+ 	return;
+ }
diff --git a/kernel_patches/fixes/iw_cxgb3_0310_iochannel.patch b/kernel_patches/fixes/iw_cxgb3_0310_iochannel.patch
new file mode 100644
index 0000000..4ec9186
--- /dev/null
+++ b/kernel_patches/fixes/iw_cxgb3_0310_iochannel.patch
@@ -0,0 +1,60 @@
+commit 2399446de5e5c7f6aa096ca33a948dd3ed389cfa
+Author: Steve Wise <swise at opengridcomputing.com>
+Date:   Wed Aug 5 13:05:56 2009 -0700
+
+    RDMA/cxgb3: Set the appropriate IO channel in rdma_init work requests
+    
+    Signed-off-by: Steve Wise <swise at opengridcomputing.com>
+    Signed-off-by: Roland Dreier <rolandd at cisco.com>
+
+diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c
+index 62f9cf2..4dec515 100644
+--- a/drivers/infiniband/hw/cxgb3/cxio_hal.c
++++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c
+@@ -852,7 +852,9 @@ int cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr)
+ 	wqe->qpcaps = attr->qpcaps;
+ 	wqe->ulpdu_size = cpu_to_be16(attr->tcp_emss);
+ 	wqe->rqe_count = cpu_to_be16(attr->rqe_count);
+-	wqe->flags_rtr_type = cpu_to_be16(attr->flags|V_RTR_TYPE(attr->rtr_type));
++	wqe->flags_rtr_type = cpu_to_be16(attr->flags |
++					  V_RTR_TYPE(attr->rtr_type) |
++					  V_CHAN(attr->chan));
+ 	wqe->ord = cpu_to_be32(attr->ord);
+ 	wqe->ird = cpu_to_be32(attr->ird);
+ 	wqe->qp_dma_addr = cpu_to_be64(attr->qp_dma_addr);
+diff --git a/drivers/infiniband/hw/cxgb3/cxio_wr.h b/drivers/infiniband/hw/cxgb3/cxio_wr.h
+index 32e3b14..a197a5b 100644
+--- a/drivers/infiniband/hw/cxgb3/cxio_wr.h
++++ b/drivers/infiniband/hw/cxgb3/cxio_wr.h
+@@ -327,6 +327,11 @@ enum rdma_init_rtr_types {
+ #define V_RTR_TYPE(x)	((x) << S_RTR_TYPE)
+ #define G_RTR_TYPE(x)	((((x) >> S_RTR_TYPE)) & M_RTR_TYPE)
+ 
++#define S_CHAN		4
++#define M_CHAN		0x3
++#define V_CHAN(x)	((x) << S_CHAN)
++#define G_CHAN(x)	((((x) >> S_CHAN)) & M_CHAN)
++
+ struct t3_rdma_init_attr {
+ 	u32 tid;
+ 	u32 qpid;
+@@ -346,6 +351,7 @@ struct t3_rdma_init_attr {
+ 	u16 flags;
+ 	u16 rqe_count;
+ 	u32 irs;
++	u32 chan;
+ };
+ 
+ struct t3_rdma_init_wr {
+diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c
+index 27bbdc8..6e86534 100644
+--- a/drivers/infiniband/hw/cxgb3/iwch_qp.c
++++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c
+@@ -889,6 +889,7 @@ static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp,
+ 	init_attr.qp_dma_size = (1UL << qhp->wq.size_log2);
+ 	init_attr.rqe_count = iwch_rqes_posted(qhp);
+ 	init_attr.flags = qhp->attr.mpa_attr.initiator ? MPA_INITIATOR : 0;
++	init_attr.chan = qhp->ep->l2t->smt_idx;
+ 	if (peer2peer) {
+ 		init_attr.rtr_type = RTR_READ;
+ 		if (init_attr.ord == 0 && qhp->attr.mpa_attr.initiator)
diff --git a/kernel_patches/fixes/iw_cxgb3_0320_portevents.patch b/kernel_patches/fixes/iw_cxgb3_0320_portevents.patch
new file mode 100644
index 0000000..82afa6d
--- /dev/null
+++ b/kernel_patches/fixes/iw_cxgb3_0320_portevents.patch
@@ -0,0 +1,167 @@
+commit 978304316edfa7b6e7f7bce7ea43c8672808be1d
+Author: Steve Wise <swise at opengridcomputing.com>
+Date:   Wed Aug 5 13:05:57 2009 -0700
+
+    RDMA/cxgb3: Handle port events properly
+    
+    Massage the err_handler upcall into an event handler upcall, pass
+    netdev port events to the cxgb3 ULPs and generate RDMA port events
+    based on LLD port events.
+    
+    Signed-off-by: Steve Wise <swise at opengridcomputing.com>
+    Signed-off-by: Roland Dreier <rolandd at cisco.com>
+
+diff --git a/drivers/infiniband/hw/cxgb3/iwch.c b/drivers/infiniband/hw/cxgb3/iwch.c
+index 26fc0a4..5796170 100644
+--- a/drivers/infiniband/hw/cxgb3/iwch.c
++++ b/drivers/infiniband/hw/cxgb3/iwch.c
+@@ -51,7 +51,7 @@ cxgb3_cpl_handler_func t3c_handlers[NUM_CPL_CMDS];
+ 
+ static void open_rnic_dev(struct t3cdev *);
+ static void close_rnic_dev(struct t3cdev *);
+-static void iwch_err_handler(struct t3cdev *, u32, u32);
++static void iwch_event_handler(struct t3cdev *, u32, u32);
+ 
+ struct cxgb3_client t3c_client = {
+ 	.name = "iw_cxgb3",
+@@ -59,7 +59,7 @@ struct cxgb3_client t3c_client = {
+ 	.remove = close_rnic_dev,
+ 	.handlers = t3c_handlers,
+ 	.redirect = iwch_ep_redirect,
+-	.err_handler = iwch_err_handler
++	.event_handler = iwch_event_handler
+ };
+ 
+ static LIST_HEAD(dev_list);
+@@ -162,21 +162,33 @@ static void close_rnic_dev(struct t3cdev *tdev)
+ 	mutex_unlock(&dev_mutex);
+ }
+ 
+-static void iwch_err_handler(struct t3cdev *tdev, u32 status, u32 error)
++static void iwch_event_handler(struct t3cdev *tdev, u32 evt, u32 port_id)
+ {
+ 	struct cxio_rdev *rdev = tdev->ulp;
+ 	struct iwch_dev *rnicp = rdev_to_iwch_dev(rdev);
+ 	struct ib_event event;
++	u32    portnum = port_id + 1;
+ 
+-	if (status == OFFLOAD_STATUS_DOWN) {
++	switch (evt) {
++	case OFFLOAD_STATUS_DOWN: {
+ 		rdev->flags = CXIO_ERROR_FATAL;
+-
+-		event.device = &rnicp->ibdev;
+ 		event.event  = IB_EVENT_DEVICE_FATAL;
+-		event.element.port_num = 0;
+-		ib_dispatch_event(&event);
++		break;
++		}
++	case OFFLOAD_PORT_DOWN: {
++		event.event  = IB_EVENT_PORT_ERR;
++		break;
++		}
++	case OFFLOAD_PORT_UP: {
++		event.event  = IB_EVENT_PORT_ACTIVE;
++		break;
++		}
+ 	}
+ 
++	event.device = &rnicp->ibdev;
++	event.element.port_num = portnum;
++	ib_dispatch_event(&event);
++
+ 	return;
+ }
+ 
+diff --git a/drivers/net/cxgb3/cxgb3_main.c b/drivers/net/cxgb3/cxgb3_main.c
+index fb5df5c..c97ab82 100644
+--- a/drivers/net/cxgb3/cxgb3_main.c
++++ b/drivers/net/cxgb3/cxgb3_main.c
+@@ -1286,6 +1286,7 @@ static int cxgb_open(struct net_device *dev)
+ 	if (!other_ports)
+ 		schedule_chk_task(adapter);
+ 
++	cxgb3_event_notify(&adapter->tdev, OFFLOAD_PORT_UP, pi->port_id);
+ 	return 0;
+ }
+ 
+@@ -1318,6 +1319,7 @@ static int cxgb_close(struct net_device *dev)
+ 	if (!adapter->open_device_map)
+ 		cxgb_down(adapter);
+ 
++	cxgb3_event_notify(&adapter->tdev, OFFLOAD_PORT_DOWN, pi->port_id);
+ 	return 0;
+ }
+ 
+@@ -2717,7 +2719,7 @@ static int t3_adapter_error(struct adapter *adapter, int reset)
+ 
+ 	if (is_offload(adapter) &&
+ 	    test_bit(OFFLOAD_DEVMAP_BIT, &adapter->open_device_map)) {
+-		cxgb3_err_notify(&adapter->tdev, OFFLOAD_STATUS_DOWN, 0);
++		cxgb3_event_notify(&adapter->tdev, OFFLOAD_STATUS_DOWN, 0);
+ 		offload_close(&adapter->tdev);
+ 	}
+ 
+@@ -2782,7 +2784,7 @@ static void t3_resume_ports(struct adapter *adapter)
+ 	}
+ 
+ 	if (is_offload(adapter) && !ofld_disable)
+-		cxgb3_err_notify(&adapter->tdev, OFFLOAD_STATUS_UP, 0);
++		cxgb3_event_notify(&adapter->tdev, OFFLOAD_STATUS_UP, 0);
+ }
+ 
+ /*
+diff --git a/drivers/net/cxgb3/cxgb3_offload.c b/drivers/net/cxgb3/cxgb3_offload.c
+index f9f54b5..75064ee 100644
+--- a/drivers/net/cxgb3/cxgb3_offload.c
++++ b/drivers/net/cxgb3/cxgb3_offload.c
+@@ -153,14 +153,14 @@ void cxgb3_remove_clients(struct t3cdev *tdev)
+ 	mutex_unlock(&cxgb3_db_lock);
+ }
+ 
+-void cxgb3_err_notify(struct t3cdev *tdev, u32 status, u32 error)
++void cxgb3_event_notify(struct t3cdev *tdev, u32 event, u32 port)
+ {
+ 	struct cxgb3_client *client;
+ 
+ 	mutex_lock(&cxgb3_db_lock);
+ 	list_for_each_entry(client, &client_list, client_list) {
+-		if (client->err_handler)
+-			client->err_handler(tdev, status, error);
++		if (client->event_handler)
++			client->event_handler(tdev, event, port);
+ 	}
+ 	mutex_unlock(&cxgb3_db_lock);
+ }
+diff --git a/drivers/net/cxgb3/cxgb3_offload.h b/drivers/net/cxgb3/cxgb3_offload.h
+index 55945f4..670aa62 100644
+--- a/drivers/net/cxgb3/cxgb3_offload.h
++++ b/drivers/net/cxgb3/cxgb3_offload.h
+@@ -64,14 +64,16 @@ void cxgb3_register_client(struct cxgb3_client *client);
+ void cxgb3_unregister_client(struct cxgb3_client *client);
+ void cxgb3_add_clients(struct t3cdev *tdev);
+ void cxgb3_remove_clients(struct t3cdev *tdev);
+-void cxgb3_err_notify(struct t3cdev *tdev, u32 status, u32 error);
++void cxgb3_event_notify(struct t3cdev *tdev, u32 event, u32 port);
+ 
+ typedef int (*cxgb3_cpl_handler_func)(struct t3cdev *dev,
+ 				      struct sk_buff *skb, void *ctx);
+ 
+ enum {
+ 	OFFLOAD_STATUS_UP,
+-	OFFLOAD_STATUS_DOWN
++	OFFLOAD_STATUS_DOWN,
++	OFFLOAD_PORT_DOWN,
++	OFFLOAD_PORT_UP
+ };
+ 
+ struct cxgb3_client {
+@@ -82,7 +84,7 @@ struct cxgb3_client {
+ 	int (*redirect)(void *ctx, struct dst_entry *old,
+ 			struct dst_entry *new, struct l2t_entry *l2t);
+ 	struct list_head client_list;
+-	void (*err_handler)(struct t3cdev *tdev, u32 status, u32 error);
++	void (*event_handler)(struct t3cdev *tdev, u32 event, u32 port);
+ };
+ 
+ /*
diff --git a/kernel_patches/fixes/iw_cxgb3_0330_dontfreeendpointsearly.patch b/kernel_patches/fixes/iw_cxgb3_0330_dontfreeendpointsearly.patch
new file mode 100644
index 0000000..aed9c29
--- /dev/null
+++ b/kernel_patches/fixes/iw_cxgb3_0330_dontfreeendpointsearly.patch
@@ -0,0 +1,218 @@
+commit 0d0531f53eab06a0506932d885c9ee066b73a778
+Author: Steve Wise <swise at opengridcomputing.com>
+Date:   Fri Aug 7 13:58:26 2009 -0700
+
+    RDMA/cxgb3: Don't free endpoints early
+    
+    - Keep ref on connection request endpoints until either accepted or
+      rejected so it doesn't get freed early.
+    
+    - Endpoint flags now need to be set via atomic bitops because they can
+      be set on both the iw_cxgb3 workqueue thread and user disconnect
+      threads.
+    
+    - Don't move out of CLOSING too early due to multiple calls to
+      iwch_ep_disconnect.
+    
+    Signed-off-by: Steve Wise <swise at opengridcomputing.com>
+    Signed-off-by: Roland Dreier <rolandd at cisco.com>
+
+diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
+index 52d7bb0..7f22f17 100644
+--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
++++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c
+@@ -286,7 +286,7 @@ void __free_ep(struct kref *kref)
+ 	ep = container_of(container_of(kref, struct iwch_ep_common, kref),
+ 			  struct iwch_ep, com);
+ 	PDBG("%s ep %p state %s\n", __func__, ep, states[state_read(&ep->com)]);
+-	if (ep->com.flags & RELEASE_RESOURCES) {
++	if (test_bit(RELEASE_RESOURCES, &ep->com.flags)) {
+ 		cxgb3_remove_tid(ep->com.tdev, (void *)ep, ep->hwtid);
+ 		dst_release(ep->dst);
+ 		l2t_release(L2DATA(ep->com.tdev), ep->l2t);
+@@ -297,7 +297,7 @@ void __free_ep(struct kref *kref)
+ static void release_ep_resources(struct iwch_ep *ep)
+ {
+ 	PDBG("%s ep %p tid %d\n", __func__, ep, ep->hwtid);
+-	ep->com.flags |= RELEASE_RESOURCES;
++	set_bit(RELEASE_RESOURCES, &ep->com.flags);
+ 	put_ep(&ep->com);
+ }
+ 
+@@ -786,10 +786,12 @@ static void connect_request_upcall(struct iwch_ep *ep)
+ 	event.private_data_len = ep->plen;
+ 	event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
+ 	event.provider_data = ep;
+-	if (state_read(&ep->parent_ep->com) != DEAD)
++	if (state_read(&ep->parent_ep->com) != DEAD) {
++		get_ep(&ep->com);
+ 		ep->parent_ep->com.cm_id->event_handler(
+ 						ep->parent_ep->com.cm_id,
+ 						&event);
++	}
+ 	put_ep(&ep->parent_ep->com);
+ 	ep->parent_ep = NULL;
+ }
+@@ -1156,8 +1158,7 @@ static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
+ 	 * We get 2 abort replies from the HW.  The first one must
+ 	 * be ignored except for scribbling that we need one more.
+ 	 */
+-	if (!(ep->com.flags & ABORT_REQ_IN_PROGRESS)) {
+-		ep->com.flags |= ABORT_REQ_IN_PROGRESS;
++	if (!test_and_set_bit(ABORT_REQ_IN_PROGRESS, &ep->com.flags)) {
+ 		return CPL_RET_BUF_DONE;
+ 	}
+ 
+@@ -1480,7 +1481,6 @@ static int peer_close(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
+ 		 * rejects the CR.
+ 		 */
+ 		__state_set(&ep->com, CLOSING);
+-		get_ep(&ep->com);
+ 		break;
+ 	case MPA_REP_SENT:
+ 		__state_set(&ep->com, CLOSING);
+@@ -1561,8 +1561,7 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
+ 	 * We get 2 peer aborts from the HW.  The first one must
+ 	 * be ignored except for scribbling that we need one more.
+ 	 */
+-	if (!(ep->com.flags & PEER_ABORT_IN_PROGRESS)) {
+-		ep->com.flags |= PEER_ABORT_IN_PROGRESS;
++	if (!test_and_set_bit(PEER_ABORT_IN_PROGRESS, &ep->com.flags)) {
+ 		return CPL_RET_BUF_DONE;
+ 	}
+ 
+@@ -1591,7 +1590,6 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
+ 		 * the reference on it until the ULP accepts or
+ 		 * rejects the CR.
+ 		 */
+-		get_ep(&ep->com);
+ 		break;
+ 	case MORIBUND:
+ 	case CLOSING:
+@@ -1797,6 +1795,7 @@ int iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
+ 		err = send_mpa_reject(ep, pdata, pdata_len);
+ 		err = iwch_ep_disconnect(ep, 0, GFP_KERNEL);
+ 	}
++	put_ep(&ep->com);
+ 	return 0;
+ }
+ 
+@@ -1810,8 +1809,10 @@ int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
+ 	struct iwch_qp *qp = get_qhp(h, conn_param->qpn);
+ 
+ 	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+-	if (state_read(&ep->com) == DEAD)
+-		return -ECONNRESET;
++	if (state_read(&ep->com) == DEAD) {
++		err = -ECONNRESET;
++		goto err;
++	}
+ 
+ 	BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD);
+ 	BUG_ON(!qp);
+@@ -1819,7 +1820,8 @@ int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
+ 	if ((conn_param->ord > qp->rhp->attr.max_rdma_read_qp_depth) ||
+ 	    (conn_param->ird > qp->rhp->attr.max_rdma_reads_per_qp)) {
+ 		abort_connection(ep, NULL, GFP_KERNEL);
+-		return -EINVAL;
++		err = -EINVAL;
++		goto err;
+ 	}
+ 
+ 	cm_id->add_ref(cm_id);
+@@ -1836,8 +1838,6 @@ int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
+ 
+ 	PDBG("%s %d ird %d ord %d\n", __func__, __LINE__, ep->ird, ep->ord);
+ 
+-	get_ep(&ep->com);
+-
+ 	/* bind QP to EP and move to RTS */
+ 	attrs.mpa_attr = ep->mpa_attr;
+ 	attrs.max_ird = ep->ird;
+@@ -1855,30 +1855,31 @@ int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
+ 	err = iwch_modify_qp(ep->com.qp->rhp,
+ 			     ep->com.qp, mask, &attrs, 1);
+ 	if (err)
+-		goto err;
++		goto err1;
+ 
+ 	/* if needed, wait for wr_ack */
+ 	if (iwch_rqes_posted(qp)) {
+ 		wait_event(ep->com.waitq, ep->com.rpl_done);
+ 		err = ep->com.rpl_err;
+ 		if (err)
+-			goto err;
++			goto err1;
+ 	}
+ 
+ 	err = send_mpa_reply(ep, conn_param->private_data,
+ 			     conn_param->private_data_len);
+ 	if (err)
+-		goto err;
++		goto err1;
+ 
+ 
+ 	state_set(&ep->com, FPDU_MODE);
+ 	established_upcall(ep);
+ 	put_ep(&ep->com);
+ 	return 0;
+-err:
++err1:
+ 	ep->com.cm_id = NULL;
+ 	ep->com.qp = NULL;
+ 	cm_id->rem_ref(cm_id);
++err:
+ 	put_ep(&ep->com);
+ 	return err;
+ }
+@@ -2097,14 +2098,17 @@ int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, gfp_t gfp)
+ 			ep->com.state = CLOSING;
+ 			start_ep_timer(ep);
+ 		}
++		set_bit(CLOSE_SENT, &ep->com.flags);
+ 		break;
+ 	case CLOSING:
+-		close = 1;
+-		if (abrupt) {
+-			stop_ep_timer(ep);
+-			ep->com.state = ABORTING;
+-		} else
+-			ep->com.state = MORIBUND;
++		if (!test_and_set_bit(CLOSE_SENT, &ep->com.flags)) {
++			close = 1;
++			if (abrupt) {
++				stop_ep_timer(ep);
++				ep->com.state = ABORTING;
++			} else
++				ep->com.state = MORIBUND;
++		}
+ 		break;
+ 	case MORIBUND:
+ 	case ABORTING:
+diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.h b/drivers/infiniband/hw/cxgb3/iwch_cm.h
+index 43c0aea..b9efadf 100644
+--- a/drivers/infiniband/hw/cxgb3/iwch_cm.h
++++ b/drivers/infiniband/hw/cxgb3/iwch_cm.h
+@@ -145,9 +145,10 @@ enum iwch_ep_state {
+ };
+ 
+ enum iwch_ep_flags {
+-	PEER_ABORT_IN_PROGRESS	= (1 << 0),
+-	ABORT_REQ_IN_PROGRESS	= (1 << 1),
+-	RELEASE_RESOURCES	= (1 << 2),
++	PEER_ABORT_IN_PROGRESS	= 0,
++	ABORT_REQ_IN_PROGRESS	= 1,
++	RELEASE_RESOURCES	= 2,
++	CLOSE_SENT		= 3,
+ };
+ 
+ struct iwch_ep_common {
+@@ -162,7 +163,7 @@ struct iwch_ep_common {
+ 	wait_queue_head_t waitq;
+ 	int rpl_done;
+ 	int rpl_err;
+-	u32 flags;
++	unsigned long flags;
+ };
+ 
+ struct iwch_listen_ep {
diff --git a/kernel_patches/fixes/iw_cxgb3_0340_wakeupwaitersonclose.patch b/kernel_patches/fixes/iw_cxgb3_0340_wakeupwaitersonclose.patch
new file mode 100644
index 0000000..c4475da
--- /dev/null
+++ b/kernel_patches/fixes/iw_cxgb3_0340_wakeupwaitersonclose.patch
@@ -0,0 +1,59 @@
+commit 8d4b0bd35644aec773317b725feda2efadd46c3c
+Author: Steve Wise <swise at opengridcomputing.com>
+Date:   Fri Aug 7 13:58:27 2009 -0700
+
+    RDMA/cxgb3: Wake up any waiters on peer close/abort
+    
+    A close/abort while waiting for a wr_ack during connection migration
+    can cause a hung process in iwch_accept_cr/iwch_reject_cr.
+    
+    The fix is to set rpl_err/rpl_done and wake up the waiters when we
+    get a close/abort while in MPA_REQ_RCVD state.
+    
+    Signed-off-by: Steve Wise <swise at opengridcomputing.com>
+    Signed-off-by: Roland Dreier <rolandd at cisco.com>
+
+diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
+index 7f22f17..66b4135 100644
+--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
++++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c
+@@ -1478,9 +1478,14 @@ static int peer_close(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
+ 		/*
+ 		 * We're gonna mark this puppy DEAD, but keep
+ 		 * the reference on it until the ULP accepts or
+-		 * rejects the CR.
++		 * rejects the CR. Also wake up anyone waiting
++		 * in rdma connection migration (see iwch_accept_cr()).
+ 		 */
+ 		__state_set(&ep->com, CLOSING);
++		ep->com.rpl_done = 1;
++		ep->com.rpl_err = -ECONNRESET;
++		PDBG("waking up ep %p\n", ep);
++		wake_up(&ep->com.waitq);
+ 		break;
+ 	case MPA_REP_SENT:
+ 		__state_set(&ep->com, CLOSING);
+@@ -1588,8 +1593,13 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
+ 		/*
+ 		 * We're gonna mark this puppy DEAD, but keep
+ 		 * the reference on it until the ULP accepts or
+-		 * rejects the CR.
++		 * rejects the CR. Also wake up anyone waiting
++		 * in rdma connection migration (see iwch_accept_cr()).
+ 		 */
++		ep->com.rpl_done = 1;
++		ep->com.rpl_err = -ECONNRESET;
++		PDBG("waking up ep %p\n", ep);
++		wake_up(&ep->com.waitq);
+ 		break;
+ 	case MORIBUND:
+ 	case CLOSING:
+@@ -1828,8 +1838,6 @@ int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
+ 	ep->com.cm_id = cm_id;
+ 	ep->com.qp = qp;
+ 
+-	ep->com.rpl_done = 0;
+-	ep->com.rpl_err = 0;
+ 	ep->ird = conn_param->ird;
+ 	ep->ord = conn_param->ord;
+ 




More information about the ewg mailing list