[ewg] [GIT PULL ofed-1.5] RDMA/cxgb3: Pull in recent bug fixes.
Steve Wise
swise at opengridcomputing.com
Mon Aug 24 14:23:53 PDT 2009
Vlad, please pull these recent iw_cxgb3 bug fixes from:
ssh://vlad@sofa.openfabrics.org/~swise/scm/ofed_kernel ofed_1_5
Thanks,
Steve.
-------- Original Message --------
Subject: [PATCH] RDMA/cxgb3: Pull in recent bug fixes.
Date: Mon, 24 Aug 2009 16:18:20 -0500
From: Steve Wise <swise at opengridcomputing.com>
To: swise at opengridcomputing.com
Signed-off-by: Steve Wise <swise at opengridcomputing.com>
---
kernel_patches/fixes/iw_cxgb3_0300_memleak.patch | 30 +++
kernel_patches/fixes/iw_cxgb3_0310_iochannel.patch | 60 ++++++
.../fixes/iw_cxgb3_0320_portevents.patch | 167 +++++++++++++++
.../iw_cxgb3_0330_dontfreeendpointsearly.patch | 218 ++++++++++++++++++++
.../fixes/iw_cxgb3_0340_wakeupwaitersonclose.patch | 59 +++++
5 files changed, 534 insertions(+), 0 deletions(-)
create mode 100644 kernel_patches/fixes/iw_cxgb3_0300_memleak.patch
create mode 100644 kernel_patches/fixes/iw_cxgb3_0310_iochannel.patch
create mode 100644 kernel_patches/fixes/iw_cxgb3_0320_portevents.patch
create mode 100644 kernel_patches/fixes/iw_cxgb3_0330_dontfreeendpointsearly.patch
create mode 100644 kernel_patches/fixes/iw_cxgb3_0340_wakeupwaitersonclose.patch
diff --git a/kernel_patches/fixes/iw_cxgb3_0300_memleak.patch b/kernel_patches/fixes/iw_cxgb3_0300_memleak.patch
new file mode 100644
index 0000000..321ad73
--- /dev/null
+++ b/kernel_patches/fixes/iw_cxgb3_0300_memleak.patch
@@ -0,0 +1,30 @@
+commit 80f9bf7345b367848e85d0ae3ab648a5d5b4382c
+Author: Steve Wise <swise at opengridcomputing.com>
+Date: Wed Aug 5 13:03:58 2009 -0700
+
+ RDMA/cxgb3: iwch_unregister_device leaks memory
+
+ The memory allocated for the iwcm struct is never freed.
+
+ Signed-off-by: Steve Wise <swise at opengridcomputing.com>
+ Signed-off-by: Roland Dreier <rolandd at cisco.com>
+
+diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
+index e2a6321..72aa57c 100644
+--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
++++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
+@@ -1418,6 +1418,7 @@ int iwch_register_device(struct iwch_dev *dev)
+ bail2:
+ ib_unregister_device(&dev->ibdev);
+ bail1:
++ kfree(dev->ibdev.iwcm);
+ return ret;
+ }
+
+@@ -1430,5 +1431,6 @@ void iwch_unregister_device(struct iwch_dev *dev)
+ device_remove_file(&dev->ibdev.dev,
+ iwch_class_attributes[i]);
+ ib_unregister_device(&dev->ibdev);
++ kfree(dev->ibdev.iwcm);
+ return;
+ }
diff --git a/kernel_patches/fixes/iw_cxgb3_0310_iochannel.patch b/kernel_patches/fixes/iw_cxgb3_0310_iochannel.patch
new file mode 100644
index 0000000..4ec9186
--- /dev/null
+++ b/kernel_patches/fixes/iw_cxgb3_0310_iochannel.patch
@@ -0,0 +1,60 @@
+commit 2399446de5e5c7f6aa096ca33a948dd3ed389cfa
+Author: Steve Wise <swise at opengridcomputing.com>
+Date: Wed Aug 5 13:05:56 2009 -0700
+
+ RDMA/cxgb3: Set the appropriate IO channel in rdma_init work requests
+
+ Signed-off-by: Steve Wise <swise at opengridcomputing.com>
+ Signed-off-by: Roland Dreier <rolandd at cisco.com>
+
+diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c
+index 62f9cf2..4dec515 100644
+--- a/drivers/infiniband/hw/cxgb3/cxio_hal.c
++++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c
+@@ -852,7 +852,9 @@ int cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr)
+ wqe->qpcaps = attr->qpcaps;
+ wqe->ulpdu_size = cpu_to_be16(attr->tcp_emss);
+ wqe->rqe_count = cpu_to_be16(attr->rqe_count);
+- wqe->flags_rtr_type = cpu_to_be16(attr->flags|V_RTR_TYPE(attr->rtr_type));
++ wqe->flags_rtr_type = cpu_to_be16(attr->flags |
++ V_RTR_TYPE(attr->rtr_type) |
++ V_CHAN(attr->chan));
+ wqe->ord = cpu_to_be32(attr->ord);
+ wqe->ird = cpu_to_be32(attr->ird);
+ wqe->qp_dma_addr = cpu_to_be64(attr->qp_dma_addr);
+diff --git a/drivers/infiniband/hw/cxgb3/cxio_wr.h b/drivers/infiniband/hw/cxgb3/cxio_wr.h
+index 32e3b14..a197a5b 100644
+--- a/drivers/infiniband/hw/cxgb3/cxio_wr.h
++++ b/drivers/infiniband/hw/cxgb3/cxio_wr.h
+@@ -327,6 +327,11 @@ enum rdma_init_rtr_types {
+ #define V_RTR_TYPE(x) ((x) << S_RTR_TYPE)
+ #define G_RTR_TYPE(x) ((((x) >> S_RTR_TYPE)) & M_RTR_TYPE)
+
++#define S_CHAN 4
++#define M_CHAN 0x3
++#define V_CHAN(x) ((x) << S_CHAN)
++#define G_CHAN(x) ((((x) >> S_CHAN)) & M_CHAN)
++
+ struct t3_rdma_init_attr {
+ u32 tid;
+ u32 qpid;
+@@ -346,6 +351,7 @@ struct t3_rdma_init_attr {
+ u16 flags;
+ u16 rqe_count;
+ u32 irs;
++ u32 chan;
+ };
+
+ struct t3_rdma_init_wr {
+diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c
+index 27bbdc8..6e86534 100644
+--- a/drivers/infiniband/hw/cxgb3/iwch_qp.c
++++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c
+@@ -889,6 +889,7 @@ static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp,
+ init_attr.qp_dma_size = (1UL << qhp->wq.size_log2);
+ init_attr.rqe_count = iwch_rqes_posted(qhp);
+ init_attr.flags = qhp->attr.mpa_attr.initiator ? MPA_INITIATOR : 0;
++ init_attr.chan = qhp->ep->l2t->smt_idx;
+ if (peer2peer) {
+ init_attr.rtr_type = RTR_READ;
+ if (init_attr.ord == 0 && qhp->attr.mpa_attr.initiator)
diff --git a/kernel_patches/fixes/iw_cxgb3_0320_portevents.patch b/kernel_patches/fixes/iw_cxgb3_0320_portevents.patch
new file mode 100644
index 0000000..82afa6d
--- /dev/null
+++ b/kernel_patches/fixes/iw_cxgb3_0320_portevents.patch
@@ -0,0 +1,167 @@
+commit 978304316edfa7b6e7f7bce7ea43c8672808be1d
+Author: Steve Wise <swise at opengridcomputing.com>
+Date: Wed Aug 5 13:05:57 2009 -0700
+
+ RDMA/cxgb3: Handle port events properly
+
+ Massage the err_handler upcall into an event handler upcall, pass
+ netdev port events to the cxgb3 ULPs and generate RDMA port events
+ based on LLD port events.
+
+ Signed-off-by: Steve Wise <swise at opengridcomputing.com>
+ Signed-off-by: Roland Dreier <rolandd at cisco.com>
+
+diff --git a/drivers/infiniband/hw/cxgb3/iwch.c b/drivers/infiniband/hw/cxgb3/iwch.c
+index 26fc0a4..5796170 100644
+--- a/drivers/infiniband/hw/cxgb3/iwch.c
++++ b/drivers/infiniband/hw/cxgb3/iwch.c
+@@ -51,7 +51,7 @@ cxgb3_cpl_handler_func t3c_handlers[NUM_CPL_CMDS];
+
+ static void open_rnic_dev(struct t3cdev *);
+ static void close_rnic_dev(struct t3cdev *);
+-static void iwch_err_handler(struct t3cdev *, u32, u32);
++static void iwch_event_handler(struct t3cdev *, u32, u32);
+
+ struct cxgb3_client t3c_client = {
+ .name = "iw_cxgb3",
+@@ -59,7 +59,7 @@ struct cxgb3_client t3c_client = {
+ .remove = close_rnic_dev,
+ .handlers = t3c_handlers,
+ .redirect = iwch_ep_redirect,
+- .err_handler = iwch_err_handler
++ .event_handler = iwch_event_handler
+ };
+
+ static LIST_HEAD(dev_list);
+@@ -162,21 +162,33 @@ static void close_rnic_dev(struct t3cdev *tdev)
+ mutex_unlock(&dev_mutex);
+ }
+
+-static void iwch_err_handler(struct t3cdev *tdev, u32 status, u32 error)
++static void iwch_event_handler(struct t3cdev *tdev, u32 evt, u32 port_id)
+ {
+ struct cxio_rdev *rdev = tdev->ulp;
+ struct iwch_dev *rnicp = rdev_to_iwch_dev(rdev);
+ struct ib_event event;
++ u32 portnum = port_id + 1;
+
+- if (status == OFFLOAD_STATUS_DOWN) {
++ switch (evt) {
++ case OFFLOAD_STATUS_DOWN: {
+ rdev->flags = CXIO_ERROR_FATAL;
+-
+- event.device = &rnicp->ibdev;
+ event.event = IB_EVENT_DEVICE_FATAL;
+- event.element.port_num = 0;
+- ib_dispatch_event(&event);
++ break;
++ }
++ case OFFLOAD_PORT_DOWN: {
++ event.event = IB_EVENT_PORT_ERR;
++ break;
++ }
++ case OFFLOAD_PORT_UP: {
++ event.event = IB_EVENT_PORT_ACTIVE;
++ break;
++ }
+ }
+
++ event.device = &rnicp->ibdev;
++ event.element.port_num = portnum;
++ ib_dispatch_event(&event);
++
+ return;
+ }
+
+diff --git a/drivers/net/cxgb3/cxgb3_main.c b/drivers/net/cxgb3/cxgb3_main.c
+index fb5df5c..c97ab82 100644
+--- a/drivers/net/cxgb3/cxgb3_main.c
++++ b/drivers/net/cxgb3/cxgb3_main.c
+@@ -1286,6 +1286,7 @@ static int cxgb_open(struct net_device *dev)
+ if (!other_ports)
+ schedule_chk_task(adapter);
+
++ cxgb3_event_notify(&adapter->tdev, OFFLOAD_PORT_UP, pi->port_id);
+ return 0;
+ }
+
+@@ -1318,6 +1319,7 @@ static int cxgb_close(struct net_device *dev)
+ if (!adapter->open_device_map)
+ cxgb_down(adapter);
+
++ cxgb3_event_notify(&adapter->tdev, OFFLOAD_PORT_DOWN, pi->port_id);
+ return 0;
+ }
+
+@@ -2717,7 +2719,7 @@ static int t3_adapter_error(struct adapter *adapter, int reset)
+
+ if (is_offload(adapter) &&
+ test_bit(OFFLOAD_DEVMAP_BIT, &adapter->open_device_map)) {
+- cxgb3_err_notify(&adapter->tdev, OFFLOAD_STATUS_DOWN, 0);
++ cxgb3_event_notify(&adapter->tdev, OFFLOAD_STATUS_DOWN, 0);
+ offload_close(&adapter->tdev);
+ }
+
+@@ -2782,7 +2784,7 @@ static void t3_resume_ports(struct adapter *adapter)
+ }
+
+ if (is_offload(adapter) && !ofld_disable)
+- cxgb3_err_notify(&adapter->tdev, OFFLOAD_STATUS_UP, 0);
++ cxgb3_event_notify(&adapter->tdev, OFFLOAD_STATUS_UP, 0);
+ }
+
+ /*
+diff --git a/drivers/net/cxgb3/cxgb3_offload.c b/drivers/net/cxgb3/cxgb3_offload.c
+index f9f54b5..75064ee 100644
+--- a/drivers/net/cxgb3/cxgb3_offload.c
++++ b/drivers/net/cxgb3/cxgb3_offload.c
+@@ -153,14 +153,14 @@ void cxgb3_remove_clients(struct t3cdev *tdev)
+ mutex_unlock(&cxgb3_db_lock);
+ }
+
+-void cxgb3_err_notify(struct t3cdev *tdev, u32 status, u32 error)
++void cxgb3_event_notify(struct t3cdev *tdev, u32 event, u32 port)
+ {
+ struct cxgb3_client *client;
+
+ mutex_lock(&cxgb3_db_lock);
+ list_for_each_entry(client, &client_list, client_list) {
+- if (client->err_handler)
+- client->err_handler(tdev, status, error);
++ if (client->event_handler)
++ client->event_handler(tdev, event, port);
+ }
+ mutex_unlock(&cxgb3_db_lock);
+ }
+diff --git a/drivers/net/cxgb3/cxgb3_offload.h b/drivers/net/cxgb3/cxgb3_offload.h
+index 55945f4..670aa62 100644
+--- a/drivers/net/cxgb3/cxgb3_offload.h
++++ b/drivers/net/cxgb3/cxgb3_offload.h
+@@ -64,14 +64,16 @@ void cxgb3_register_client(struct cxgb3_client *client);
+ void cxgb3_unregister_client(struct cxgb3_client *client);
+ void cxgb3_add_clients(struct t3cdev *tdev);
+ void cxgb3_remove_clients(struct t3cdev *tdev);
+-void cxgb3_err_notify(struct t3cdev *tdev, u32 status, u32 error);
++void cxgb3_event_notify(struct t3cdev *tdev, u32 event, u32 port);
+
+ typedef int (*cxgb3_cpl_handler_func)(struct t3cdev *dev,
+ struct sk_buff *skb, void *ctx);
+
+ enum {
+ OFFLOAD_STATUS_UP,
+- OFFLOAD_STATUS_DOWN
++ OFFLOAD_STATUS_DOWN,
++ OFFLOAD_PORT_DOWN,
++ OFFLOAD_PORT_UP
+ };
+
+ struct cxgb3_client {
+@@ -82,7 +84,7 @@ struct cxgb3_client {
+ int (*redirect)(void *ctx, struct dst_entry *old,
+ struct dst_entry *new, struct l2t_entry *l2t);
+ struct list_head client_list;
+- void (*err_handler)(struct t3cdev *tdev, u32 status, u32 error);
++ void (*event_handler)(struct t3cdev *tdev, u32 event, u32 port);
+ };
+
+ /*
diff --git a/kernel_patches/fixes/iw_cxgb3_0330_dontfreeendpointsearly.patch b/kernel_patches/fixes/iw_cxgb3_0330_dontfreeendpointsearly.patch
new file mode 100644
index 0000000..aed9c29
--- /dev/null
+++ b/kernel_patches/fixes/iw_cxgb3_0330_dontfreeendpointsearly.patch
@@ -0,0 +1,218 @@
+commit 0d0531f53eab06a0506932d885c9ee066b73a778
+Author: Steve Wise <swise at opengridcomputing.com>
+Date: Fri Aug 7 13:58:26 2009 -0700
+
+ RDMA/cxgb3: Don't free endpoints early
+
+ - Keep a reference on connection request endpoints until they are either
+ accepted or rejected, so that they don't get freed early.
+
+ - Endpoint flags now need to be set via atomic bitops because they can
+ be set on both the iw_cxgb3 workqueue thread and user disconnect
+ threads.
+
+ - Don't move out of CLOSING too early due to multiple calls to
+ iwch_ep_disconnect.
+
+ Signed-off-by: Steve Wise <swise at opengridcomputing.com>
+ Signed-off-by: Roland Dreier <rolandd at cisco.com>
+
+diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
+index 52d7bb0..7f22f17 100644
+--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
++++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c
+@@ -286,7 +286,7 @@ void __free_ep(struct kref *kref)
+ ep = container_of(container_of(kref, struct iwch_ep_common, kref),
+ struct iwch_ep, com);
+ PDBG("%s ep %p state %s\n", __func__, ep, states[state_read(&ep->com)]);
+- if (ep->com.flags & RELEASE_RESOURCES) {
++ if (test_bit(RELEASE_RESOURCES, &ep->com.flags)) {
+ cxgb3_remove_tid(ep->com.tdev, (void *)ep, ep->hwtid);
+ dst_release(ep->dst);
+ l2t_release(L2DATA(ep->com.tdev), ep->l2t);
+@@ -297,7 +297,7 @@ void __free_ep(struct kref *kref)
+ static void release_ep_resources(struct iwch_ep *ep)
+ {
+ PDBG("%s ep %p tid %d\n", __func__, ep, ep->hwtid);
+- ep->com.flags |= RELEASE_RESOURCES;
++ set_bit(RELEASE_RESOURCES, &ep->com.flags);
+ put_ep(&ep->com);
+ }
+
+@@ -786,10 +786,12 @@ static void connect_request_upcall(struct iwch_ep *ep)
+ event.private_data_len = ep->plen;
+ event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
+ event.provider_data = ep;
+- if (state_read(&ep->parent_ep->com) != DEAD)
++ if (state_read(&ep->parent_ep->com) != DEAD) {
++ get_ep(&ep->com);
+ ep->parent_ep->com.cm_id->event_handler(
+ ep->parent_ep->com.cm_id,
+ &event);
++ }
+ put_ep(&ep->parent_ep->com);
+ ep->parent_ep = NULL;
+ }
+@@ -1156,8 +1158,7 @@ static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
+ * We get 2 abort replies from the HW. The first one must
+ * be ignored except for scribbling that we need one more.
+ */
+- if (!(ep->com.flags & ABORT_REQ_IN_PROGRESS)) {
+- ep->com.flags |= ABORT_REQ_IN_PROGRESS;
++ if (!test_and_set_bit(ABORT_REQ_IN_PROGRESS, &ep->com.flags)) {
+ return CPL_RET_BUF_DONE;
+ }
+
+@@ -1480,7 +1481,6 @@ static int peer_close(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
+ * rejects the CR.
+ */
+ __state_set(&ep->com, CLOSING);
+- get_ep(&ep->com);
+ break;
+ case MPA_REP_SENT:
+ __state_set(&ep->com, CLOSING);
+@@ -1561,8 +1561,7 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
+ * We get 2 peer aborts from the HW. The first one must
+ * be ignored except for scribbling that we need one more.
+ */
+- if (!(ep->com.flags & PEER_ABORT_IN_PROGRESS)) {
+- ep->com.flags |= PEER_ABORT_IN_PROGRESS;
++ if (!test_and_set_bit(PEER_ABORT_IN_PROGRESS, &ep->com.flags)) {
+ return CPL_RET_BUF_DONE;
+ }
+
+@@ -1591,7 +1590,6 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
+ * the reference on it until the ULP accepts or
+ * rejects the CR.
+ */
+- get_ep(&ep->com);
+ break;
+ case MORIBUND:
+ case CLOSING:
+@@ -1797,6 +1795,7 @@ int iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
+ err = send_mpa_reject(ep, pdata, pdata_len);
+ err = iwch_ep_disconnect(ep, 0, GFP_KERNEL);
+ }
++ put_ep(&ep->com);
+ return 0;
+ }
+
+@@ -1810,8 +1809,10 @@ int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
+ struct iwch_qp *qp = get_qhp(h, conn_param->qpn);
+
+ PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+- if (state_read(&ep->com) == DEAD)
+- return -ECONNRESET;
++ if (state_read(&ep->com) == DEAD) {
++ err = -ECONNRESET;
++ goto err;
++ }
+
+ BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD);
+ BUG_ON(!qp);
+@@ -1819,7 +1820,8 @@ int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
+ if ((conn_param->ord > qp->rhp->attr.max_rdma_read_qp_depth) ||
+ (conn_param->ird > qp->rhp->attr.max_rdma_reads_per_qp)) {
+ abort_connection(ep, NULL, GFP_KERNEL);
+- return -EINVAL;
++ err = -EINVAL;
++ goto err;
+ }
+
+ cm_id->add_ref(cm_id);
+@@ -1836,8 +1838,6 @@ int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
+
+ PDBG("%s %d ird %d ord %d\n", __func__, __LINE__, ep->ird, ep->ord);
+
+- get_ep(&ep->com);
+-
+ /* bind QP to EP and move to RTS */
+ attrs.mpa_attr = ep->mpa_attr;
+ attrs.max_ird = ep->ird;
+@@ -1855,30 +1855,31 @@ int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
+ err = iwch_modify_qp(ep->com.qp->rhp,
+ ep->com.qp, mask, &attrs, 1);
+ if (err)
+- goto err;
++ goto err1;
+
+ /* if needed, wait for wr_ack */
+ if (iwch_rqes_posted(qp)) {
+ wait_event(ep->com.waitq, ep->com.rpl_done);
+ err = ep->com.rpl_err;
+ if (err)
+- goto err;
++ goto err1;
+ }
+
+ err = send_mpa_reply(ep, conn_param->private_data,
+ conn_param->private_data_len);
+ if (err)
+- goto err;
++ goto err1;
+
+
+ state_set(&ep->com, FPDU_MODE);
+ established_upcall(ep);
+ put_ep(&ep->com);
+ return 0;
+-err:
++err1:
+ ep->com.cm_id = NULL;
+ ep->com.qp = NULL;
+ cm_id->rem_ref(cm_id);
++err:
+ put_ep(&ep->com);
+ return err;
+ }
+@@ -2097,14 +2098,17 @@ int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, gfp_t gfp)
+ ep->com.state = CLOSING;
+ start_ep_timer(ep);
+ }
++ set_bit(CLOSE_SENT, &ep->com.flags);
+ break;
+ case CLOSING:
+- close = 1;
+- if (abrupt) {
+- stop_ep_timer(ep);
+- ep->com.state = ABORTING;
+- } else
+- ep->com.state = MORIBUND;
++ if (!test_and_set_bit(CLOSE_SENT, &ep->com.flags)) {
++ close = 1;
++ if (abrupt) {
++ stop_ep_timer(ep);
++ ep->com.state = ABORTING;
++ } else
++ ep->com.state = MORIBUND;
++ }
+ break;
+ case MORIBUND:
+ case ABORTING:
+diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.h b/drivers/infiniband/hw/cxgb3/iwch_cm.h
+index 43c0aea..b9efadf 100644
+--- a/drivers/infiniband/hw/cxgb3/iwch_cm.h
++++ b/drivers/infiniband/hw/cxgb3/iwch_cm.h
+@@ -145,9 +145,10 @@ enum iwch_ep_state {
+ };
+
+ enum iwch_ep_flags {
+- PEER_ABORT_IN_PROGRESS = (1 << 0),
+- ABORT_REQ_IN_PROGRESS = (1 << 1),
+- RELEASE_RESOURCES = (1 << 2),
++ PEER_ABORT_IN_PROGRESS = 0,
++ ABORT_REQ_IN_PROGRESS = 1,
++ RELEASE_RESOURCES = 2,
++ CLOSE_SENT = 3,
+ };
+
+ struct iwch_ep_common {
+@@ -162,7 +163,7 @@ struct iwch_ep_common {
+ wait_queue_head_t waitq;
+ int rpl_done;
+ int rpl_err;
+- u32 flags;
++ unsigned long flags;
+ };
+
+ struct iwch_listen_ep {
diff --git a/kernel_patches/fixes/iw_cxgb3_0340_wakeupwaitersonclose.patch b/kernel_patches/fixes/iw_cxgb3_0340_wakeupwaitersonclose.patch
new file mode 100644
index 0000000..c4475da
--- /dev/null
+++ b/kernel_patches/fixes/iw_cxgb3_0340_wakeupwaitersonclose.patch
@@ -0,0 +1,59 @@
+commit 8d4b0bd35644aec773317b725feda2efadd46c3c
+Author: Steve Wise <swise at opengridcomputing.com>
+Date: Fri Aug 7 13:58:27 2009 -0700
+
+ RDMA/cxgb3: Wake up any waiters on peer close/abort
+
+ A close/abort while waiting for a wr_ack during connection migration
+ can cause a hung process in iwch_accept_cr/iwch_reject_cr.
+
+ The fix is to set rpl_err/rpl_done and wake up the waiters when we
+ get a close/abort while in MPA_REQ_RCVD state.
+
+ Signed-off-by: Steve Wise <swise at opengridcomputing.com>
+ Signed-off-by: Roland Dreier <rolandd at cisco.com>
+
+diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
+index 7f22f17..66b4135 100644
+--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
++++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c
+@@ -1478,9 +1478,14 @@ static int peer_close(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
+ /*
+ * We're gonna mark this puppy DEAD, but keep
+ * the reference on it until the ULP accepts or
+- * rejects the CR.
++ * rejects the CR. Also wake up anyone waiting
++ * in rdma connection migration (see iwch_accept_cr()).
+ */
+ __state_set(&ep->com, CLOSING);
++ ep->com.rpl_done = 1;
++ ep->com.rpl_err = -ECONNRESET;
++ PDBG("waking up ep %p\n", ep);
++ wake_up(&ep->com.waitq);
+ break;
+ case MPA_REP_SENT:
+ __state_set(&ep->com, CLOSING);
+@@ -1588,8 +1593,13 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
+ /*
+ * We're gonna mark this puppy DEAD, but keep
+ * the reference on it until the ULP accepts or
+- * rejects the CR.
++ * rejects the CR. Also wake up anyone waiting
++ * in rdma connection migration (see iwch_accept_cr()).
+ */
++ ep->com.rpl_done = 1;
++ ep->com.rpl_err = -ECONNRESET;
++ PDBG("waking up ep %p\n", ep);
++ wake_up(&ep->com.waitq);
+ break;
+ case MORIBUND:
+ case CLOSING:
+@@ -1828,8 +1838,6 @@ int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
+ ep->com.cm_id = cm_id;
+ ep->com.qp = qp;
+
+- ep->com.rpl_done = 0;
+- ep->com.rpl_err = 0;
+ ep->ird = conn_param->ird;
+ ep->ord = conn_param->ord;
+
More information about the ewg
mailing list