[openib-general] [PATCH 2/2] 2.6.19 ib_cm: send DREP in response to unmatched DREQ
Sean Hefty
sean.hefty at intel.com
Wed Oct 4 11:37:25 PDT 2006
From: Sean Hefty <sean.hefty at intel.com>
Currently a DREP is only sent in response to a DREQ if a connection
has been found matching the DREQ, and it is in the proper state. Once
a DREP is sent, the local connection moves into timewait. Duplicate
DREQs received while in this state result in re-sending the DREP.
However, it's likely that the local connection will enter and exit
timewait before the remote side times out a lost DREP and resends a DREQ.
To handle this, we send a DREP in response to a DREQ, even if a local
connection is not found. This avoids maintaining disconnected
id's in timewait states for excessively long times, just to handle a
lost DREP.
Signed-off-by: Sean Hefty <sean.hefty at intel.com>
---
This addresses a problem experienced by MPI during scale-up testing.
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 470c482..25b1018 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -1902,6 +1902,32 @@ out: spin_unlock_irqrestore(&cm_id_priv-
}
EXPORT_SYMBOL(ib_send_cm_drep);
+static int cm_issue_drep(struct cm_port *port,
+ struct ib_mad_recv_wc *mad_recv_wc)
+{
+ struct ib_mad_send_buf *msg = NULL;
+ struct cm_dreq_msg *dreq_msg;
+ struct cm_drep_msg *drep_msg;
+ int ret;
+
+ ret = cm_alloc_response_msg(port, mad_recv_wc, &msg);
+ if (ret)
+ return ret;
+
+ dreq_msg = (struct cm_dreq_msg *) mad_recv_wc->recv_buf.mad;
+ drep_msg = (struct cm_drep_msg *) msg->mad;
+
+ cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, dreq_msg->hdr.tid);
+ drep_msg->remote_comm_id = dreq_msg->local_comm_id;
+ drep_msg->local_comm_id = dreq_msg->remote_comm_id;
+
+ ret = ib_post_send_mad(msg, NULL);
+ if (ret)
+ cm_free_msg(msg);
+
+ return ret;
+}
+
static int cm_dreq_handler(struct cm_work *work)
{
struct cm_id_private *cm_id_priv;
@@ -1913,8 +1939,10 @@ static int cm_dreq_handler(struct cm_wor
dreq_msg = (struct cm_dreq_msg *)work->mad_recv_wc->recv_buf.mad;
cm_id_priv = cm_acquire_id(dreq_msg->remote_comm_id,
dreq_msg->local_comm_id);
- if (!cm_id_priv)
+ if (!cm_id_priv) {
+ cm_issue_drep(work->port, work->mad_recv_wc);
return -EINVAL;
+ }
work->cm_event.private_data = &dreq_msg->private_data;
More information about the general
mailing list