[openib-general] [PATCH] CM: add cleanup for missing connection endpoint states
Sean Hefty
mshefty at ichips.intel.com
Thu Jan 20 00:29:25 PST 2005
This patch adds cleanup code for connection endpoint states that were
previously not handled when destroying a connection identifier. As part of
the fix, insertion/deletion issues in tracking connections by their remote
communication ID have been addressed. The patch also forces state transitions
to IDLE/TIMEWAIT when rejecting a connection or when sending a DREQ/DREP fails.
Signed-off-by: Sean Hefty <sean.hefty at intel.com>
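As a rough caller-side illustration of what this enables (a minimal sketch,
not part of the patch; the helper name below is made up and the usual
ib_cm.h declarations are assumed): with the added cases, ib_destroy_cm_id()
can be called from any endpoint state and will issue the REJ, DREQ, or DREP
needed to unwind the connection before the id is released.

    /* Hypothetical consumer teardown - illustrative only.  The caller
     * needs no per-state handling; ib_destroy_cm_id() now rejects or
     * disconnects as appropriate and drops any remote-id table entry
     * before freeing the identifier.
     */
    static int example_teardown(struct ib_cm_id *cm_id)
    {
            return ib_destroy_cm_id(cm_id);
    }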
Index: core/cm.c
===================================================================
--- core/cm.c (revision 1592)
+++ core/cm.c (working copy)
@@ -60,7 +60,6 @@
static struct ib_cm {
spinlock_t lock;
- /* todo: (high priority) walk through table insertion/removal paths */
struct rb_root service_table;
struct rb_root remote_qp_table;
struct rb_root remote_id_table;
@@ -109,6 +108,7 @@
union ib_gid remote_port_gid;
int timeout_ms;
u8 max_cm_retries;
+ u8 passive;
};
struct cm_recv_work {
@@ -360,21 +360,18 @@
return NULL;
}
-static struct cm_id_private * cm_acquire_id_by_remote_id(u64 remote_ca_guid,
- u32 remote_id)
+static void cm_remove_remote_id(struct cm_id_private *cm_id_priv)
{
- struct cm_id_private *cm_id_priv;
unsigned long flags;
+ cm_id_priv->passive = 0;
spin_lock_irqsave(&cm.lock, flags);
- cm_id_priv = cm_find_id_by_remote_id(remote_ca_guid, remote_id);
- if (cm_id_priv)
- atomic_inc(&cm_id_priv->refcount);
+ rb_erase(&cm_id_priv->remote_id_node, &cm.remote_id_table);
spin_unlock_irqrestore(&cm.lock, flags);
-
- return cm_id_priv;
}
+/*
+todo: add test for stale connections
static struct cm_id_private * cm_insert_remote_qpn(struct cm_id_private
*cm_id_priv)
{
@@ -436,21 +433,7 @@
}
return NULL;
}
-
-static struct cm_id_private * cm_acquire_id_by_remote_qpn(union ib_gid *port_gid,
- u32 remote_qpn)
-{
- struct cm_id_private *cm_id_priv;
- unsigned long flags;
-
- spin_lock_irqsave(&cm.lock, flags);
- cm_id_priv = cm_find_id_by_remote_qpn(port_gid, remote_qpn);
- if (cm_id_priv)
- atomic_inc(&cm_id_priv->refcount);
- spin_unlock_irqrestore(&cm.lock, flags);
-
- return cm_id_priv;
-}
+*/
static struct cm_id_private * cm_insert_remote_sidr(struct cm_id_private
*cm_id_priv)
@@ -528,19 +511,18 @@
int ib_destroy_cm_id(struct ib_cm_id *cm_id)
{
struct cm_id_private *cm_id_priv;
- unsigned long flags, flags2;
+ unsigned long flags;
cm_id_priv = container_of(cm_id, struct cm_id_private, id);
-
+retest:
spin_lock_irqsave(&cm_id_priv->lock, flags);
- /* todo: enter timewait state */
switch (cm_id->state) {
case IB_CM_LISTEN:
- spin_lock_irqsave(&cm.lock, flags2);
- rb_erase(&cm_id_priv->service_node, &cm.service_table);
- spin_unlock_irqrestore(&cm.lock, flags2);
cm_id->state = IB_CM_IDLE;
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ spin_lock_irqsave(&cm.lock, flags);
+ rb_erase(&cm_id_priv->service_node, &cm.service_table);
+ spin_unlock_irqrestore(&cm.lock, flags);
break;
case IB_CM_SIDR_REQ_SENT:
cm_id->state = IB_CM_IDLE;
@@ -552,7 +534,35 @@
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
cm_reject_sidr_req(cm_id_priv, IB_SIDR_REJECT);
break;
+ case IB_CM_REQ_SENT:
+ case IB_CM_MRA_REQ_RCVD:
+ case IB_CM_REQ_RCVD:
+ case IB_CM_MRA_REQ_SENT:
+ case IB_CM_REP_RCVD:
+ case IB_CM_MRA_REP_SENT:
+ case IB_CM_REP_SENT:
+ case IB_CM_MRA_REP_RCVD:
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ ib_send_cm_rej(cm_id, IB_CM_REJ_TIMEOUT,
+ &cm_id_priv->port->ca_guid,
+ sizeof cm_id_priv->port->ca_guid, NULL, 0);
+ break;
+ case IB_CM_ESTABLISHED:
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ ib_send_cm_dreq(cm_id, NULL, 0);
+ goto retest;
+ case IB_CM_DREQ_SENT:
+ cm_id->state = IB_CM_TIMEWAIT;
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ if (cm_id_priv->passive)
+ cm_remove_remote_id(cm_id_priv);
+ break;
+ case IB_CM_DREQ_RCVD:
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ ib_send_cm_drep(cm_id, NULL, 0);
+ break;
default:
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
break;
}
@@ -585,6 +595,8 @@
cm_id->service_id = service_id;
cm_id->service_mask = service_mask ? service_mask : ~0ULL;
+ /* todo: simplify - no need to check state above, so just insert
+ and see if something was already there */
spin_lock_irqsave(&cm.lock, flags);
if (cm_find_service(service_id)) {
/* No one else is able to change the cm_id_priv state. */
@@ -760,6 +772,7 @@
cm_id_priv->msg = msg;
if (param->peer_to_peer) {
+ /* todo: locking is out of order - fix me */
spin_lock_irqsave(&cm.lock, flags2);
cm_insert_service(cm_id_priv);
spin_unlock_irqrestore(&cm.lock, flags2);
@@ -839,6 +852,7 @@
req_msg = (struct cm_req_msg *)recv_work->mad_recv_wc->recv_buf.mad;
cm_id_priv->remote_ca_guid = req_msg->local_ca_guid;
cm_id_priv->id.remote_id = req_msg->local_comm_id;
+ cm_id_priv->passive = 1;
spin_lock_irqsave(&cm.lock, flags);
/* Check for duplicate REQ. */
@@ -856,7 +870,8 @@
goto out; /* No match. */
}
spin_lock_irqsave(&cur_cm_id_priv->lock, flags2);
- if (cur_cm_id_priv->id.state == IB_CM_LISTEN) {
+ switch (cur_cm_id_priv->id.state) {
+ case IB_CM_LISTEN:
atomic_inc(&cur_cm_id_priv->refcount);
spin_unlock_irqrestore(&cur_cm_id_priv->lock, flags2);
cm_insert_remote_id(cm_id_priv);
@@ -867,7 +882,8 @@
cm_id_priv->id.service_id = req_msg->service_id;
cm_id_priv->id.service_mask = ~0ULL;
cm_id_priv->id.state = IB_CM_REQ_RCVD;
- } else {
+ break;
+ case IB_CM_REQ_SENT:
/* Process peer requests. */
if (cm_is_active_peer(recv_work->port->ca_guid,
req_msg->local_ca_guid,
@@ -889,6 +905,11 @@
(unsigned long) cur_cm_id_priv->msg);
ib_destroy_cm_id(&cm_id_priv->id);
cm_id_priv = cur_cm_id_priv;
+ break;
+ default:
+ spin_unlock_irqrestore(&cm.lock, flags);
+ /* todo: reject with no match */
+ goto out; /* No match. */
}
cm_id_priv->port = recv_work->port;
cm_id_priv->timeout_ms = cm_convert_to_ms(
@@ -1236,38 +1257,41 @@
struct cm_msg *msg;
struct ib_send_wr *bad_send_wr;
unsigned long flags;
- int ret;
+ int msg_ret, ret;
if (private_data && private_data_len > IB_CM_DREQ_PRIVATE_DATA_SIZE)
return -EINVAL;
cm_id_priv = container_of(cm_id, struct cm_id_private, id);
- ret = cm_alloc_msg(cm_id_priv, &msg);
- if (ret)
- goto out;
-
- cm_format_dreq((struct cm_dreq_msg *)&msg->mad, cm_id_priv,
- private_data, private_data_len);
- msg->send_wr.wr.ud.timeout_ms = cm_id_priv->timeout_ms;
+ msg_ret = cm_alloc_msg(cm_id_priv, &msg);
+ if (!msg_ret) {
+ cm_format_dreq((struct cm_dreq_msg *)&msg->mad, cm_id_priv,
+ private_data, private_data_len);
+ msg->send_wr.wr.ud.timeout_ms = cm_id_priv->timeout_ms;
+ }
spin_lock_irqsave(&cm_id_priv->lock, flags);
- if (cm_id->state == IB_CM_ESTABLISHED)
- ret = ib_post_send_mad(cm_id_priv->port->mad_agent,
- &msg->send_wr, &bad_send_wr);
- else
- ret = -EINVAL;
-
- if (ret) {
+ if (cm_id->state != IB_CM_ESTABLISHED) {
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- cm_free_msg(cm_id_priv->msg);
+ ret = -EINVAL;
goto out;
}
-
- cm_id->state = IB_CM_DREQ_SENT;
- msg->sent_state = IB_CM_DREQ_SENT;
- cm_id_priv->msg = msg;
+ ret = msg_ret ? msg_ret :
+ ib_post_send_mad(cm_id_priv->port->mad_agent,
+ &msg->send_wr, &bad_send_wr);
+ if (!ret) {
+ cm_id->state = IB_CM_DREQ_SENT;
+ cm_id_priv->msg = msg;
+ msg->sent_state = IB_CM_DREQ_SENT;
+ } else
+ cm_id->state = IB_CM_TIMEWAIT;
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+ if (ret && cm_id_priv->passive)
+ cm_remove_remote_id(cm_id_priv);
out:
+ if (!msg_ret && ret)
+ cm_free_msg(msg);
return ret;
}
EXPORT_SYMBOL(ib_send_cm_dreq);
@@ -1329,35 +1353,35 @@
struct cm_msg *msg;
struct ib_send_wr *bad_send_wr;
unsigned long flags;
- int ret;
+ int msg_ret, ret;
if (private_data && private_data_len > IB_CM_DREP_PRIVATE_DATA_SIZE)
return -EINVAL;
cm_id_priv = container_of(cm_id, struct cm_id_private, id);
- ret = cm_alloc_msg(cm_id_priv, &msg);
- if (ret)
- goto out;
-
- cm_format_drep((struct cm_drep_msg *)&msg->mad, cm_id_priv,
- private_data, private_data_len);
+ msg_ret = cm_alloc_msg(cm_id_priv, &msg);
+ if (!msg_ret)
+ cm_format_drep((struct cm_drep_msg *)&msg->mad, cm_id_priv,
+ private_data, private_data_len);
spin_lock_irqsave(&cm_id_priv->lock, flags);
- if (cm_id->state == IB_CM_DREQ_RCVD)
- ret = ib_post_send_mad(cm_id_priv->port->mad_agent,
- &msg->send_wr, &bad_send_wr);
- else
- ret = -EINVAL;
-
- if (ret) {
+ if (cm_id->state != IB_CM_DREQ_RCVD) {
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- cm_free_msg(cm_id_priv->msg);
+ ret = -EINVAL;
goto out;
}
+ ret = msg_ret ? msg_ret :
+ ib_post_send_mad(cm_id_priv->port->mad_agent,
+ &msg->send_wr, &bad_send_wr);
cm_id->state = IB_CM_TIMEWAIT;
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+ if (cm_id_priv->passive)
+ cm_remove_remote_id(cm_id_priv);
out:
+ if (!msg_ret && ret)
+ cm_free_msg(msg);
return ret;
}
EXPORT_SYMBOL(ib_send_cm_drep);
@@ -1382,6 +1406,9 @@
cm_id_priv->id.state = IB_CM_TIMEWAIT;
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ if (cm_id_priv->passive)
+ cm_remove_remote_id(cm_id_priv);
+
ib_cancel_mad(recv_work->port->mad_agent,
(unsigned long) cm_id_priv->msg);
@@ -1444,43 +1471,49 @@
struct cm_msg *msg;
struct ib_send_wr *bad_send_wr;
unsigned long flags;
- int ret;
+ int msg_ret, ret;
if ((private_data && private_data_len > IB_CM_REJ_PRIVATE_DATA_SIZE) ||
(ari && ari_length > IB_CM_REJ_ARI_LENGTH))
return -EINVAL;
cm_id_priv = container_of(cm_id, struct cm_id_private, id);
- ret = cm_alloc_msg(cm_id_priv, &msg);
- if (ret)
- goto out;
-
- cm_format_rej((struct cm_rej_msg *)&msg->mad, cm_id_priv, reason,
- ari, ari_length, private_data, private_data_len);
+ msg_ret = cm_alloc_msg(cm_id_priv, &msg);
+ if (!msg_ret)
+ cm_format_rej((struct cm_rej_msg *) &msg->mad, cm_id_priv,
+ reason, ari, ari_length, private_data,
+ private_data_len);
spin_lock_irqsave(&cm_id_priv->lock, flags);
switch (cm_id->state) {
+ case IB_CM_REQ_SENT:
+ case IB_CM_MRA_REQ_RCVD:
case IB_CM_REQ_RCVD:
case IB_CM_MRA_REQ_SENT:
case IB_CM_REP_RCVD:
case IB_CM_MRA_REP_SENT:
- ret = ib_post_send_mad(cm_id_priv->port->mad_agent,
- &msg->send_wr, &bad_send_wr);
+ cm_id->state = IB_CM_IDLE;
break;
- default:
- ret = -EINVAL;
+ case IB_CM_REP_SENT:
+ case IB_CM_MRA_REP_RCVD:
+ cm_id->state = IB_CM_TIMEWAIT;
break;
- }
-
- if (ret) {
+ default:
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- cm_free_msg(cm_id_priv->msg);
+ ret = -EINVAL;
goto out;
}
-
- cm_id->state = IB_CM_IDLE;
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+ if (cm_id_priv->passive)
+ cm_remove_remote_id(cm_id_priv);
+
+ ret = msg_ret ? msg_ret :
+ ib_post_send_mad(cm_id_priv->port->mad_agent,
+ &msg->send_wr, &bad_send_wr);
out:
+ if (!msg_ret && ret)
+ cm_free_msg(msg);
return ret;
}
EXPORT_SYMBOL(ib_send_cm_rej);
@@ -1797,7 +1830,6 @@
cm_free_msg(cm_id_priv->msg);
goto out;
}
-
cm_id->lap_state = IB_CM_LAP_IDLE;
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
out:
@@ -2125,6 +2157,8 @@
goto discard;
}
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ if (cm_id_priv->passive)
+ cm_remove_remote_id(cm_id_priv);
cm_event.param.send_status = wc_status;
cm_id_priv->id.cm_handler(&cm_id_priv->id, &cm_event);
cm_free_msg(msg);
@@ -2151,17 +2185,15 @@
cm_free_msg(msg);
return;
}
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
if (msg->retry++ < cm_id_priv->max_cm_retries) {
ret = ib_post_send_mad(cm_id_priv->port->mad_agent,
&msg->send_wr, &bad_send_wr);
- spin_unlock_irqrestore(&cm_id_priv->lock, flags);
if (ret)
cm_process_send_error(msg, IB_WC_GENERAL_ERR);
- } else {
- spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ } else
cm_process_send_error(msg, IB_WC_RESP_TIMEOUT_ERR);
- }
}
static void cm_send_handler(struct ib_mad_agent *mad_agent,