[ofw] [PATCH] ib/cm: fix handling failed send completions

Sean Hefty sean.hefty at intel.com
Thu Dec 17 14:52:41 PST 2009


__cep_mad_send_cb() assumes that the mad being processed is
associated with the current state of the CEP.  This may not be
the case.

For example, for a short lived connection, it was observed that
a REP mad completed with status canceled.  This is normal.  However,
the user already attempted to disconnect the connection by sending
a DREQ.  This left the cep in the DREQ_SENT state by the time that
the REP mad completed.  Since the REP status was not success, but the
state was DREQ_SENT, the code assumed that the DREQ had failed and
transitioned the cep into TIMEWAIT.  The result is that the DREQ is never
matched with a DREP or canceled, but holds a reference on the CEP.

Until the DREQ times out (time depends on connection, but easily
up to a minute), attempts to destroy the CEP are blocked.

Fix this by simply discarding any completed sends that were not
sent from the current state of the cep when the completion handler
is invoked. 

Signed-off-by: Sean Hefty <sean.hefty at intel.com>
---
 trunk/core/al/kernel/al_cm.c     |    2 -
 trunk/core/al/kernel/al_cm_cep.c |  140 +++++++++++++++-----------------------
 2 files changed, 56 insertions(+), 86 deletions(-)

diff --git a/trunk/core/al/kernel/al_cm.c b/trunk/core/al/kernel/al_cm.c
index 48b0cb5..955985a 100644
--- a/trunk/core/al/kernel/al_cm.c
+++ b/trunk/core/al/kernel/al_cm.c
@@ -37,7 +37,7 @@
 typedef struct _iba_cm_id_priv
 {
 	iba_cm_id	id;
-	KEVENT		destroy_event;	
+	KEVENT		destroy_event;
 
 }	iba_cm_id_priv;
 
diff --git a/trunk/core/al/kernel/al_cm_cep.c b/trunk/core/al/kernel/al_cm_cep.c
index 49fa417..4749e1d 100644
--- a/trunk/core/al/kernel/al_cm_cep.c
+++ b/trunk/core/al/kernel/al_cm_cep.c
@@ -27,7 +27,7 @@
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  *
- * $Id$
+ * $Id: al_cm_cep.c 2540 2009-11-03 17:23:09Z shefty $
  */
 
 
@@ -2213,105 +2213,79 @@ __cep_mad_send_cb(
 
 	p_cep = (kcep_t*)p_mad->context1;
 
-	/*
-	 * The connection context is not set when performing immediate responses,
-	 * such as repeating MADS.
-	 */
-	if( !p_cep )
+	/* The cep context is only set for MADs that are retried. */
+	if( !p_cep  )
 	{
 		ib_put_mad( p_mad );
 		AL_EXIT( AL_DBG_CM );
 		return;
 	}
 
+	CL_ASSERT( p_mad->status != IB_WCS_SUCCESS );
 	p_mad->context1 = NULL;
 
 	KeAcquireInStackQueuedSpinLockAtDpcLevel( &gp_cep_mgr->lock, &hdl );
-	/* Clear the sent MAD pointer so that we don't try cancelling again. */
-	if( p_cep->p_send_mad == p_mad )
-		p_cep->p_send_mad = NULL;
-
-	switch( p_mad->status )
+	if( p_cep->p_send_mad != p_mad )
 	{
-	case IB_WCS_SUCCESS:
 		KeReleaseInStackQueuedSpinLockFromDpcLevel( &hdl );
 		ib_put_mad( p_mad );
-		break;
-
-	case IB_WCS_CANCELED:
-		if( p_cep->state != CEP_STATE_REQ_SENT &&
-			p_cep->state != CEP_STATE_REQ_MRA_RCVD &&
-			p_cep->state != CEP_STATE_REP_SENT &&
-			p_cep->state != CEP_STATE_REP_MRA_RCVD &&
-			p_cep->state != CEP_STATE_LAP_SENT &&
-			p_cep->state != CEP_STATE_LAP_MRA_RCVD &&
-			p_cep->state != CEP_STATE_DREQ_SENT &&
-			p_cep->state != CEP_STATE_SREQ_SENT )
-		{
-			KeReleaseInStackQueuedSpinLockFromDpcLevel( &hdl );
-			ib_put_mad( p_mad );
-			break;
-		}
-		/* Treat as a timeout so we don't stall the state machine. */
-		p_mad->status = IB_WCS_TIMEOUT_RETRY_ERR;
-
-		/* Fall through. */
-	case IB_WCS_TIMEOUT_RETRY_ERR:
-	default:
-		/* Timeout.  Reject the connection. */
-		switch( p_cep->state )
-		{
-		case CEP_STATE_REQ_SENT:
-		case CEP_STATE_REQ_MRA_RCVD:
-		case CEP_STATE_REP_SENT:
-		case CEP_STATE_REP_MRA_RCVD:
-			/* Send the REJ. */
-			__reject_timeout( p_port_cep, p_cep, p_mad );
-			__remove_cep( p_cep );
-			p_cep->state = CEP_STATE_IDLE;
-			break;
-
-		case CEP_STATE_DREQ_DESTROY:
-			p_cep->state = CEP_STATE_DESTROY;
-			__insert_timewait( p_cep );
-			/* Fall through. */
+		goto done;
+	}
 
-		case CEP_STATE_DESTROY:
-			KeReleaseInStackQueuedSpinLockFromDpcLevel( &hdl );
-			ib_put_mad( p_mad );
-			goto done;
+	/* Clear the sent MAD pointer so that we don't try cancelling again. */
+	p_cep->p_send_mad = NULL;
 
-		case CEP_STATE_DREQ_SENT:
-			/*
-			 * Make up a DREP mad so we can respond if we receive
-			 * a DREQ while in timewait.
-			 */
-			__format_mad_hdr( &p_cep->mads.drep.hdr, p_cep, CM_DREP_ATTR_ID );
-			__format_drep( p_cep, NULL, 0, &p_cep->mads.drep );
-			p_cep->state = CEP_STATE_TIMEWAIT;
-			__insert_timewait( p_cep );
-			break;
+	switch( p_cep->state )
+	{
+	case CEP_STATE_REQ_SENT:
+	case CEP_STATE_REQ_MRA_RCVD:
+	case CEP_STATE_REP_SENT:
+	case CEP_STATE_REP_MRA_RCVD:
+		/* Send the REJ. */
+		__reject_timeout( p_port_cep, p_cep, p_mad );
+		__remove_cep( p_cep );
+		p_cep->state = CEP_STATE_IDLE;
+		break;
 
-		case CEP_STATE_LAP_SENT:
-			/*
-			 * Before CEP was sent, we have been in CEP_STATE_ESTABLISHED as we
-			 * failed to send, we return to that state.
-			 */
-			p_cep->state = CEP_STATE_ESTABLISHED;
-			break;
-		default:
-			break;
-		}
+	case CEP_STATE_DREQ_DESTROY:
+		p_cep->state = CEP_STATE_DESTROY;
+		__insert_timewait( p_cep );
+		/* Fall through. */
 
-		status = __cep_queue_mad( p_cep, p_mad );
-		CL_ASSERT( status != IB_INVALID_STATE );
+	case CEP_STATE_DESTROY:
 		KeReleaseInStackQueuedSpinLockFromDpcLevel( &hdl );
+		ib_put_mad( p_mad );
+		goto done;
 
-		if( status == IB_SUCCESS )
-			__process_cep( p_cep );
+	case CEP_STATE_DREQ_SENT:
+		/*
+		 * Make up a DREP mad so we can respond if we receive
+		 * a DREQ while in timewait.
+		 */
+		__format_mad_hdr( &p_cep->mads.drep.hdr, p_cep, CM_DREP_ATTR_ID );
+		__format_drep( p_cep, NULL, 0, &p_cep->mads.drep );
+		p_cep->state = CEP_STATE_TIMEWAIT;
+		__insert_timewait( p_cep );
+		break;
+
+	case CEP_STATE_LAP_SENT:
+		/*
+		 * Before CEP was sent, we have been in CEP_STATE_ESTABLISHED as we
+		 * failed to send, we return to that state.
+		 */
+		p_cep->state = CEP_STATE_ESTABLISHED;
+		break;
+	default:
 		break;
 	}
 
+	status = __cep_queue_mad( p_cep, p_mad );
+	CL_ASSERT( status != IB_INVALID_STATE );
+	KeReleaseInStackQueuedSpinLockFromDpcLevel( &hdl );
+
+	if( status == IB_SUCCESS )
+		__process_cep( p_cep );
+
 done:
 	pfn_destroy_cb = p_cep->pfn_destroy_cb;
 	cep_context = p_cep->context;
@@ -3938,12 +3912,8 @@ __cleanup_cep(
 	CL_ASSERT( KeGetCurrentIrql() == DISPATCH_LEVEL );
 
 	/* If we've already come through here, we're done. */
-	if( p_cep->state == CEP_STATE_DESTROY ||
-		p_cep->state == CEP_STATE_DREQ_DESTROY )
-	{
-		AL_EXIT( AL_DBG_CM );
-		return -1;
-	}
+	CL_ASSERT( p_cep->state != CEP_STATE_DESTROY &&
+		p_cep->state != CEP_STATE_DREQ_DESTROY );
 
 	/* Cleanup the pending MAD list. */
 	while( p_cep->p_mad_head )

-------------- next part --------------
A non-text attachment was scrubbed...
Name: cm_send_cb
Type: application/octet-stream
Size: 6726 bytes
Desc: not available
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20091217/f68849b1/attachment.obj>


More information about the ofw mailing list