[ofw] CM ref counting issues...

Hefty, Sean sean.hefty at intel.com
Thu Dec 17 13:31:25 PST 2009


>The only MADs that can be canceled are those that get retried: REQ, REP, LAP,
>and DREQ.  Of these, the only one that needs some action when it gets canceled
>is the DREQ when the CEP has been destroyed.
>
>Does the following patch work for you?  I haven't tested it (not even compiled,
>sorry.)

This is the basic patch that I'm currently testing.  I think it's safer and more maintainable to always ignore a send completion if the CEP is
no longer tracking the MAD.

I kept the check for p_mad->status == IB_WCS_SUCCESS, but moved it up.  However, I don't think that condition will ever be true: the only time the status can be success is for non-repeated MADs, in which case p_mad->context1 will be NULL.

diff --git a/trunk/core/al/kernel/al_cm_cep.c b/trunk/core/al/kernel/al_cm_cep.c
index 49fa417..4d0199d 100644
--- a/trunk/core/al/kernel/al_cm_cep.c
+++ b/trunk/core/al/kernel/al_cm_cep.c
@@ -2227,91 +2227,72 @@ __cep_mad_send_cb(
 	p_mad->context1 = NULL;
 
 	KeAcquireInStackQueuedSpinLockAtDpcLevel( &gp_cep_mgr->lock, &hdl );
+	if( p_cep->p_send_mad != p_mad || p_mad->status == IB_WCS_SUCCESS)
+	{
+		KeReleaseInStackQueuedSpinLockFromDpcLevel( &hdl );
+		ib_put_mad( p_mad );
+		goto done;
+	}
+
 	/* Clear the sent MAD pointer so that we don't try cancelling again. */
 	if( p_cep->p_send_mad == p_mad )
 		p_cep->p_send_mad = NULL;
 
-	switch( p_mad->status )
+	/* Treat as a timeout so we don't stall the state machine. */
+	if( p_mad->status == IB_WCS_CANCELED)
+		p_mad->status = IB_WCS_TIMEOUT_RETRY_ERR;
+
+	switch( p_cep->state )
 	{
-	case IB_WCS_SUCCESS:
-		KeReleaseInStackQueuedSpinLockFromDpcLevel( &hdl );
-		ib_put_mad( p_mad );
+	case CEP_STATE_REQ_SENT:
+	case CEP_STATE_REQ_MRA_RCVD:
+	case CEP_STATE_REP_SENT:
+	case CEP_STATE_REP_MRA_RCVD:
+		/* Send the REJ. */
+		__reject_timeout( p_port_cep, p_cep, p_mad );
+		__remove_cep( p_cep );
+		p_cep->state = CEP_STATE_IDLE;
 		break;
 
-	case IB_WCS_CANCELED:
-		if( p_cep->state != CEP_STATE_REQ_SENT &&
-			p_cep->state != CEP_STATE_REQ_MRA_RCVD &&
-			p_cep->state != CEP_STATE_REP_SENT &&
-			p_cep->state != CEP_STATE_REP_MRA_RCVD &&
-			p_cep->state != CEP_STATE_LAP_SENT &&
-			p_cep->state != CEP_STATE_LAP_MRA_RCVD &&
-			p_cep->state != CEP_STATE_DREQ_SENT &&
-			p_cep->state != CEP_STATE_SREQ_SENT )
-		{
-			KeReleaseInStackQueuedSpinLockFromDpcLevel( &hdl );
-			ib_put_mad( p_mad );
-			break;
-		}
-		/* Treat as a timeout so we don't stall the state machine. */
-		p_mad->status = IB_WCS_TIMEOUT_RETRY_ERR;
-
+	case CEP_STATE_DREQ_DESTROY:
+		p_cep->state = CEP_STATE_DESTROY;
+		__insert_timewait( p_cep );
 		/* Fall through. */
-	case IB_WCS_TIMEOUT_RETRY_ERR:
-	default:
-		/* Timeout.  Reject the connection. */
-		switch( p_cep->state )
-		{
-		case CEP_STATE_REQ_SENT:
-		case CEP_STATE_REQ_MRA_RCVD:
-		case CEP_STATE_REP_SENT:
-		case CEP_STATE_REP_MRA_RCVD:
-			/* Send the REJ. */
-			__reject_timeout( p_port_cep, p_cep, p_mad );
-			__remove_cep( p_cep );
-			p_cep->state = CEP_STATE_IDLE;
-			break;
-
-		case CEP_STATE_DREQ_DESTROY:
-			p_cep->state = CEP_STATE_DESTROY;
-			__insert_timewait( p_cep );
-			/* Fall through. */
-
-		case CEP_STATE_DESTROY:
-			KeReleaseInStackQueuedSpinLockFromDpcLevel( &hdl );
-			ib_put_mad( p_mad );
-			goto done;
-
-		case CEP_STATE_DREQ_SENT:
-			/*
-			 * Make up a DREP mad so we can respond if we receive
-			 * a DREQ while in timewait.
-			 */
-			__format_mad_hdr( &p_cep->mads.drep.hdr, p_cep, CM_DREP_ATTR_ID );
-			__format_drep( p_cep, NULL, 0, &p_cep->mads.drep );
-			p_cep->state = CEP_STATE_TIMEWAIT;
-			__insert_timewait( p_cep );
-			break;
-
-		case CEP_STATE_LAP_SENT:
-			/*
-			 * Before CEP was sent, we have been in CEP_STATE_ESTABLISHED as we
-			 * failed to send, we return to that state.
-			 */
-			p_cep->state = CEP_STATE_ESTABLISHED;
-			break;
-		default:
-			break;
-		}
 
-		status = __cep_queue_mad( p_cep, p_mad );
-		CL_ASSERT( status != IB_INVALID_STATE );
+	case CEP_STATE_DESTROY:
 		KeReleaseInStackQueuedSpinLockFromDpcLevel( &hdl );
+		ib_put_mad( p_mad );
+		goto done;
 
-		if( status == IB_SUCCESS )
-			__process_cep( p_cep );
+	case CEP_STATE_DREQ_SENT:
+		/*
+		 * Make up a DREP mad so we can respond if we receive
+		 * a DREQ while in timewait.
+		 */
+		__format_mad_hdr( &p_cep->mads.drep.hdr, p_cep, CM_DREP_ATTR_ID );
+		__format_drep( p_cep, NULL, 0, &p_cep->mads.drep );
+		p_cep->state = CEP_STATE_TIMEWAIT;
+		__insert_timewait( p_cep );
+		break;
+
+	case CEP_STATE_LAP_SENT:
+		/*
+		 * Before CEP was sent, we have been in CEP_STATE_ESTABLISHED as we
+		 * failed to send, we return to that state.
+		 */
+		p_cep->state = CEP_STATE_ESTABLISHED;
+		break;
+	default:
 		break;
 	}
 
+	status = __cep_queue_mad( p_cep, p_mad );
+	CL_ASSERT( status != IB_INVALID_STATE );
+	KeReleaseInStackQueuedSpinLockFromDpcLevel( &hdl );
+
+	if( status == IB_SUCCESS )
+		__process_cep( p_cep );
+
 done:
 	pfn_destroy_cb = p_cep->pfn_destroy_cb;
 	cep_context = p_cep->context;



More information about the ofw mailing list