[ofw] CM ref counting issues...
Hefty, Sean
sean.hefty at intel.com
Thu Dec 17 13:31:25 PST 2009
>The only MADs that can be canceled are those that get retried: REQ, REP, LAP,
>and DREQ. Of these, the only one that needs some action when it gets canceled
>is the DREQ when the CEP has been destroyed.
>
>Does the following patch work for you? I haven't tested it (not even compiled,
>sorry.)
These are the basics of the patch that I'm currently testing. I think it's safer and more maintainable to always ignore any send completion if the CEP is
no longer tracking the MAD.
I kept the check for p_mad->status == IB_WCS_SUCCESS, but moved it up. However, I don't think that check will ever be true. The only time the status can be success is for non-repeated MADs, which means that p_mad->context1 will be NULL.
diff --git a/trunk/core/al/kernel/al_cm_cep.c b/trunk/core/al/kernel/al_cm_cep.c
index 49fa417..4d0199d 100644
--- a/trunk/core/al/kernel/al_cm_cep.c
+++ b/trunk/core/al/kernel/al_cm_cep.c
@@ -2227,91 +2227,72 @@ __cep_mad_send_cb(
p_mad->context1 = NULL;
KeAcquireInStackQueuedSpinLockAtDpcLevel( &gp_cep_mgr->lock, &hdl );
+ if( p_cep->p_send_mad != p_mad || p_mad->status == IB_WCS_SUCCESS)
+ {
+ KeReleaseInStackQueuedSpinLockFromDpcLevel( &hdl );
+ ib_put_mad( p_mad );
+ goto done;
+ }
+
/* Clear the sent MAD pointer so that we don't try cancelling again. */
if( p_cep->p_send_mad == p_mad )
p_cep->p_send_mad = NULL;
- switch( p_mad->status )
+ /* Treat as a timeout so we don't stall the state machine. */
+ if( p_mad->status == IB_WCS_CANCELED)
+ p_mad->status = IB_WCS_TIMEOUT_RETRY_ERR;
+
+ switch( p_cep->state )
{
- case IB_WCS_SUCCESS:
- KeReleaseInStackQueuedSpinLockFromDpcLevel( &hdl );
- ib_put_mad( p_mad );
+ case CEP_STATE_REQ_SENT:
+ case CEP_STATE_REQ_MRA_RCVD:
+ case CEP_STATE_REP_SENT:
+ case CEP_STATE_REP_MRA_RCVD:
+ /* Send the REJ. */
+ __reject_timeout( p_port_cep, p_cep, p_mad );
+ __remove_cep( p_cep );
+ p_cep->state = CEP_STATE_IDLE;
break;
- case IB_WCS_CANCELED:
- if( p_cep->state != CEP_STATE_REQ_SENT &&
- p_cep->state != CEP_STATE_REQ_MRA_RCVD &&
- p_cep->state != CEP_STATE_REP_SENT &&
- p_cep->state != CEP_STATE_REP_MRA_RCVD &&
- p_cep->state != CEP_STATE_LAP_SENT &&
- p_cep->state != CEP_STATE_LAP_MRA_RCVD &&
- p_cep->state != CEP_STATE_DREQ_SENT &&
- p_cep->state != CEP_STATE_SREQ_SENT )
- {
- KeReleaseInStackQueuedSpinLockFromDpcLevel( &hdl );
- ib_put_mad( p_mad );
- break;
- }
- /* Treat as a timeout so we don't stall the state machine. */
- p_mad->status = IB_WCS_TIMEOUT_RETRY_ERR;
-
+ case CEP_STATE_DREQ_DESTROY:
+ p_cep->state = CEP_STATE_DESTROY;
+ __insert_timewait( p_cep );
/* Fall through. */
- case IB_WCS_TIMEOUT_RETRY_ERR:
- default:
- /* Timeout. Reject the connection. */
- switch( p_cep->state )
- {
- case CEP_STATE_REQ_SENT:
- case CEP_STATE_REQ_MRA_RCVD:
- case CEP_STATE_REP_SENT:
- case CEP_STATE_REP_MRA_RCVD:
- /* Send the REJ. */
- __reject_timeout( p_port_cep, p_cep, p_mad );
- __remove_cep( p_cep );
- p_cep->state = CEP_STATE_IDLE;
- break;
-
- case CEP_STATE_DREQ_DESTROY:
- p_cep->state = CEP_STATE_DESTROY;
- __insert_timewait( p_cep );
- /* Fall through. */
-
- case CEP_STATE_DESTROY:
- KeReleaseInStackQueuedSpinLockFromDpcLevel( &hdl );
- ib_put_mad( p_mad );
- goto done;
-
- case CEP_STATE_DREQ_SENT:
- /*
- * Make up a DREP mad so we can respond if we receive
- * a DREQ while in timewait.
- */
- __format_mad_hdr( &p_cep->mads.drep.hdr, p_cep, CM_DREP_ATTR_ID );
- __format_drep( p_cep, NULL, 0, &p_cep->mads.drep );
- p_cep->state = CEP_STATE_TIMEWAIT;
- __insert_timewait( p_cep );
- break;
-
- case CEP_STATE_LAP_SENT:
- /*
- * Before CEP was sent, we have been in CEP_STATE_ESTABLISHED as we
- * failed to send, we return to that state.
- */
- p_cep->state = CEP_STATE_ESTABLISHED;
- break;
- default:
- break;
- }
- status = __cep_queue_mad( p_cep, p_mad );
- CL_ASSERT( status != IB_INVALID_STATE );
+ case CEP_STATE_DESTROY:
KeReleaseInStackQueuedSpinLockFromDpcLevel( &hdl );
+ ib_put_mad( p_mad );
+ goto done;
- if( status == IB_SUCCESS )
- __process_cep( p_cep );
+ case CEP_STATE_DREQ_SENT:
+ /*
+ * Make up a DREP mad so we can respond if we receive
+ * a DREQ while in timewait.
+ */
+ __format_mad_hdr( &p_cep->mads.drep.hdr, p_cep, CM_DREP_ATTR_ID );
+ __format_drep( p_cep, NULL, 0, &p_cep->mads.drep );
+ p_cep->state = CEP_STATE_TIMEWAIT;
+ __insert_timewait( p_cep );
+ break;
+
+ case CEP_STATE_LAP_SENT:
+ /*
+ * Before CEP was sent, we have been in CEP_STATE_ESTABLISHED as we
+ * failed to send, we return to that state.
+ */
+ p_cep->state = CEP_STATE_ESTABLISHED;
+ break;
+ default:
break;
}
+ status = __cep_queue_mad( p_cep, p_mad );
+ CL_ASSERT( status != IB_INVALID_STATE );
+ KeReleaseInStackQueuedSpinLockFromDpcLevel( &hdl );
+
+ if( status == IB_SUCCESS )
+ __process_cep( p_cep );
+
done:
pfn_destroy_cb = p_cep->pfn_destroy_cb;
cep_context = p_cep->context;
More information about the ofw
mailing list