[ofw] [PATCH] ib/cm: fix handling failed send completions
Smith, Stan
stan.smith at intel.com
Mon Jan 4 11:51:14 PST 2010
Hefty, Sean wrote:
> committed to trunk - needs to be added to winof 2.2
Hello,
w.r.t. WinOF 2.2,
1) what larger set of application problems does this patch address?
2) what type/degree of testing has been successfully passed with this patch applied?
Stan.
>
>> -----Original Message-----
>> From: ofw-bounces at lists.openfabrics.org [mailto:ofw-
>> bounces at lists.openfabrics.org] On Behalf Of Sean Hefty
>> Sent: Thursday, December 17, 2009 2:53 PM
>> To: ofw at lists.openfabrics.org
>> Subject: [ofw] [PATCH] ib/cm: fix handling failed send completions
>>
>> __cep_mad_send_cb() assumes that the mad being processed is
>> associated with the current state of the CEP. This may not be
>> the case.
>>
>> For example, for a short lived connection, it was observed that
>> a REP mad completed with status canceled. This is normal. However,
>> the user already attempted to disconnect the connection by sending
>> a DREQ. This left the cep in the DREQ_SENT state by the time that
>> the REP mad completed. Since the REP status was not success, but the
>> state was DREQ_SENT, the code assumed that the DREQ had failed and
>> transitioned the cep into TIMEWAIT. The result is that the DREQ is
>> never matched with a DREP or canceled, but holds a reference on the
>> CEP.
>>
>> Until the DREQ times out (time depends on connection, but easily
>> up to a minute), attempts to destroy the CEP are blocked.
>>
>> Fix this by simply discarding any completed sends that were not
>> sent from the current state of the cep when the completion handler
>> is invoked.
>>
>> Signed-off-by: Sean Hefty <sean.hefty at intel.com>
>> ---
>> trunk/core/al/kernel/al_cm.c | 2 -
>> trunk/core/al/kernel/al_cm_cep.c | 140
>> +++++++++++++++----------------------- 2 files changed, 56
>> insertions(+), 86 deletions(-)
>>
>> diff --git a/trunk/core/al/kernel/al_cm.c
>> b/trunk/core/al/kernel/al_cm.c
>> index 48b0cb5..955985a 100644
>> --- a/trunk/core/al/kernel/al_cm.c
>> +++ b/trunk/core/al/kernel/al_cm.c
>> @@ -37,7 +37,7 @@
>> typedef struct _iba_cm_id_priv
>> {
>> iba_cm_id id;
>> - KEVENT destroy_event;
>> + KEVENT destroy_event;
>>
>> } iba_cm_id_priv;
>>
>> diff --git a/trunk/core/al/kernel/al_cm_cep.c
>> b/trunk/core/al/kernel/al_cm_cep.c
>> index 49fa417..4749e1d 100644
>> --- a/trunk/core/al/kernel/al_cm_cep.c
>> +++ b/trunk/core/al/kernel/al_cm_cep.c
>> @@ -27,7 +27,7 @@
>> * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
>> * SOFTWARE.
>> *
>> - * $Id$
>> + * $Id: al_cm_cep.c 2540 2009-11-03 17:23:09Z shefty $ */
>>
>>
>> @@ -2213,105 +2213,79 @@ __cep_mad_send_cb(
>>
>> p_cep = (kcep_t*)p_mad->context1;
>>
>> - /*
>> - * The connection context is not set when performing immediate
>> responses,
>> - * such as repeating MADS.
>> - */
>> - if( !p_cep )
>> + /* The cep context is only set for MADs that are retried. */ + if(
>> !p_cep ) {
>> ib_put_mad( p_mad );
>> AL_EXIT( AL_DBG_CM );
>> return;
>> }
>>
>> + CL_ASSERT( p_mad->status != IB_WCS_SUCCESS );
>> p_mad->context1 = NULL;
>>
>> KeAcquireInStackQueuedSpinLockAtDpcLevel( &gp_cep_mgr->lock, &hdl );
>> - /* Clear the sent MAD pointer so that we don't try cancelling
>> again. */
>> - if( p_cep->p_send_mad == p_mad )
>> - p_cep->p_send_mad = NULL;
>> -
>> - switch( p_mad->status )
>> + if( p_cep->p_send_mad != p_mad )
>> {
>> - case IB_WCS_SUCCESS:
>> KeReleaseInStackQueuedSpinLockFromDpcLevel( &hdl ); ib_put_mad(
>> p_mad );
>> - break;
>> -
>> - case IB_WCS_CANCELED:
>> - if( p_cep->state != CEP_STATE_REQ_SENT &&
>> - p_cep->state != CEP_STATE_REQ_MRA_RCVD &&
>> - p_cep->state != CEP_STATE_REP_SENT &&
>> - p_cep->state != CEP_STATE_REP_MRA_RCVD &&
>> - p_cep->state != CEP_STATE_LAP_SENT &&
>> - p_cep->state != CEP_STATE_LAP_MRA_RCVD &&
>> - p_cep->state != CEP_STATE_DREQ_SENT &&
>> - p_cep->state != CEP_STATE_SREQ_SENT )
>> - {
>> - KeReleaseInStackQueuedSpinLockFromDpcLevel( &hdl );
>> - ib_put_mad( p_mad );
>> - break;
>> - }
>> - /* Treat as a timeout so we don't stall the state machine. */
>> - p_mad->status = IB_WCS_TIMEOUT_RETRY_ERR;
>> -
>> - /* Fall through. */
>> - case IB_WCS_TIMEOUT_RETRY_ERR:
>> - default:
>> - /* Timeout. Reject the connection. */
>> - switch( p_cep->state )
>> - {
>> - case CEP_STATE_REQ_SENT:
>> - case CEP_STATE_REQ_MRA_RCVD:
>> - case CEP_STATE_REP_SENT:
>> - case CEP_STATE_REP_MRA_RCVD:
>> - /* Send the REJ. */
>> - __reject_timeout( p_port_cep, p_cep, p_mad );
>> - __remove_cep( p_cep );
>> - p_cep->state = CEP_STATE_IDLE;
>> - break;
>> -
>> - case CEP_STATE_DREQ_DESTROY:
>> - p_cep->state = CEP_STATE_DESTROY;
>> - __insert_timewait( p_cep );
>> - /* Fall through. */
>> + goto done;
>> + }
>>
>> - case CEP_STATE_DESTROY:
>> - KeReleaseInStackQueuedSpinLockFromDpcLevel( &hdl );
>> - ib_put_mad( p_mad );
>> - goto done;
>> + /* Clear the sent MAD pointer so that we don't try cancelling
>> again. */ + p_cep->p_send_mad = NULL;
>>
>> - case CEP_STATE_DREQ_SENT:
>> - /*
>> - * Make up a DREP mad so we can respond if we receive
>> - * a DREQ while in timewait.
>> - */
>> - __format_mad_hdr( &p_cep->mads.drep.hdr, p_cep,
>> CM_DREP_ATTR_ID );
>> - __format_drep( p_cep, NULL, 0, &p_cep->mads.drep );
>> - p_cep->state = CEP_STATE_TIMEWAIT;
>> - __insert_timewait( p_cep );
>> - break;
>> + switch( p_cep->state )
>> + {
>> + case CEP_STATE_REQ_SENT:
>> + case CEP_STATE_REQ_MRA_RCVD:
>> + case CEP_STATE_REP_SENT:
>> + case CEP_STATE_REP_MRA_RCVD:
>> + /* Send the REJ. */
>> + __reject_timeout( p_port_cep, p_cep, p_mad );
>> + __remove_cep( p_cep );
>> + p_cep->state = CEP_STATE_IDLE;
>> + break;
>>
>> - case CEP_STATE_LAP_SENT:
>> - /*
>> - * Before CEP was sent, we have been in CEP_STATE_ESTABLISHED
>> as we
>> - * failed to send, we return to that state.
>> - */
>> - p_cep->state = CEP_STATE_ESTABLISHED;
>> - break;
>> - default:
>> - break;
>> - }
>> + case CEP_STATE_DREQ_DESTROY:
>> + p_cep->state = CEP_STATE_DESTROY;
>> + __insert_timewait( p_cep );
>> + /* Fall through. */
>>
>> - status = __cep_queue_mad( p_cep, p_mad );
>> - CL_ASSERT( status != IB_INVALID_STATE );
>> + case CEP_STATE_DESTROY:
>> KeReleaseInStackQueuedSpinLockFromDpcLevel( &hdl ); + ib_put_mad(
>> p_mad ); + goto done;
>>
>> - if( status == IB_SUCCESS )
>> - __process_cep( p_cep );
>> + case CEP_STATE_DREQ_SENT:
>> + /*
>> + * Make up a DREP mad so we can respond if we receive
>> + * a DREQ while in timewait.
>> + */
>> + __format_mad_hdr( &p_cep->mads.drep.hdr, p_cep, CM_DREP_ATTR_ID );
>> + __format_drep( p_cep, NULL, 0, &p_cep->mads.drep );
>> + p_cep->state = CEP_STATE_TIMEWAIT;
>> + __insert_timewait( p_cep );
>> + break;
>> +
>> + case CEP_STATE_LAP_SENT:
>> + /*
>> + * Before CEP was sent, we have been in CEP_STATE_ESTABLISHED as
>> we + * failed to send, we return to that state.
>> + */
>> + p_cep->state = CEP_STATE_ESTABLISHED;
>> + break;
>> + default:
>> break;
>> }
>>
>> + status = __cep_queue_mad( p_cep, p_mad );
>> + CL_ASSERT( status != IB_INVALID_STATE );
>> + KeReleaseInStackQueuedSpinLockFromDpcLevel( &hdl ); +
>> + if( status == IB_SUCCESS )
>> + __process_cep( p_cep );
>> +
>> done:
>> pfn_destroy_cb = p_cep->pfn_destroy_cb;
>> cep_context = p_cep->context;
>> @@ -3938,12 +3912,8 @@ __cleanup_cep(
>> CL_ASSERT( KeGetCurrentIrql() == DISPATCH_LEVEL );
>>
>> /* If we've already come through here, we're done. */
>> - if( p_cep->state == CEP_STATE_DESTROY ||
>> - p_cep->state == CEP_STATE_DREQ_DESTROY )
>> - {
>> - AL_EXIT( AL_DBG_CM );
>> - return -1;
>> - }
>> + CL_ASSERT( p_cep->state != CEP_STATE_DESTROY &&
>> + p_cep->state != CEP_STATE_DREQ_DESTROY );
>>
>> /* Cleanup the pending MAD list. */
>> while( p_cep->p_mad_head )
More information about the ofw
mailing list