[ofw] [PATCH] ib/cm: fix handling failed send completions

Smith, Stan stan.smith at intel.com
Mon Jan 4 11:51:14 PST 2010


Hefty, Sean wrote:
> committed to trunk - needs to be added to winof 2.2

Hello,
 w.r.t. WinOF 2.2,
1) what larger set of application problems does this patch address?
2) what type/degree of testing has been successfully passed with this patch applied?

Stan.

>
>> -----Original Message-----
>> From: ofw-bounces at lists.openfabrics.org [mailto:ofw-
>> bounces at lists.openfabrics.org] On Behalf Of Sean Hefty
>> Sent: Thursday, December 17, 2009 2:53 PM
>> To: ofw at lists.openfabrics.org
>> Subject: [ofw] [PATCH] ib/cm: fix handling failed send completions
>>
>> __cep_mad_send_cb() assumes that the mad being processed is
>> associated with the current state of the CEP.  This may not be
>> the case.
>>
>> For example, for a short lived connection, it was observed that
>> a REP mad completed with status canceled.  This is normal.  However,
>> the user already attempted to disconnect the connection by sending
>> a DREQ.  This left the cep in the DREQ_SENT state by the time that
>> the REP mad completed.  Since the REP status was not success, but the
>> state was DREQ_SENT, the code assumed that the DREQ had failed and
>> transitioned the cep into TIMEWAIT.  The result is that the DREQ is
>> never matched with a DREP or canceled, but holds a reference on the
>> CEP.
>>
>> Until the DREQ times out (time depends on connection, but easily
>> up to a minute), attempts to destroy the CEP are blocked.
>>
>> Fix this by simply discarding any completed sends that were not
>> sent from the current state of the cep when the completion handler
>> is invoked.
>>
>> Signed-off-by: Sean Hefty <sean.hefty at intel.com>
>> ---
>> trunk/core/al/kernel/al_cm.c     |    2 -
>> trunk/core/al/kernel/al_cm_cep.c |  140
>> +++++++++++++++----------------------- 2 files changed, 56
>> insertions(+), 86 deletions(-)
>>
>> diff --git a/trunk/core/al/kernel/al_cm.c
>> b/trunk/core/al/kernel/al_cm.c
>> index 48b0cb5..955985a 100644
>> --- a/trunk/core/al/kernel/al_cm.c
>> +++ b/trunk/core/al/kernel/al_cm.c
>> @@ -37,7 +37,7 @@
>> typedef struct _iba_cm_id_priv
>> {
>>      iba_cm_id       id;
>> -    KEVENT          destroy_event;
>> +    KEVENT          destroy_event;
>>
>> }    iba_cm_id_priv;
>>
>> diff --git a/trunk/core/al/kernel/al_cm_cep.c
>> b/trunk/core/al/kernel/al_cm_cep.c
>> index 49fa417..4749e1d 100644
>> --- a/trunk/core/al/kernel/al_cm_cep.c
>> +++ b/trunk/core/al/kernel/al_cm_cep.c
>> @@ -27,7 +27,7 @@
>>  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
>>  * SOFTWARE.
>>  *
>> - * $Id$
>> + * $Id: al_cm_cep.c 2540 2009-11-03 17:23:09Z shefty $  */
>>
>>
>> @@ -2213,105 +2213,79 @@ __cep_mad_send_cb(
>>
>>      p_cep = (kcep_t*)p_mad->context1;
>>
>> -    /*
>> -     * The connection context is not set when performing immediate
>> responses,
>> -     * such as repeating MADS.
>> -     */
>> -    if( !p_cep )
>> +    /* The cep context is only set for MADs that are retried. */ +  if(
>>      !p_cep  ) {
>>              ib_put_mad( p_mad );
>>              AL_EXIT( AL_DBG_CM );
>>              return;
>>      }
>>
>> +    CL_ASSERT( p_mad->status != IB_WCS_SUCCESS );
>>      p_mad->context1 = NULL;
>>
>>      KeAcquireInStackQueuedSpinLockAtDpcLevel( &gp_cep_mgr->lock, &hdl );
>> -    /* Clear the sent MAD pointer so that we don't try cancelling
>> again. */
>> -    if( p_cep->p_send_mad == p_mad )
>> -            p_cep->p_send_mad = NULL;
>> -
>> -    switch( p_mad->status )
>> +    if( p_cep->p_send_mad != p_mad )
>>      {
>> -    case IB_WCS_SUCCESS:
>>              KeReleaseInStackQueuedSpinLockFromDpcLevel( &hdl );             ib_put_mad(
>> p_mad );
>> -            break;
>> -
>> -    case IB_WCS_CANCELED:
>> -            if( p_cep->state != CEP_STATE_REQ_SENT &&
>> -                    p_cep->state != CEP_STATE_REQ_MRA_RCVD &&
>> -                    p_cep->state != CEP_STATE_REP_SENT &&
>> -                    p_cep->state != CEP_STATE_REP_MRA_RCVD &&
>> -                    p_cep->state != CEP_STATE_LAP_SENT &&
>> -                    p_cep->state != CEP_STATE_LAP_MRA_RCVD &&
>> -                    p_cep->state != CEP_STATE_DREQ_SENT &&
>> -                    p_cep->state != CEP_STATE_SREQ_SENT )
>> -            {
>> -                    KeReleaseInStackQueuedSpinLockFromDpcLevel( &hdl );
>> -                    ib_put_mad( p_mad );
>> -                    break;
>> -            }
>> -            /* Treat as a timeout so we don't stall the state machine. */
>> -            p_mad->status = IB_WCS_TIMEOUT_RETRY_ERR;
>> -
>> -            /* Fall through. */
>> -    case IB_WCS_TIMEOUT_RETRY_ERR:
>> -    default:
>> -            /* Timeout.  Reject the connection. */
>> -            switch( p_cep->state )
>> -            {
>> -            case CEP_STATE_REQ_SENT:
>> -            case CEP_STATE_REQ_MRA_RCVD:
>> -            case CEP_STATE_REP_SENT:
>> -            case CEP_STATE_REP_MRA_RCVD:
>> -                    /* Send the REJ. */
>> -                    __reject_timeout( p_port_cep, p_cep, p_mad );
>> -                    __remove_cep( p_cep );
>> -                    p_cep->state = CEP_STATE_IDLE;
>> -                    break;
>> -
>> -            case CEP_STATE_DREQ_DESTROY:
>> -                    p_cep->state = CEP_STATE_DESTROY;
>> -                    __insert_timewait( p_cep );
>> -                    /* Fall through. */
>> +            goto done;
>> +    }
>>
>> -            case CEP_STATE_DESTROY:
>> -                    KeReleaseInStackQueuedSpinLockFromDpcLevel( &hdl );
>> -                    ib_put_mad( p_mad );
>> -                    goto done;
>> +    /* Clear the sent MAD pointer so that we don't try cancelling
>> again. */ +  p_cep->p_send_mad = NULL;
>>
>> -            case CEP_STATE_DREQ_SENT:
>> -                    /*
>> -                     * Make up a DREP mad so we can respond if we receive
>> -                     * a DREQ while in timewait.
>> -                     */
>> -                    __format_mad_hdr( &p_cep->mads.drep.hdr, p_cep,
>> CM_DREP_ATTR_ID );
>> -                    __format_drep( p_cep, NULL, 0, &p_cep->mads.drep );
>> -                    p_cep->state = CEP_STATE_TIMEWAIT;
>> -                    __insert_timewait( p_cep );
>> -                    break;
>> +    switch( p_cep->state )
>> +    {
>> +    case CEP_STATE_REQ_SENT:
>> +    case CEP_STATE_REQ_MRA_RCVD:
>> +    case CEP_STATE_REP_SENT:
>> +    case CEP_STATE_REP_MRA_RCVD:
>> +            /* Send the REJ. */
>> +            __reject_timeout( p_port_cep, p_cep, p_mad );
>> +            __remove_cep( p_cep );
>> +            p_cep->state = CEP_STATE_IDLE;
>> +            break;
>>
>> -            case CEP_STATE_LAP_SENT:
>> -                    /*
>> -                     * Before CEP was sent, we have been in CEP_STATE_ESTABLISHED
>> as we
>> -                     * failed to send, we return to that state.
>> -                     */
>> -                    p_cep->state = CEP_STATE_ESTABLISHED;
>> -                    break;
>> -            default:
>> -                    break;
>> -            }
>> +    case CEP_STATE_DREQ_DESTROY:
>> +            p_cep->state = CEP_STATE_DESTROY;
>> +            __insert_timewait( p_cep );
>> +            /* Fall through. */
>>
>> -            status = __cep_queue_mad( p_cep, p_mad );
>> -            CL_ASSERT( status != IB_INVALID_STATE );
>> +    case CEP_STATE_DESTROY:
>>              KeReleaseInStackQueuedSpinLockFromDpcLevel( &hdl ); +           ib_put_mad(
>> p_mad ); +           goto done;
>>
>> -            if( status == IB_SUCCESS )
>> -                    __process_cep( p_cep );
>> +    case CEP_STATE_DREQ_SENT:
>> +            /*
>> +             * Make up a DREP mad so we can respond if we receive
>> +             * a DREQ while in timewait.
>> +             */
>> +            __format_mad_hdr( &p_cep->mads.drep.hdr, p_cep, CM_DREP_ATTR_ID );
>> +            __format_drep( p_cep, NULL, 0, &p_cep->mads.drep );
>> +            p_cep->state = CEP_STATE_TIMEWAIT;
>> +            __insert_timewait( p_cep );
>> +            break;
>> +
>> +    case CEP_STATE_LAP_SENT:
>> +            /*
>> +             * Before CEP was sent, we have been in CEP_STATE_ESTABLISHED as
>> we +          * failed to send, we return to that state.
>> +             */
>> +            p_cep->state = CEP_STATE_ESTABLISHED;
>> +            break;
>> +    default:
>>              break;
>>      }
>>
>> +    status = __cep_queue_mad( p_cep, p_mad );
>> +    CL_ASSERT( status != IB_INVALID_STATE );
>> +    KeReleaseInStackQueuedSpinLockFromDpcLevel( &hdl ); +
>> +    if( status == IB_SUCCESS )
>> +            __process_cep( p_cep );
>> +
>> done:
>>      pfn_destroy_cb = p_cep->pfn_destroy_cb;
>>      cep_context = p_cep->context;
>> @@ -3938,12 +3912,8 @@ __cleanup_cep(
>>      CL_ASSERT( KeGetCurrentIrql() == DISPATCH_LEVEL );
>>
>>      /* If we've already come through here, we're done. */
>> -    if( p_cep->state == CEP_STATE_DESTROY ||
>> -            p_cep->state == CEP_STATE_DREQ_DESTROY )
>> -    {
>> -            AL_EXIT( AL_DBG_CM );
>> -            return -1;
>> -    }
>> +    CL_ASSERT( p_cep->state != CEP_STATE_DESTROY &&
>> +            p_cep->state != CEP_STATE_DREQ_DESTROY );
>>
>>      /* Cleanup the pending MAD list. */
>>      while( p_cep->p_mad_head )




More information about the ofw mailing list