[openib-general] Re: [PATCH] uDAPL with uCM and uAT support

Arlin Davis ardavis at ichips.intel.com
Thu Jul 28 10:32:44 PDT 2005


James Lentini wrote:

>
> A couple of questions:
>
> On Sun, 24 Jul 2005, Arlin Davis wrote:
>
>> Index: dapl/udapl/dapl_evd_wait.c
>> ===================================================================
>> --- dapl/udapl/dapl_evd_wait.c    (revision 2899)
>> +++ dapl/udapl/dapl_evd_wait.c    (working copy)
>> @@ -145,24 +146,12 @@
>>                     (DAT_COUNT) DAPL_EVD_STATE_OPEN,
>>                     (DAT_COUNT) DAPL_EVD_STATE_WAITED );
>>     dapl_os_unlock ( &evd_ptr->header.lock );
>> -
>> -    if ( evd_state != DAPL_EVD_STATE_OPEN )
>> +    if ( evd_state != DAPL_EVD_STATE_OPEN || !waitable)
>>     {
>> -    /* Bogus state, bail out */
>>     dat_status = DAT_ERROR (DAT_INVALID_STATE,0);
>>     goto bail;
>>     }
>>
>> -    if (!waitable)
>> -    {
>> -    /* This EVD is not waitable, reset the state and bail */
>> -    (void) dapl_os_atomic_assign ((DAPL_ATOMIC *)&evd_ptr->evd_state,
>> -                    (DAT_COUNT) DAPL_EVD_STATE_WAITED,
>> -                    evd_state);
>> -    dat_status = DAT_ERROR (DAT_INVALID_STATE, 
>> DAT_INVALID_STATE_EVD_UNWAITABLE);
>> -    goto bail;
>> -    }
>> -
>
>
> Why don't we need to reset the state?

good catch. Yes we need to reset back before bailing.

>
>>     /*
>>      * We now own the EVD, even though we don't have the lock anymore,
>>      * because we're in the WAITED state.
>> @@ -182,37 +171,54 @@
>>      * return right away if the ib_cq_handle associate with these evd
>>      * equal to IB_INVALID_HANDLE
>>      */
>> -    dapls_evd_copy_cq(evd_ptr);
>> -
>> -    if (dapls_rbuf_count(&evd_ptr->pending_event_queue) >= threshold)
>> -    {
>> -        break;
>> -    }
>> -
>> -    /*
>> -     * Do not enable the completion notification if this evd is not
>> -     * a DTO_EVD or RMR_BIND_EVD
>> +    /* Logic to prevent missing completion between copy_cq (poll)
>> +     * and completion_notify (re-arm)
>>      */
>> -    if ( (!notify_requested) &&
>> -             ((evd_ptr->evd_flags & DAT_EVD_DTO_FLAG) ||
>> -              (evd_ptr->evd_flags & DAT_EVD_RMR_BIND_FLAG)) )
>> +    notify_needed = DAT_TRUE;
>> +    new_events = 0;
>> +    while (DAT_TRUE)
>>     {
>> -        dat_status = dapls_ib_completion_notify (
>> -        evd_ptr->header.owner_ia->hca_ptr->ib_hca_handle,
>> -        evd_ptr,
>> -        (evd_ptr->completion_type == DAPL_EVD_STATE_SOLICITED_WAIT) ?
>> -             IB_NOTIFY_ON_SOLIC_COMP : IB_NOTIFY_ON_NEXT_COMP );
>> -
>> -        DAPL_CNTR(DCNT_EVD_WAIT_CMP_NTFY);
>> -        /* FIXME report error */
>> -        dapl_os_assert(dat_status == DAT_SUCCESS);
>> +        dapls_evd_copy_cq(evd_ptr); /* poll for new completions */
>> +        total_events = dapls_rbuf_count 
>> (&evd_ptr->pending_event_queue);
>> +        new_events = total_events - new_events;
>> +        if (total_events >= threshold ||
>> +            (!new_events && notify_needed == DAT_FALSE))
>> +        {
>> +            break;
>> +        }
>> +
>> +        /*
>> +         * Do not enable the completion notification if this evd is not
>> +         * a DTO_EVD or RMR_BIND_EVD
>> +         */
>> +        if ( (evd_ptr->evd_flags & DAT_EVD_DTO_FLAG) ||
>> +            (evd_ptr->evd_flags & DAT_EVD_RMR_BIND_FLAG) )
>> +        {
>> +            dat_status = dapls_ib_completion_notify (
>> +                    evd_ptr->header.owner_ia->hca_ptr->ib_hca_handle,
>> +                    evd_ptr,
>> +                    (evd_ptr->completion_type == 
>> DAPL_EVD_STATE_SOLICITED_WAIT)
>> ?
>> +                         IB_NOTIFY_ON_SOLIC_COMP : 
>> IB_NOTIFY_ON_NEXT_COMP );
>> +
>> +            DAPL_CNTR(DCNT_EVD_WAIT_CMP_NTFY);
>> +            notify_needed = DAT_FALSE;
>> +            new_events = total_events;
>> +
>> +            /* FIXME report error */
>> +            dapl_os_assert(dat_status == DAT_SUCCESS);
>> +        }
>> +        else
>> +        {
>> +            break;
>> +        }
>>
>> -        notify_requested = DAT_TRUE;
>> +    } /* while completions < threshold, and rearm needed */
>>
>> -        /* Try again.  */
>> -        continue;
>> +    if (total_events >= threshold)
>> +    {
>> +        break;
>>     }
>> -
>> +
>
>
> Can you explain the chages above?
>
> This is my reading of how things used to work: the thread that called 
> dapl_evd_wait polled the CQ until the number of events reaped was >= 
> the threshold paramter. If a poll did not reap enough events, the 
> thread turned notification on (in certain situations). I don't see why 
> it need to do this though, since it went directly back to polling and 
> the notification upcall essentially did nothing if there EVD was in 
> the WAITED state.
>
> Obviously you saw a bug in the old implementation. Is my description 
> of how the code used to work wrong?

You are correct, but there is a small window where you can miss the CQ 
event if you re-arm and wait but the CQ completion/event occured between 
the last poll and the rearm/wait.  This change adds another cq_poll 
after every re-arm to cover this senario and make sure notification is 
armed and new events are expected before going into the wait.

>
>> Index: dapl/udapl/Makefile
>> ===================================================================
>> --- dapl/udapl/Makefile    (revision 2899)
>> +++ dapl/udapl/Makefile    (working copy)
>> @@ -122,7 +122,8 @@
>> #
>> ifeq ($(VERBS),openib)
>> PROVIDER = $(TOPDIR)/../openib
>> -CFLAGS   += -DOPENIB -DCQ_WAIT_OBJECT
>> +CFLAGS   += -DOPENIB
>> +#CFLAGS   += -DCQ_WAIT_OBJECT
>
>
> Do the OpenIB uverbs support a CQ_WAIT_OBJECT? If not, we should 
> remove the line above. When I see it commented out, it makes me think 
> that the functionality is supported but not implemented in uDAPL.

Yes, openIB supports direct wait objects via fd's. There are some issues 
with the openIB implementation that need to be worked out.

>
>> Index: dapl/common/dapl_evd_resize.c
>> ===================================================================
>> --- dapl/common/dapl_evd_resize.c    (revision 2899)
>> +++ dapl/common/dapl_evd_resize.c    (working copy)
>> @@ -67,72 +67,130 @@
>>     IN    DAT_EVD_HANDLE       evd_handle,
>>     IN    DAT_COUNT       evd_qlen )
>> {
>> -    DAPL_IA        *ia_ptr;
>> -    DAPL_EVD        *evd_ptr;
>> -    DAT_COUNT       pend_cnt;
>> -    DAT_RETURN        dat_status;
>> +    DAPL_IA          *ia_ptr;
>> +    DAPL_EVD         *evd_ptr;
>> +    DAT_EVENT        *event_ptr;
>> +    DAT_EVENT        *events;
>> +    DAT_EVENT        *orig_event;
>> +    DAPL_RING_BUFFER free_event_queue;
>> +    DAPL_RING_BUFFER pending_event_queue;
>> +    DAT_COUNT        pend_cnt;
>> +    DAT_COUNT        i;
>>
>>     dapl_dbg_log (DAPL_DBG_TYPE_API, "dapl_evd_resize (%p, %d)\n",
>>           evd_handle, evd_qlen);
>>
>>     if (DAPL_BAD_HANDLE (evd_handle, DAPL_MAGIC_EVD))
>>     {
>> -    dat_status = DAT_ERROR (DAT_INVALID_HANDLE,0);
>> -    goto bail;
>> +        return DAT_ERROR (DAT_INVALID_PARAMETER,DAT_INVALID_ARG1);
>>     }
>>
>>     evd_ptr     = (DAPL_EVD *)evd_handle;
>>     ia_ptr      = evd_ptr->header.owner_ia;
>>
>> -    if ( evd_qlen == evd_ptr->qlen )
>> +    if ((evd_qlen <= 0) || (evd_ptr->qlen > evd_qlen))
>>     {
>> -     dat_status = DAT_SUCCESS;
>> -     goto bail;
>> +        return DAT_ERROR (DAT_INVALID_PARAMETER,DAT_INVALID_ARG2);
>>     }
>
>
> Your check to make sure that the new size is greater that 0 is a good 
> one.
>
> I don't think we should return an error when the consumer asks for a 
> size less than the current size. A return of success seems more 
> appropriate. We can remove the old behavoir of shrinking the EVD if 
> you like. I'm sure that makes things simpler.
>
agree. I see no need to shrink.

> In general, I think we should try to retain the single return point 
> convention (in other words, we should keep the "goto bail" stuff). I 
> admit that we should have made better use of this. For example, there 
> should have been a second "bail" label (bail_unlock?) just before the 
> last call to dapl_os_unlock(). That would have saved us several lines 
> of code in this function.
>
agree.

>>     if ( evd_qlen > ia_ptr->hca_ptr->ia_attr.max_evd_qlen )
>>     {
>> -    dat_status = DAT_ERROR (DAT_INVALID_PARAMETER,DAT_INVALID_ARG2);
>> -    goto bail;
>> +    return DAT_ERROR (DAT_INSUFFICIENT_RESOURCES,DAT_RESOURCE_TEVD);
>>     }
>>
>>     dapl_os_lock(&evd_ptr->header.lock);
>>
>> -    /* Don't try to resize if we are actively waiting */
>>     if (evd_ptr->evd_state == DAPL_EVD_STATE_WAITED)
>>     {
>>         dapl_os_unlock(&evd_ptr->header.lock);
>> -    dat_status = DAT_ERROR (DAT_INVALID_STATE,0);
>> -    goto bail;
>> +        return DAT_ERROR (DAT_INVALID_STATE,0);
>>     }
>>
>>     pend_cnt = dapls_rbuf_count(&evd_ptr->pending_event_queue);
>>     if (pend_cnt > evd_qlen) {
>> -    dapl_os_unlock(&evd_ptr->header.lock);
>> -    dat_status = DAT_ERROR (DAT_INVALID_STATE,0);
>> -    goto bail;
>> +        dapl_os_unlock(&evd_ptr->header.lock);
>> +        return DAT_ERROR (DAT_INVALID_STATE,0);
>> +    }
>> +
>> +    if (DAT_SUCCESS != dapls_ib_cq_resize(evd_ptr->header.owner_ia,
>> +                                          evd_ptr,
>> +                                          &evd_qlen))
>> +    {
>> +        dapl_os_unlock(&evd_ptr->header.lock);
>> +        return DAT_ERROR 
>> (DAT_INSUFFICIENT_RESOURCES,DAT_RESOURCE_MEMORY);
>> +    }
>> +
>> +    /* Allocate EVENTs */
>> +    events = (DAT_EVENT *) dapl_os_alloc (evd_qlen * sizeof 
>> (DAT_EVENT));
>> +    if (!events)
>> +    {
>> +        dapl_os_unlock(&evd_ptr->header.lock);
>> +        return DAT_ERROR 
>> (DAT_INSUFFICIENT_RESOURCES,DAT_RESOURCE_MEMORY);
>>     }
>> +    event_ptr = events;
>>
>> -    dat_status = dapls_ib_cq_resize(evd_ptr->header.owner_ia,
>> -                    evd_ptr,
>> -                    &evd_qlen);
>> -    if (dat_status != DAT_SUCCESS)
>> +    /* allocate free event queue */
>> +    if (DAT_SUCCESS != dapls_rbuf_alloc (&free_event_queue, evd_qlen))
>>     {
>> +        dapl_os_free(event_ptr, evd_qlen * sizeof (DAT_EVENT));
>>         dapl_os_unlock(&evd_ptr->header.lock);
>> -    goto bail;
>> +        return DAT_ERROR 
>> (DAT_INSUFFICIENT_RESOURCES,DAT_RESOURCE_MEMORY);
>>     }
>>
>> -    dat_status = dapls_evd_event_realloc (evd_ptr, evd_qlen);
>> -    if (dat_status != DAT_SUCCESS)
>> +    /* allocate pending event queue */
>> +    if (DAT_SUCCESS != dapls_rbuf_alloc (&pending_event_queue, 
>> evd_qlen))
>>     {
>> +        dapl_os_free(event_ptr, evd_qlen * sizeof (DAT_EVENT));
>>         dapl_os_unlock(&evd_ptr->header.lock);
>> -    goto bail;
>> +        return DAT_ERROR 
>> (DAT_INSUFFICIENT_RESOURCES,DAT_RESOURCE_MEMORY);
>>     }
>>
>> +    for (i = 0; i < pend_cnt; i++)
>> +    {
>> +        orig_event = dapls_rbuf_remove(&evd_ptr->pending_event_queue);
>> +        if (orig_event == NULL) {
>> +            dapl_dbg_log (DAPL_DBG_TYPE_ERR, " Inconsistent event 
>> queue\n");
>> +            dapl_os_free(event_ptr, evd_qlen * sizeof (DAT_EVENT));
>> +            dapl_os_unlock(&evd_ptr->header.lock);
>> +        return DAT_ERROR 
>> (DAT_INSUFFICIENT_RESOURCES,DAT_RESOURCE_MEMORY);
>> +        }
>> +        memcpy(event_ptr, orig_event, sizeof(DAT_EVENT));
>> +        if (DAT_SUCCESS != dapls_rbuf_add(&pending_event_queue, 
>> event_ptr)) {
>> +            dapl_os_free(event_ptr, evd_qlen * sizeof (DAT_EVENT));
>> +            dapl_os_unlock(&evd_ptr->header.lock);
>> +        return DAT_ERROR 
>> (DAT_INSUFFICIENT_RESOURCES,DAT_RESOURCE_MEMORY);
>> +        }
>> +        event_ptr++;
>> +    }
>> +
>> +    for (i = pend_cnt; i < evd_qlen; i++)
>> +    {
>> +        if (DAT_SUCCESS != dapls_rbuf_add(&free_event_queue,
>> +                                          (void *) event_ptr)) {
>> +            dapl_os_free(event_ptr, evd_qlen * sizeof (DAT_EVENT));
>> +            dapl_os_unlock(&evd_ptr->header.lock);
>> +        return DAT_ERROR 
>> (DAT_INSUFFICIENT_RESOURCES,DAT_RESOURCE_MEMORY);
>> +        }
>> +        event_ptr++;
>> +    }
>> +
>> +    dapls_rbuf_destroy (&evd_ptr->free_event_queue);
>> +    dapls_rbuf_destroy (&evd_ptr->pending_event_queue);
>> +    if (evd_ptr->events)
>> +    {
>> +        dapl_os_free (evd_ptr->events, evd_ptr->qlen * sizeof 
>> (DAT_EVENT));
>> +    }
>> +    evd_ptr->free_event_queue    = free_event_queue;
>> +    evd_ptr->pending_event_queue = pending_event_queue;
>> +    evd_ptr->events              = events;
>> +    evd_ptr->qlen                = evd_qlen;
>> +
>
>
>
> Can you hide this in dapls_ib_cq_resize()?
>
which part?

>
>>     dapl_os_unlock(&evd_ptr->header.lock);
>>
>> - bail:
>> -    return dat_status;
>> +    dapl_dbg_log (DAPL_DBG_TYPE_RTN,
>> +            "dapl_evd_resize returns DAT_SUCCESS\n");
>> +
>> +    return DAT_SUCCESS;
>> }
>>
>> /*
>
> _______________________________________________
> openib-general mailing list
> openib-general at openib.org
> http://openib.org/mailman/listinfo/openib-general
>
> To unsubscribe, please visit 
> http://openib.org/mailman/listinfo/openib-general
>




More information about the general mailing list