[openib-general] Re: [PATCH] uDAPL with uCM and uAT support

James Lentini jlentini at netapp.com
Thu Jul 28 09:10:35 PDT 2005


A couple of questions:

On Sun, 24 Jul 2005, Arlin Davis wrote:

> Index: dapl/udapl/dapl_evd_wait.c
> ===================================================================
> --- dapl/udapl/dapl_evd_wait.c	(revision 2899)
> +++ dapl/udapl/dapl_evd_wait.c	(working copy)
> @@ -145,24 +146,12 @@
> 					(DAT_COUNT) DAPL_EVD_STATE_OPEN,
> 					(DAT_COUNT) DAPL_EVD_STATE_WAITED );
>     dapl_os_unlock ( &evd_ptr->header.lock );
> -
> -    if ( evd_state != DAPL_EVD_STATE_OPEN )
> +    if ( evd_state != DAPL_EVD_STATE_OPEN || !waitable)
>     {
> -	/* Bogus state, bail out */
> 	dat_status = DAT_ERROR (DAT_INVALID_STATE,0);
> 	goto bail;
>     }
>
> -    if (!waitable)
> -    {
> -	/* This EVD is not waitable, reset the state and bail */
> -	(void) dapl_os_atomic_assign ((DAPL_ATOMIC *)&evd_ptr->evd_state,
> -					(DAT_COUNT) DAPL_EVD_STATE_WAITED,
> -					evd_state);
> -	dat_status = DAT_ERROR (DAT_INVALID_STATE, DAT_INVALID_STATE_EVD_UNWAITABLE);
> -	goto bail;
> -    }
> -

Why don't we need to reset the state?

>     /*
>      * We now own the EVD, even though we don't have the lock anymore,
>      * because we're in the WAITED state.
> @@ -182,37 +171,54 @@
> 	 * return right away if the ib_cq_handle associate with these evd
> 	 * equal to IB_INVALID_HANDLE
> 	 */
> -	dapls_evd_copy_cq(evd_ptr);
> -
> -	if (dapls_rbuf_count(&evd_ptr->pending_event_queue) >= threshold)
> -	{
> -	    break;
> -	}
> -
> -	/*
> -	 * Do not enable the completion notification if this evd is not
> -	 * a DTO_EVD or RMR_BIND_EVD
> +	/* Logic to prevent missing completion between copy_cq (poll)
> +	 * and completion_notify (re-arm)
> 	 */
> -	if ( (!notify_requested) &&
> -             ((evd_ptr->evd_flags & DAT_EVD_DTO_FLAG) ||
> -              (evd_ptr->evd_flags & DAT_EVD_RMR_BIND_FLAG)) )
> +	notify_needed = DAT_TRUE;
> +	new_events = 0;
> +	while (DAT_TRUE)
> 	{
> -	    dat_status = dapls_ib_completion_notify (
> -		evd_ptr->header.owner_ia->hca_ptr->ib_hca_handle,
> -		evd_ptr,
> -		(evd_ptr->completion_type == DAPL_EVD_STATE_SOLICITED_WAIT) ?
> -		     IB_NOTIFY_ON_SOLIC_COMP : IB_NOTIFY_ON_NEXT_COMP );
> -
> -	    DAPL_CNTR(DCNT_EVD_WAIT_CMP_NTFY);
> -	    /* FIXME report error */
> -	    dapl_os_assert(dat_status == DAT_SUCCESS);
> +		dapls_evd_copy_cq(evd_ptr); /* poll for new completions */
> +		total_events = dapls_rbuf_count (&evd_ptr->pending_event_queue);
> +		new_events = total_events - new_events;
> +		if (total_events >= threshold ||
> +			(!new_events && notify_needed == DAT_FALSE))
> +		{
> +			break;
> +		}
> +
> +		/*
> +		 * Do not enable the completion notification if this evd is not
> +		 * a DTO_EVD or RMR_BIND_EVD
> +		 */
> +		if ( (evd_ptr->evd_flags & DAT_EVD_DTO_FLAG) ||
> +			(evd_ptr->evd_flags & DAT_EVD_RMR_BIND_FLAG) )
> +		{
> +			dat_status = dapls_ib_completion_notify (
> +					evd_ptr->header.owner_ia->hca_ptr->ib_hca_handle,
> +					evd_ptr,
> +					(evd_ptr->completion_type == DAPL_EVD_STATE_SOLICITED_WAIT)
> ?
> +		     			IB_NOTIFY_ON_SOLIC_COMP : IB_NOTIFY_ON_NEXT_COMP );
> +
> +			DAPL_CNTR(DCNT_EVD_WAIT_CMP_NTFY);
> +			notify_needed = DAT_FALSE;
> +			new_events = total_events;
> +
> +			/* FIXME report error */
> +			dapl_os_assert(dat_status == DAT_SUCCESS);
> +		}
> +		else
> +		{
> +			break;
> +		}
>
> -	    notify_requested = DAT_TRUE;
> +	} /* while completions < threshold, and rearm needed */
>
> -	    /* Try again.  */
> -	    continue;
> +	if (total_events >= threshold)
> +	{
> +		break;
> 	}
> -
> +

Can you explain the chages above?

This is my reading of how things used to work: the thread that called 
dapl_evd_wait polled the CQ until the number of events reaped was >= 
the threshold paramter. If a poll did not reap enough events, the 
thread turned notification on (in certain situations). I don't see why 
it need to do this though, since it went directly back to polling and 
the notification upcall essentially did nothing if there EVD was in 
the WAITED state.

Obviously you saw a bug in the old implementation. Is my description 
of how the code used to work wrong?

> Index: dapl/udapl/Makefile
> ===================================================================
> --- dapl/udapl/Makefile	(revision 2899)
> +++ dapl/udapl/Makefile	(working copy)
> @@ -122,7 +122,8 @@
> #
> ifeq ($(VERBS),openib)
> PROVIDER = $(TOPDIR)/../openib
> -CFLAGS   += -DOPENIB -DCQ_WAIT_OBJECT
> +CFLAGS   += -DOPENIB
> +#CFLAGS   += -DCQ_WAIT_OBJECT

Do the OpenIB uverbs support a CQ_WAIT_OBJECT? If not, we should 
remove the line above. When I see it commented out, it makes me 
think that the functionality is supported but not implemented in 
uDAPL.

> Index: dapl/common/dapl_evd_resize.c
> ===================================================================
> --- dapl/common/dapl_evd_resize.c	(revision 2899)
> +++ dapl/common/dapl_evd_resize.c	(working copy)
> @@ -67,72 +67,130 @@
> 	IN	DAT_EVD_HANDLE	   evd_handle,
> 	IN	DAT_COUNT	   evd_qlen )
> {
> -    DAPL_IA		*ia_ptr;
> -    DAPL_EVD		*evd_ptr;
> -    DAT_COUNT   	pend_cnt;
> -    DAT_RETURN		dat_status;
> +    DAPL_IA          *ia_ptr;
> +    DAPL_EVD         *evd_ptr;
> +    DAT_EVENT        *event_ptr;
> +    DAT_EVENT        *events;
> +    DAT_EVENT        *orig_event;
> +    DAPL_RING_BUFFER free_event_queue;
> +    DAPL_RING_BUFFER pending_event_queue;
> +    DAT_COUNT        pend_cnt;
> +    DAT_COUNT        i;
>
>     dapl_dbg_log (DAPL_DBG_TYPE_API, "dapl_evd_resize (%p, %d)\n",
> 		  evd_handle, evd_qlen);
>
>     if (DAPL_BAD_HANDLE (evd_handle, DAPL_MAGIC_EVD))
>     {
> -	dat_status = DAT_ERROR (DAT_INVALID_HANDLE,0);
> -	goto bail;
> +        return DAT_ERROR (DAT_INVALID_PARAMETER,DAT_INVALID_ARG1);
>     }
>
>     evd_ptr     = (DAPL_EVD *)evd_handle;
>     ia_ptr      = evd_ptr->header.owner_ia;
>
> -    if ( evd_qlen == evd_ptr->qlen )
> +    if ((evd_qlen <= 0) || (evd_ptr->qlen > evd_qlen))
>     {
> -	 dat_status = DAT_SUCCESS;
> -	 goto bail;
> +        return DAT_ERROR (DAT_INVALID_PARAMETER,DAT_INVALID_ARG2);
>     }

Your check to make sure that the new size is greater that 0 is a good 
one.

I don't think we should return an error when the consumer asks for a 
size less than the current size. A return of success seems more 
appropriate. We can remove the old behavoir of shrinking the EVD if 
you like. I'm sure that makes things simpler.

In general, I think we should try to retain the single return point 
convention (in other words, we should keep the "goto bail" stuff). I 
admit that we should have made better use of this. For example, there 
should have been a second "bail" label (bail_unlock?) just before the 
last call to dapl_os_unlock(). That would have saved us several lines 
of code in this function.

>
>     if ( evd_qlen > ia_ptr->hca_ptr->ia_attr.max_evd_qlen )
>     {
> -	dat_status = DAT_ERROR (DAT_INVALID_PARAMETER,DAT_INVALID_ARG2);
> -	goto bail;
> +	return DAT_ERROR (DAT_INSUFFICIENT_RESOURCES,DAT_RESOURCE_TEVD);
>     }
>
>     dapl_os_lock(&evd_ptr->header.lock);
>
> -    /* Don't try to resize if we are actively waiting */
>     if (evd_ptr->evd_state == DAPL_EVD_STATE_WAITED)
>     {
>         dapl_os_unlock(&evd_ptr->header.lock);
> -	dat_status = DAT_ERROR (DAT_INVALID_STATE,0);
> -	goto bail;
> +        return DAT_ERROR (DAT_INVALID_STATE,0);
>     }
>
>     pend_cnt = dapls_rbuf_count(&evd_ptr->pending_event_queue);
>     if (pend_cnt > evd_qlen) {
> -	dapl_os_unlock(&evd_ptr->header.lock);
> -	dat_status = DAT_ERROR (DAT_INVALID_STATE,0);
> -	goto bail;
> +        dapl_os_unlock(&evd_ptr->header.lock);
> +        return DAT_ERROR (DAT_INVALID_STATE,0);
> +    }
> +
> +    if (DAT_SUCCESS != dapls_ib_cq_resize(evd_ptr->header.owner_ia,
> +                                          evd_ptr,
> +                                          &evd_qlen))
> +    {
> +        dapl_os_unlock(&evd_ptr->header.lock);
> +        return DAT_ERROR (DAT_INSUFFICIENT_RESOURCES,DAT_RESOURCE_MEMORY);
> +    }
> +
> +    /* Allocate EVENTs */
> +    events = (DAT_EVENT *) dapl_os_alloc (evd_qlen * sizeof (DAT_EVENT));
> +    if (!events)
> +    {
> +        dapl_os_unlock(&evd_ptr->header.lock);
> +        return DAT_ERROR (DAT_INSUFFICIENT_RESOURCES,DAT_RESOURCE_MEMORY);
>     }
> +    event_ptr = events;
>
> -    dat_status = dapls_ib_cq_resize(evd_ptr->header.owner_ia,
> -				    evd_ptr,
> -				    &evd_qlen);
> -    if (dat_status != DAT_SUCCESS)
> +    /* allocate free event queue */
> +    if (DAT_SUCCESS != dapls_rbuf_alloc (&free_event_queue, evd_qlen))
>     {
> +        dapl_os_free(event_ptr, evd_qlen * sizeof (DAT_EVENT));
>         dapl_os_unlock(&evd_ptr->header.lock);
> -	goto bail;
> +        return DAT_ERROR (DAT_INSUFFICIENT_RESOURCES,DAT_RESOURCE_MEMORY);
>     }
>
> -    dat_status = dapls_evd_event_realloc (evd_ptr, evd_qlen);
> -    if (dat_status != DAT_SUCCESS)
> +    /* allocate pending event queue */
> +    if (DAT_SUCCESS != dapls_rbuf_alloc (&pending_event_queue, evd_qlen))
>     {
> +        dapl_os_free(event_ptr, evd_qlen * sizeof (DAT_EVENT));
>         dapl_os_unlock(&evd_ptr->header.lock);
> -	goto bail;
> +        return DAT_ERROR (DAT_INSUFFICIENT_RESOURCES,DAT_RESOURCE_MEMORY);
>     }
>
> +    for (i = 0; i < pend_cnt; i++)
> +    {
> +        orig_event = dapls_rbuf_remove(&evd_ptr->pending_event_queue);
> +        if (orig_event == NULL) {
> +            dapl_dbg_log (DAPL_DBG_TYPE_ERR, " Inconsistent event queue\n");
> +            dapl_os_free(event_ptr, evd_qlen * sizeof (DAT_EVENT));
> +            dapl_os_unlock(&evd_ptr->header.lock);
> +	    return DAT_ERROR (DAT_INSUFFICIENT_RESOURCES,DAT_RESOURCE_MEMORY);
> +        }
> +        memcpy(event_ptr, orig_event, sizeof(DAT_EVENT));
> +        if (DAT_SUCCESS != dapls_rbuf_add(&pending_event_queue, event_ptr)) {
> +            dapl_os_free(event_ptr, evd_qlen * sizeof (DAT_EVENT));
> +            dapl_os_unlock(&evd_ptr->header.lock);
> +	    return DAT_ERROR (DAT_INSUFFICIENT_RESOURCES,DAT_RESOURCE_MEMORY);
> +        }
> +        event_ptr++;
> +    }
> +
> +    for (i = pend_cnt; i < evd_qlen; i++)
> +    {
> +        if (DAT_SUCCESS != dapls_rbuf_add(&free_event_queue,
> +                                          (void *) event_ptr)) {
> +            dapl_os_free(event_ptr, evd_qlen * sizeof (DAT_EVENT));
> +            dapl_os_unlock(&evd_ptr->header.lock);
> +	    return DAT_ERROR (DAT_INSUFFICIENT_RESOURCES,DAT_RESOURCE_MEMORY);
> +        }
> +        event_ptr++;
> +    }
> +
> +    dapls_rbuf_destroy (&evd_ptr->free_event_queue);
> +    dapls_rbuf_destroy (&evd_ptr->pending_event_queue);
> +    if (evd_ptr->events)
> +    {
> +        dapl_os_free (evd_ptr->events, evd_ptr->qlen * sizeof (DAT_EVENT));
> +    }
> +    evd_ptr->free_event_queue    = free_event_queue;
> +    evd_ptr->pending_event_queue = pending_event_queue;
> +    evd_ptr->events              = events;
> +    evd_ptr->qlen                = evd_qlen;
> +


Can you hide this in dapls_ib_cq_resize()?


>     dapl_os_unlock(&evd_ptr->header.lock);
>
> - bail:
> -    return dat_status;
> +    dapl_dbg_log (DAPL_DBG_TYPE_RTN,
> +			"dapl_evd_resize returns DAT_SUCCESS\n");
> +
> +    return DAT_SUCCESS;
> }
>
> /*



More information about the general mailing list