[openib-general] Re: [PATCH] uDAPL with uCM and uAT support
James Lentini
jlentini at netapp.com
Thu Jul 28 09:10:35 PDT 2005
A couple of questions:
On Sun, 24 Jul 2005, Arlin Davis wrote:
> Index: dapl/udapl/dapl_evd_wait.c
> ===================================================================
> --- dapl/udapl/dapl_evd_wait.c (revision 2899)
> +++ dapl/udapl/dapl_evd_wait.c (working copy)
> @@ -145,24 +146,12 @@
> (DAT_COUNT) DAPL_EVD_STATE_OPEN,
> (DAT_COUNT) DAPL_EVD_STATE_WAITED );
> dapl_os_unlock ( &evd_ptr->header.lock );
> -
> - if ( evd_state != DAPL_EVD_STATE_OPEN )
> + if ( evd_state != DAPL_EVD_STATE_OPEN || !waitable)
> {
> - /* Bogus state, bail out */
> dat_status = DAT_ERROR (DAT_INVALID_STATE,0);
> goto bail;
> }
>
> - if (!waitable)
> - {
> - /* This EVD is not waitable, reset the state and bail */
> - (void) dapl_os_atomic_assign ((DAPL_ATOMIC *)&evd_ptr->evd_state,
> - (DAT_COUNT) DAPL_EVD_STATE_WAITED,
> - evd_state);
> - dat_status = DAT_ERROR (DAT_INVALID_STATE, DAT_INVALID_STATE_EVD_UNWAITABLE);
> - goto bail;
> - }
> -
Why don't we need to reset the state?
> /*
> * We now own the EVD, even though we don't have the lock anymore,
> * because we're in the WAITED state.
> @@ -182,37 +171,54 @@
> * return right away if the ib_cq_handle associate with these evd
> * equal to IB_INVALID_HANDLE
> */
> - dapls_evd_copy_cq(evd_ptr);
> -
> - if (dapls_rbuf_count(&evd_ptr->pending_event_queue) >= threshold)
> - {
> - break;
> - }
> -
> - /*
> - * Do not enable the completion notification if this evd is not
> - * a DTO_EVD or RMR_BIND_EVD
> + /* Logic to prevent missing completion between copy_cq (poll)
> + * and completion_notify (re-arm)
> */
> - if ( (!notify_requested) &&
> - ((evd_ptr->evd_flags & DAT_EVD_DTO_FLAG) ||
> - (evd_ptr->evd_flags & DAT_EVD_RMR_BIND_FLAG)) )
> + notify_needed = DAT_TRUE;
> + new_events = 0;
> + while (DAT_TRUE)
> {
> - dat_status = dapls_ib_completion_notify (
> - evd_ptr->header.owner_ia->hca_ptr->ib_hca_handle,
> - evd_ptr,
> - (evd_ptr->completion_type == DAPL_EVD_STATE_SOLICITED_WAIT) ?
> - IB_NOTIFY_ON_SOLIC_COMP : IB_NOTIFY_ON_NEXT_COMP );
> -
> - DAPL_CNTR(DCNT_EVD_WAIT_CMP_NTFY);
> - /* FIXME report error */
> - dapl_os_assert(dat_status == DAT_SUCCESS);
> + dapls_evd_copy_cq(evd_ptr); /* poll for new completions */
> + total_events = dapls_rbuf_count (&evd_ptr->pending_event_queue);
> + new_events = total_events - new_events;
> + if (total_events >= threshold ||
> + (!new_events && notify_needed == DAT_FALSE))
> + {
> + break;
> + }
> +
> + /*
> + * Do not enable the completion notification if this evd is not
> + * a DTO_EVD or RMR_BIND_EVD
> + */
> + if ( (evd_ptr->evd_flags & DAT_EVD_DTO_FLAG) ||
> + (evd_ptr->evd_flags & DAT_EVD_RMR_BIND_FLAG) )
> + {
> + dat_status = dapls_ib_completion_notify (
> + evd_ptr->header.owner_ia->hca_ptr->ib_hca_handle,
> + evd_ptr,
> + (evd_ptr->completion_type == DAPL_EVD_STATE_SOLICITED_WAIT)
> ?
> + IB_NOTIFY_ON_SOLIC_COMP : IB_NOTIFY_ON_NEXT_COMP );
> +
> + DAPL_CNTR(DCNT_EVD_WAIT_CMP_NTFY);
> + notify_needed = DAT_FALSE;
> + new_events = total_events;
> +
> + /* FIXME report error */
> + dapl_os_assert(dat_status == DAT_SUCCESS);
> + }
> + else
> + {
> + break;
> + }
>
> - notify_requested = DAT_TRUE;
> + } /* while completions < threshold, and rearm needed */
>
> - /* Try again. */
> - continue;
> + if (total_events >= threshold)
> + {
> + break;
> }
> -
> +
Can you explain the chages above?
This is my reading of how things used to work: the thread that called
dapl_evd_wait polled the CQ until the number of events reaped was >=
the threshold paramter. If a poll did not reap enough events, the
thread turned notification on (in certain situations). I don't see why
it need to do this though, since it went directly back to polling and
the notification upcall essentially did nothing if there EVD was in
the WAITED state.
Obviously you saw a bug in the old implementation. Is my description
of how the code used to work wrong?
> Index: dapl/udapl/Makefile
> ===================================================================
> --- dapl/udapl/Makefile (revision 2899)
> +++ dapl/udapl/Makefile (working copy)
> @@ -122,7 +122,8 @@
> #
> ifeq ($(VERBS),openib)
> PROVIDER = $(TOPDIR)/../openib
> -CFLAGS += -DOPENIB -DCQ_WAIT_OBJECT
> +CFLAGS += -DOPENIB
> +#CFLAGS += -DCQ_WAIT_OBJECT
Do the OpenIB uverbs support a CQ_WAIT_OBJECT? If not, we should
remove the line above. When I see it commented out, it makes me
think that the functionality is supported but not implemented in
uDAPL.
> Index: dapl/common/dapl_evd_resize.c
> ===================================================================
> --- dapl/common/dapl_evd_resize.c (revision 2899)
> +++ dapl/common/dapl_evd_resize.c (working copy)
> @@ -67,72 +67,130 @@
> IN DAT_EVD_HANDLE evd_handle,
> IN DAT_COUNT evd_qlen )
> {
> - DAPL_IA *ia_ptr;
> - DAPL_EVD *evd_ptr;
> - DAT_COUNT pend_cnt;
> - DAT_RETURN dat_status;
> + DAPL_IA *ia_ptr;
> + DAPL_EVD *evd_ptr;
> + DAT_EVENT *event_ptr;
> + DAT_EVENT *events;
> + DAT_EVENT *orig_event;
> + DAPL_RING_BUFFER free_event_queue;
> + DAPL_RING_BUFFER pending_event_queue;
> + DAT_COUNT pend_cnt;
> + DAT_COUNT i;
>
> dapl_dbg_log (DAPL_DBG_TYPE_API, "dapl_evd_resize (%p, %d)\n",
> evd_handle, evd_qlen);
>
> if (DAPL_BAD_HANDLE (evd_handle, DAPL_MAGIC_EVD))
> {
> - dat_status = DAT_ERROR (DAT_INVALID_HANDLE,0);
> - goto bail;
> + return DAT_ERROR (DAT_INVALID_PARAMETER,DAT_INVALID_ARG1);
> }
>
> evd_ptr = (DAPL_EVD *)evd_handle;
> ia_ptr = evd_ptr->header.owner_ia;
>
> - if ( evd_qlen == evd_ptr->qlen )
> + if ((evd_qlen <= 0) || (evd_ptr->qlen > evd_qlen))
> {
> - dat_status = DAT_SUCCESS;
> - goto bail;
> + return DAT_ERROR (DAT_INVALID_PARAMETER,DAT_INVALID_ARG2);
> }
Your check to make sure that the new size is greater that 0 is a good
one.
I don't think we should return an error when the consumer asks for a
size less than the current size. A return of success seems more
appropriate. We can remove the old behavoir of shrinking the EVD if
you like. I'm sure that makes things simpler.
In general, I think we should try to retain the single return point
convention (in other words, we should keep the "goto bail" stuff). I
admit that we should have made better use of this. For example, there
should have been a second "bail" label (bail_unlock?) just before the
last call to dapl_os_unlock(). That would have saved us several lines
of code in this function.
>
> if ( evd_qlen > ia_ptr->hca_ptr->ia_attr.max_evd_qlen )
> {
> - dat_status = DAT_ERROR (DAT_INVALID_PARAMETER,DAT_INVALID_ARG2);
> - goto bail;
> + return DAT_ERROR (DAT_INSUFFICIENT_RESOURCES,DAT_RESOURCE_TEVD);
> }
>
> dapl_os_lock(&evd_ptr->header.lock);
>
> - /* Don't try to resize if we are actively waiting */
> if (evd_ptr->evd_state == DAPL_EVD_STATE_WAITED)
> {
> dapl_os_unlock(&evd_ptr->header.lock);
> - dat_status = DAT_ERROR (DAT_INVALID_STATE,0);
> - goto bail;
> + return DAT_ERROR (DAT_INVALID_STATE,0);
> }
>
> pend_cnt = dapls_rbuf_count(&evd_ptr->pending_event_queue);
> if (pend_cnt > evd_qlen) {
> - dapl_os_unlock(&evd_ptr->header.lock);
> - dat_status = DAT_ERROR (DAT_INVALID_STATE,0);
> - goto bail;
> + dapl_os_unlock(&evd_ptr->header.lock);
> + return DAT_ERROR (DAT_INVALID_STATE,0);
> + }
> +
> + if (DAT_SUCCESS != dapls_ib_cq_resize(evd_ptr->header.owner_ia,
> + evd_ptr,
> + &evd_qlen))
> + {
> + dapl_os_unlock(&evd_ptr->header.lock);
> + return DAT_ERROR (DAT_INSUFFICIENT_RESOURCES,DAT_RESOURCE_MEMORY);
> + }
> +
> + /* Allocate EVENTs */
> + events = (DAT_EVENT *) dapl_os_alloc (evd_qlen * sizeof (DAT_EVENT));
> + if (!events)
> + {
> + dapl_os_unlock(&evd_ptr->header.lock);
> + return DAT_ERROR (DAT_INSUFFICIENT_RESOURCES,DAT_RESOURCE_MEMORY);
> }
> + event_ptr = events;
>
> - dat_status = dapls_ib_cq_resize(evd_ptr->header.owner_ia,
> - evd_ptr,
> - &evd_qlen);
> - if (dat_status != DAT_SUCCESS)
> + /* allocate free event queue */
> + if (DAT_SUCCESS != dapls_rbuf_alloc (&free_event_queue, evd_qlen))
> {
> + dapl_os_free(event_ptr, evd_qlen * sizeof (DAT_EVENT));
> dapl_os_unlock(&evd_ptr->header.lock);
> - goto bail;
> + return DAT_ERROR (DAT_INSUFFICIENT_RESOURCES,DAT_RESOURCE_MEMORY);
> }
>
> - dat_status = dapls_evd_event_realloc (evd_ptr, evd_qlen);
> - if (dat_status != DAT_SUCCESS)
> + /* allocate pending event queue */
> + if (DAT_SUCCESS != dapls_rbuf_alloc (&pending_event_queue, evd_qlen))
> {
> + dapl_os_free(event_ptr, evd_qlen * sizeof (DAT_EVENT));
> dapl_os_unlock(&evd_ptr->header.lock);
> - goto bail;
> + return DAT_ERROR (DAT_INSUFFICIENT_RESOURCES,DAT_RESOURCE_MEMORY);
> }
>
> + for (i = 0; i < pend_cnt; i++)
> + {
> + orig_event = dapls_rbuf_remove(&evd_ptr->pending_event_queue);
> + if (orig_event == NULL) {
> + dapl_dbg_log (DAPL_DBG_TYPE_ERR, " Inconsistent event queue\n");
> + dapl_os_free(event_ptr, evd_qlen * sizeof (DAT_EVENT));
> + dapl_os_unlock(&evd_ptr->header.lock);
> + return DAT_ERROR (DAT_INSUFFICIENT_RESOURCES,DAT_RESOURCE_MEMORY);
> + }
> + memcpy(event_ptr, orig_event, sizeof(DAT_EVENT));
> + if (DAT_SUCCESS != dapls_rbuf_add(&pending_event_queue, event_ptr)) {
> + dapl_os_free(event_ptr, evd_qlen * sizeof (DAT_EVENT));
> + dapl_os_unlock(&evd_ptr->header.lock);
> + return DAT_ERROR (DAT_INSUFFICIENT_RESOURCES,DAT_RESOURCE_MEMORY);
> + }
> + event_ptr++;
> + }
> +
> + for (i = pend_cnt; i < evd_qlen; i++)
> + {
> + if (DAT_SUCCESS != dapls_rbuf_add(&free_event_queue,
> + (void *) event_ptr)) {
> + dapl_os_free(event_ptr, evd_qlen * sizeof (DAT_EVENT));
> + dapl_os_unlock(&evd_ptr->header.lock);
> + return DAT_ERROR (DAT_INSUFFICIENT_RESOURCES,DAT_RESOURCE_MEMORY);
> + }
> + event_ptr++;
> + }
> +
> + dapls_rbuf_destroy (&evd_ptr->free_event_queue);
> + dapls_rbuf_destroy (&evd_ptr->pending_event_queue);
> + if (evd_ptr->events)
> + {
> + dapl_os_free (evd_ptr->events, evd_ptr->qlen * sizeof (DAT_EVENT));
> + }
> + evd_ptr->free_event_queue = free_event_queue;
> + evd_ptr->pending_event_queue = pending_event_queue;
> + evd_ptr->events = events;
> + evd_ptr->qlen = evd_qlen;
> +
Can you hide this in dapls_ib_cq_resize()?
> dapl_os_unlock(&evd_ptr->header.lock);
>
> - bail:
> - return dat_status;
> + dapl_dbg_log (DAPL_DBG_TYPE_RTN,
> + "dapl_evd_resize returns DAT_SUCCESS\n");
> +
> + return DAT_SUCCESS;
> }
>
> /*
More information about the general
mailing list