[ofa-general] [PATCH 1/7][v1.2] dapl: endpoint pending request count is wrong

Davis, Arlin R arlin.r.davis at intel.com
Fri Jun 20 11:42:20 PDT 2008


The code assumes every cookie allocated during posting of
requests gets completed. This incorrect assumption results in
wrong pending count. Remove request_pending field and replace
with direct call, dapl_cb_pending, to provide accurate
data to consumer.

Add debug print if consumer overruns request queue.

Signed-off by: Arlin Davis ardavis at ichips.intel.com
---
 dapl/common/dapl_cookie.c        |   28 ++++++++++++++++++++++++++++
 dapl/common/dapl_cookie.h        |    4 ++++
 dapl/common/dapl_ep_get_status.c |    4 ++--
 dapl/common/dapl_ep_post_recv.c  |    7 -------
 dapl/common/dapl_ep_util.c       |   22 ++++++++++------------
 dapl/common/dapl_evd_util.c      |   16 ++++------------
 dapl/common/dapl_rmr_bind.c      |   14 --------------
 7 files changed, 48 insertions(+), 47 deletions(-)

diff --git a/dapl/common/dapl_cookie.c b/dapl/common/dapl_cookie.c
index ee12ce2..5b0b598 100644
--- a/dapl/common/dapl_cookie.c
+++ b/dapl/common/dapl_cookie.c
@@ -260,6 +260,34 @@ dapls_cb_put (
     return DAT_SUCCESS;
 }
 
+/*
+ * dapls_cb_pending
+ *
+ * snapshot of active entries on cookie ring buffer 
+ *
+ * Input:
+ *	buffer		pointer to DAPL_COOKIE_BUFFER
+ *
+ * Returns:
+ *	DAT_COUNT	number of active/pending cookies
+ *
+ */
+extern DAT_COUNT 
+dapls_cb_pending (
+    DAPL_COOKIE_BUFFER	*buffer )
+{
+	DAT_COUNT head, tail;
+
+	head = dapl_os_atomic_read(&buffer->head);
+	tail = dapl_os_atomic_read(&buffer->tail);
+
+	if (head == tail)
+		return 0;
+	else if (head > tail)
+		return (head - tail);
+	else 
+		return ((buffer->pool_size - tail) + head);
+}
 
 /*
  * dapls_rmr_cookie_alloc
diff --git a/dapl/common/dapl_cookie.h b/dapl/common/dapl_cookie.h
index 20fbec6..f953b28 100644
--- a/dapl/common/dapl_cookie.h
+++ b/dapl/common/dapl_cookie.h
@@ -50,6 +50,10 @@ extern void
 dapls_cb_free (
     DAPL_COOKIE_BUFFER		*buffer );
 
+extern DAT_COUNT 
+dapls_cb_pending (
+    DAPL_COOKIE_BUFFER		*buffer );
+
 extern DAT_RETURN
 dapls_rmr_cookie_alloc (
     IN  DAPL_COOKIE_BUFFER	*buffer,
diff --git a/dapl/common/dapl_ep_get_status.c
b/dapl/common/dapl_ep_get_status.c
index 49b4fef..a931355 100644
--- a/dapl/common/dapl_ep_get_status.c
+++ b/dapl/common/dapl_ep_get_status.c
@@ -98,12 +98,12 @@ dapl_ep_get_status (
 
     if ( in_dto_idle != NULL )
     {
-	*in_dto_idle = (dapl_os_atomic_read (&ep_ptr->recv_count)) ?
DAT_FALSE : DAT_TRUE;
+	*in_dto_idle = (dapls_cb_pending(&ep_ptr->recv_buffer)) ?
DAT_FALSE : DAT_TRUE;
     }
 
     if ( out_dto_idle != NULL )
     {
-	*out_dto_idle = (dapl_os_atomic_read (&ep_ptr->req_count)) ?
DAT_FALSE : DAT_TRUE;
+	*out_dto_idle = (dapls_cb_pending(&ep_ptr->req_buffer)) ?
DAT_FALSE : DAT_TRUE;
     }
 
  bail:
diff --git a/dapl/common/dapl_ep_post_recv.c
b/dapl/common/dapl_ep_post_recv.c
index c9be9ec..5f903b8 100644
--- a/dapl/common/dapl_ep_post_recv.c
+++ b/dapl/common/dapl_ep_post_recv.c
@@ -109,19 +109,12 @@ dapl_ep_post_recv (
     }
 
     /*
-     * Take reference before posting to avoid race conditions with
-     * completions
-     */
-    dapl_os_atomic_inc (&ep_ptr->recv_count);
-
-    /*
      * Invoke provider specific routine to post DTO
      */
     dat_status = dapls_ib_post_recv (ep_ptr, cookie, num_segments,
local_iov);
 
     if ( dat_status != DAT_SUCCESS )
     {
-	dapl_os_atomic_dec (&ep_ptr->recv_count);
 	dapls_cookie_dealloc (&ep_ptr->recv_buffer, cookie);
     }
 
diff --git a/dapl/common/dapl_ep_util.c b/dapl/common/dapl_ep_util.c
index d11aed3..2bad346 100644
--- a/dapl/common/dapl_ep_util.c
+++ b/dapl/common/dapl_ep_util.c
@@ -131,9 +131,6 @@ dapl_ep_alloc (
     ep_ptr->qp_state  = DAPL_QP_STATE_UNATTACHED;
     ep_ptr->cm_handle = IB_INVALID_HANDLE;
 
-    dapl_os_atomic_set (&ep_ptr->req_count, 0);
-    dapl_os_atomic_set (&ep_ptr->recv_count, 0);
-
     if ( DAT_SUCCESS != dapls_cb_create (
 	     &ep_ptr->req_buffer,
 	     ep_ptr,
@@ -393,18 +390,20 @@ dapl_ep_post_send_req (
 		 dto_type,
 		 user_cookie,
 		 &cookie );
-    if ( dat_status != DAT_SUCCESS )
-    {
+    if (dat_status != DAT_SUCCESS) {
+	dapl_log(DAPL_DBG_TYPE_ERR,
+		 " dapl_post_req resource ERR:"
+		 " dtos pending = %d, max_dtos %d, max_cb %d hd %d tl
%d\n",
+		 dapls_cb_pending(&ep_ptr->req_buffer),
+		 ep_ptr->param.ep_attr.max_request_dtos,
+		 ep_ptr->req_buffer.pool_size,
+		 ep_ptr->req_buffer.head,
+		 ep_ptr->req_buffer.tail);
+
 	goto bail;
     }
 
     /*
-     * Take reference before posting to avoid race conditions with
-     * completions
-     */
-    dapl_os_atomic_inc (&ep_ptr->req_count);
-
-    /*
      * Invoke provider specific routine to post DTO
      */
     dat_status = dapls_ib_post_send ( ep_ptr,
@@ -417,7 +416,6 @@ dapl_ep_post_send_req (
 
     if ( dat_status != DAT_SUCCESS )
     {
-	dapl_os_atomic_dec (&ep_ptr->req_count);
 	dapls_cookie_dealloc (&ep_ptr->req_buffer, cookie);
     }
 
diff --git a/dapl/common/dapl_evd_util.c b/dapl/common/dapl_evd_util.c
index 2c95c6d..41c128c 100644
--- a/dapl/common/dapl_evd_util.c
+++ b/dapl/common/dapl_evd_util.c
@@ -1029,17 +1029,11 @@ dapli_evd_cqe_to_event (
 	{
 	    DAPL_COOKIE_BUFFER	*buffer;
 
-	    if ( DAPL_DTO_TYPE_RECV == cookie->val.dto.type )
-	    {
-		dapl_os_atomic_dec (&ep_ptr->recv_count);
-		buffer = &ep_ptr->recv_buffer;
-	    }
+	    if (DAPL_DTO_TYPE_RECV == cookie->val.dto.type)
+	    	buffer = &ep_ptr->recv_buffer;
 	    else
-	    {
-		dapl_os_atomic_dec (&ep_ptr->req_count);
-		buffer = &ep_ptr->req_buffer;
-	    }
-
+	    	buffer = &ep_ptr->req_buffer;
+	    
 	    event_ptr->event_number = DAT_DTO_COMPLETION_EVENT;
 	    event_ptr->event_data.dto_completion_event_data.ep_handle =
 		cookie->ep;
@@ -1084,8 +1078,6 @@ dapli_evd_cqe_to_event (
 
 	case DAPL_COOKIE_TYPE_RMR:
 	{
-	    dapl_os_atomic_dec (&ep_ptr->req_count);
-
 	    event_ptr->event_number = DAT_RMR_BIND_COMPLETION_EVENT;
 
 	    event_ptr->event_data.rmr_completion_event_data.rmr_handle =
diff --git a/dapl/common/dapl_rmr_bind.c b/dapl/common/dapl_rmr_bind.c
index 1582fb3..905ea2c 100644
--- a/dapl/common/dapl_rmr_bind.c
+++ b/dapl/common/dapl_rmr_bind.c
@@ -151,12 +151,6 @@ dapli_rmr_bind_fuse (
 
     is_signaled = (completion_flags & DAT_COMPLETION_SUPPRESS_FLAG) ?
DAT_FALSE : DAT_TRUE;
 
-    /*
-     * Take reference before posting to avoid race conditions with
-     * completions
-     */
-    dapl_os_atomic_inc (&ep_ptr->req_count);
-
     dat_status = dapls_ib_mw_bind (rmr,
 				   lmr,
 				   ep_ptr,
@@ -167,7 +161,6 @@ dapli_rmr_bind_fuse (
 				   is_signaled);
     if ( DAT_SUCCESS != dat_status )
     {
-	dapl_os_atomic_dec (&ep_ptr->req_count);
 	dapls_cookie_dealloc (&ep_ptr->req_buffer, cookie);
 	goto bail;
     }
@@ -255,19 +248,12 @@ dapli_rmr_bind_unfuse (
 
     is_signaled = (completion_flags & DAT_COMPLETION_UNSIGNALLED_FLAG)
? DAT_FALSE : DAT_TRUE;
 
-    /*
-     * Take reference before posting to avoid race conditions with 
-     * completions 
-     */
-    dapl_os_atomic_inc (&ep_ptr->req_count);
-
     dat_status = dapls_ib_mw_unbind (rmr,
 				     ep_ptr,
 				     cookie,
 				     is_signaled);
     if ( DAT_SUCCESS != dat_status )
     {
-	dapl_os_atomic_dec (&ep_ptr->req_count);
 	dapls_cookie_dealloc (&ep_ptr->req_buffer, cookie);
 	goto bail1;
     }
-- 
1.5.2.5




More information about the general mailing list