[ofw] [PATCH] DAPL v2.0: ucm, scm: UD mode triggers list_head assert with large scale alltoall test

Davis, Arlin R arlin.r.davis at intel.com
Thu Oct 3 16:03:06 PDT 2013


1024+ ranks, IMB alltoall may hit assert when running Intel MPI in UD mode.

CR cleanup was implemented with EP to CR references still linked.
During cr_accept, the CR remote_ia_address is linked to EP object
by mistake with UD mode. UD mode my have multiple CRs per EP so
no direct mappings to CR memory can exist. Only with RC mode which
always has one EP to CR mapping.

In scm, ucm: for CM object free with CR references the search and
unlinking from SP must be under SP lock to serialize. Also,
change cleanup thread wakeup logic to only trigger the thread if
reference count indicates the need for more processing.

Signed-off-by: Arlin Davis <arlin.r.davis at intel.com>
---
 dapl/common/dapl_cr_accept.c   |   10 ++++++----
 dapl/openib_scm/cm.c           |   14 +++++++-------
 dapl/openib_ucm/cm.c           |   26 +++++++++++++-------------
 dapl/openib_ucm/dapl_ib_util.h |    1 +
 4 files changed, 27 insertions(+), 24 deletions(-)

diff --git a/dapl/common/dapl_cr_accept.c b/dapl/common/dapl_cr_accept.c
index 5df9458..4e48fea 100644
--- a/dapl/common/dapl_cr_accept.c
+++ b/dapl/common/dapl_cr_accept.c
@@ -180,11 +180,13 @@ dapl_cr_accept(IN DAT_CR_HANDLE cr_handle,
 	entry_ep_state = ep_ptr->param.ep_state;
 	entry_ep_handle = cr_ptr->param.local_ep_handle;
 	ep_ptr->param.ep_state = DAT_EP_STATE_COMPLETION_PENDING;
-	ep_ptr->cr_ptr = cr_ptr;
-	ep_ptr->param.remote_ia_address_ptr =
-	    cr_ptr->param.remote_ia_address_ptr;
-	cr_ptr->param.local_ep_handle = ep_handle;
 
+	/* UD supports multiple CR's per EP, provider will manage CR's */
+	if (ep_ptr->param.ep_attr.service_type == DAT_SERVICE_TYPE_RC) {
+		ep_ptr->cr_ptr = cr_ptr;
+		ep_ptr->param.remote_ia_address_ptr = cr_ptr->param.remote_ia_address_ptr;
+	}
+	cr_ptr->param.local_ep_handle = ep_handle;
 	dapl_os_unlock(&ep_ptr->header.lock);
 
 	dat_status = dapls_ib_accept_connection(cr_handle,
diff --git a/dapl/openib_scm/cm.c b/dapl/openib_scm/cm.c
index d4964bd..f7838f2 100644
--- a/dapl/openib_scm/cm.c
+++ b/dapl/openib_scm/cm.c
@@ -439,25 +439,25 @@ void dapls_cm_free(dp_ib_cm_handle_t cm_ptr)
 		 cm_ptr, dapl_cm_state_str(cm_ptr->state),
 		 cm_ptr->ep, sp_ptr, cm_ptr->ref_count);
 
+	dapl_os_lock(&cm_ptr->lock);
 	if (sp_ptr && cm_ptr->state == DCM_CONNECTED &&
 	    cm_ptr->msg.daddr.ib.qp_type == IBV_QPT_UD) {
-		DAPL_CR *cr_ptr = dapl_sp_search_cr(sp_ptr, cm_ptr);
+		DAPL_CR *cr_ptr;
+
+		dapl_os_lock(&sp_ptr->header.lock);
+		cr_ptr = dapl_sp_search_cr(sp_ptr, cm_ptr);
 		if (cr_ptr != NULL) {
-			dapl_os_lock(&sp_ptr->header.lock);
 			dapl_sp_remove_cr(sp_ptr, cr_ptr);
-			dapl_os_unlock(&sp_ptr->header.lock);
 			dapls_cr_free(cr_ptr);
 		}
+		dapl_os_unlock(&sp_ptr->header.lock);
 	}
 	
 	/* free from internal workq, wait until EP is last ref */
-	dapl_os_lock(&cm_ptr->lock);
 	cm_ptr->state = DCM_FREE;
-	dapl_os_unlock(&cm_ptr->lock);
 
-	dapli_cm_thread_signal(cm_ptr);
-	dapl_os_lock(&cm_ptr->lock);
 	if (cm_ptr->ref_count != 1) {
+		dapli_cm_thread_signal(cm_ptr);
 		dapl_os_unlock(&cm_ptr->lock);
 		dapl_os_wait_object_wait(&cm_ptr->event, DAT_TIMEOUT_INFINITE);
 		dapl_os_lock(&cm_ptr->lock);
diff --git a/dapl/openib_ucm/cm.c b/dapl/openib_ucm/cm.c
index 05cff10..d6f923e 100644
--- a/dapl/openib_ucm/cm.c
+++ b/dapl/openib_ucm/cm.c
@@ -779,19 +779,19 @@ void dapli_cm_free(dp_ib_cm_handle_t cm)
 		 cm, dapl_cm_state_str(cm->state),
 		 cm->ep, sp_ptr, sp_ptr ? sp_ptr->cr_list_count:0, cm->ref_count);
 
+	dapl_os_lock(&cm->lock);
 	if (sp_ptr && cm->state == DCM_CONNECTED &&
 	    cm->msg.daddr.ib.qp_type == IBV_QPT_UD) {
-		DAPL_CR *cr_ptr = dapl_sp_search_cr(sp_ptr, cm);
-		dapl_log(DAPL_DBG_TYPE_CM, " dapli_cm_free: UD CR %p\n", cr_ptr);
-		if (cr_ptr != NULL) {
-			dapl_os_lock(&sp_ptr->header.lock);
-			dapl_sp_remove_cr(sp_ptr, cr_ptr);
-			dapl_os_unlock(&sp_ptr->header.lock);
-			dapls_cr_free(cr_ptr);
+		dapl_os_lock(&sp_ptr->header.lock);
+		cm->cr = dapl_sp_search_cr(sp_ptr, cm);
+		dapl_log(DAPL_DBG_TYPE_CM, " dapli_cm_free: UD CR %p\n", cm->cr);
+
+		if (cm->cr != NULL) {
+			dapl_sp_remove_cr(sp_ptr, cm->cr);
+			/* free CR at EP destroy */
 		}
+		dapl_os_unlock(&sp_ptr->header.lock);
 	}
-
-	dapl_os_lock(&cm->lock);
 	cm->state = DCM_FREE;
 	dapls_thread_signal(&cm->hca->ib_trans.signal);
 	dapl_os_unlock(&cm->lock);
@@ -809,12 +809,12 @@ void dapls_cm_free(dp_ib_cm_handle_t cm)
 	dapl_os_lock(&cm->lock);
 	if (cm->state != DCM_FREE) 
 		cm->state = DCM_FREE;
-	
-	dapl_os_unlock(&cm->lock);
-	dapls_thread_signal(&cm->hca->ib_trans.signal);
 
-	dapl_os_lock(&cm->lock);
+	if (cm->cr)
+		dapls_cr_free(cm->cr);
+
 	if (cm->ref_count != 1) {
+		dapls_thread_signal(&cm->hca->ib_trans.signal);
 		dapl_os_unlock(&cm->lock);
 		dapl_os_wait_object_wait(&cm->f_event, DAT_TIMEOUT_INFINITE);
 		dapl_os_lock(&cm->lock);
diff --git a/dapl/openib_ucm/dapl_ib_util.h b/dapl/openib_ucm/dapl_ib_util.h
index b5bd082..469560e 100644
--- a/dapl/openib_ucm/dapl_ib_util.h
+++ b/dapl/openib_ucm/dapl_ib_util.h
@@ -48,6 +48,7 @@ struct ib_cm_handle
 	struct dapl_hca		*hca;
 	struct dapl_sp		*sp;	
 	struct dapl_ep 		*ep;
+	struct dapl_cr 		*cr;
 	struct ibv_ah		*ah;
 	uint16_t		p_size; /* accept p_data, for retries */
 	uint8_t			p_data[DCM_MAX_PDATA_SIZE];
-- 
1.7.3





More information about the ofw mailing list