[ofw] [PATCH 1/3] DAPL v2.0: ucm, scm: remove use of usec_sleep delays and use events for disc and destroy
Davis, Arlin R
arlin.r.davis at intel.com
Sat Feb 12 11:33:30 PST 2011
Some optimizations and fixes for the ucm/scm providers, addressing issues discovered during MPI
testing with UD QPs on larger clusters.
PATCH [1/3]
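Use pthread mutex when processing and waiting for disconnect completions
and for CM object destruction, replacing the dapl_os_sleep_usec polling loops with wait-object events. Add f_event (woken in dapls_cm_release when ref_count drops to 1, waited on in dapls_cm_free) and d_event (woken in ucm_disconnect_final, waited on in dapls_ib_disconnect) to the ucm cm object.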
Signed-off-by: Arlin Davis <arlin.r.davis at intel.com>
---
dapl/openib_scm/cm.c | 11 ++++++++---
dapl/openib_ucm/cm.c | 38 ++++++++++++++++++++++++++------------
dapl/openib_ucm/dapl_ib_util.h | 3 ++-
3 files changed, 36 insertions(+), 16 deletions(-)
diff --git a/dapl/openib_scm/cm.c b/dapl/openib_scm/cm.c
index b0fbadf..1145f17 100644
--- a/dapl/openib_scm/cm.c
+++ b/dapl/openib_scm/cm.c
@@ -362,6 +362,8 @@ void dapls_cm_release(dp_ib_cm_handle_t cm_ptr)
dapl_os_lock(&cm_ptr->lock);
cm_ptr->ref_count--;
if (cm_ptr->ref_count) {
+ if (cm_ptr->ref_count == 1)
+ dapl_os_wait_object_wakeup(&cm_ptr->event);
dapl_os_unlock(&cm_ptr->lock);
return;
}
@@ -437,10 +439,13 @@ void dapls_cm_free(dp_ib_cm_handle_t cm_ptr)
/* free from internal workq, wait until EP is last ref */
dapl_os_lock(&cm_ptr->lock);
cm_ptr->state = DCM_FREE;
- while (cm_ptr->ref_count != 1) {
- dapli_cm_thread_signal(cm_ptr);
+ dapl_os_unlock(&cm_ptr->lock);
+
+ dapli_cm_thread_signal(cm_ptr);
+ dapl_os_lock(&cm_ptr->lock);
+ if (cm_ptr->ref_count != 1) {
dapl_os_unlock(&cm_ptr->lock);
- dapl_os_sleep_usec(10000);
+ dapl_os_wait_object_wait(&cm_ptr->event, DAT_TIMEOUT_INFINITE);
dapl_os_lock(&cm_ptr->lock);
}
dapl_os_unlock(&cm_ptr->lock);
diff --git a/dapl/openib_ucm/cm.c b/dapl/openib_ucm/cm.c
index c5ddf04..69f7610 100644
--- a/dapl/openib_ucm/cm.c
+++ b/dapl/openib_ucm/cm.c
@@ -649,7 +649,8 @@ static void dapli_cm_dealloc(dp_ib_cm_handle_t cm) {
dapl_os_assert(!cm->ref_count);
dapl_os_lock_destroy(&cm->lock);
- dapl_os_wait_object_destroy(&cm->event);
+ dapl_os_wait_object_destroy(&cm->d_event);
+ dapl_os_wait_object_destroy(&cm->f_event);
dapl_os_free(cm, sizeof(*cm));
}
@@ -665,6 +666,8 @@ void dapls_cm_release(dp_ib_cm_handle_t cm)
dapl_os_lock(&cm->lock);
cm->ref_count--;
if (cm->ref_count) {
+ if (cm->ref_count == 1)
+ dapl_os_wait_object_wakeup(&cm->f_event);
dapl_os_unlock(&cm->lock);
return;
}
@@ -693,10 +696,15 @@ dp_ib_cm_handle_t dapls_ib_cm_create(DAPL_EP *ep)
if (dapl_os_lock_init(&cm->lock))
goto bail;
- if (dapl_os_wait_object_init(&cm->event)) {
+ if (dapl_os_wait_object_init(&cm->f_event)) {
dapl_os_lock_destroy(&cm->lock);
goto bail;
}
+ if (dapl_os_wait_object_init(&cm->d_event)) {
+ dapl_os_lock_destroy(&cm->lock);
+ dapl_os_wait_object_destroy(&cm->f_event);
+ goto bail;
+ }
dapls_cm_acquire(cm);
cm->msg.ver = htons(DCM_VER);
@@ -708,7 +716,8 @@ dp_ib_cm_handle_t dapls_ib_cm_create(DAPL_EP *ep)
cm->msg.sport = htons(ucm_get_port(&hca->ib_trans, 0));
if (!cm->msg.sport) {
- dapl_os_wait_object_destroy(&cm->event);
+ dapl_os_wait_object_destroy(&cm->f_event);
+ dapl_os_wait_object_destroy(&cm->d_event);
dapl_os_lock_destroy(&cm->lock);
goto bail;
}
@@ -758,10 +767,13 @@ void dapls_cm_free(dp_ib_cm_handle_t cm)
if (cm->state != DCM_FREE)
cm->state = DCM_FREE;
- while (cm->ref_count != 1) {
+ dapl_os_unlock(&cm->lock);
+ dapls_thread_signal(&cm->hca->ib_trans.signal);
+
+ dapl_os_lock(&cm->lock);
+ if (cm->ref_count != 1) {
dapl_os_unlock(&cm->lock);
- dapls_thread_signal(&cm->hca->ib_trans.signal);
- dapl_os_sleep_usec(10000);
+ dapl_os_wait_object_wait(&cm->f_event, DAT_TIMEOUT_INFINITE);
dapl_os_lock(&cm->lock);
}
dapl_os_unlock(&cm->lock);
@@ -836,6 +848,8 @@ static void ucm_disconnect_final(dp_ib_cm_handle_t cm)
else
dapl_evd_connection_callback(cm, IB_CME_DISCONNECTED, NULL, 0, cm->ep);
+ dapl_os_wait_object_wakeup(&cm->d_event);
+
}
/*
@@ -888,7 +902,7 @@ DAT_RETURN dapli_cm_disconnect(dp_ib_cm_handle_t cm)
dapl_os_unlock(&cm->lock);
return DAT_SUCCESS;
default:
- dapl_log(DAPL_DBG_TYPE_WARN,
+ dapl_log(DAPL_DBG_TYPE_EP,
" disconnect UNKNOWN state: ep %p cm %p %s %s"
" %x %x %x %s %x %x %x r_id %x l_id %x\n",
cm->ep, cm,
@@ -1684,13 +1698,13 @@ dapls_ib_disconnect(IN DAPL_EP *ep_ptr, IN DAT_CLOSE_FLAGS close_flags)
/* ABRUPT close, wait for callback and DISCONNECTED state */
if (close_flags == DAT_CLOSE_ABRUPT_FLAG) {
dapl_os_lock(&ep_ptr->header.lock);
- while (ep_ptr->param.ep_state != DAT_EP_STATE_DISCONNECTED) {
- dapl_os_unlock(&ep_ptr->header.lock);
- dapl_os_sleep_usec(10000);
- dapl_os_lock(&ep_ptr->header.lock);
+ if (ep_ptr->param.ep_state != DAT_EP_STATE_DISCONNECTED) {
+ dapl_os_unlock(&ep_ptr->header.lock);
+ dapl_os_wait_object_wait(&cm_ptr->d_event, DAT_TIMEOUT_INFINITE);
+ dapl_os_lock(&ep_ptr->header.lock);
}
dapl_os_unlock(&ep_ptr->header.lock);
- }
+ }
return DAT_SUCCESS;
}
diff --git a/dapl/openib_ucm/dapl_ib_util.h b/dapl/openib_ucm/dapl_ib_util.h
index 7769307..efeec4d 100644
--- a/dapl/openib_ucm/dapl_ib_util.h
+++ b/dapl/openib_ucm/dapl_ib_util.h
@@ -38,7 +38,8 @@ struct ib_cm_handle
{
struct dapl_llist_entry list_entry;
struct dapl_llist_entry local_entry;
- DAPL_OS_WAIT_OBJECT event;
+ DAPL_OS_WAIT_OBJECT d_event;
+ DAPL_OS_WAIT_OBJECT f_event;
DAPL_OS_LOCK lock;
DAPL_OS_TIMEVAL timer;
int ref_count;
--
1.7.3
More information about the ofw
mailing list