[ofa-general] [PATCH 4/7][v2.0] dapl scm: Adding socket cm provider for better scalability on large homogeneous clusters.

Davis, Arlin R arlin.r.davis at intel.com
Fri Jun 20 11:58:11 PDT 2008


Bring socket cm provider back to life with some changes:

better threading support for exchanging QP information.
Avoid blocking during connect to support dynamic connection
model with MPI implementations.

consumer control of ack timeout/retries.

disconnect/reject capabilities via socket exchange.

version support for wire protocol to ensure compatibility
with peer scm provider. Add gids to exchange.

validated with Intel MPI on a 14,000+ core fabric using IB DDR.

Signed-off-by: Arlin Davis ardavis at ichips.intel.com
---
 Makefile.am                          |  152 ++++++-
 dapl.spec.in                         |   10 +-
 dapl/openib_scm/dapl_ib_cm.c         |  858
+++++++++++++++++++++-------------
 dapl/openib_scm/dapl_ib_cq.c         |   19 +-
 dapl/openib_scm/dapl_ib_dto.h        |  397 ++++++++++++----
 dapl/openib_scm/dapl_ib_extensions.c |  313 +++++++++++++
 dapl/openib_scm/dapl_ib_mem.c        |  157 ++++---
 dapl/openib_scm/dapl_ib_qp.c         |   53 ++-
 dapl/openib_scm/dapl_ib_util.c       |  196 +++++---
 dapl/openib_scm/dapl_ib_util.h       |   46 ++-
 dapl/udapl/libdaploscm.map           |    7 +
 11 files changed, 1619 insertions(+), 589 deletions(-)
 create mode 100755 dapl/openib_scm/dapl_ib_extensions.c
 create mode 100755 dapl/udapl/libdaploscm.map

diff --git a/Makefile.am b/Makefile.am
index bb75dea..db3ab57 100755
--- a/Makefile.am
+++ b/Makefile.am
@@ -13,10 +13,12 @@ endif
 
 if EXT_TYPE_IB
 XFLAGS = -DDAT_EXTENSIONS
-XPROGRAMS = dapl/openib_cma/dapl_ib_extensions.c
+XPROGRAMS_CMA = dapl/openib_cma/dapl_ib_extensions.c
+XPROGRAMS_SCM = dapl/openib_scm/dapl_ib_extensions.c
 else
 XFLAGS =
-XPROGRAMS =
+XPROGRAMS_CMA =
+XPROGRAMS_SCM =
 endif
 
 if DEBUG
@@ -27,9 +29,11 @@ endif
 
 datlibdir = $(libdir)
 dapllibofadir = $(libdir)
+daplliboscmdir = $(libdir)
 
 datlib_LTLIBRARIES = dat/udat/libdat2.la
 dapllibofa_LTLIBRARIES = dapl/udapl/libdaplofa.la
+daplliboscm_LTLIBRARIES = dapl/udapl/libdaploscm.la
 
 dat_udat_libdat2_la_CFLAGS = -Wall $(DBGFLAGS) -D_GNU_SOURCE $(OSFLAGS)
$(XFLAGS) \
 				-I$(srcdir)/dat/include/
-I$(srcdir)/dat/udat/ \
@@ -41,14 +45,20 @@ dapl_udapl_libdaplofa_la_CFLAGS = -Wall $(DBGFLAGS)
-D_GNU_SOURCE $(OSFLAGS) $(X
 				-I$(srcdir)/dapl/common
-I$(srcdir)/dapl/udapl/linux \
 				-I$(srcdir)/dapl/openib_cma 
 		
+dapl_udapl_libdaploscm_la_CFLAGS = -Wall $(DBGFLAGS) -D_GNU_SOURCE
$(OSFLAGS) $(XFLAGS) \
+                                -DOPENIB -DCQ_WAIT_OBJECT \
+                                -I$(srcdir)/dat/include/
-I$(srcdir)/dapl/include/ \
+                                -I$(srcdir)/dapl/common
-I$(srcdir)/dapl/udapl/linux \
+                                -I$(srcdir)/dapl/openib_scm
+
 if HAVE_LD_VERSION_SCRIPT
     dat_version_script =
-Wl,--version-script=$(srcdir)/dat/udat/libdat2.map
     daplofa_version_script =
-Wl,--version-script=$(srcdir)/dapl/udapl/libdaplofa.map
-			
+    daploscm_version_script =
-Wl,--version-script=$(srcdir)/dapl/udapl/libdaploscm.map
 else
     dat_version_script = 
     daplofa_version_script = 
-    
+    daploscm_version_script =
 endif
 
 #
@@ -173,12 +183,125 @@ dapl_udapl_libdaplofa_la_SOURCES =
dapl/udapl/dapl_init.c \
         dapl/openib_cma/dapl_ib_cq.c                \
         dapl/openib_cma/dapl_ib_qp.c                \
         dapl/openib_cma/dapl_ib_cm.c                \
-        dapl/openib_cma/dapl_ib_mem.c $(XPROGRAMS)
+        dapl/openib_cma/dapl_ib_mem.c $(XPROGRAMS_CMA)
 
 dapl_udapl_libdaplofa_la_LDFLAGS = -version-info 2:0:0
$(daplofa_version_script) \
 				   -Wl,-init,dapl_init
-Wl,-fini,dapl_fini \
 				   -lpthread -libverbs -lrdmacm 
 				
+#
+# uDAPL OpenFabrics Socket CM version: libdaploscm.so
+#
+dapl_udapl_libdaploscm_la_SOURCES = dapl/udapl/dapl_init.c \
+        dapl/udapl/dapl_evd_create.c               \
+        dapl/udapl/dapl_evd_query.c                \
+        dapl/udapl/dapl_cno_create.c               \
+        dapl/udapl/dapl_cno_modify_agent.c         \
+        dapl/udapl/dapl_cno_free.c                 \
+        dapl/udapl/dapl_cno_wait.c                 \
+        dapl/udapl/dapl_cno_query.c                \
+        dapl/udapl/dapl_lmr_create.c               \
+        dapl/udapl/dapl_evd_wait.c                 \
+        dapl/udapl/dapl_evd_disable.c              \
+        dapl/udapl/dapl_evd_enable.c               \
+        dapl/udapl/dapl_evd_modify_cno.c           \
+        dapl/udapl/dapl_evd_set_unwaitable.c       \
+        dapl/udapl/dapl_evd_clear_unwaitable.c     \
+        dapl/udapl/linux/dapl_osd.c                \
+        dapl/common/dapl_cookie.c                   \
+        dapl/common/dapl_cr_accept.c                \
+        dapl/common/dapl_cr_query.c                 \
+        dapl/common/dapl_cr_reject.c                \
+        dapl/common/dapl_cr_util.c                  \
+        dapl/common/dapl_cr_callback.c              \
+        dapl/common/dapl_cr_handoff.c               \
+        dapl/common/dapl_ep_connect.c               \
+        dapl/common/dapl_ep_create.c                \
+        dapl/common/dapl_ep_disconnect.c            \
+        dapl/common/dapl_ep_dup_connect.c           \
+        dapl/common/dapl_ep_free.c                  \
+        dapl/common/dapl_ep_reset.c                 \
+        dapl/common/dapl_ep_get_status.c            \
+        dapl/common/dapl_ep_modify.c                \
+        dapl/common/dapl_ep_post_rdma_read.c        \
+        dapl/common/dapl_ep_post_rdma_write.c       \
+        dapl/common/dapl_ep_post_recv.c             \
+        dapl/common/dapl_ep_post_send.c             \
+        dapl/common/dapl_ep_query.c                 \
+        dapl/common/dapl_ep_util.c                  \
+        dapl/common/dapl_evd_dequeue.c              \
+        dapl/common/dapl_evd_free.c                 \
+        dapl/common/dapl_evd_post_se.c              \
+        dapl/common/dapl_evd_resize.c               \
+        dapl/common/dapl_evd_util.c                 \
+        dapl/common/dapl_evd_cq_async_error_callb.c \
+        dapl/common/dapl_evd_qp_async_error_callb.c \
+        dapl/common/dapl_evd_un_async_error_callb.c \
+        dapl/common/dapl_evd_connection_callb.c     \
+        dapl/common/dapl_evd_dto_callb.c            \
+        dapl/common/dapl_get_consumer_context.c     \
+        dapl/common/dapl_get_handle_type.c          \
+        dapl/common/dapl_hash.c                     \
+        dapl/common/dapl_hca_util.c                 \
+        dapl/common/dapl_ia_close.c                 \
+        dapl/common/dapl_ia_open.c                  \
+        dapl/common/dapl_ia_query.c                 \
+        dapl/common/dapl_ia_util.c                  \
+        dapl/common/dapl_llist.c                    \
+        dapl/common/dapl_lmr_free.c                 \
+        dapl/common/dapl_lmr_query.c                \
+        dapl/common/dapl_lmr_util.c                 \
+        dapl/common/dapl_lmr_sync_rdma_read.c       \
+        dapl/common/dapl_lmr_sync_rdma_write.c      \
+        dapl/common/dapl_mr_util.c                  \
+        dapl/common/dapl_provider.c                 \
+        dapl/common/dapl_sp_util.c                  \
+        dapl/common/dapl_psp_create.c               \
+        dapl/common/dapl_psp_create_any.c           \
+        dapl/common/dapl_psp_free.c                 \
+        dapl/common/dapl_psp_query.c                \
+        dapl/common/dapl_pz_create.c                \
+        dapl/common/dapl_pz_free.c                  \
+        dapl/common/dapl_pz_query.c                 \
+        dapl/common/dapl_pz_util.c                  \
+        dapl/common/dapl_rmr_create.c               \
+        dapl/common/dapl_rmr_free.c                 \
+        dapl/common/dapl_rmr_bind.c                 \
+        dapl/common/dapl_rmr_query.c                \
+        dapl/common/dapl_rmr_util.c                 \
+        dapl/common/dapl_rsp_create.c               \
+        dapl/common/dapl_rsp_free.c                 \
+        dapl/common/dapl_rsp_query.c                \
+        dapl/common/dapl_cno_util.c                 \
+        dapl/common/dapl_set_consumer_context.c     \
+        dapl/common/dapl_ring_buffer_util.c         \
+        dapl/common/dapl_name_service.c             \
+        dapl/common/dapl_timer_util.c               \
+        dapl/common/dapl_ep_create_with_srq.c       \
+        dapl/common/dapl_ep_recv_query.c            \
+        dapl/common/dapl_ep_set_watermark.c         \
+        dapl/common/dapl_srq_create.c               \
+        dapl/common/dapl_srq_free.c                 \
+        dapl/common/dapl_srq_query.c                \
+        dapl/common/dapl_srq_resize.c               \
+        dapl/common/dapl_srq_post_recv.c            \
+        dapl/common/dapl_srq_set_lw.c               \
+        dapl/common/dapl_srq_util.c                 \
+        dapl/common/dapl_debug.c                    \
+        dapl/common/dapl_ia_ha.c                    \
+        dapl/common/dapl_csp.c                      \
+        dapl/common/dapl_ep_post_send_invalidate.c  \
+        dapl/common/dapl_ep_post_rdma_read_to_rmr.c \
+        dapl/openib_scm/dapl_ib_util.c              \
+        dapl/openib_scm/dapl_ib_cq.c                \
+        dapl/openib_scm/dapl_ib_qp.c                \
+        dapl/openib_scm/dapl_ib_cm.c                \
+        dapl/openib_scm/dapl_ib_mem.c $(XPROGRAMS_SCM)
+
+dapl_udapl_libdaploscm_la_LDFLAGS = -version-info 2:0:0
$(daploscm_version_script) \
+                                   -Wl,-init,dapl_init
-Wl,-fini,dapl_fini \
+                                   -lpthread -libverbs
+
 libdatincludedir = $(includedir)/dat2
 
 libdatinclude_HEADERS = dat/include/dat2/dat.h \
@@ -238,8 +361,12 @@ EXTRA_DIST = dat/common/dat_dictionary.h \
 	     dapl/include/dapl_vendor.h \
 	     dapl/openib_cma/dapl_ib_dto.h \
 	     dapl/openib_cma/dapl_ib_util.h \
+	     dapl/openib_scm/dapl_ib_dto.h \
+	     dapl/openib_scm/dapl_ib_util.h \
 	     dat/udat/libdat2.map \
+	     doc/dat.conf \
 	     dapl/udapl/libdaplofa.map \
+	     dapl/udapl/libdaploscm.map \
 	     dapl.spec.in \
 	     $(man_MANS) \
 	     test/dapltest/include/dapl_bpool.h \
@@ -272,7 +399,7 @@ EXTRA_DIST = dat/common/dat_dictionary.h \
 	 
 dist-hook: dapl.spec 
 	cp dapl.spec $(distdir)
-
+	
 install-exec-hook:
 	if test -e $(sysconfdir)/dat.conf; then \
 		sed -e '/ofa-v2-.* u2/d' < $(sysconfdir)/dat.conf >
/tmp/$$$$ofadapl; \
@@ -280,15 +407,18 @@ install-exec-hook:
 	fi; \
 	echo ofa-v2-ib0 u2.0 nonthreadsafe default libdaplofa.so.2
dapl.2.0 '"ib0 0" ""' >> $(sysconfdir)/dat.conf; \
 	echo ofa-v2-ib1 u2.0 nonthreadsafe default libdaplofa.so.2
dapl.2.0 '"ib1 0" ""' >> $(sysconfdir)/dat.conf; \
-	echo ofa-v2-ib2 u2.0 nonthreadsafe default libdaplofa.so.2
dapl.2.0 '"ib2 0" ""' >> $(sysconfdir)/dat.conf; \
-	echo ofa-v2-ib3 u2.0 nonthreadsafe default libdaplofa.so.2
dapl.2.0 '"ib3 0" ""' >> $(sysconfdir)/dat.conf; \
-	echo ofa-v2-bond u2.0 nonthreadsafe default libdaplofa.so.2
dapl.2.0 '"bond0 0" ""' >> $(sysconfdir)/dat.conf;
+	echo ofa-v2-mthca0-1 u2.0 nonthreadsafe default libdaploscm.so.2
dapl.2.0 '"mthca0 1" ""' >> $(sysconfdir)/dat.conf; \
+	echo ofa-v2-mthca0-2 u2.0 nonthreadsafe default libdaploscm.so.2
dapl.2.0 '"mthca0 2" ""' >> $(sysconfdir)/dat.conf; \
+	echo ofa-v2-mlx4_0-1 u2.0 nonthreadsafe default libdaploscm.so.2
dapl.2.0 '"mlx4_0 1" ""' >> $(sysconfdir)/dat.conf; \
+	echo ofa-v2-mlx4_0-2 u2.0 nonthreadsafe default libdaploscm.so.2
dapl.2.0 '"mlx4_0 2" ""' >> $(sysconfdir)/dat.conf;
+	echo ofa-v2-ipath0-1 u2.0 nonthreadsafe default libdaploscm.so.2
dapl.2.0 '"ipath0 1" ""' >> $(sysconfdir)/dat.conf; \
+	echo ofa-v2-ipath0-2 u2.0 nonthreadsafe default libdaploscm.so.2
dapl.2.0 '"ipath0 2" ""' >> $(sysconfdir)/dat.conf;
+	echo ofa-v2-ehca0-2 u2.0 nonthreadsafe default libdaploscm.so.2
dapl.2.0 '"ehca0 1" ""' >> $(sysconfdir)/dat.conf;
 
 uninstall-hook:
 	if test -e $(sysconfdir)/dat.conf; then \
 		sed -e '/ofa-v2-.* u2/d' < $(sysconfdir)/dat.conf >
/tmp/$$$$ofadapl; \
 		cp /tmp/$$$$ofadapl $(sysconfdir)/dat.conf; \
-	fi; 
+	fi;
 
 SUBDIRS = . test/dtest test/dapltest
-
diff --git a/dapl.spec.in b/dapl.spec.in
index de28e48..b73967a 100644
--- a/dapl.spec.in
+++ b/dapl.spec.in
@@ -95,9 +95,13 @@ if [ -e %{_sysconfdir}/dat.conf ]; then
 fi
 echo ofa-v2-ib0 u2.0 nonthreadsafe default libdaplofa.so.2 dapl.2.0
'"ib0 0" ""' >> %{_sysconfdir}/dat.conf
 echo ofa-v2-ib1 u2.0 nonthreadsafe default libdaplofa.so.2 dapl.2.0
'"ib1 0" ""' >> %{_sysconfdir}/dat.conf
-echo ofa-v2-ib2 u2.0 nonthreadsafe default libdaplofa.so.2 dapl.2.0
'"ib2 0" ""' >> %{_sysconfdir}/dat.conf
-echo ofa-v2-ib3 u2.0 nonthreadsafe default libdaplofa.so.2 dapl.2.0
'"ib3 0" ""' >> %{_sysconfdir}/dat.conf
-echo ofa-v2-bond u2.0 nonthreadsafe default libdaplofa.so.2 dapl.2.0
'"bond0 0" ""' >> %{_sysconfdir}/dat.conf
+echo ofa-v2-mthca0-1 u2.0 nonthreadsafe default libdaploscm.so.2
dapl.2.0 '"mthca0 1" ""' >> %{_sysconfdir}/dat.conf
+echo ofa-v2-mthca0-2 u2.0 nonthreadsafe default libdaploscm.so.2
dapl.2.0 '"mthca0 2" ""' >> %{_sysconfdir}/dat.conf
+echo ofa-v2-mlx4_0-1 u2.0 nonthreadsafe default libdaploscm.so.2
dapl.2.0 '"mlx4_0 1" ""' >> %{_sysconfdir}/dat.conf
+echo ofa-v2-mlx4_0-2 u2.0 nonthreadsafe default libdaploscm.so.2
dapl.2.0 '"mlx4_0 2" ""' >> %{_sysconfdir}/dat.conf
+echo ofa-v2-ipath0-1 u2.0 nonthreadsafe default libdaploscm.so.2
dapl.2.0 '"ipath0 1" ""' >> %{_sysconfdir}/dat.conf
+echo ofa-v2-ipath0-2 u2.0 nonthreadsafe default libdaploscm.so.2
dapl.2.0 '"ipath0 2" ""' >> %{_sysconfdir}/dat.conf
+echo ofa-v2-ehca0-1 u2.0 nonthreadsafe default libdaploscm.so.2
dapl.2.0 '"ehca0 1" ""' >> %{_sysconfdir}/dat.conf
 
 %postun 
 /sbin/ldconfig
diff --git a/dapl/openib_scm/dapl_ib_cm.c b/dapl/openib_scm/dapl_ib_cm.c
index 94e3959..9b30625 100644
--- a/dapl/openib_scm/dapl_ib_cm.c
+++ b/dapl/openib_scm/dapl_ib_cm.c
@@ -57,76 +57,171 @@
 #include <unistd.h>
 #include <fcntl.h>
 #include <netinet/tcp.h>
-#include <sysfs/libsysfs.h>
+#include <byteswap.h>
+#include <poll.h>
 
-/* prototypes */
-static uint16_t dapli_get_lid( struct ibv_device *dev, int port );
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
 
-static DAT_RETURN dapli_socket_connect ( DAPL_EP		*ep_ptr,
-					 DAT_IA_ADDRESS_PTR	r_addr,
-					 DAT_CONN_QUAL		r_qual,
-					 DAT_COUNT		p_size,
-					 DAT_PVOID		p_data
);
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+static inline uint64_t cpu_to_be64(uint64_t x) {return bswap_64(x);}
+#elif __BYTE_ORDER == __BIG_ENDIAN
+static inline uint64_t cpu_to_be64(uint64_t x) {return x;}
+#endif
 
-static DAT_RETURN dapli_socket_listen ( DAPL_IA
*ia_ptr,
-					DAT_CONN_QUAL
serviceID,
-					DAPL_SP			*sp_ptr
);
+extern int g_scm_pipe[2];
 
-static DAT_RETURN dapli_socket_accept(	ib_cm_srvc_handle_t cm_ptr );
+static struct ib_cm_handle *dapli_cm_create(void)
+{ 
+	struct ib_cm_handle *cm_ptr;
 
-static DAT_RETURN dapli_socket_accept_final(	DAPL_EP		*ep_ptr,
-						DAPL_CR		*cr_ptr,
-						DAT_COUNT	p_size,
-						DAT_PVOID	p_data
);
+	/* Allocate CM, init lock, and initialize */
+	if ((cm_ptr = dapl_os_alloc(sizeof(*cm_ptr))) == NULL) 
+		return NULL;
+
+        if (dapl_os_lock_init(&cm_ptr->lock)) 
+		goto bail;
 
-/* XXX temporary hack to get lid */
-static uint16_t dapli_get_lid(IN struct ibv_device *dev, IN int port)
+	(void)dapl_os_memzero(cm_ptr, sizeof(*cm_ptr));
+	cm_ptr->dst.ver = htons(DSCM_VER);
+	cm_ptr->socket = -1;
+	return cm_ptr;
+bail:
+	dapl_os_free(cm_ptr, sizeof(*cm_ptr));
+	return NULL;
+}
+
+/* mark for destroy, remove all references, schedule cleanup */
+static void dapli_cm_destroy(struct ib_cm_handle *cm_ptr)
 {
-	char path[128];
-	char val[16];
-	char name[256];
+	dapl_dbg_log(DAPL_DBG_TYPE_CM, 
+		     " cm_destroy: cm %p ep %p\n", cm_ptr,cm_ptr->ep);
+	
+	/* cleanup, never made it to work queue */
+	if (cm_ptr->state == SCM_INIT) {
+		if (cm_ptr->socket >= 0)  
+			close(cm_ptr->socket);
+		dapl_os_free(cm_ptr, sizeof(*cm_ptr));
+		return;
+	}
 
-	if (sysfs_get_mnt_path(path, sizeof path)) {
-		fprintf(stderr, "Couldn't find sysfs mount.\n");
-		return 0;
+	dapl_os_lock(&cm_ptr->lock);
+	cm_ptr->state = SCM_DESTROY;
+	if (cm_ptr->ep) {
+		cm_ptr->ep->cm_handle = IB_INVALID_HANDLE;
+		cm_ptr->ep->qp_handle = IB_INVALID_HANDLE;
 	}
-	sprintf(name, "%s/class/infiniband/%s/ports/%d/lid", path,
-		 ibv_get_device_name(dev), port);
 
-	if (sysfs_read_attribute_value(name, val, sizeof val)) {
-		fprintf(stderr, "Couldn't read LID at %s\n", name);
-		return 0;
+	/* close socket if still active */
+	if (cm_ptr->socket >= 0) {
+		close(cm_ptr->socket);
+		cm_ptr->socket = -1;
 	}
-	return strtol(val, NULL, 0);
+	dapl_os_unlock(&cm_ptr->lock);
+
+	/* wakeup work thread */
+        write(g_scm_pipe[1], "w", sizeof "w");
+}
+
+/* queue socket for processing CM work */
+static void dapli_cm_queue(struct ib_cm_handle *cm_ptr)
+{
+	/* add to work queue for cr thread processing */
+	dapl_llist_init_entry((DAPL_LLIST_ENTRY*)&cm_ptr->entry);
+	dapl_os_lock(&cm_ptr->hca->ib_trans.lock);
+	dapl_llist_add_tail(&cm_ptr->hca->ib_trans.list, 
+			    (DAPL_LLIST_ENTRY*)&cm_ptr->entry, cm_ptr);
+	dapl_os_unlock(&cm_ptr->hca->ib_trans.lock);
+
+        /* wakeup CM work thread */
+        write(g_scm_pipe[1], "w", sizeof "w");
+}
+
+static uint16_t dapli_get_lid(IN struct ibv_context *ctx, IN uint8_t
port)
+{
+	struct ibv_port_attr port_attr;
+
+	if(ibv_query_port(ctx, port,&port_attr))
+		return(0xffff);
+	else
+		return(port_attr.lid);
 }
 
 /*
- * ACTIVE: Create socket, connect, and exchange QP information 
+ * ACTIVE/PASSIVE: called from CR thread or consumer via ep_disconnect
  */
 static DAT_RETURN 
-dapli_socket_connect (	DAPL_EP			*ep_ptr,
-			DAT_IA_ADDRESS_PTR	r_addr,
-			DAT_CONN_QUAL		r_qual,
-			DAT_COUNT		p_size,
-			DAT_PVOID		p_data )
+dapli_socket_disconnect(dp_ib_cm_handle_t cm_ptr)
 {
-	ib_cm_handle_t	cm_ptr;
-	DAPL_IA		*ia_ptr = ep_ptr->header.owner_ia;
+	DAPL_EP	*ep_ptr = cm_ptr->ep;
+	DAT_UINT32 disc_data = htonl(0xdead);
+
+	if (ep_ptr == NULL)
+		return DAT_SUCCESS;
+	
+	dapl_os_lock(&cm_ptr->lock);
+	if ((cm_ptr->state == SCM_INIT) ||
+	    (cm_ptr->state == SCM_DISCONNECTED)) {
+		dapl_os_unlock(&cm_ptr->lock);
+		return DAT_SUCCESS;
+	} else {
+		/* send disc data, close socket, schedule destroy */
+		if (cm_ptr->socket >= 0) { 
+			write(cm_ptr->socket, &disc_data,
sizeof(disc_data));
+			close(cm_ptr->socket);
+			cm_ptr->socket = -1;
+		}
+		cm_ptr->state = SCM_DISCONNECTED;
+		write(g_scm_pipe[1], "w", sizeof "w");
+	}
+	dapl_os_unlock(&cm_ptr->lock);
+
+
+	if (ep_ptr->cr_ptr) {
+		dapls_cr_callback(cm_ptr,
+				  IB_CME_DISCONNECTED,
+				  NULL,
+				  ((DAPL_CR *)ep_ptr->cr_ptr)->sp_ptr);
+	} else {
+		dapl_evd_connection_callback(ep_ptr->cm_handle,
+					     IB_CME_DISCONNECTED,
+					     NULL,
+					     ep_ptr);
+	}	
+
+	/* remove reference from endpoint */
+	ep_ptr->cm_handle = NULL;
+	
+	/* schedule destroy */
+
+
+	return DAT_SUCCESS;
+}
+
+
+/*
+ * ACTIVE: Create socket, connect, defer exchange QP information to CR
thread
+ * to avoid blocking. 
+ */
+DAT_RETURN 
+dapli_socket_connect(DAPL_EP		*ep_ptr,
+	   	     DAT_IA_ADDRESS_PTR	r_addr,
+		     DAT_CONN_QUAL	r_qual,
+		     DAT_COUNT		p_size,
+		     DAT_PVOID		p_data)
+{
+	dp_ib_cm_handle_t cm_ptr;
 	int		len, opt = 1;
 	struct iovec    iovec[2];
-	short		rtu_data = htons(0x0E0F);
-	
-	dapl_dbg_log(DAPL_DBG_TYPE_EP, " connect: r_qual %d\n", r_qual);
+	DAPL_IA		*ia_ptr = ep_ptr->header.owner_ia;
+
+	dapl_dbg_log(DAPL_DBG_TYPE_EP, " connect: r_qual %d
p_size=%d\n", 
+		     r_qual,p_size);
 			
-	/*
-	 *  Allocate CM and initialize
-	 */
-	if ((cm_ptr = dapl_os_alloc(sizeof(*cm_ptr))) == NULL ) {
+	cm_ptr = dapli_cm_create();
+	if (cm_ptr == NULL)
 		return DAT_INSUFFICIENT_RESOURCES;
-	}
-
-	(void) dapl_os_memzero( cm_ptr, sizeof( *cm_ptr ) );
-	cm_ptr->socket = -1;
 
 	/* create, connect, sockopt, and exchange QP information */
 	if ((cm_ptr->socket = socket(AF_INET,SOCK_STREAM,0)) < 0 ) {
@@ -136,204 +231,261 @@ dapli_socket_connect (	DAPL_EP
*ep_ptr,
 
 	((struct sockaddr_in*)r_addr)->sin_port = htons(r_qual);
 
-	if ( connect(cm_ptr->socket, r_addr, sizeof(*r_addr)) < 0 ) {
+	if (connect(cm_ptr->socket, r_addr, sizeof(*r_addr)) < 0) {
 		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
 			     " connect: %s on r_qual %d\n",
 			     strerror(errno), (unsigned int)r_qual);
-		dapl_os_free( cm_ptr, sizeof( *cm_ptr ) );
+		dapli_cm_destroy(cm_ptr);
 		return DAT_INVALID_ADDRESS;
 	}
 
setsockopt(cm_ptr->socket,IPPROTO_TCP,TCP_NODELAY,&opt,sizeof(opt));
-	
+
+	dapl_dbg_log(DAPL_DBG_TYPE_EP, " socket connected!\n");
+
+
 	/* Send QP info, IA address, and private data */
-	cm_ptr->dst.qpn = ep_ptr->qp_handle->qp_num;
-	cm_ptr->dst.port = ia_ptr->hca_ptr->port_num;
-	cm_ptr->dst.lid = dapli_get_lid(
ia_ptr->hca_ptr->ib_trans.ib_dev, 
-					 ia_ptr->hca_ptr->port_num );
+	cm_ptr->dst.qpn = htonl(ep_ptr->qp_handle->qp_num);
+	cm_ptr->dst.port = htons(ia_ptr->hca_ptr->port_num);
+	cm_ptr->dst.lid = 
+		htons(dapli_get_lid(ia_ptr->hca_ptr->ib_hca_handle, 
+
(uint8_t)ia_ptr->hca_ptr->port_num));
+	if (cm_ptr->dst.lid == 0xffff)
+		goto bail;
+
+        /* in network order */
+        if (ibv_query_gid(ia_ptr->hca_ptr->ib_hca_handle,
+				    (uint8_t)ia_ptr->hca_ptr->port_num,
+				    0,
+                                    &cm_ptr->dst.gid))
+		goto bail;
+
 	cm_ptr->dst.ia_address = ia_ptr->hca_ptr->hca_address;
-	cm_ptr->dst.p_size = p_size;
+	cm_ptr->dst.p_size = htonl(p_size);
 	iovec[0].iov_base = &cm_ptr->dst;
 	iovec[0].iov_len  = sizeof(ib_qp_cm_t);
-	if ( p_size ) {
+	if (p_size) {
 		iovec[1].iov_base = p_data;
 		iovec[1].iov_len  = p_size;
 	}
-	len = writev( cm_ptr->socket, iovec, (p_size ? 2:1) );
-    	if ( len != (p_size + sizeof(ib_qp_cm_t)) ) {
+
+	dapl_dbg_log(DAPL_DBG_TYPE_EP," socket connected, write QP and
private data\n"); 
+	len = writev(cm_ptr->socket, iovec, (p_size ? 2:1));
+    	if (len != (p_size + sizeof(ib_qp_cm_t))) {
 		dapl_dbg_log(DAPL_DBG_TYPE_ERR, 
 			     " connect write: ERR %s, wcnt=%d\n",
 			     strerror(errno), len); 
 		goto bail;
 	}
-	dapl_dbg_log(DAPL_DBG_TYPE_EP, 
+	dapl_dbg_log(DAPL_DBG_TYPE_CM, 
 		     " connect: SRC port=0x%x lid=0x%x, qpn=0x%x,
psize=%d\n",
-		     cm_ptr->dst.port, cm_ptr->dst.lid, 
-		     cm_ptr->dst.qpn, cm_ptr->dst.p_size ); 
+		     ntohs(cm_ptr->dst.port), ntohs(cm_ptr->dst.lid), 
+		     ntohl(cm_ptr->dst.qpn), ntohl(cm_ptr->dst.p_size));

+        dapl_dbg_log(DAPL_DBG_TYPE_CM,
+                     " connect SRC GID subnet %016llx id %016llx\n",
+                     (unsigned long long) 
+
cpu_to_be64(cm_ptr->dst.gid.global.subnet_prefix),
+                     (unsigned long long) 
+
cpu_to_be64(cm_ptr->dst.gid.global.interface_id));
+
+	/* queue up to work thread to avoid blocking consumer */
+	cm_ptr->state = SCM_CONN_PENDING;
+	cm_ptr->hca = ia_ptr->hca_ptr;
+	cm_ptr->ep = ep_ptr;
+	dapli_cm_queue(cm_ptr);
+	return DAT_SUCCESS;
+bail:
+	/* close socket, free cm structure */
+	dapli_cm_destroy(cm_ptr);
+	return DAT_INTERNAL_ERROR;
+}
+	
+
+/*
+ * ACTIVE: exchange QP information, called from CR thread
+ */
+void 
+dapli_socket_connect_rtu(dp_ib_cm_handle_t cm_ptr)
+{
+	DAPL_EP		*ep_ptr = cm_ptr->ep;
+	int		len;
+	struct iovec    iovec[2];
+	short		rtu_data = htons(0x0E0F);
+	ib_cm_events_t	event = IB_CME_DESTINATION_REJECT;
 
 	/* read DST information into cm_ptr, overwrite SRC info */
-	len = readv( cm_ptr->socket, iovec, 1 );
-	if ( len != sizeof(ib_qp_cm_t) ) {
+	dapl_dbg_log(DAPL_DBG_TYPE_EP," connect_rtu: recv peer QP
data\n"); 
+
+	iovec[0].iov_base = &cm_ptr->dst;
+	iovec[0].iov_len  = sizeof(ib_qp_cm_t);
+	len = readv(cm_ptr->socket, iovec, 1);
+	if (len != sizeof(ib_qp_cm_t) || ntohs(cm_ptr->dst.ver) !=
DSCM_VER) {
 		dapl_dbg_log(DAPL_DBG_TYPE_ERR, 
-			     " connect read: ERR %s, rcnt=%d\n",
-			     strerror(errno), len); 
+			     " connect_rtu read: ERR %s, rcnt=%d,
ver=%d\n",
+			     strerror(errno), len, cm_ptr->dst.ver); 
+		goto bail;
+	}
+	/* check for consumer reject */
+	if (cm_ptr->dst.rej) {
+		dapl_dbg_log(DAPL_DBG_TYPE_CM, 
+			     " connect_rtu read: PEER REJ
reason=0x%x\n",
+			     ntohs(cm_ptr->dst.rej)); 
+		event = IB_CME_DESTINATION_REJECT_PRIVATE_DATA;
 		goto bail;
 	}
+
+	/* convert peer response values to host order */
+	cm_ptr->dst.port = ntohs(cm_ptr->dst.port);
+	cm_ptr->dst.lid = ntohs(cm_ptr->dst.lid);
+	cm_ptr->dst.qpn = ntohl(cm_ptr->dst.qpn);
+	cm_ptr->dst.p_size = ntohl(cm_ptr->dst.p_size);
+
+	/* save remote address information */
+	dapl_os_memcpy( &ep_ptr->remote_ia_address, 
+			&cm_ptr->dst.ia_address, 
+			sizeof(ep_ptr->remote_ia_address));
+
 	dapl_dbg_log(DAPL_DBG_TYPE_EP, 
-		     " connect: DST port=0x%x lid=0x%x, qpn=0x%x,
psize=%d\n",
+		     " connect_rtu: DST %s port=0x%x lid=0x%x, qpn=0x%x,
psize=%d\n",
+		     inet_ntoa(((struct sockaddr_in
*)&cm_ptr->dst.ia_address)->sin_addr),
 		     cm_ptr->dst.port, cm_ptr->dst.lid, 
-		     cm_ptr->dst.qpn, cm_ptr->dst.p_size ); 
+		     cm_ptr->dst.qpn, cm_ptr->dst.p_size); 
 
 	/* validate private data size before reading */
-	if ( cm_ptr->dst.p_size > IB_MAX_REP_PDATA_SIZE ) {
+	if (cm_ptr->dst.p_size > IB_MAX_REP_PDATA_SIZE) {
 		dapl_dbg_log(DAPL_DBG_TYPE_ERR, 
-			     " connect read: psize (%d) wrong\n",
+			     " connect_rtu read: psize (%d) wrong\n",
 			     cm_ptr->dst.p_size ); 
 		goto bail;
 	}
 
 	/* read private data into cm_handle if any present */
-	if ( cm_ptr->dst.p_size ) {
+	dapl_dbg_log(DAPL_DBG_TYPE_EP," socket connected, read private
data\n"); 
+	if (cm_ptr->dst.p_size) {
 		iovec[0].iov_base = cm_ptr->p_data;
 		iovec[0].iov_len  = cm_ptr->dst.p_size;
-		len = readv( cm_ptr->socket, iovec, 1 );
-		if ( len != cm_ptr->dst.p_size ) {
+		len = readv(cm_ptr->socket, iovec, 1);
+		if (len != cm_ptr->dst.p_size) {
 			dapl_dbg_log(DAPL_DBG_TYPE_ERR, 
-				" connect read pdata: ERR %s,
rcnt=%d\n",
+				" connect_rtu read pdata: ERR %s,
rcnt=%d\n",
 				strerror(errno), len); 
 			goto bail;
 		}
 	}
 
 	/* modify QP to RTR and then to RTS with remote info */
-	if ( dapls_modify_qp_state( ep_ptr->qp_handle, 
-				    IBV_QPS_RTR, &cm_ptr->dst ) !=
DAT_SUCCESS )
+	if (dapls_modify_qp_state(ep_ptr->qp_handle, 
+				  IBV_QPS_RTR, &cm_ptr->dst) !=
DAT_SUCCESS)
 		goto bail;
 
-	if ( dapls_modify_qp_state( ep_ptr->qp_handle, 
-				    IBV_QPS_RTS, &cm_ptr->dst ) !=
DAT_SUCCESS )
+	if (dapls_modify_qp_state(ep_ptr->qp_handle, 
+				   IBV_QPS_RTS, &cm_ptr->dst) !=
DAT_SUCCESS)
 		goto bail;
 		 
 	ep_ptr->qp_state = IB_QP_STATE_RTS;
 
+	dapl_dbg_log(DAPL_DBG_TYPE_EP," connect_rtu: send RTU\n"); 
+
 	/* complete handshake after final QP state change */
-	write(cm_ptr->socket, &rtu_data, sizeof(rtu_data) );
+	write(cm_ptr->socket, &rtu_data, sizeof(rtu_data));
 
 	/* init cm_handle and post the event with private data */
 	ep_ptr->cm_handle = cm_ptr;
-	dapl_dbg_log( DAPL_DBG_TYPE_EP," ACTIVE: connected!\n" ); 
-	dapl_evd_connection_callback(   ep_ptr->cm_handle, 
-					IB_CME_CONNECTED, 
-					cm_ptr->p_data, 
-					ep_ptr );	
-	return DAT_SUCCESS;
-
+	cm_ptr->state = SCM_CONNECTED;
+	dapl_dbg_log(DAPL_DBG_TYPE_EP," ACTIVE: connected!\n"); 
+	dapl_evd_connection_callback(cm_ptr, 
+				     IB_CME_CONNECTED, 
+				     cm_ptr->p_data, 
+				     ep_ptr);	
+	return;
 bail:
 	/* close socket, free cm structure and post error event */
-	if ( cm_ptr->socket >= 0 ) 
-		close(cm_ptr->socket);
-	dapl_os_free( cm_ptr, sizeof( *cm_ptr ) );
-	dapls_ib_reinit_ep( ep_ptr ); /* reset QP state */
-
-	dapl_evd_connection_callback(	ep_ptr->cm_handle, 
-					IB_CME_LOCAL_FAILURE, 
-					NULL, 
-					ep_ptr );
-	return DAT_INTERNAL_ERROR;
+	dapli_cm_destroy(cm_ptr);
+	dapls_ib_reinit_ep(ep_ptr); /* reset QP state */
+	dapl_evd_connection_callback(NULL, event, NULL, ep_ptr);
 }
 
-
 /*
  * PASSIVE: Create socket, listen, accept, exchange QP information 
  */
-static DAT_RETURN 
-dapli_socket_listen (	DAPL_IA		*ia_ptr,
-			DAT_CONN_QUAL	serviceID,
-			DAPL_SP		*sp_ptr )
+DAT_RETURN 
+dapli_socket_listen(DAPL_IA		*ia_ptr,
+		    DAT_CONN_QUAL	serviceID,
+		    DAPL_SP		*sp_ptr )
 {
 	struct sockaddr_in	addr;
 	ib_cm_srvc_handle_t	cm_ptr = NULL;
 	int			opt = 1;
 	DAT_RETURN		dat_status = DAT_SUCCESS;
 
-	dapl_dbg_log (	DAPL_DBG_TYPE_EP,
-			" listen(ia_ptr %p ServiceID %d sp_ptr %p)\n",
-			ia_ptr, serviceID, sp_ptr);
+	dapl_dbg_log(DAPL_DBG_TYPE_EP,
+		     " listen(ia_ptr %p ServiceID %d sp_ptr %p)\n",
+		     ia_ptr, serviceID, sp_ptr);
 
-	/* Allocate CM and initialize */
-	if ((cm_ptr = dapl_os_alloc(sizeof(*cm_ptr))) == NULL) 
+	cm_ptr = dapli_cm_create();
+	if (cm_ptr == NULL)
 		return DAT_INSUFFICIENT_RESOURCES;
 
-	(void) dapl_os_memzero( cm_ptr, sizeof( *cm_ptr ) );
-	
-	cm_ptr->socket = cm_ptr->l_socket = -1;
 	cm_ptr->sp = sp_ptr;
-	cm_ptr->hca_ptr = ia_ptr->hca_ptr;
+	cm_ptr->hca = ia_ptr->hca_ptr;
 	
 	/* bind, listen, set sockopt, accept, exchange data */
-	if ((cm_ptr->l_socket = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
+	if ((cm_ptr->socket = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
 		dapl_dbg_log (DAPL_DBG_TYPE_ERR, 
 				"socket for listen returned %d\n",
errno);
 		dat_status = DAT_INSUFFICIENT_RESOURCES;
 		goto bail;
 	}
 
-
setsockopt(cm_ptr->l_socket,SOL_SOCKET,SO_REUSEADDR,&opt,sizeof(opt));
+
setsockopt(cm_ptr->socket,SOL_SOCKET,SO_REUSEADDR,&opt,sizeof(opt));
 	addr.sin_port        = htons(serviceID);
 	addr.sin_family      = AF_INET;
 	addr.sin_addr.s_addr = INADDR_ANY;
 
-	if (( bind( cm_ptr->l_socket,(struct sockaddr*)&addr,
sizeof(addr) ) < 0) ||
-		   (listen( cm_ptr->l_socket, 128 ) < 0) ) {
-	
+	if ((bind(cm_ptr->socket,(struct sockaddr*)&addr, sizeof(addr))
< 0) ||
+	    (listen(cm_ptr->socket, 128) < 0)) {
 		dapl_dbg_log( DAPL_DBG_TYPE_CM,
 				" listen: ERROR %s on conn_qual 0x%x\n",
 				strerror(errno),serviceID); 
-
-		if ( errno == EADDRINUSE )
+		if (errno == EADDRINUSE)
 			dat_status = DAT_CONN_QUAL_IN_USE;
 		else
 			dat_status = DAT_CONN_QUAL_UNAVAILABLE;
-
 		goto bail;
 	}
 	
 	/* set cm_handle for this service point, save listen socket */
 	sp_ptr->cm_srvc_handle = cm_ptr;
 
-	/* add to SP->CR thread list */
-	dapl_llist_init_entry((DAPL_LLIST_ENTRY*)&cm_ptr->entry);
-	dapl_os_lock( &cm_ptr->hca_ptr->ib_trans.lock );
-	dapl_llist_add_tail(&cm_ptr->hca_ptr->ib_trans.list, 
-			    (DAPL_LLIST_ENTRY*)&cm_ptr->entry, cm_ptr);
-	dapl_os_unlock(&cm_ptr->hca_ptr->ib_trans.lock);
+	/* queue up listen socket to process inbound CR's */
+	cm_ptr->state = SCM_LISTEN;
+	dapli_cm_queue(cm_ptr);
+
+	dapl_dbg_log(DAPL_DBG_TYPE_CM,
+		     " listen: qual 0x%x cr %p s_fd %d\n",
+		     ntohs(serviceID), cm_ptr, cm_ptr->socket ); 
 
-	dapl_dbg_log( DAPL_DBG_TYPE_CM,
-			" listen: qual 0x%x cr %p s_fd %d\n",
-			ntohs(serviceID), cm_ptr, cm_ptr->l_socket ); 
-	
 	return dat_status;
 bail:
 	dapl_dbg_log( DAPL_DBG_TYPE_CM,
 			" listen: ERROR on conn_qual 0x%x\n",serviceID);

-	if ( cm_ptr->l_socket >= 0 )
-		close( cm_ptr->l_socket );
-	dapl_os_free( cm_ptr, sizeof( *cm_ptr ) );
+	dapli_cm_destroy(cm_ptr);
 	return dat_status;
 }
 
 
 /*
- * PASSIVE: send local QP information, private data, and wait for 
- *	    active side to respond with QP RTS/RTR status 
+ * PASSIVE: accept socket, receive peer QP information, private data,
post cr_event 
  */
-static DAT_RETURN 
+DAT_RETURN 
 dapli_socket_accept(ib_cm_srvc_handle_t cm_ptr)
 {
-	ib_cm_handle_t	acm_ptr;
+	dp_ib_cm_handle_t acm_ptr;
 	void		*p_data = NULL;
 	int		len;
 	DAT_RETURN	dat_status = DAT_SUCCESS;
 		
+	dapl_dbg_log(DAPL_DBG_TYPE_EP," socket_accept\n"); 
+
 	/* Allocate accept CM and initialize */
 	if ((acm_ptr = dapl_os_alloc(sizeof(*acm_ptr))) == NULL) 
 		return DAT_INSUFFICIENT_RESOURCES;
@@ -342,155 +494,221 @@ dapli_socket_accept(ib_cm_srvc_handle_t cm_ptr)
 	
 	acm_ptr->socket = -1;
 	acm_ptr->sp = cm_ptr->sp;
-	acm_ptr->hca_ptr = cm_ptr->hca_ptr;
+	acm_ptr->hca = cm_ptr->hca;
 
 	len = sizeof(acm_ptr->dst.ia_address);
-	acm_ptr->socket = accept(cm_ptr->l_socket, 
+	acm_ptr->socket = accept(cm_ptr->socket, 
 				(struct
sockaddr*)&acm_ptr->dst.ia_address, 
-				(socklen_t*)&len );
+				(socklen_t*)&len);
 
-	if ( acm_ptr->socket < 0 ) {
+	if (acm_ptr->socket < 0) {
 		dapl_dbg_log(DAPL_DBG_TYPE_ERR, 
 			" accept: ERR %s on FD %d l_cr %p\n",
-			strerror(errno),cm_ptr->l_socket,cm_ptr); 
+			strerror(errno),cm_ptr->socket,cm_ptr); 
 		dat_status = DAT_INTERNAL_ERROR;
 		goto bail;
    	}
 
+	dapl_dbg_log(DAPL_DBG_TYPE_EP," socket accepted, read QP
data\n"); 
+
 	/* read in DST QP info, IA address. check for private data */
-	len = read( acm_ptr->socket, &acm_ptr->dst, sizeof(ib_qp_cm_t)
);
-	if ( len != sizeof(ib_qp_cm_t) ) {
+	len = read(acm_ptr->socket, &acm_ptr->dst, sizeof(ib_qp_cm_t));
+	if (len != sizeof(ib_qp_cm_t) || 
+	    ntohs(acm_ptr->dst.ver) != DSCM_VER) {
 		dapl_dbg_log(DAPL_DBG_TYPE_ERR, 
-			" accept read: ERR %s, rcnt=%d\n",
-			strerror(errno), len); 
+			     " accept read: ERR %s, rcnt=%d, ver=%d\n",
+			     strerror(errno), len, acm_ptr->dst.ver); 
 		dat_status = DAT_INTERNAL_ERROR;
 		goto bail;
-
 	}
+
+	/* convert accepted values to host order */
+	acm_ptr->dst.port = ntohs(acm_ptr->dst.port);
+	acm_ptr->dst.lid = ntohs(acm_ptr->dst.lid);
+	acm_ptr->dst.qpn = ntohl(acm_ptr->dst.qpn);
+	acm_ptr->dst.p_size = ntohl(acm_ptr->dst.p_size);
+
 	dapl_dbg_log(DAPL_DBG_TYPE_EP, 
-		" accept: DST port=0x%x lid=0x%x, qpn=0x%x, psize=%d\n",
-		acm_ptr->dst.port, acm_ptr->dst.lid, 
-		acm_ptr->dst.qpn, acm_ptr->dst.p_size ); 
+		     " accept: DST %s port=0x%x lid=0x%x, qpn=0x%x,
psize=%d\n",
+		     inet_ntoa(((struct sockaddr_in
*)&acm_ptr->dst.ia_address)->sin_addr),
+		     acm_ptr->dst.port, acm_ptr->dst.lid, 
+		     acm_ptr->dst.qpn, acm_ptr->dst.p_size); 
 
 	/* validate private data size before reading */
-	if ( acm_ptr->dst.p_size > IB_MAX_REQ_PDATA_SIZE ) {
+	if (acm_ptr->dst.p_size > IB_MAX_REQ_PDATA_SIZE) {
 		dapl_dbg_log(DAPL_DBG_TYPE_ERR, 
-			" accept read: psize (%d) wrong\n",
-			acm_ptr->dst.p_size ); 
+			     " accept read: psize (%d) wrong\n",
+			     acm_ptr->dst.p_size); 
 		dat_status = DAT_INTERNAL_ERROR;
 		goto bail;
 	}
 
+	dapl_dbg_log(DAPL_DBG_TYPE_EP," socket accepted, read private
data\n"); 
+
 	/* read private data into cm_handle if any present */
-	if ( acm_ptr->dst.p_size ) {
+	if (acm_ptr->dst.p_size) {
 		len = read( acm_ptr->socket, 
-			    acm_ptr->p_data, acm_ptr->dst.p_size );
-		if ( len != acm_ptr->dst.p_size ) {
+			    acm_ptr->p_data, acm_ptr->dst.p_size);
+		if (len != acm_ptr->dst.p_size) {
 			dapl_dbg_log(DAPL_DBG_TYPE_ERR, 
-				" accept read pdata: ERR %s, rcnt=%d\n",
-				strerror(errno), len ); 
+				     " accept read pdata: ERR %s,
rcnt=%d\n",
+				     strerror(errno), len); 
 			dat_status = DAT_INTERNAL_ERROR;
 			goto bail;
 		}
-		dapl_dbg_log(DAPL_DBG_TYPE_EP, 
-				" accept: psize=%d read\n",
-				acm_ptr->dst.p_size); 
+		dapl_dbg_log(DAPL_DBG_TYPE_EP," accept: psize=%d
read\n",len);
 		p_data = acm_ptr->p_data;
 	}
 	
-	/* trigger CR event and return SUCCESS */
-	dapls_cr_callback(  acm_ptr,
-			    IB_CME_CONNECTION_REQUEST_PENDING,
-		            p_data,
-			    acm_ptr->sp );
+	acm_ptr->state = SCM_ACCEPTING;
 
+	/* trigger CR event and return SUCCESS */
+	dapls_cr_callback(acm_ptr,
+			  IB_CME_CONNECTION_REQUEST_PENDING,
+		          p_data,
+			  acm_ptr->sp );
 	return DAT_SUCCESS;
-
 bail:
-	if ( acm_ptr->socket >=0 )
-		close( acm_ptr->socket );
-	dapl_os_free( acm_ptr, sizeof( *acm_ptr ) );
+	dapli_cm_destroy(acm_ptr);
 	return DAT_INTERNAL_ERROR;
 }
 
-
-static DAT_RETURN 
-dapli_socket_accept_final( DAPL_EP		*ep_ptr,
-			   DAPL_CR		*cr_ptr,
-			   DAT_COUNT		p_size,
-		           DAT_PVOID		p_data )
+/*
+ * PASSIVE: consumer accept, send local QP information, private data, 
+ * queue on work thread to receive RTU information to avoid blocking
+ * user thread. 
+ */
+DAT_RETURN 
+dapli_socket_accept_usr(DAPL_EP		*ep_ptr,
+			DAPL_CR		*cr_ptr,
+			DAT_COUNT	p_size,
+			DAT_PVOID	p_data)
 {
 	DAPL_IA		*ia_ptr = ep_ptr->header.owner_ia;
-	ib_cm_handle_t	cm_ptr = cr_ptr->ib_cm_handle;
-	ib_qp_cm_t	qp_cm;
+	dp_ib_cm_handle_t cm_ptr = cr_ptr->ib_cm_handle;
 	struct iovec    iovec[2];
 	int		len;
-	short		rtu_data = 0;
 
-	if (p_size >  IB_MAX_REP_PDATA_SIZE) 
+	if (p_size > IB_MAX_REP_PDATA_SIZE) 
 		return DAT_LENGTH_ERROR;
 
 	/* must have a accepted socket */
-	if ( cm_ptr->socket < 0 )
+	if (cm_ptr->socket < 0)
 		return DAT_INTERNAL_ERROR;
 	
+	dapl_dbg_log(DAPL_DBG_TYPE_EP, 
+		     " accept_usr: remote port=0x%x lid=0x%x"
+		     " qpn=0x%x psize=%d\n",
+		     cm_ptr->dst.port, cm_ptr->dst.lid,
+		     cm_ptr->dst.qpn, cm_ptr->dst.p_size); 
+
 	/* modify QP to RTR and then to RTS with remote info already read */
-	if ( dapls_modify_qp_state( ep_ptr->qp_handle, 
-				    IBV_QPS_RTR, &cm_ptr->dst ) !=
DAT_SUCCESS )
+	if (dapls_modify_qp_state(ep_ptr->qp_handle, 
+				  IBV_QPS_RTR, &cm_ptr->dst) !=
DAT_SUCCESS)
 		goto bail;
 
-	if ( dapls_modify_qp_state( ep_ptr->qp_handle, 
-				    IBV_QPS_RTS, &cm_ptr->dst ) !=
DAT_SUCCESS )
+	if (dapls_modify_qp_state(ep_ptr->qp_handle, 
+				  IBV_QPS_RTS, &cm_ptr->dst) !=
DAT_SUCCESS)
 		goto bail;
 
 	ep_ptr->qp_state = IB_QP_STATE_RTS;
 	
-	/* Send QP info, IA address, and private data */
-	qp_cm.qpn = ep_ptr->qp_handle->qp_num;
-	qp_cm.port = ia_ptr->hca_ptr->port_num;
-	qp_cm.lid = dapli_get_lid( ia_ptr->hca_ptr->ib_trans.ib_dev, 
-				   ia_ptr->hca_ptr->port_num );
-	qp_cm.ia_address = ia_ptr->hca_ptr->hca_address;
-	qp_cm.p_size = p_size;
-	iovec[0].iov_base = &qp_cm;
+	/* save remote address information */
+	dapl_os_memcpy( &ep_ptr->remote_ia_address, 
+			&cm_ptr->dst.ia_address, 
+			sizeof(ep_ptr->remote_ia_address));
+
+	/* send our QP info, IA address, and private data */
+	cm_ptr->dst.qpn = htonl(ep_ptr->qp_handle->qp_num);
+	cm_ptr->dst.port = htons(ia_ptr->hca_ptr->port_num);
+	cm_ptr->dst.lid =
htons(dapli_get_lid(ia_ptr->hca_ptr->ib_hca_handle, 
+
(uint8_t)ia_ptr->hca_ptr->port_num));
+	if (cm_ptr->dst.lid == 0xffff)
+		goto bail;
+
+        /* in network order */
+	if (ibv_query_gid(ia_ptr->hca_ptr->ib_hca_handle,
+			  (uint8_t)ia_ptr->hca_ptr->port_num,
+			  0,
+			  &cm_ptr->dst.gid))
+		goto bail;
+
+	cm_ptr->dst.ia_address = ia_ptr->hca_ptr->hca_address;
+	cm_ptr->dst.p_size = htonl(p_size);
+	iovec[0].iov_base = &cm_ptr->dst;
 	iovec[0].iov_len  = sizeof(ib_qp_cm_t);
 	if (p_size) {
 		iovec[1].iov_base = p_data;
 		iovec[1].iov_len  = p_size;
 	}
-	len = writev( cm_ptr->socket, iovec, (p_size ? 2:1) );
+	len = writev(cm_ptr->socket, iovec, (p_size ? 2:1));
     	if (len != (p_size + sizeof(ib_qp_cm_t))) {
 		dapl_dbg_log(DAPL_DBG_TYPE_ERR, 
-			     " accept_final: ERR %s, wcnt=%d\n",
+			     " accept_rtu: ERR %s, wcnt=%d\n",
 			     strerror(errno), len); 
 		goto bail;
 	}
-	dapl_dbg_log(DAPL_DBG_TYPE_EP, 
-		     " accept_final: SRC port=0x%x lid=0x%x, qpn=0x%x,
psize=%d\n",
-		     qp_cm.port, qp_cm.lid, qp_cm.qpn, qp_cm.p_size ); 
-	
+	dapl_dbg_log(DAPL_DBG_TYPE_CM, 
+		     " accept_usr: local port=0x%x lid=0x%x"
+		     " qpn=0x%x psize=%d\n",
+		     ntohs(cm_ptr->dst.port), ntohs(cm_ptr->dst.lid), 
+		     ntohl(cm_ptr->dst.qpn), ntohl(cm_ptr->dst.p_size));

+        dapl_dbg_log(DAPL_DBG_TYPE_CM,
+                     " accept_usr SRC GID subnet %016llx id %016llx\n",
+                     (unsigned long long) 
+
cpu_to_be64(cm_ptr->dst.gid.global.subnet_prefix),
+                     (unsigned long long) 
+
cpu_to_be64(cm_ptr->dst.gid.global.interface_id));
+
+	/* save state and reference to EP, queue for RTU data */
+	cm_ptr->ep = ep_ptr;
+	cm_ptr->hca = ia_ptr->hca_ptr;
+	cm_ptr->state = SCM_ACCEPTED;
+
+	/* restore remote address information for query */
+	dapl_os_memcpy( &cm_ptr->dst.ia_address, 
+			&ep_ptr->remote_ia_address,
+			sizeof(cm_ptr->dst.ia_address));
+
+	dapl_dbg_log( DAPL_DBG_TYPE_EP," PASSIVE: accepted!\n" ); 
+	dapli_cm_queue(cm_ptr);
+	return DAT_SUCCESS;
+bail:
+	dapl_dbg_log(DAPL_DBG_TYPE_ERR," accept_rtu: ERR !QP_RTR_RTS
\n"); 
+	dapli_cm_destroy(cm_ptr);
+	dapls_ib_reinit_ep(ep_ptr); /* reset QP state */
+	return DAT_INTERNAL_ERROR;
+}
+
+/*
+ * PASSIVE: read RTU from active peer, post CONN event
+ */
+void 
+dapli_socket_accept_rtu(dp_ib_cm_handle_t cm_ptr)
+{
+	int		len;
+	short		rtu_data = 0;
+
 	/* complete handshake after final QP state change */
-	len = read(cm_ptr->socket, &rtu_data, sizeof(rtu_data) );
-	if ( len != sizeof(rtu_data) || ntohs(rtu_data) != 0x0e0f ) {
+	len = read(cm_ptr->socket, &rtu_data, sizeof(rtu_data));
+	if (len != sizeof(rtu_data) || ntohs(rtu_data) != 0x0e0f) {
 		dapl_dbg_log(DAPL_DBG_TYPE_ERR, 
-			     " accept_final: ERR %s, rcnt=%d
rdata=%x\n",
-			     strerror(errno), len, ntohs(rtu_data) ); 
+			     " accept_rtu: ERR %s, rcnt=%d rdata=%x\n",
+			     strerror(errno), len, ntohs(rtu_data)); 
 		goto bail;
 	}
 
+	/* save state and reference to EP, queue for disc event */
+	cm_ptr->state = SCM_CONNECTED;
+
 	/* final data exchange if remote QP state is good to go */
 	dapl_dbg_log( DAPL_DBG_TYPE_EP," PASSIVE: connected!\n" ); 
-	dapls_cr_callback ( cm_ptr, IB_CME_CONNECTED, NULL, cm_ptr->sp
);
-	return DAT_SUCCESS;
-
+	dapls_cr_callback(cm_ptr, IB_CME_CONNECTED, NULL, cm_ptr->sp);
+	return;
 bail:
-	dapl_dbg_log( DAPL_DBG_TYPE_ERR," accept_final: ERR !QP_RTR_RTS
\n"); 
-	if ( cm_ptr >= 0 )
-		close( cm_ptr->socket );
-	dapl_os_free( cm_ptr, sizeof( *cm_ptr ) );
-	dapls_ib_reinit_ep( ep_ptr ); /* reset QP state */
-
-	return DAT_INTERNAL_ERROR;
+	dapls_ib_reinit_ep(cm_ptr->ep); /* reset QP state */
+	dapli_cm_destroy(cm_ptr);
+	dapls_cr_callback(cm_ptr, IB_CME_DESTINATION_REJECT, NULL,
cm_ptr->sp);
 }
 
 
@@ -528,18 +746,13 @@ dapls_ib_connect (
 	
 	dapl_dbg_log ( DAPL_DBG_TYPE_EP,
 			" connect(ep_handle %p ....)\n", ep_handle);
-	/*
-	 *  Sanity check
-	 */
-	if ( NULL == ep_handle ) 
-		return DAT_SUCCESS;
 
 	ep_ptr = (DAPL_EP*)ep_handle;
 	qp_ptr = ep_ptr->qp_handle;
 
-	return (dapli_socket_connect(	ep_ptr, remote_ia_address, 
-					remote_conn_qual,
-					private_data_size, private_data
));
+	return (dapli_socket_connect(ep_ptr, remote_ia_address, 
+				     remote_conn_qual,
+				     private_data_size, private_data));
 }
 
 /*
@@ -556,41 +769,25 @@ dapls_ib_connect (
  *
  * Returns:
  * 	DAT_SUCCESS
- *
  */
 DAT_RETURN
-dapls_ib_disconnect (
+dapls_ib_disconnect(
 	IN	DAPL_EP			*ep_ptr,
-	IN	DAT_CLOSE_FLAGS		close_flags )
+	IN	DAT_CLOSE_FLAGS		close_flags)
 {
-	ib_cm_handle_t	cm_ptr = ep_ptr->cm_handle;
+	dp_ib_cm_handle_t cm_ptr = ep_ptr->cm_handle;
 
 	dapl_dbg_log (DAPL_DBG_TYPE_EP,
 			"dapls_ib_disconnect(ep_handle %p ....)\n",
 			ep_ptr);
 
-	if ( cm_ptr->socket >= 0 ) {
-		close( cm_ptr->socket );
-		cm_ptr->socket = -1;
-	}
-	
 	/* reinit to modify QP state */
 	dapls_ib_reinit_ep(ep_ptr);
 
-	if ( ep_ptr->cr_ptr ) {
-		dapls_cr_callback ( ep_ptr->cm_handle,
-				    IB_CME_DISCONNECTED,
-				    NULL,
-				    ((DAPL_CR *)ep_ptr->cr_ptr)->sp_ptr
);
-	} else {
-		dapl_evd_connection_callback ( ep_ptr->cm_handle,
-						IB_CME_DISCONNECTED,
-						NULL,
-						ep_ptr );
-		ep_ptr->cm_handle = NULL;
-		dapl_os_free( cm_ptr, sizeof( *cm_ptr ) );
-	}	
-	return DAT_SUCCESS;
+	if (cm_ptr == NULL) 
+		return DAT_SUCCESS;
+	else
+		return(dapli_socket_disconnect(cm_ptr));
 }
 
 /*
@@ -679,13 +876,14 @@ dapls_ib_remove_conn_listener (
 			ia_ptr, sp_ptr, cm_ptr );
 
 	/* close accepted socket, free cm_srvc_handle and return */
-	if ( cm_ptr != NULL ) {
-		if ( cm_ptr->l_socket >= 0 ) {
-			close( cm_ptr->l_socket );
+	if (cm_ptr != NULL) {
+		if (cm_ptr->socket >= 0) {
+			close(cm_ptr->socket );
 			cm_ptr->socket = -1;
 		}
 	    	/* cr_thread will free */
 		sp_ptr->cm_srvc_handle = NULL;
+		write(g_scm_pipe[1], "w", sizeof "w");
 	}
 	return DAT_SUCCESS;
 }
@@ -720,23 +918,22 @@ dapls_ib_accept_connection (
 	DAPL_CR			*cr_ptr;
 	DAPL_EP			*ep_ptr;
 	
-	dapl_dbg_log (DAPL_DBG_TYPE_EP,
-		      "dapls_ib_accept_connection(cr %p ep %p prd
%p,%d)\n",
-		      cr_handle, ep_handle, p_data, p_size  );
+	dapl_dbg_log(DAPL_DBG_TYPE_EP,
+		     "dapls_ib_accept_connection(cr %p ep %p prd
%p,%d)\n",
+		     cr_handle, ep_handle, p_data, p_size  );
 
-	cr_ptr  = (DAPL_CR *) cr_handle;
-	ep_ptr  = (DAPL_EP *) ep_handle;
+	cr_ptr = (DAPL_CR *)cr_handle;
+	ep_ptr = (DAPL_EP *)ep_handle;
 	
 	/* allocate and attach a QP if necessary */
-	if ( ep_ptr->qp_state == DAPL_QP_STATE_UNATTACHED ) {
+	if (ep_ptr->qp_state == DAPL_QP_STATE_UNATTACHED) {
 		DAT_RETURN status;
-		status = dapls_ib_qp_alloc( ep_ptr->header.owner_ia, 
-					    ep_ptr, ep_ptr );
-		if ( status != DAT_SUCCESS )
+		status = dapls_ib_qp_alloc(ep_ptr->header.owner_ia, 
+					   ep_ptr, ep_ptr);
+		if (status != DAT_SUCCESS)
     			return status;
 	}
-    
-	return ( dapli_socket_accept_final(ep_ptr, cr_ptr, p_size,
p_data) );
+	return(dapli_socket_accept_usr(ep_ptr, cr_ptr, p_size, p_data));
 }
 
 
@@ -757,23 +954,32 @@ dapls_ib_accept_connection (
  *
  */
 DAT_RETURN
-dapls_ib_reject_connection (
-	IN  ib_cm_handle_t	ib_cm_handle,
-	IN  int			reject_reason,
-	IN  DAT_COUNT           private_data_size,
-	IN  const DAT_PVOID     private_data)
+dapls_ib_reject_connection(
+	IN dp_ib_cm_handle_t cm_ptr,
+	IN int reason,
+	IN DAT_COUNT psize,
+	IN const DAT_PVOID pdata)
 {
-    	ib_cm_srvc_handle_t	cm_ptr = ib_cm_handle;
+	struct iovec    	iovec;
 
 	dapl_dbg_log (DAPL_DBG_TYPE_EP,
-		      "dapls_ib_reject_connection(cm_handle %p reason
%x)\n",
-		      ib_cm_handle, reject_reason );
-
-	/* just close the socket and return */
-	if ( cm_ptr->socket > 0 ) {
-		close( cm_ptr->socket );
+		      " reject(cm %p reason %x, pdata %p, psize %d)\n",
+		      cm_ptr, reason, pdata, psize);
+
+	/* write reject data to indicate reject */
+	if (cm_ptr->socket >= 0) {
+		cm_ptr->dst.rej = (uint16_t)reason;
+		cm_ptr->dst.rej = htons(cm_ptr->dst.rej);
+		iovec.iov_base = &cm_ptr->dst;
+		iovec.iov_len  = sizeof(ib_qp_cm_t);
+		writev(cm_ptr->socket, &iovec, 1);
+		close(cm_ptr->socket);
 		cm_ptr->socket = -1;
 	}
+
+	/* cr_thread will destroy CR */
+	cm_ptr->state = SCM_REJECTED;
+        write(g_scm_pipe[1], "w", sizeof "w");
 	return DAT_SUCCESS;
 }
 
@@ -799,7 +1005,7 @@ dapls_ib_cm_remote_addr (
 	OUT	DAT_SOCK_ADDR6	*remote_ia_address )
 {
 	DAPL_HEADER	*header;
-	ib_cm_handle_t	ib_cm_handle;
+	dp_ib_cm_handle_t ib_cm_handle;
 
 	dapl_dbg_log (DAPL_DBG_TYPE_EP,
 		      "dapls_ib_cm_remote_addr(dat_handle %p, ....)\n",
@@ -829,7 +1035,6 @@ dapls_ib_cm_remote_addr (
  * Input:
  *	prd_ptr		private data pointer
  *	conn_op		connection operation type
- *      hca_ptr         hca pointer, needed for transport type
  *
  * If prd_ptr is NULL, this is a query for the max size supported by
  * the provider, otherwise it is the actual size of the private data
@@ -843,10 +1048,9 @@ dapls_ib_cm_remote_addr (
  * 	length of private data
  *
  */
-int dapls_ib_private_data_size (
-	IN      DAPL_PRIVATE	*prd_ptr,
-	IN	DAPL_PDATA_OP	conn_op,
-        IN	DAPL_HCA	*hca_ptr)
+int dapls_ib_private_data_size( IN DAPL_PRIVATE *prd_ptr,
+				IN DAPL_PDATA_OP conn_op,
+				IN DAPL_HCA     *hca_ptr)
 {
 	int  size;
 
@@ -995,24 +1199,25 @@ dapls_ib_get_cm_event (
     return ib_cm_event;
 }
 
-/* async CR processing thread to avoid blocking applications */
+/* outbound/inbound CR processing thread to avoid blocking applications */
+#define SCM_MAX_CONN 8192
 void cr_thread(void *arg) 
 {
     struct dapl_hca	*hca_ptr = arg;
-    ib_cm_srvc_handle_t	cr, next_cr;
-    int			max_fd;
-    fd_set		rfd,rfds;
-    struct timeval	to;
+    dp_ib_cm_handle_t	cr, next_cr;
+    int 		ret,idx;
+    char		rbuf[2];
+    struct pollfd	ufds[SCM_MAX_CONN];
      
     dapl_dbg_log(DAPL_DBG_TYPE_UTIL," cr_thread: ENTER hca
%p\n",hca_ptr);
 
     dapl_os_lock( &hca_ptr->ib_trans.lock );
     hca_ptr->ib_trans.cr_state = IB_THREAD_RUN;
     while (hca_ptr->ib_trans.cr_state == IB_THREAD_RUN) {
-	
-	FD_ZERO( &rfds ); 
-	max_fd = -1;
-	
+	idx=0;
+	ufds[idx].fd = g_scm_pipe[0]; /* wakeup and process work */
+        ufds[idx].events = POLLIN;
+
 	if (!dapl_llist_is_empty(&hca_ptr->ib_trans.list))
             next_cr = dapl_llist_peek_head (&hca_ptr->ib_trans.list);
 	else
@@ -1020,51 +1225,70 @@ void cr_thread(void *arg)
 
 	while (next_cr) {
 	    cr = next_cr;
-	    dapl_dbg_log (DAPL_DBG_TYPE_CM," thread: cm_ptr %p\n", cr );
-	    if (cr->l_socket == -1 || 
+	    if ((cr->socket == -1) || 
 		hca_ptr->ib_trans.cr_state != IB_THREAD_RUN) {
 
-		dapl_dbg_log(DAPL_DBG_TYPE_CM," thread: Freeing %p\n",
cr);
+		dapl_dbg_log(DAPL_DBG_TYPE_CM," cr_thread: Free %p\n",
cr);
 		next_cr = dapl_llist_next_entry(&hca_ptr->ib_trans.list,
 
(DAPL_LLIST_ENTRY*)&cr->entry );
 		dapl_llist_remove_entry(&hca_ptr->ib_trans.list, 
 					(DAPL_LLIST_ENTRY*)&cr->entry);
-		dapl_os_free( cr, sizeof(*cr) );
+		dapl_os_free(cr, sizeof(*cr));
 		continue;
 	    }
-	          
-	    FD_SET( cr->l_socket, &rfds ); /* add to select set */
-	    if ( cr->l_socket > max_fd )
-		max_fd = cr->l_socket;
-
-	    /* individual select poll to check for work */
-	    FD_ZERO(&rfd);
-	    FD_SET(cr->l_socket, &rfd);
-	    dapl_os_unlock(&hca_ptr->ib_trans.lock);	
-	    to.tv_sec  = 0;
-	    to.tv_usec = 0;
-	    if ( select(cr->l_socket + 1,&rfd, NULL, NULL, &to) < 0) {
-		dapl_dbg_log (DAPL_DBG_TYPE_CM,
-			  " thread: ERR %s on cr %p sk %d\n", 
-			  strerror(errno), cr, cr->l_socket);
-		close(cr->l_socket);
-		cr->l_socket = -1;
-	    } else if ( FD_ISSET(cr->l_socket, &rfd) && 
-			dapli_socket_accept(cr)) {
-		close(cr->l_socket);
-		cr->l_socket = -1;
+
+	    if (idx==SCM_MAX_CONN-1) {
+		dapl_dbg_log(DAPL_DBG_TYPE_ERR, 
+			     "SCM ERR: cm_thread exceeded FD_SETSIZE
%d\n",idx+1);
+		continue;
 	    }
-	    dapl_os_lock( &hca_ptr->ib_trans.lock );
+		
+	    /* Add to ufds for poll, check for immediate work */
+	    ufds[++idx].fd = cr->socket; /* add listen or cr */
+	    ufds[idx].events = POLLIN;
+
+	    /* check socket for event, accept in or connect out */
+	    dapl_dbg_log(DAPL_DBG_TYPE_CM," poll cr=%p, fd=%d,%d\n", 
+				cr, cr->socket, ufds[idx].fd);
+	    dapl_os_unlock(&hca_ptr->ib_trans.lock);
+	    ret = poll(&ufds[idx],1,1);
+	    dapl_dbg_log(DAPL_DBG_TYPE_CM," poll wakeup ret=%d cr->st=%d
ev=%d fd=%d\n",
+			 ret,cr->state,ufds[idx].revents,ufds[idx].fd);
+
+	    /* data on listen, qp exchange, and on disconnect request */
+	    if ((ret == 1) && ufds[idx].revents == POLLIN) {
+		if (cr->socket > 0) {
+			if (cr->state == SCM_LISTEN)
+				dapli_socket_accept(cr);
+			else if (cr->state == SCM_ACCEPTED)
+				dapli_socket_accept_rtu(cr);
+			else if (cr->state == SCM_CONN_PENDING)
+				dapli_socket_connect_rtu(cr);
+			else if (cr->state == SCM_CONNECTED)
+				dapli_socket_disconnect(cr);
+		}
+	    } else if (ret != 0) {
+    		dapl_dbg_log(DAPL_DBG_TYPE_CM,
+			     " cr_thread(cr=%p) st=%d poll ERR= %s\n",
+			     cr,cr->state,strerror(errno));
+		/* POLLHUP or poll error case, issue event if connected */
+		if (cr->state == SCM_CONNECTED)
+			dapli_socket_disconnect(cr);
+	    } 
+	    dapl_os_lock(&hca_ptr->ib_trans.lock);
 	    next_cr =  dapl_llist_next_entry(&hca_ptr->ib_trans.list,
-
(DAPL_LLIST_ENTRY*)&cr->entry );
+
(DAPL_LLIST_ENTRY*)&cr->entry);
 	} 
-	dapl_os_unlock( &hca_ptr->ib_trans.lock );
-	to.tv_sec  = 0;
-	to.tv_usec = 100000; /* wakeup and check destroy */
-	select(max_fd + 1, &rfds, NULL, NULL, &to);
-	dapl_os_lock( &hca_ptr->ib_trans.lock );
+	dapl_os_unlock(&hca_ptr->ib_trans.lock);
+	dapl_dbg_log(DAPL_DBG_TYPE_CM," cr_thread: sleep, %d\n", idx+1);
+	poll(ufds,idx+1,-1); /* infinite, all sockets and pipe */
+	/* if pipe used to wakeup, consume */
+	if (ufds[0].revents == POLLIN)
+		read(g_scm_pipe[0], rbuf, 2);
+	dapl_dbg_log(DAPL_DBG_TYPE_CM," cr_thread: wakeup\n");
+	dapl_os_lock(&hca_ptr->ib_trans.lock);
     } 
-    dapl_os_unlock( &hca_ptr->ib_trans.lock );	
+    dapl_os_unlock(&hca_ptr->ib_trans.lock);	
     hca_ptr->ib_trans.cr_state = IB_THREAD_EXIT;
     dapl_dbg_log(DAPL_DBG_TYPE_UTIL," cr_thread(hca %p)
exit\n",hca_ptr);
 }
diff --git a/dapl/openib_scm/dapl_ib_cq.c b/dapl/openib_scm/dapl_ib_cq.c
index 7ac7037..56b729e 100644
--- a/dapl/openib_scm/dapl_ib_cq.c
+++ b/dapl/openib_scm/dapl_ib_cq.c
@@ -97,7 +97,7 @@ void dapli_cq_thread_destroy(struct dapl_hca *hca_ptr)
         while (hca_ptr->ib_trans.cq_state != IB_THREAD_EXIT) {
                 struct timespec sleep, remain;
                 sleep.tv_sec = 0;
-                sleep.tv_nsec = 200000000; /* 200 ms */
+                sleep.tv_nsec = 2000000; /* 2 ms */
                 dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
                              " cq_thread_destroy: waiting for
cq_thread\n");
                 nanosleep (&sleep, &remain);
@@ -422,12 +422,21 @@ DAT_RETURN dapls_ib_cq_free (
 	IN  DAPL_IA		*ia_ptr,
 	IN  DAPL_EVD		*evd_ptr)
 {
-	if ( evd_ptr->ib_cq_handle != IB_INVALID_HANDLE ) {
-		/* copy all entries on CQ to EVD before destroying */	
-		dapls_evd_copy_cq(evd_ptr); 
+	DAT_EVENT event;
+
+	if (evd_ptr->ib_cq_handle != IB_INVALID_HANDLE) {
+		/* pull off CQ and EVD entries and toss */	
+		while (dapls_ib_completion_poll == DAT_SUCCESS);
+		while (dapl_evd_dequeue(evd_ptr,&event) !=
DAT_QUEUE_EMPTY);
+#if 1 
+		ibv_destroy_cq(evd_ptr->ib_cq_handle); 
+		evd_ptr->ib_cq_handle = IB_INVALID_HANDLE;
+		return DAT_SUCCESS;
+#else
 		if (ibv_destroy_cq(evd_ptr->ib_cq_handle)) 
 			return(dapl_convert_errno(errno,"destroy_cq"));
 		evd_ptr->ib_cq_handle = IB_INVALID_HANDLE;
+#endif
 	}
 	return DAT_SUCCESS;
 }
@@ -600,7 +609,7 @@ dapls_ib_wait_object_wait (
 		status = ETIMEDOUT;
 	
 	dapl_dbg_log (DAPL_DBG_TYPE_CM, 
-		      " cq_object_wait: RET evd %p ibv_cq %p ibv_ctx %p
%s\n",
+		      " cq_object_wait: RET evd %p ibv_cq %p %s\n",
 		      evd_ptr, ibv_cq,strerror(errno));
 	
 	return(dapl_convert_errno(status,"cq_wait_object_wait"));
diff --git a/dapl/openib_scm/dapl_ib_dto.h
b/dapl/openib_scm/dapl_ib_dto.h
index bea3e4d..6ba669f 100644
--- a/dapl/openib_scm/dapl_ib_dto.h
+++ b/dapl/openib_scm/dapl_ib_dto.h
@@ -35,7 +35,7 @@
  *
  *   Description: 
  *
- *   The uDAPL openib provider - DTO operations and CQE macros 
+ *   The OpenIB SCM provider - DTO operations and CQE macros 
  *
 
************************************************************************
****
  *		   Source Control System Information
@@ -50,6 +50,10 @@
 
 #include "dapl_ib_util.h"
 
+#ifdef DAT_EXTENSIONS
+#include <dat2/dat_ib_extensions.h>
+#endif
+
 #define	DEFAULT_DS_ENTRIES	8
 
 STATIC _INLINE_ int dapls_cqe_opcode(ib_work_completion_t *cqe_p);
@@ -61,26 +65,26 @@ STATIC _INLINE_ int
dapls_cqe_opcode(ib_work_completion_t *cqe_p);
  */
 STATIC _INLINE_ DAT_RETURN 
 dapls_ib_post_recv (
-	IN  DAPL_EP	 	*ep_ptr,
+	IN  DAPL_EP		*ep_ptr,
 	IN  DAPL_COOKIE		*cookie,
-	IN  DAT_COUNT	   	segments,
+	IN  DAT_COUNT		segments,
 	IN  DAT_LMR_TRIPLET	*local_iov )
 {
-	ib_data_segment_t	ds_array[DEFAULT_DS_ENTRIES];
-	ib_data_segment_t	*ds_array_p, *ds_array_start_p = NULL;
-	struct ibv_recv_wr	wr;
-	struct ibv_recv_wr	*bad_wr;
-	DAT_COUNT		i, total_len;
-	int			ret;
+	ib_data_segment_t ds_array[DEFAULT_DS_ENTRIES];
+	ib_data_segment_t *ds_array_p, *ds_array_start_p = NULL;
+	struct ibv_recv_wr wr;
+	struct ibv_recv_wr *bad_wr;
+	DAT_COUNT i, total_len;
+	int ret;
 	
-	dapl_dbg_log (DAPL_DBG_TYPE_EP,
-		      " post_rcv: ep %p cookie %p segs %d l_iov %p\n",
-		      ep_ptr, cookie, segments, local_iov);
+	dapl_dbg_log(DAPL_DBG_TYPE_EP,
+		     " post_rcv: ep %p cookie %p segs %d l_iov %p\n",
+		     ep_ptr, cookie, segments, local_iov);
 
-	if ( segments <= DEFAULT_DS_ENTRIES ) 
+	if (segments <= DEFAULT_DS_ENTRIES) 
 		ds_array_p = ds_array;
 	else
-		ds_array_start_p = ds_array_p =
+		ds_array_start_p = ds_array_p = 
 			dapl_os_alloc(segments *
sizeof(ib_data_segment_t));
 
 	if (NULL == ds_array_p)
@@ -93,18 +97,18 @@ dapls_ib_post_recv (
 	wr.wr_id = (uint64_t)(uintptr_t)cookie;
 	wr.sg_list = ds_array_p;
 
-	for (i = 0; i < segments; i++ ) {
-		if ( !local_iov[i].segment_length )
+	for (i = 0; i < segments; i++) {
+		if (!local_iov[i].segment_length)
 			continue;
 
-		ds_array_p->addr  = (uint64_t)
local_iov[i].virtual_address;
+		ds_array_p->addr = (uint64_t)
local_iov[i].virtual_address;
 		ds_array_p->length = local_iov[i].segment_length;
-		ds_array_p->lkey  = local_iov[i].lmr_context;
+		ds_array_p->lkey = local_iov[i].lmr_context;
 		
-		dapl_dbg_log (	DAPL_DBG_TYPE_EP, 
-				" post_rcv: l_key 0x%x va %p len %d\n",
-				ds_array_p->lkey, ds_array_p->addr, 
-				ds_array_p->length );
+		dapl_dbg_log(DAPL_DBG_TYPE_EP, 
+			     " post_rcv: l_key 0x%x va %p len %d\n",
+			     ds_array_p->lkey, ds_array_p->addr, 
+			     ds_array_p->length );
 
 		total_len += ds_array_p->length;
 		wr.num_sge++;
@@ -117,15 +121,14 @@ dapls_ib_post_recv (
 	ret = ibv_post_recv(ep_ptr->qp_handle, &wr, &bad_wr);
 	
 	if (ds_array_start_p != NULL)
-		dapl_os_free(ds_array_start_p, segments *
sizeof(ib_data_segment_t));
+	    dapl_os_free(ds_array_start_p, segments *
sizeof(ib_data_segment_t));
 
 	if (ret)
-		return( dapl_convert_errno(EFAULT,"ibv_recv") );
+		return( dapl_convert_errno(errno,"ibv_recv") );
 
 	return DAT_SUCCESS;
 }
 
-
 /*
  * dapls_ib_post_send
  *
@@ -133,35 +136,37 @@ dapls_ib_post_recv (
  */
 STATIC _INLINE_ DAT_RETURN 
 dapls_ib_post_send (
-    IN  DAPL_EP		   	*ep_ptr,
-    IN  ib_send_op_type_t       op_type,
-    IN  DAPL_COOKIE		*cookie,
-    IN  DAT_COUNT	   	segments,
-    IN  DAT_LMR_TRIPLET	   	*local_iov,
-    IN  const DAT_RMR_TRIPLET	*remote_iov,
-    IN  DAT_COMPLETION_FLAGS	completion_flags)
+	IN  DAPL_EP			*ep_ptr,
+	IN  ib_send_op_type_t		op_type,
+	IN  DAPL_COOKIE			*cookie,
+	IN  DAT_COUNT			segments,
+	IN  DAT_LMR_TRIPLET		*local_iov,
+	IN  const DAT_RMR_TRIPLET	*remote_iov,
+	IN  DAT_COMPLETION_FLAGS	completion_flags)
 {
-	dapl_dbg_log (DAPL_DBG_TYPE_EP,
-		      " post_snd: ep %p op %d ck %p sgs %d l_iov %p
r_iov %p f %d\n",
-		      ep_ptr, op_type, cookie, segments, local_iov, 
-		      remote_iov, completion_flags);
-
-	ib_data_segment_t	ds_array[DEFAULT_DS_ENTRIES];
-	ib_data_segment_t	*ds_array_p, *ds_array_start_p = NULL;
-	struct ibv_send_wr	wr;
-	struct ibv_send_wr	*bad_wr;
-	ib_hca_transport_t	*ibt_ptr =
&ep_ptr->header.owner_ia->hca_ptr->ib_trans;
-	DAT_COUNT		i, total_len;
-	int			ret;
+	dapl_dbg_log(DAPL_DBG_TYPE_EP,
+		     " post_snd: ep %p op %d ck %p sgs",
+		     "%d l_iov %p r_iov %p f %d\n",
+		     ep_ptr, op_type, cookie, segments, local_iov, 
+		     remote_iov, completion_flags);
+
+	ib_data_segment_t ds_array[DEFAULT_DS_ENTRIES];
+	ib_data_segment_t *ds_array_p, *ds_array_start_p = NULL;
+	struct ibv_send_wr wr;
+	struct ibv_send_wr *bad_wr;
+	ib_hca_transport_t *ibt_ptr = 
+		&ep_ptr->header.owner_ia->hca_ptr->ib_trans;
+	DAT_COUNT i, total_len;
+	int ret;
 	
-	dapl_dbg_log (DAPL_DBG_TYPE_EP,
-		      " post_snd: ep %p cookie %p segs %d l_iov %p\n",
-		      ep_ptr, cookie, segments, local_iov);
+	dapl_dbg_log(DAPL_DBG_TYPE_EP,
+		     " post_snd: ep %p cookie %p segs %d l_iov %p\n",
+		     ep_ptr, cookie, segments, local_iov);
 
-	if( segments <= DEFAULT_DS_ENTRIES ) 
+	if(segments <= DEFAULT_DS_ENTRIES) 
 		ds_array_p = ds_array;
 	else
-		ds_array_start_p = ds_array_p =
+		ds_array_start_p = ds_array_p = 
 			dapl_os_alloc(segments *
sizeof(ib_data_segment_t));
 
 	if (NULL == ds_array_p)
@@ -180,14 +185,14 @@ dapls_ib_post_send (
 		if ( !local_iov[i].segment_length )
 			continue;
 
-		ds_array_p->addr  = (uint64_t)
local_iov[i].virtual_address;
+		ds_array_p->addr = (uint64_t)
local_iov[i].virtual_address;
 		ds_array_p->length = local_iov[i].segment_length;
-		ds_array_p->lkey  = local_iov[i].lmr_context;
+		ds_array_p->lkey = local_iov[i].lmr_context;
 		
-		dapl_dbg_log (	DAPL_DBG_TYPE_EP, 
-				" post_snd: lkey 0x%x va %p len %d \n",
-				ds_array_p->lkey, ds_array_p->addr, 
-				ds_array_p->length );
+		dapl_dbg_log(DAPL_DBG_TYPE_EP, 
+			     " post_snd: lkey 0x%x va %p len %d\n",
+			     ds_array_p->lkey, ds_array_p->addr, 
+			     ds_array_p->length );
 
 		total_len += ds_array_p->length;
 		wr.num_sge++;
@@ -196,20 +201,22 @@ dapls_ib_post_send (
 
 	if (cookie != NULL) 
 		cookie->val.dto.size = total_len;
-	
-	if ((op_type == OP_RDMA_WRITE) || (op_type == OP_RDMA_READ)) {
-		wr.wr.rdma.remote_addr = remote_iov->target_address;
+
+	if (wr.num_sge && 
+	    (op_type == OP_RDMA_WRITE || op_type == OP_RDMA_READ)) {
+		wr.wr.rdma.remote_addr = remote_iov->virtual_address;
 		wr.wr.rdma.rkey = remote_iov->rmr_context;
-		dapl_dbg_log (	DAPL_DBG_TYPE_EP, 
-				" post_snd_rdma: rkey 0x%x va
%#016Lx\n",
-				wr.wr.rdma.rkey, wr.wr.rdma.remote_addr
);
+		dapl_dbg_log(DAPL_DBG_TYPE_EP, 
+			     " post_snd_rdma: rkey 0x%x va %#016Lx\n",
+			     wr.wr.rdma.rkey, wr.wr.rdma.remote_addr);
 	}
 
+
 	/* inline data for send or write ops */
-	if ((total_len <= ibt_ptr->max_inline_send ) && 
+	if ((total_len <= ibt_ptr->max_inline_send) && 
 	   ((op_type == OP_SEND) || (op_type == OP_RDMA_WRITE))) 
 		wr.send_flags |= IBV_SEND_INLINE;
-
+	
 	/* set completion flags in work request */
 	wr.send_flags |= (DAT_COMPLETION_SUPPRESS_FLAG & 
 				completion_flags) ? 0 :
IBV_SEND_SIGNALED;
@@ -218,24 +225,205 @@ dapls_ib_post_send (
 	wr.send_flags |= (DAT_COMPLETION_SOLICITED_WAIT_FLAG & 
 				completion_flags) ? IBV_SEND_SOLICITED :
0;
 
-	dapl_dbg_log (DAPL_DBG_TYPE_EP, 
-		      " post_snd: op 0x%x flags 0x%x sglist %p, %d\n", 
-			wr.opcode, wr.send_flags, wr.sg_list,
wr.num_sge);
+	dapl_dbg_log(DAPL_DBG_TYPE_EP, 
+		     " post_snd: op 0x%x flags 0x%x sglist %p, %d\n", 
+		     wr.opcode, wr.send_flags, wr.sg_list, wr.num_sge);
 
 	ret = ibv_post_send(ep_ptr->qp_handle, &wr, &bad_wr);
-	
+
 	if (ds_array_start_p != NULL)
-		dapl_os_free(ds_array_start_p, segments *
sizeof(ib_data_segment_t));
+	    dapl_os_free(ds_array_start_p, segments *
sizeof(ib_data_segment_t));
 
 	if (ret)
-		return( dapl_convert_errno(EFAULT,"ibv_send") );
+		return( dapl_convert_errno(errno,"ibv_send") );
+
+	dapl_dbg_log(DAPL_DBG_TYPE_EP," post_snd: returned\n");
+	return DAT_SUCCESS;
+}
+
+/* map IB work completion opcodes to DAT_DTOS values; 0xff if unrecognized */
+STATIC _INLINE_ DAT_DTOS dapls_cqe_dtos_opcode(ib_work_completion_t *cqe_p)
+{
+	switch (cqe_p->opcode) {
+
+	case IBV_WC_SEND:
+		return (DAT_DTO_SEND);
+	case IBV_WC_RDMA_READ:
+		return (DAT_DTO_RDMA_READ);
+	case IBV_WC_BIND_MW:
+		return (DAT_DTO_BIND_MW);
+#ifdef DAT_EXTENSIONS
+	case IBV_WC_RDMA_WRITE:
+		if (cqe_p->wc_flags & IBV_WC_WITH_IMM)
+			return (DAT_IB_DTO_RDMA_WRITE_IMMED);
+		else
+			return (DAT_DTO_RDMA_WRITE);
+	case IBV_WC_COMP_SWAP:
+		return (DAT_IB_DTO_CMP_SWAP);
+	case IBV_WC_FETCH_ADD:
+		return (DAT_IB_DTO_FETCH_ADD);
+	case IBV_WC_RECV_RDMA_WITH_IMM:
+		return (DAT_IB_DTO_RECV_IMMED);
+#else
+	case IBV_WC_RDMA_WRITE:
+		return (DAT_DTO_RDMA_WRITE);
+#endif
+	case IBV_WC_RECV:
+		return (DAT_DTO_RECEIVE);
+	default:
+		return (0xff);
+	}
+}
+#define DAPL_GET_CQE_DTOS_OPTYPE(cqe_p) dapls_cqe_dtos_opcode(cqe_p)
+
+
+#ifdef DAT_EXTENSIONS
+/*
+ * dapls_ib_post_ext_send
+ *
+ * Provider specific extended Post SEND function for RDMA write with
+ *	immediate data and atomics OP_COMP_AND_SWAP and OP_FETCH_AND_ADD
+ */
+STATIC _INLINE_ DAT_RETURN 
+dapls_ib_post_ext_send (
+	IN  DAPL_EP			*ep_ptr,
+	IN  ib_send_op_type_t		op_type,
+	IN  DAPL_COOKIE			*cookie,
+	IN  DAT_COUNT			segments,
+	IN  DAT_LMR_TRIPLET		*local_iov,
+	IN  const DAT_RMR_TRIPLET	*remote_iov,
+	IN  DAT_UINT32			immed_data,
+	IN  DAT_UINT64			compare_add,
+	IN  DAT_UINT64			swap,
+	IN  DAT_COMPLETION_FLAGS	completion_flags)
+{
+	dapl_dbg_log(DAPL_DBG_TYPE_EP,
+		     " post_snd: ep %p op %d ck %p sgs "
+		     "%d l_iov %p r_iov %p f %d\n",
+		     ep_ptr, op_type, cookie, segments, local_iov, 
+		     remote_iov, completion_flags);
+
+	ib_data_segment_t ds_array[DEFAULT_DS_ENTRIES];
+	ib_data_segment_t *ds_array_p, *ds_array_start_p;
+	struct ibv_send_wr wr;
+	struct ibv_send_wr *bad_wr;
+	DAT_COUNT i, total_len;
+	int ret;
+	
+	dapl_dbg_log(DAPL_DBG_TYPE_EP,
+		     " post_snd: ep %p cookie %p segs %d l_iov %p\n",
+		     ep_ptr, cookie, segments, local_iov);
+
+	if(segments <= DEFAULT_DS_ENTRIES) 
+		ds_array_p = ds_array;
+	else
+		ds_array_start_p = ds_array_p = 
+			dapl_os_alloc(segments * sizeof(ib_data_segment_t));
+
+	if (NULL == ds_array_p)
+		return (DAT_INSUFFICIENT_RESOURCES);
+	
+	/* setup the work request */
+	wr.next = 0;
+	wr.opcode = op_type;
+	wr.num_sge = 0;
+	wr.send_flags = 0;
+	wr.wr_id = (uint64_t)(uintptr_t)cookie;
+	wr.sg_list = ds_array_p;
+	total_len = 0;
+
+	for (i = 0; i < segments; i++ ) {
+		if ( !local_iov[i].segment_length )
+			continue;
+
+		ds_array_p->addr = (uint64_t) local_iov[i].virtual_address;
+		ds_array_p->length = local_iov[i].segment_length;
+		ds_array_p->lkey = local_iov[i].lmr_context;
+		
+		dapl_dbg_log(DAPL_DBG_TYPE_EP, 
+			     " post_snd: lkey 0x%x va %p len %d\n",
+			     ds_array_p->lkey, ds_array_p->addr, 
+			     ds_array_p->length );
+
+		total_len += ds_array_p->length;
+		wr.num_sge++;
+		ds_array_p++;
+	}
 
-	dapl_dbg_log (DAPL_DBG_TYPE_EP," post_snd: returned\n");
+	if (cookie != NULL) 
+		cookie->val.dto.size = total_len;
+
+	switch (op_type) {
+	case OP_RDMA_WRITE_IMM:
+		/* OP_RDMA_WRITE_IMMED has direct IB wr_type mapping */
+		dapl_dbg_log(DAPL_DBG_TYPE_EP, 
+			     " post_ext: rkey 0x%x va %#016Lx immed=0x%x\n",
+			     remote_iov?remote_iov->rmr_context:0, 
+			     remote_iov?remote_iov->virtual_address:0,
+			     immed_data);
+
+		wr.imm_data = immed_data;
+	        if (wr.num_sge) {
+			wr.wr.rdma.remote_addr = remote_iov->virtual_address;
+			wr.wr.rdma.rkey = remote_iov->rmr_context;
+		}
+		break;
+	case OP_COMP_AND_SWAP:
+		/* OP_COMP_AND_SWAP has direct IB wr_type mapping */
+		dapl_dbg_log(DAPL_DBG_TYPE_EP, 
+			     " post_ext: OP_COMP_AND_SWAP=%lx,"
+			     "%lx rkey 0x%x va %#016Lx\n",
+			     compare_add, swap, remote_iov->rmr_context,
+			     remote_iov->virtual_address);
+		
+		wr.wr.atomic.compare_add = compare_add;
+		wr.wr.atomic.swap = swap;
+		wr.wr.atomic.remote_addr = remote_iov->virtual_address;
+		wr.wr.atomic.rkey = remote_iov->rmr_context;
+		break;
+	case OP_FETCH_AND_ADD:
+		/* OP_FETCH_AND_ADD has direct IB wr_type mapping */
+		dapl_dbg_log(DAPL_DBG_TYPE_EP, 
+			     " post_ext: OP_FETCH_AND_ADD=%lx"
+			     " rkey 0x%x va %#016Lx\n",
+			     compare_add, remote_iov->rmr_context,
+			     remote_iov->virtual_address);
+
+		wr.wr.atomic.compare_add = compare_add;
+		wr.wr.atomic.remote_addr = remote_iov->virtual_address;
+		wr.wr.atomic.rkey = remote_iov->rmr_context;
+		break;
+	default:
+		break;
+	}
+
+	/* set completion flags in work request */
+	wr.send_flags |= (DAT_COMPLETION_SUPPRESS_FLAG & 
+				completion_flags) ? 0 : IBV_SEND_SIGNALED;
+	wr.send_flags |= (DAT_COMPLETION_BARRIER_FENCE_FLAG & 
+				completion_flags) ? IBV_SEND_FENCE : 0;
+	wr.send_flags |= (DAT_COMPLETION_SOLICITED_WAIT_FLAG & 
+				completion_flags) ? IBV_SEND_SOLICITED : 0;
+
+	dapl_dbg_log(DAPL_DBG_TYPE_EP, 
+		     " post_snd: op 0x%x flags 0x%x sglist %p, %d\n", 
+		     wr.opcode, wr.send_flags, wr.sg_list, wr.num_sge);
+
+	ret = ibv_post_send(ep_ptr->qp_handle, &wr, &bad_wr);
+
+	if (segments > DEFAULT_DS_ENTRIES)	/* only the heap array is freed */
+	    dapl_os_free(ds_array_start_p, segments * sizeof(ib_data_segment_t));
+
+	if (ret)
+		return( dapl_convert_errno(errno,"ibv_send") );
+	
+	dapl_dbg_log(DAPL_DBG_TYPE_EP," post_snd: returned\n");
 	return DAT_SUCCESS;
 }
+#endif
 
 STATIC _INLINE_ DAT_RETURN 
-dapls_ib_optional_prv_dat (
+dapls_ib_optional_prv_dat(
 	IN  DAPL_CR		*cr_ptr,
 	IN  const void		*event_data,
 	OUT   DAPL_CR		**cr_pp)
@@ -243,34 +431,69 @@ dapls_ib_optional_prv_dat (
     return DAT_SUCCESS;
 }
 
+/* map Work Completions to DAPL WR operations */
 STATIC _INLINE_ int dapls_cqe_opcode(ib_work_completion_t *cqe_p)
 {
-    switch (cqe_p->opcode) {
+	switch (cqe_p->opcode) {
 	case IBV_WC_SEND:
-	    return (OP_SEND);
+		return (OP_SEND);
 	case IBV_WC_RDMA_WRITE:
-	    return (OP_RDMA_WRITE);
+		if (cqe_p->wc_flags & IBV_WC_WITH_IMM)
+			return (OP_RDMA_WRITE_IMM);
+		else
+			return (OP_RDMA_WRITE);
 	case IBV_WC_RDMA_READ:
-	    return (OP_RDMA_READ);
+		return (OP_RDMA_READ);
 	case IBV_WC_COMP_SWAP:
-	    return (OP_COMP_AND_SWAP);
+		return (OP_COMP_AND_SWAP);
 	case IBV_WC_FETCH_ADD:
-	    return (OP_FETCH_AND_ADD);
+		return (OP_FETCH_AND_ADD);
 	case IBV_WC_BIND_MW:
-	    return (OP_BIND_MW);
+		return (OP_BIND_MW);
 	case IBV_WC_RECV:
-	    return (OP_RECEIVE);
+		if (cqe_p->wc_flags & IBV_WC_WITH_IMM)
+			return (OP_RECEIVE_IMM);
+		else
+			return (OP_RECEIVE);
 	case IBV_WC_RECV_RDMA_WITH_IMM:
-	    return (OP_RECEIVE_IMM);
+		return (OP_RECEIVE_IMM);
 	default:
-	    return (OP_INVALID);
-    }
+		return (OP_INVALID);
+	}
+}
+
+#define DAPL_GET_CQE_OPTYPE(cqe_p) dapls_cqe_opcode(cqe_p)
+#define DAPL_GET_CQE_WRID(cqe_p) ((ib_work_completion_t*)cqe_p)->wr_id
+#define DAPL_GET_CQE_STATUS(cqe_p) ((ib_work_completion_t*)cqe_p)->status
+#define DAPL_GET_CQE_VENDOR_ERR(cqe_p) ((ib_work_completion_t*)cqe_p)->vendor_err
+#define DAPL_GET_CQE_BYTESNUM(cqe_p) ((ib_work_completion_t*)cqe_p)->byte_len
+#define DAPL_GET_CQE_IMMED_DATA(cqe_p) ((ib_work_completion_t*)cqe_p)->imm_data
+
+STATIC _INLINE_ char * dapls_dto_op_str(int op)	/* printable name for a DTO op code */
+{
+    static char *optable[] =	/* indexed directly by op: ten names at 0..9 */
+    {
+        "OP_RDMA_WRITE",
+        "OP_RDMA_WRITE_IMM",
+        "OP_SEND",
+        "OP_SEND_IMM",
+        "OP_RDMA_READ",
+        "OP_COMP_AND_SWAP",
+        "OP_FETCH_AND_ADD",
+        "OP_RECEIVE",
+        "OP_RECEIVE_IMM",
+        "OP_BIND_MW",
+        0	/* terminator; the range check below never selects it */
+    };
+    return ((op < 0 || op > 9) ? "Invalid CQE OP?" : optable[op]); /* assumes table order matches the OP_* values -- confirm */
+}
+
+static _INLINE_ char *
+dapls_cqe_op_str(IN ib_work_completion_t *cqe_ptr)
+{
+    return dapls_dto_op_str(DAPL_GET_CQE_OPTYPE(cqe_ptr)); /* completion -> op type -> name string */
 }
 
-#define DAPL_GET_CQE_OPTYPE(cqe_p)	dapls_cqe_opcode(cqe_p)
-#define DAPL_GET_CQE_WRID(cqe_p)
((ib_work_completion_t*)cqe_p)->wr_id
-#define DAPL_GET_CQE_STATUS(cqe_p)
((ib_work_completion_t*)cqe_p)->status
-#define DAPL_GET_CQE_BYTESNUM(cqe_p)
((ib_work_completion_t*)cqe_p)->byte_len
-#define DAPL_GET_CQE_IMMED_DATA(cqe_p)
((ib_work_completion_t*)cqe_p)->imm_data
+#define DAPL_GET_CQE_OP_STR(cqe) dapls_cqe_op_str(cqe)
 
 #endif	/*  _DAPL_IB_DTO_H_ */
diff --git a/dapl/openib_scm/dapl_ib_extensions.c
b/dapl/openib_scm/dapl_ib_extensions.c
new file mode 100755
index 0000000..1402057
--- /dev/null
+++ b/dapl/openib_scm/dapl_ib_extensions.c
@@ -0,0 +1,313 @@
+/*
+ * Copyright (c) 2007 Intel Corporation.  All rights reserved.
+ *
+ * This Software is licensed under one of the following licenses:
+ *
+ * 1) under the terms of the "Common Public License 1.0" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/cpl.php.
+ *
+ * 2) under the terms of the "The BSD License" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/bsd-license.php.
+ *
+ * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
+ *    copy of which is available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/gpl-license.php.
+ *
+ * Licensee has the right to choose one of the above licenses.
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice and one of the license notices.
+ *
+ * Redistributions in binary form must reproduce both the above copyright
+ * notice, one of the license notices in the documentation
+ * and/or other materials provided with the distribution.
+ */
+
+/**********************************************************************
+ * 
+ * MODULE: dapl_ib_extensions.c
+ *
+ * PURPOSE:  Extensions routines for OpenIB socket CM (scm) provider
+ *
+ * $Id: $
+ *
+ **********************************************************************/
+
+#include "dapl.h"
+#include "dapl_adapter_util.h"
+#include "dapl_evd_util.h"
+#include "dapl_ib_util.h"
+#include "dapl_ep_util.h"
+#include "dapl_cookie.h"
+#include <stdarg.h>
+
+
+DAT_RETURN
+dapli_post_ext( IN       DAT_EP_HANDLE		ep_handle,
+		IN       DAT_UINT64		cmp_add,	
+		IN       DAT_UINT64		swap,
+		IN	 DAT_UINT32		immed_data,
+		IN	 DAT_COUNT		segments,
+		IN       DAT_LMR_TRIPLET	*local_iov,
+		IN       DAT_DTO_COOKIE		user_cookie,
+		IN const DAT_RMR_TRIPLET	*remote_iov,
+		IN	 int			op_type, 
+		IN       DAT_COMPLETION_FLAGS	flags );
+
+
+/*
+ * dapl_extensions
+ *
+ * Dispatch an IB extended operation to dapli_post_ext
+ *
+ * Input:
+ *	dat_handle	EP handle the operation is posted on
+ *	ext_op, args	extended operation and its va_list of operands
+ *
+ * Output:
+ * 	none
+ *
+ * Returns:
+ * 	DAT_SUCCESS
+ *	DAT_NOT_IMPLEMENTED	ext_op is not one of the cases below
+ *	otherwise the status returned by dapli_post_ext
+ *
+ */
+DAT_RETURN
+dapl_extensions(IN DAT_HANDLE		dat_handle, 
+		IN DAT_EXTENDED_OP	ext_op, 
+		IN va_list		args)
+{
+	DAT_EP_HANDLE		ep;
+	DAT_LMR_TRIPLET		*lmr_p;
+	DAT_DTO_COOKIE		cookie;
+	const DAT_RMR_TRIPLET	*rmr_p;
+	DAT_UINT64		dat_uint64a, dat_uint64b;
+	DAT_UINT32		dat_uint32;
+	DAT_COUNT		segments = 1;
+	DAT_COMPLETION_FLAGS	comp_flags;
+	DAT_RETURN		status = DAT_NOT_IMPLEMENTED;
+
+	dapl_dbg_log(DAPL_DBG_TYPE_API,
+		     "dapl_extensions(hdl %p operation %d, ...)\n",
+		     dat_handle, ext_op);
+
+	DAPL_CNTR(DCNT_EXTENSION);
+
+	switch ((int)ext_op)
+	{
+	
+	case DAT_IB_RDMA_WRITE_IMMED_OP:
+    		dapl_dbg_log(DAPL_DBG_TYPE_RTN, 
+			     " WRITE_IMMED_DATA extension call\n");
+		
+		ep          = dat_handle;		 /* ep_handle */
+		segments    = va_arg( args, DAT_COUNT);  /* num segments */
+		lmr_p       = va_arg( args, DAT_LMR_TRIPLET*);  
+		cookie      = va_arg( args, DAT_DTO_COOKIE);
+		rmr_p       = va_arg( args, const DAT_RMR_TRIPLET*); 
+		dat_uint32  = va_arg( args, DAT_UINT32); /* immed data */
+		comp_flags  = va_arg( args, DAT_COMPLETION_FLAGS);   
+		
+		status = dapli_post_ext(ep, 0, 0, dat_uint32, segments, lmr_p,
+    					cookie, rmr_p, OP_RDMA_WRITE_IMM,
+    	    				comp_flags );
+    		break;
+
+	case DAT_IB_CMP_AND_SWAP_OP:
+    		dapl_dbg_log(DAPL_DBG_TYPE_RTN, 
+			     " CMP_AND_SWAP extension call\n");
+
+		ep          = dat_handle;		/* ep_handle */
+		dat_uint64a = va_arg( args, DAT_UINT64); /* cmp_value */
+		dat_uint64b = va_arg( args, DAT_UINT64); /* swap_value */
+		lmr_p       = va_arg( args, DAT_LMR_TRIPLET*); 
+		cookie      = va_arg( args, DAT_DTO_COOKIE);  
+		rmr_p       = va_arg( args, const DAT_RMR_TRIPLET*);
+		comp_flags  = va_arg( args, DAT_COMPLETION_FLAGS);
+
+		status = dapli_post_ext(ep, dat_uint64a, dat_uint64b,
+					0, segments, lmr_p, cookie, rmr_p, 
+					OP_COMP_AND_SWAP, comp_flags );
+   		break;
+
+	case DAT_IB_FETCH_AND_ADD_OP:
+    		dapl_dbg_log(DAPL_DBG_TYPE_RTN, 
+			     " FETCH_AND_ADD extension call\n");
+		
+		ep          = dat_handle;		/* ep_handle */
+		dat_uint64a = va_arg( args, DAT_UINT64); /* add value */
+		lmr_p       = va_arg( args, DAT_LMR_TRIPLET*);  
+		cookie      = va_arg( args, DAT_DTO_COOKIE);
+		rmr_p       = va_arg( args, const DAT_RMR_TRIPLET*); 
+		comp_flags  = va_arg( args, DAT_COMPLETION_FLAGS);   
+		
+		status = dapli_post_ext(ep, dat_uint64a, 0, 0, segments, 
+					lmr_p, cookie, rmr_p, 
+					OP_FETCH_AND_ADD, comp_flags );
+
+    		break;
+
+	default:
+    		dapl_dbg_log(DAPL_DBG_TYPE_ERR, 
+			     "unsupported extension(%d)\n", (int)ext_op);
+	}
+	
+	return(status);
+}
+
+
+DAT_RETURN
+dapli_post_ext( IN       DAT_EP_HANDLE		ep_handle,
+		IN       DAT_UINT64		cmp_add,	
+		IN       DAT_UINT64		swap,
+		IN	 DAT_UINT32		immed_data,
+		IN	 DAT_COUNT		segments,
+		IN       DAT_LMR_TRIPLET	*local_iov,
+		IN       DAT_DTO_COOKIE		user_cookie,
+		IN const DAT_RMR_TRIPLET	*remote_iov,
+		IN	 int			op_type, 
+		IN       DAT_COMPLETION_FLAGS	flags )
+{
+	DAPL_EP 	*ep_ptr;
+	ib_qp_handle_t	qp_ptr;
+	DAPL_COOKIE	*cookie = NULL;
+	DAT_RETURN	dat_status = DAT_SUCCESS;
+
+	dapl_dbg_log(DAPL_DBG_TYPE_API,
+		     " post_ext_op: ep %p cmp_val %d "
+		     "swap_val %d cookie 0x%x, r_iov %p, flags 0x%x\n",
+		     ep_handle, (unsigned)cmp_add, (unsigned)swap, 
+		     (unsigned)user_cookie.as_64, remote_iov, flags);
+
+	if (DAPL_BAD_HANDLE(ep_handle, DAPL_MAGIC_EP))
+		return(DAT_ERROR(DAT_INVALID_HANDLE, DAT_INVALID_HANDLE_EP));
+
+	ep_ptr = (DAPL_EP *) ep_handle;
+	qp_ptr = ep_ptr->qp_handle;
+
+	/*
+	 * Synchronization ok since this buffer is only used for send
+	 * requests, which aren't allowed to race with each other.
+	 */
+	dat_status = dapls_dto_cookie_alloc(&ep_ptr->req_buffer,
+					    DAPL_DTO_TYPE_EXTENSION,
+					    user_cookie,
+					    &cookie);
+	if (dat_status != DAT_SUCCESS)
+		goto bail;
+		
+	/*
+	 * Take reference before posting to avoid race conditions with
+	 * completions
+	 */
+	dapl_os_atomic_inc(&ep_ptr->req_count);
+
+	/*
+	 * Invoke provider specific routine to post DTO
+	 */
+	dat_status = dapls_ib_post_ext_send(ep_ptr,	
+					    op_type,
+					    cookie,	
+					    segments,	/* data segments */
+					    local_iov, 
+					    remote_iov,
+					    immed_data,	/* immed data */
+					    cmp_add,	/* compare or add */
+					    swap,	/* swap */
+					    flags);
+
+	if (dat_status != DAT_SUCCESS) {
+		dapl_os_atomic_dec(&ep_ptr->req_count);
+		dapls_cookie_dealloc(&ep_ptr->req_buffer, cookie);
+	}
+
+bail:
+	return dat_status;
+
+}
+
+
+/* 
+ * New provider routine to process extended DTO events 
+ */
+void
+dapls_cqe_to_event_extension(IN DAPL_EP			*ep_ptr,
+			     IN DAPL_COOKIE		*cookie,
+			     IN ib_work_completion_t	*cqe_ptr,
+			     IN DAT_EVENT		*event_ptr)
+{
+	uint32_t ibtype;
+	DAT_DTO_COMPLETION_EVENT_DATA *dto = 
+			&event_ptr->event_data.dto_completion_event_data;
+	DAT_IB_EXTENSION_EVENT_DATA *ext_data = 
+			(DAT_IB_EXTENSION_EVENT_DATA *)
+			&event_ptr->event_extension_data[0];
+	DAT_DTO_COMPLETION_STATUS dto_status;
+
+	/* Get status from cqe */
+        dto_status = dapls_ib_get_dto_status(cqe_ptr);
+	
+	dapl_dbg_log(DAPL_DBG_TYPE_EVD,
+		     " cqe_to_event_ext: dto_ptr %p ext_ptr %p status %d\n",
+		     dto, ext_data, dto_status);
+
+	event_ptr->event_number = DAT_IB_DTO_EVENT;
+	dto->ep_handle = cookie->ep;
+	dto->user_cookie = cookie->val.dto.cookie;
+	dto->operation = DAPL_GET_CQE_DTOS_OPTYPE(cqe_ptr); /* new for 2.0 */
+	dto->status = ext_data->status = dto_status;
+
+	if (dto_status != DAT_DTO_SUCCESS)
+		return;
+	
+	/* 
+	 * Get operation type from CQ work completion entry and
+	 * if extended operation then set extended event data
+         */
+	ibtype = DAPL_GET_CQE_OPTYPE(cqe_ptr);
+        
+	switch (ibtype)	{
+
+	case OP_RDMA_WRITE_IMM:
+		dapl_dbg_log (DAPL_DBG_TYPE_EVD,
+                                " cqe_to_event_ext: OP_RDMA_WRITE_IMMED\n");
+		
+		/* type and outbound rdma write transfer size */
+		dto->transfered_length = cookie->val.dto.size;
+		ext_data->type = DAT_IB_RDMA_WRITE_IMMED;
+		break;
+	case OP_RECEIVE_IMM:
+		dapl_dbg_log (DAPL_DBG_TYPE_EVD,
+                                " cqe_to_event_ext: OP_RECEIVE_RDMA_IMMED\n");
+		
+		/* immed recvd, type and inbound rdma write transfer size */
+		dto->transfered_length = DAPL_GET_CQE_BYTESNUM(cqe_ptr);
+		ext_data->type = DAT_IB_RDMA_WRITE_IMMED_DATA;
+		ext_data->val.immed.data = DAPL_GET_CQE_IMMED_DATA(cqe_ptr);
+		break;
+	case OP_COMP_AND_SWAP:
+                dapl_dbg_log (DAPL_DBG_TYPE_EVD,
+                                " cqe_to_event_ext: COMP_AND_SWAP_RESP\n");
+
+                /* original data is returned in LMR provided with post */
+		ext_data->type = DAT_IB_CMP_AND_SWAP;
+		dto->transfered_length = DAPL_GET_CQE_BYTESNUM(cqe_ptr);
+		break;
+	case OP_FETCH_AND_ADD:
+                dapl_dbg_log (DAPL_DBG_TYPE_EVD,
+                                " cqe_to_event_ext: FETCH_AND_ADD_RESP\n");
+
+		/* original data is returned in LMR provided with post */
+		ext_data->type = DAT_IB_FETCH_AND_ADD;
+		dto->transfered_length = DAPL_GET_CQE_BYTESNUM(cqe_ptr);
+		break;
+	default:
+		/* not extended operation */
+		ext_data->status = DAT_IB_OP_ERR;
+		dto->status = DAT_DTO_ERR_TRANSPORT;
+		break;
+	}
+}
diff --git a/dapl/openib_scm/dapl_ib_mem.c
b/dapl/openib_scm/dapl_ib_mem.c
index de36c0f..5bd5342 100644
--- a/dapl/openib_scm/dapl_ib_mem.c
+++ b/dapl/openib_scm/dapl_ib_mem.c
@@ -1,4 +1,6 @@
 /*
+ * Copyright (c) 2005-2007 Intel Corporation.  All rights reserved.
+ *
  * This Software is licensed under one of the following licenses:
  *
  * 1) under the terms of the "Common Public License 1.0" a copy of
which is
@@ -25,12 +27,11 @@
 
 /**********************************************************************
  * 
- * MODULE: dapl_det_mem.c
+ * MODULE: dapl_ib_mem.c
  *
- * PURPOSE: Intel DET APIs: Memory windows, registration,
- *           and protection domain 
+ * PURPOSE: Memory windows, registration, and protection domain 
  *
- * $Id: $
+ * $Id:$
  *
 
**********************************************************************/
 
@@ -61,8 +62,7 @@
  *
  */
 STATIC _INLINE_ int
-dapls_convert_privileges (
-    IN DAT_MEM_PRIV_FLAGS	privileges)
+dapls_convert_privileges(IN DAT_MEM_PRIV_FLAGS privileges)
 {
 	int	access = 0;
 
@@ -79,6 +79,11 @@ dapls_convert_privileges (
 		access |= IBV_ACCESS_REMOTE_READ;
 	if (DAT_MEM_PRIV_REMOTE_READ_FLAG & privileges)
 		access |= IBV_ACCESS_REMOTE_READ;
+#ifdef DAT_EXTENSIONS
+        if (DAT_IB_MEM_PRIV_REMOTE_ATOMIC & privileges)
+                access |= IBV_ACCESS_REMOTE_ATOMIC;
+#endif
+
 
 	return access;
 }
@@ -101,16 +106,15 @@ dapls_convert_privileges (
  *
  */
 DAT_RETURN
-dapls_ib_pd_alloc (
-	IN  DAPL_IA 	*ia_ptr,
-	IN  DAPL_PZ 	*pz )
+dapls_ib_pd_alloc(IN DAPL_IA *ia_ptr, IN DAPL_PZ *pz)
 {
 	/* get a protection domain */
 	pz->pd_handle = ibv_alloc_pd(ia_ptr->hca_ptr->ib_hca_handle);
 	if (!pz->pd_handle) 
 		return(dapl_convert_errno(ENOMEM,"alloc_pd"));
 
-	dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " pd_alloc: pd_handle=%p\n", 
+	dapl_dbg_log(DAPL_DBG_TYPE_UTIL, 
+		     " pd_alloc: pd_handle=%p\n", 
 		     pz->pd_handle );
 
 	return DAT_SUCCESS;
@@ -134,13 +138,18 @@ dapls_ib_pd_alloc (
  *
  */
 DAT_RETURN
-dapls_ib_pd_free (
-	IN  DAPL_PZ 	*pz )
+dapls_ib_pd_free(IN DAPL_PZ *pz )
 {
 	if (pz->pd_handle != IB_INVALID_HANDLE) {
+#if 1
+		ibv_dealloc_pd(pz->pd_handle);
+		pz->pd_handle = IB_INVALID_HANDLE;	
+		return DAT_SUCCESS;
+#else
 		if (ibv_dealloc_pd(pz->pd_handle))
 			return(dapl_convert_errno(errno,"dealloc_pd"));
 		pz->pd_handle = IB_INVALID_HANDLE;	
+#endif
 	}
 	return DAT_SUCCESS;
 }
@@ -165,28 +174,37 @@ dapls_ib_pd_free (
  *
  */
 DAT_RETURN
-dapls_ib_mr_register (
-        IN  DAPL_IA                 *ia_ptr,
-        IN  DAPL_LMR                *lmr,
-        IN  DAT_PVOID                virt_addr,
-        IN  DAT_VLEN                length,
-        IN  DAT_MEM_PRIV_FLAGS      privileges)
+dapls_ib_mr_register(IN  DAPL_IA *ia_ptr,
+		     IN  DAPL_LMR *lmr,
+		     IN  DAT_PVOID virt_addr,
+		     IN  DAT_VLEN length,
+		     IN  DAT_MEM_PRIV_FLAGS privileges,
+		     IN  DAT_VA_TYPE va_type)
 {
 	ib_pd_handle_t	ib_pd_handle;
+	struct ibv_device *ibv_dev =
ia_ptr->hca_ptr->ib_hca_handle->device;
 
 	ib_pd_handle = ((DAPL_PZ *)lmr->param.pz_handle)->pd_handle;
 	
-	dapl_dbg_log (  DAPL_DBG_TYPE_UTIL, 
-			" mr_register: ia=%p, lmr=%p va=%p ln=%d
pv=0x%x\n", 
-			ia_ptr, lmr, virt_addr, length, privileges );
+	dapl_dbg_log(DAPL_DBG_TYPE_UTIL, 
+		     " mr_register: ia=%p, lmr=%p va=%p ln=%d
pv=0x%x\n", 
+		     ia_ptr, lmr, virt_addr, length, privileges );
 
 	/* TODO: shared memory */
 	if (lmr->param.mem_type == DAT_MEM_TYPE_SHARED_VIRTUAL) {
-		dapl_dbg_log( DAPL_DBG_TYPE_ERR,
-		     " mr_register_shared: NOT IMPLEMENTED\n");    
+		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
+			     " mr_register_shared: NOT IMPLEMENTED\n");

 		return DAT_ERROR (DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);

 	}
 
+        /* iWARP only support */
+	if ((va_type == DAT_VA_TYPE_ZB) &&
+	    (ibv_dev->transport_type != IBV_TRANSPORT_IWARP)) {
+                dapl_dbg_log(DAPL_DBG_TYPE_ERR,
+                        " va_type == DAT_VA_TYPE_ZB: NOT SUPPORTED\n");
+                return DAT_ERROR (DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);
+        }
+
 	/* local read is default on IB */ 
 	lmr->mr_handle = 
 		ibv_reg_mr(((DAPL_PZ *)lmr->param.pz_handle)->pd_handle,

@@ -200,16 +218,16 @@ dapls_ib_mr_register (
 	lmr->param.lmr_context = lmr->mr_handle->lkey; 
 	lmr->param.rmr_context = lmr->mr_handle->rkey;
 	lmr->param.registered_size = length;
-	lmr->param.registered_address = (DAT_VADDR)(uintptr_t)
virt_addr;
+	lmr->param.registered_address = (DAT_VADDR)(uintptr_t)virt_addr;
 
-	dapl_dbg_log (  DAPL_DBG_TYPE_UTIL, 
-			" mr_register: mr=%p h %x pd %p ctx %p
,lkey=0x%x, rkey=0x%x priv=%x\n", 
-			lmr->mr_handle,	lmr->mr_handle->handle,	
-			lmr->mr_handle->pd,
-			lmr->mr_handle->context,
-			lmr->mr_handle->lkey, 
-			lmr->mr_handle->rkey, 
-			length, dapls_convert_privileges(privileges) );
+	dapl_dbg_log(DAPL_DBG_TYPE_UTIL, 
+		     " mr_register: mr=%p addr=%p h %x pd %p ctx %p "
+		     "lkey=0x%x rkey=0x%x priv=%x\n", 
+		     lmr->mr_handle, lmr->mr_handle->addr, 
+		     lmr->mr_handle->handle,	
+		     lmr->mr_handle->pd, lmr->mr_handle->context,
+		     lmr->mr_handle->lkey, lmr->mr_handle->rkey, 
+		     length, dapls_convert_privileges(privileges));
 
 	return DAT_SUCCESS;
 }
@@ -231,8 +249,7 @@ dapls_ib_mr_register (
  *
  */
 DAT_RETURN
-dapls_ib_mr_deregister (
-	IN  DAPL_LMR	*lmr )
+dapls_ib_mr_deregister(IN DAPL_LMR *lmr)
 {
 	if (lmr->mr_handle != IB_INVALID_HANDLE) {
 		if (ibv_dereg_mr(lmr->mr_handle))
@@ -251,8 +268,8 @@ dapls_ib_mr_deregister (
  * Input:
  *	ia_ptr		IA handle
  *	lmr		pointer to dapl_lmr struct
- *	virt_addr	virtual address of beginning of mem region
- *	length		length of memory region
+ *	privileges	
+ *	va_type		
  *
  * Output:
  * 	none
@@ -263,13 +280,15 @@ dapls_ib_mr_deregister (
  *
  */
 DAT_RETURN
-dapls_ib_mr_register_shared (
-	IN  DAPL_IA 		    *ia_ptr,
-	IN  DAPL_LMR		    *lmr,
-	IN  DAT_MEM_PRIV_FLAGS	privileges )
+dapls_ib_mr_register_shared(IN DAPL_IA *ia_ptr,
+			    IN DAPL_LMR *lmr,
+			    IN DAT_MEM_PRIV_FLAGS privileges,
+			    IN DAT_VA_TYPE va_type)
 {
-    dapl_dbg_log(DAPL_DBG_TYPE_ERR," mr_register_shared: NOT
IMPLEMENTED\n");
-    return DAT_ERROR (DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);  
+    dapl_dbg_log(DAPL_DBG_TYPE_ERR,
+	         " mr_register_shared: NOT IMPLEMENTED\n");
+
+    return DAT_ERROR(DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);  
 }
 
 /*
@@ -289,12 +308,13 @@ dapls_ib_mr_register_shared (
  *
  */
 DAT_RETURN
-dapls_ib_mw_alloc (
-	IN  DAPL_RMR	*rmr )
+dapls_ib_mw_alloc (IN DAPL_RMR *rmr)
 {
 
-	dapl_dbg_log(DAPL_DBG_TYPE_ERR," mw_alloc: NOT IMPLEMENTED\n");
-   	return DAT_ERROR (DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);  
+	dapl_dbg_log(DAPL_DBG_TYPE_ERR,
+		     " mw_alloc: NOT IMPLEMENTED\n");
+   	
+	return DAT_ERROR(DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);  
 }
 
 /*
@@ -314,11 +334,12 @@ dapls_ib_mw_alloc (
  *
  */
 DAT_RETURN
-dapls_ib_mw_free (
-	IN  DAPL_RMR 	*rmr )
+dapls_ib_mw_free(IN DAPL_RMR *rmr)
 {	
-	dapl_dbg_log(DAPL_DBG_TYPE_ERR," mw_free: NOT IMPLEMENTED\n");
-	return DAT_ERROR (DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);  
+	dapl_dbg_log(DAPL_DBG_TYPE_ERR,
+		     " mw_free: NOT IMPLEMENTED\n");
+
+	return DAT_ERROR(DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);  
 }
 
 /*
@@ -339,17 +360,18 @@ dapls_ib_mw_free (
  *
  */
 DAT_RETURN
-dapls_ib_mw_bind (
-	IN  DAPL_RMR			*rmr,
-	IN  DAPL_LMR			*lmr,
-	IN  DAPL_EP			*ep,
-	IN  DAPL_COOKIE			*cookie,
-	IN  DAT_VADDR			virtual_address,
-	IN  DAT_VLEN			length,
-	IN  DAT_MEM_PRIV_FLAGS		mem_priv,
-	IN  DAT_BOOLEAN			is_signaled)
+dapls_ib_mw_bind(IN DAPL_RMR *rmr,
+		 IN DAPL_LMR *lmr,
+		 IN DAPL_EP  *ep,
+		 IN DAPL_COOKIE *cookie,
+		 IN DAT_VADDR virtual_address,
+		 IN DAT_VLEN length,
+		 IN DAT_MEM_PRIV_FLAGS mem_priv,
+		 IN DAT_BOOLEAN is_signaled)
 {
-	dapl_dbg_log(DAPL_DBG_TYPE_ERR," mw_bind: NOT IMPLEMENTED\n");
+	dapl_dbg_log(DAPL_DBG_TYPE_ERR,
+		     " mw_bind: NOT IMPLEMENTED\n");
+
 	return DAT_ERROR (DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);  
 }
 
@@ -372,14 +394,15 @@ dapls_ib_mw_bind (
  *
  */
 DAT_RETURN
-dapls_ib_mw_unbind (
-	IN  DAPL_RMR	*rmr,
-	IN  DAPL_EP	*ep,
-	IN  DAPL_COOKIE	*cookie,
-	IN  DAT_BOOLEAN	is_signaled )
+dapls_ib_mw_unbind(IN DAPL_RMR *rmr,
+		   IN DAPL_EP  *ep,
+		   IN DAPL_COOKIE *cookie,
+		   IN DAT_BOOLEAN is_signaled )
 {
-	dapl_dbg_log(DAPL_DBG_TYPE_ERR," mw_unbind: NOT IMPLEMENTED\n");
-	return DAT_ERROR (DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);  
+	dapl_dbg_log(DAPL_DBG_TYPE_ERR,
+		     " mw_unbind: NOT IMPLEMENTED\n");
+	
+	return DAT_ERROR(DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);  
 }
 
 /*
diff --git a/dapl/openib_scm/dapl_ib_qp.c b/dapl/openib_scm/dapl_ib_qp.c
index 3a1e3c8..1eba2bd 100644
--- a/dapl/openib_scm/dapl_ib_qp.c
+++ b/dapl/openib_scm/dapl_ib_qp.c
@@ -110,15 +110,23 @@ dapls_ib_qp_alloc (
 	/* Setup attributes and create qp */
 	dapl_os_memzero((void*)&qp_create, sizeof(qp_create));
 	qp_create.send_cq = req_cq;
-	qp_create.recv_cq = rcv_cq;
 	qp_create.cap.max_send_wr = attr->max_request_dtos;
-	qp_create.cap.max_recv_wr = attr->max_recv_dtos;
 	qp_create.cap.max_send_sge = attr->max_request_iov;
-	qp_create.cap.max_recv_sge = attr->max_recv_iov;
 	qp_create.cap.max_inline_data =
ia_ptr->hca_ptr->ib_trans.max_inline_send; 
 	qp_create.qp_type = IBV_QPT_RC;
 	qp_create.qp_context = (void*)ep_ptr;
 
+	/* ibv assumes rcv_cq is never NULL, set to req_cq */
+	if (rcv_cq == NULL) {
+		qp_create.recv_cq = req_cq;
+		qp_create.cap.max_recv_wr = 0;
+		qp_create.cap.max_recv_sge = 0;
+	} else {
+		qp_create.recv_cq = rcv_cq;
+		qp_create.cap.max_recv_wr = attr->max_recv_dtos;
+		qp_create.cap.max_recv_sge = attr->max_recv_iov;
+	}
+
 	ep_ptr->qp_handle = ibv_create_qp( ib_pd_handle, &qp_create);
 	if (!ep_ptr->qp_handle) 
 		return(dapl_convert_errno(ENOMEM, "create_qp"));
@@ -298,9 +306,10 @@ dapls_modify_qp_state ( IN ib_qp_handle_t
qp_handle,
 			IN ib_qp_state_t	qp_state,
 			IN ib_qp_cm_t		*qp_cm )
 {
-	struct ibv_qp_attr	qp_attr;
+	struct ibv_qp_attr 	qp_attr;
 	enum ibv_qp_attr_mask	mask = IBV_QP_STATE;
-	DAPL_EP	*ep_ptr = (DAPL_EP*)qp_handle->qp_context;
+	DAPL_EP			*ep_ptr =
(DAPL_EP*)qp_handle->qp_context;
+	DAPL_IA			*ia_ptr = ep_ptr->header.owner_ia;
 			
 	dapl_os_memzero((void*)&qp_attr, sizeof(qp_attr));
 	qp_attr.qp_state = qp_state;
@@ -315,14 +324,16 @@ dapls_modify_qp_state ( IN ib_qp_handle_t
qp_handle,
 				IBV_QP_RQ_PSN             |
 				IBV_QP_MAX_DEST_RD_ATOMIC |
 				IBV_QP_MIN_RNR_TIMER;
+
 			qp_attr.qp_state 		= IBV_QPS_RTR;
-			qp_attr.path_mtu 		= IBV_MTU_1024;
+			qp_attr.path_mtu 		= IBV_MTU_2048;
 			qp_attr.dest_qp_num 		= qp_cm->qpn;
 			qp_attr.rq_psn 			= 1;
 			qp_attr.max_dest_rd_atomic	= 
 				ep_ptr->param.ep_attr.max_rdma_read_out;
-			qp_attr.min_rnr_timer		= 12;
+			qp_attr.min_rnr_timer		=
ia_ptr->hca_ptr->ib_trans.rnr_timer;
 			qp_attr.ah_attr.is_global	= 0;
+			qp_attr.ah_attr.grh.dgid        = qp_cm->gid;
 			qp_attr.ah_attr.dlid		= qp_cm->lid;
 			qp_attr.ah_attr.sl		= 0;
 			qp_attr.ah_attr.src_path_bits	= 0;
@@ -343,30 +354,25 @@ dapls_modify_qp_state ( IN ib_qp_handle_t
qp_handle,
 				IBV_QP_RNR_RETRY          |
 				IBV_QP_SQ_PSN             |
 				IBV_QP_MAX_QP_RD_ATOMIC;
+
 			qp_attr.qp_state	= IBV_QPS_RTS;
-			qp_attr.timeout		= 14;
-			qp_attr.retry_cnt	= 7;
-			qp_attr.rnr_retry	= 7;
+			qp_attr.timeout		=
ia_ptr->hca_ptr->ib_trans.ack_timer;
+			qp_attr.retry_cnt	=
ia_ptr->hca_ptr->ib_trans.ack_retry;
+			qp_attr.rnr_retry	=
ia_ptr->hca_ptr->ib_trans.rnr_retry;
 			qp_attr.sq_psn		= 1;
 			qp_attr.max_rd_atomic	= 
 				ep_ptr->param.ep_attr.max_rdma_read_out;
 
-			dapl_dbg_log (DAPL_DBG_TYPE_EP,
-			      " modify_qp_rts: psn %x rd_atomic %d\n",
-			      qp_attr.sq_psn, qp_attr.max_rd_atomic );
+			dapl_dbg_log(DAPL_DBG_TYPE_EP,
+				" modify_qp_rts: psn %x rd_atomic %d ack
%d "
+				" retry %d rnr_retry %d\n",
+				qp_attr.sq_psn, qp_attr.max_rd_atomic, 
+				qp_attr.timeout, qp_attr.retry_cnt, 
+				qp_attr.rnr_retry );
 			break;
 		}
 		case IBV_QPS_INIT: 
 		{
-			DAPL_IA	*ia_ptr;
-			DAPL_EP	*ep_ptr; 
-			/* need to find way back to port num */
-			ep_ptr = (DAPL_EP*)qp_handle->qp_context;
-			if (ep_ptr)
-				ia_ptr = ep_ptr->header.owner_ia;
-			else
-				break;
-
 			mask |= IBV_QP_PKEY_INDEX	|
 				IBV_QP_PORT		|
 				IBV_QP_ACCESS_FLAGS;
@@ -377,7 +383,8 @@ dapls_modify_qp_state ( IN ib_qp_handle_t
qp_handle,
 					IBV_ACCESS_LOCAL_WRITE |
 					IBV_ACCESS_REMOTE_WRITE |
 					IBV_ACCESS_REMOTE_READ |
-					IBV_ACCESS_REMOTE_ATOMIC;
+					IBV_ACCESS_REMOTE_ATOMIC |
+					IBV_ACCESS_MW_BIND;
 			
 			dapl_dbg_log (DAPL_DBG_TYPE_EP,
 				" modify_qp_init: pi %x port %x acc
%x\n",
diff --git a/dapl/openib_scm/dapl_ib_util.c
b/dapl/openib_scm/dapl_ib_util.c
index 5e34b47..9bf0e2b 100644
--- a/dapl/openib_scm/dapl_ib_util.c
+++ b/dapl/openib_scm/dapl_ib_util.c
@@ -62,6 +62,7 @@ static const char rcsid[] = "$Id:  $";
 #include <fcntl.h>
 
 int g_dapl_loopback_connection = 0;
+int g_scm_pipe[2];
 
 /* just get IP address for hostname */
 DAT_RETURN getipaddr( char *addr, int addr_len)
@@ -70,14 +71,14 @@ DAT_RETURN getipaddr( char *addr, int addr_len)
 	struct hostent		*h_ptr;
 	struct utsname		ourname;
 
-	if ( uname( &ourname ) < 0 ) 
+	if (uname( &ourname ) < 0) 
 		return DAT_INTERNAL_ERROR;
 
-	h_ptr = gethostbyname( ourname.nodename );
-	if ( h_ptr == NULL ) 
+	h_ptr = gethostbyname(ourname.nodename);
+	if (h_ptr == NULL) 
 		return DAT_INTERNAL_ERROR;
 
-	if ( h_ptr->h_addrtype == AF_INET ) {
+	if (h_ptr->h_addrtype == AF_INET) {
 		int i;
 		struct in_addr  **alist =
 			(struct in_addr **)h_ptr->h_addr_list;
@@ -87,18 +88,17 @@ DAT_RETURN getipaddr( char *addr, int addr_len)
 		
 		/* Walk the list of addresses for host */
 		for (i=0; alist[i] != NULL; i++) {
-		       
-                       /* first non-loopback address */
-		       if ( *(uint32_t*)alist[i] != htonl(0x7f000001) ) {
-                               dapl_os_memcpy( &ipv4_addr->sin_addr,
-                                               h_ptr->h_addr_list[i],
-                                               4 );
+		       /* first non-loopback address */			
+		       if (*(uint32_t*)alist[i] != htonl(0x7f000001)) {
+                               dapl_os_memcpy(&ipv4_addr->sin_addr,
+                                              h_ptr->h_addr_list[i],
+                                              4);
                                break;
                        }
                }
                /* if no acceptable address found */
                if (*(uint32_t*)&ipv4_addr->sin_addr == 0)
-                       return DAT_INVALID_ADDRESS;
+			return DAT_INVALID_ADDRESS;
 	} else 
 		return DAT_INVALID_ADDRESS;
 
@@ -122,6 +122,10 @@ DAT_RETURN getipaddr( char *addr, int addr_len)
  */
 int32_t dapls_ib_init (void)
 {	
+	/* create pipe for waking up thread */
+	if (pipe(g_scm_pipe))
+		return 1;
+
 	return 0;
 }
 
@@ -156,7 +160,7 @@ DAT_RETURN dapls_ib_open_hca (
 	int		i;
 	DAT_RETURN	dat_status = DAT_SUCCESS;
 
-	dapl_dbg_log (DAPL_DBG_TYPE_UTIL, 
+	dapl_dbg_log(DAPL_DBG_TYPE_UTIL, 
 		      " open_hca: %s - %p\n", hca_name, hca_ptr );
 
 	/* Get list of all IB devices, find match, open */
@@ -170,65 +174,83 @@ DAT_RETURN dapls_ib_open_hca (
 
 	for (i = 0; dev_list[i]; ++i) {
 		hca_ptr->ib_trans.ib_dev = dev_list[i];
-		if (!strcmp(ibv_get_device_name(hca_ptr->ib_trans.ib_dev),hca_name))
+		if (!strcmp(ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
+			    hca_name))
 			goto found;
 	}
 
-	dapl_dbg_log (DAPL_DBG_TYPE_ERR,
-		      " open_hca: IB device %s not found\n",
-		      hca_name);
+	dapl_log(DAPL_DBG_TYPE_ERR,
+		 " open_hca: device %s not found\n",
+		 hca_name);
 	goto err;
 
 found:
-	dapl_dbg_log (DAPL_DBG_TYPE_UTIL," open_hca: Found dev %s %016llx\n", 
-			ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
-			(unsigned long long)bswap_64(ibv_get_device_guid(hca_ptr->ib_trans.ib_dev)));
+	dapl_dbg_log(DAPL_DBG_TYPE_UTIL," open_hca: Found dev %s %016llx\n", 
+		     ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
+		     (unsigned long long)
+		     bswap_64(ibv_get_device_guid(hca_ptr->ib_trans.ib_dev)));
 
 	hca_ptr->ib_hca_handle = ibv_open_device(hca_ptr->ib_trans.ib_dev);
 	if (!hca_ptr->ib_hca_handle) {
-		 dapl_dbg_log (DAPL_DBG_TYPE_ERR, 
-				" open_hca: IB dev open failed for %s\n", 
-				ibv_get_device_name(hca_ptr->ib_trans.ib_dev) );
+		 dapl_log(DAPL_DBG_TYPE_ERR, 
+			  " open_hca: dev open failed for %s, err=%s\n",
+			  ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
+			  strerror(errno));
 		 goto err;
 	}
 
-	/* set inline max with enviroment or default */
+	/* set RC tunables via environment or default */
 	hca_ptr->ib_trans.max_inline_send = 
-		dapl_os_get_env_val ( "DAPL_MAX_INLINE", INLINE_SEND_DEFAULT );
+		dapl_os_get_env_val("DAPL_MAX_INLINE", INLINE_SEND_DEFAULT);
+	hca_ptr->ib_trans.ack_retry = 
+		dapl_os_get_env_val("DAPL_ACK_RETRY", SCM_ACK_RETRY);
+	hca_ptr->ib_trans.ack_timer =
+		dapl_os_get_env_val("DAPL_ACK_TIMER", SCM_ACK_TIMER);
+	hca_ptr->ib_trans.rnr_retry = 
+		dapl_os_get_env_val("DAPL_RNR_RETRY", SCM_RNR_RETRY);
+	hca_ptr->ib_trans.rnr_timer = 
+		dapl_os_get_env_val("DAPL_RNR_TIMER", SCM_RNR_TIMER);
 
 	/* initialize cq_lock */
 	dat_status = dapl_os_lock_init(&hca_ptr->ib_trans.cq_lock);
-	if (dat_status != DAT_SUCCESS)
-	{
-		dapl_dbg_log (DAPL_DBG_TYPE_ERR, 
-			" open_hca: failed to init cq_lock\n");
+	if (dat_status != DAT_SUCCESS) {
+		dapl_log(DAPL_DBG_TYPE_ERR, 
+			 " open_hca: failed to init cq_lock\n");
 		goto bail;
 	}
 
 	/* EVD events without direct CQ channels, non-blocking */
 	hca_ptr->ib_trans.ib_cq = 
 		ibv_create_comp_channel(hca_ptr->ib_hca_handle);
+	if (hca_ptr->ib_trans.ib_cq == NULL) {
+		dapl_log(DAPL_DBG_TYPE_ERR, 
+			 " open_hca: ibv_create_comp_channel ERR %s\n",
+			 strerror(errno));
+		goto bail;
+	}
+
 	opts = fcntl(hca_ptr->ib_trans.ib_cq->fd, F_GETFL); /* uCQ */
 	if (opts < 0 || fcntl(hca_ptr->ib_trans.ib_cq->fd, 
 			      F_SETFL, opts | O_NONBLOCK) < 0) {
-		dapl_dbg_log (DAPL_DBG_TYPE_ERR, 
-			      " open_hca: ERR with CQ FD\n" );
+		dapl_log(DAPL_DBG_TYPE_ERR, 
+			 " open_hca: fcntl on ib_cq->fd %d ERR %d %s\n",
+			 hca_ptr->ib_trans.ib_cq->fd, opts,
+			 strerror(errno));
 		goto bail;
 	}
 
 	if (dapli_cq_thread_init(hca_ptr)) {
-                dapl_dbg_log (DAPL_DBG_TYPE_ERR,
-                              " open_hca: cq_thread_init failed for %s\n",
-                              ibv_get_device_name(hca_ptr->ib_trans.ib_dev) );
+                dapl_log(DAPL_DBG_TYPE_ERR,
+                         " open_hca: cq_thread_init failed for %s\n",
+                         ibv_get_device_name(hca_ptr->ib_trans.ib_dev));
                 goto bail;
         }
 
 	/* initialize cr_list lock */
 	dat_status = dapl_os_lock_init(&hca_ptr->ib_trans.lock);
-	if (dat_status != DAT_SUCCESS)
-	{
-		dapl_dbg_log (DAPL_DBG_TYPE_ERR, 
-				" open_hca: failed to init lock\n");
+	if (dat_status != DAT_SUCCESS) {
+		dapl_log(DAPL_DBG_TYPE_ERR, 
+			 " open_hca: failed to init cr_list lock\n");
 		goto bail;
 	}
 
@@ -240,10 +262,9 @@ found:
 	dat_status = dapl_os_thread_create(cr_thread, 
 					   (void*)hca_ptr, 
 					   &hca_ptr->ib_trans.thread );
-	if (dat_status != DAT_SUCCESS)
-	{
-		dapl_dbg_log (DAPL_DBG_TYPE_ERR, 
-				" open_hca: failed to create thread\n");
+	if (dat_status != DAT_SUCCESS) {
+		dapl_log(DAPL_DBG_TYPE_ERR, 
+			 " open_hca: failed to create thread\n");
 		goto bail;
 	}
 	
@@ -251,7 +272,7 @@ found:
 	while (hca_ptr->ib_trans.cr_state != IB_THREAD_RUN) {
 		struct timespec	sleep, remain;
 		sleep.tv_sec = 0;
-		sleep.tv_nsec = 20000000; /* 20 ms */
+		sleep.tv_nsec = 2000000; /* 2 ms */
 		dapl_dbg_log(DAPL_DBG_TYPE_UTIL, 
 			     " open_hca: waiting for cr_thread\n");
 		nanosleep (&sleep, &remain);
@@ -259,16 +280,15 @@ found:
 
 	/* get the IP address of the device */
 	dat_status = getipaddr((char*)&hca_ptr->hca_address, 
-				sizeof(DAT_SOCK_ADDR6) );
-	dapl_dbg_log (DAPL_DBG_TYPE_UTIL, 
-		" open_hca: %s, port %d, %s  %d.%d.%d.%d\n", 
-		ibv_get_device_name(hca_ptr->ib_trans.ib_dev), hca_ptr->port_num,
-		((struct sockaddr_in *)&hca_ptr->hca_address)->sin_family == AF_INET ?  "AF_INET":"AF_INET6",
-		((struct sockaddr_in *)&hca_ptr->hca_address)->sin_addr.s_addr >> 0 & 0xff,
-		((struct sockaddr_in *)&hca_ptr->hca_address)->sin_addr.s_addr >> 8 & 0xff,
-		((struct sockaddr_in *)&hca_ptr->hca_address)->sin_addr.s_addr >> 16 & 0xff,
-		((struct sockaddr_in *)&hca_ptr->hca_address)->sin_addr.s_addr >> 24 & 0xff );
-
+				sizeof(DAT_SOCK_ADDR6));
+	
+	dapl_dbg_log(DAPL_DBG_TYPE_UTIL, 
+		     " open_hca: devname %s, port %d, hostname_IP %s\n",
+		     ibv_get_device_name(hca_ptr->ib_trans.ib_dev), 
+		     hca_ptr->port_num,
+		     inet_ntoa(((struct sockaddr_in *)
+				&hca_ptr->hca_address)->sin_addr));
+		
 	ibv_free_device_list(dev_list);
 	return dat_status;
 
@@ -308,15 +328,15 @@ DAT_RETURN dapls_ib_close_hca (	IN   DAPL_HCA *hca_ptr )
 			return(dapl_convert_errno(errno,"ib_close_device"));
 		hca_ptr->ib_hca_handle = IB_INVALID_HANDLE;
 	}
-
 	dapl_os_lock_destroy(&hca_ptr->ib_trans.cq_lock);
 
 	/* destroy cr_thread and lock */
 	hca_ptr->ib_trans.cr_state = IB_THREAD_CANCEL;
+	write(g_scm_pipe[1], "w", sizeof "w");
 	while (hca_ptr->ib_trans.cr_state != IB_THREAD_EXIT) {
 		struct timespec	sleep, remain;
 		sleep.tv_sec = 0;
-		sleep.tv_nsec = 20000000; /* 20 ms */
+		sleep.tv_nsec = 2000000; /* 2 ms */
 		dapl_dbg_log(DAPL_DBG_TYPE_UTIL, 
 			     " close_hca: waiting for cr_thread\n");
 		nanosleep (&sleep, &remain);
@@ -378,14 +398,11 @@ DAT_RETURN dapls_ib_query_hca (
 		ia_attr->vendor_name[DAT_NAME_MAX_LENGTH - 1] = '\0';
 		ia_attr->ia_address_ptr = (DAT_IA_ADDRESS_PTR)&hca_ptr->hca_address;
 
-		dapl_dbg_log (DAPL_DBG_TYPE_UTIL, 
-			" query_hca: %s %s  %d.%d.%d.%d\n", 
-			ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
-			((struct sockaddr_in *)ia_attr->ia_address_ptr)->sin_family == AF_INET ? "AF_INET":"AF_INET6",
-			((struct sockaddr_in *)ia_attr->ia_address_ptr)->sin_addr.s_addr >> 0 & 0xff,
-			((struct sockaddr_in *)ia_attr->ia_address_ptr)->sin_addr.s_addr >> 8 & 0xff,
-			((struct sockaddr_in *)ia_attr->ia_address_ptr)->sin_addr.s_addr >> 16 & 0xff,
-			((struct sockaddr_in *)ia_attr->ia_address_ptr)->sin_addr.s_addr >> 24 & 0xff );
+		dapl_dbg_log(DAPL_DBG_TYPE_UTIL, 
+			     " query_hca: %s %s \n", 
+			     ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
+			     inet_ntoa(((struct sockaddr_in *)
+					&hca_ptr->hca_address)->sin_addr));
 		
 		ia_attr->hardware_version_major   = dev_attr.hw_ver;
 		/* ia_attr->hardware_version_minor   = dev_attr.fw_ver; */
@@ -408,10 +425,21 @@ DAT_RETURN dapls_ib_query_hca (
 		ia_attr->max_pzs                  = dev_attr.max_pd;
 		ia_attr->max_mtu_size             = port_attr.max_msg_sz;
 		ia_attr->max_rdma_size            = port_attr.max_msg_sz;
+		ia_attr->max_iov_segments_per_rdma_read = dev_attr.max_sge;
+		ia_attr->max_iov_segments_per_rdma_write = dev_attr.max_sge;
 		ia_attr->num_transport_attr       = 0;
 		ia_attr->transport_attr           = NULL;
 		ia_attr->num_vendor_attr          = 0;
 		ia_attr->vendor_attr              = NULL;
+                ia_attr->max_iov_segments_per_rdma_read = dev_attr.max_sge;
+                ia_attr->max_iov_segments_per_rdma_write = dev_attr.max_sge;
+#ifdef DAT_EXTENSIONS
+                ia_attr->extension_supported = DAT_EXTENSION_IB;
+                ia_attr->extension_version = DAT_IB_EXTENSION_VERSION;
+#endif
+		hca_ptr->ib_trans.ack_timer = 
+			DAPL_MAX(dev_attr.local_ca_ack_delay,
+				 hca_ptr->ib_trans.ack_timer);
 
 		dapl_dbg_log (DAPL_DBG_TYPE_UTIL, 
 			" query_hca: (%x.%x) ep %d ep_q %d evd %d evd_q %d\n", 
@@ -420,11 +448,10 @@ DAT_RETURN dapls_ib_query_hca (
 			ia_attr->max_eps, ia_attr->max_dto_per_ep,
 			ia_attr->max_evds, ia_attr->max_evd_qlen );
 		dapl_dbg_log (DAPL_DBG_TYPE_UTIL, 
-			" query_hca: msg %llu rdma %llu iov %d lmr %d rmr %d\n", 
+			" query_hca: msg %llu rdma %llu iov %d lmr %d rmr %d ack_time %d\n", 
 			ia_attr->max_mtu_size, ia_attr->max_rdma_size,
 			ia_attr->max_iov_segments_per_dto, ia_attr->max_lmrs, 
-			ia_attr->max_rmrs );
-
+			ia_attr->max_rmrs,hca_ptr->ib_trans.ack_timer );
 	}
 	
 	if (ep_attr != NULL) {
@@ -443,7 +470,6 @@ DAT_RETURN dapls_ib_query_hca (
 			ep_attr->max_recv_dtos, ep_attr->max_recv_iov,
 			ep_attr->max_rdma_read_in, ep_attr->max_rdma_read_out);
 	}
-
 	return DAT_SUCCESS;
 }
 
@@ -508,3 +534,41 @@ DAT_RETURN dapls_ib_setup_async_callback (
     return DAT_SUCCESS;
 }
 
+/*
+ * dapls_query_provider_specific_attr
+ *
+ * Input:
+ *      attr_ptr        Pointer to provider specific attributes
+ *
+ * Output:
+ *      none
+ *
+ * Returns:
+ *      void
+ */
+DAT_NAMED_ATTR  ib_attrs[] = {
+#ifdef DAT_EXTENSIONS
+    {
+        "DAT_EXTENSION_INTERFACE", "TRUE"
+    },
+    {
+        DAT_IB_ATTR_FETCH_AND_ADD, "TRUE"
+    },
+    {
+        DAT_IB_ATTR_CMP_AND_SWAP, "TRUE"
+    },
+    {
+        DAT_IB_ATTR_IMMED_DATA, "TRUE"
+    },
+#endif
+};
+
+#define SPEC_ATTR_SIZE( x )     (sizeof( x ) / sizeof( DAT_NAMED_ATTR))
+
+void dapls_query_provider_specific_attr(
+        IN DAT_PROVIDER_ATTR    *attr_ptr )
+{
+    attr_ptr->num_provider_specific_attr = SPEC_ATTR_SIZE(ib_attrs);
+    attr_ptr->provider_specific_attr     = ib_attrs;
+}
+
diff --git a/dapl/openib_scm/dapl_ib_util.h b/dapl/openib_scm/dapl_ib_util.h
index 0d928df..fd9cd2f 100644
--- a/dapl/openib_scm/dapl_ib_util.h
+++ b/dapl/openib_scm/dapl_ib_util.h
@@ -71,15 +71,18 @@ typedef ib_hca_handle_t		dapl_ibal_ca_t;
 
 /* CM mappings, user CM not complete use SOCKETS */
 
-/* destination info to exchange until real IB CM shows up */
+/* destination info to exchange, define wire protocol version */
+#define DSCM_VER 2
 typedef struct _ib_qp_cm
 { 
-	uint32_t		qpn;
+	uint16_t		ver;
+	uint16_t		rej;
 	uint16_t		lid;
 	uint16_t		port;
-	int			p_size;
+	uint32_t		qpn;
+	uint32_t		p_size;
 	DAT_SOCK_ADDR6		ia_address;
-
+        union ibv_gid		gid;
 } ib_qp_cm_t;
 
 /* 
@@ -94,20 +97,34 @@ struct ib_llist_entry
     struct dapl_llist_entry	*list_head;
 };
 
+typedef enum scm_state 
+{
+	SCM_INIT,
+	SCM_LISTEN,
+	SCM_CONN_PENDING,
+	SCM_ACCEPTING,
+	SCM_ACCEPTED,
+	SCM_REJECTED,
+	SCM_CONNECTED,
+	SCM_DISCONNECTED,
+	SCM_DESTROY
+} SCM_STATE;
+
 struct ib_cm_handle
 { 
 	struct ib_llist_entry	entry;
+	DAPL_OS_LOCK		lock;
+	SCM_STATE		state;
 	int			socket;
-	int			l_socket; 
-	struct dapl_hca		*hca_ptr;
-	DAT_HANDLE		cr;
+	struct dapl_hca		*hca;
 	DAT_HANDLE		sp;	
+	struct dapl_ep 		*ep;	
 	ib_qp_cm_t		dst;
 	unsigned char		p_data[256];
 };
 
-typedef struct ib_cm_handle	*ib_cm_handle_t;
-typedef ib_cm_handle_t		ib_cm_srvc_handle_t;
+typedef struct ib_cm_handle	*dp_ib_cm_handle_t;
+typedef dp_ib_cm_handle_t	ib_cm_srvc_handle_t;
 
 DAT_RETURN getipaddr(char *addr, int addr_len);
 
@@ -163,6 +180,12 @@ typedef struct ibv_comp_channel *ib_wait_obj_handle_t;
 /* inline send rdma threshold */
 #define	INLINE_SEND_DEFAULT	128
 
+/* RC timer - retry count defaults */
+#define SCM_ACK_TIMER 15	/* 5 bits, 4.096us*2^ack_timer. 15 == 134ms */
+#define SCM_ACK_RETRY 7		/* 3 bits, 7 * 134ms = 940ms */
+#define SCM_RNR_TIMER 28	/* 5 bits, 28 == 163ms, 31 == 491ms */
+#define SCM_RNR_RETRY 7		/* 3 bits, 7 == infinite */
+
 /* CM private data areas */
 #define	IB_MAX_REQ_PDATA_SIZE	92
 #define	IB_MAX_REP_PDATA_SIZE	196
@@ -268,7 +291,10 @@ typedef struct _ib_hca_transport
 	ib_async_cq_handler_t	async_cq_error;
 	ib_async_dto_handler_t	async_cq;
 	ib_async_qp_handler_t	async_qp_error;
-
+	uint8_t			ack_timer;
+	uint8_t			ack_retry;
+	uint8_t			rnr_timer;
+	uint8_t			rnr_retry;
 } ib_hca_transport_t;
 
 /* provider specfic fields for shared memory support */
diff --git a/dapl/udapl/libdaploscm.map b/dapl/udapl/libdaploscm.map
new file mode 100755
index 0000000..45edb4d
--- /dev/null
+++ b/dapl/udapl/libdaploscm.map
@@ -0,0 +1,7 @@
+DAPL_SCM_2.0 {
+        global:
+		dat_provider_fini;
+		dat_provider_init;
+		dapl_extensions;
+	local: *;
+};
-- 
1.5.2.5




More information about the general mailing list