[openib-general] [PATCH #2] new uDAPL openIB provider using socket CM, corrected license headers

Arlin Davis arlin.r.davis at intel.com
Wed Oct 26 12:46:25 PDT 2005


James,

This version includes updated license headers per your request.

-arlin

Signed-off-by: Arlin Davis <ardavis at ichips.intel.com>

Index: dapl/udapl/Makefile
===================================================================
--- dapl/udapl/Makefile	(revision 3848)
+++ dapl/udapl/Makefile	(working copy)
@@ -139,6 +139,16 @@ CFLAGS   += -I/usr/local/include/infinib
 endif
 
 #
+# OpenIB provider with Socket CM
+#
+ifeq ($(VERBS),openib_scm)
+PROVIDER = $(TOPDIR)/../openib_scm
+CFLAGS   += -DOPENIB
+CFLAGS   += -DCQ_WAIT_OBJECT
+CFLAGS   += -I/usr/local/include/infiniband
+endif
+
+#
 # If an implementation supports CM and DTO completions on the same EVD
 # then DAPL_MERGE_CM_DTO should be set
 # CFLAGS	+= -DDAPL_MERGE_CM_DTO=1
@@ -251,6 +261,13 @@ PROVIDER_SRCS  = dapl_ib_util.c dapl_ib_
 PROVIDER_SRCS += dapl_ib_cm.c dapl_ib_mem.c
 endif
 
+ifeq ($(VERBS),openib_scm)
+LDFLAGS += -libverbs
+LDFLAGS += -rpath /usr/local/lib -L /usr/local/lib
+PROVIDER_SRCS  = dapl_ib_util.c dapl_ib_cq.c dapl_ib_qp.c \
+                 dapl_ib_cm.c dapl_ib_mem.c
+endif
+
 UDAPL_SRCS = 	dapl_init.c		\
 	dapl_evd_create.c		\
 	dapl_evd_query.c		\
Index: dapl/openib_scm/dapl_ib_dto.h
===================================================================
--- dapl/openib_scm/dapl_ib_dto.h	(revision 0)
+++ dapl/openib_scm/dapl_ib_dto.h	(revision 0)
@@ -0,0 +1,262 @@
+/*
+ * This Software is licensed under one of the following licenses:
+ *
+ * 1) under the terms of the "Common Public License 1.0" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/cpl.php.
+ *
+ * 2) under the terms of the "The BSD License" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/bsd-license.php.
+ *
+ * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
+ *    copy of which is available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/gpl-license.php.
+ *
+ * Licensee has the right to choose one of the above licenses.
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice and one of the license notices.
+ *
+ * Redistributions in binary form must reproduce both the above copyright
+ * notice, one of the license notices in the documentation
+ * and/or other materials provided with the distribution.
+ */
+
+/***************************************************************************
+ *
+ *   Module:		 uDAPL
+ *
+ *   Filename:		 dapl_ib_dto.h
+ *
+ *   Author:		 Arlin Davis
+ *
+ *   Created:		 3/10/2005
+ *
+ *   Description: 
+ *
+ *   The uDAPL openib provider - DTO operations and CQE macros 
+ *
+ ****************************************************************************
+ *		   Source Control System Information
+ *
+ *    $Id: $
+ *
+ *	Copyright (c) 2005 Intel Corporation.  All rights reserved.
+ *
+ **************************************************************************/
+#ifndef _DAPL_IB_DTO_H_
+#define _DAPL_IB_DTO_H_
+
+#include "dapl_ib_util.h"
+
+/*
+ * Number of data segments that dapls_ib_post_recv/dapls_ib_post_send keep
+ * in an on-stack array; longer scatter/gather lists are heap allocated.
+ */
+#define	DEFAULT_DS_ENTRIES	8
+
+/* forward declaration; the definition appears further down in this header */
+STATIC _INLINE_ int dapls_cqe_opcode(ib_work_completion_t *cqe_p);
+
+/*
+ * dapls_ib_post_recv
+ *
+ * Provider specific Post RECV function
+ *
+ * Builds a single receive work request from the non-empty entries of
+ * local_iov and posts it on the endpoint's QP.  The cookie pointer is
+ * used as the work request id and, when non-NULL, records the total
+ * posted length.
+ *
+ * Input:
+ *	ep_ptr		endpoint whose qp_handle receives the request
+ *	cookie		DTO cookie (may be NULL)
+ *	segments	number of entries in local_iov
+ *	local_iov	local buffers; zero length entries are skipped
+ *
+ * Returns:
+ *	DAT_SUCCESS
+ *	DAT_INSUFFICIENT_RESOURCES	segment array could not be allocated
+ *	dapl_convert_errno(EFAULT)	ibv_post_recv failed
+ */
+STATIC _INLINE_ DAT_RETURN 
+dapls_ib_post_recv (
+	IN  DAPL_EP	 	*ep_ptr,
+	IN  DAPL_COOKIE		*cookie,
+	IN  DAT_COUNT	   	segments,
+	IN  DAT_LMR_TRIPLET	*local_iov )
+{
+	ib_data_segment_t	ds_array[DEFAULT_DS_ENTRIES];
+	ib_data_segment_t	*ds_array_start_p;
+	ib_data_segment_t	*ds_array_p;
+	struct ibv_recv_wr	wr;
+	struct ibv_recv_wr	*bad_wr;
+	DAT_COUNT		i, total_len;
+	int			ret;
+	
+	dapl_dbg_log (DAPL_DBG_TYPE_EP,
+		      " post_rcv: ep %p cookie %p segs %d l_iov %p\n",
+		      ep_ptr, cookie, segments, local_iov);
+
+	/* small lists live on the stack, larger ones on the heap */
+	if ( segments <= DEFAULT_DS_ENTRIES ) 
+		ds_array_start_p = ds_array;
+	else
+		ds_array_start_p = 
+			dapl_os_alloc(segments * sizeof(ib_data_segment_t));
+
+	if (NULL == ds_array_start_p)
+		return (DAT_INSUFFICIENT_RESOURCES);
+	
+	/* keep the start of the array; ds_array_p is only the fill cursor */
+	ds_array_p = ds_array_start_p;
+
+	/* setup work request */
+	total_len = 0;
+	wr.next = 0;
+	wr.num_sge = 0;
+	wr.wr_id = (uint64_t)(uintptr_t)cookie;
+	wr.sg_list = ds_array_start_p;
+
+	for (i = 0; i < segments; i++ ) {
+		if ( !local_iov[i].segment_length )
+			continue;
+
+		ds_array_p->addr  = (uint64_t) local_iov[i].virtual_address;
+		ds_array_p->length = local_iov[i].segment_length;
+		ds_array_p->lkey  = local_iov[i].lmr_context;
+		
+		/* addr is a 64 bit integer; convert it for the %p conversion */
+		dapl_dbg_log (	DAPL_DBG_TYPE_EP, 
+				" post_rcv: l_key 0x%x va %p len %d\n",
+				ds_array_p->lkey, 
+				(void *)(uintptr_t)ds_array_p->addr, 
+				ds_array_p->length );
+
+		total_len += ds_array_p->length;
+		wr.num_sge++;
+		ds_array_p++;
+	}
+
+	if (cookie != NULL) 
+		cookie->val.dto.size = total_len;
+
+	ret = ibv_post_recv(ep_ptr->qp_handle, &wr, &bad_wr);
+
+	/* release a heap allocated segment array on every exit path */
+	if (ds_array_start_p != ds_array)
+		dapl_os_free(ds_array_start_p, 
+			     segments * sizeof(ib_data_segment_t));
+
+	if (ret)
+		return( dapl_convert_errno(EFAULT,"ibv_recv") );
+	
+	return DAT_SUCCESS;
+}
+
+
+/*
+ * dapls_ib_post_send
+ *
+ * Provider specific Post SEND function
+ *
+ * Builds a single send work request from the non-empty entries of
+ * local_iov and posts it on the endpoint's QP.  For OP_RDMA_WRITE and
+ * OP_RDMA_READ the target address and rkey are taken from remote_iov.
+ * The request is sent inline when it is an OP_SEND or OP_RDMA_WRITE no
+ * larger than the HCA's max_inline_send.
+ *
+ * Input:
+ *	ep_ptr			endpoint whose qp_handle receives the request
+ *	op_type			operation, stored as the verbs opcode
+ *	cookie			DTO cookie (may be NULL)
+ *	segments		number of entries in local_iov
+ *	local_iov		local buffers; zero length entries are skipped
+ *	remote_iov		remote buffer, read only for RDMA operations
+ *	completion_flags	DAT completion flags mapped to send flags
+ *
+ * Returns:
+ *	DAT_SUCCESS
+ *	DAT_INSUFFICIENT_RESOURCES	segment array could not be allocated
+ *	dapl_convert_errno(EFAULT)	ibv_post_send failed
+ */
+STATIC _INLINE_ DAT_RETURN 
+dapls_ib_post_send (
+    IN  DAPL_EP		   	*ep_ptr,
+    IN  ib_send_op_type_t       op_type,
+    IN  DAPL_COOKIE		*cookie,
+    IN  DAT_COUNT	   	segments,
+    IN  DAT_LMR_TRIPLET	   	*local_iov,
+    IN  const DAT_RMR_TRIPLET	*remote_iov,
+    IN  DAT_COMPLETION_FLAGS	completion_flags)
+{
+	ib_data_segment_t	ds_array[DEFAULT_DS_ENTRIES];
+	ib_data_segment_t	*ds_array_start_p;
+	ib_data_segment_t	*ds_array_p;
+	struct ibv_send_wr	wr;
+	struct ibv_send_wr	*bad_wr;
+	ib_hca_transport_t	*ibt_ptr = &ep_ptr->header.owner_ia->hca_ptr->ib_trans;
+	DAT_COUNT		i, total_len;
+	int			ret;
+
+	dapl_dbg_log (DAPL_DBG_TYPE_EP,
+		      " post_snd: ep %p op %d ck %p sgs %d l_iov %p r_iov %p f %d\n",
+		      ep_ptr, op_type, cookie, segments, local_iov, 
+		      remote_iov, completion_flags);
+	
+	dapl_dbg_log (DAPL_DBG_TYPE_EP,
+		      " post_snd: ep %p cookie %p segs %d l_iov %p\n",
+		      ep_ptr, cookie, segments, local_iov);
+
+	/* small lists live on the stack, larger ones on the heap */
+	if( segments <= DEFAULT_DS_ENTRIES ) 
+		ds_array_start_p = ds_array;
+	else
+		ds_array_start_p = 
+			dapl_os_alloc(segments * sizeof(ib_data_segment_t));
+
+	if (NULL == ds_array_start_p)
+		return (DAT_INSUFFICIENT_RESOURCES);
+	
+	/* keep the start of the array; ds_array_p is only the fill cursor */
+	ds_array_p = ds_array_start_p;
+
+	/* setup the work request */
+	wr.next = 0;
+	wr.opcode = op_type;
+	wr.num_sge = 0;
+	wr.send_flags = 0;
+	wr.wr_id = (uint64_t)(uintptr_t)cookie;
+	wr.sg_list = ds_array_start_p;
+	total_len = 0;
+
+	for (i = 0; i < segments; i++ ) {
+		if ( !local_iov[i].segment_length )
+			continue;
+
+		ds_array_p->addr  = (uint64_t) local_iov[i].virtual_address;
+		ds_array_p->length = local_iov[i].segment_length;
+		ds_array_p->lkey  = local_iov[i].lmr_context;
+		
+		/* addr is a 64 bit integer; convert it for the %p conversion */
+		dapl_dbg_log (	DAPL_DBG_TYPE_EP, 
+				" post_snd: lkey 0x%x va %p len %d \n",
+				ds_array_p->lkey, 
+				(void *)(uintptr_t)ds_array_p->addr, 
+				ds_array_p->length );
+
+		total_len += ds_array_p->length;
+		wr.num_sge++;
+		ds_array_p++;
+	}
+
+	if (cookie != NULL) 
+		cookie->val.dto.size = total_len;
+	
+	if ((op_type == OP_RDMA_WRITE) || (op_type == OP_RDMA_READ)) {
+		wr.wr.rdma.remote_addr = remote_iov->target_address;
+		wr.wr.rdma.rkey = remote_iov->rmr_context;
+		dapl_dbg_log (	DAPL_DBG_TYPE_EP, 
+				" post_snd_rdma: rkey 0x%x va %#016Lx\n",
+				wr.wr.rdma.rkey, wr.wr.rdma.remote_addr );
+	}
+
+	/* inline data for send or write ops */
+	if ((total_len <= ibt_ptr->max_inline_send ) && 
+	   ((op_type == OP_SEND) || (op_type == OP_RDMA_WRITE))) 
+		wr.send_flags |= IBV_SEND_INLINE;
+
+	/* set completion flags in work request */
+	wr.send_flags |= (DAT_COMPLETION_SUPPRESS_FLAG & 
+				completion_flags) ? 0 : IBV_SEND_SIGNALED;
+	wr.send_flags |= (DAT_COMPLETION_BARRIER_FENCE_FLAG & 
+				completion_flags) ? IBV_SEND_FENCE : 0;
+	wr.send_flags |= (DAT_COMPLETION_SOLICITED_WAIT_FLAG & 
+				completion_flags) ? IBV_SEND_SOLICITED : 0;
+
+	dapl_dbg_log (DAPL_DBG_TYPE_EP, 
+		      " post_snd: op 0x%x flags 0x%x sglist %p, %d\n", 
+			wr.opcode, wr.send_flags, wr.sg_list, wr.num_sge);
+
+	ret = ibv_post_send(ep_ptr->qp_handle, &wr, &bad_wr);
+
+	/* release a heap allocated segment array on every exit path */
+	if (ds_array_start_p != ds_array)
+		dapl_os_free(ds_array_start_p, 
+			     segments * sizeof(ib_data_segment_t));
+
+	if (ret)
+		return( dapl_convert_errno(EFAULT,"ibv_send") );
+	
+	dapl_dbg_log (DAPL_DBG_TYPE_EP," post_snd: returned\n");
+	return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_optional_prv_dat
+ *
+ * No-op in this provider: none of the arguments are read or written
+ * and DAT_SUCCESS is always returned.
+ */
+STATIC _INLINE_ DAT_RETURN 
+dapls_ib_optional_prv_dat (
+	IN  DAPL_CR		*cr_ptr,
+	IN  const void		*event_data,
+	OUT   DAPL_CR		**cr_pp)
+{
+    return DAT_SUCCESS;
+}
+
+/*
+ * dapls_cqe_opcode
+ *
+ * Translate the verbs opcode carried by a work completion into the
+ * matching OP_* value; any opcode not listed yields OP_INVALID.
+ */
+STATIC _INLINE_ int dapls_cqe_opcode(ib_work_completion_t *cqe_p)
+{
+	int	dapl_op;
+
+	switch (cqe_p->opcode) {
+	case IBV_WC_SEND:
+		dapl_op = OP_SEND;
+		break;
+	case IBV_WC_RDMA_WRITE:
+		dapl_op = OP_RDMA_WRITE;
+		break;
+	case IBV_WC_RDMA_READ:
+		dapl_op = OP_RDMA_READ;
+		break;
+	case IBV_WC_COMP_SWAP:
+		dapl_op = OP_COMP_AND_SWAP;
+		break;
+	case IBV_WC_FETCH_ADD:
+		dapl_op = OP_FETCH_AND_ADD;
+		break;
+	case IBV_WC_BIND_MW:
+		dapl_op = OP_BIND_MW;
+		break;
+	case IBV_WC_RECV:
+		dapl_op = OP_RECEIVE;
+		break;
+	case IBV_WC_RECV_RDMA_WITH_IMM:
+		dapl_op = OP_RECEIVE_IMM;
+		break;
+	default:
+		dapl_op = OP_INVALID;
+		break;
+	}
+
+	return dapl_op;
+}
+
+/*
+ * Accessors for the fields of a work completion.  The argument and the
+ * whole expansion are parenthesized so the macros stay correct when
+ * given an expression or used inside a larger expression.
+ */
+#define DAPL_GET_CQE_OPTYPE(cqe_p)	dapls_cqe_opcode(cqe_p)
+#define DAPL_GET_CQE_WRID(cqe_p)	(((ib_work_completion_t*)(cqe_p))->wr_id)
+#define DAPL_GET_CQE_STATUS(cqe_p)	(((ib_work_completion_t*)(cqe_p))->status)
+#define DAPL_GET_CQE_BYTESNUM(cqe_p)	(((ib_work_completion_t*)(cqe_p))->byte_len)
+#define DAPL_GET_CQE_IMMED_DATA(cqe_p)	(((ib_work_completion_t*)(cqe_p))->imm_data)
+
+#endif	/*  _DAPL_IB_DTO_H_ */
Index: dapl/openib_scm/dapl_ib_util.c
===================================================================
--- dapl/openib_scm/dapl_ib_util.c	(revision 0)
+++ dapl/openib_scm/dapl_ib_util.c	(revision 0)
@@ -0,0 +1,472 @@
+/*
+ * This Software is licensed under one of the following licenses:
+ *
+ * 1) under the terms of the "Common Public License 1.0" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/cpl.php.
+ *
+ * 2) under the terms of the "The BSD License" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/bsd-license.php.
+ *
+ * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
+ *    copy of which is available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/gpl-license.php.
+ *
+ * Licensee has the right to choose one of the above licenses.
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice and one of the license notices.
+ *
+ * Redistributions in binary form must reproduce both the above copyright
+ * notice, one of the license notices in the documentation
+ * and/or other materials provided with the distribution.
+ */
+
+/***************************************************************************
+ *
+ *   Module:		 uDAPL
+ *
+ *   Filename:		 dapl_ib_util.c
+ *
+ *   Author:		 Arlin Davis
+ *
+ *   Created:		 3/10/2005
+ *
+ *   Description: 
+ *
+ *   The uDAPL openib provider - init, open, close, utilities
+ *
+ ****************************************************************************
+ *		   Source Control System Information
+ *
+ *    $Id: $
+ *
+ *	Copyright (c) 2005 Intel Corporation.  All rights reserved.
+ *
+ **************************************************************************/
+#ifdef RCSID
+static const char rcsid[] = "$Id:  $";
+#endif
+
+#include "dapl.h"
+#include "dapl_adapter_util.h"
+#include "dapl_ib_util.h"
+
+#include <stdlib.h>
+#include <netinet/tcp.h>
+#include <sys/utsname.h>
+#include <unistd.h>	
+#include <fcntl.h>
+
+/*
+ * NOTE(review): not referenced in the visible part of this file;
+ * presumably read by other DAPL code - confirm before changing.
+ */
+int g_dapl_loopback_connection = 0;
+
+/*
+ * getipaddr
+ *
+ * Resolve this node's own hostname (uname nodename) and store the first
+ * IPv4 address found in addr, formatted as a struct sockaddr_in.  Only
+ * sin_family and sin_addr are written.
+ *
+ * Input:
+ *	addr		buffer that receives the sockaddr_in
+ *	addr_len	size of the buffer in bytes
+ *
+ * Returns:
+ *	DAT_SUCCESS
+ *	DAT_INTERNAL_ERROR	bad buffer, uname or name lookup failure
+ *	DAT_INVALID_ADDRESS	hostname did not resolve to an IPv4 address
+ */
+DAT_RETURN getipaddr( char *addr, int addr_len)
+{
+	struct sockaddr_in	*ipv4_addr = (struct sockaddr_in*)addr;
+	struct hostent		*h_ptr;
+	struct utsname		ourname;
+
+	/* the result is a sockaddr_in; never write past a smaller buffer */
+	if ( addr == NULL || addr_len < (int)sizeof(struct sockaddr_in) )
+		return DAT_INTERNAL_ERROR;
+
+	if ( uname( &ourname ) < 0 ) 
+		return DAT_INTERNAL_ERROR;
+
+	h_ptr = gethostbyname( ourname.nodename );
+	if ( h_ptr == NULL ) 
+		return DAT_INTERNAL_ERROR;
+
+	if ( h_ptr->h_addrtype != AF_INET )
+		return DAT_INVALID_ADDRESS;
+
+	/* need at least one address of exactly the size we are about to copy */
+	if ( h_ptr->h_addr_list[0] == NULL ||
+	     h_ptr->h_length != (int)sizeof(ipv4_addr->sin_addr) )
+		return DAT_INVALID_ADDRESS;
+
+	ipv4_addr->sin_family = AF_INET;
+	dapl_os_memcpy( &ipv4_addr->sin_addr, h_ptr->h_addr_list[0], 
+			sizeof(ipv4_addr->sin_addr) );
+
+	return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_init, dapls_ib_release
+ *
+ * Initialize Verb related items for device open
+ *
+ * Input:
+ * 	none
+ *
+ * Output:
+ *	none
+ *
+ * Returns:
+ * 	0 success, -1 error
+ *
+ */
+/* Stub: performs no work and always reports success (0). */
+int32_t dapls_ib_init (void)
+{	
+	return 0;
+}
+
+/* Stub: performs no work and always reports success (0). */
+int32_t dapls_ib_release (void)
+{
+	return 0;
+}
+
+/*
+ * dapls_ib_open_hca
+ *
+ * Open HCA
+ *
+ * Finds the IB device named hca_name, opens it and brings up the per-HCA
+ * transport state in this order: max inline size, cq_lock, a non-blocking
+ * completion channel, dapli_cq_thread_init, the cr_list lock and list,
+ * and the cr_thread (waited for until it reports IB_THREAD_RUN).  Last,
+ * the local IP address is stored in hca_ptr->hca_address.
+ *
+ * Input:
+ *      hca_name          provider device name
+ *      hca_ptr           HCA object to fill in
+ *
+ * Output:
+ *      none
+ *
+ * Return:
+ *      DAT_SUCCESS
+ *      DAT_INTERNAL_ERROR
+ *      status returned by getipaddr
+ *
+ */
+DAT_RETURN dapls_ib_open_hca (
+        IN   IB_HCA_NAME	hca_name,
+        IN   DAPL_HCA		*hca_ptr)
+{
+	struct dlist		*dev_list;
+	struct sockaddr_in	*sin;
+	int			opts;
+	DAT_RETURN		dat_status = DAT_SUCCESS;
+
+	dapl_dbg_log (DAPL_DBG_TYPE_UTIL, 
+		      " open_hca: %s - %p\n", hca_name, hca_ptr );
+
+	/* Get list of all IB devices, find match, open */
+	dev_list = ibv_get_devices();
+	if (!dev_list) {
+		dapl_dbg_log (DAPL_DBG_TYPE_ERR,
+			      " open_hca: unable to get IB device list\n");
+		return DAT_INTERNAL_ERROR;
+	}
+
+	dlist_start(dev_list);
+	dlist_for_each_data(dev_list,hca_ptr->ib_trans.ib_dev,struct ibv_device) {
+		if (!strcmp(ibv_get_device_name(hca_ptr->ib_trans.ib_dev),hca_name))
+			break;
+	}
+
+	if (!hca_ptr->ib_trans.ib_dev) {
+		dapl_dbg_log (DAPL_DBG_TYPE_ERR,
+			      " open_hca: IB device %s not found\n",
+			      hca_name);
+		return DAT_INTERNAL_ERROR;
+	}
+	
+	dapl_dbg_log (DAPL_DBG_TYPE_UTIL," open_hca: Found dev %s %016llx\n", 
+			ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
+			(unsigned long long)
+			bswap_64(ibv_get_device_guid(hca_ptr->ib_trans.ib_dev)));
+
+	hca_ptr->ib_hca_handle = ibv_open_device(hca_ptr->ib_trans.ib_dev);
+	if (!hca_ptr->ib_hca_handle) {
+		 dapl_dbg_log (DAPL_DBG_TYPE_ERR, 
+				" open_hca: IB dev open failed for %s\n", 
+				ibv_get_device_name(hca_ptr->ib_trans.ib_dev) );
+		return DAT_INTERNAL_ERROR;
+	}
+
+	/* set inline max with environment or default */
+	hca_ptr->ib_trans.max_inline_send = 
+		dapl_os_get_env_val ( "DAPL_MAX_INLINE", INLINE_SEND_DEFAULT );
+
+	/* initialize cq_lock */
+	dat_status = dapl_os_lock_init(&hca_ptr->ib_trans.cq_lock);
+	if (dat_status != DAT_SUCCESS)
+	{
+		dapl_dbg_log (DAPL_DBG_TYPE_ERR, 
+			" open_hca: failed to init cq_lock\n");
+		goto bail;
+	}
+
+	/* EVD events without direct CQ channels, non-blocking */
+	hca_ptr->ib_trans.ib_cq = 
+		ibv_create_comp_channel(hca_ptr->ib_hca_handle);
+	if (!hca_ptr->ib_trans.ib_cq) {
+		/* must not touch ->fd below when no channel was created */
+		dapl_dbg_log (DAPL_DBG_TYPE_ERR, 
+			      " open_hca: failed to create CQ channel\n" );
+		goto bail;
+	}
+	opts = fcntl(hca_ptr->ib_trans.ib_cq->fd, F_GETFL); /* uCQ */
+	if (opts < 0 || fcntl(hca_ptr->ib_trans.ib_cq->fd, 
+			      F_SETFL, opts | O_NONBLOCK) < 0) {
+		dapl_dbg_log (DAPL_DBG_TYPE_ERR, 
+			      " open_hca: ERR with CQ FD\n" );
+		goto bail;
+	}
+
+	if (dapli_cq_thread_init(hca_ptr)) {
+                dapl_dbg_log (DAPL_DBG_TYPE_ERR,
+                              " open_hca: cq_thread_init failed for %s\n",
+                              ibv_get_device_name(hca_ptr->ib_trans.ib_dev) );
+                goto bail;
+        }
+
+	/* initialize cr_list lock */
+	dat_status = dapl_os_lock_init(&hca_ptr->ib_trans.lock);
+	if (dat_status != DAT_SUCCESS)
+	{
+		dapl_dbg_log (DAPL_DBG_TYPE_ERR, 
+				" open_hca: failed to init lock\n");
+		goto bail;
+	}
+
+	/* initialize CM list for listens on this HCA */
+	dapl_llist_init_head(&hca_ptr->ib_trans.list);
+
+	/* create thread to process inbound connect request */
+	hca_ptr->ib_trans.cr_state = IB_THREAD_INIT;
+	dat_status = dapl_os_thread_create(cr_thread, 
+					   (void*)hca_ptr, 
+					   &hca_ptr->ib_trans.thread );
+	if (dat_status != DAT_SUCCESS)
+	{
+		dapl_dbg_log (DAPL_DBG_TYPE_ERR, 
+				" open_hca: failed to create thread\n");
+		goto bail;
+	}
+	
+	/* wait for thread */
+	while (hca_ptr->ib_trans.cr_state != IB_THREAD_RUN) {
+		struct timespec	sleep, remain;
+		sleep.tv_sec = 0;
+		sleep.tv_nsec = 20000000; /* 20 ms */
+		dapl_dbg_log(DAPL_DBG_TYPE_UTIL, 
+			     " open_hca: waiting for cr_thread\n");
+		nanosleep (&sleep, &remain);
+	}
+
+	/* get the IP address of the device */
+	dat_status = getipaddr((char*)&hca_ptr->hca_address, 
+				sizeof(DAT_SOCK_ADDR6) );
+	sin = (struct sockaddr_in *)&hca_ptr->hca_address;
+	dapl_dbg_log (DAPL_DBG_TYPE_UTIL, 
+		" open_hca: %s, port %d, %s  %d.%d.%d.%d\n", 
+		ibv_get_device_name(hca_ptr->ib_trans.ib_dev), hca_ptr->port_num,
+		sin->sin_family == AF_INET ? "AF_INET":"AF_INET6",
+		sin->sin_addr.s_addr >> 0 & 0xff,
+		sin->sin_addr.s_addr >> 8 & 0xff,
+		sin->sin_addr.s_addr >> 16 & 0xff,
+		sin->sin_addr.s_addr >> 24 & 0xff );
+
+	return dat_status;
+bail:
+	/*
+	 * NOTE(review): only the device is released here; locks, the
+	 * completion channel and the CQ thread set up before the failure
+	 * are left as they are.
+	 */
+	ibv_close_device(hca_ptr->ib_hca_handle); 
+	hca_ptr->ib_hca_handle = IB_INVALID_HANDLE;
+	return DAT_INTERNAL_ERROR;
+}
+
+
+/*
+ * dapls_ib_close_hca
+ *
+ * Close HCA
+ *
+ * Teardown order: dapli_cq_thread_destroy, close of the verbs device,
+ * cq_lock, cancellation of the cr_thread, then the cr_list lock.
+ *
+ * Input:
+ *      DAPL_HCA   provide CA handle
+ *
+ * Output:
+ *      none
+ *
+ * Return:
+ *      DAT_SUCCESS
+ *	dapl_convert_errno 
+ *
+ */
+DAT_RETURN dapls_ib_close_hca (	IN   DAPL_HCA	*hca_ptr )
+{
+	dapl_dbg_log (DAPL_DBG_TYPE_UTIL," close_hca: %p\n",hca_ptr);
+
+	dapli_cq_thread_destroy(hca_ptr);
+
+	if (hca_ptr->ib_hca_handle != IB_INVALID_HANDLE) {
+		/*
+		 * NOTE: a failed close returns right here, so cq_lock, the
+		 * cr_thread and its lock below are not torn down on this path.
+		 */
+		if (ibv_close_device(hca_ptr->ib_hca_handle)) 
+			return(dapl_convert_errno(errno,"ib_close_device"));
+		hca_ptr->ib_hca_handle = IB_INVALID_HANDLE;
+	}
+
+	dapl_os_lock_destroy(&hca_ptr->ib_trans.cq_lock);
+
+	/* destroy cr_thread and lock */
+	/* request cancellation, then poll every 20 ms until cr_state reads
+	 * IB_THREAD_EXIT (presumably set by cr_thread on its way out) */
+	hca_ptr->ib_trans.cr_state = IB_THREAD_CANCEL;
+	while (hca_ptr->ib_trans.cr_state != IB_THREAD_EXIT) {
+		struct timespec	sleep, remain;
+		sleep.tv_sec = 0;
+		sleep.tv_nsec = 20000000; /* 20 ms */
+		dapl_dbg_log(DAPL_DBG_TYPE_UTIL, 
+			     " close_hca: waiting for cr_thread\n");
+		nanosleep (&sleep, &remain);
+	}
+	dapl_os_lock_destroy(&hca_ptr->ib_trans.lock);
+
+	return (DAT_SUCCESS);
+}
+  
+/*
+ * dapls_ib_query_hca
+ *
+ * Query the hca attribute
+ *
+ * Copies the stored local address into ip_addr and, when ia_attr and/or
+ * ep_attr are requested, fills them from the verbs device and port
+ * attributes.  Any of the three output pointers may be NULL.
+ *
+ * Input:
+ *	hca_ptr			hca object, holds the verbs handle
+ *	ia_attr			attribute of the ia
+ *	ep_attr			attribute of the ep
+ *	ip_addr			ip address of the device
+ *
+ * Output:
+ * 	none
+ *
+ * Returns:
+ * 	DAT_SUCCESS
+ *	DAT_INVALID_HANDLE
+ *	dapl_convert_errno	device or port query failed
+ */
+
+DAT_RETURN dapls_ib_query_hca (
+	IN  DAPL_HCA                       *hca_ptr,
+	OUT DAT_IA_ATTR                    *ia_attr,
+	OUT DAT_EP_ATTR                    *ep_attr,
+	OUT DAT_SOCK_ADDR6                 *ip_addr)
+{
+	struct ibv_device_attr	dev_attr;
+	struct ibv_port_attr	port_attr;
+
+	if (hca_ptr->ib_hca_handle == NULL) {
+		dapl_dbg_log (DAPL_DBG_TYPE_ERR," query_hca: BAD handle\n");
+		return (DAT_INVALID_HANDLE);
+	}
+
+	/* local IP address of device, set during ia_open */
+	if (ip_addr != NULL)
+		memcpy(ip_addr, &hca_ptr->hca_address, sizeof(DAT_SOCK_ADDR6));
+	
+	if (ia_attr == NULL && ep_attr == NULL) 
+		return DAT_SUCCESS;
+
+	/* query verbs for this device and port attributes */	
+	if (ibv_query_device(hca_ptr->ib_hca_handle, &dev_attr) ||
+			     ibv_query_port(hca_ptr->ib_hca_handle, 
+					    hca_ptr->port_num, &port_attr))
+		return(dapl_convert_errno(errno,"ib_query_hca"));
+
+	if (ia_attr != NULL) {
+		struct sockaddr_in	*sin;
+
+		ia_attr->adapter_name[DAT_NAME_MAX_LENGTH - 1] = '\0';
+		ia_attr->vendor_name[DAT_NAME_MAX_LENGTH - 1] = '\0';
+		ia_attr->ia_address_ptr = (DAT_IA_ADDRESS_PTR)&hca_ptr->hca_address;
+
+		/* view the stored address as IPv4 for the debug output */
+		sin = (struct sockaddr_in *)ia_attr->ia_address_ptr;
+		dapl_dbg_log (DAPL_DBG_TYPE_UTIL, 
+			" query_hca: %s %s  %d.%d.%d.%d\n", 
+			ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
+			sin->sin_family == AF_INET ? "AF_INET":"AF_INET6",
+			sin->sin_addr.s_addr >> 0 & 0xff,
+			sin->sin_addr.s_addr >> 8 & 0xff,
+			sin->sin_addr.s_addr >> 16 & 0xff,
+			sin->sin_addr.s_addr >> 24 & 0xff );
+		
+		ia_attr->hardware_version_major   = dev_attr.hw_ver;
+		/*
+		 * NOTE(review): hardware_version_minor is not assigned here
+		 * although it is printed by the debug log below.
+		 */
+		/* ia_attr->hardware_version_minor   = dev_attr.fw_ver; */
+		ia_attr->max_eps                  = dev_attr.max_qp;
+		ia_attr->max_dto_per_ep           = dev_attr.max_qp_wr;
+		ia_attr->max_rdma_read_per_ep     = dev_attr.max_qp_rd_atom;
+		ia_attr->max_evds                 = dev_attr.max_cq;
+		ia_attr->max_evd_qlen             = dev_attr.max_cqe;
+		ia_attr->max_iov_segments_per_dto = dev_attr.max_sge;
+		ia_attr->max_lmrs                 = dev_attr.max_mr;
+		ia_attr->max_lmr_block_size       = dev_attr.max_mr_size;
+		ia_attr->max_rmrs                 = dev_attr.max_mw;
+		ia_attr->max_lmr_virtual_address  = dev_attr.max_mr_size;
+		ia_attr->max_rmr_target_address   = dev_attr.max_mr_size;
+		ia_attr->max_pzs                  = dev_attr.max_pd;
+		ia_attr->max_mtu_size             = port_attr.max_msg_sz;
+		ia_attr->max_rdma_size            = port_attr.max_msg_sz;
+		ia_attr->num_transport_attr       = 0;
+		ia_attr->transport_attr           = NULL;
+		ia_attr->num_vendor_attr          = 0;
+		ia_attr->vendor_attr              = NULL;
+
+		dapl_dbg_log (DAPL_DBG_TYPE_UTIL, 
+			" query_hca: (%x.%x) ep %d ep_q %d evd %d evd_q %d\n", 
+			ia_attr->hardware_version_major,
+			ia_attr->hardware_version_minor,
+			ia_attr->max_eps, ia_attr->max_dto_per_ep,
+			ia_attr->max_evds, ia_attr->max_evd_qlen );
+		dapl_dbg_log (DAPL_DBG_TYPE_UTIL, 
+			" query_hca: msg %llu rdma %llu iov %d lmr %d rmr %d\n", 
+			ia_attr->max_mtu_size, ia_attr->max_rdma_size,
+			ia_attr->max_iov_segments_per_dto, ia_attr->max_lmrs, 
+			ia_attr->max_rmrs );
+
+	}
+	
+	if (ep_attr != NULL) {
+		ep_attr->max_mtu_size     = port_attr.max_msg_sz;
+		ep_attr->max_rdma_size    = port_attr.max_msg_sz;
+		ep_attr->max_recv_dtos    = dev_attr.max_qp_wr;
+		ep_attr->max_request_dtos = dev_attr.max_qp_wr;
+		ep_attr->max_recv_iov     = dev_attr.max_sge;
+		ep_attr->max_request_iov  = dev_attr.max_sge;
+		ep_attr->max_rdma_read_in = dev_attr.max_qp_rd_atom;
+		ep_attr->max_rdma_read_out= dev_attr.max_qp_rd_atom;
+		dapl_dbg_log (DAPL_DBG_TYPE_UTIL, 
+			" query_hca: MAX msg %llu dto %d iov %d rdma i%d,o%d\n", 
+			ep_attr->max_mtu_size,
+			ep_attr->max_recv_dtos, ep_attr->max_recv_iov,
+			ep_attr->max_rdma_read_in, ep_attr->max_rdma_read_out);
+	}
+
+	return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_setup_async_callback
+ *
+ * Record an asynchronous event callback in the HCA transport data.
+ * The slot that is written depends on handler_type; the context is
+ * stored only for the unaffiliated handler.  Unknown handler types are
+ * ignored.
+ *
+ * Input:
+ *	ia_ptr			IA whose HCA transport receives the callback
+ *	handler_type		type of handler to set up
+ *	evd_ptr 		only used for the debug log
+ *	callback		callback routine pointer
+ *	context 		argument for callback routine
+ *
+ * Output:
+ *	none
+ *
+ * Returns:
+ *	DAT_SUCCESS
+ *
+ */
+DAT_RETURN dapls_ib_setup_async_callback (
+	IN  DAPL_IA			*ia_ptr,
+	IN  DAPL_ASYNC_HANDLER_TYPE	handler_type,
+	IN  DAPL_EVD			*evd_ptr,
+	IN  ib_async_handler_t		callback,
+	IN  void			*context )
+
+{
+	ib_hca_transport_t	*trans_ptr = &ia_ptr->hca_ptr->ib_trans;
+
+	dapl_dbg_log (DAPL_DBG_TYPE_UTIL,
+		      " setup_async_cb: ia %p type %d handle %p cb %p ctx %p\n",
+		      ia_ptr, handler_type, evd_ptr, callback, context);
+
+	if (handler_type == DAPL_ASYNC_UNAFILIATED) {
+		trans_ptr->async_unafiliated = (ib_async_handler_t)callback;
+		trans_ptr->async_un_ctx = context;
+	} else if (handler_type == DAPL_ASYNC_CQ_ERROR) {
+		trans_ptr->async_cq_error = (ib_async_cq_handler_t)callback;
+	} else if (handler_type == DAPL_ASYNC_CQ_COMPLETION) {
+		trans_ptr->async_cq = (ib_async_dto_handler_t)callback;
+	} else if (handler_type == DAPL_ASYNC_QP_ERROR) {
+		trans_ptr->async_qp_error = (ib_async_qp_handler_t)callback;
+	}
+
+	return DAT_SUCCESS;
+}
+
Index: dapl/openib_scm/dapl_ib_mem.c
===================================================================
--- dapl/openib_scm/dapl_ib_mem.c	(revision 0)
+++ dapl/openib_scm/dapl_ib_mem.c	(revision 0)
@@ -0,0 +1,392 @@
+/*
+ * This Software is licensed under one of the following licenses:
+ *
+ * 1) under the terms of the "Common Public License 1.0" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/cpl.php.
+ *
+ * 2) under the terms of the "The BSD License" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/bsd-license.php.
+ *
+ * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
+ *    copy of which is available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/gpl-license.php.
+ *
+ * Licensee has the right to choose one of the above licenses.
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice and one of the license notices.
+ *
+ * Redistributions in binary form must reproduce both the above copyright
+ * notice, one of the license notices in the documentation
+ * and/or other materials provided with the distribution.
+ */
+
+/**********************************************************************
+ * 
+ * MODULE: dapl_det_mem.c
+ *
+ * PURPOSE: Intel DET APIs: Memory windows, registration,
+ *           and protection domain 
+ *
+ * $Id: $
+ *
+ **********************************************************************/
+
+#include <sys/ioctl.h>  /* for IOCTL's */
+#include <sys/types.h>  /* for socket(2) and related bits and pieces */
+#include <sys/socket.h> /* for socket(2) */
+#include <net/if.h>     /* for struct ifreq */
+#include <net/if_arp.h> /* for ARPHRD_ETHER */
+#include <unistd.h>		/* for _SC_CLK_TCK */
+
+#include "dapl.h"
+#include "dapl_adapter_util.h"
+#include "dapl_lmr_util.h"
+
+/*
+ * dapls_convert_privileges
+ *
+ * Convert LMR privileges to provider  
+ *
+ * Local write, remote write and remote read each map to the matching
+ * IBV_ACCESS_* bit.  Local read has no bit of its own and adds nothing.
+ *
+ * Input:
+ *	DAT_MEM_PRIV_FLAGS
+ *
+ * Output:
+ *	none
+ *
+ * Returns:
+ *	ibv_access_flags
+ *
+ */
+STATIC _INLINE_ int
+dapls_convert_privileges (
+    IN DAT_MEM_PRIV_FLAGS	privileges)
+{
+	int	access = 0;
+
+	/*
+	 * if (DAT_MEM_PRIV_LOCAL_READ_FLAG & privileges) do nothing
+	 */
+	if (DAT_MEM_PRIV_LOCAL_WRITE_FLAG & privileges)
+		access |= IBV_ACCESS_LOCAL_WRITE;
+	if (DAT_MEM_PRIV_REMOTE_WRITE_FLAG & privileges)
+		access |= IBV_ACCESS_REMOTE_WRITE;
+	if (DAT_MEM_PRIV_REMOTE_READ_FLAG & privileges)
+		access |= IBV_ACCESS_REMOTE_READ;
+
+	return access;
+}
+
+/*
+ * dapls_ib_pd_alloc
+ *
+ * Allocate a protection domain on the IA's device and store it in the
+ * PZ.  The result of ibv_alloc_pd is stored in pz->pd_handle even when
+ * the allocation fails.
+ *
+ * Input:
+ *	ia_ptr		IA whose HCA handle is used
+ *	pz		pointer to PZ struct
+ *
+ * Output:
+ * 	none
+ *
+ * Returns:
+ * 	DAT_SUCCESS
+ *	dapl_convert_errno(ENOMEM)
+ *
+ */
+DAT_RETURN
+dapls_ib_pd_alloc (
+	IN  DAPL_IA 	*ia_ptr,
+	IN  DAPL_PZ 	*pz )
+{
+	/* ask verbs for a protection domain on this IA's device */
+	pz->pd_handle = ibv_alloc_pd(ia_ptr->hca_ptr->ib_hca_handle);
+
+	if (pz->pd_handle) {
+		dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " pd_alloc: pd_handle=%p\n", 
+			     pz->pd_handle );
+		return DAT_SUCCESS;
+	}
+
+	return(dapl_convert_errno(ENOMEM,"alloc_pd"));
+}
+
+/*
+ * dapls_ib_pd_free
+ *
+ * Release the protection domain held by a PZ.  A PZ whose handle is
+ * already IB_INVALID_HANDLE is left alone; after a successful release
+ * the handle is reset to IB_INVALID_HANDLE.
+ *
+ * Input:
+ *	pz		pointer to PZ struct
+ *
+ * Output:
+ * 	none
+ *
+ * Returns:
+ * 	DAT_SUCCESS
+ *	dapl_convert_errno	ibv_dealloc_pd failed
+ *
+ */
+DAT_RETURN
+dapls_ib_pd_free (
+	IN  DAPL_PZ 	*pz )
+{
+	/* nothing to release */
+	if (pz->pd_handle == IB_INVALID_HANDLE)
+		return DAT_SUCCESS;
+
+	if (ibv_dealloc_pd(pz->pd_handle))
+		return(dapl_convert_errno(errno,"dealloc_pd"));
+
+	pz->pd_handle = IB_INVALID_HANDLE;
+	return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_mr_register
+ *
+ * Register a virtual memory region
+ *
+ * Registers [virt_addr, virt_addr + length) with the protection domain
+ * of the LMR's PZ and records the resulting keys, size and address in
+ * lmr->param.  Shared virtual memory is not implemented.
+ *
+ * Input:
+ *	ia_ptr		IA handle (only used for the debug log)
+ *	lmr		pointer to dapl_lmr struct
+ *	virt_addr	virtual address of beginning of mem region
+ *	length		length of memory region
+ *	privileges	DAT access privileges for the region
+ *
+ * Output:
+ * 	none
+ *
+ * Returns:
+ * 	DAT_SUCCESS
+ *	DAT_NOT_IMPLEMENTED		DAT_MEM_TYPE_SHARED_VIRTUAL
+ *	dapl_convert_errno(ENOMEM)	ibv_reg_mr failed
+ *
+ */
+DAT_RETURN
+dapls_ib_mr_register (
+        IN  DAPL_IA                 *ia_ptr,
+        IN  DAPL_LMR                *lmr,
+        IN  DAT_PVOID                virt_addr,
+        IN  DAT_VLEN                length,
+        IN  DAT_MEM_PRIV_FLAGS      privileges)
+{
+	ib_pd_handle_t	ib_pd_handle;
+	int		access;
+
+	ib_pd_handle = ((DAPL_PZ *)lmr->param.pz_handle)->pd_handle;
+	
+	/* length is cast so the conversion matches on every platform */
+	dapl_dbg_log (  DAPL_DBG_TYPE_UTIL, 
+			" mr_register: ia=%p, lmr=%p va=%p ln=%llu pv=0x%x\n", 
+			ia_ptr, lmr, virt_addr, 
+			(unsigned long long)length, privileges );
+
+	/* TODO: shared memory */
+	if (lmr->param.mem_type == DAT_MEM_TYPE_SHARED_VIRTUAL) {
+		dapl_dbg_log( DAPL_DBG_TYPE_ERR,
+		     " mr_register_shared: NOT IMPLEMENTED\n");    
+		return DAT_ERROR (DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);  
+	}
+
+	/* local read is default on IB */ 
+	access = dapls_convert_privileges(privileges);
+	lmr->mr_handle = ibv_reg_mr(ib_pd_handle, virt_addr, length, access);
+
+	if (!lmr->mr_handle) 
+		return(dapl_convert_errno(ENOMEM,"reg_mr"));
+	
+	lmr->param.lmr_context = lmr->mr_handle->lkey; 
+	lmr->param.rmr_context = lmr->mr_handle->rkey;
+	lmr->param.registered_size = length;
+	lmr->param.registered_address = (DAT_VADDR)(uintptr_t) virt_addr;
+
+	/* one conversion per argument: length gets its own len= field */
+	dapl_dbg_log (  DAPL_DBG_TYPE_UTIL, 
+			" mr_register: mr=%p h %x pd %p ctx %p ,lkey=0x%x, rkey=0x%x len=%llu priv=%x\n", 
+			lmr->mr_handle,	lmr->mr_handle->handle,	
+			lmr->mr_handle->pd,
+			lmr->mr_handle->context,
+			lmr->mr_handle->lkey, 
+			lmr->mr_handle->rkey, 
+			(unsigned long long)length, access );
+
+	return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_mr_deregister
+ *
+ * Free a memory region
+ *
+ * An LMR whose handle is already IB_INVALID_HANDLE is left alone; after
+ * a successful deregistration the handle is reset to IB_INVALID_HANDLE.
+ *
+ * Input:
+ *	lmr			pointer to dapl_lmr struct
+ *
+ * Output:
+ * 	none
+ *
+ * Returns:
+ * 	DAT_SUCCESS
+ *	dapl_convert_errno	ibv_dereg_mr failed
+ *
+ */
+DAT_RETURN
+dapls_ib_mr_deregister (
+	IN  DAPL_LMR	*lmr )
+{
+	if (lmr->mr_handle != IB_INVALID_HANDLE) {
+		/* label the failure with the operation that actually failed */
+		if (ibv_dereg_mr(lmr->mr_handle))
+			return(dapl_convert_errno(errno,"dereg_mr"));
+		lmr->mr_handle = IB_INVALID_HANDLE;
+	}
+	return DAT_SUCCESS;
+}
+
+
+/*
+ * dapls_ib_mr_register_shared
+ *
+ * Register a shared virtual memory region.  Not implemented in this
+ * provider: logs an error and fails without touching its arguments.
+ *
+ * Input:
+ *	ia_ptr		IA handle
+ *	lmr		pointer to dapl_lmr struct
+ *	privileges	requested access privileges
+ *
+ * Output:
+ * 	none
+ *
+ * Returns:
+ *	DAT_NOT_IMPLEMENTED
+ *
+ */
+DAT_RETURN
+dapls_ib_mr_register_shared (
+	IN  DAPL_IA 		    *ia_ptr,
+	IN  DAPL_LMR		    *lmr,
+	IN  DAT_MEM_PRIV_FLAGS	privileges )
+{
+    dapl_dbg_log(DAPL_DBG_TYPE_ERR," mr_register_shared: NOT IMPLEMENTED\n");
+    return DAT_ERROR (DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);  
+}
+
+/*
+ * dapls_ib_mw_alloc
+ *
+ * Bind a protection domain to a memory window.  Not implemented in this
+ * provider: logs an error and fails without touching rmr.
+ *
+ * Input:
+ *	rmr	Initialized rmr to hold binding handles
+ *
+ * Output:
+ * 	none
+ *
+ * Returns:
+ *	DAT_NOT_IMPLEMENTED
+ *
+ */
+DAT_RETURN
+dapls_ib_mw_alloc (
+	IN  DAPL_RMR	*rmr )
+{
+
+	dapl_dbg_log(DAPL_DBG_TYPE_ERR," mw_alloc: NOT IMPLEMENTED\n");
+   	return DAT_ERROR (DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);  
+}
+
+/*
+ * dapls_ib_mw_free
+ *
+ * Release bindings of a protection domain to a memory window.  Not
+ * implemented in this provider: logs an error and fails without
+ * touching rmr.
+ *
+ * Input:
+ *	rmr	Initialized rmr to hold binding handles
+ *
+ * Output:
+ * 	none
+ *
+ * Returns:
+ *	DAT_NOT_IMPLEMENTED
+ *
+ */
+DAT_RETURN
+dapls_ib_mw_free (
+	IN  DAPL_RMR 	*rmr )
+{	
+	dapl_dbg_log(DAPL_DBG_TYPE_ERR," mw_free: NOT IMPLEMENTED\n");
+	return DAT_ERROR (DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);  
+}
+
+/*
+ * dapls_ib_mw_bind
+ *
+ * Bind a memory window to a region of an LMR.  Not implemented in this
+ * provider: logs an error and fails without touching its arguments.
+ *
+ * Input:
+ *	rmr		Initialized rmr to hold binding handles
+ *	lmr		LMR to bind against
+ *	ep		endpoint for the bind operation
+ *	cookie		completion cookie
+ *	virtual_address	start of the window
+ *	length		length of the window
+ *	mem_priv	requested access privileges
+ *	is_signaled	whether a completion is requested
+ *
+ * Output:
+ * 	none
+ *
+ * Returns:
+ *	DAT_NOT_IMPLEMENTED
+ *
+ */
+DAT_RETURN
+dapls_ib_mw_bind (
+	IN  DAPL_RMR			*rmr,
+	IN  DAPL_LMR			*lmr,
+	IN  DAPL_EP			*ep,
+	IN  DAPL_COOKIE			*cookie,
+	IN  DAT_VADDR			virtual_address,
+	IN  DAT_VLEN			length,
+	IN  DAT_MEM_PRIV_FLAGS		mem_priv,
+	IN  DAT_BOOLEAN			is_signaled)
+{
+	dapl_dbg_log(DAPL_DBG_TYPE_ERR," mw_bind: NOT IMPLEMENTED\n");
+	return DAT_ERROR (DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);  
+}
+
+/*
+ * dapls_ib_mw_unbind
+ *
+ * Unbind a protection domain from a memory window.  Not implemented in
+ * this provider: logs an error and fails without touching its arguments.
+ *
+ * Input:
+ *	rmr		Initialized rmr to hold binding handles
+ *	ep		endpoint for the unbind operation
+ *	cookie		completion cookie
+ *	is_signaled	whether a completion is requested
+ *
+ * Output:
+ * 	none
+ *
+ * Returns:
+ *	DAT_NOT_IMPLEMENTED
+ *
+ */
+DAT_RETURN
+dapls_ib_mw_unbind (
+	IN  DAPL_RMR	*rmr,
+	IN  DAPL_EP	*ep,
+	IN  DAPL_COOKIE	*cookie,
+	IN  DAT_BOOLEAN	is_signaled )
+{
+	dapl_dbg_log(DAPL_DBG_TYPE_ERR," mw_unbind: NOT IMPLEMENTED\n");
+	return DAT_ERROR (DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);  
+}
+
+/*
+ * Local variables:
+ *  c-indent-level: 4
+ *  c-basic-offset: 4
+ *  tab-width: 8
+ * End:
+ */
+
Index: dapl/openib_scm/dapl_ib_cm.c
===================================================================
--- dapl/openib_scm/dapl_ib_cm.c	(revision 0)
+++ dapl/openib_scm/dapl_ib_cm.c	(revision 0)
@@ -0,0 +1,1074 @@
+/*
+ * This Software is licensed under one of the following licenses:
+ *
+ * 1) under the terms of the "Common Public License 1.0" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/cpl.php.
+ *
+ * 2) under the terms of the "The BSD License" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/bsd-license.php.
+ *
+ * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
+ *    copy of which is available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/gpl-license.php.
+ *
+ * Licensee has the right to choose one of the above licenses.
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice and one of the license notices.
+ *
+ * Redistributions in binary form must reproduce both the above copyright
+ * notice, one of the license notices in the documentation
+ * and/or other materials provided with the distribution.
+ */
+
+/***************************************************************************
+ *
+ *   Module:		 uDAPL
+ *
+ *   Filename:		 dapl_ib_cm.c
+ *
+ *   Author:		 Arlin Davis
+ *
+ *   Created:		 3/10/2005
+ *
+ *   Description: 
+ *
+ *   The uDAPL openib provider - connection management
+ *
+ ****************************************************************************
+ *		   Source Control System Information
+ *
+ *    $Id: $
+ *
+ *	Copyright (c) 2005 Intel Corporation.  All rights reserved.
+ *
+ **************************************************************************/
+
+#include "dapl.h"
+#include "dapl_adapter_util.h"
+#include "dapl_evd_util.h"
+#include "dapl_cr_util.h"
+#include "dapl_name_service.h"
+#include "dapl_ib_util.h"
+
+#include <stdio.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <netinet/tcp.h>
+#include <sysfs/libsysfs.h>
+
+/* prototypes */
+static uint16_t dapli_get_lid( struct ibv_device *dev, int port );
+
+static DAT_RETURN dapli_socket_connect ( DAPL_EP		*ep_ptr,
+					 DAT_IA_ADDRESS_PTR	r_addr,
+					 DAT_CONN_QUAL		r_qual,
+					 DAT_COUNT		p_size,
+					 DAT_PVOID		p_data );
+
+static DAT_RETURN dapli_socket_listen ( DAPL_IA			*ia_ptr,
+					DAT_CONN_QUAL		serviceID,
+					DAPL_SP			*sp_ptr );
+
+static DAT_RETURN dapli_socket_accept(	ib_cm_srvc_handle_t cm_ptr );
+
+static DAT_RETURN dapli_socket_accept_final(	DAPL_EP		*ep_ptr,
+						DAPL_CR		*cr_ptr,
+						DAT_COUNT	p_size,
+						DAT_PVOID	p_data );
+
+/* XXX temporary hack: read the LID of dev/port from sysfs; returns 0 on any failure */
+static uint16_t dapli_get_lid(IN struct ibv_device *dev, IN int port)
+{
+	char path[128];		/* sysfs mount point */
+	char val[16];		/* textual value of the lid attribute */
+	char name[256];		/* full path of the lid attribute */
+
+	if (sysfs_get_mnt_path(path, sizeof path)) {
+		fprintf(stderr, "Couldn't find sysfs mount.\n");
+		return 0;
+	}
+	snprintf(name, sizeof name, "%s/class/infiniband/%s/ports/%d/lid", path,
+		 ibv_get_device_name(dev), port); /* bounded: never overruns name[] */
+
+	if (sysfs_read_attribute_value(name, val, sizeof val)) {
+		fprintf(stderr, "Couldn't read LID at %s\n", name);
+		return 0;
+	}
+	return strtol(val, NULL, 0);	/* base 0: accepts decimal or 0x-prefixed text */
+}
+
+/*
+ * ACTIVE: Create socket, connect, exchange QP info/private data, take QP to RTS
+ */
+static DAT_RETURN 
+dapli_socket_connect (	DAPL_EP			*ep_ptr,
+			DAT_IA_ADDRESS_PTR	r_addr,
+			DAT_CONN_QUAL		r_qual,
+			DAT_COUNT		p_size,
+			DAT_PVOID		p_data )
+{
+	ib_cm_handle_t	cm_ptr;
+	DAPL_IA		*ia_ptr = ep_ptr->header.owner_ia;
+	int		len, opt = 1;
+	struct iovec    iovec[2];
+	short		rtu_data = htons(0x0E0F); /* handshake token sent once the QP is in RTS */
+	
+	dapl_dbg_log(DAPL_DBG_TYPE_EP, " connect: r_qual %d\n", r_qual);
+			
+	/*
+	 *  Allocate CM and initialize
+	 */
+	if ((cm_ptr = dapl_os_alloc(sizeof(*cm_ptr))) == NULL ) {
+		return DAT_INSUFFICIENT_RESOURCES;
+	}
+
+	(void) dapl_os_memzero( cm_ptr, sizeof( *cm_ptr ) );
+	cm_ptr->socket = -1;
+
+	/* create, connect, sockopt, and exchange QP information */
+	if ((cm_ptr->socket = socket(AF_INET,SOCK_STREAM,0)) < 0 ) {
+		dapl_os_free( cm_ptr, sizeof( *cm_ptr ) );
+		return DAT_INSUFFICIENT_RESOURCES;
+	}
+
+	((struct sockaddr_in*)r_addr)->sin_port = htons(r_qual); /* writes the port into the caller's address */
+
+	if ( connect(cm_ptr->socket, r_addr, sizeof(*r_addr)) < 0 ) {
+		dapl_dbg_log(DAPL_DBG_TYPE_ERR, " connect: %s on r_qual %d\n",
+			     strerror(errno), (unsigned int)r_qual);
+		close( cm_ptr->socket ); /* do not leak the fd when connect fails */
+		dapl_os_free( cm_ptr, sizeof( *cm_ptr ) );
+		return DAT_INVALID_ADDRESS;
+	}
+	setsockopt(cm_ptr->socket,IPPROTO_TCP,TCP_NODELAY,&opt,sizeof(opt));
+	
+	/* Send QP info, IA address, and private data */
+	cm_ptr->dst.qpn = ep_ptr->qp_handle->qp_num;
+	cm_ptr->dst.port = ia_ptr->hca_ptr->port_num;
+	cm_ptr->dst.lid = dapli_get_lid( ia_ptr->hca_ptr->ib_trans.ib_dev, 
+					 ia_ptr->hca_ptr->port_num );
+	cm_ptr->dst.ia_address = ia_ptr->hca_ptr->hca_address;
+	cm_ptr->dst.p_size = p_size;
+	iovec[0].iov_base = &cm_ptr->dst;
+	iovec[0].iov_len  = sizeof(ib_qp_cm_t);
+	if ( p_size ) {
+		iovec[1].iov_base = p_data;
+		iovec[1].iov_len  = p_size;
+	}
+	len = writev( cm_ptr->socket, iovec, (p_size ? 2:1) );
+    	if ( len != (p_size + sizeof(ib_qp_cm_t)) ) {
+		dapl_dbg_log(DAPL_DBG_TYPE_ERR, 
+			     " connect write: ERR %s, wcnt=%d\n",
+			     strerror(errno), len); 
+		goto bail;
+	}
+	dapl_dbg_log(DAPL_DBG_TYPE_EP, 
+		     " connect: SRC port=0x%x lid=0x%x, qpn=0x%x, psize=%d\n",
+		     cm_ptr->dst.port, cm_ptr->dst.lid, 
+		     cm_ptr->dst.qpn, cm_ptr->dst.p_size ); 
+
+	/* read DST information into cm_ptr, overwrite SRC info */
+	len = readv( cm_ptr->socket, iovec, 1 );
+	if ( len != sizeof(ib_qp_cm_t) ) {
+		dapl_dbg_log(DAPL_DBG_TYPE_ERR, 
+			     " connect read: ERR %s, rcnt=%d\n",
+			     strerror(errno), len); 
+		goto bail;
+	}
+	dapl_dbg_log(DAPL_DBG_TYPE_EP, 
+		     " connect: DST port=0x%x lid=0x%x, qpn=0x%x, psize=%d\n",
+		     cm_ptr->dst.port, cm_ptr->dst.lid, 
+		     cm_ptr->dst.qpn, cm_ptr->dst.p_size ); 
+
+	/* validate private data size before reading */
+	if ( cm_ptr->dst.p_size > IB_MAX_REP_PDATA_SIZE ) {
+		dapl_dbg_log(DAPL_DBG_TYPE_ERR, 
+			     " connect read: psize (%d) wrong\n",
+			     cm_ptr->dst.p_size ); 
+		goto bail;
+	}
+
+	/* read private data into cm_handle if any present */
+	if ( cm_ptr->dst.p_size ) {
+		iovec[0].iov_base = cm_ptr->p_data;
+		iovec[0].iov_len  = cm_ptr->dst.p_size;
+		len = readv( cm_ptr->socket, iovec, 1 );
+		if ( len != cm_ptr->dst.p_size ) {
+			dapl_dbg_log(DAPL_DBG_TYPE_ERR, 
+				" connect read pdata: ERR %s, rcnt=%d\n",
+				strerror(errno), len); 
+			goto bail;
+		}
+	}
+
+	/* modify QP to RTR and then to RTS with remote info */
+	if ( dapls_modify_qp_state( ep_ptr->qp_handle, 
+				    IBV_QPS_RTR, &cm_ptr->dst ) != DAT_SUCCESS )
+		goto bail;
+
+	if ( dapls_modify_qp_state( ep_ptr->qp_handle, 
+				    IBV_QPS_RTS, &cm_ptr->dst ) != DAT_SUCCESS )
+		goto bail;
+		 
+	ep_ptr->qp_state = IB_QP_STATE_RTS;
+
+	/* complete handshake after final QP state change */
+	write(cm_ptr->socket, &rtu_data, sizeof(rtu_data) );
+
+	/* init cm_handle and post the event with private data */
+	ep_ptr->cm_handle = cm_ptr;
+	dapl_dbg_log( DAPL_DBG_TYPE_EP," ACTIVE: connected!\n" ); 
+	dapl_evd_connection_callback(   ep_ptr->cm_handle, 
+					IB_CME_CONNECTED, 
+					cm_ptr->p_data, 
+					ep_ptr );	
+	return DAT_SUCCESS;
+
+bail:
+	/* close socket, free cm structure and post error event */
+	if ( cm_ptr->socket >= 0 ) 
+		close(cm_ptr->socket);
+	dapl_os_free( cm_ptr, sizeof( *cm_ptr ) );
+	dapls_ib_reinit_ep( ep_ptr ); /* reset QP state */
+
+	dapl_evd_connection_callback(	ep_ptr->cm_handle, 
+					IB_CME_LOCAL_FAILURE, 
+					NULL, 
+					ep_ptr );
+	return DAT_INTERNAL_ERROR; /* reported in addition to the LOCAL_FAILURE event */
+}
+
+
+/*
+ * PASSIVE: create a listen socket on port serviceID and queue it for cr_thread
+ */
+static DAT_RETURN 
+dapli_socket_listen (	DAPL_IA		*ia_ptr,
+			DAT_CONN_QUAL	serviceID,
+			DAPL_SP		*sp_ptr )
+{
+	struct sockaddr_in	addr;
+	ib_cm_srvc_handle_t	cm_ptr = NULL;
+	int			opt = 1;
+	DAT_RETURN		dat_status = DAT_SUCCESS;
+
+	dapl_dbg_log (	DAPL_DBG_TYPE_EP,
+			" listen(ia_ptr %p ServiceID %d sp_ptr %p)\n",
+			ia_ptr, serviceID, sp_ptr);
+
+	/* Allocate CM and initialize */
+	if ((cm_ptr = dapl_os_alloc(sizeof(*cm_ptr))) == NULL) 
+		return DAT_INSUFFICIENT_RESOURCES;
+
+	(void) dapl_os_memzero( cm_ptr, sizeof( *cm_ptr ) );
+	
+	cm_ptr->socket = cm_ptr->l_socket = -1;
+	cm_ptr->sp = sp_ptr;
+	cm_ptr->hca_ptr = ia_ptr->hca_ptr;
+	
+	/* bind, listen, set sockopt, accept, exchange data */
+	if ((cm_ptr->l_socket = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
+		dapl_dbg_log (DAPL_DBG_TYPE_ERR, 
+				"socket for listen returned %d\n", errno);
+		dat_status = DAT_INSUFFICIENT_RESOURCES;
+		goto bail;
+	}
+
+	setsockopt(cm_ptr->l_socket,SOL_SOCKET,SO_REUSEADDR,&opt,sizeof(opt));
+	addr.sin_port        = htons(serviceID);
+	addr.sin_family      = AF_INET;
+	addr.sin_addr.s_addr = INADDR_ANY;
+
+	if (( bind( cm_ptr->l_socket,(struct sockaddr*)&addr, sizeof(addr) ) < 0) ||
+		   (listen( cm_ptr->l_socket, 128 ) < 0) ) {
+		int err = errno; /* save now: the logging call below may clobber errno */
+		dapl_dbg_log( DAPL_DBG_TYPE_ERR,
+				" listen: ERROR %s on conn_qual 0x%x\n",
+				strerror(err),serviceID); 
+
+		if ( err == EADDRINUSE )
+			dat_status = DAT_CONN_QUAL_IN_USE;
+		else
+			dat_status = DAT_CONN_QUAL_UNAVAILABLE;
+
+		goto bail;
+	}
+	
+	/* set cm_handle for this service point, save listen socket */
+	sp_ptr->cm_srvc_handle = cm_ptr;
+
+	/* add to SP->CR thread list */
+	dapl_llist_init_entry((DAPL_LLIST_ENTRY*)&cm_ptr->entry);
+	dapl_os_lock( &cm_ptr->hca_ptr->ib_trans.lock );
+	dapl_llist_add_tail(&cm_ptr->hca_ptr->ib_trans.list, 
+			    (DAPL_LLIST_ENTRY*)&cm_ptr->entry, cm_ptr);
+	dapl_os_unlock(&cm_ptr->hca_ptr->ib_trans.lock);
+
+	dapl_dbg_log( DAPL_DBG_TYPE_CM,
+			" listen: qual 0x%x cr %p s_fd %d\n", /* serviceID is host order: no ntohs */
+			(unsigned int)serviceID, cm_ptr, cm_ptr->l_socket ); 
+	
+	return dat_status;
+bail:
+	dapl_dbg_log( DAPL_DBG_TYPE_ERR,
+			" listen: ERROR on conn_qual 0x%x\n",serviceID); 
+	if ( cm_ptr->l_socket >= 0 )
+		close( cm_ptr->l_socket );
+	dapl_os_free( cm_ptr, sizeof( *cm_ptr ) );
+	return dat_status;
+}
+
+
+/*
+ * PASSIVE: accept a pending connection on the listen socket, read the
+ *	    peer's QP info and private data, then post a CR event
+ */
+static DAT_RETURN 
+dapli_socket_accept(ib_cm_srvc_handle_t cm_ptr)
+{
+	ib_cm_handle_t	acm_ptr;
+	void		*p_data = NULL;
+	int		len;
+	DAT_RETURN	dat_status = DAT_SUCCESS;
+		
+	/* Allocate accept CM and initialize */
+	if ((acm_ptr = dapl_os_alloc(sizeof(*acm_ptr))) == NULL) 
+		return DAT_INSUFFICIENT_RESOURCES;
+
+	(void) dapl_os_memzero( acm_ptr, sizeof( *acm_ptr ) );
+	
+	acm_ptr->socket = -1;
+	acm_ptr->sp = cm_ptr->sp;
+	acm_ptr->hca_ptr = cm_ptr->hca_ptr;
+
+	len = sizeof(acm_ptr->dst.ia_address);
+	acm_ptr->socket = accept(cm_ptr->l_socket, 
+				(struct sockaddr*)&acm_ptr->dst.ia_address, 
+				&len ); /* NOTE(review): accept() takes a socklen_t *, len is an int - confirm */
+
+	if ( acm_ptr->socket < 0 ) {
+		dapl_dbg_log(DAPL_DBG_TYPE_ERR, 
+			" accept: ERR %s on FD %d l_cr %p\n",
+			strerror(errno),cm_ptr->l_socket,cm_ptr); 
+		dat_status = DAT_INTERNAL_ERROR;
+		goto bail;
+   	}
+
+	/* read in DST QP info, IA address. check for private data */
+	len = read( acm_ptr->socket, &acm_ptr->dst, sizeof(ib_qp_cm_t) );
+	if ( len != sizeof(ib_qp_cm_t) ) {
+		dapl_dbg_log(DAPL_DBG_TYPE_ERR, 
+			" accept read: ERR %s, rcnt=%d\n",
+			strerror(errno), len); 
+		dat_status = DAT_INTERNAL_ERROR;
+		goto bail;
+
+	}
+	dapl_dbg_log(DAPL_DBG_TYPE_EP, 
+		" accept: DST port=0x%x lid=0x%x, qpn=0x%x, psize=%d\n",
+		acm_ptr->dst.port, acm_ptr->dst.lid, 
+		acm_ptr->dst.qpn, acm_ptr->dst.p_size ); 
+
+	/* validate private data size before reading */
+	if ( acm_ptr->dst.p_size > IB_MAX_REQ_PDATA_SIZE ) {
+		dapl_dbg_log(DAPL_DBG_TYPE_ERR, 
+			" accept read: psize (%d) wrong\n",
+			acm_ptr->dst.p_size ); 
+		dat_status = DAT_INTERNAL_ERROR;
+		goto bail;
+	}
+
+	/* read private data into cm_handle if any present */
+	if ( acm_ptr->dst.p_size ) {
+		len = read( acm_ptr->socket, 
+			    acm_ptr->p_data, acm_ptr->dst.p_size );
+		if ( len != acm_ptr->dst.p_size ) {
+			dapl_dbg_log(DAPL_DBG_TYPE_ERR, 
+				" accept read pdata: ERR %s, rcnt=%d\n",
+				strerror(errno), len ); 
+			dat_status = DAT_INTERNAL_ERROR;
+			goto bail;
+		}
+		dapl_dbg_log(DAPL_DBG_TYPE_EP, 
+				" accept: psize=%d read\n",
+				acm_ptr->dst.p_size); 
+		p_data = acm_ptr->p_data;
+	}
+	
+	/* trigger CR event and return SUCCESS; p_data stays NULL when the peer sent none */
+	dapls_cr_callback(  acm_ptr,
+			    IB_CME_CONNECTION_REQUEST_PENDING,
+		            p_data,
+			    acm_ptr->sp );
+
+	return DAT_SUCCESS;
+
+bail:
+	if ( acm_ptr->socket >=0 )
+		close( acm_ptr->socket );
+	dapl_os_free( acm_ptr, sizeof( *acm_ptr ) );
+	return DAT_INTERNAL_ERROR; /* dat_status is not consulted on this path */
+}
+
+/* PASSIVE: take the QP to RTS, send local QP info and private data, then wait for the RTU token */
+static DAT_RETURN 
+dapli_socket_accept_final( DAPL_EP		*ep_ptr,
+			   DAPL_CR		*cr_ptr,
+			   DAT_COUNT		p_size,
+		           DAT_PVOID		p_data )
+{
+	DAPL_IA		*ia_ptr = ep_ptr->header.owner_ia;
+	ib_cm_handle_t	cm_ptr = cr_ptr->ib_cm_handle;
+	ib_qp_cm_t	qp_cm;
+	struct iovec    iovec[2];
+	int		len;
+	short		rtu_data = 0;
+
+	if (p_size >  IB_MAX_REP_PDATA_SIZE) 
+		return DAT_LENGTH_ERROR;
+
+	/* must have a accepted socket */
+	if ( cm_ptr->socket < 0 )
+		return DAT_INTERNAL_ERROR;
+	
+	/* modify QP to RTR and then to RTS with remote info already read */
+	if ( dapls_modify_qp_state( ep_ptr->qp_handle, 
+				    IBV_QPS_RTR, &cm_ptr->dst ) != DAT_SUCCESS )
+		goto bail;
+
+	if ( dapls_modify_qp_state( ep_ptr->qp_handle, 
+				    IBV_QPS_RTS, &cm_ptr->dst ) != DAT_SUCCESS )
+		goto bail;
+
+	ep_ptr->qp_state = IB_QP_STATE_RTS;
+	
+	/* Send QP info, IA address, and private data */
+	qp_cm.qpn = ep_ptr->qp_handle->qp_num;
+	qp_cm.port = ia_ptr->hca_ptr->port_num;
+	qp_cm.lid = dapli_get_lid( ia_ptr->hca_ptr->ib_trans.ib_dev, 
+				   ia_ptr->hca_ptr->port_num );
+	qp_cm.ia_address = ia_ptr->hca_ptr->hca_address;
+	qp_cm.p_size = p_size;
+	iovec[0].iov_base = &qp_cm;
+	iovec[0].iov_len  = sizeof(ib_qp_cm_t);
+	if (p_size) {
+		iovec[1].iov_base = p_data;
+		iovec[1].iov_len  = p_size;
+	}
+	len = writev( cm_ptr->socket, iovec, (p_size ? 2:1) );
+    	if (len != (p_size + sizeof(ib_qp_cm_t))) {
+		dapl_dbg_log(DAPL_DBG_TYPE_ERR, 
+			     " accept_final: ERR %s, wcnt=%d\n",
+			     strerror(errno), len); 
+		goto bail;
+	}
+	dapl_dbg_log(DAPL_DBG_TYPE_EP, 
+		     " accept_final: SRC port=0x%x lid=0x%x, qpn=0x%x, psize=%d\n",
+		     qp_cm.port, qp_cm.lid, qp_cm.qpn, qp_cm.p_size ); 
+	
+	/* complete handshake after final QP state change */
+	len = read(cm_ptr->socket, &rtu_data, sizeof(rtu_data) );
+	if ( len != sizeof(rtu_data) || ntohs(rtu_data) != 0x0e0f ) {
+		dapl_dbg_log(DAPL_DBG_TYPE_ERR, 
+			     " accept_final: ERR %s, rcnt=%d rdata=%x\n",
+			     strerror(errno), len, ntohs(rtu_data) ); 
+		goto bail;
+	}
+
+	/* final data exchange if remote QP state is good to go */
+	dapl_dbg_log( DAPL_DBG_TYPE_EP," PASSIVE: connected!\n" ); 
+	dapls_cr_callback ( cm_ptr, IB_CME_CONNECTED, NULL, cm_ptr->sp );
+	return DAT_SUCCESS;
+
+bail:
+	dapl_dbg_log( DAPL_DBG_TYPE_ERR," accept_final: ERR !QP_RTR_RTS \n"); 
+	if ( cm_ptr->socket >= 0 ) /* test the descriptor, not the pointer */
+		close( cm_ptr->socket );
+	dapl_os_free( cm_ptr, sizeof( *cm_ptr ) ); /* NOTE(review): cr_ptr->ib_cm_handle still points here - confirm callers */
+	dapls_ib_reinit_ep( ep_ptr ); /* reset QP state */
+
+	return DAT_INTERNAL_ERROR;
+}
+
+
+/*
+ * dapls_ib_connect
+ *
+ * Initiate a connection with the passive listener on another node
+ *
+ * Input:
+ *	ep_handle		endpoint to connect
+ *	remote_ia_address	forwarded to dapli_socket_connect()
+ *	remote_conn_qual	forwarded to dapli_socket_connect()
+ *	private_data_size	size of private data
+ *	private_data		pointer to private data
+ *
+ * Output:
+ * 	none
+ *
+ * Returns:
+ * 	DAT_SUCCESS		also returned for a NULL ep_handle
+ *	otherwise whatever dapli_socket_connect() returns
+ *
+ *
+ */
+DAT_RETURN
+dapls_ib_connect (
+	IN  DAT_EP_HANDLE		ep_handle,
+	IN  DAT_IA_ADDRESS_PTR		remote_ia_address,
+	IN  DAT_CONN_QUAL		remote_conn_qual,
+	IN  DAT_COUNT			private_data_size,
+	IN  void			*private_data )
+{
+	DAPL_EP		*ep_ptr;
+	ib_qp_handle_t	qp_ptr;
+	
+	dapl_dbg_log ( DAPL_DBG_TYPE_EP,
+			" connect(ep_handle %p ....)\n", ep_handle);
+	/*
+	 *  Sanity check: a NULL handle is reported as success, not as an error
+	 */
+	if ( NULL == ep_handle ) 
+		return DAT_SUCCESS;
+
+	ep_ptr = (DAPL_EP*)ep_handle;
+	qp_ptr = ep_ptr->qp_handle; /* assigned but not used below */
+
+	return (dapli_socket_connect(	ep_ptr, remote_ia_address, 
+					remote_conn_qual,
+					private_data_size, private_data ));
+}
+
+/*
+ * dapls_ib_disconnect
+ *
+ * Disconnect an EP: close the CM socket, reset the QP, post DISCONNECTED
+ *
+ * Input:
+ *	ep_ptr		endpoint to disconnect
+ *	close_flags	not examined by this provider
+ *
+ * Output:
+ * 	none
+ *
+ * Returns:
+ * 	DAT_SUCCESS	always (also when no CM handle is attached)
+ *
+ */
+DAT_RETURN
+dapls_ib_disconnect (
+	IN	DAPL_EP			*ep_ptr,
+	IN	DAT_CLOSE_FLAGS		close_flags )
+{
+	ib_cm_handle_t	cm_ptr = ep_ptr->cm_handle;
+
+	dapl_dbg_log (DAPL_DBG_TYPE_EP,
+			"dapls_ib_disconnect(ep_handle %p ....)\n",
+			ep_ptr);
+	if ( cm_ptr == NULL ) return DAT_SUCCESS; /* handle already released: nothing to tear down */
+	if ( cm_ptr->socket >= 0 ) {
+		close( cm_ptr->socket );
+		cm_ptr->socket = -1;
+	}
+	
+	/* reinit to modify QP state */
+	dapls_ib_reinit_ep(ep_ptr);
+
+	if ( ep_ptr->cr_ptr ) {
+		dapls_cr_callback ( ep_ptr->cm_handle,
+				    IB_CME_DISCONNECTED,
+				    NULL,
+				    ((DAPL_CR *)ep_ptr->cr_ptr)->sp_ptr );
+	} else {
+		dapl_evd_connection_callback ( ep_ptr->cm_handle,
+						IB_CME_DISCONNECTED,
+						NULL,
+						ep_ptr );
+		ep_ptr->cm_handle = NULL; /* a later call takes the early return above */
+		dapl_os_free( cm_ptr, sizeof( *cm_ptr ) );
+	}	
+	return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_disconnect_clean
+ *
+ * Clean up outstanding connection data. This routine is invoked
+ * after the final disconnect callback has occurred. Only on the
+ * ACTIVE side of a connection. This provider does nothing here.
+ *
+ * Input:
+ *	ep_ptr		DAPL_EP (unused)
+ *	active		Indicates active side of connection (unused)
+ *	ib_cm_event	provider CM event code (unused)
+ * Output:
+ * 	none
+ *
+ * Returns:
+ * 	void
+ *
+ */
+void
+dapls_ib_disconnect_clean (
+	IN  DAPL_EP			*ep_ptr,
+	IN  DAT_BOOLEAN			active,
+	IN  const ib_cm_events_t	ib_cm_event )
+{
+    return;
+}
+
+/*
+ * dapls_ib_setup_conn_listener
+ *
+ * Have the CM set up a connection listener.
+ * Thin wrapper: forwards all arguments to dapli_socket_listen().
+ * Input:
+ *	ia_ptr			IA the listener is created on
+ *	ServiceID		connection qualifier to listen on
+ *	sp_ptr			service point passed on to the listener
+ * Output:
+ * 	none
+ *
+ * Returns:
+ * 	DAT_SUCCESS
+ *	DAT_INSUFFICIENT_RESOURCES
+ *	DAT_CONN_QUAL_UNAVAILABLE
+ *	DAT_CONN_QUAL_IN_USE
+ *	(the result of dapli_socket_listen(), unchanged)
+ *
+ */
+DAT_RETURN
+dapls_ib_setup_conn_listener (
+	IN  DAPL_IA		*ia_ptr,
+	IN  DAT_UINT64		ServiceID,
+	IN  DAPL_SP		*sp_ptr )
+{
+	return (dapli_socket_listen( ia_ptr, ServiceID, sp_ptr ));
+}
+
+
+/*
+ * dapls_ib_remove_conn_listener
+ *
+ * Have the CM remove a connection listener.
+ *
+ * Input:
+ *	ia_ptr			IA the listener belongs to (only logged)
+ *	sp_ptr			service point whose cm_srvc_handle is torn down
+ *
+ * Output:
+ * 	none
+ *
+ * Returns:
+ * 	DAT_SUCCESS		always, even if no listener was set up
+ *
+ *
+ */
+DAT_RETURN
+dapls_ib_remove_conn_listener (
+	IN  DAPL_IA		*ia_ptr,
+	IN  DAPL_SP		*sp_ptr )
+{
+	ib_cm_srvc_handle_t	cm_ptr = sp_ptr->cm_srvc_handle;
+
+	dapl_dbg_log (DAPL_DBG_TYPE_EP,
+			"dapls_ib_remove_conn_listener(ia_ptr %p sp_ptr %p cm_ptr %p)\n",
+			ia_ptr, sp_ptr, cm_ptr );
+
+	/* close listen socket and mark it closed; cr_thread frees entries whose l_socket is -1 */
+	if ( cm_ptr != NULL ) {
+		if ( cm_ptr->l_socket >= 0 ) {
+			close( cm_ptr->l_socket );
+			cm_ptr->l_socket = -1; /* was ->socket: the listen fd was never marked closed */
+		}
+	    	/* cr_thread will free */
+		sp_ptr->cm_srvc_handle = NULL;
+	}
+	return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_accept_connection
+ *
+ * Perform necessary steps to accept a connection
+ *
+ * Input:
+ *	cr_handle	connection request being accepted
+ *	ep_handle	endpoint used for the connection
+ *	p_size		private data size
+ *	p_data		private data
+ *
+ * Output:
+ * 	none
+ *
+ * Returns:
+ * 	the dapls_ib_qp_alloc() error if a QP had to be allocated
+ *	and that failed, else the dapli_socket_accept_final() result
+ *	(DAT_SUCCESS, DAT_LENGTH_ERROR or DAT_INTERNAL_ERROR)
+ *
+ */
+DAT_RETURN
+dapls_ib_accept_connection (
+	IN  DAT_CR_HANDLE	cr_handle,
+	IN  DAT_EP_HANDLE	ep_handle,
+	IN  DAT_COUNT		p_size,
+	IN  const DAT_PVOID	p_data )
+{
+	DAPL_CR			*cr_ptr;
+	DAPL_EP			*ep_ptr;
+	
+	dapl_dbg_log (DAPL_DBG_TYPE_EP,
+		      "dapls_ib_accept_connection(cr %p ep %p prd %p,%d)\n",
+		      cr_handle, ep_handle, p_data, p_size  );
+
+	cr_ptr  = (DAPL_CR *) cr_handle;
+	ep_ptr  = (DAPL_EP *) ep_handle;
+	
+	/* allocate and attach a QP if necessary */
+	if ( ep_ptr->qp_state == DAPL_QP_STATE_UNATTACHED ) {
+		DAT_RETURN status;
+		status = dapls_ib_qp_alloc( ep_ptr->header.owner_ia, 
+					    ep_ptr, ep_ptr );
+		if ( status != DAT_SUCCESS )
+    			return status;
+	}
+    
+	return ( dapli_socket_accept_final(ep_ptr, cr_ptr, p_size, p_data) );
+}
+
+
+/*
+ * dapls_ib_reject_connection
+ *
+ * Reject a connection by closing its accepted socket
+ *
+ * Input:
+ *	ib_cm_handle	CM handle whose socket is closed
+ *	reject_reason	only logged; nothing is sent to the peer
+ * Output:
+ * 	none
+ *
+ * Returns:
+ * 	DAT_SUCCESS	always
+ *
+ *
+ */
+DAT_RETURN
+dapls_ib_reject_connection (
+	IN  ib_cm_handle_t	ib_cm_handle,
+	IN  int			reject_reason )
+{
+    	ib_cm_srvc_handle_t	cm_ptr = ib_cm_handle;
+
+	dapl_dbg_log (DAPL_DBG_TYPE_EP,
+		      "dapls_ib_reject_connection(cm_handle %p reason %x)\n",
+		      ib_cm_handle, reject_reason );
+
+	/* just close the socket and return; 0 is a valid descriptor, -1 means closed */
+	if ( cm_ptr->socket >= 0 ) {
+		close( cm_ptr->socket );
+		cm_ptr->socket = -1;
+	}
+	return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_cm_remote_addr
+ *
+ * Obtain the remote IP address given a connection
+ *
+ * Input:
+ *	dat_handle	an EP or CR handle (told apart by header magic)
+ *
+ * Output:
+ *	remote_ia_address: where to place the remote address
+ *
+ * Returns:
+ * 	DAT_SUCCESS
+ *	DAT_INVALID_HANDLE	neither EP nor CR, or no CM handle attached
+ *
+ */
+DAT_RETURN
+dapls_ib_cm_remote_addr (
+	IN      DAT_HANDLE	dat_handle,
+	OUT	DAT_SOCK_ADDR6	*remote_ia_address )
+{
+	DAPL_HEADER	*header;
+	ib_cm_handle_t	ib_cm_handle;
+
+	dapl_dbg_log (DAPL_DBG_TYPE_EP,
+		      "dapls_ib_cm_remote_addr(dat_handle %p, ....)\n",
+		      dat_handle );
+
+	header = (DAPL_HEADER *)dat_handle;
+
+	if (header->magic == DAPL_MAGIC_EP) 
+		ib_cm_handle = ((DAPL_EP *) dat_handle)->cm_handle;
+	else if (header->magic == DAPL_MAGIC_CR) 
+		ib_cm_handle = ((DAPL_CR *) dat_handle)->ib_cm_handle;
+	else 
+		return DAT_INVALID_HANDLE;
+	if (ib_cm_handle == NULL) return DAT_INVALID_HANDLE; /* no connection state to copy from */
+	dapl_os_memcpy(	remote_ia_address, 
+			&ib_cm_handle->dst.ia_address, 
+			sizeof(DAT_SOCK_ADDR6) );
+
+	return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_private_data_size
+ *
+ * Return the size of private data given a connection op type
+ *
+ * Input:
+ *	prd_ptr		private data pointer (not examined here)
+ *	conn_op		connection operation type
+ *
+ * This provider always answers with the IB_MAX_*_PDATA_SIZE limit
+ * that matches conn_op, whether or not prd_ptr is NULL; an
+ * unrecognized conn_op yields 0.
+ *
+ *
+ * Output:
+ *	None
+ *
+ * Returns:
+ * 	maximum private data length for conn_op, or 0
+ *
+ */
+int dapls_ib_private_data_size (
+	IN      DAPL_PRIVATE	*prd_ptr,
+	IN	DAPL_PDATA_OP	conn_op)
+{
+	int  size;
+
+	switch (conn_op)
+	{
+		case DAPL_PDATA_CONN_REQ:
+		{
+			size = IB_MAX_REQ_PDATA_SIZE;
+			break;
+		}
+		case DAPL_PDATA_CONN_REP:
+		{
+			size = IB_MAX_REP_PDATA_SIZE;
+			break;
+		}
+		case DAPL_PDATA_CONN_REJ:
+		{
+			size = IB_MAX_REJ_PDATA_SIZE;
+			break;
+		}
+		case DAPL_PDATA_CONN_DREQ:
+		{
+			size = IB_MAX_DREQ_PDATA_SIZE;
+			break;
+		}
+		case DAPL_PDATA_CONN_DREP:
+		{
+			size = IB_MAX_DREP_PDATA_SIZE;
+			break;
+		}
+		default:
+		{
+			size = 0;
+		}
+
+	} /* end switch */
+
+	return size;
+}
+
+/*
+ * Map all socket CM event codes to the DAT equivalent.
+ */
+#define DAPL_IB_EVENT_CNT	11
+
+static struct ib_cm_event_map
+{
+	const ib_cm_events_t	ib_cm_event;
+	DAT_EVENT_NUMBER	dat_event_num;
+	} ib_cm_event_map[DAPL_IB_EVENT_CNT] = {
+	/* 00 */  { IB_CME_CONNECTED,	
+					DAT_CONNECTION_EVENT_ESTABLISHED}, 
+	/* 01 */  { IB_CME_DISCONNECTED,	
+					DAT_CONNECTION_EVENT_DISCONNECTED},
+	/* 02 */  { IB_CME_DISCONNECTED_ON_LINK_DOWN, 
+					DAT_CONNECTION_EVENT_DISCONNECTED},
+	/* 03 */  { IB_CME_CONNECTION_REQUEST_PENDING,	
+					DAT_CONNECTION_REQUEST_EVENT},
+	/* 04 */  { IB_CME_CONNECTION_REQUEST_PENDING_PRIVATE_DATA,
+					DAT_CONNECTION_REQUEST_EVENT},
+	/* 05 */  { IB_CME_DESTINATION_REJECT,
+					DAT_CONNECTION_EVENT_NON_PEER_REJECTED},
+	/* 06 */  { IB_CME_DESTINATION_REJECT_PRIVATE_DATA,		
+					DAT_CONNECTION_EVENT_PEER_REJECTED},
+	/* 07 */  { IB_CME_DESTINATION_UNREACHABLE,	
+					DAT_CONNECTION_EVENT_UNREACHABLE},
+	/* 08 */  { IB_CME_TOO_MANY_CONNECTION_REQUESTS,
+					DAT_CONNECTION_EVENT_NON_PEER_REJECTED},
+	/* 09 */  { IB_CME_LOCAL_FAILURE,
+					DAT_CONNECTION_EVENT_BROKEN},
+	/* 10 */  { IB_CM_LOCAL_FAILURE,
+					DAT_CONNECTION_EVENT_BROKEN}
+};
+ 
+/*
+ * dapls_ib_get_dat_event
+ *
+ * Return a DAT connection event given a provider CM event.
+ *
+ * Input:
+ *	ib_cm_event	event provided to the dapl callback routine
+ *	active		active or passive side; only used in the debug log
+ * Output:
+ * 	none
+ *
+ * Returns:
+ * 	DAT_EVENT_NUMBER of translated provider value, 0 if unmapped
+ */
+DAT_EVENT_NUMBER
+dapls_ib_get_dat_event (
+	IN    const ib_cm_events_t	ib_cm_event,
+	IN    DAT_BOOLEAN		active)
+{
+	DAT_EVENT_NUMBER	dat_event_num;
+	int			i;
+	
+	active = active; /* self-assignment; has no effect on the result */
+
+	if (ib_cm_event > IB_CM_LOCAL_FAILURE)
+		return (DAT_EVENT_NUMBER) 0;
+
+	dat_event_num = 0;
+	for (i = 0; i < DAPL_IB_EVENT_CNT; i++) {
+		if (ib_cm_event == ib_cm_event_map[i].ib_cm_event) {
+			dat_event_num = ib_cm_event_map[i].dat_event_num;
+			break;
+		}
+	}
+	dapl_dbg_log (DAPL_DBG_TYPE_CALLBACK,
+		"dapls_ib_get_dat_event: event translate(%s) ib=0x%x dat=0x%x\n",
+		active ? "active" : "passive",  ib_cm_event, dat_event_num);
+
+	return dat_event_num;
+}
+
+
+/*
+ * dapls_ib_get_cm_event
+ *
+ * Return a provider CM event given a DAT connection event.
+ * The first map entry with a matching DAT event wins.
+ * Input:
+ *	dat_event_num	DAT event we need an equivalent CM event for
+ *
+ *
+ * Output:
+ * 	none
+ *
+ * Returns:
+ * 	ib_cm_event of translated DAPL value, 0 when nothing matches
+ */
+ib_cm_events_t
+dapls_ib_get_cm_event (
+	IN    DAT_EVENT_NUMBER		dat_event_num)
+{
+    ib_cm_events_t	ib_cm_event;
+    int			i;
+
+    ib_cm_event = 0;
+    for (i = 0; i < DAPL_IB_EVENT_CNT; i++) {
+	if ( dat_event_num == ib_cm_event_map[i].dat_event_num ) {
+		ib_cm_event = ib_cm_event_map[i].ib_cm_event;
+		break;
+	}
+    }
+    return ib_cm_event;
+}
+
+/* async CR processing thread to avoid blocking applications: polls each listen socket on hca_ptr->ib_trans.list and accepts pending connections */
+void cr_thread(void *arg) 
+{
+    struct dapl_hca	*hca_ptr = arg;
+    ib_cm_srvc_handle_t	cr, next_cr;
+    int			max_fd;
+    fd_set		rfd,rfds;
+    struct timeval	to;
+     
+    dapl_dbg_log(DAPL_DBG_TYPE_UTIL," cr_thread: ENTER hca %p\n",hca_ptr);
+
+    dapl_os_lock( &hca_ptr->ib_trans.lock );
+    hca_ptr->ib_trans.cr_state = IB_THREAD_RUN;
+    while (hca_ptr->ib_trans.cr_state == IB_THREAD_RUN) {
+	
+	FD_ZERO( &rfds ); 
+	max_fd = -1;
+	
+	if (!dapl_llist_is_empty(&hca_ptr->ib_trans.list))
+            next_cr = dapl_llist_peek_head (&hca_ptr->ib_trans.list);
+	else
+	    next_cr = NULL;
+
+	while (next_cr) {
+	    cr = next_cr;
+	    dapl_dbg_log (DAPL_DBG_TYPE_CM," thread: cm_ptr %p\n", cr );
+	    if (cr->l_socket == -1 || 
+		hca_ptr->ib_trans.cr_state != IB_THREAD_RUN) {
+
+		/* listen socket marked closed, or thread told to stop: unlink and free the entry */
+		dapl_dbg_log(DAPL_DBG_TYPE_CM," thread: Freeing %p\n", cr);
+		next_cr = dapl_llist_next_entry(&hca_ptr->ib_trans.list,
+						(DAPL_LLIST_ENTRY*)&cr->entry );
+		dapl_llist_remove_entry(&hca_ptr->ib_trans.list, 
+					(DAPL_LLIST_ENTRY*)&cr->entry);
+		dapl_os_free( cr, sizeof(*cr) );
+		continue;
+	    }
+	          
+	    FD_SET( cr->l_socket, &rfds ); /* add to the set used by the timed select below */
+	    if ( cr->l_socket > max_fd )
+		max_fd = cr->l_socket;
+
+	    /* individual zero-timeout select poll to check for work; runs with the lock released */
+	    FD_ZERO(&rfd);
+	    FD_SET(cr->l_socket, &rfd);
+	    dapl_os_unlock(&hca_ptr->ib_trans.lock);	
+	    to.tv_sec  = 0;
+	    to.tv_usec = 0;
+	    if ( select(cr->l_socket + 1,&rfd, NULL, NULL, &to) < 0) {
+		dapl_dbg_log (DAPL_DBG_TYPE_CM,
+			  " thread: ERR %s on cr %p sk %d\n", 
+			  strerror(errno), cr, cr->l_socket);
+		close(cr->l_socket);
+		cr->l_socket = -1;
+	    } else if ( FD_ISSET(cr->l_socket, &rfd) && 
+			dapli_socket_accept(cr)) {
+		/* a non-zero (error) return from dapli_socket_accept() also closes the listener */
+		close(cr->l_socket);
+		cr->l_socket = -1;
+	    }
+	    dapl_os_lock( &hca_ptr->ib_trans.lock );
+	    next_cr =  dapl_llist_next_entry(&hca_ptr->ib_trans.list,
+					     (DAPL_LLIST_ENTRY*)&cr->entry );
+	} 
+	dapl_os_unlock( &hca_ptr->ib_trans.lock );
+	to.tv_sec  = 0;
+	to.tv_usec = 100000; /* wakeup after at most 100 ms and re-check cr_state */
+	select(max_fd + 1, &rfds, NULL, NULL, &to);
+	dapl_os_lock( &hca_ptr->ib_trans.lock );
+    } 
+    dapl_os_unlock( &hca_ptr->ib_trans.lock );	
+    hca_ptr->ib_trans.cr_state = IB_THREAD_EXIT; /* written after the lock has been released */
+    dapl_dbg_log(DAPL_DBG_TYPE_UTIL," cr_thread(hca %p) exit\n",hca_ptr);
+}
+
+/*
+ * Local variables:
+ *  c-indent-level: 4
+ *  c-basic-offset: 4
+ *  tab-width: 8
+ * End:
+ */
Index: dapl/openib_scm/dapl_ib_qp.c
===================================================================
--- dapl/openib_scm/dapl_ib_qp.c	(revision 0)
+++ dapl/openib_scm/dapl_ib_qp.c	(revision 0)
@@ -0,0 +1,398 @@
+/*
+ * This Software is licensed under one of the following licenses:
+ *
+ * 1) under the terms of the "Common Public License 1.0" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/cpl.php.
+ *
+ * 2) under the terms of the "The BSD License" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/bsd-license.php.
+ *
+ * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
+ *    copy of which is available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/gpl-license.php.
+ *
+ * Licensee has the right to choose one of the above licenses.
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice and one of the license notices.
+ *
+ * Redistributions in binary form must reproduce both the above copyright
+ * notice, one of the license notices in the documentation
+ * and/or other materials provided with the distribution.
+ */
+
+/**********************************************************************
+ *
+ * MODULE: dapl_ib_qp.c
+ *
+ * PURPOSE: QP routines for access to OpenIB verbs
+ *
+ * $Id: $
+ **********************************************************************/
+
+#include "dapl.h"
+#include "dapl_adapter_util.h"
+
+/*
+ * dapls_ib_qp_alloc
+ *
+ * Create the RC QP that backs an endpoint and move it to the INIT state.
+ *
+ * Input:
+ *	ia_ptr		IA whose HCA context and transport state are used
+ *	ep_ptr		endpoint providing the EP attributes, protection zone
+ *			and EVDs; receives qp_handle and qp_state
+ *	ep_ctx_ptr	only written to the debug log by this routine
+ *
+ * Output:
+ * 	none
+ *
+ * Returns:
+ * 	DAT_SUCCESS
+ *	DAT_INSUFFICIENT_RESOURCES	CQ or QP could not be created
+ *	DAT_INTERNAL_ERROR		QP could not be moved to INIT
+ *
+ */
+DAT_RETURN
+dapls_ib_qp_alloc (
+    IN  DAPL_IA		*ia_ptr,
+    IN  DAPL_EP		*ep_ptr,
+    IN  DAPL_EP		*ep_ctx_ptr )
+{
+	DAT_EP_ATTR		*attr;
+	DAPL_EVD		*rcv_evd, *req_evd;
+	ib_cq_handle_t		rcv_cq, req_cq;
+	ib_pd_handle_t		ib_pd_handle;
+	struct ibv_qp_init_attr qp_create;
+
+	dapl_dbg_log (DAPL_DBG_TYPE_EP,
+		      " qp_alloc: ia_ptr %p ep_ptr %p ep_ctx_ptr %p\n",
+		      ia_ptr, ep_ptr, ep_ctx_ptr);
+
+	attr = &ep_ptr->param.ep_attr;
+	ib_pd_handle = ((DAPL_PZ *)ep_ptr->param.pz_handle)->pd_handle;
+	rcv_evd	= (DAPL_EVD *) ep_ptr->param.recv_evd_handle;
+	req_evd	= (DAPL_EVD *) ep_ptr->param.request_evd_handle;
+
+	/*
+	 * DAT allows usage model of EP's with no EVD's but IB does not.
+	 * Back every missing EVD with a zero-entry CQ to support this and
+	 * catch any invalid posting. The CQ is created on first use and
+	 * cached in the HCA transport so later EPs reuse it.
+	 */
+	if ((rcv_evd == DAT_HANDLE_NULL || req_evd == DAT_HANDLE_NULL) &&
+	    ia_ptr->hca_ptr->ib_trans.ib_cq_empty == IB_INVALID_HANDLE) {
+		/*
+		 * There is no EVD on this path, hence no per-EVD wait
+		 * object; only the HCA completion channel can be used.
+		 */
+		ia_ptr->hca_ptr->ib_trans.ib_cq_empty =
+			ibv_create_cq(ia_ptr->hca_ptr->ib_hca_handle,
+				      0, NULL,
+				      ia_ptr->hca_ptr->ib_trans.ib_cq, 0);
+
+		if (ia_ptr->hca_ptr->ib_trans.ib_cq_empty == IB_INVALID_HANDLE)
+			return(dapl_convert_errno(ENOMEM, "create_cq"));
+	}
+
+	if (rcv_evd != DAT_HANDLE_NULL)
+		rcv_cq = rcv_evd->ib_cq_handle;
+	else
+		rcv_cq = ia_ptr->hca_ptr->ib_trans.ib_cq_empty;
+
+	if (req_evd != DAT_HANDLE_NULL)
+		req_cq = req_evd->ib_cq_handle;
+	else
+		req_cq = ia_ptr->hca_ptr->ib_trans.ib_cq_empty;
+
+	/* Setup attributes and create qp */
+	dapl_os_memzero((void*)&qp_create, sizeof(qp_create));
+	qp_create.send_cq = req_cq;
+	qp_create.recv_cq = rcv_cq;
+	qp_create.cap.max_send_wr = attr->max_request_dtos;
+	qp_create.cap.max_recv_wr = attr->max_recv_dtos;
+	qp_create.cap.max_send_sge = attr->max_request_iov;
+	qp_create.cap.max_recv_sge = attr->max_recv_iov;
+	qp_create.cap.max_inline_data = ia_ptr->hca_ptr->ib_trans.max_inline_send;
+	qp_create.qp_type = IBV_QPT_RC;
+	/* dapls_modify_qp_state reads the EP back from qp_context */
+	qp_create.qp_context = (void*)ep_ptr;
+
+	ep_ptr->qp_handle = ibv_create_qp( ib_pd_handle, &qp_create);
+	if (!ep_ptr->qp_handle)
+		return(dapl_convert_errno(ENOMEM, "create_qp"));
+
+	/* qp_num is an integer, not a pointer */
+	dapl_dbg_log (	DAPL_DBG_TYPE_EP,
+			" qp_alloc: qpn %x sq %d,%d rq %d,%d\n",
+			ep_ptr->qp_handle->qp_num,
+			qp_create.cap.max_send_wr,qp_create.cap.max_send_sge,
+			qp_create.cap.max_recv_wr,qp_create.cap.max_recv_sge );
+
+	/* Setup QP attributes for INIT state on the way out */
+	if (dapls_modify_qp_state(ep_ptr->qp_handle,
+				  IBV_QPS_INIT,
+				  NULL )  != DAT_SUCCESS ) {
+		ibv_destroy_qp(ep_ptr->qp_handle);
+		ep_ptr->qp_handle = IB_INVALID_HANDLE;
+		return DAT_INTERNAL_ERROR;
+	}
+
+	ep_ptr->qp_state = IB_QP_STATE_INIT;
+	return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_qp_free
+ *
+ * Flush and destroy the QP attached to an endpoint, if there is one.
+ *
+ * Input:
+ *	ia_ptr		IA handle (not referenced by this routine)
+ *	ep_ptr		endpoint whose qp_handle is released
+ *
+ * Output:
+ * 	none
+ *
+ * Returns:
+ * 	DAT_SUCCESS		QP destroyed, or no QP was attached
+ *	dapl_convert_errno	ibv_destroy_qp failed; qp_handle is kept
+ *
+ */
+DAT_RETURN
+dapls_ib_qp_free (
+    IN  DAPL_IA		*ia_ptr,
+    IN  DAPL_EP		*ep_ptr )
+{
+	ib_qp_handle_t	qp = ep_ptr->qp_handle;
+
+	dapl_dbg_log (DAPL_DBG_TYPE_EP, " qp_free:  ep_ptr %p qp %p\n",
+		      ep_ptr, qp);
+
+	/* nothing attached, nothing to do */
+	if (qp == IB_INVALID_HANDLE)
+		return DAT_SUCCESS;
+
+	/* error state flushes the queues; the result is not checked */
+	dapls_modify_qp_state(qp, IBV_QPS_ERR, NULL);
+
+	if (ibv_destroy_qp(qp))
+		return(dapl_convert_errno(errno,"destroy_qp"));
+
+	ep_ptr->qp_handle = IB_INVALID_HANDLE;
+	ep_ptr->qp_state = IB_QP_STATE_ERROR;
+	return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_qp_modify
+ *
+ * Apply the queue sizes of an EP attribute set to the endpoint's QP.
+ *
+ * The EP_PARAM structure that is provided has been
+ * sanitized such that only non-zero values are valid.
+ *
+ * Input:
+ *	ia_ptr		IA handle (not referenced by this routine)
+ *	ep_ptr		endpoint whose QP is modified
+ *	attr		Sanitized EP Params
+ *
+ * Output:
+ * 	none
+ *
+ * Returns:
+ * 	DAT_SUCCESS
+ *	DAT_INVALID_PARAMETER	endpoint has no QP
+ *	DAT_INVALID_STATE	QP is neither RTR nor RTS
+ *	dapl_convert_errno	ibv_modify_qp failed
+ *
+ */
+DAT_RETURN
+dapls_ib_qp_modify (
+    IN  DAPL_IA		*ia_ptr,
+    IN  DAPL_EP		*ep_ptr,
+    IN  DAT_EP_ATTR	*attr )
+{
+	ib_qp_handle_t		qp = ep_ptr->qp_handle;
+	struct ibv_qp_attr	cap_attr;
+
+	if (qp == IB_INVALID_HANDLE)
+		return DAT_INVALID_PARAMETER;
+
+	/*
+	 * EP state, qp_handle state should be an indication
+	 * of current state but the only way to be sure is with
+	 * a user mode ibv_query_qp call which is NOT available
+	 */
+
+	/* EP is marked in error but the QP is not: push the QP to error */
+	if ((ep_ptr->qp_state == IB_QP_STATE_ERROR) &&
+	    (qp->state != IBV_QPS_ERR)) {
+		ep_ptr->qp_state = IB_QP_STATE_ERROR;
+		return (dapls_modify_qp_state(qp, IBV_QPS_ERR, NULL));
+	}
+
+	/* attributes are only modified in RTR or RTS */
+	switch (qp->state) {
+	case IBV_QPS_RTR:
+	case IBV_QPS_RTS:
+		break;
+	default:
+		return DAT_INVALID_STATE;
+	}
+
+	/* Adjust to current EP attributes */
+	dapl_os_memzero((void*)&cap_attr, sizeof(cap_attr));
+	cap_attr.cap.max_send_wr  = attr->max_request_dtos;
+	cap_attr.cap.max_send_sge = attr->max_request_iov;
+	cap_attr.cap.max_recv_wr  = attr->max_recv_dtos;
+	cap_attr.cap.max_recv_sge = attr->max_recv_iov;
+
+	dapl_dbg_log (DAPL_DBG_TYPE_EP,
+		      "modify_qp: qp %p sq %d,%d, rq %d,%d\n",
+		      qp,
+		      cap_attr.cap.max_send_wr, cap_attr.cap.max_send_sge,
+		      cap_attr.cap.max_recv_wr, cap_attr.cap.max_recv_sge );
+
+	if (ibv_modify_qp(qp, &cap_attr, IBV_QP_CAP) == 0)
+		return DAT_SUCCESS;
+
+	dapl_dbg_log (DAPL_DBG_TYPE_ERR,
+		      "modify_qp: modify ep %p qp %p failed\n",
+		      ep_ptr, ep_ptr->qp_handle);
+	return(dapl_convert_errno(errno,"modify_qp_state"));
+}
+
+/*
+ * dapls_ib_reinit_ep
+ *
+ * Move the QP to INIT state again, going through RESET first.
+ * ep_ptr->qp_state is only updated to INIT when the INIT transition
+ * succeeded; a failure is logged and the recorded state is left alone.
+ *
+ * Input:
+ *	ep_ptr		DAPL_EP
+ *
+ * Output:
+ * 	none
+ *
+ * Returns:
+ * 	void
+ *
+ */
+void
+dapls_ib_reinit_ep (
+	IN  DAPL_EP	*ep_ptr)
+{
+	DAT_RETURN	dat_status;
+
+	if ( ep_ptr->qp_handle == IB_INVALID_HANDLE )
+		return;
+
+	/*
+	 * move to RESET state and then to INIT; INIT is still attempted
+	 * when RESET fails, as before, so the call stays best effort
+	 */
+	dat_status = dapls_modify_qp_state(ep_ptr->qp_handle,
+					   IBV_QPS_RESET, NULL);
+	if (dat_status != DAT_SUCCESS)
+		dapl_dbg_log (DAPL_DBG_TYPE_ERR,
+			      " reinit_ep: ep %p qp %p RESET failed\n",
+			      ep_ptr, ep_ptr->qp_handle);
+
+	dat_status = dapls_modify_qp_state(ep_ptr->qp_handle,
+					   IBV_QPS_INIT, NULL);
+	if (dat_status != DAT_SUCCESS) {
+		dapl_dbg_log (DAPL_DBG_TYPE_ERR,
+			      " reinit_ep: ep %p qp %p INIT failed\n",
+			      ep_ptr, ep_ptr->qp_handle);
+		return;
+	}
+
+	ep_ptr->qp_state = IB_QP_STATE_INIT;
+
+	/* TODO: When IB-CM is implemented, handle timewait before
+	 * allowing re-use of this QP
+	 */
+}
+
+/*
+ * dapls_modify_qp_state
+ *
+ * Generic QP modify for init, reset, error, RTS, RTR
+ *
+ * Input:
+ *	qp_handle	QP to modify
+ *	qp_state	target state
+ *	qp_cm		remote qpn/lid/port; only read for IBV_QPS_RTR,
+ *			may be NULL for every other state
+ *
+ * Output:
+ * 	none
+ *
+ * Returns:
+ * 	DAT_SUCCESS
+ *	DAT_INVALID_PARAMETER	RTR requested without qp_cm
+ *	dapl_convert_errno	ibv_modify_qp failed
+ */
+DAT_RETURN
+dapls_modify_qp_state ( IN ib_qp_handle_t	qp_handle,
+			IN ib_qp_state_t	qp_state,
+			IN ib_qp_cm_t		*qp_cm )
+{
+	struct ibv_qp_attr	qp_attr;
+	enum ibv_qp_attr_mask	mask = IBV_QP_STATE;
+
+	dapl_os_memzero((void*)&qp_attr, sizeof(qp_attr));
+	qp_attr.qp_state = qp_state;
+
+	switch (qp_state) {
+		/* additional attributes with RTR and RTS */
+		case IBV_QPS_RTR:
+		{
+			/* the remote destination is mandatory for RTR */
+			if (qp_cm == NULL)
+				return DAT_INVALID_PARAMETER;
+
+			mask |= IBV_QP_AV                 |
+				IBV_QP_PATH_MTU           |
+				IBV_QP_DEST_QPN           |
+				IBV_QP_RQ_PSN             |
+				IBV_QP_MAX_DEST_RD_ATOMIC |
+				IBV_QP_MIN_RNR_TIMER;
+			/* only qpn, lid and port come from the peer */
+			qp_attr.qp_state 		= IBV_QPS_RTR;
+			qp_attr.path_mtu 		= IBV_MTU_1024;
+			qp_attr.dest_qp_num 		= qp_cm->qpn;
+			qp_attr.rq_psn 			= 1;
+			qp_attr.max_dest_rd_atomic	= 8;
+			qp_attr.min_rnr_timer		= 12;
+			qp_attr.ah_attr.is_global	= 0;
+			qp_attr.ah_attr.dlid		= qp_cm->lid;
+			qp_attr.ah_attr.sl		= 0;
+			qp_attr.ah_attr.src_path_bits	= 0;
+			qp_attr.ah_attr.port_num	= qp_cm->port;
+
+			dapl_dbg_log (DAPL_DBG_TYPE_EP,
+			      " modify_qp_rtr: qpn %x lid %x port %x\n",
+			      qp_cm->qpn,qp_cm->lid,qp_cm->port );
+			break;
+		}
+		case IBV_QPS_RTS:
+		{
+			mask |= IBV_QP_TIMEOUT            |
+				IBV_QP_RETRY_CNT          |
+				IBV_QP_RNR_RETRY          |
+				IBV_QP_SQ_PSN             |
+				IBV_QP_MAX_QP_RD_ATOMIC;
+			qp_attr.qp_state	= IBV_QPS_RTS;
+			qp_attr.timeout		= 14;
+			qp_attr.retry_cnt	= 7;
+			qp_attr.rnr_retry	= 7;
+			qp_attr.sq_psn		= 1;
+			qp_attr.max_rd_atomic	= 8;
+			dapl_dbg_log (DAPL_DBG_TYPE_EP,
+			      " modify_qp_rts: psn %x or %x\n",
+			      qp_attr.sq_psn, qp_attr.max_rd_atomic );
+			break;
+		}
+		case IBV_QPS_INIT:
+		{
+			DAPL_IA	*ia_ptr;
+			DAPL_EP	*ep_ptr;
+			/* need to find way back to port num */
+			ep_ptr = (DAPL_EP*)qp_handle->qp_context;
+			if (ep_ptr)
+				ia_ptr = ep_ptr->header.owner_ia;
+			else
+				break;	/* no EP context: state-only modify */
+
+			mask |= IBV_QP_PKEY_INDEX	|
+				IBV_QP_PORT		|
+				IBV_QP_ACCESS_FLAGS;
+
+			qp_attr.pkey_index  = 0;
+			qp_attr.port_num = ia_ptr->hca_ptr->port_num;
+			qp_attr.qp_access_flags =
+					IBV_ACCESS_LOCAL_WRITE |
+					IBV_ACCESS_REMOTE_WRITE |
+					IBV_ACCESS_REMOTE_READ |
+					IBV_ACCESS_REMOTE_ATOMIC;
+
+			dapl_dbg_log (DAPL_DBG_TYPE_EP,
+				" modify_qp_init: pi %x port %x acc %x\n",
+				qp_attr.pkey_index, qp_attr.port_num,
+				qp_attr.qp_access_flags );
+			break;
+		}
+		default:
+			/* reset, error, ...: only the state itself changes */
+			break;
+
+	}
+
+	if (ibv_modify_qp(qp_handle, &qp_attr, mask))
+		return(dapl_convert_errno(errno,"modify_qp_state"));
+
+	return DAT_SUCCESS;
+}
+
+/*
+ * Local variables:
+ *  c-indent-level: 4
+ *  c-basic-offset: 4
+ *  tab-width: 8
+ * End:
+ */
Index: dapl/openib_scm/README
===================================================================
--- dapl/openib_scm/README	(revision 0)
+++ dapl/openib_scm/README	(revision 0)
@@ -0,0 +1,40 @@
+
+OpenIB uDAPL provider using socket-based CM, in lieu of uCM/uAT, to set up QP/channels.
+
+to build:
+
+cd dapl/udapl
+make VERBS=openib_scm clean
+make VERBS=openib_scm
+
+
+Modifications to common code:
+
+- added dapl/openib_scm directory 
+
+	dapl/udapl/Makefile
+
+New files for openib_scm provider
+
+	dapl/openib_scm/dapl_ib_cq.c
+	dapl/openib_scm/dapl_ib_dto.h
+	dapl/openib_scm/dapl_ib_mem.c
+	dapl/openib_scm/dapl_ib_qp.c
+	dapl/openib_scm/dapl_ib_util.c
+	dapl/openib_scm/dapl_ib_util.h
+	dapl/openib_scm/dapl_ib_cm.c
+
+A simple dapl test just for openib_scm testing...
+
+	test/dtest/dtest.c
+	test/dtest/makefile
+
+	server:	dtest -s 
+	client:	dtest -h hostname
+
+known issues:
+
+	no memory windows support in ibverbs, dat_create_rmr fails.
+	
+
+
Index: dapl/openib_scm/dapl_ib_util.h
===================================================================
--- dapl/openib_scm/dapl_ib_util.h	(revision 0)
+++ dapl/openib_scm/dapl_ib_util.h	(revision 0)
@@ -0,0 +1,356 @@
+/*
+ * This Software is licensed under one of the following licenses:
+ *
+ * 1) under the terms of the "Common Public License 1.0" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/cpl.php.
+ *
+ * 2) under the terms of the "The BSD License" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/bsd-license.php.
+ *
+ * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
+ *    copy of which is available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/gpl-license.php.
+ *
+ * Licensee has the right to choose one of the above licenses.
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice and one of the license notices.
+ *
+ * Redistributions in binary form must reproduce both the above copyright
+ * notice, one of the license notices in the documentation
+ * and/or other materials provided with the distribution.
+ */
+
+/***************************************************************************
+ *
+ *   Module:		 uDAPL
+ *
+ *   Filename:		 dapl_ib_util.h
+ *
+ *   Author:		 Arlin Davis
+ *
+ *   Created:		 3/10/2005
+ *
+ *   Description: 
+ *
+ *   The uDAPL openib provider - definitions, prototypes,
+ *
+ ****************************************************************************
+ *		   Source Control System Information
+ *
+ *    $Id: $
+ *
+ *	Copyright (c) 2005 Intel Corporation.  All rights reserved.
+ *
+ **************************************************************************/
+
+#ifndef _DAPL_IB_UTIL_H_
+#define _DAPL_IB_UTIL_H_
+
+#include "verbs.h"
+#include <byteswap.h>
+
+#ifndef __cplusplus
+#define false 0
+#define true  1
+#endif /*__cplusplus */
+
+/* Typedefs to map common DAPL provider types to IB verbs */
+typedef	struct ibv_qp		*ib_qp_handle_t;
+typedef	struct ibv_cq		*ib_cq_handle_t;
+typedef	struct ibv_pd		*ib_pd_handle_t;
+typedef	struct ibv_mr		*ib_mr_handle_t;
+typedef	struct ibv_mw		*ib_mw_handle_t;
+typedef	struct ibv_wc		ib_work_completion_t;
+
+/* HCA context type maps to IB verbs  */
+typedef	struct ibv_context	*ib_hca_handle_t;
+typedef ib_hca_handle_t		dapl_ibal_ca_t;
+
+/* CM mappings, user CM not complete use SOCKETS */
+
+/*
+ * destination info to exchange until real IB CM shows up
+ *
+ * dapls_modify_qp_state() copies qpn, lid and port into the QP
+ * attributes for the RTR transition.
+ */
+typedef struct _ib_qp_cm
+{ 
+	uint32_t		qpn;		/* becomes dest_qp_num */
+	uint16_t		lid;		/* becomes ah_attr.dlid */
+	uint16_t		port;		/* becomes ah_attr.port_num */
+	int			p_size;		/* presumably private data size - TODO confirm */
+	DAT_SOCK_ADDR6		ia_address;	/* presumably the sender's IA address - TODO confirm */
+
+} ib_qp_cm_t;
+
+/* 
+ * dapl_llist_entry in dapl.h but dapl.h depends on provider 
+ * typedef's in this file first. move dapl_llist_entry out of dapl.h
+ *
+ * Until then this is a local stand-in for that type; the provider casts
+ * a pointer to it to DAPL_LLIST_ENTRY*, so the layout presumably has to
+ * stay identical to dapl_llist_entry - TODO confirm against dapl.h.
+ */
+struct ib_llist_entry
+{
+    struct dapl_llist_entry	*flink;
+    struct dapl_llist_entry	*blink;
+    void			*data;
+    struct dapl_llist_entry	*list_head;
+};
+
+/*
+ * Per-connection / per-listener state of the socket based CM.
+ *
+ * NOTE(review): cr_thread appears to walk a list of these objects,
+ * polling l_socket with select() and freeing entries whose l_socket
+ * is -1 - confirm against dapl_ib_cm.c.
+ */
+struct ib_cm_handle
+{ 
+	struct ib_llist_entry	entry;		/* list linkage, must stay first for the DAPL_LLIST_ENTRY cast */
+	int			socket;		/* presumably the connected socket - TODO confirm */
+	int			l_socket; 	/* listen socket; -1 once closed */
+	struct dapl_hca		*hca_ptr;
+	DAT_HANDLE		cr;
+	DAT_HANDLE		sp;	
+	ib_qp_cm_t		dst;		/* destination QP info */
+	unsigned char		p_data[256];	/* presumably private data buffer - TODO confirm */
+};
+
+typedef struct ib_cm_handle	*ib_cm_handle_t;
+typedef ib_cm_handle_t		ib_cm_srvc_handle_t;
+
+DAT_RETURN getipaddr(char *addr, int addr_len);
+
+/*
+ * CM events
+ *
+ * NOTE(review): IB_CM_LOCAL_FAILURE lacks the IB_CME_ prefix used by all
+ * other enumerators and follows IB_CME_LOCAL_FAILURE directly; it looks
+ * like a misspelt duplicate - confirm whether anything references it.
+ */
+typedef enum 
+{
+    IB_CME_CONNECTED,
+    IB_CME_DISCONNECTED,
+    IB_CME_DISCONNECTED_ON_LINK_DOWN,
+    IB_CME_CONNECTION_REQUEST_PENDING,
+    IB_CME_CONNECTION_REQUEST_PENDING_PRIVATE_DATA,
+    IB_CME_DESTINATION_REJECT,
+    IB_CME_DESTINATION_REJECT_PRIVATE_DATA,
+    IB_CME_DESTINATION_UNREACHABLE,
+    IB_CME_TOO_MANY_CONNECTION_REQUESTS,
+    IB_CME_LOCAL_FAILURE,
+    IB_CM_LOCAL_FAILURE
+
+} ib_cm_events_t;
+
+/* prototype for cm thread */
+void cr_thread (void *arg);
+
+/* Operation and state mappings */
+typedef enum	ibv_send_flags	ib_send_op_type_t;
+typedef	struct	ibv_sge		ib_data_segment_t;
+typedef enum	ibv_qp_state	ib_qp_state_t;
+typedef	enum	ibv_event_type	ib_async_event_type;
+typedef struct	ibv_async_event	ib_error_record_t;	
+
+/* CQ notifications */
+typedef enum
+{
+	IB_NOTIFY_ON_NEXT_COMP,
+	IB_NOTIFY_ON_SOLIC_COMP
+
+} ib_notification_type_t;
+
+/* other mappings */
+typedef int			ib_bool_t;
+typedef union ibv_gid		GID;
+typedef char			*IB_HCA_NAME;
+typedef uint16_t		ib_hca_port_t;
+typedef uint32_t		ib_comp_handle_t;
+
+#ifdef CQ_WAIT_OBJECT
+typedef struct ibv_comp_channel *ib_wait_obj_handle_t;
+#endif
+
+/* Definitions */
+#define IB_INVALID_HANDLE	NULL
+
+/* inline send rdma threshold */
+#define	INLINE_SEND_DEFAULT	128
+
+/* CM private data areas */
+#define	IB_MAX_REQ_PDATA_SIZE	92
+#define	IB_MAX_REP_PDATA_SIZE	196
+#define	IB_MAX_REJ_PDATA_SIZE	148
+#define	IB_MAX_DREQ_PDATA_SIZE	220
+#define	IB_MAX_DREP_PDATA_SIZE	224
+
+/* DTO OPs, ordered for DAPL ENUM definitions ???*/
+#define OP_RDMA_WRITE           IBV_WR_RDMA_WRITE
+#define OP_RDMA_WRITE_IMM       IBV_WR_RDMA_WRITE_WITH_IMM
+#define OP_SEND                 IBV_WR_SEND
+#define OP_SEND_IMM             IBV_WR_SEND_WITH_IMM
+#define OP_RDMA_READ            IBV_WR_RDMA_READ
+#define OP_COMP_AND_SWAP        IBV_WR_ATOMIC_CMP_AND_SWP
+#define OP_FETCH_AND_ADD        IBV_WR_ATOMIC_FETCH_AND_ADD
+#define OP_RECEIVE              7   /* internal op */
+#define OP_RECEIVE_IMM		8   /* internal op */
+#define OP_BIND_MW              9   /* internal op */
+#define OP_INVALID		0xff
+
+/* Definitions to map QP state */
+#define IB_QP_STATE_RESET	IBV_QPS_RESET
+#define IB_QP_STATE_INIT	IBV_QPS_INIT
+#define IB_QP_STATE_RTR		IBV_QPS_RTR
+#define IB_QP_STATE_RTS		IBV_QPS_RTS
+#define IB_QP_STATE_SQD		IBV_QPS_SQD
+#define IB_QP_STATE_SQE		IBV_QPS_SQE
+#define IB_QP_STATE_ERROR	IBV_QPS_ERR
+
+/* Definitions for ibverbs/mthca return codes, should be defined in verbs.h */
+/* some are errno and some are -n values */
+
+/**
+ * ibv_get_device_name - Return kernel device name
+ * ibv_get_device_guid - Return device's node GUID
+ * ibv_open_device - Return ibv_context or NULL
+ * ibv_close_device - Return 0, (errno?)
+ * ibv_get_async_event - Return 0, -1 
+ * ibv_alloc_pd - Return ibv_pd, NULL
+ * ibv_dealloc_pd - Return 0, errno 
+ * ibv_reg_mr - Return ibv_mr, NULL
+ * ibv_dereg_mr - Return 0, errno
+ * ibv_create_cq - Return ibv_cq, NULL
+ * ibv_destroy_cq - Return 0, errno
+ * ibv_get_cq_event - Return 0 & ibv_cq/context, int
+ * ibv_poll_cq - Return n & ibv_wc, 0 ok, -1 empty, -2 error 
+ * ibv_req_notify_cq - Return 0 (void?)
+ * ibv_create_qp - Return ibv_qp, NULL
+ * ibv_modify_qp - Return 0, errno
+ * ibv_destroy_qp - Return 0, errno
+ * ibv_post_send - Return 0, -1 & bad_wr
+ * ibv_post_recv - Return 0, -1 & bad_wr 
+ */
+
+/* async handler for DTO, CQ, QP, and unaffiliated */
+typedef void (*ib_async_dto_handler_t)(
+    IN    ib_hca_handle_t    ib_hca_handle,
+    IN    ib_error_record_t  *err_code,
+    IN    void               *context);
+
+typedef void (*ib_async_cq_handler_t)(
+    IN    ib_hca_handle_t    ib_hca_handle,
+    IN    ib_cq_handle_t     ib_cq_handle,
+    IN    ib_error_record_t  *err_code,
+    IN    void               *context);
+
+typedef void (*ib_async_qp_handler_t)(
+    IN    ib_hca_handle_t    ib_hca_handle,
+    IN    ib_qp_handle_t     ib_qp_handle,
+    IN    ib_error_record_t  *err_code,
+    IN    void               *context);
+
+typedef void (*ib_async_handler_t)(
+    IN    ib_hca_handle_t    ib_hca_handle,
+    IN    ib_error_record_t  *err_code,
+    IN    void               *context);
+
+/*
+ * Life cycle of a provider helper thread, as used for the cq thread:
+ * the creator sets INIT, the thread itself sets RUN once started, the
+ * destroyer sets CANCEL and the thread sets EXIT when it leaves its loop.
+ */
+typedef enum
+{
+	IB_THREAD_INIT,
+	IB_THREAD_RUN,
+	IB_THREAD_CANCEL,
+	IB_THREAD_EXIT
+
+} ib_thread_state_t;
+
+/* ib_hca_transport_t, specific to this implementation */
+typedef struct _ib_hca_transport
+{ 
+	struct	ibv_device	*ib_dev;
+	ib_cq_handle_t		ib_cq_empty;	/* zero-entry CQ for EPs created without an EVD */
+	DAPL_OS_LOCK		cq_lock;	/* presumably guards CQ processing - TODO confirm */
+	int			max_inline_send;	/* copied into the QP's cap.max_inline_data */
+	ib_thread_state_t       cq_state;	/* life cycle of cq_thread */
+	DAPL_OS_THREAD          cq_thread;
+	struct ibv_comp_channel *ib_cq;		/* completion channel polled by cq_thread */
+	int			cr_state;	/* cr_thread state, compared with IB_THREAD_* values */
+	DAPL_OS_THREAD		thread;		/* presumably the cr_thread handle - TODO confirm */
+	DAPL_OS_LOCK		lock;		/* taken by cr_thread around its use of list */
+	struct dapl_llist_entry	*list;		/* entries walked by cr_thread */
+	ib_async_handler_t	async_unafiliated;
+	void			*async_un_ctx;
+	ib_async_cq_handler_t	async_cq_error;
+	ib_async_dto_handler_t	async_cq;
+	ib_async_qp_handler_t	async_qp_error;
+
+} ib_hca_transport_t;
+
+/* provider specific fields for shared memory support */
+typedef uint32_t ib_shm_transport_t;
+
+/* prototypes */
+int32_t	dapls_ib_init (void);
+int32_t	dapls_ib_release (void);
+void cq_thread (void *arg);
+void cr_thread(void *arg);
+int dapli_cq_thread_init(struct dapl_hca *hca_ptr);
+void dapli_cq_thread_destroy(struct dapl_hca *hca_ptr);
+
+
+DAT_RETURN
+dapls_modify_qp_state ( IN ib_qp_handle_t	qp_handle,
+			IN ib_qp_state_t	qp_state,
+			IN ib_qp_cm_t		*qp_cm );
+
+/* inline functions */
+STATIC _INLINE_ IB_HCA_NAME dapl_ib_convert_name (IN char *name)
+{
+	/* the provider name is the ascii name of the local device, duplicated */
+	IB_HCA_NAME	hca_name = dapl_os_strdup(name);
+
+	return hca_name;
+}
+
+STATIC _INLINE_ void dapl_ib_release_name (IN IB_HCA_NAME name)
+{
+	/* intentionally empty: this routine does not free the name */
+}
+
+/*
+ *  Convert errno to DAT_RETURN values
+ *
+ *  err		errno style code, 0 maps to DAT_SUCCESS
+ *  str		prefix for the debug log entry (DAPL_DBG builds only);
+ *		EAGAIN, ETIME and ETIMEDOUT are not logged
+ *
+ *  Codes without an explicit mapping yield DAT_INTERNAL_ERROR.
+ */
+STATIC _INLINE_ DAT_RETURN 
+dapl_convert_errno( IN int err, IN const char *str )
+{
+    DAT_RETURN	dat_status;
+
+    if (err == 0)
+	return DAT_SUCCESS;
+
+#if DAPL_DBG
+    if (!((err == EAGAIN) || (err == ETIME) || (err == ETIMEDOUT)))
+	dapl_dbg_log (DAPL_DBG_TYPE_ERR," %s %s\n", str, strerror(err));
+#endif
+
+    switch( err )
+    {
+	/* length, privilege and protection problems */
+	case EOVERFLOW:
+	    dat_status = DAT_LENGTH_ERROR;
+	    break;
+	case EACCES:
+	    dat_status = DAT_PRIVILEGES_VIOLATION;
+	    break;
+	case ENXIO:
+	case ERANGE:
+	case EPERM:
+	    dat_status = DAT_PROTECTION_VIOLATION;
+	    break;
+
+	/* bad handles */
+	case EINVAL:
+	case EBADF:
+	case ENOENT:
+	case ENOTSOCK:
+	    dat_status = DAT_INVALID_HANDLE;
+	    break;
+
+	/* connection state */
+	case EISCONN:
+	    dat_status = DAT_INVALID_STATE | DAT_INVALID_STATE_EP_CONNECTED;
+	    break;
+	case ECONNREFUSED:
+	    dat_status = DAT_INVALID_STATE | DAT_INVALID_STATE_EP_NOTREADY;
+	    break;
+	case EALREADY:
+	    dat_status = DAT_INVALID_STATE | DAT_INVALID_STATE_EP_ACTCONNPENDING;
+	    break;
+
+	/* timeouts */
+	case ETIME:
+	case ETIMEDOUT:
+	    dat_status = DAT_TIMEOUT_EXPIRED;
+	    break;
+
+	/* addressing */
+	case ENETUNREACH:
+	    dat_status = DAT_INVALID_ADDRESS | DAT_INVALID_ADDRESS_UNREACHABLE;
+	    break;
+	case EAFNOSUPPORT:
+	    dat_status = DAT_INVALID_ADDRESS | DAT_INVALID_ADDRESS_MALFORMED;
+	    break;
+	case EADDRINUSE:
+	    dat_status = DAT_CONN_QUAL_IN_USE;
+	    break;
+
+	/* resources */
+	case ENOSPC:
+	case ENOMEM:
+	case E2BIG:
+	case EDQUOT:
+	    dat_status = DAT_INSUFFICIENT_RESOURCES;
+	    break;
+
+	case EAGAIN:
+	    dat_status = DAT_QUEUE_EMPTY;
+	    break;
+	case EINTR:
+	    dat_status = DAT_INTERRUPTED_CALL;
+	    break;
+
+	case EFAULT:
+	default:
+	    dat_status = DAT_INTERNAL_ERROR;
+	    break;
+    }
+
+    return dat_status;
+}
+
+/*
+ * Definitions required only for DAT 1.1 builds
+ */
+#define IB_ACCESS_LOCAL_READ    IBV_ACCESS_LOCAL_WRITE
+#define IB_ACCESS_LOCAL_WRITE   IBV_ACCESS_LOCAL_WRITE
+#define IB_ACCESS_REMOTE_READ   IBV_ACCESS_REMOTE_READ
+#define IB_ACCESS_REMOTE_WRITE  IBV_ACCESS_REMOTE_WRITE
+#define IB_ACCESS_MW_BIND       IBV_ACCESS_LOCAL_WRITE
+#define IB_ACCESS_ATOMIC       
+
+#endif /*  _DAPL_IB_UTIL_H_ */
Index: dapl/openib_scm/dapl_ib_cq.c
===================================================================
--- dapl/openib_scm/dapl_ib_cq.c	(revision 0)
+++ dapl/openib_scm/dapl_ib_cq.c	(revision 0)
@@ -0,0 +1,619 @@
+/*
+ * This Software is licensed under one of the following licenses:
+ *
+ * 1) under the terms of the "Common Public License 1.0" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/cpl.php.
+ *
+ * 2) under the terms of the "The BSD License" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/bsd-license.php.
+ *
+ * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
+ *    copy of which is available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/gpl-license.php.
+ *
+ * Licensee has the right to choose one of the above licenses.
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice and one of the license notices.
+ *
+ * Redistributions in binary form must reproduce both the above copyright
+ * notice, one of the license notices in the documentation
+ * and/or other materials provided with the distribution.
+ */
+
+/***************************************************************************
+ *
+ *   Module:		 uDAPL
+ *
+ *   Filename:		 dapl_ib_cq.c
+ *
+ *   Author:		 Arlin Davis
+ *
+ *   Created:		 3/10/2005
+ *
+ *   Description: 
+ *
+ *   The uDAPL openib provider - completion queue
+ *
+ ****************************************************************************
+ *		   Source Control System Information
+ *
+ *    $Id: $
+ *
+ *	Copyright (c) 2005 Intel Corporation.  All rights reserved.
+ *
+ **************************************************************************/
+
+#include "dapl.h"
+#include "dapl_adapter_util.h"
+#include "dapl_lmr_util.h"
+#include "dapl_evd_util.h"
+#include "dapl_ring_buffer_util.h"
+#include <sys/poll.h>
+#include <signal.h>
+
+/*
+ * dapli_cq_thread_init
+ *
+ * Create the CQ event thread for an HCA and wait, polling every 20 ms,
+ * until the thread has switched cq_state to IB_THREAD_RUN.
+ *
+ * Input:
+ *	hca_ptr		HCA whose ib_trans.cq_state / cq_thread are set up
+ *
+ * Returns:
+ *	0	thread is running
+ *	1	thread could not be created
+ */
+int dapli_cq_thread_init(struct dapl_hca *hca_ptr)
+{
+	DAT_RETURN dat_status;
+
+	dapl_dbg_log(DAPL_DBG_TYPE_UTIL," cq_thread_init(%p)\n", hca_ptr);
+
+	/* create thread to process CQ events */
+	hca_ptr->ib_trans.cq_state = IB_THREAD_INIT;
+	dat_status = dapl_os_thread_create(cq_thread, (void*)hca_ptr,
+					   &hca_ptr->ib_trans.cq_thread);
+	if (dat_status != DAT_SUCCESS)
+	{
+		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
+			     " cq_thread_init: failed to create thread\n");
+		return 1;
+	}
+
+	/* wait for thread to start */
+	while (hca_ptr->ib_trans.cq_state != IB_THREAD_RUN) {
+		struct timespec sleep, remain;
+		sleep.tv_sec = 0;
+		sleep.tv_nsec = 20000000; /* 20 ms */
+		dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+			     " cq_thread_init: waiting for cq_thread\n");
+		nanosleep (&sleep, &remain);
+	}
+	dapl_dbg_log(DAPL_DBG_TYPE_UTIL," cq_thread_init(%d) exit\n",getpid());
+	return 0;
+}
+
+/*
+ * dapli_cq_thread_destroy
+ *
+ * Ask a running cq_thread to stop: mark it IB_THREAD_CANCEL, send it
+ * SIGUSR1 and poll every 200 ms until it reports IB_THREAD_EXIT.
+ * Does nothing when the thread is not in IB_THREAD_RUN.
+ */
+void dapli_cq_thread_destroy(struct dapl_hca *hca_ptr)
+{
+	struct timespec	delay, left;
+
+	dapl_dbg_log(DAPL_DBG_TYPE_UTIL," cq_thread_destroy(%p)\n", hca_ptr);
+
+	if (hca_ptr->ib_trans.cq_state != IB_THREAD_RUN)
+		return;
+
+	/* request cancel and signal the cq_thread */
+	hca_ptr->ib_trans.cq_state = IB_THREAD_CANCEL;
+	pthread_kill(hca_ptr->ib_trans.cq_thread, SIGUSR1);
+	dapl_dbg_log(DAPL_DBG_TYPE_CM," cq_thread_destroy(%p) cancel\n",hca_ptr);
+
+	/* the thread acknowledges by setting IB_THREAD_EXIT */
+	while (hca_ptr->ib_trans.cq_state != IB_THREAD_EXIT) {
+		delay.tv_sec = 0;
+		delay.tv_nsec = 200000000; /* 200 ms */
+		dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+			     " cq_thread_destroy: waiting for cq_thread\n");
+		nanosleep (&delay, &left);
+	}
+	dapl_dbg_log(DAPL_DBG_TYPE_UTIL," cq_thread_destroy(%d) exit\n",getpid());
+}
+
+/*
+ * SIGUSR1 handler installed by cq_thread. It only has to catch the
+ * signal, presumably so that a blocked poll() returns instead of the
+ * default action being taken.
+ */
+static void ib_cq_handler(int signum)
+{
+	/* nothing to do */
+}
+
+/*
+ * cq_thread
+ *
+ * Thread body: waits on the HCA completion channel and hands every CQ
+ * event to dapl_evd_dto_callback() for the EVD stored as CQ context.
+ * Runs while cq_state is IB_THREAD_RUN and always publishes
+ * IB_THREAD_EXIT before returning, so that dapli_cq_thread_destroy()
+ * never waits for (or signals) a thread that is already gone.
+ *
+ * Input:
+ *	arg	struct dapl_hca * of the HCA to serve
+ */
+void cq_thread( void *arg )
+{
+	struct dapl_hca *hca_ptr = arg;
+	struct dapl_evd *evd_ptr;
+	struct ibv_cq   *ibv_cq = NULL;
+	sigset_t	sigset;
+
+	/* accept SIGUSR1 in this thread; it is sent to cancel the wait */
+	sigemptyset(&sigset);
+	sigaddset(&sigset,SIGUSR1);
+	pthread_sigmask(SIG_UNBLOCK, &sigset, NULL);
+	signal(SIGUSR1, ib_cq_handler);
+
+	hca_ptr->ib_trans.cq_state = IB_THREAD_RUN;
+
+	dapl_dbg_log(DAPL_DBG_TYPE_UTIL," cq_thread: ENTER hca %p\n",hca_ptr);
+
+	/* wait on DTO event, or signal to abort */
+	while (hca_ptr->ib_trans.cq_state == IB_THREAD_RUN) {
+		struct pollfd cq_fd = {
+			.fd      = hca_ptr->ib_trans.ib_cq->fd,
+			.events  = POLLIN,
+			.revents = 0
+		};
+		if ((poll(&cq_fd, 1, -1) == 1) &&
+			(!ibv_get_cq_event(hca_ptr->ib_trans.ib_cq,
+				   &ibv_cq, (void*)&evd_ptr))) {
+
+			if (DAPL_BAD_HANDLE(evd_ptr, DAPL_MAGIC_EVD)) {
+				ibv_ack_cq_events(ibv_cq, 1);
+				/*
+				 * stop serving, but leave through the
+				 * common exit so the state is updated
+				 */
+				break;
+			}
+
+			/* process DTO event via callback */
+			dapl_evd_dto_callback ( hca_ptr->ib_hca_handle,
+						evd_ptr->ib_cq_handle,
+						(void*)evd_ptr );
+
+			ibv_ack_cq_events(ibv_cq, 1);
+		}
+	}
+	hca_ptr->ib_trans.cq_state = IB_THREAD_EXIT;
+	dapl_dbg_log(DAPL_DBG_TYPE_UTIL," cq_thread: EXIT: hca %p \n", hca_ptr);
+}
+
+
+/*
+ * Map all verbs DTO completion codes to the DAT equivalent.
+ *
+ * Not returned by verbs:     DAT_DTO_ERR_PARTIAL_PACKET
+ *
+ * The table is searched linearly by dapls_ib_get_dto_status(); the
+ * numbers in the leading comments are the row indexes.
+ */
+static struct ib_status_map
+{
+    int				ib_status;
+    DAT_DTO_COMPLETION_STATUS	dat_status;
+} ib_status_map[] = {
+	/* 00 */  { IBV_WC_SUCCESS,		DAT_DTO_SUCCESS},
+	/* 01 */  { IBV_WC_LOC_LEN_ERR,		DAT_DTO_ERR_LOCAL_LENGTH},
+	/* 02 */  { IBV_WC_LOC_QP_OP_ERR,	DAT_DTO_ERR_LOCAL_EP},
+	/* 03 */  { IBV_WC_LOC_EEC_OP_ERR,	DAT_DTO_ERR_TRANSPORT},
+	/* 04 */  { IBV_WC_LOC_PROT_ERR,	DAT_DTO_ERR_LOCAL_PROTECTION},
+	/* 05 */  { IBV_WC_WR_FLUSH_ERR,	DAT_DTO_ERR_FLUSHED},
+	/* 06 */  { IBV_WC_MW_BIND_ERR,		DAT_RMR_OPERATION_FAILED},
+	/* 07 */  { IBV_WC_BAD_RESP_ERR,	DAT_DTO_ERR_BAD_RESPONSE},
+	/* 08 */  { IBV_WC_LOC_ACCESS_ERR,	DAT_DTO_ERR_LOCAL_PROTECTION},
+	/* 09 */  { IBV_WC_REM_INV_REQ_ERR,	DAT_DTO_ERR_REMOTE_RESPONDER},
+	/* 10 */  { IBV_WC_REM_ACCESS_ERR,	DAT_DTO_ERR_REMOTE_ACCESS},
+	/* 11 */  { IBV_WC_REM_OP_ERR,		DAT_DTO_ERR_REMOTE_RESPONDER},
+	/* 12 */  { IBV_WC_RETRY_EXC_ERR,	DAT_DTO_ERR_TRANSPORT},
+	/* 13 */  { IBV_WC_RNR_RETRY_EXC_ERR,	DAT_DTO_ERR_RECEIVER_NOT_READY},
+	/* 14 */  { IBV_WC_LOC_RDD_VIOL_ERR,	DAT_DTO_ERR_LOCAL_PROTECTION},
+	/* 15 */  { IBV_WC_REM_INV_RD_REQ_ERR,	DAT_DTO_ERR_REMOTE_RESPONDER},
+	/* 16 */  { IBV_WC_REM_ABORT_ERR,	DAT_DTO_ERR_REMOTE_RESPONDER},
+	/* 17 */  { IBV_WC_INV_EECN_ERR,	DAT_DTO_ERR_TRANSPORT},
+	/* 18 */  { IBV_WC_INV_EEC_STATE_ERR,	DAT_DTO_ERR_TRANSPORT},
+	/* 19 */  { IBV_WC_FATAL_ERR,		DAT_DTO_ERR_TRANSPORT},
+	/* 20 */  { IBV_WC_RESP_TIMEOUT_ERR,	DAT_DTO_ERR_RECEIVER_NOT_READY},
+	/* 21 */  { IBV_WC_GENERAL_ERR,		DAT_DTO_ERR_TRANSPORT},
+};
+
+/*
+ * dapls_ib_get_dto_status
+ *
+ * Return the DAT status of a DTO operation
+ *
+ * Input:
+ *	cqe_ptr		pointer to completion queue entry
+ *
+ * Output:
+ * 	none
+ *
+ * Returns:
+ * 	Value from ib_status_map table above, or DAT_DTO_FAILURE when the
+ *	verbs status has no entry in the table
+ */
+
+DAT_DTO_COMPLETION_STATUS
+dapls_ib_get_dto_status (
+	IN ib_work_completion_t		*cqe_ptr)
+{
+	DAT_DTO_COMPLETION_STATUS	dat_status = DAT_DTO_FAILURE;
+	uint32_t			ib_status;
+	unsigned int			i;
+	/* bound the search by the table itself, not by an enum value */
+	const unsigned int		map_len =
+		sizeof(ib_status_map) / sizeof(ib_status_map[0]);
+
+	ib_status = DAPL_GET_CQE_STATUS (cqe_ptr);
+
+	/*
+	* Due to the implementation of verbs completion code, we need to
+	* search the table for the correct value rather than assuming
+	* linear distribution.
+	*/
+	for (i = 0; i < map_len; i++) {
+		if (ib_status == ib_status_map[i].ib_status) {
+			dat_status = ib_status_map[i].dat_status;
+			break;
+		}
+	}
+
+	/* every status but success is reported, mapped or not */
+	if ( ib_status != IBV_WC_SUCCESS ) {
+		dapl_dbg_log (DAPL_DBG_TYPE_DTO_COMP_ERR,
+			      " DTO completion ERROR: %d: op %#x\n",
+			      ib_status, DAPL_GET_CQE_OPTYPE (cqe_ptr));
+	}
+
+	return dat_status;
+}
+    
+/*
+ * dapls_ib_get_async_event
+ *
+ * Translate a verbs async event into a DAT event number.
+ *
+ * Input:
+ *	err_record	verbs async event; only event_type is read
+ *
+ * Output:
+ *	async_event	DAT event number, written only on DAT_SUCCESS
+ *
+ * Returns:
+ *	DAT_SUCCESS
+ *	DAT_NOT_IMPLEMENTED	event type has no DAT mapping
+ */
+DAT_RETURN dapls_ib_get_async_event (
+	IN  ib_error_record_t		*err_record,
+	OUT DAT_EVENT_NUMBER		*async_event)
+{
+    int	event = err_record->event_type;
+
+    switch (event) {
+	/* OVERFLOW error */
+	case IBV_EVENT_CQ_ERR:
+	    *async_event = DAT_ASYNC_ERROR_EVD_OVERFLOW;
+	    return DAT_SUCCESS;
+
+	/* INTERNAL errors */
+	case IBV_EVENT_DEVICE_FATAL:
+	    *async_event = DAT_ASYNC_ERROR_PROVIDER_INTERNAL_ERROR;
+	    return DAT_SUCCESS;
+
+	/* CATASTROPHIC errors */
+	case IBV_EVENT_PORT_ERR:
+	    *async_event = DAT_ASYNC_ERROR_IA_CATASTROPHIC;
+	    return DAT_SUCCESS;
+
+	/* BROKEN QP error */
+	case IBV_EVENT_SQ_DRAINED:
+	case IBV_EVENT_QP_FATAL:
+	case IBV_EVENT_QP_REQ_ERR:
+	case IBV_EVENT_QP_ACCESS_ERR:
+	    *async_event = DAT_ASYNC_ERROR_EP_BROKEN;
+	    return DAT_SUCCESS;
+
+	/* connection completion */
+	case IBV_EVENT_COMM_EST:
+	    *async_event = DAT_CONNECTION_EVENT_ESTABLISHED;
+	    return DAT_SUCCESS;
+
+	/* TODO: process HW state changes */
+	case IBV_EVENT_PATH_MIG:
+	case IBV_EVENT_PATH_MIG_ERR:
+	case IBV_EVENT_PORT_ACTIVE:
+	case IBV_EVENT_LID_CHANGE:
+	case IBV_EVENT_PKEY_CHANGE:
+	case IBV_EVENT_SM_CHANGE:
+	default:
+	    break;
+    }
+
+    /* unmapped event: *async_event is left untouched */
+    return DAT_ERROR (DAT_NOT_IMPLEMENTED, 0);
+}
+
+/*
+ * dapls_ib_cq_alloc
+ *
+ * Create a CQ for an EVD and arm it for completion events.
+ *
+ * Input:
+ *	ia_ptr			IA handle
+ *	evd_ptr			pointer to EVD struct
+ *	cqlen			minimum QLen
+ *
+ * Output:
+ *	cqlen			overwritten with the cqe value of the new CQ
+ *
+ * Returns:
+ * 	DAT_SUCCESS
+ *	DAT_INSUFFICIENT_RESOURCES
+ *
+ */
+DAT_RETURN
+dapls_ib_cq_alloc (
+	IN  DAPL_IA		*ia_ptr,
+	IN  DAPL_EVD		*evd_ptr,
+	IN  DAT_COUNT		*cqlen )
+{
+	struct ibv_comp_channel *comp_chan;
+
+	dapl_dbg_log ( DAPL_DBG_TYPE_UTIL, 
+		"dapls_ib_cq_alloc: evd %p cqlen=%d \n", evd_ptr, *cqlen );
+
+	/* start with the completion channel kept in the HCA transport data */
+	comp_chan = ia_ptr->hca_ptr->ib_trans.ib_cq;
+
+#ifdef CQ_WAIT_OBJECT
+	/* an EVD that owns a wait object uses that channel instead */
+	if (evd_ptr->cq_wait_obj_handle != NULL)
+		comp_chan = evd_ptr->cq_wait_obj_handle;
+#endif
+
+	/* the EVD itself is registered as the CQ context */
+	evd_ptr->ib_cq_handle = ibv_create_cq(ia_ptr->hca_ptr->ib_hca_handle,
+					      *cqlen, evd_ptr, comp_chan, 0);
+	if (evd_ptr->ib_cq_handle == IB_INVALID_HANDLE)
+		return DAT_INSUFFICIENT_RESOURCES;
+
+	/* arm cq for events; the result of the request is not checked */
+	dapls_set_cq_notify(ia_ptr, evd_ptr);
+
+	/* hand the cqe value of the created CQ back to the caller */
+	*cqlen = evd_ptr->ib_cq_handle->cqe;
+
+	dapl_dbg_log ( DAPL_DBG_TYPE_UTIL, 
+		"dapls_ib_cq_alloc: new_cq %p cqlen=%d \n", 
+		evd_ptr->ib_cq_handle, *cqlen );
+
+	return DAT_SUCCESS;
+}
+
+
+/*
+ * dapls_ib_cq_resize
+ *
+ * Resize a CQ by creating a replacement with the requested length and
+ * destroying the original. The EVD is switched to the new CQ only if
+ * the original could be destroyed.
+ *
+ * Input:
+ *	ia_ptr			IA handle
+ *	evd_ptr			pointer to EVD struct
+ *	cqlen			minimum QLen
+ *
+ * Output:
+ *	cqlen			overwritten with the cqe value of the new CQ
+ *
+ * Returns:
+ * 	DAT_SUCCESS
+ *	DAT_INSUFFICIENT_RESOURCES	replacement CQ could not be created
+ *	dapl_convert_errno		original CQ could not be destroyed
+ *
+ */
+DAT_RETURN
+dapls_ib_cq_resize (
+	IN  DAPL_IA	*ia_ptr,
+	IN  DAPL_EVD	*evd_ptr,
+	IN  DAT_COUNT	*cqlen )
+{
+	ib_cq_handle_t	new_cq;
+	struct ibv_comp_channel *channel = ia_ptr->hca_ptr->ib_trans.ib_cq;
+
+	/* IB verbs does not support resize. Try to re-create CQ
+	 * with new size. Can only be done if QP is not attached. 
+	 * destroy EBUSY == QP still attached.
+	 */
+
+#ifdef CQ_WAIT_OBJECT
+	/* keep using the EVD's own wait object channel when it has one */
+	if (evd_ptr->cq_wait_obj_handle)
+		channel = evd_ptr->cq_wait_obj_handle;
+#endif
+
+	/* Call IB verbs to create CQ */
+	new_cq = ibv_create_cq(ia_ptr->hca_ptr->ib_hca_handle, *cqlen,
+			       evd_ptr, channel, 0);
+
+	if (new_cq == IB_INVALID_HANDLE) 
+		return	DAT_INSUFFICIENT_RESOURCES;
+	
+	/* destroy the original and replace if successful */
+	if (ibv_destroy_cq(evd_ptr->ib_cq_handle)) {
+		/*
+		 * Capture errno before the cleanup call below, which may
+		 * overwrite it and hide the reason the original CQ could
+		 * not be destroyed.
+		 */
+		int	destroy_errno = errno;
+
+		ibv_destroy_cq(new_cq);
+		return(dapl_convert_errno(destroy_errno,"resize_cq"));
+	}
+		
+	/* update EVD with new cq handle and size */
+	evd_ptr->ib_cq_handle = new_cq;
+	*cqlen = new_cq->cqe;
+
+	/* arm cq for events */
+	dapls_set_cq_notify (ia_ptr, evd_ptr);
+
+	return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_cq_free
+ *
+ * Destroy the CQ attached to an EVD, if there is one.
+ *
+ * Input:
+ *	ia_ptr			IA handle
+ *	evd_ptr			pointer to EVD struct
+ *
+ * Output:
+ * 	none
+ *
+ * Returns:
+ * 	DAT_SUCCESS
+ *	dapl_convert_errno	the CQ could not be destroyed
+ *
+ */
+DAT_RETURN dapls_ib_cq_free (
+	IN  DAPL_IA		*ia_ptr,
+	IN  DAPL_EVD		*evd_ptr)
+{
+	/* nothing to release when the EVD holds no CQ */
+	if ( evd_ptr->ib_cq_handle == IB_INVALID_HANDLE )
+		return DAT_SUCCESS;
+
+	/* copy all entries on CQ to EVD before destroying */
+	dapls_evd_copy_cq(evd_ptr);
+
+	if (ibv_destroy_cq(evd_ptr->ib_cq_handle) != 0)
+		return(dapl_convert_errno(errno,"destroy_cq"));
+
+	/* mark the handle invalid so a repeated call is a no-op */
+	evd_ptr->ib_cq_handle = IB_INVALID_HANDLE;
+
+	return DAT_SUCCESS;
+}
+
+/*
+ * dapls_set_cq_notify
+ *
+ * Request notification for the next completion on the EVD's CQ.
+ *
+ * Input:
+ *	ia_ptr			IA handle (not referenced here)
+ *	evd_ptr			EVD whose CQ is armed
+ *
+ * Output:
+ * 	none
+ *
+ * Returns:
+ * 	DAT_SUCCESS
+ *	dapl_convert_errno 
+ */
+DAT_RETURN dapls_set_cq_notify (
+	IN  DAPL_IA	    *ia_ptr,
+	IN  DAPL_EVD	    *evd_ptr)
+{
+	/* second argument 0: same call as completion_notify with type 0 */
+	int	rc = ibv_req_notify_cq( evd_ptr->ib_cq_handle, 0 );
+
+	if (rc == 0)
+		return DAT_SUCCESS;
+
+	return(dapl_convert_errno(errno,"notify_cq"));
+}
+
+/*
+ * dapls_ib_completion_notify
+ *
+ * Request CQ notification of the given type on the EVD's CQ.
+ *
+ * Input:
+ *	hca_handle		hca handle (not referenced here)
+ *	evd_ptr			evd handle
+ *	type			notification type, handed to the verbs call
+ *
+ * Output:
+ * 	none
+ *
+ * Returns:
+ * 	DAT_SUCCESS
+ *	dapl_convert_errno
+ */
+DAT_RETURN dapls_ib_completion_notify (
+	IN  ib_hca_handle_t		hca_handle,
+	IN  DAPL_EVD			*evd_ptr,
+	IN  ib_notification_type_t	type)
+{
+	int	rc = ibv_req_notify_cq( evd_ptr->ib_cq_handle, type );
+
+	if (rc == 0)
+		return DAT_SUCCESS;
+
+	return(dapl_convert_errno(errno,"notify_cq_type"));
+}
+
+/*
+ * dapls_ib_completion_poll
+ *
+ * Poll the EVD's CQ for a single work completion.
+ *
+ * Input:
+ *	hca_ptr			hca handle (not referenced here)
+ *	evd_ptr			evd handle
+ *	wc_ptr			storage for one work completion
+ *
+ * Output:
+ *	wc_ptr			filled in by the verbs poll call
+ *
+ * Returns: 
+ * 	DAT_SUCCESS		exactly one completion was returned
+ *	DAT_QUEUE_EMPTY		any other poll result
+ *	
+ */
+DAT_RETURN dapls_ib_completion_poll (
+	IN  DAPL_HCA			*hca_ptr,
+	IN  DAPL_EVD			*evd_ptr,
+	IN  ib_work_completion_t	*wc_ptr)
+{
+	/* ask for at most one entry */
+	int	polled = ibv_poll_cq(evd_ptr->ib_cq_handle, 1, wc_ptr);
+
+	if (polled != 1)
+		return	DAT_QUEUE_EMPTY;
+
+	return	DAT_SUCCESS;
+}
+
+#ifdef CQ_WAIT_OBJECT
+
+/* NEW common wait objects for providers with direct CQ wait objects */
+/*
+ * dapls_ib_wait_object_create
+ *
+ * Create a completion channel on the HCA of the IA that owns the EVD
+ * and return it as the CQ wait object.
+ *
+ * Input:
+ *	evd_ptr			EVD the wait object is created for
+ *
+ * Output:
+ *	p_cq_wait_obj_handle	receives the new completion channel
+ *				(NULL when creation failed)
+ *
+ * Returns:
+ *	DAT_SUCCESS
+ *	DAT_INSUFFICIENT_RESOURCES	channel could not be created
+ */
+DAT_RETURN
+dapls_ib_wait_object_create ( 
+		IN DAPL_EVD		*evd_ptr,
+		IN ib_wait_obj_handle_t	*p_cq_wait_obj_handle )
+{
+	dapl_dbg_log (	DAPL_DBG_TYPE_CM, 
+			" cq_object_create: (%p,%p)\n", 
+			evd_ptr, p_cq_wait_obj_handle );
+
+	/* set cq_wait object to evd_ptr */
+	*p_cq_wait_obj_handle = 
+		ibv_create_comp_channel(evd_ptr->header.owner_ia->hca_ptr->ib_hca_handle);	
+
+	/*
+	 * Report the failure instead of handing back a NULL channel:
+	 * dapls_ib_wait_object_wait reads the channel's fd unconditionally.
+	 */
+	if (*p_cq_wait_obj_handle == NULL)
+		return DAT_INSUFFICIENT_RESOURCES;
+		
+	return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_wait_object_destroy
+ *
+ * Destroy the completion channel used as a CQ wait object.
+ * Always returns DAT_SUCCESS; the result of the verbs destroy call
+ * is not examined.
+ */
+DAT_RETURN
+dapls_ib_wait_object_destroy (
+	IN ib_wait_obj_handle_t	    p_cq_wait_obj_handle)
+{
+	dapl_dbg_log (DAPL_DBG_TYPE_UTIL,
+		      " cq_object_destroy: wait_obj=%p\n",
+		      p_cq_wait_obj_handle);
+
+	/* release the channel */
+	(void) ibv_destroy_comp_channel(p_cq_wait_obj_handle);
+
+	return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_wait_object_wakeup
+ *
+ * Placeholder wakeup for a CQ wait object: the request is only
+ * logged and DAT_SUCCESS is returned.
+ */
+DAT_RETURN
+dapls_ib_wait_object_wakeup (
+	IN ib_wait_obj_handle_t		p_cq_wait_obj_handle)
+{
+	dapl_dbg_log (DAPL_DBG_TYPE_UTIL,
+		      " cq_object_wakeup: wait_obj=%p\n",
+		      p_cq_wait_obj_handle);
+
+	/* no wake up mechanism */
+	return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_wait_object_wait
+ *
+ * Block on the completion channel until a CQ event arrives, the
+ * timeout expires, or poll() fails. An arriving event is fetched
+ * and acknowledged here.
+ *
+ * Input:
+ *	p_cq_wait_obj_handle	completion channel to wait on
+ *	timeout			timeout in usecs, or DAT_TIMEOUT_INFINITE
+ *
+ * Output:
+ * 	none
+ *
+ * Returns:
+ *	dapl_convert_errno of 0 (event), ETIMEDOUT (timeout) or the
+ *	errno left by a failed poll()
+ */
+DAT_RETURN
+dapls_ib_wait_object_wait (
+	IN ib_wait_obj_handle_t	    p_cq_wait_obj_handle,
+	IN u_int32_t 		    timeout)
+{
+	struct dapl_evd	*evd_ptr = NULL;	/* logged on every path */
+	struct ibv_cq	*ibv_cq = NULL;
+	void		*ibv_ctx = NULL;
+	int		status = 0; 
+	int		timeout_ms = -1;	/* -1: poll() waits forever */
+	struct pollfd cq_fd = {
+			.fd      = p_cq_wait_obj_handle->fd,
+			.events  = POLLIN,
+			.revents = 0
+		};
+
+	dapl_dbg_log ( DAPL_DBG_TYPE_CM, 
+			" cq_object_wait: CQ channel %p time %d\n", 
+			p_cq_wait_obj_handle, timeout );
+	
+	/*
+	 * uDAPL timeout values in usecs; poll() takes msecs. Integer
+	 * division means a timeout below 1000 usecs becomes 0 msecs.
+	 */
+	if (timeout != DAT_TIMEOUT_INFINITE)
+		timeout_ms = timeout/1000;
+
+	status = poll(&cq_fd, 1, timeout_ms);
+
+	if (status > 0) {
+		/*
+		 * returned event: fetch it through a real void * so the
+		 * CQ context is not written through a punned pointer,
+		 * then acknowledge it.
+		 */
+		if (!ibv_get_cq_event(p_cq_wait_obj_handle, 
+				      &ibv_cq, &ibv_ctx)) {
+			evd_ptr = ibv_ctx;
+			ibv_ack_cq_events(ibv_cq, 1);
+		}
+		status = 0;
+
+	} else if (status == 0) {
+		/* timeout */
+		status = ETIMEDOUT;
+
+	} else {
+		/* poll() failed: report its errno, not the raw -1 */
+		status = errno;
+	}
+	
+	/* log the status that is returned rather than a stale errno */
+	dapl_dbg_log (DAPL_DBG_TYPE_CM, 
+		      " cq_object_wait: RET evd %p ibv_cq %p ibv_ctx %p %s\n",
+		      evd_ptr, ibv_cq,ibv_ctx,strerror(status));
+	
+	return(dapl_convert_errno(status,"cq_wait_object_wait"));
+	
+}
+#endif
+
+/*
+ * Local variables:
+ *  c-indent-level: 4
+ *  c-basic-offset: 4
+ *  tab-width: 8
+ * End:
+ */
+







More information about the general mailing list