[ofa-general] [PATCH 01/11] uDAPL openib_common: reorganize provider code base to share common mem, cq, qp, dto functions
Arlin Davis
arlin.r.davis at intel.com
Mon Jul 6 12:51:50 PDT 2009
This patch set builds the framework for the new uCM provider by
reorganizing the code base to make better use of common code across
the OFA providers.

A new openib_common directory is added with the shared cq, qp, util,
dto, and mem function calls and definitions. This leaves only the
unique CM and device definitions and functions in the individual
provider directories, openib_scm and openib_cma.

Modifications to dapl_cr_accept are required: ep->cm_handle is
allocated and managed entirely in the provider, so dapl common code
should not automatically update ep_handle->cm_handle from
cr->cm_handle. The provider determines which cm_handle is required
for the accept.
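
For reference, a condensed sketch of the accept path from the new
openib_cma cm.c below (simplified, error handling omitted) shows the
provider taking ownership of the cm_handle instead of the common code:

    /* provider-side accept: move the pre-bound QP from the EP's
     * cm_id to the inbound CR's cm_id, then publish the accepted
     * cm_handle on the EP only after rdma_accept() succeeds.
     */
    struct dapl_cm_id *cr_conn = cr_ptr->ib_cm_handle;

    cr_conn->cm_id->qp = ep_ptr->cm_handle->cm_id->qp;
    ep_ptr->cm_handle->cm_id->qp = NULL;
    dapls_ib_cm_free(ep_ptr->cm_handle, NULL);

    rdma_accept(cr_conn->cm_id, &cr_conn->params);

    ep_ptr->cm_handle = cr_conn;  /* EP now owns the accepted cm_handle */
    cr_conn->ep = ep_ptr;
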
Most changes are simply code relocation.

openib_cma defines _OPENIB_CMA_ and openib_scm defines _OPENIB_SCM_
for provider-specific build needs in the common code.
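
As an illustration only (the actual usage is in the new openib_common
sources and is not reproduced here), the shared code can branch on
whichever provider define is set at build time:

    /* illustrative only: common code keying off the provider define */
    #if defined(_OPENIB_CMA_)
            /* rdma_cm (uCMA) specific handling */
    #elif defined(_OPENIB_SCM_)
            /* socket CM specific handling */
    #endif
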
---
Makefile.am | 32 +-
dapl/common/dapl_cr_accept.c | 1 -
dapl/dirs | 2 +-
dapl/include/dapl_debug.h | 3 +-
dapl/openib_cma/SOURCES | 21 +-
dapl/openib_cma/cm.c | 1338 +++++++++++++++++++++++++
dapl/openib_cma/dapl_ib_cm.c | 1297 ------------------------
dapl/openib_cma/dapl_ib_cq.c | 559 -----------
dapl/openib_cma/dapl_ib_dto.h | 472 ---------
dapl/openib_cma/dapl_ib_extensions.c | 329 ------
dapl/openib_cma/dapl_ib_mem.c | 380 -------
dapl/openib_cma/dapl_ib_qp.c | 331 ------
dapl/openib_cma/dapl_ib_util.c | 1134 ---------------------
dapl/openib_cma/dapl_ib_util.h | 252 +----
dapl/openib_cma/device.c | 847 ++++++++++++++++
dapl/openib_common/cq.c | 491 +++++++++
dapl/openib_common/dapl_ib_common.h | 299 ++++++
dapl/openib_common/dapl_ib_dto.h | 504 ++++++++++
dapl/openib_common/ib_extensions.c | 360 +++++++
dapl/openib_common/mem.c | 370 +++++++
dapl/openib_common/qp.c | 515 ++++++++++
dapl/openib_common/util.c | 375 +++++++
dapl/openib_scm/SOURCES | 21 +-
dapl/openib_scm/cm.c | 1839 ++++++++++++++++++++++++++++++++++
dapl/openib_scm/dapl_ib_cm.c | 1786 ---------------------------------
dapl/openib_scm/dapl_ib_cq.c | 705 -------------
dapl/openib_scm/dapl_ib_dto.h | 527 ----------
dapl/openib_scm/dapl_ib_extensions.c | 371 -------
dapl/openib_scm/dapl_ib_mem.c | 382 -------
dapl/openib_scm/dapl_ib_qp.c | 513 ----------
dapl/openib_scm/dapl_ib_util.c | 743 --------------
dapl/openib_scm/dapl_ib_util.h | 300 +------
dapl/openib_scm/device.c | 412 ++++++++
33 files changed, 7433 insertions(+), 10078 deletions(-)
create mode 100644 dapl/openib_cma/cm.c
delete mode 100755 dapl/openib_cma/dapl_ib_cm.c
delete mode 100755 dapl/openib_cma/dapl_ib_cq.c
delete mode 100644 dapl/openib_cma/dapl_ib_dto.h
delete mode 100755 dapl/openib_cma/dapl_ib_extensions.c
delete mode 100755 dapl/openib_cma/dapl_ib_mem.c
delete mode 100755 dapl/openib_cma/dapl_ib_qp.c
delete mode 100755 dapl/openib_cma/dapl_ib_util.c
create mode 100644 dapl/openib_cma/device.c
create mode 100644 dapl/openib_common/cq.c
create mode 100644 dapl/openib_common/dapl_ib_common.h
create mode 100644 dapl/openib_common/dapl_ib_dto.h
create mode 100644 dapl/openib_common/ib_extensions.c
create mode 100644 dapl/openib_common/mem.c
create mode 100644 dapl/openib_common/qp.c
create mode 100644 dapl/openib_common/util.c
create mode 100644 dapl/openib_scm/cm.c
delete mode 100644 dapl/openib_scm/dapl_ib_cm.c
delete mode 100644 dapl/openib_scm/dapl_ib_cq.c
delete mode 100644 dapl/openib_scm/dapl_ib_dto.h
delete mode 100755 dapl/openib_scm/dapl_ib_extensions.c
delete mode 100644 dapl/openib_scm/dapl_ib_mem.c
delete mode 100644 dapl/openib_scm/dapl_ib_qp.c
delete mode 100644 dapl/openib_scm/dapl_ib_util.c
create mode 100644 dapl/openib_scm/device.c
diff --git a/Makefile.am b/Makefile.am
index fa47165..cf75a88 100755
--- a/Makefile.am
+++ b/Makefile.am
@@ -17,8 +17,8 @@ endif
if EXT_TYPE_IB
XFLAGS = -DDAT_EXTENSIONS
-XPROGRAMS_CMA = dapl/openib_cma/dapl_ib_extensions.c
-XPROGRAMS_SCM = dapl/openib_scm/dapl_ib_extensions.c
+XPROGRAMS_CMA = dapl/openib_common/ib_extensions.c
+XPROGRAMS_SCM = dapl/openib_common/ib_extensions.c
else
XFLAGS =
XPROGRAMS_CMA =
@@ -47,6 +47,7 @@ dapl_udapl_libdaplofa_la_CFLAGS = $(AM_CFLAGS) -D_GNU_SOURCE $(OSFLAGS) $(XFLAGS
-DOPENIB -DCQ_WAIT_OBJECT \
-I$(srcdir)/dat/include/ -I$(srcdir)/dapl/include/ \
-I$(srcdir)/dapl/common -I$(srcdir)/dapl/udapl/linux \
+ -I$(srcdir)/dapl/openib_common \
-I$(srcdir)/dapl/openib_cma \
-I$(srcdir)/dapl/openib_cma/linux
@@ -54,6 +55,7 @@ dapl_udapl_libdaploscm_la_CFLAGS = $(AM_CFLAGS) -D_GNU_SOURCE $(OSFLAGS) $(XFLAG
-DOPENIB -DCQ_WAIT_OBJECT \
-I$(srcdir)/dat/include/ -I$(srcdir)/dapl/include/ \
-I$(srcdir)/dapl/common -I$(srcdir)/dapl/udapl/linux \
+ -I$(srcdir)/dapl/openib_common \
-I$(srcdir)/dapl/openib_scm \
-I$(srcdir)/dapl/openib_scm/linux
@@ -185,11 +187,12 @@ dapl_udapl_libdaplofa_la_SOURCES = dapl/udapl/dapl_init.c \
dapl/common/dapl_csp.c \
dapl/common/dapl_ep_post_send_invalidate.c \
dapl/common/dapl_ep_post_rdma_read_to_rmr.c \
- dapl/openib_cma/dapl_ib_util.c \
- dapl/openib_cma/dapl_ib_cq.c \
- dapl/openib_cma/dapl_ib_qp.c \
- dapl/openib_cma/dapl_ib_cm.c \
- dapl/openib_cma/dapl_ib_mem.c $(XPROGRAMS_CMA)
+ dapl/openib_common/mem.c \
+ dapl/openib_common/cq.c \
+ dapl/openib_common/qp.c \
+ dapl/openib_common/util.c \
+ dapl/openib_cma/cm.c \
+ dapl/openib_cma/device.c $(XPROGRAMS_CMA)
dapl_udapl_libdaplofa_la_LDFLAGS = -version-info 2:0:0 $(daplofa_version_script) \
-Wl,-init,dapl_init -Wl,-fini,dapl_fini \
@@ -298,11 +301,12 @@ dapl_udapl_libdaploscm_la_SOURCES = dapl/udapl/dapl_init.c \
dapl/common/dapl_csp.c \
dapl/common/dapl_ep_post_send_invalidate.c \
dapl/common/dapl_ep_post_rdma_read_to_rmr.c \
- dapl/openib_scm/dapl_ib_util.c \
- dapl/openib_scm/dapl_ib_cq.c \
- dapl/openib_scm/dapl_ib_qp.c \
- dapl/openib_scm/dapl_ib_cm.c \
- dapl/openib_scm/dapl_ib_mem.c $(XPROGRAMS_SCM)
+ dapl/openib_common/mem.c \
+ dapl/openib_common/cq.c \
+ dapl/openib_common/qp.c \
+ dapl/openib_common/util.c \
+ dapl/openib_scm/cm.c \
+ dapl/openib_scm/device.c $(XPROGRAMS_SCM)
dapl_udapl_libdaploscm_la_LDFLAGS = -version-info 2:0:0 $(daploscm_version_script) \
-Wl,-init,dapl_init -Wl,-fini,dapl_fini \
@@ -365,10 +369,10 @@ EXTRA_DIST = dat/common/dat_dictionary.h \
dapl/include/dapl_debug.h \
dapl/include/dapl_ipoib_names.h \
dapl/include/dapl_vendor.h \
- dapl/openib_cma/dapl_ib_dto.h \
+ dapl/openib_common/dapl_ib_dto.h \
+ dapl/openib_common/dapl_ib_common.h \
dapl/openib_cma/dapl_ib_util.h \
dapl/openib_cma/linux/openib_osd.h \
- dapl/openib_scm/dapl_ib_dto.h \
dapl/openib_scm/dapl_ib_util.h \
dapl/openib_scm/linux/openib_osd.h \
dat/udat/libdat2.map \
diff --git a/dapl/common/dapl_cr_accept.c b/dapl/common/dapl_cr_accept.c
index 76a841e..5df9458 100644
--- a/dapl/common/dapl_cr_accept.c
+++ b/dapl/common/dapl_cr_accept.c
@@ -180,7 +180,6 @@ dapl_cr_accept(IN DAT_CR_HANDLE cr_handle,
entry_ep_state = ep_ptr->param.ep_state;
entry_ep_handle = cr_ptr->param.local_ep_handle;
ep_ptr->param.ep_state = DAT_EP_STATE_COMPLETION_PENDING;
- ep_ptr->cm_handle = cr_ptr->ib_cm_handle;
ep_ptr->cr_ptr = cr_ptr;
ep_ptr->param.remote_ia_address_ptr =
cr_ptr->param.remote_ia_address_ptr;
diff --git a/dapl/dirs b/dapl/dirs
index e865dfb..e721ef5 100644
--- a/dapl/dirs
+++ b/dapl/dirs
@@ -1 +1 @@
-DIRS = ibal openib_scm openib_cma
+DIRS = ibal openib_common openib_scm openib_cma
diff --git a/dapl/include/dapl_debug.h b/dapl/include/dapl_debug.h
index 92e3d3b..37edf90 100644
--- a/dapl/include/dapl_debug.h
+++ b/dapl/include/dapl_debug.h
@@ -66,7 +66,8 @@ typedef enum
DAPL_DBG_TYPE_EXCEPTION = 0x0400,
DAPL_DBG_TYPE_SRQ = 0x0800,
DAPL_DBG_TYPE_CNTR = 0x1000,
- DAPL_DBG_TYPE_CM_LIST = 0x2000
+ DAPL_DBG_TYPE_CM_LIST = 0x2000,
+ DAPL_DBG_TYPE_THREAD = 0x4000
} DAPL_DBG_TYPE;
diff --git a/dapl/openib_cma/SOURCES b/dapl/openib_cma/SOURCES
index fd67d07..f1c5002 100644
--- a/dapl/openib_cma/SOURCES
+++ b/dapl/openib_cma/SOURCES
@@ -18,16 +18,17 @@ USE_MSVCRT = 1
SOURCES = \
udapl.rc \
- ..\dapl_common_src.c \
- ..\dapl_udapl_src.c \
- dapl_ib_cq.c \
- dapl_ib_extensions.c \
- dapl_ib_mem.c \
- dapl_ib_qp.c \
- dapl_ib_util.c \
- dapl_ib_cm.c
-
-INCLUDES = ..\include;..\common;windows;..\..\dat\include;\
+ ..\dapl_common_src.c \
+ ..\dapl_udapl_src.c \
+ ..\openib_common\mem.c \
+ ..\openib_common\util.c \
+ ..\openib_common\cq.c \
+ ..\openib_common\qp.c \
+ ..\openib_common\ib_extensions.c \
+ device.c \
+ cm.c
+
+INCLUDES = ..\include;..\openib_common;..\common;windows;..\..\dat\include;\
..\..\dat\udat\windows;..\udapl\windows;\
..\..\..\..\inc;..\..\..\..\inc\user;..\..\..\libibverbs\include;\
..\..\..\librdmacm\include
diff --git a/dapl/openib_cma/cm.c b/dapl/openib_cma/cm.c
new file mode 100644
index 0000000..497f78a
--- /dev/null
+++ b/dapl/openib_cma/cm.c
@@ -0,0 +1,1338 @@
+/*
+ * Copyright (c) 2005 Voltaire Inc. All rights reserved.
+ * Copyright (c) 2005-2007 Intel Corporation. All rights reserved.
+ * Copyright (c) 2004-2005, Mellanox Technologies, Inc. All rights reserved.
+ * Copyright (c) 2003 Topspin Corporation. All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ *
+ * This Software is licensed under one of the following licenses:
+ *
+ * 1) under the terms of the "Common Public License 1.0" a copy of which is
+ * available from the Open Source Initiative, see
+ * http://www.opensource.org/licenses/cpl.php.
+ *
+ * 2) under the terms of the "The BSD License" a copy of which is
+ * available from the Open Source Initiative, see
+ * http://www.opensource.org/licenses/bsd-license.php.
+ *
+ * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
+ * copy of which is available from the Open Source Initiative, see
+ * http://www.opensource.org/licenses/gpl-license.php.
+ *
+ * Licensee has the right to choose one of the above licenses.
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice and one of the license notices.
+ *
+ * Redistributions in binary form must reproduce both the above copyright
+ * notice, one of the license notices in the documentation
+ * and/or other materials provided with the distribution.
+ */
+
+/**********************************************************************
+ *
+ * MODULE: dapl_ib_cm.c
+ *
+ * PURPOSE: The OFED provider - uCMA, name and route resolution
+ *
+ * $Id: $
+ *
+ **********************************************************************/
+
+#include "dapl.h"
+#include "dapl_adapter_util.h"
+#include "dapl_evd_util.h"
+#include "dapl_cr_util.h"
+#include "dapl_name_service.h"
+#include "dapl_ib_util.h"
+#include "dapl_vendor.h"
+#include "dapl_osd.h"
+
+extern struct rdma_event_channel *g_cm_events;
+
+/* local prototypes */
+static struct dapl_cm_id *dapli_req_recv(struct dapl_cm_id *conn,
+ struct rdma_cm_event *event);
+static void dapli_cm_active_cb(struct dapl_cm_id *conn,
+ struct rdma_cm_event *event);
+static void dapli_cm_passive_cb(struct dapl_cm_id *conn,
+ struct rdma_cm_event *event);
+static void dapli_addr_resolve(struct dapl_cm_id *conn);
+static void dapli_route_resolve(struct dapl_cm_id *conn);
+
+/* cma requires 16 bit SID, in network order */
+#define IB_PORT_MOD 32001
+#define IB_PORT_BASE (65535 - IB_PORT_MOD)
+#define SID_TO_PORT(SID) \
+ (SID > 0xffff ? \
+ htons((unsigned short)((SID % IB_PORT_MOD) + IB_PORT_BASE)) :\
+ htons((unsigned short)SID))
+
+#define PORT_TO_SID(p) ntohs(p)
+
+/* private data header to validate consumer rejects versus abnormal events */
+struct dapl_pdata_hdr {
+ DAT_UINT32 version;
+};
+
+static void dapli_addr_resolve(struct dapl_cm_id *conn)
+{
+ int ret;
+#ifdef DAPL_DBG
+ struct rdma_addr *ipaddr = &conn->cm_id->route.addr;
+#endif
+ dapl_dbg_log(DAPL_DBG_TYPE_CM,
+ " addr_resolve: cm_id %p SRC %x DST %x\n",
+ conn->cm_id, ntohl(((struct sockaddr_in *)
+ &ipaddr->src_addr)->sin_addr.s_addr),
+ ntohl(((struct sockaddr_in *)
+ &ipaddr->dst_addr)->sin_addr.s_addr));
+
+ ret = rdma_resolve_route(conn->cm_id, conn->route_timeout);
+ if (ret) {
+ dapl_log(DAPL_DBG_TYPE_ERR,
+ " dapl_cma_connect: rdma_resolve_route ERR 0x%x %s\n",
+ ret, strerror(errno));
+ dapl_evd_connection_callback(conn,
+ IB_CME_LOCAL_FAILURE,
+ NULL, conn->ep);
+ }
+}
+
+static void dapli_route_resolve(struct dapl_cm_id *conn)
+{
+ int ret;
+#ifdef DAPL_DBG
+ struct rdma_addr *ipaddr = &conn->cm_id->route.addr;
+ struct ib_addr *ibaddr = &conn->cm_id->route.addr.addr.ibaddr;
+#endif
+
+ dapl_dbg_log(DAPL_DBG_TYPE_CM,
+ " route_resolve: cm_id %p SRC %x DST %x PORT %d\n",
+ conn->cm_id, ntohl(((struct sockaddr_in *)
+ &ipaddr->src_addr)->sin_addr.s_addr),
+ ntohl(((struct sockaddr_in *)
+ &ipaddr->dst_addr)->sin_addr.s_addr),
+ ntohs(((struct sockaddr_in *)
+ &ipaddr->dst_addr)->sin_port));
+
+ dapl_dbg_log(DAPL_DBG_TYPE_CM,
+ " route_resolve: SRC GID subnet %016llx id %016llx\n",
+ (unsigned long long)
+ ntohll(ibaddr->sgid.global.subnet_prefix),
+ (unsigned long long)
+ ntohll(ibaddr->sgid.global.interface_id));
+
+ dapl_dbg_log(DAPL_DBG_TYPE_CM,
+ " route_resolve: DST GID subnet %016llx id %016llx\n",
+ (unsigned long long)
+ ntohll(ibaddr->dgid.global.subnet_prefix),
+ (unsigned long long)
+ ntohll(ibaddr->dgid.global.interface_id));
+
+ dapl_dbg_log(DAPL_DBG_TYPE_CM,
+ " route_resolve: cm_id %p pdata %p plen %d rr %d ind %d\n",
+ conn->cm_id,
+ conn->params.private_data,
+ conn->params.private_data_len,
+ conn->params.responder_resources,
+ conn->params.initiator_depth);
+
+ ret = rdma_connect(conn->cm_id, &conn->params);
+ if (ret) {
+ dapl_log(DAPL_DBG_TYPE_ERR,
+ " dapl_cma_connect: rdma_connect ERR %d %s\n",
+ ret, strerror(errno));
+ goto bail;
+ }
+ return;
+
+ bail:
+ dapl_evd_connection_callback(conn,
+ IB_CME_LOCAL_FAILURE, NULL, conn->ep);
+}
+
+dp_ib_cm_handle_t dapls_ib_cm_create(DAPL_EP *ep)
+{
+ dp_ib_cm_handle_t conn;
+ struct rdma_cm_id *cm_id;
+
+ /* Allocate CM and initialize lock */
+ if ((conn = dapl_os_alloc(sizeof(*conn))) == NULL)
+ return NULL;
+
+ dapl_os_memzero(conn, sizeof(*conn));
+ dapl_os_lock_init(&conn->lock);
+
+ /* create CM_ID, bind to local device, create QP */
+ if (rdma_create_id(g_cm_events, &cm_id, (void *)conn, RDMA_PS_TCP)) {
+ dapl_os_free(conn, sizeof(*conn));
+ return NULL;
+ }
+ conn->cm_id = cm_id;
+
+ /* setup timers for address and route resolution */
+ conn->arp_timeout = dapl_os_get_env_val("DAPL_CM_ARP_TIMEOUT_MS",
+ IB_ARP_TIMEOUT);
+ conn->arp_retries = dapl_os_get_env_val("DAPL_CM_ARP_RETRY_COUNT",
+ IB_ARP_RETRY_COUNT);
+ conn->route_timeout = dapl_os_get_env_val("DAPL_CM_ROUTE_TIMEOUT_MS",
+ IB_ROUTE_TIMEOUT);
+ conn->route_retries = dapl_os_get_env_val("DAPL_CM_ROUTE_RETRY_COUNT",
+ IB_ROUTE_RETRY_COUNT);
+ if (ep != NULL) {
+ conn->ep = ep;
+ conn->hca = ((DAPL_IA *)ep->param.ia_handle)->hca_ptr;
+ }
+
+ return conn;
+}
+
+/*
+ * Called from consumer thread via dat_ep_free().
+ * CANNOT be called from the async event processing thread
+ * dapli_cma_event_cb() since a cm_id reference is held and
+ * a deadlock will occur.
+ */
+
+void dapls_ib_cm_free(dp_ib_cm_handle_t conn, DAPL_EP *ep)
+{
+ struct rdma_cm_id *cm_id;
+
+ if (conn == NULL)
+ return;
+
+ dapl_dbg_log(DAPL_DBG_TYPE_CM,
+ " destroy_conn: conn %p id %d\n", conn, conn->cm_id);
+
+ dapl_os_lock(&conn->lock);
+ conn->destroy = 1;
+
+ if (ep != NULL) {
+ ep->cm_handle = NULL;
+ ep->qp_handle = NULL;
+ ep->qp_state = IB_QP_STATE_ERROR;
+ }
+
+ cm_id = conn->cm_id;
+ conn->cm_id = NULL;
+ dapl_os_unlock(&conn->lock);
+
+ /*
+ * rdma_destroy_id will force synchronization with async CM event
+ * thread since it blocks until the in-process event reference
+ * is cleared during our event processing call exit.
+ */
+ if (cm_id) {
+ if (cm_id->qp)
+ rdma_destroy_qp(cm_id);
+
+ rdma_destroy_id(cm_id);
+ }
+ dapl_os_free(conn, sizeof(*conn));
+}
+
+static struct dapl_cm_id *dapli_req_recv(struct dapl_cm_id *conn,
+ struct rdma_cm_event *event)
+{
+ struct dapl_cm_id *new_conn;
+#ifdef DAPL_DBG
+ struct rdma_addr *ipaddr = &event->id->route.addr;
+#endif
+
+ if (conn->sp == NULL) {
+ dapl_dbg_log(DAPL_DBG_TYPE_ERR,
+			     " dapli_req_recv: on invalid listen handle\n");
+ return NULL;
+ }
+
+ /* allocate new cm_id and merge listen parameters */
+ new_conn = dapl_os_alloc(sizeof(*new_conn));
+ if (new_conn) {
+ (void)dapl_os_memzero(new_conn, sizeof(*new_conn));
+ dapl_os_lock_init(&new_conn->lock);
+ new_conn->cm_id = event->id; /* provided by uCMA */
+ event->id->context = new_conn; /* update CM_ID context */
+ new_conn->sp = conn->sp;
+ new_conn->hca = conn->hca;
+
+ /* Get requesters connect data, setup for accept */
+ new_conn->params.responder_resources =
+ DAPL_MIN(event->param.conn.responder_resources,
+ conn->hca->ib_trans.rd_atom_in);
+ new_conn->params.initiator_depth =
+ DAPL_MIN(event->param.conn.initiator_depth,
+ conn->hca->ib_trans.rd_atom_out);
+
+ new_conn->params.flow_control = event->param.conn.flow_control;
+ new_conn->params.rnr_retry_count =
+ event->param.conn.rnr_retry_count;
+ new_conn->params.retry_count = event->param.conn.retry_count;
+
+ /* save private data */
+ if (event->param.conn.private_data_len) {
+ dapl_os_memcpy(new_conn->p_data,
+ event->param.conn.private_data,
+ event->param.conn.private_data_len);
+ new_conn->params.private_data = new_conn->p_data;
+ new_conn->params.private_data_len =
+ event->param.conn.private_data_len;
+ }
+
+ dapl_dbg_log(DAPL_DBG_TYPE_CM, " passive_cb: "
+ "REQ: SP %p PORT %d LID %d "
+ "NEW CONN %p ID %p pdata %p,%d\n",
+ new_conn->sp, ntohs(((struct sockaddr_in *)
+ &ipaddr->src_addr)->sin_port),
+ event->listen_id, new_conn, event->id,
+ event->param.conn.private_data,
+ event->param.conn.private_data_len);
+
+ dapl_dbg_log(DAPL_DBG_TYPE_CM, " passive_cb: "
+ "REQ: IP SRC %x PORT %d DST %x PORT %d "
+ "rr %d init %d\n", ntohl(((struct sockaddr_in *)
+ &ipaddr->src_addr)->
+ sin_addr.s_addr),
+ ntohs(((struct sockaddr_in *)
+ &ipaddr->src_addr)->sin_port),
+ ntohl(((struct sockaddr_in *)
+ &ipaddr->dst_addr)->sin_addr.s_addr),
+ ntohs(((struct sockaddr_in *)
+ &ipaddr->dst_addr)->sin_port),
+ new_conn->params.responder_resources,
+ new_conn->params.initiator_depth);
+ }
+ return new_conn;
+}
+
+static void dapli_cm_active_cb(struct dapl_cm_id *conn,
+ struct rdma_cm_event *event)
+{
+ dapl_dbg_log(DAPL_DBG_TYPE_CM,
+ " active_cb: conn %p id %d event %d\n",
+ conn, conn->cm_id, event->event);
+
+ dapl_os_lock(&conn->lock);
+ if (conn->destroy) {
+ dapl_os_unlock(&conn->lock);
+ return;
+ }
+ dapl_os_unlock(&conn->lock);
+
+ /* There is a chance that we can get events after
+ * the consumer calls disconnect in a pending state
+ * since the IB CM and uDAPL states are not shared.
+ * In some cases, IB CM could generate either a DCONN
+ * or CONN_ERR after the consumer returned from
+ * dapl_ep_disconnect with a DISCONNECTED event
+ * already queued. Check state here and bail to
+ * avoid any events after a disconnect.
+ */
+ if (DAPL_BAD_HANDLE(conn->ep, DAPL_MAGIC_EP))
+ return;
+
+ dapl_os_lock(&conn->ep->header.lock);
+ if (conn->ep->param.ep_state == DAT_EP_STATE_DISCONNECTED) {
+ dapl_os_unlock(&conn->ep->header.lock);
+ return;
+ }
+ if (event->event == RDMA_CM_EVENT_DISCONNECTED)
+ conn->ep->param.ep_state = DAT_EP_STATE_DISCONNECTED;
+
+ dapl_os_unlock(&conn->ep->header.lock);
+
+ switch (event->event) {
+ case RDMA_CM_EVENT_UNREACHABLE:
+ case RDMA_CM_EVENT_CONNECT_ERROR:
+ {
+ dapl_log(DAPL_DBG_TYPE_WARN,
+ "dapl_cma_active: CONN_ERR event=0x%x"
+ " status=%d %s DST %s, %d\n",
+ event->event, event->status,
+ (event->status == -ETIMEDOUT) ? "TIMEOUT" : "",
+ inet_ntoa(((struct sockaddr_in *)
+ &conn->cm_id->route.addr.dst_addr)->
+ sin_addr),
+ ntohs(((struct sockaddr_in *)
+ &conn->cm_id->route.addr.dst_addr)->
+ sin_port));
+
+ /* per DAT SPEC provider always returns UNREACHABLE */
+ dapl_evd_connection_callback(conn,
+ IB_CME_DESTINATION_UNREACHABLE,
+ NULL, conn->ep);
+ break;
+ }
+ case RDMA_CM_EVENT_REJECTED:
+ {
+ ib_cm_events_t cm_event;
+ unsigned char *pdata = NULL;
+
+ dapl_dbg_log(DAPL_DBG_TYPE_CM,
+ " dapli_cm_active_handler: REJECTED reason=%d\n",
+ event->status);
+
+ /* valid REJ from consumer will always contain private data */
+ if (event->status == 28 &&
+ event->param.conn.private_data_len) {
+ cm_event =
+ IB_CME_DESTINATION_REJECT_PRIVATE_DATA;
+ pdata =
+ (unsigned char *)event->param.conn.
+ private_data +
+ sizeof(struct dapl_pdata_hdr);
+ } else {
+ cm_event = IB_CME_DESTINATION_REJECT;
+ dapl_log(DAPL_DBG_TYPE_WARN,
+ "dapl_cma_active: non-consumer REJ,"
+ " reason=%d, DST %s, %d\n",
+ event->status,
+ inet_ntoa(((struct sockaddr_in *)
+ &conn->cm_id->route.addr.
+ dst_addr)->sin_addr),
+ ntohs(((struct sockaddr_in *)
+ &conn->cm_id->route.addr.
+ dst_addr)->sin_port));
+ }
+ dapl_evd_connection_callback(conn, cm_event, pdata,
+ conn->ep);
+ break;
+ }
+ case RDMA_CM_EVENT_ESTABLISHED:
+ dapl_dbg_log(DAPL_DBG_TYPE_CM,
+ " active_cb: cm_id %d PORT %d CONNECTED to %s!\n",
+ conn->cm_id, ntohs(((struct sockaddr_in *)
+ &conn->cm_id->route.addr.
+ dst_addr)->sin_port),
+ inet_ntoa(((struct sockaddr_in *)
+ &conn->cm_id->route.addr.dst_addr)->
+ sin_addr));
+
+ /* setup local and remote ports for ep query */
+ conn->ep->param.remote_port_qual =
+ PORT_TO_SID(rdma_get_dst_port(conn->cm_id));
+ conn->ep->param.local_port_qual =
+ PORT_TO_SID(rdma_get_src_port(conn->cm_id));
+
+ dapl_evd_connection_callback(conn, IB_CME_CONNECTED,
+ event->param.conn.private_data,
+ conn->ep);
+ break;
+
+ case RDMA_CM_EVENT_DISCONNECTED:
+ dapl_dbg_log(DAPL_DBG_TYPE_CM,
+ " active_cb: DISC EVENT - EP %p\n",conn->ep);
+ rdma_disconnect(conn->cm_id); /* required for DREP */
+ /* validate EP handle */
+ if (!DAPL_BAD_HANDLE(conn->ep, DAPL_MAGIC_EP))
+ dapl_evd_connection_callback(conn,
+ IB_CME_DISCONNECTED,
+ NULL, conn->ep);
+ break;
+ default:
+ dapl_dbg_log(DAPL_DBG_TYPE_ERR,
+ " dapli_cm_active_cb_handler: Unexpected CM "
+ "event %d on ID 0x%p\n", event->event,
+ conn->cm_id);
+ break;
+ }
+
+ return;
+}
+
+static void dapli_cm_passive_cb(struct dapl_cm_id *conn,
+ struct rdma_cm_event *event)
+{
+ struct dapl_cm_id *new_conn;
+
+ dapl_dbg_log(DAPL_DBG_TYPE_CM,
+ " passive_cb: conn %p id %d event %d\n",
+ conn, event->id, event->event);
+
+ dapl_os_lock(&conn->lock);
+ if (conn->destroy) {
+ dapl_os_unlock(&conn->lock);
+ return;
+ }
+ dapl_os_unlock(&conn->lock);
+
+ switch (event->event) {
+ case RDMA_CM_EVENT_CONNECT_REQUEST:
+ /* create new conn object with new conn_id from event */
+ new_conn = dapli_req_recv(conn, event);
+
+ if (new_conn)
+ dapls_cr_callback(new_conn,
+ IB_CME_CONNECTION_REQUEST_PENDING,
+ event->param.conn.private_data,
+ new_conn->sp);
+ break;
+ case RDMA_CM_EVENT_UNREACHABLE:
+ case RDMA_CM_EVENT_CONNECT_ERROR:
+ dapl_log(DAPL_DBG_TYPE_WARN,
+ "dapl_cm_passive: CONN_ERR event=0x%x status=%d %s,"
+ " DST %s,%d\n",
+ event->event, event->status,
+ (event->status == -ETIMEDOUT) ? "TIMEOUT" : "",
+ inet_ntoa(((struct sockaddr_in *)
+ &conn->cm_id->route.addr.dst_addr)->
+ sin_addr), ntohs(((struct sockaddr_in *)
+ &conn->cm_id->route.addr.
+ dst_addr)->sin_port));
+
+ dapls_cr_callback(conn, IB_CME_DESTINATION_UNREACHABLE,
+ NULL, conn->sp);
+ break;
+
+ case RDMA_CM_EVENT_REJECTED:
+ {
+		/* will always be abnormal NON-consumer from active side */
+ dapl_log(DAPL_DBG_TYPE_WARN,
+ "dapl_cm_passive: non-consumer REJ, reason=%d,"
+ " DST %s, %d\n",
+ event->status,
+ inet_ntoa(((struct sockaddr_in *)
+ &conn->cm_id->route.addr.dst_addr)->
+ sin_addr),
+ ntohs(((struct sockaddr_in *)
+ &conn->cm_id->route.addr.dst_addr)->
+ sin_port));
+
+ dapls_cr_callback(conn, IB_CME_DESTINATION_REJECT,
+ NULL, conn->sp);
+ break;
+ }
+ case RDMA_CM_EVENT_ESTABLISHED:
+ dapl_dbg_log(DAPL_DBG_TYPE_CM,
+ " passive_cb: cm_id %p PORT %d CONNECTED from 0x%x!\n",
+ conn->cm_id, ntohs(((struct sockaddr_in *)
+ &conn->cm_id->route.addr.
+ src_addr)->sin_port),
+ ntohl(((struct sockaddr_in *)
+ &conn->cm_id->route.addr.dst_addr)->
+ sin_addr.s_addr));
+
+ dapls_cr_callback(conn, IB_CME_CONNECTED, NULL, conn->sp);
+
+ break;
+ case RDMA_CM_EVENT_DISCONNECTED:
+ rdma_disconnect(conn->cm_id); /* required for DREP */
+ /* validate SP handle context */
+ if (!DAPL_BAD_HANDLE(conn->sp, DAPL_MAGIC_PSP) ||
+ !DAPL_BAD_HANDLE(conn->sp, DAPL_MAGIC_RSP))
+ dapls_cr_callback(conn,
+ IB_CME_DISCONNECTED, NULL, conn->sp);
+ break;
+ default:
+ dapl_dbg_log(DAPL_DBG_TYPE_ERR, " passive_cb: "
+ "Unexpected CM event %d on ID 0x%p\n",
+ event->event, conn->cm_id);
+ break;
+ }
+
+ return;
+}
+
+/************************ DAPL provider entry points **********************/
+
+/*
+ * dapls_ib_connect
+ *
+ * Initiate a connection with the passive listener on another node
+ *
+ * Input:
+ * ep_handle,
+ * remote_ia_address,
+ * remote_conn_qual,
+ * prd_size size of private data and structure
+ * prd_prt pointer to private data structure
+ *
+ * Output:
+ * none
+ *
+ * Returns:
+ * DAT_SUCCESS
+ * DAT_INSUFFICIENT_RESOURCES
+ * DAT_INVALID_PARAMETER
+ *
+ */
+DAT_RETURN dapls_ib_connect(IN DAT_EP_HANDLE ep_handle,
+ IN DAT_IA_ADDRESS_PTR r_addr,
+ IN DAT_CONN_QUAL r_qual,
+ IN DAT_COUNT p_size, IN void *p_data)
+{
+	struct dapl_ep *ep_ptr = ep_handle;
+	struct dapl_cm_id *conn;
+	int ret;
+
+	/* Sanity check */
+	if (NULL == ep_ptr)
+		return DAT_SUCCESS;
+
+	dapl_dbg_log(DAPL_DBG_TYPE_CM,
+		     " connect: rSID 0x%llx rPort %d, pdata %p, ln %d\n",
+		     r_qual, ntohs(SID_TO_PORT(r_qual)), p_data, p_size);
+
+	/* rdma conn and cm_id pre-bound; reference via ep_ptr->cm_handle */
+	conn = ep_ptr->cm_handle;
+
+ /* Setup QP/CM parameters and private data in cm_id */
+ (void)dapl_os_memzero(&conn->params, sizeof(conn->params));
+ conn->params.responder_resources =
+ ep_ptr->param.ep_attr.max_rdma_read_in;
+ conn->params.initiator_depth = ep_ptr->param.ep_attr.max_rdma_read_out;
+ conn->params.flow_control = 1;
+ conn->params.rnr_retry_count = IB_RNR_RETRY_COUNT;
+ conn->params.retry_count = IB_RC_RETRY_COUNT;
+ if (p_size) {
+ dapl_os_memcpy(conn->p_data, p_data, p_size);
+ conn->params.private_data = conn->p_data;
+ conn->params.private_data_len = p_size;
+ }
+
+ /* copy in remote address, need a copy for retry attempts */
+ dapl_os_memcpy(&conn->r_addr, r_addr, sizeof(*r_addr));
+
+ /* Resolve remote address, src already bound during QP create */
+ ((struct sockaddr_in *)&conn->r_addr)->sin_port = SID_TO_PORT(r_qual);
+ ((struct sockaddr_in *)&conn->r_addr)->sin_family = AF_INET;
+
+ ret = rdma_resolve_addr(conn->cm_id, NULL,
+ (struct sockaddr *)&conn->r_addr,
+ conn->arp_timeout);
+ if (ret) {
+ dapl_log(DAPL_DBG_TYPE_ERR,
+ " dapl_cma_connect: rdma_resolve_addr ERR 0x%x %s\n",
+ ret, strerror(errno));
+ return dapl_convert_errno(errno, "ib_connect");
+ }
+ dapl_dbg_log(DAPL_DBG_TYPE_CM,
+ " connect: resolve_addr: cm_id %p -> %s port %d\n",
+ conn->cm_id,
+ inet_ntoa(((struct sockaddr_in *)&conn->r_addr)->sin_addr),
+ ((struct sockaddr_in *)&conn->r_addr)->sin_port);
+
+ return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_disconnect
+ *
+ * Disconnect an EP
+ *
+ * Input:
+ * ep_handle,
+ * disconnect_flags
+ *
+ * Output:
+ * none
+ *
+ * Returns:
+ * DAT_SUCCESS
+ *
+ */
+DAT_RETURN
+dapls_ib_disconnect(IN DAPL_EP * ep_ptr, IN DAT_CLOSE_FLAGS close_flags)
+{
+ dp_ib_cm_handle_t conn = ep_ptr->cm_handle;
+ int ret;
+
+ dapl_dbg_log(DAPL_DBG_TYPE_CM,
+ " disconnect(ep %p, conn %p, id %d flags %x)\n",
+ ep_ptr, conn, (conn ? conn->cm_id : 0), close_flags);
+
+ if ((conn == IB_INVALID_HANDLE) || (conn->cm_id == NULL))
+ return DAT_SUCCESS;
+
+ /* no graceful half-pipe disconnect option */
+ ret = rdma_disconnect(conn->cm_id);
+ if (ret)
+ dapl_dbg_log(DAPL_DBG_TYPE_ERR,
+ " disconnect: ID %p ret 0x%x\n",
+ ep_ptr->cm_handle, ret);
+
+ /*
+ * DAT event notification occurs from the callback
+ * Note: will fire even if DREQ goes unanswered on timeout
+ */
+ return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_disconnect_clean
+ *
+ * Clean up outstanding connection data. This routine is invoked
+ * after the final disconnect callback has occurred. Only on the
+ * ACTIVE side of a connection.
+ *
+ * Input:
+ * ep_ptr DAPL_EP
+ * active Indicates active side of connection
+ *
+ * Output:
+ * none
+ *
+ * Returns:
+ * void
+ *
+ */
+void
+dapls_ib_disconnect_clean(IN DAPL_EP * ep_ptr,
+ IN DAT_BOOLEAN active,
+ IN const ib_cm_events_t ib_cm_event)
+{
+ /* nothing to do */
+ return;
+}
+
+/*
+ * dapl_ib_setup_conn_listener
+ *
+ * Have the CM set up a connection listener.
+ *
+ * Input:
+ * ibm_hca_handle HCA handle
+ * qp_handle QP handle
+ *
+ * Output:
+ * none
+ *
+ * Returns:
+ * DAT_SUCCESS
+ * DAT_INSUFFICIENT_RESOURCES
+ * DAT_INTERNAL_ERROR
+ *	DAT_CONN_QUAL_UNAVAILABLE
+ * DAT_CONN_QUAL_IN_USE
+ *
+ */
+DAT_RETURN
+dapls_ib_setup_conn_listener(IN DAPL_IA * ia_ptr,
+ IN DAT_UINT64 ServiceID, IN DAPL_SP * sp_ptr)
+{
+ DAT_RETURN dat_status = DAT_SUCCESS;
+ ib_cm_srvc_handle_t conn;
+ DAT_SOCK_ADDR6 addr; /* local binding address */
+
+ /* Allocate CM and initialize lock */
+ if ((conn = dapl_os_alloc(sizeof(*conn))) == NULL)
+ return DAT_INSUFFICIENT_RESOURCES;
+
+ dapl_os_memzero(conn, sizeof(*conn));
+ dapl_os_lock_init(&conn->lock);
+
+ /* create CM_ID, bind to local device, create QP */
+ if (rdma_create_id
+ (g_cm_events, &conn->cm_id, (void *)conn, RDMA_PS_TCP)) {
+ dapl_os_free(conn, sizeof(*conn));
+ return (dapl_convert_errno(errno, "setup_listener"));
+ }
+
+ /* open identifies the local device; per DAT specification */
+ /* Get family and address then set port to consumer's ServiceID */
+ dapl_os_memcpy(&addr, &ia_ptr->hca_ptr->hca_address, sizeof(addr));
+ ((struct sockaddr_in *)&addr)->sin_port = SID_TO_PORT(ServiceID);
+
+ if (rdma_bind_addr(conn->cm_id, (struct sockaddr *)&addr)) {
+ if ((errno == EBUSY) || (errno == EADDRINUSE))
+ dat_status = DAT_CONN_QUAL_IN_USE;
+ else
+ dat_status =
+ dapl_convert_errno(errno, "setup_listener");
+ goto bail;
+ }
+
+ dapl_dbg_log(DAPL_DBG_TYPE_CM,
+ " listen(ia_ptr %p SID 0x%llx Port %d sp %p conn %p id %d)\n",
+ ia_ptr, ServiceID, ntohs(SID_TO_PORT(ServiceID)),
+ sp_ptr, conn, conn->cm_id);
+
+ sp_ptr->cm_srvc_handle = conn;
+ conn->sp = sp_ptr;
+ conn->hca = ia_ptr->hca_ptr;
+
+ dapl_dbg_log(DAPL_DBG_TYPE_EP,
+ " listen(conn=%p cm_id=%d)\n",
+ sp_ptr->cm_srvc_handle, conn->cm_id);
+
+ if (rdma_listen(conn->cm_id, 0)) { /* max cma backlog */
+
+ if ((errno == EBUSY) || (errno == EADDRINUSE))
+ dat_status = DAT_CONN_QUAL_IN_USE;
+ else
+ dat_status =
+ dapl_convert_errno(errno, "setup_listener");
+ goto bail;
+ }
+
+ /* success */
+ return DAT_SUCCESS;
+
+ bail:
+ rdma_destroy_id(conn->cm_id);
+ dapl_os_free(conn, sizeof(*conn));
+ return dat_status;
+}
+
+/*
+ * dapl_ib_remove_conn_listener
+ *
+ * Have the CM remove a connection listener.
+ *
+ * Input:
+ * ia_handle IA handle
+ * ServiceID IB Channel Service ID
+ *
+ * Output:
+ * none
+ *
+ * Returns:
+ * DAT_SUCCESS
+ * DAT_INVALID_STATE
+ *
+ */
+DAT_RETURN
+dapls_ib_remove_conn_listener(IN DAPL_IA * ia_ptr, IN DAPL_SP * sp_ptr)
+{
+ ib_cm_srvc_handle_t conn = sp_ptr->cm_srvc_handle;
+
+ dapl_dbg_log(DAPL_DBG_TYPE_CM,
+ " remove_listen(ia_ptr %p sp_ptr %p cm_ptr %p)\n",
+ ia_ptr, sp_ptr, conn);
+
+ if (conn != IB_INVALID_HANDLE) {
+ sp_ptr->cm_srvc_handle = NULL;
+ dapls_ib_cm_free(conn, NULL);
+ }
+ return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_accept_connection
+ *
+ * Perform necessary steps to accept a connection
+ *
+ * Input:
+ * cr_handle
+ * ep_handle
+ * private_data_size
+ * private_data
+ *
+ * Output:
+ * none
+ *
+ * Returns:
+ * DAT_SUCCESS
+ * DAT_INSUFFICIENT_RESOURCES
+ * DAT_INTERNAL_ERROR
+ *
+ */
+DAT_RETURN
+dapls_ib_accept_connection(IN DAT_CR_HANDLE cr_handle,
+ IN DAT_EP_HANDLE ep_handle,
+ IN DAT_COUNT p_size, IN const DAT_PVOID p_data)
+{
+ DAPL_CR *cr_ptr = (DAPL_CR *) cr_handle;
+ DAPL_EP *ep_ptr = (DAPL_EP *) ep_handle;
+ DAPL_IA *ia_ptr = ep_ptr->header.owner_ia;
+ struct dapl_cm_id *cr_conn = cr_ptr->ib_cm_handle;
+ int ret;
+ DAT_RETURN dat_status;
+
+ dapl_dbg_log(DAPL_DBG_TYPE_CM,
+ " accept(cr %p conn %p, id %p, p_data %p, p_sz=%d)\n",
+ cr_ptr, cr_conn, cr_conn->cm_id, p_data, p_size);
+
+ /* Obtain size of private data structure & contents */
+ if (p_size > IB_MAX_REP_PDATA_SIZE) {
+ dat_status = DAT_ERROR(DAT_LENGTH_ERROR, DAT_NO_SUBTYPE);
+ goto bail;
+ }
+
+ if (ep_ptr->qp_state == DAPL_QP_STATE_UNATTACHED) {
+ /*
+ * If we are lazy attaching the QP then we may need to
+ * hook it up here. Typically, we run this code only for
+ * DAT_PSP_PROVIDER_FLAG
+ */
+ dat_status = dapls_ib_qp_alloc(ia_ptr, ep_ptr, NULL);
+ if (dat_status != DAT_SUCCESS) {
+ dapl_log(DAPL_DBG_TYPE_ERR,
+ " dapl_cma_accept: qp_alloc ERR %d\n",
+ dat_status);
+ goto bail;
+ }
+ }
+
+ /*
+ * Validate device and port in EP cm_id against inbound
+ * CR cm_id. The pre-allocated EP cm_id is already bound to
+ * a local device (cm_id and QP) when created. Move the QP
+ * to the new cm_id only if device and port numbers match.
+ */
+ if (ep_ptr->cm_handle->cm_id->verbs == cr_conn->cm_id->verbs &&
+ ep_ptr->cm_handle->cm_id->port_num == cr_conn->cm_id->port_num) {
+ /* move QP to new cr_conn, remove QP ref in EP cm_id */
+ cr_conn->cm_id->qp = ep_ptr->cm_handle->cm_id->qp;
+ ep_ptr->cm_handle->cm_id->qp = NULL;
+ dapls_ib_cm_free(ep_ptr->cm_handle, NULL);
+ } else {
+ dapl_log(DAPL_DBG_TYPE_ERR,
+ " dapl_cma_accept: ERR dev(%p!=%p) or"
+ " port mismatch(%d!=%d)\n",
+ ep_ptr->cm_handle->cm_id->verbs, cr_conn->cm_id->verbs,
+ ntohs(ep_ptr->cm_handle->cm_id->port_num),
+ ntohs(cr_conn->cm_id->port_num));
+ dat_status = DAT_INTERNAL_ERROR;
+ goto bail;
+ }
+
+ cr_ptr->param.local_ep_handle = ep_handle;
+ cr_conn->params.private_data = p_data;
+ cr_conn->params.private_data_len = p_size;
+
+ ret = rdma_accept(cr_conn->cm_id, &cr_conn->params);
+ if (ret) {
+ dapl_log(DAPL_DBG_TYPE_ERR, " dapl_cma_accept: ERR %d %s\n",
+ ret, strerror(errno));
+ dat_status = dapl_convert_errno(ret, "accept");
+ goto bail;
+ }
+
+ /* save accepted conn and EP reference, qp_handle unchanged */
+ ep_ptr->cm_handle = cr_conn;
+ cr_conn->ep = ep_ptr;
+
+ /* setup local and remote ports for ep query */
+ /* Note: port qual in network order */
+ ep_ptr->param.remote_port_qual =
+ PORT_TO_SID(rdma_get_dst_port(cr_conn->cm_id));
+ ep_ptr->param.local_port_qual =
+ PORT_TO_SID(rdma_get_src_port(cr_conn->cm_id));
+
+ return DAT_SUCCESS;
+ bail:
+ rdma_reject(cr_conn->cm_id, NULL, 0);
+ dapls_ib_cm_free(cr_conn, NULL);
+ return dat_status;
+}
+
+/*
+ * dapls_ib_reject_connection
+ *
+ * Reject a connection
+ *
+ * Input:
+ * cr_handle
+ *
+ * Output:
+ * none
+ *
+ * Returns:
+ * DAT_SUCCESS
+ * DAT_INTERNAL_ERROR
+ *
+ */
+DAT_RETURN
+dapls_ib_reject_connection(IN dp_ib_cm_handle_t cm_handle,
+ IN int reason,
+ IN DAT_COUNT private_data_size,
+ IN const DAT_PVOID private_data)
+{
+ int ret;
+ int offset = sizeof(struct dapl_pdata_hdr);
+ struct dapl_pdata_hdr pdata_hdr;
+
+ memset(&pdata_hdr, 0, sizeof pdata_hdr);
+ pdata_hdr.version = htonl((DAT_VERSION_MAJOR << 24) |
+ (DAT_VERSION_MINOR << 16) |
+ (VN_PROVIDER_MAJOR << 8) |
+ (VN_PROVIDER_MINOR));
+
+ dapl_dbg_log(DAPL_DBG_TYPE_CM,
+ " reject: handle %p reason %x, ver=%x, data %p, sz=%d\n",
+ cm_handle, reason, ntohl(pdata_hdr.version),
+ private_data, private_data_size);
+
+ if (cm_handle == IB_INVALID_HANDLE) {
+ dapl_dbg_log(DAPL_DBG_TYPE_ERR,
+ " reject: invalid handle: reason %d\n", reason);
+ return DAT_ERROR(DAT_INVALID_HANDLE, DAT_INVALID_HANDLE_CR);
+ }
+
+ if (private_data_size >
+ dapls_ib_private_data_size(NULL, DAPL_PDATA_CONN_REJ,
+ cm_handle->hca))
+ return DAT_ERROR(DAT_INVALID_PARAMETER, DAT_INVALID_ARG3);
+
+ /* setup pdata_hdr and users data, in CR pdata buffer */
+ dapl_os_memcpy(cm_handle->p_data, &pdata_hdr, offset);
+ if (private_data_size)
+ dapl_os_memcpy(cm_handle->p_data + offset,
+ private_data, private_data_size);
+
+ /*
+ * Always some private data with reject so active peer can
+ * determine real application reject from an abnormal
+ * application termination
+ */
+ ret = rdma_reject(cm_handle->cm_id,
+ cm_handle->p_data, offset + private_data_size);
+
+ dapls_ib_cm_free(cm_handle, NULL);
+ return dapl_convert_errno(ret, "reject");
+}
+
+/*
+ * dapls_ib_cm_remote_addr
+ *
+ * Obtain the remote IP address given a connection
+ *
+ * Input:
+ * cr_handle
+ *
+ * Output:
+ * remote_ia_address: where to place the remote address
+ *
+ * Returns:
+ * DAT_SUCCESS
+ * DAT_INVALID_HANDLE
+ *
+ */
+DAT_RETURN
+dapls_ib_cm_remote_addr(IN DAT_HANDLE dat_handle, OUT DAT_SOCK_ADDR6 * raddr)
+{
+ DAPL_HEADER *header;
+ dp_ib_cm_handle_t ib_cm_handle;
+ struct rdma_addr *ipaddr;
+
+ dapl_dbg_log(DAPL_DBG_TYPE_EP,
+ " remote_addr(cm_handle=%p, r_addr=%p)\n",
+ dat_handle, raddr);
+
+ header = (DAPL_HEADER *) dat_handle;
+
+ if (header->magic == DAPL_MAGIC_EP)
+ ib_cm_handle = ((DAPL_EP *) dat_handle)->cm_handle;
+ else if (header->magic == DAPL_MAGIC_CR)
+ ib_cm_handle = ((DAPL_CR *) dat_handle)->ib_cm_handle;
+ else
+ return DAT_INVALID_HANDLE;
+
+ /* get remote IP address from cm_id route */
+ ipaddr = &ib_cm_handle->cm_id->route.addr;
+
+ dapl_dbg_log(DAPL_DBG_TYPE_CM,
+ " remote_addr: conn %p id %p SRC %x DST %x PORT %d\n",
+ ib_cm_handle, ib_cm_handle->cm_id,
+ ntohl(((struct sockaddr_in *)
+ &ipaddr->src_addr)->sin_addr.s_addr),
+ ntohl(((struct sockaddr_in *)
+ &ipaddr->dst_addr)->sin_addr.s_addr),
+ ntohs(((struct sockaddr_in *)
+ &ipaddr->dst_addr)->sin_port));
+
+ dapl_os_memcpy(raddr, &ipaddr->dst_addr, sizeof(DAT_SOCK_ADDR));
+ return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_private_data_size
+ *
+ * Return the size of private data given a connection op type
+ *
+ * Input:
+ * prd_ptr private data pointer
+ * conn_op connection operation type
+ * hca_ptr hca pointer, needed for transport type
+ *
+ * If prd_ptr is NULL, this is a query for the max size supported by
+ * the provider, otherwise it is the actual size of the private data
+ * contained in prd_ptr.
+ *
+ *
+ * Output:
+ * None
+ *
+ * Returns:
+ * length of private data
+ *
+ */
+int dapls_ib_private_data_size(IN DAPL_PRIVATE * prd_ptr,
+ IN DAPL_PDATA_OP conn_op, IN DAPL_HCA * hca_ptr)
+{
+ int size;
+
+ if (hca_ptr->ib_hca_handle->device->transport_type
+ == IBV_TRANSPORT_IWARP)
+ return (IWARP_MAX_PDATA_SIZE - sizeof(struct dapl_pdata_hdr));
+
+ switch (conn_op) {
+
+ case DAPL_PDATA_CONN_REQ:
+ size = IB_MAX_REQ_PDATA_SIZE;
+ break;
+ case DAPL_PDATA_CONN_REP:
+ size = IB_MAX_REP_PDATA_SIZE;
+ break;
+ case DAPL_PDATA_CONN_REJ:
+ size = IB_MAX_REJ_PDATA_SIZE - sizeof(struct dapl_pdata_hdr);
+ break;
+ case DAPL_PDATA_CONN_DREQ:
+ size = IB_MAX_DREQ_PDATA_SIZE;
+ break;
+ case DAPL_PDATA_CONN_DREP:
+ size = IB_MAX_DREP_PDATA_SIZE;
+ break;
+ default:
+ size = 0;
+
+ } /* end case */
+
+ return size;
+}
+
+/*
+ * Map all CMA event codes to the DAT equivalent.
+ */
+#define DAPL_IB_EVENT_CNT 13
+
+static struct ib_cm_event_map {
+ const ib_cm_events_t ib_cm_event;
+ DAT_EVENT_NUMBER dat_event_num;
+} ib_cm_event_map[DAPL_IB_EVENT_CNT] = {
+ /* 00 */ {
+ IB_CME_CONNECTED, DAT_CONNECTION_EVENT_ESTABLISHED},
+ /* 01 */ {
+ IB_CME_DISCONNECTED, DAT_CONNECTION_EVENT_DISCONNECTED},
+ /* 02 */ {
+ IB_CME_DISCONNECTED_ON_LINK_DOWN,
+ DAT_CONNECTION_EVENT_DISCONNECTED},
+ /* 03 */ {
+ IB_CME_CONNECTION_REQUEST_PENDING, DAT_CONNECTION_REQUEST_EVENT},
+ /* 04 */ {
+ IB_CME_CONNECTION_REQUEST_PENDING_PRIVATE_DATA,
+ DAT_CONNECTION_REQUEST_EVENT},
+ /* 05 */ {
+ IB_CME_CONNECTION_REQUEST_ACKED, DAT_CONNECTION_REQUEST_EVENT},
+ /* 06 */ {
+ IB_CME_DESTINATION_REJECT,
+ DAT_CONNECTION_EVENT_NON_PEER_REJECTED},
+ /* 07 */ {
+ IB_CME_DESTINATION_REJECT_PRIVATE_DATA,
+ DAT_CONNECTION_EVENT_PEER_REJECTED},
+ /* 08 */ {
+ IB_CME_DESTINATION_UNREACHABLE, DAT_CONNECTION_EVENT_UNREACHABLE},
+ /* 09 */ {
+ IB_CME_TOO_MANY_CONNECTION_REQUESTS,
+ DAT_CONNECTION_EVENT_NON_PEER_REJECTED},
+ /* 10 */ {
+ IB_CME_LOCAL_FAILURE, DAT_CONNECTION_EVENT_BROKEN},
+ /* 11 */ {
+ IB_CME_BROKEN, DAT_CONNECTION_EVENT_BROKEN},
+ /* 12 */ {
+IB_CME_TIMEOUT, DAT_CONNECTION_EVENT_TIMED_OUT},};
+
+/*
+ * dapls_ib_get_dat_event
+ *
+ * Return a DAT connection event given a provider CM event.
+ *
+ * Input:
+ *	ib_cm_event	CM event we need an equivalent DAT event for
+ *
+ * Output:
+ * 	none
+ *
+ * Returns:
+ * 	DAT_EVENT_NUMBER of the translated provider value
+ */
+DAT_EVENT_NUMBER
+dapls_ib_get_dat_event(IN const ib_cm_events_t ib_cm_event,
+ IN DAT_BOOLEAN active)
+{
+ DAT_EVENT_NUMBER dat_event_num;
+ int i;
+
+ active = active;
+
+ dat_event_num = 0;
+ for (i = 0; i < DAPL_IB_EVENT_CNT; i++) {
+ if (ib_cm_event == ib_cm_event_map[i].ib_cm_event) {
+ dat_event_num = ib_cm_event_map[i].dat_event_num;
+ break;
+ }
+ }
+ dapl_dbg_log(DAPL_DBG_TYPE_CALLBACK,
+ "dapls_ib_get_dat_event: event(%s) ib=0x%x dat=0x%x\n",
+ active ? "active" : "passive", ib_cm_event, dat_event_num);
+
+ return dat_event_num;
+}
+
+/*
+ * dapls_ib_get_cm_event
+ *
+ * Return a provider CM event given a DAT connection event.
+ *
+ * Input:
+ *	dat_event_num	DAT event we need an equivalent CM event for
+ *
+ * Output:
+ * 	none
+ *
+ * Returns:
+ * 	ib_cm_event of the translated DAT value
+ */
+ib_cm_events_t dapls_ib_get_cm_event(IN DAT_EVENT_NUMBER dat_event_num)
+{
+ ib_cm_events_t ib_cm_event;
+ int i;
+
+ ib_cm_event = 0;
+ for (i = 0; i < DAPL_IB_EVENT_CNT; i++) {
+ if (dat_event_num == ib_cm_event_map[i].dat_event_num) {
+ ib_cm_event = ib_cm_event_map[i].ib_cm_event;
+ break;
+ }
+ }
+ return ib_cm_event;
+}
+
+void dapli_cma_event_cb(void)
+{
+ struct rdma_cm_event *event;
+
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " cm_event()\n");
+
+ /* process one CM event, fairness */
+ if (!rdma_get_cm_event(g_cm_events, &event)) {
+ struct dapl_cm_id *conn;
+
+ /* set proper conn from cm_id context */
+ if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST)
+ conn = (struct dapl_cm_id *)event->listen_id->context;
+ else
+ conn = (struct dapl_cm_id *)event->id->context;
+
+ dapl_dbg_log(DAPL_DBG_TYPE_CM,
+ " cm_event: EVENT=%d ID=%p LID=%p CTX=%p\n",
+ event->event, event->id, event->listen_id, conn);
+
+ switch (event->event) {
+ case RDMA_CM_EVENT_ADDR_RESOLVED:
+ dapli_addr_resolve(conn);
+ break;
+
+ case RDMA_CM_EVENT_ROUTE_RESOLVED:
+ dapli_route_resolve(conn);
+ break;
+
+ case RDMA_CM_EVENT_ADDR_ERROR:
+ dapl_log(DAPL_DBG_TYPE_WARN,
+ "dapl_cma_active: CM ADDR ERROR: ->"
+ " DST %s retry (%d)..\n",
+ inet_ntoa(((struct sockaddr_in *)
+ &conn->r_addr)->sin_addr),
+ conn->arp_retries);
+
+ /* retry address resolution */
+ if ((--conn->arp_retries) &&
+ (event->status == -ETIMEDOUT)) {
+ int ret;
+ ret = rdma_resolve_addr(conn->cm_id, NULL,
+ (struct sockaddr *)
+ &conn->r_addr,
+ conn->arp_timeout);
+ if (!ret)
+ break;
+ else {
+ dapl_dbg_log(DAPL_DBG_TYPE_WARN,
+ " ERROR: rdma_resolve_addr = "
+ "%d %s\n",
+ ret, strerror(errno));
+ }
+ }
+ /* retries exhausted or resolve_addr failed */
+ dapl_log(DAPL_DBG_TYPE_ERR,
+ "dapl_cma_active: ARP_ERR, retries(%d)"
+ " exhausted -> DST %s,%d\n",
+ IB_ARP_RETRY_COUNT,
+ inet_ntoa(((struct sockaddr_in *)
+ &conn->cm_id->route.addr.dst_addr)->
+ sin_addr),
+ ntohs(((struct sockaddr_in *)
+ &conn->cm_id->route.addr.dst_addr)->
+ sin_port));
+
+ dapl_evd_connection_callback(conn,
+ IB_CME_DESTINATION_UNREACHABLE,
+ NULL, conn->ep);
+ break;
+
+ case RDMA_CM_EVENT_ROUTE_ERROR:
+ dapl_log(DAPL_DBG_TYPE_WARN,
+ "dapl_cma_active: CM ROUTE ERROR: ->"
+ " DST %s retry (%d)..\n",
+ inet_ntoa(((struct sockaddr_in *)
+ &conn->r_addr)->sin_addr),
+ conn->route_retries);
+
+ /* retry route resolution */
+ if ((--conn->route_retries) &&
+ (event->status == -ETIMEDOUT))
+ dapli_addr_resolve(conn);
+ else {
+ dapl_log(DAPL_DBG_TYPE_ERR,
+ "dapl_cma_active: PATH_RECORD_ERR,"
+ " retries(%d) exhausted, DST %s,%d\n",
+ IB_ROUTE_RETRY_COUNT,
+ inet_ntoa(((struct sockaddr_in *)
+ &conn->cm_id->route.addr.
+ dst_addr)->sin_addr),
+ ntohs(((struct sockaddr_in *)
+ &conn->cm_id->route.addr.
+ dst_addr)->sin_port));
+
+ dapl_evd_connection_callback(conn,
+ IB_CME_DESTINATION_UNREACHABLE,
+ NULL, conn->ep);
+ }
+ break;
+
+ case RDMA_CM_EVENT_DEVICE_REMOVAL:
+ dapl_evd_connection_callback(conn,
+ IB_CME_LOCAL_FAILURE,
+ NULL, conn->ep);
+ break;
+ case RDMA_CM_EVENT_CONNECT_REQUEST:
+ case RDMA_CM_EVENT_CONNECT_ERROR:
+ case RDMA_CM_EVENT_UNREACHABLE:
+ case RDMA_CM_EVENT_REJECTED:
+ case RDMA_CM_EVENT_ESTABLISHED:
+ case RDMA_CM_EVENT_DISCONNECTED:
+ /* passive or active */
+ if (conn->sp)
+ dapli_cm_passive_cb(conn, event);
+ else
+ dapli_cm_active_cb(conn, event);
+ break;
+ case RDMA_CM_EVENT_CONNECT_RESPONSE:
+ default:
+ dapl_dbg_log(DAPL_DBG_TYPE_WARN,
+ " cm_event: UNEXPECTED EVENT=%p ID=%p CTX=%p\n",
+ event->event, event->id,
+ event->id->context);
+ break;
+ }
+ /* ack event, unblocks destroy_cm_id in consumer threads */
+ rdma_ack_cm_event(event);
+ }
+}
+
+/*
+ * Local variables:
+ * c-indent-level: 4
+ * c-basic-offset: 4
+ * tab-width: 8
+ * End:
+ */
diff --git a/dapl/openib_cma/dapl_ib_cm.c b/dapl/openib_cma/dapl_ib_cm.c
deleted file mode 100755
index 946cfbd..0000000
--- a/dapl/openib_cma/dapl_ib_cm.c
+++ /dev/null
@@ -1,1297 +0,0 @@
-/*
- * Copyright (c) 2005 Voltaire Inc. All rights reserved.
- * Copyright (c) 2005-2007 Intel Corporation. All rights reserved.
- * Copyright (c) 2004-2005, Mellanox Technologies, Inc. All rights reserved.
- * Copyright (c) 2003 Topspin Corporation. All rights reserved.
- * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
- *
- * This Software is licensed under one of the following licenses:
- *
- * 1) under the terms of the "Common Public License 1.0" a copy of which is
- * available from the Open Source Initiative, see
- * http://www.opensource.org/licenses/cpl.php.
- *
- * 2) under the terms of the "The BSD License" a copy of which is
- * available from the Open Source Initiative, see
- * http://www.opensource.org/licenses/bsd-license.php.
- *
- * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
- * copy of which is available from the Open Source Initiative, see
- * http://www.opensource.org/licenses/gpl-license.php.
- *
- * Licensee has the right to choose one of the above licenses.
- *
- * Redistributions of source code must retain the above copyright
- * notice and one of the license notices.
- *
- * Redistributions in binary form must reproduce both the above copyright
- * notice, one of the license notices in the documentation
- * and/or other materials provided with the distribution.
- */
-
-/**********************************************************************
- *
- * MODULE: dapl_ib_cm.c
- *
- * PURPOSE: The OFED provider - uCMA, name and route resolution
- *
- * $Id: $
- *
- **********************************************************************/
-
-#include "dapl.h"
-#include "dapl_adapter_util.h"
-#include "dapl_evd_util.h"
-#include "dapl_cr_util.h"
-#include "dapl_name_service.h"
-#include "dapl_ib_util.h"
-#include "dapl_vendor.h"
-#include "dapl_osd.h"
-
-extern struct rdma_event_channel *g_cm_events;
-
-/* local prototypes */
-static struct dapl_cm_id *dapli_req_recv(struct dapl_cm_id *conn,
- struct rdma_cm_event *event);
-static void dapli_cm_active_cb(struct dapl_cm_id *conn,
- struct rdma_cm_event *event);
-static void dapli_cm_passive_cb(struct dapl_cm_id *conn,
- struct rdma_cm_event *event);
-static void dapli_addr_resolve(struct dapl_cm_id *conn);
-static void dapli_route_resolve(struct dapl_cm_id *conn);
-
-/* cma requires 16 bit SID, in network order */
-#define IB_PORT_MOD 32001
-#define IB_PORT_BASE (65535 - IB_PORT_MOD)
-#define SID_TO_PORT(SID) \
- (SID > 0xffff ? \
- htons((unsigned short)((SID % IB_PORT_MOD) + IB_PORT_BASE)) :\
- htons((unsigned short)SID))
-
-#define PORT_TO_SID(p) ntohs(p)
-
-/* private data header to validate consumer rejects versus abnormal events */
-struct dapl_pdata_hdr {
- DAT_UINT32 version;
-};
-
-static void dapli_addr_resolve(struct dapl_cm_id *conn)
-{
- int ret;
-#ifdef DAPL_DBG
- struct rdma_addr *ipaddr = &conn->cm_id->route.addr;
-#endif
- dapl_dbg_log(DAPL_DBG_TYPE_CM,
- " addr_resolve: cm_id %p SRC %x DST %x\n",
- conn->cm_id, ntohl(((struct sockaddr_in *)
- &ipaddr->src_addr)->sin_addr.s_addr),
- ntohl(((struct sockaddr_in *)
- &ipaddr->dst_addr)->sin_addr.s_addr));
-
- ret = rdma_resolve_route(conn->cm_id, conn->route_timeout);
- if (ret) {
- dapl_log(DAPL_DBG_TYPE_ERR,
- " dapl_cma_connect: rdma_resolve_route ERR 0x%x %s\n",
- ret, strerror(errno));
- dapl_evd_connection_callback(conn,
- IB_CME_LOCAL_FAILURE,
- NULL, conn->ep);
- }
-}
-
-static void dapli_route_resolve(struct dapl_cm_id *conn)
-{
- int ret;
-#ifdef DAPL_DBG
- struct rdma_addr *ipaddr = &conn->cm_id->route.addr;
- struct ib_addr *ibaddr = &conn->cm_id->route.addr.addr.ibaddr;
-#endif
-
- dapl_dbg_log(DAPL_DBG_TYPE_CM,
- " route_resolve: cm_id %p SRC %x DST %x PORT %d\n",
- conn->cm_id, ntohl(((struct sockaddr_in *)
- &ipaddr->src_addr)->sin_addr.s_addr),
- ntohl(((struct sockaddr_in *)
- &ipaddr->dst_addr)->sin_addr.s_addr),
- ntohs(((struct sockaddr_in *)
- &ipaddr->dst_addr)->sin_port));
-
- dapl_dbg_log(DAPL_DBG_TYPE_CM,
- " route_resolve: SRC GID subnet %016llx id %016llx\n",
- (unsigned long long)
- ntohll(ibaddr->sgid.global.subnet_prefix),
- (unsigned long long)
- ntohll(ibaddr->sgid.global.interface_id));
-
- dapl_dbg_log(DAPL_DBG_TYPE_CM,
- " route_resolve: DST GID subnet %016llx id %016llx\n",
- (unsigned long long)
- ntohll(ibaddr->dgid.global.subnet_prefix),
- (unsigned long long)
- ntohll(ibaddr->dgid.global.interface_id));
-
- dapl_dbg_log(DAPL_DBG_TYPE_CM,
- " route_resolve: cm_id %p pdata %p plen %d rr %d ind %d\n",
- conn->cm_id,
- conn->params.private_data,
- conn->params.private_data_len,
- conn->params.responder_resources,
- conn->params.initiator_depth);
-
- ret = rdma_connect(conn->cm_id, &conn->params);
- if (ret) {
- dapl_log(DAPL_DBG_TYPE_ERR,
- " dapl_cma_connect: rdma_connect ERR %d %s\n",
- ret, strerror(errno));
- goto bail;
- }
- return;
-
- bail:
- dapl_evd_connection_callback(conn,
- IB_CME_LOCAL_FAILURE, NULL, conn->ep);
-}
-
-/*
- * Called from consumer thread via dat_ep_free().
- * CANNOT be called from the async event processing thread
- * dapli_cma_event_cb() since a cm_id reference is held and
- * a deadlock will occur.
- */
-void dapli_destroy_conn(struct dapl_cm_id *conn)
-{
- struct rdma_cm_id *cm_id;
-
- dapl_dbg_log(DAPL_DBG_TYPE_CM,
- " destroy_conn: conn %p id %d\n", conn, conn->cm_id);
-
- dapl_os_lock(&conn->lock);
- conn->destroy = 1;
-
- if (conn->ep) {
- conn->ep->cm_handle = IB_INVALID_HANDLE;
- conn->ep->qp_handle = IB_INVALID_HANDLE;
- }
-
- cm_id = conn->cm_id;
- conn->cm_id = NULL;
- dapl_os_unlock(&conn->lock);
-
- /*
- * rdma_destroy_id will force synchronization with async CM event
- * thread since it blocks until the in-process event reference
- * is cleared during our event processing call exit.
- */
- if (cm_id) {
- if (cm_id->qp)
- rdma_destroy_qp(cm_id);
-
- rdma_destroy_id(cm_id);
- }
- dapl_os_free(conn, sizeof(*conn));
-}
-
-static struct dapl_cm_id *dapli_req_recv(struct dapl_cm_id *conn,
- struct rdma_cm_event *event)
-{
- struct dapl_cm_id *new_conn;
-#ifdef DAPL_DBG
- struct rdma_addr *ipaddr = &event->id->route.addr;
-#endif
-
- if (conn->sp == NULL) {
- dapl_dbg_log(DAPL_DBG_TYPE_ERR,
- " dapli_rep_recv: on invalid listen " "handle\n");
- return NULL;
- }
-
- /* allocate new cm_id and merge listen parameters */
- new_conn = dapl_os_alloc(sizeof(*new_conn));
- if (new_conn) {
- (void)dapl_os_memzero(new_conn, sizeof(*new_conn));
- dapl_os_lock_init(&new_conn->lock);
- new_conn->cm_id = event->id; /* provided by uCMA */
- event->id->context = new_conn; /* update CM_ID context */
- new_conn->sp = conn->sp;
- new_conn->hca = conn->hca;
-
- /* Get requesters connect data, setup for accept */
- new_conn->params.responder_resources =
- DAPL_MIN(event->param.conn.responder_resources,
- conn->hca->ib_trans.max_rdma_rd_in);
- new_conn->params.initiator_depth =
- DAPL_MIN(event->param.conn.initiator_depth,
- conn->hca->ib_trans.max_rdma_rd_out);
-
- new_conn->params.flow_control = event->param.conn.flow_control;
- new_conn->params.rnr_retry_count =
- event->param.conn.rnr_retry_count;
- new_conn->params.retry_count = event->param.conn.retry_count;
-
- /* save private data */
- if (event->param.conn.private_data_len) {
- dapl_os_memcpy(new_conn->p_data,
- event->param.conn.private_data,
- event->param.conn.private_data_len);
- new_conn->params.private_data = new_conn->p_data;
- new_conn->params.private_data_len =
- event->param.conn.private_data_len;
- }
-
- dapl_dbg_log(DAPL_DBG_TYPE_CM, " passive_cb: "
- "REQ: SP %p PORT %d LID %d "
- "NEW CONN %p ID %p pD %p,%d\n",
- new_conn->sp, ntohs(((struct sockaddr_in *)
- &ipaddr->src_addr)->sin_port),
- event->listen_id, new_conn, event->id,
- event->param.conn.private_data,
- event->param.conn.private_data_len);
-
- dapl_dbg_log(DAPL_DBG_TYPE_CM, " passive_cb: "
- "REQ: IP SRC %x PORT %d DST %x PORT %d "
- "rr %d init %d\n", ntohl(((struct sockaddr_in *)
- &ipaddr->src_addr)->
- sin_addr.s_addr),
- ntohs(((struct sockaddr_in *)
- &ipaddr->src_addr)->sin_port),
- ntohl(((struct sockaddr_in *)
- &ipaddr->dst_addr)->sin_addr.s_addr),
- ntohs(((struct sockaddr_in *)
- &ipaddr->dst_addr)->sin_port),
- new_conn->params.responder_resources,
- new_conn->params.initiator_depth);
- }
- return new_conn;
-}
-
-static void dapli_cm_active_cb(struct dapl_cm_id *conn,
- struct rdma_cm_event *event)
-{
- dapl_dbg_log(DAPL_DBG_TYPE_CM,
- " active_cb: conn %p id %d event %d\n",
- conn, conn->cm_id, event->event);
-
- dapl_os_lock(&conn->lock);
- if (conn->destroy) {
- dapl_os_unlock(&conn->lock);
- return;
- }
- dapl_os_unlock(&conn->lock);
-
- /* There is a chance that we can get events after
- * the consumer calls disconnect in a pending state
- * since the IB CM and uDAPL states are not shared.
- * In some cases, IB CM could generate either a DCONN
- * or CONN_ERR after the consumer returned from
- * dapl_ep_disconnect with a DISCONNECTED event
- * already queued. Check state here and bail to
- * avoid any events after a disconnect.
- */
- if (DAPL_BAD_HANDLE(conn->ep, DAPL_MAGIC_EP))
- return;
-
- dapl_os_lock(&conn->ep->header.lock);
- if (conn->ep->param.ep_state == DAT_EP_STATE_DISCONNECTED) {
- dapl_os_unlock(&conn->ep->header.lock);
- return;
- }
- if (event->event == RDMA_CM_EVENT_DISCONNECTED)
- conn->ep->param.ep_state = DAT_EP_STATE_DISCONNECTED;
-
- dapl_os_unlock(&conn->ep->header.lock);
-
- switch (event->event) {
- case RDMA_CM_EVENT_UNREACHABLE:
- case RDMA_CM_EVENT_CONNECT_ERROR:
- {
- dapl_log(DAPL_DBG_TYPE_WARN,
- "dapl_cma_active: CONN_ERR event=0x%x"
- " status=%d %s DST %s, %d\n",
- event->event, event->status,
- (event->status == -ETIMEDOUT) ? "TIMEOUT" : "",
- inet_ntoa(((struct sockaddr_in *)
- &conn->cm_id->route.addr.dst_addr)->
- sin_addr),
- ntohs(((struct sockaddr_in *)
- &conn->cm_id->route.addr.dst_addr)->
- sin_port));
-
- /* per DAT SPEC provider always returns UNREACHABLE */
- dapl_evd_connection_callback(conn,
- IB_CME_DESTINATION_UNREACHABLE,
- NULL, conn->ep);
- break;
- }
- case RDMA_CM_EVENT_REJECTED:
- {
- ib_cm_events_t cm_event;
- unsigned char *pdata = NULL;
-
- dapl_dbg_log(DAPL_DBG_TYPE_CM,
- " dapli_cm_active_handler: REJECTED reason=%d\n",
- event->status);
-
- /* valid REJ from consumer will always contain private data */
- if (event->status == 28 &&
- event->param.conn.private_data_len) {
- cm_event =
- IB_CME_DESTINATION_REJECT_PRIVATE_DATA;
- pdata =
- (unsigned char *)event->param.conn.
- private_data +
- sizeof(struct dapl_pdata_hdr);
- } else {
- cm_event = IB_CME_DESTINATION_REJECT;
- dapl_log(DAPL_DBG_TYPE_WARN,
- "dapl_cma_active: non-consumer REJ,"
- " reason=%d, DST %s, %d\n",
- event->status,
- inet_ntoa(((struct sockaddr_in *)
- &conn->cm_id->route.addr.
- dst_addr)->sin_addr),
- ntohs(((struct sockaddr_in *)
- &conn->cm_id->route.addr.
- dst_addr)->sin_port));
- }
- dapl_evd_connection_callback(conn, cm_event, pdata,
- conn->ep);
- break;
- }
- case RDMA_CM_EVENT_ESTABLISHED:
- dapl_dbg_log(DAPL_DBG_TYPE_CM,
- " active_cb: cm_id %d PORT %d CONNECTED to %s!\n",
- conn->cm_id, ntohs(((struct sockaddr_in *)
- &conn->cm_id->route.addr.
- dst_addr)->sin_port),
- inet_ntoa(((struct sockaddr_in *)
- &conn->cm_id->route.addr.dst_addr)->
- sin_addr));
-
- /* setup local and remote ports for ep query */
- conn->ep->param.remote_port_qual =
- PORT_TO_SID(rdma_get_dst_port(conn->cm_id));
- conn->ep->param.local_port_qual =
- PORT_TO_SID(rdma_get_src_port(conn->cm_id));
-
- dapl_evd_connection_callback(conn, IB_CME_CONNECTED,
- event->param.conn.private_data,
- conn->ep);
- break;
-
- case RDMA_CM_EVENT_DISCONNECTED:
- rdma_disconnect(conn->cm_id); /* required for DREP */
- /* validate EP handle */
- if (!DAPL_BAD_HANDLE(conn->ep, DAPL_MAGIC_EP))
- dapl_evd_connection_callback(conn,
- IB_CME_DISCONNECTED,
- NULL, conn->ep);
- break;
- default:
- dapl_dbg_log(DAPL_DBG_TYPE_ERR,
- " dapli_cm_active_cb_handler: Unexpected CM "
- "event %d on ID 0x%p\n", event->event,
- conn->cm_id);
- break;
- }
-
- return;
-}
-
-static void dapli_cm_passive_cb(struct dapl_cm_id *conn,
- struct rdma_cm_event *event)
-{
- struct dapl_cm_id *new_conn;
-
- dapl_dbg_log(DAPL_DBG_TYPE_CM,
- " passive_cb: conn %p id %d event %d\n",
- conn, event->id, event->event);
-
- dapl_os_lock(&conn->lock);
- if (conn->destroy) {
- dapl_os_unlock(&conn->lock);
- return;
- }
- dapl_os_unlock(&conn->lock);
-
- switch (event->event) {
- case RDMA_CM_EVENT_CONNECT_REQUEST:
- /* create new conn object with new conn_id from event */
- new_conn = dapli_req_recv(conn, event);
-
- if (new_conn)
- dapls_cr_callback(new_conn,
- IB_CME_CONNECTION_REQUEST_PENDING,
- event->param.conn.private_data,
- new_conn->sp);
- break;
- case RDMA_CM_EVENT_UNREACHABLE:
- case RDMA_CM_EVENT_CONNECT_ERROR:
- dapl_log(DAPL_DBG_TYPE_WARN,
- "dapl_cm_passive: CONN_ERR event=0x%x status=%d %s,"
- " DST %s,%d\n",
- event->event, event->status,
- (event->status == -ETIMEDOUT) ? "TIMEOUT" : "",
- inet_ntoa(((struct sockaddr_in *)
- &conn->cm_id->route.addr.dst_addr)->
- sin_addr), ntohs(((struct sockaddr_in *)
- &conn->cm_id->route.addr.
- dst_addr)->sin_port));
-
- dapls_cr_callback(conn, IB_CME_DESTINATION_UNREACHABLE,
- NULL, conn->sp);
- break;
-
- case RDMA_CM_EVENT_REJECTED:
- {
-		/* will always be an abnormal NON-consumer reject from the active side */
- dapl_log(DAPL_DBG_TYPE_WARN,
- "dapl_cm_passive: non-consumer REJ, reason=%d,"
- " DST %s, %d\n",
- event->status,
- inet_ntoa(((struct sockaddr_in *)
- &conn->cm_id->route.addr.dst_addr)->
- sin_addr),
- ntohs(((struct sockaddr_in *)
- &conn->cm_id->route.addr.dst_addr)->
- sin_port));
-
- dapls_cr_callback(conn, IB_CME_DESTINATION_REJECT,
- NULL, conn->sp);
- break;
- }
- case RDMA_CM_EVENT_ESTABLISHED:
- dapl_dbg_log(DAPL_DBG_TYPE_CM,
- " passive_cb: cm_id %p PORT %d CONNECTED from 0x%x!\n",
- conn->cm_id, ntohs(((struct sockaddr_in *)
- &conn->cm_id->route.addr.
- src_addr)->sin_port),
- ntohl(((struct sockaddr_in *)
- &conn->cm_id->route.addr.dst_addr)->
- sin_addr.s_addr));
-
- dapls_cr_callback(conn, IB_CME_CONNECTED, NULL, conn->sp);
-
- break;
- case RDMA_CM_EVENT_DISCONNECTED:
- rdma_disconnect(conn->cm_id); /* required for DREP */
- /* validate SP handle context */
- if (!DAPL_BAD_HANDLE(conn->sp, DAPL_MAGIC_PSP) ||
- !DAPL_BAD_HANDLE(conn->sp, DAPL_MAGIC_RSP))
- dapls_cr_callback(conn,
- IB_CME_DISCONNECTED, NULL, conn->sp);
- break;
- default:
- dapl_dbg_log(DAPL_DBG_TYPE_ERR, " passive_cb: "
- "Unexpected CM event %d on ID 0x%p\n",
- event->event, conn->cm_id);
- break;
- }
-
- return;
-}
-
-/************************ DAPL provider entry points **********************/
-
-/*
- * dapls_ib_connect
- *
- * Initiate a connection with the passive listener on another node
- *
- * Input:
- * ep_handle,
- * remote_ia_address,
- * remote_conn_qual,
- * prd_size size of private data and structure
- *	prd_ptr		pointer to private data structure
- *
- * Output:
- * none
- *
- * Returns:
- * DAT_SUCCESS
- * DAT_INSUFFICIENT_RESOURCES
- * DAT_INVALID_PARAMETER
- *
- */
-DAT_RETURN dapls_ib_connect(IN DAT_EP_HANDLE ep_handle,
- IN DAT_IA_ADDRESS_PTR r_addr,
- IN DAT_CONN_QUAL r_qual,
- IN DAT_COUNT p_size, IN void *p_data)
-{
- struct dapl_ep *ep_ptr = ep_handle;
- struct dapl_cm_id *conn;
- int ret;
-
- /* Sanity check */
- if (NULL == ep_ptr)
- return DAT_SUCCESS;
-
- dapl_dbg_log(DAPL_DBG_TYPE_CM,
- " connect: rSID 0x%llx rPort %d, pdata %p, ln %d\n",
- r_qual, ntohs(SID_TO_PORT(r_qual)), p_data, p_size);
-
- /* rdma conn and cm_id pre-bound; reference via qp_handle */
- conn = ep_ptr->cm_handle = ep_ptr->qp_handle;
-
- /* Setup QP/CM parameters and private data in cm_id */
- (void)dapl_os_memzero(&conn->params, sizeof(conn->params));
- conn->params.responder_resources =
- ep_ptr->param.ep_attr.max_rdma_read_in;
- conn->params.initiator_depth = ep_ptr->param.ep_attr.max_rdma_read_out;
- conn->params.flow_control = 1;
- conn->params.rnr_retry_count = IB_RNR_RETRY_COUNT;
- conn->params.retry_count = IB_RC_RETRY_COUNT;
- if (p_size) {
- dapl_os_memcpy(conn->p_data, p_data, p_size);
- conn->params.private_data = conn->p_data;
- conn->params.private_data_len = p_size;
- }
-
- /* copy in remote address, need a copy for retry attempts */
- dapl_os_memcpy(&conn->r_addr, r_addr, sizeof(*r_addr));
-
- /* Resolve remote address, src already bound during QP create */
- ((struct sockaddr_in *)&conn->r_addr)->sin_port = SID_TO_PORT(r_qual);
- ((struct sockaddr_in *)&conn->r_addr)->sin_family = AF_INET;
-
- ret = rdma_resolve_addr(conn->cm_id, NULL,
- (struct sockaddr *)&conn->r_addr,
- conn->arp_timeout);
- if (ret) {
- dapl_log(DAPL_DBG_TYPE_ERR,
- " dapl_cma_connect: rdma_resolve_addr ERR 0x%x %s\n",
- ret, strerror(errno));
- return dapl_convert_errno(errno, "ib_connect");
- }
- dapl_dbg_log(DAPL_DBG_TYPE_CM,
- " connect: resolve_addr: cm_id %p -> %s port %d\n",
- conn->cm_id,
- inet_ntoa(((struct sockaddr_in *)&conn->r_addr)->sin_addr),
- ((struct sockaddr_in *)&conn->r_addr)->sin_port);
-
- return DAT_SUCCESS;
-}
-
-/*
- * dapls_ib_disconnect
- *
- * Disconnect an EP
- *
- * Input:
- * ep_handle,
- * disconnect_flags
- *
- * Output:
- * none
- *
- * Returns:
- * DAT_SUCCESS
- *
- */
-DAT_RETURN
-dapls_ib_disconnect(IN DAPL_EP * ep_ptr, IN DAT_CLOSE_FLAGS close_flags)
-{
- dp_ib_cm_handle_t conn = ep_ptr->cm_handle;
- int ret;
-
- dapl_dbg_log(DAPL_DBG_TYPE_CM,
- " disconnect(ep %p, conn %p, id %d flags %x)\n",
- ep_ptr, conn, (conn ? conn->cm_id : 0), close_flags);
-
- if ((conn == IB_INVALID_HANDLE) || (conn->cm_id == NULL))
- return DAT_SUCCESS;
-
- /* no graceful half-pipe disconnect option */
- ret = rdma_disconnect(conn->cm_id);
- if (ret)
- dapl_dbg_log(DAPL_DBG_TYPE_ERR,
- " disconnect: ID %p ret 0x%x\n",
- ep_ptr->cm_handle, ret);
-
- /*
- * DAT event notification occurs from the callback
- * Note: will fire even if DREQ goes unanswered on timeout
- */
- return DAT_SUCCESS;
-}
-
-/*
- * dapls_ib_disconnect_clean
- *
- * Clean up outstanding connection data. This routine is invoked
- * after the final disconnect callback has occurred. Only on the
- * ACTIVE side of a connection.
- *
- * Input:
- * ep_ptr DAPL_EP
- * active Indicates active side of connection
- *
- * Output:
- * none
- *
- * Returns:
- * void
- *
- */
-void
-dapls_ib_disconnect_clean(IN DAPL_EP * ep_ptr,
- IN DAT_BOOLEAN active,
- IN const ib_cm_events_t ib_cm_event)
-{
- /* nothing to do */
- return;
-}
-
-/*
- * dapl_ib_setup_conn_listener
- *
- * Have the CM set up a connection listener.
- *
- * Input:
- * ibm_hca_handle HCA handle
- * qp_handle QP handle
- *
- * Output:
- * none
- *
- * Returns:
- * DAT_SUCCESS
- * DAT_INSUFFICIENT_RESOURCES
- * DAT_INTERNAL_ERROR
- *	DAT_CONN_QUAL_UNAVAILABLE
- * DAT_CONN_QUAL_IN_USE
- *
- */
-DAT_RETURN
-dapls_ib_setup_conn_listener(IN DAPL_IA * ia_ptr,
- IN DAT_UINT64 ServiceID, IN DAPL_SP * sp_ptr)
-{
- DAT_RETURN dat_status = DAT_SUCCESS;
- ib_cm_srvc_handle_t conn;
- DAT_SOCK_ADDR6 addr; /* local binding address */
-
- /* Allocate CM and initialize lock */
- if ((conn = dapl_os_alloc(sizeof(*conn))) == NULL)
- return DAT_INSUFFICIENT_RESOURCES;
-
- dapl_os_memzero(conn, sizeof(*conn));
- dapl_os_lock_init(&conn->lock);
-
- /* create CM_ID, bind to local device, create QP */
- if (rdma_create_id
- (g_cm_events, &conn->cm_id, (void *)conn, RDMA_PS_TCP)) {
- dapl_os_free(conn, sizeof(*conn));
- return (dapl_convert_errno(errno, "setup_listener"));
- }
-
- /* open identifies the local device; per DAT specification */
- /* Get family and address then set port to consumer's ServiceID */
- dapl_os_memcpy(&addr, &ia_ptr->hca_ptr->hca_address, sizeof(addr));
- ((struct sockaddr_in *)&addr)->sin_port = SID_TO_PORT(ServiceID);
-
- if (rdma_bind_addr(conn->cm_id, (struct sockaddr *)&addr)) {
- if ((errno == EBUSY) || (errno == EADDRINUSE))
- dat_status = DAT_CONN_QUAL_IN_USE;
- else
- dat_status =
- dapl_convert_errno(errno, "setup_listener");
- goto bail;
- }
-
- dapl_dbg_log(DAPL_DBG_TYPE_CM,
- " listen(ia_ptr %p SID 0x%llx Port %d sp %p conn %p id %d)\n",
- ia_ptr, ServiceID, ntohs(SID_TO_PORT(ServiceID)),
- sp_ptr, conn, conn->cm_id);
-
- sp_ptr->cm_srvc_handle = conn;
- conn->sp = sp_ptr;
- conn->hca = ia_ptr->hca_ptr;
-
- dapl_dbg_log(DAPL_DBG_TYPE_EP,
- " listen(conn=%p cm_id=%d)\n",
- sp_ptr->cm_srvc_handle, conn->cm_id);
-
- if (rdma_listen(conn->cm_id, 0)) { /* max cma backlog */
-
- if ((errno == EBUSY) || (errno == EADDRINUSE))
- dat_status = DAT_CONN_QUAL_IN_USE;
- else
- dat_status =
- dapl_convert_errno(errno, "setup_listener");
- goto bail;
- }
-
- /* success */
- return DAT_SUCCESS;
-
- bail:
- rdma_destroy_id(conn->cm_id);
- dapl_os_free(conn, sizeof(*conn));
- return dat_status;
-}
-
-/*
- * dapl_ib_remove_conn_listener
- *
- * Have the CM remove a connection listener.
- *
- * Input:
- * ia_handle IA handle
- * ServiceID IB Channel Service ID
- *
- * Output:
- * none
- *
- * Returns:
- * DAT_SUCCESS
- * DAT_INVALID_STATE
- *
- */
-DAT_RETURN
-dapls_ib_remove_conn_listener(IN DAPL_IA * ia_ptr, IN DAPL_SP * sp_ptr)
-{
- ib_cm_srvc_handle_t conn = sp_ptr->cm_srvc_handle;
-
- dapl_dbg_log(DAPL_DBG_TYPE_CM,
- " remove_listen(ia_ptr %p sp_ptr %p cm_ptr %p)\n",
- ia_ptr, sp_ptr, conn);
-
- if (conn != IB_INVALID_HANDLE) {
- sp_ptr->cm_srvc_handle = NULL;
- dapli_destroy_conn(conn);
- }
- return DAT_SUCCESS;
-}
-
-/*
- * dapls_ib_accept_connection
- *
- * Perform necessary steps to accept a connection
- *
- * Input:
- * cr_handle
- * ep_handle
- * private_data_size
- * private_data
- *
- * Output:
- * none
- *
- * Returns:
- * DAT_SUCCESS
- * DAT_INSUFFICIENT_RESOURCES
- * DAT_INTERNAL_ERROR
- *
- */
-DAT_RETURN
-dapls_ib_accept_connection(IN DAT_CR_HANDLE cr_handle,
- IN DAT_EP_HANDLE ep_handle,
- IN DAT_COUNT p_size, IN const DAT_PVOID p_data)
-{
- DAPL_CR *cr_ptr = (DAPL_CR *) cr_handle;
- DAPL_EP *ep_ptr = (DAPL_EP *) ep_handle;
- DAPL_IA *ia_ptr = ep_ptr->header.owner_ia;
- struct dapl_cm_id *cr_conn = cr_ptr->ib_cm_handle;
- int ret;
- DAT_RETURN dat_status;
-
- dapl_dbg_log(DAPL_DBG_TYPE_CM,
- " accept(cr %p conn %p, id %p, p_data %p, p_sz=%d)\n",
- cr_ptr, cr_conn, cr_conn->cm_id, p_data, p_size);
-
- /* Obtain size of private data structure & contents */
- if (p_size > IB_MAX_REP_PDATA_SIZE) {
- dat_status = DAT_ERROR(DAT_LENGTH_ERROR, DAT_NO_SUBTYPE);
- goto bail;
- }
-
- if (ep_ptr->qp_state == DAPL_QP_STATE_UNATTACHED) {
- /*
- * If we are lazy attaching the QP then we may need to
- * hook it up here. Typically, we run this code only for
- * DAT_PSP_PROVIDER_FLAG
- */
- dat_status = dapls_ib_qp_alloc(ia_ptr, ep_ptr, NULL);
- if (dat_status != DAT_SUCCESS) {
- dapl_log(DAPL_DBG_TYPE_ERR,
- " dapl_cma_accept: qp_alloc ERR %d\n",
- dat_status);
- goto bail;
- }
- }
-
- /*
- * Validate device and port in EP cm_id against inbound
- * CR cm_id. The pre-allocated EP cm_id is already bound to
- * a local device (cm_id and QP) when created. Move the QP
- * to the new cm_id only if device and port numbers match.
- */
- if (ep_ptr->qp_handle->cm_id->verbs == cr_conn->cm_id->verbs &&
- ep_ptr->qp_handle->cm_id->port_num == cr_conn->cm_id->port_num) {
- /* move QP to new cr_conn, remove QP ref in EP cm_id */
- cr_conn->cm_id->qp = ep_ptr->qp_handle->cm_id->qp;
- ep_ptr->qp_handle->cm_id->qp = NULL;
- dapli_destroy_conn(ep_ptr->qp_handle);
- } else {
- dapl_log(DAPL_DBG_TYPE_ERR,
- " dapl_cma_accept: ERR dev(%p!=%p) or"
- " port mismatch(%d!=%d)\n",
- ep_ptr->qp_handle->cm_id->verbs, cr_conn->cm_id->verbs,
- ntohs(ep_ptr->qp_handle->cm_id->port_num),
- ntohs(cr_conn->cm_id->port_num));
- dat_status = DAT_INTERNAL_ERROR;
- goto bail;
- }
-
- cr_ptr->param.local_ep_handle = ep_handle;
- cr_conn->params.private_data = p_data;
- cr_conn->params.private_data_len = p_size;
-
- ret = rdma_accept(cr_conn->cm_id, &cr_conn->params);
- if (ret) {
- dapl_log(DAPL_DBG_TYPE_ERR, " dapl_cma_accept: ERR %d %s\n",
- ret, strerror(errno));
- dat_status = dapl_convert_errno(ret, "accept");
- goto bail;
- }
-
- /* save accepted conn and EP reference */
- ep_ptr->qp_handle = cr_conn;
- ep_ptr->cm_handle = cr_conn;
- cr_conn->ep = ep_ptr;
-
- /* setup local and remote ports for ep query */
- /* Note: port qual in network order */
- ep_ptr->param.remote_port_qual =
- PORT_TO_SID(rdma_get_dst_port(cr_conn->cm_id));
- ep_ptr->param.local_port_qual =
- PORT_TO_SID(rdma_get_src_port(cr_conn->cm_id));
-
- return DAT_SUCCESS;
- bail:
- rdma_reject(cr_conn->cm_id, NULL, 0);
- dapli_destroy_conn(cr_conn);
- return dat_status;
-}
-
-/*
- * dapls_ib_reject_connection
- *
- * Reject a connection
- *
- * Input:
- * cr_handle
- *
- * Output:
- * none
- *
- * Returns:
- * DAT_SUCCESS
- * DAT_INTERNAL_ERROR
- *
- */
-DAT_RETURN
-dapls_ib_reject_connection(IN dp_ib_cm_handle_t cm_handle,
- IN int reason,
- IN DAT_COUNT private_data_size,
- IN const DAT_PVOID private_data)
-{
- int ret;
- int offset = sizeof(struct dapl_pdata_hdr);
- struct dapl_pdata_hdr pdata_hdr;
-
- memset(&pdata_hdr, 0, sizeof pdata_hdr);
- pdata_hdr.version = htonl((DAT_VERSION_MAJOR << 24) |
- (DAT_VERSION_MINOR << 16) |
- (VN_PROVIDER_MAJOR << 8) |
- (VN_PROVIDER_MINOR));
-
- dapl_dbg_log(DAPL_DBG_TYPE_CM,
- " reject: handle %p reason %x, ver=%x, data %p, sz=%d\n",
- cm_handle, reason, ntohl(pdata_hdr.version),
- private_data, private_data_size);
-
- if (cm_handle == IB_INVALID_HANDLE) {
- dapl_dbg_log(DAPL_DBG_TYPE_ERR,
- " reject: invalid handle: reason %d\n", reason);
- return DAT_ERROR(DAT_INVALID_HANDLE, DAT_INVALID_HANDLE_CR);
- }
-
- if (private_data_size >
- dapls_ib_private_data_size(NULL, DAPL_PDATA_CONN_REJ,
- cm_handle->hca))
- return DAT_ERROR(DAT_INVALID_PARAMETER, DAT_INVALID_ARG3);
-
-	/* set up pdata_hdr and the user's data in the CR pdata buffer */
- dapl_os_memcpy(cm_handle->p_data, &pdata_hdr, offset);
- if (private_data_size)
- dapl_os_memcpy(cm_handle->p_data + offset,
- private_data, private_data_size);
-
- /*
-	 * Always send some private data with the reject so the active
-	 * peer can distinguish a real application reject from an
-	 * abnormal application termination.
- */
- ret = rdma_reject(cm_handle->cm_id,
- cm_handle->p_data, offset + private_data_size);
-
- dapli_destroy_conn(cm_handle);
- return dapl_convert_errno(ret, "reject");
-}
-
-/*
- * dapls_ib_cm_remote_addr
- *
- * Obtain the remote IP address given a connection
- *
- * Input:
- * cr_handle
- *
- * Output:
- * remote_ia_address: where to place the remote address
- *
- * Returns:
- * DAT_SUCCESS
- * DAT_INVALID_HANDLE
- *
- */
-DAT_RETURN
-dapls_ib_cm_remote_addr(IN DAT_HANDLE dat_handle, OUT DAT_SOCK_ADDR6 * raddr)
-{
- DAPL_HEADER *header;
- dp_ib_cm_handle_t ib_cm_handle;
- struct rdma_addr *ipaddr;
-
- dapl_dbg_log(DAPL_DBG_TYPE_EP,
- " remote_addr(cm_handle=%p, r_addr=%p)\n",
- dat_handle, raddr);
-
- header = (DAPL_HEADER *) dat_handle;
-
- if (header->magic == DAPL_MAGIC_EP)
- ib_cm_handle = ((DAPL_EP *) dat_handle)->cm_handle;
- else if (header->magic == DAPL_MAGIC_CR)
- ib_cm_handle = ((DAPL_CR *) dat_handle)->ib_cm_handle;
- else
- return DAT_INVALID_HANDLE;
-
- /* get remote IP address from cm_id route */
- ipaddr = &ib_cm_handle->cm_id->route.addr;
-
- dapl_dbg_log(DAPL_DBG_TYPE_CM,
- " remote_addr: conn %p id %p SRC %x DST %x PORT %d\n",
- ib_cm_handle, ib_cm_handle->cm_id,
- ntohl(((struct sockaddr_in *)
- &ipaddr->src_addr)->sin_addr.s_addr),
- ntohl(((struct sockaddr_in *)
- &ipaddr->dst_addr)->sin_addr.s_addr),
- ntohs(((struct sockaddr_in *)
- &ipaddr->dst_addr)->sin_port));
-
- dapl_os_memcpy(raddr, &ipaddr->dst_addr, sizeof(DAT_SOCK_ADDR));
- return DAT_SUCCESS;
-}
-
-/*
- * dapls_ib_private_data_size
- *
- * Return the size of private data given a connection op type
- *
- * Input:
- * prd_ptr private data pointer
- * conn_op connection operation type
- * hca_ptr hca pointer, needed for transport type
- *
- * If prd_ptr is NULL, this is a query for the max size supported by
- * the provider, otherwise it is the actual size of the private data
- * contained in prd_ptr.
- *
- *
- * Output:
- * None
- *
- * Returns:
- * length of private data
- *
- */
-int dapls_ib_private_data_size(IN DAPL_PRIVATE * prd_ptr,
- IN DAPL_PDATA_OP conn_op, IN DAPL_HCA * hca_ptr)
-{
- int size;
-
- if (hca_ptr->ib_hca_handle->device->transport_type
- == IBV_TRANSPORT_IWARP)
- return (IWARP_MAX_PDATA_SIZE - sizeof(struct dapl_pdata_hdr));
-
- switch (conn_op) {
-
- case DAPL_PDATA_CONN_REQ:
- size = IB_MAX_REQ_PDATA_SIZE;
- break;
- case DAPL_PDATA_CONN_REP:
- size = IB_MAX_REP_PDATA_SIZE;
- break;
- case DAPL_PDATA_CONN_REJ:
- size = IB_MAX_REJ_PDATA_SIZE - sizeof(struct dapl_pdata_hdr);
- break;
- case DAPL_PDATA_CONN_DREQ:
- size = IB_MAX_DREQ_PDATA_SIZE;
- break;
- case DAPL_PDATA_CONN_DREP:
- size = IB_MAX_DREP_PDATA_SIZE;
- break;
- default:
- size = 0;
-
- } /* end case */
-
- return size;
-}
-
-/*
- * Map all CMA event codes to the DAT equivalent.
- */
-#define DAPL_IB_EVENT_CNT 13
-
-static struct ib_cm_event_map {
- const ib_cm_events_t ib_cm_event;
- DAT_EVENT_NUMBER dat_event_num;
-} ib_cm_event_map[DAPL_IB_EVENT_CNT] = {
- /* 00 */ {
- IB_CME_CONNECTED, DAT_CONNECTION_EVENT_ESTABLISHED},
- /* 01 */ {
- IB_CME_DISCONNECTED, DAT_CONNECTION_EVENT_DISCONNECTED},
- /* 02 */ {
- IB_CME_DISCONNECTED_ON_LINK_DOWN,
- DAT_CONNECTION_EVENT_DISCONNECTED},
- /* 03 */ {
- IB_CME_CONNECTION_REQUEST_PENDING, DAT_CONNECTION_REQUEST_EVENT},
- /* 04 */ {
- IB_CME_CONNECTION_REQUEST_PENDING_PRIVATE_DATA,
- DAT_CONNECTION_REQUEST_EVENT},
- /* 05 */ {
- IB_CME_CONNECTION_REQUEST_ACKED, DAT_CONNECTION_REQUEST_EVENT},
- /* 06 */ {
- IB_CME_DESTINATION_REJECT,
- DAT_CONNECTION_EVENT_NON_PEER_REJECTED},
- /* 07 */ {
- IB_CME_DESTINATION_REJECT_PRIVATE_DATA,
- DAT_CONNECTION_EVENT_PEER_REJECTED},
- /* 08 */ {
- IB_CME_DESTINATION_UNREACHABLE, DAT_CONNECTION_EVENT_UNREACHABLE},
- /* 09 */ {
- IB_CME_TOO_MANY_CONNECTION_REQUESTS,
- DAT_CONNECTION_EVENT_NON_PEER_REJECTED},
- /* 10 */ {
- IB_CME_LOCAL_FAILURE, DAT_CONNECTION_EVENT_BROKEN},
- /* 11 */ {
- IB_CME_BROKEN, DAT_CONNECTION_EVENT_BROKEN},
- /* 12 */ {
-IB_CME_TIMEOUT, DAT_CONNECTION_EVENT_TIMED_OUT},};
-
-/*
- * dapls_ib_get_dat_event
- *
- * Return a DAT connection event given a provider CM event.
- *
- * Input:
- *	ib_cm_event	CM event provided to the dapl callback routine
- *	active		switch indicating active or passive connection
- *
- * Output:
- *	none
- *
- * Returns:
- *	DAT_EVENT_NUMBER of translated provider value
- */
-DAT_EVENT_NUMBER
-dapls_ib_get_dat_event(IN const ib_cm_events_t ib_cm_event,
- IN DAT_BOOLEAN active)
-{
- DAT_EVENT_NUMBER dat_event_num;
- int i;
-
- active = active;
-
- dat_event_num = 0;
- for (i = 0; i < DAPL_IB_EVENT_CNT; i++) {
- if (ib_cm_event == ib_cm_event_map[i].ib_cm_event) {
- dat_event_num = ib_cm_event_map[i].dat_event_num;
- break;
- }
- }
- dapl_dbg_log(DAPL_DBG_TYPE_CALLBACK,
- "dapls_ib_get_dat_event: event(%s) ib=0x%x dat=0x%x\n",
- active ? "active" : "passive", ib_cm_event, dat_event_num);
-
- return dat_event_num;
-}
-
-/*
- * dapls_ib_get_cm_event
- *
- * Return a provider CM event given a DAT connection event.
- *
- * Input:
- *	dat_event_num	DAT event we need an equivalent CM event for
- *
- * Output:
- *	none
- *
- * Returns:
- *	ib_cm_event of translated DAT value
- */
-ib_cm_events_t dapls_ib_get_cm_event(IN DAT_EVENT_NUMBER dat_event_num)
-{
- ib_cm_events_t ib_cm_event;
- int i;
-
- ib_cm_event = 0;
- for (i = 0; i < DAPL_IB_EVENT_CNT; i++) {
- if (dat_event_num == ib_cm_event_map[i].dat_event_num) {
- ib_cm_event = ib_cm_event_map[i].ib_cm_event;
- break;
- }
- }
- return ib_cm_event;
-}
-
-void dapli_cma_event_cb(void)
-{
- struct rdma_cm_event *event;
-
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " cm_event()\n");
-
- /* process one CM event, fairness */
- if (!rdma_get_cm_event(g_cm_events, &event)) {
- struct dapl_cm_id *conn;
-
- /* set proper conn from cm_id context */
- if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST)
- conn = (struct dapl_cm_id *)event->listen_id->context;
- else
- conn = (struct dapl_cm_id *)event->id->context;
-
- dapl_dbg_log(DAPL_DBG_TYPE_CM,
- " cm_event: EVENT=%d ID=%p LID=%p CTX=%p\n",
- event->event, event->id, event->listen_id, conn);
-
- switch (event->event) {
- case RDMA_CM_EVENT_ADDR_RESOLVED:
- dapli_addr_resolve(conn);
- break;
-
- case RDMA_CM_EVENT_ROUTE_RESOLVED:
- dapli_route_resolve(conn);
- break;
-
- case RDMA_CM_EVENT_ADDR_ERROR:
- dapl_log(DAPL_DBG_TYPE_WARN,
- "dapl_cma_active: CM ADDR ERROR: ->"
- " DST %s retry (%d)..\n",
- inet_ntoa(((struct sockaddr_in *)
- &conn->r_addr)->sin_addr),
- conn->arp_retries);
-
- /* retry address resolution */
- if ((--conn->arp_retries) &&
- (event->status == -ETIMEDOUT)) {
- int ret;
- ret = rdma_resolve_addr(conn->cm_id, NULL,
- (struct sockaddr *)
- &conn->r_addr,
- conn->arp_timeout);
- if (!ret)
- break;
- else {
- dapl_dbg_log(DAPL_DBG_TYPE_WARN,
- " ERROR: rdma_resolve_addr = "
- "%d %s\n",
- ret, strerror(errno));
- }
- }
- /* retries exhausted or resolve_addr failed */
- dapl_log(DAPL_DBG_TYPE_ERR,
- "dapl_cma_active: ARP_ERR, retries(%d)"
- " exhausted -> DST %s,%d\n",
- IB_ARP_RETRY_COUNT,
- inet_ntoa(((struct sockaddr_in *)
- &conn->cm_id->route.addr.dst_addr)->
- sin_addr),
- ntohs(((struct sockaddr_in *)
- &conn->cm_id->route.addr.dst_addr)->
- sin_port));
-
- dapl_evd_connection_callback(conn,
- IB_CME_DESTINATION_UNREACHABLE,
- NULL, conn->ep);
- break;
-
- case RDMA_CM_EVENT_ROUTE_ERROR:
- dapl_log(DAPL_DBG_TYPE_WARN,
- "dapl_cma_active: CM ROUTE ERROR: ->"
- " DST %s retry (%d)..\n",
- inet_ntoa(((struct sockaddr_in *)
- &conn->r_addr)->sin_addr),
- conn->route_retries);
-
- /* retry route resolution */
- if ((--conn->route_retries) &&
- (event->status == -ETIMEDOUT))
- dapli_addr_resolve(conn);
- else {
- dapl_log(DAPL_DBG_TYPE_ERR,
- "dapl_cma_active: PATH_RECORD_ERR,"
- " retries(%d) exhausted, DST %s,%d\n",
- IB_ROUTE_RETRY_COUNT,
- inet_ntoa(((struct sockaddr_in *)
- &conn->cm_id->route.addr.
- dst_addr)->sin_addr),
- ntohs(((struct sockaddr_in *)
- &conn->cm_id->route.addr.
- dst_addr)->sin_port));
-
- dapl_evd_connection_callback(conn,
- IB_CME_DESTINATION_UNREACHABLE,
- NULL, conn->ep);
- }
- break;
-
- case RDMA_CM_EVENT_DEVICE_REMOVAL:
- dapl_evd_connection_callback(conn,
- IB_CME_LOCAL_FAILURE,
- NULL, conn->ep);
- break;
- case RDMA_CM_EVENT_CONNECT_REQUEST:
- case RDMA_CM_EVENT_CONNECT_ERROR:
- case RDMA_CM_EVENT_UNREACHABLE:
- case RDMA_CM_EVENT_REJECTED:
- case RDMA_CM_EVENT_ESTABLISHED:
- case RDMA_CM_EVENT_DISCONNECTED:
- /* passive or active */
- if (conn->sp)
- dapli_cm_passive_cb(conn, event);
- else
- dapli_cm_active_cb(conn, event);
- break;
- case RDMA_CM_EVENT_CONNECT_RESPONSE:
- default:
- dapl_dbg_log(DAPL_DBG_TYPE_WARN,
- " cm_event: UNEXPECTED EVENT=%p ID=%p CTX=%p\n",
- event->event, event->id,
- event->id->context);
- break;
- }
- /* ack event, unblocks destroy_cm_id in consumer threads */
- rdma_ack_cm_event(event);
- }
-}
-
-/*
- * Local variables:
- * c-indent-level: 4
- * c-basic-offset: 4
- * tab-width: 8
- * End:
- */
diff --git a/dapl/openib_cma/dapl_ib_cq.c b/dapl/openib_cma/dapl_ib_cq.c
deleted file mode 100755
index 7f67982..0000000
--- a/dapl/openib_cma/dapl_ib_cq.c
+++ /dev/null
@@ -1,559 +0,0 @@
-/*
- * Copyright (c) 2005-2007 Intel Corporation. All rights reserved.
- *
- * This Software is licensed under one of the following licenses:
- *
- * 1) under the terms of the "Common Public License 1.0" a copy of which is
- * available from the Open Source Initiative, see
- * http://www.opensource.org/licenses/cpl.php.
- *
- * 2) under the terms of the "The BSD License" a copy of which is
- * available from the Open Source Initiative, see
- * http://www.opensource.org/licenses/bsd-license.php.
- *
- * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
- * copy of which is available from the Open Source Initiative, see
- * http://www.opensource.org/licenses/gpl-license.php.
- *
- * Licensee has the right to choose one of the above licenses.
- *
- * Redistributions of source code must retain the above copyright
- * notice and one of the license notices.
- *
- * Redistributions in binary form must reproduce both the above copyright
- * notice, one of the license notices in the documentation
- * and/or other materials provided with the distribution.
- */
-
-/**********************************************************************
- *
- * MODULE: dapl_ib_cq.c
- *
- * PURPOSE: completion queues for OFED IB Verbs
- *
- * $Id: $
- *
- **********************************************************************/
-
-#include "openib_osd.h"
-#include "dapl.h"
-#include "dapl_adapter_util.h"
-#include "dapl_lmr_util.h"
-#include "dapl_evd_util.h"
-#include "dapl_ring_buffer_util.h"
-
-/* One CQ event channel per HCA */
-void dapli_cq_event_cb(struct _ib_hca_transport *hca)
-{
- /* check all comp events on this device */
- struct dapl_evd *evd_ptr = NULL;
- struct ibv_cq *ibv_cq = NULL;
-
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " dapli_cq_event_cb(%p)\n", hca);
-
- if (!ibv_get_cq_event(hca->ib_cq, &ibv_cq, (void *)&evd_ptr)) {
-
- if (DAPL_BAD_HANDLE(evd_ptr, DAPL_MAGIC_EVD)) {
- ibv_ack_cq_events(ibv_cq, 1);
- return;
- }
-
- /* process DTO event via callback */
- dapl_evd_dto_callback(hca->cm_id->verbs,
- evd_ptr->ib_cq_handle, (void *)evd_ptr);
-
- ibv_ack_cq_events(ibv_cq, 1);
- }
-}
-
-/*
- * Map all verbs DTO completion codes to the DAT equivalent.
- *
- * Not returned by verbs: DAT_DTO_ERR_PARTIAL_PACKET
- */
-static struct ib_status_map {
- int ib_status;
- DAT_DTO_COMPLETION_STATUS dat_status;
-} ib_status_map[] = {
-/* 00 */ {
- IBV_WC_SUCCESS, DAT_DTO_SUCCESS},
-/* 01 */ {
- IBV_WC_LOC_LEN_ERR, DAT_DTO_ERR_LOCAL_LENGTH},
-/* 02 */ {
- IBV_WC_LOC_QP_OP_ERR, DAT_DTO_ERR_LOCAL_EP},
-/* 03 */ {
- IBV_WC_LOC_EEC_OP_ERR, DAT_DTO_ERR_TRANSPORT},
-/* 04 */ {
- IBV_WC_LOC_PROT_ERR, DAT_DTO_ERR_LOCAL_PROTECTION},
-/* 05 */ {
- IBV_WC_WR_FLUSH_ERR, DAT_DTO_ERR_FLUSHED},
-/* 06 */ {
- IBV_WC_MW_BIND_ERR, DAT_RMR_OPERATION_FAILED},
-/* 07 */ {
- IBV_WC_BAD_RESP_ERR, DAT_DTO_ERR_BAD_RESPONSE},
-/* 08 */ {
- IBV_WC_LOC_ACCESS_ERR, DAT_DTO_ERR_LOCAL_PROTECTION},
-/* 09 */ {
- IBV_WC_REM_INV_REQ_ERR, DAT_DTO_ERR_REMOTE_RESPONDER},
-/* 10 */ {
- IBV_WC_REM_ACCESS_ERR, DAT_DTO_ERR_REMOTE_ACCESS},
-/* 11 */ {
- IBV_WC_REM_OP_ERR, DAT_DTO_ERR_REMOTE_RESPONDER},
-/* 12 */ {
- IBV_WC_RETRY_EXC_ERR, DAT_DTO_ERR_TRANSPORT},
-/* 13 */ {
- IBV_WC_RNR_RETRY_EXC_ERR, DAT_DTO_ERR_RECEIVER_NOT_READY},
-/* 14 */ {
- IBV_WC_LOC_RDD_VIOL_ERR, DAT_DTO_ERR_LOCAL_PROTECTION},
-/* 15 */ {
- IBV_WC_REM_INV_RD_REQ_ERR, DAT_DTO_ERR_REMOTE_RESPONDER},
-/* 16 */ {
- IBV_WC_REM_ABORT_ERR, DAT_DTO_ERR_REMOTE_RESPONDER},
-/* 17 */ {
- IBV_WC_INV_EECN_ERR, DAT_DTO_ERR_TRANSPORT},
-/* 18 */ {
- IBV_WC_INV_EEC_STATE_ERR, DAT_DTO_ERR_TRANSPORT},
-/* 19 */ {
- IBV_WC_FATAL_ERR, DAT_DTO_ERR_TRANSPORT},
-/* 20 */ {
- IBV_WC_RESP_TIMEOUT_ERR, DAT_DTO_ERR_RECEIVER_NOT_READY},
-/* 21 */ {
-IBV_WC_GENERAL_ERR, DAT_DTO_ERR_TRANSPORT},};
-
-/*
- * dapls_ib_get_dto_status
- *
- * Return the DAT status of a DTO operation
- *
- * Input:
- * cqe_ptr pointer to completion queue entry
- *
- * Output:
- * none
- *
- * Returns:
- * Value from ib_status_map table above
- */
-
-DAT_DTO_COMPLETION_STATUS
-dapls_ib_get_dto_status(IN ib_work_completion_t * cqe_ptr)
-{
- uint32_t ib_status;
- int i;
-
- ib_status = DAPL_GET_CQE_STATUS(cqe_ptr);
-
- /*
- * Due to the implementation of verbs completion code, we need to
- * search the table for the correct value rather than assuming
- * linear distribution.
- */
- for (i = 0; i <= IBV_WC_GENERAL_ERR; i++) {
- if (ib_status == ib_status_map[i].ib_status) {
- if (ib_status != IBV_WC_SUCCESS) {
- dapl_dbg_log(DAPL_DBG_TYPE_DTO_COMP_ERR,
- " DTO completion ERROR: %d: op %#x\n",
- ib_status,
- DAPL_GET_CQE_OPTYPE(cqe_ptr));
- }
- return ib_status_map[i].dat_status;
- }
- }
-
- dapl_dbg_log(DAPL_DBG_TYPE_DTO_COMP_ERR,
- " DTO completion ERROR: %d: op %#x\n",
- ib_status, DAPL_GET_CQE_OPTYPE(cqe_ptr));
-
- return DAT_DTO_FAILURE;
-}
-
-DAT_RETURN dapls_ib_get_async_event(IN ib_error_record_t * err_record,
- OUT DAT_EVENT_NUMBER * async_event)
-{
- DAT_RETURN dat_status = DAT_SUCCESS;
- int err_code = err_record->event_type;
-
- switch (err_code) {
- /* OVERFLOW error */
- case IBV_EVENT_CQ_ERR:
- *async_event = DAT_ASYNC_ERROR_EVD_OVERFLOW;
- break;
- /* INTERNAL errors */
- case IBV_EVENT_DEVICE_FATAL:
- *async_event = DAT_ASYNC_ERROR_PROVIDER_INTERNAL_ERROR;
- break;
- /* CATASTROPHIC errors */
- case IBV_EVENT_PORT_ERR:
- *async_event = DAT_ASYNC_ERROR_IA_CATASTROPHIC;
- break;
- /* BROKEN QP error */
- case IBV_EVENT_SQ_DRAINED:
- case IBV_EVENT_QP_FATAL:
- case IBV_EVENT_QP_REQ_ERR:
- case IBV_EVENT_QP_ACCESS_ERR:
- *async_event = DAT_ASYNC_ERROR_EP_BROKEN;
- break;
- /* connection completion */
- case IBV_EVENT_COMM_EST:
- *async_event = DAT_CONNECTION_EVENT_ESTABLISHED;
- break;
- /* TODO: process HW state changes */
- case IBV_EVENT_PATH_MIG:
- case IBV_EVENT_PATH_MIG_ERR:
- case IBV_EVENT_PORT_ACTIVE:
- case IBV_EVENT_LID_CHANGE:
- case IBV_EVENT_PKEY_CHANGE:
- case IBV_EVENT_SM_CHANGE:
- default:
- dat_status = DAT_ERROR(DAT_NOT_IMPLEMENTED, 0);
- }
- return dat_status;
-}
-
-/*
- * dapl_ib_cq_alloc
- *
- * Alloc a CQ
- *
- * Input:
- * ia_handle IA handle
- * evd_ptr pointer to EVD struct
- * cqlen minimum QLen
- *
- * Output:
- * none
- *
- * Returns:
- * DAT_SUCCESS
- * DAT_INSUFFICIENT_RESOURCES
- *
- */
-DAT_RETURN
-dapls_ib_cq_alloc(IN DAPL_IA * ia_ptr,
- IN DAPL_EVD * evd_ptr, IN DAT_COUNT * cqlen)
-{
- struct ibv_comp_channel *channel = ia_ptr->hca_ptr->ib_trans.ib_cq;
-
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- "dapls_ib_cq_alloc: evd %p cqlen=%d \n", evd_ptr, *cqlen);
-
-#ifdef CQ_WAIT_OBJECT
- if (evd_ptr->cq_wait_obj_handle)
- channel = evd_ptr->cq_wait_obj_handle->events;
-#endif
-
- /* Call IB verbs to create CQ */
- evd_ptr->ib_cq_handle = ibv_create_cq(ia_ptr->hca_ptr->ib_hca_handle,
- *cqlen, evd_ptr, channel, 0);
-
- if (evd_ptr->ib_cq_handle == IB_INVALID_HANDLE)
- return (dapl_convert_errno(errno, "create_cq"));
-
- /* arm cq for events */
- dapls_set_cq_notify(ia_ptr, evd_ptr);
-
- /* update with returned cq entry size */
- *cqlen = evd_ptr->ib_cq_handle->cqe;
-
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- "dapls_ib_cq_alloc: new_cq %p cqlen=%d \n",
- evd_ptr->ib_cq_handle, *cqlen);
-
- return DAT_SUCCESS;
-}
-
-/*
- * dapl_ib_cq_resize
- *
- * Resize a CQ
- *
- * Input:
- * ia_handle IA handle
- * evd_ptr pointer to EVD struct
- * cqlen minimum QLen
- *
- * Output:
- * none
- *
- * Returns:
- * DAT_SUCCESS
- * DAT_INVALID_PARAMETER
- *
- */
-DAT_RETURN
-dapls_ib_cq_resize(IN DAPL_IA * ia_ptr,
- IN DAPL_EVD * evd_ptr, IN DAT_COUNT * cqlen)
-{
- ib_cq_handle_t new_cq;
- struct ibv_comp_channel *channel = ia_ptr->hca_ptr->ib_trans.ib_cq;
-
-	/* IB verbs does not support resize. Try to re-create the CQ
-	 * with the new size. This can only be done if no QP is attached;
-	 * a destroy that fails with EBUSY means a QP is still attached.
- */
-
-#ifdef CQ_WAIT_OBJECT
- if (evd_ptr->cq_wait_obj_handle)
- channel = evd_ptr->cq_wait_obj_handle->events;
-#endif
-
- /* Call IB verbs to create CQ */
- new_cq = ibv_create_cq(ia_ptr->hca_ptr->ib_hca_handle, *cqlen,
- evd_ptr, channel, 0);
-
- if (new_cq == IB_INVALID_HANDLE)
- return DAT_INSUFFICIENT_RESOURCES;
-
- /* destroy the original and replace if successful */
- if (ibv_destroy_cq(evd_ptr->ib_cq_handle)) {
- ibv_destroy_cq(new_cq);
- return (dapl_convert_errno(errno, "resize_cq"));
- }
-
- /* update EVD with new cq handle and size */
- evd_ptr->ib_cq_handle = new_cq;
- *cqlen = new_cq->cqe;
-
- /* arm cq for events */
- dapls_set_cq_notify(ia_ptr, evd_ptr);
-
- return DAT_SUCCESS;
-}
-
-/*
- * dapls_ib_cq_free
- *
- * destroy a CQ
- *
- * Input:
- * ia_handle IA handle
- * evd_ptr pointer to EVD struct
- *
- * Output:
- * none
- *
- * Returns:
- * DAT_SUCCESS
- * DAT_INVALID_PARAMETER
- *
- */
-DAT_RETURN dapls_ib_cq_free(IN DAPL_IA * ia_ptr, IN DAPL_EVD * evd_ptr)
-{
- if (evd_ptr->ib_cq_handle != IB_INVALID_HANDLE) {
- /* copy all entries on CQ to EVD before destroying */
- dapls_evd_copy_cq(evd_ptr);
- if (ibv_destroy_cq(evd_ptr->ib_cq_handle))
- return (dapl_convert_errno(errno, "destroy_cq"));
- evd_ptr->ib_cq_handle = IB_INVALID_HANDLE;
- }
- return DAT_SUCCESS;
-}
-
-/*
- * dapls_set_cq_notify
- *
- * Set the CQ notification for the next completion event
- *
- * Input:
- * hca_handl hca handle
- * DAPL_EVD evd handle
- *
- * Output:
- * none
- *
- * Returns:
- * DAT_SUCCESS
- * dapl_convert_errno
- */
-DAT_RETURN dapls_set_cq_notify(IN DAPL_IA * ia_ptr, IN DAPL_EVD * evd_ptr)
-{
- if (ibv_req_notify_cq(evd_ptr->ib_cq_handle, 0))
- return (dapl_convert_errno(errno, "notify_cq"));
- else
- return DAT_SUCCESS;
-}
-
-/*
- * dapls_ib_completion_notify
- *
- * Set the CQ notification type
- *
- * Input:
- * hca_handl hca handle
- * evd_ptr evd handle
- * type notification type
- *
- * Output:
- * none
- *
- * Returns:
- * DAT_SUCCESS
- * dapl_convert_errno
- */
-DAT_RETURN dapls_ib_completion_notify(IN ib_hca_handle_t hca_handle,
- IN DAPL_EVD * evd_ptr,
- IN ib_notification_type_t type)
-{
- if (ibv_req_notify_cq(evd_ptr->ib_cq_handle, type))
- return (dapl_convert_errno(errno, "notify_cq_type"));
- else
- return DAT_SUCCESS;
-}
-
-/*
- * dapls_ib_completion_poll
- *
- * CQ poll for completions
- *
- * Input:
- * hca_handl hca handle
- * evd_ptr evd handle
- * wc_ptr work completion
- *
- * Output:
- * none
- *
- * Returns:
- * DAT_SUCCESS
- * DAT_QUEUE_EMPTY
- *
- */
-DAT_RETURN dapls_ib_completion_poll(IN DAPL_HCA * hca_ptr,
- IN DAPL_EVD * evd_ptr,
- IN ib_work_completion_t * wc_ptr)
-{
- if (ibv_poll_cq(evd_ptr->ib_cq_handle, 1, wc_ptr) == 1)
- return DAT_SUCCESS;
-
- return DAT_QUEUE_EMPTY;
-}
-
-#ifdef CQ_WAIT_OBJECT
-
-/* NEW common wait objects for providers with direct CQ wait objects */
-DAT_RETURN
-dapls_ib_wait_object_create(IN DAPL_EVD * evd_ptr,
- IN ib_wait_obj_handle_t * p_cq_wait_obj_handle)
-{
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- " cq_object_create: (%p,%p)\n",
- evd_ptr, p_cq_wait_obj_handle);
-
- *p_cq_wait_obj_handle =
- dapl_os_alloc(sizeof(struct _ib_wait_obj_handle));
-
- if (*p_cq_wait_obj_handle == NULL)
- return (dapl_convert_errno(ENOMEM, " wait_object_create"));
-
- dapl_os_memzero(*p_cq_wait_obj_handle,
- sizeof(struct _ib_wait_obj_handle));
-
- /* create pipe for waking up work thread */
- if (pipe((*p_cq_wait_obj_handle)->pipe))
- goto bail;
-
- /* set cq_wait object to evd_ptr */
- (*p_cq_wait_obj_handle)->events =
- ibv_create_comp_channel(evd_ptr->header.owner_ia->hca_ptr->
- ib_hca_handle);
-
- if ((*p_cq_wait_obj_handle)->events == NULL)
- goto bail;
-
- return DAT_SUCCESS;
- bail:
- dapl_os_free(*p_cq_wait_obj_handle, sizeof(struct _ib_wait_obj_handle));
- *p_cq_wait_obj_handle = NULL;
- return (dapl_convert_errno(errno, " wait_object_create"));
-}
-
-DAT_RETURN
-dapls_ib_wait_object_destroy(IN ib_wait_obj_handle_t p_cq_wait_obj_handle)
-{
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- " cq_object_destroy: wait_obj=%p\n", p_cq_wait_obj_handle);
-
- ibv_destroy_comp_channel(p_cq_wait_obj_handle->events);
-
- dapl_os_free(p_cq_wait_obj_handle, sizeof(struct _ib_wait_obj_handle));
-
- return DAT_SUCCESS;
-}
-
-DAT_RETURN
-dapls_ib_wait_object_wakeup(IN ib_wait_obj_handle_t p_cq_wait_obj_handle)
-{
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- " cq_object_wakeup: wait_obj=%p\n", p_cq_wait_obj_handle);
-
- /* write to pipe for wake up */
- if (write(p_cq_wait_obj_handle->pipe[1], "w", sizeof "w") == -1)
- dapl_log(DAPL_DBG_TYPE_UTIL,
- " wait object wakeup write error = %s\n",
- strerror(errno));
- return DAT_SUCCESS;
-}
-
-DAT_RETURN
-dapls_ib_wait_object_wait(IN ib_wait_obj_handle_t p_cq_wait_obj_handle,
- IN u_int32_t timeout)
-{
-	struct dapl_evd *evd_ptr = NULL;
- struct ibv_cq *ibv_cq = NULL;
- int status = 0;
- int timeout_ms = -1;
- struct pollfd ufds[2];
-
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- " cq_object_wait: CQ channel %p time %d\n",
- p_cq_wait_obj_handle, timeout);
-
- /* setup cq event channel and pipe fd for consumer wakeup */
- ufds[0].fd = p_cq_wait_obj_handle->events->fd;
- ufds[0].events = POLLIN;
- ufds[0].revents = 0;
- ufds[1].fd = p_cq_wait_obj_handle->pipe[0];
- ufds[1].events = POLLIN;
- ufds[1].revents = 0;
-
- /* uDAPL timeout values in usecs */
- if (timeout != DAT_TIMEOUT_INFINITE)
- timeout_ms = timeout / 1000;
-
- /* restart syscall */
- while ((status = poll(ufds, 2, timeout_ms)) == -1)
- if (errno == EINTR)
- continue;
-
- /* returned event */
- if (status > 0) {
- if (ufds[0].revents == POLLIN) {
- if (!ibv_get_cq_event(p_cq_wait_obj_handle->events,
- &ibv_cq, (void *)&evd_ptr)) {
- ibv_ack_cq_events(ibv_cq, 1);
- }
- }
- status = 0;
-
- /* timeout */
- } else if (status == 0)
- status = ETIMEDOUT;
- else
- status = errno;
-
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- " cq_object_wait: RET evd %p ibv_cq %p %s\n",
- evd_ptr, ibv_cq, strerror(errno));
-
- return (dapl_convert_errno(status, "cq_wait_object_wait"));
-
-}
-#endif
-
-/*
- * Local variables:
- * c-indent-level: 4
- * c-basic-offset: 4
- * tab-width: 8
- * End:
- */
diff --git a/dapl/openib_cma/dapl_ib_dto.h b/dapl/openib_cma/dapl_ib_dto.h
deleted file mode 100644
index d97c26b..0000000
--- a/dapl/openib_cma/dapl_ib_dto.h
+++ /dev/null
@@ -1,472 +0,0 @@
-/*
- * This Software is licensed under one of the following licenses:
- *
- * 1) under the terms of the "Common Public License 1.0" a copy of which is
- * available from the Open Source Initiative, see
- * http://www.opensource.org/licenses/cpl.php.
- *
- * 2) under the terms of the "The BSD License" a copy of which is
- * available from the Open Source Initiative, see
- * http://www.opensource.org/licenses/bsd-license.php.
- *
- * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
- * copy of which is available from the Open Source Initiative, see
- * http://www.opensource.org/licenses/gpl-license.php.
- *
- * Licensee has the right to choose one of the above licenses.
- *
- * Redistributions of source code must retain the above copyright
- * notice and one of the license notices.
- *
- * Redistributions in binary form must reproduce both the above copyright
- * notice, one of the license notices in the documentation
- * and/or other materials provided with the distribution.
- */
-
-/***************************************************************************
- *
- * Module: uDAPL
- *
- * Filename: dapl_ib_dto.h
- *
- * Author: Arlin Davis
- *
- * Created: 3/10/2005
- *
- * Description:
- *
- * The OpenIB uCMA provider - DTO operations and CQE macros
- *
- ****************************************************************************
- * Source Control System Information
- *
- * $Id: $
- *
- * Copyright (c) 2005 Intel Corporation. All rights reserved.
- *
- **************************************************************************/
-#ifndef _DAPL_IB_DTO_H_
-#define _DAPL_IB_DTO_H_
-
-#include "dapl_ib_util.h"
-
-#ifdef DAT_EXTENSIONS
-#include <dat2/dat_ib_extensions.h>
-#endif
-
-STATIC _INLINE_ int dapls_cqe_opcode(ib_work_completion_t *cqe_p);
-
-/*
- * dapls_ib_post_recv
- *
- * Provider specific Post RECV function
- */
-STATIC _INLINE_ DAT_RETURN
-dapls_ib_post_recv (
- IN DAPL_EP *ep_ptr,
- IN DAPL_COOKIE *cookie,
- IN DAT_COUNT segments,
- IN DAT_LMR_TRIPLET *local_iov )
-{
- struct ibv_recv_wr wr;
- struct ibv_recv_wr *bad_wr;
- ib_data_segment_t *ds = (ib_data_segment_t *)local_iov;
- DAT_COUNT i, total_len;
- int ret;
-
- dapl_dbg_log(DAPL_DBG_TYPE_EP,
- " post_rcv: ep %p cookie %p segs %d l_iov %p\n",
- ep_ptr, cookie, segments, local_iov);
-
- /* setup work request */
- total_len = 0;
- wr.next = 0;
- wr.num_sge = segments;
- wr.wr_id = (uint64_t)(uintptr_t)cookie;
- wr.sg_list = ds;
-
- if (cookie != NULL) {
- for (i = 0; i < segments; i++) {
- dapl_dbg_log(DAPL_DBG_TYPE_EP,
- " post_rcv: l_key 0x%x va %p len %d\n",
- ds->lkey, ds->addr, ds->length);
- total_len += ds->length;
- ds++;
- }
- cookie->val.dto.size = total_len;
- }
-
- ret = ibv_post_recv(ep_ptr->qp_handle->cm_id->qp, &wr, &bad_wr);
-
- if (ret)
- return( dapl_convert_errno(errno,"ibv_recv") );
-
- DAPL_CNTR(ep_ptr, DCNT_EP_POST_RECV);
- DAPL_CNTR_DATA(ep_ptr, DCNT_EP_POST_RECV_DATA, total_len);
-
- return DAT_SUCCESS;
-}
-
-/*
- * dapls_ib_post_send
- *
- * Provider specific Post SEND function
- */
-STATIC _INLINE_ DAT_RETURN
-dapls_ib_post_send (
- IN DAPL_EP *ep_ptr,
- IN ib_send_op_type_t op_type,
- IN DAPL_COOKIE *cookie,
- IN DAT_COUNT segments,
- IN DAT_LMR_TRIPLET *local_iov,
- IN const DAT_RMR_TRIPLET *remote_iov,
- IN DAT_COMPLETION_FLAGS completion_flags)
-{
- struct ibv_send_wr wr;
- struct ibv_send_wr *bad_wr;
- ib_data_segment_t *ds = (ib_data_segment_t *)local_iov;
- ib_hca_transport_t *ibt_ptr =
- &ep_ptr->header.owner_ia->hca_ptr->ib_trans;
- DAT_COUNT i, total_len;
- int ret;
-
- dapl_dbg_log(DAPL_DBG_TYPE_EP,
-		     " post_snd: ep %p op %d ck %p sgs"
-		     " %d l_iov %p r_iov %p f %d\n",
- ep_ptr, op_type, cookie, segments, local_iov,
- remote_iov, completion_flags);
-
- /* setup the work request */
- wr.next = 0;
- wr.opcode = op_type;
- wr.num_sge = segments;
- wr.send_flags = 0;
- wr.wr_id = (uint64_t)(uintptr_t)cookie;
- wr.sg_list = ds;
- total_len = 0;
-
- if (cookie != NULL) {
- for (i = 0; i < segments; i++ ) {
- dapl_dbg_log(DAPL_DBG_TYPE_EP,
- " post_snd: lkey 0x%x va %p len %d\n",
- ds->lkey, ds->addr, ds->length );
- total_len += ds->length;
- ds++;
- }
- cookie->val.dto.size = total_len;
- }
-
- if (wr.num_sge &&
- (op_type == OP_RDMA_WRITE || op_type == OP_RDMA_READ)) {
- wr.wr.rdma.remote_addr = remote_iov->virtual_address;
- wr.wr.rdma.rkey = remote_iov->rmr_context;
- dapl_dbg_log(DAPL_DBG_TYPE_EP,
- " post_snd_rdma: rkey 0x%x va %#016Lx\n",
- wr.wr.rdma.rkey, wr.wr.rdma.remote_addr);
- }
-
- /* inline data for send or write ops */
- if ((total_len <= ibt_ptr->max_inline_send) &&
- ((op_type == OP_SEND) || (op_type == OP_RDMA_WRITE)))
- wr.send_flags |= IBV_SEND_INLINE;
-
- /* set completion flags in work request */
- wr.send_flags |= (DAT_COMPLETION_SUPPRESS_FLAG &
- completion_flags) ? 0 : IBV_SEND_SIGNALED;
- wr.send_flags |= (DAT_COMPLETION_BARRIER_FENCE_FLAG &
- completion_flags) ? IBV_SEND_FENCE : 0;
- wr.send_flags |= (DAT_COMPLETION_SOLICITED_WAIT_FLAG &
- completion_flags) ? IBV_SEND_SOLICITED : 0;
-
- dapl_dbg_log(DAPL_DBG_TYPE_EP,
- " post_snd: op 0x%x flags 0x%x sglist %p, %d\n",
- wr.opcode, wr.send_flags, wr.sg_list, wr.num_sge);
-
- ret = ibv_post_send(ep_ptr->qp_handle->cm_id->qp, &wr, &bad_wr);
-
- if (ret)
- return( dapl_convert_errno(errno,"ibv_send") );
-
-#ifdef DAPL_COUNTERS
- switch (op_type) {
- case OP_SEND:
- DAPL_CNTR(ep_ptr, DCNT_EP_POST_SEND);
- DAPL_CNTR_DATA(ep_ptr, DCNT_EP_POST_SEND_DATA,total_len);
- break;
- case OP_RDMA_WRITE:
- DAPL_CNTR(ep_ptr, DCNT_EP_POST_WRITE);
- DAPL_CNTR_DATA(ep_ptr, DCNT_EP_POST_WRITE_DATA,total_len);
- break;
- case OP_RDMA_READ:
- DAPL_CNTR(ep_ptr, DCNT_EP_POST_READ);
- DAPL_CNTR_DATA(ep_ptr, DCNT_EP_POST_READ_DATA,total_len);
- break;
- default:
- break;
- }
-#endif /* DAPL_COUNTERS */
-
- dapl_dbg_log(DAPL_DBG_TYPE_EP," post_snd: returned\n");
- return DAT_SUCCESS;
-}
-
-/* map Work Completions to DAPL WR operations */
-STATIC _INLINE_ DAT_DTOS dapls_cqe_dtos_opcode(ib_work_completion_t *cqe_p)
-{
- switch (cqe_p->opcode) {
-
- case IBV_WC_SEND:
- return (DAT_DTO_SEND);
- case IBV_WC_RDMA_READ:
- return (DAT_DTO_RDMA_READ);
- case IBV_WC_BIND_MW:
- return (DAT_DTO_BIND_MW);
-#ifdef DAT_EXTENSIONS
- case IBV_WC_RDMA_WRITE:
- if (cqe_p->wc_flags & IBV_WC_WITH_IMM)
- return (DAT_IB_DTO_RDMA_WRITE_IMMED);
- else
- return (DAT_DTO_RDMA_WRITE);
- case IBV_WC_COMP_SWAP:
- return (DAT_IB_DTO_CMP_SWAP);
- case IBV_WC_FETCH_ADD:
- return (DAT_IB_DTO_FETCH_ADD);
- case IBV_WC_RECV_RDMA_WITH_IMM:
- return (DAT_IB_DTO_RECV_IMMED);
-#else
- case IBV_WC_RDMA_WRITE:
- return (DAT_DTO_RDMA_WRITE);
-#endif
- case IBV_WC_RECV:
- return (DAT_DTO_RECEIVE);
- default:
- return (0xff);
- }
-}
-#define DAPL_GET_CQE_DTOS_OPTYPE(cqe_p) dapls_cqe_dtos_opcode(cqe_p)
-
-
-#ifdef DAT_EXTENSIONS
-/*
- * dapls_ib_post_ext_send
- *
- * Provider specific extended Post SEND function for atomics
- * OP_COMP_AND_SWAP and OP_FETCH_AND_ADD
- */
-STATIC _INLINE_ DAT_RETURN
-dapls_ib_post_ext_send (
- IN DAPL_EP *ep_ptr,
- IN ib_send_op_type_t op_type,
- IN DAPL_COOKIE *cookie,
- IN DAT_COUNT segments,
- IN DAT_LMR_TRIPLET *local_iov,
- IN const DAT_RMR_TRIPLET *remote_iov,
- IN DAT_UINT32 immed_data,
- IN DAT_UINT64 compare_add,
- IN DAT_UINT64 swap,
- IN DAT_COMPLETION_FLAGS completion_flags)
-{
- struct ibv_send_wr wr;
- struct ibv_send_wr *bad_wr;
- ib_data_segment_t *ds = (ib_data_segment_t *)local_iov;
- DAT_COUNT i, total_len;
- int ret;
-
- dapl_dbg_log(DAPL_DBG_TYPE_EP,
-		     " post_ext_snd: ep %p op %d ck %p sgs"
-		     " %d l_iov %p r_iov %p f %d\n",
- ep_ptr, op_type, cookie, segments, local_iov,
- remote_iov, completion_flags);
-
- /* setup the work request */
- wr.next = 0;
- wr.opcode = op_type;
- wr.num_sge = segments;
- wr.send_flags = 0;
- wr.wr_id = (uint64_t)(uintptr_t)cookie;
- wr.sg_list = ds;
- total_len = 0;
-
- if (cookie != NULL) {
- for (i = 0; i < segments; i++ ) {
- dapl_dbg_log(DAPL_DBG_TYPE_EP,
- " post_ext_snd: lkey 0x%x va %p ln %d\n",
- ds->lkey, ds->addr, ds->length);
- total_len += ds->length;
- ds++;
- }
- cookie->val.dto.size = total_len;
- }
-
- switch (op_type) {
- case OP_RDMA_WRITE_IMM:
-		/* OP_RDMA_WRITE_IMM has a direct IB wr_type mapping */
- dapl_dbg_log(DAPL_DBG_TYPE_EP,
- " post_ext: rkey 0x%x va %#016Lx immed=0x%x\n",
- remote_iov?remote_iov->rmr_context:0,
- remote_iov?remote_iov->virtual_address:0,
- immed_data);
-
- wr.imm_data = immed_data;
- if (wr.num_sge) {
- wr.wr.rdma.remote_addr = remote_iov->virtual_address;
- wr.wr.rdma.rkey = remote_iov->rmr_context;
- }
- break;
- case OP_COMP_AND_SWAP:
- /* OP_COMP_AND_SWAP has direct IB wr_type mapping */
- dapl_dbg_log(DAPL_DBG_TYPE_EP,
- " post_ext: OP_COMP_AND_SWAP=%lx,"
- "%lx rkey 0x%x va %#016Lx\n",
- compare_add, swap, remote_iov->rmr_context,
- remote_iov->virtual_address);
-
- wr.wr.atomic.compare_add = compare_add;
- wr.wr.atomic.swap = swap;
- wr.wr.atomic.remote_addr = remote_iov->virtual_address;
- wr.wr.atomic.rkey = remote_iov->rmr_context;
- break;
- case OP_FETCH_AND_ADD:
- /* OP_FETCH_AND_ADD has direct IB wr_type mapping */
- dapl_dbg_log(DAPL_DBG_TYPE_EP,
-			     " post_ext: OP_FETCH_AND_ADD=%lx"
-			     " rkey 0x%x va %#016Lx\n",
- compare_add, remote_iov->rmr_context,
- remote_iov->virtual_address);
-
- wr.wr.atomic.compare_add = compare_add;
- wr.wr.atomic.remote_addr = remote_iov->virtual_address;
- wr.wr.atomic.rkey = remote_iov->rmr_context;
- break;
- default:
- break;
- }
-
- /* set completion flags in work request */
- wr.send_flags |= (DAT_COMPLETION_SUPPRESS_FLAG &
- completion_flags) ? 0 : IBV_SEND_SIGNALED;
- wr.send_flags |= (DAT_COMPLETION_BARRIER_FENCE_FLAG &
- completion_flags) ? IBV_SEND_FENCE : 0;
- wr.send_flags |= (DAT_COMPLETION_SOLICITED_WAIT_FLAG &
- completion_flags) ? IBV_SEND_SOLICITED : 0;
-
- dapl_dbg_log(DAPL_DBG_TYPE_EP,
- " post_snd: op 0x%x flags 0x%x sglist %p, %d\n",
- wr.opcode, wr.send_flags, wr.sg_list, wr.num_sge);
-
- ret = ibv_post_send(ep_ptr->qp_handle->cm_id->qp, &wr, &bad_wr);
-
- if (ret)
- return( dapl_convert_errno(errno,"ibv_send") );
-
-#ifdef DAPL_COUNTERS
- switch (op_type) {
- case OP_RDMA_WRITE_IMM:
- DAPL_CNTR(ep_ptr, DCNT_EP_POST_WRITE_IMM);
- DAPL_CNTR_DATA(ep_ptr,
- DCNT_EP_POST_WRITE_IMM_DATA, total_len);
- break;
- case OP_COMP_AND_SWAP:
- DAPL_CNTR(ep_ptr, DCNT_EP_POST_CMP_SWAP);
- break;
- case OP_FETCH_AND_ADD:
- DAPL_CNTR(ep_ptr, DCNT_EP_POST_FETCH_ADD);
- break;
- default:
- break;
- }
-#endif /* DAPL_COUNTERS */
-
- dapl_dbg_log(DAPL_DBG_TYPE_EP," post_snd: returned\n");
- return DAT_SUCCESS;
-}
-#endif
-
-STATIC _INLINE_ DAT_RETURN
-dapls_ib_optional_prv_dat(
- IN DAPL_CR *cr_ptr,
- IN const void *event_data,
- OUT DAPL_CR **cr_pp)
-{
- return DAT_SUCCESS;
-}
-
-/* map Work Completions to DAPL WR operations */
-STATIC _INLINE_ int dapls_cqe_opcode(ib_work_completion_t *cqe_p)
-{
-#ifdef DAPL_COUNTERS
- DAPL_COOKIE *cookie = (DAPL_COOKIE *)(uintptr_t)cqe_p->wr_id;
-#endif /* DAPL_COUNTERS */
-
- switch (cqe_p->opcode) {
- case IBV_WC_SEND:
- return (OP_SEND);
- case IBV_WC_RDMA_WRITE:
- if (cqe_p->wc_flags & IBV_WC_WITH_IMM)
- return (OP_RDMA_WRITE_IMM);
- else
- return (OP_RDMA_WRITE);
- case IBV_WC_RDMA_READ:
- return (OP_RDMA_READ);
- case IBV_WC_COMP_SWAP:
- return (OP_COMP_AND_SWAP);
- case IBV_WC_FETCH_ADD:
- return (OP_FETCH_AND_ADD);
- case IBV_WC_BIND_MW:
- return (OP_BIND_MW);
- case IBV_WC_RECV:
- if (cqe_p->wc_flags & IBV_WC_WITH_IMM) {
- DAPL_CNTR(cookie->ep, DCNT_EP_RECV_IMM);
- DAPL_CNTR_DATA(cookie->ep, DCNT_EP_RECV_IMM_DATA,
- cqe_p->byte_len);
- return (OP_RECEIVE_IMM);
- } else {
- DAPL_CNTR(cookie->ep, DCNT_EP_RECV);
- DAPL_CNTR_DATA(cookie->ep, DCNT_EP_RECV_DATA,
- cqe_p->byte_len);
- return (OP_RECEIVE);
- }
- case IBV_WC_RECV_RDMA_WITH_IMM:
- DAPL_CNTR(cookie->ep, DCNT_EP_RECV_RDMA_IMM);
- DAPL_CNTR_DATA(cookie->ep, DCNT_EP_RECV_RDMA_IMM_DATA,
- cqe_p->byte_len);
- return (OP_RECEIVE_IMM);
- default:
- return (OP_INVALID);
- }
-}
-
-#define DAPL_GET_CQE_OPTYPE(cqe_p) dapls_cqe_opcode(cqe_p)
-#define DAPL_GET_CQE_WRID(cqe_p) ((ib_work_completion_t*)cqe_p)->wr_id
-#define DAPL_GET_CQE_STATUS(cqe_p) ((ib_work_completion_t*)cqe_p)->status
-#define DAPL_GET_CQE_VENDOR_ERR(cqe_p) ((ib_work_completion_t*)cqe_p)->vendor_err
-#define DAPL_GET_CQE_BYTESNUM(cqe_p) ((ib_work_completion_t*)cqe_p)->byte_len
-#define DAPL_GET_CQE_IMMED_DATA(cqe_p) ((ib_work_completion_t*)cqe_p)->imm_data
-
-STATIC _INLINE_ char * dapls_dto_op_str(int op)
-{
- static char *optable[] =
- {
- "OP_RDMA_WRITE",
- "OP_RDMA_WRITE_IMM",
- "OP_SEND",
- "OP_SEND_IMM",
- "OP_RDMA_READ",
- "OP_COMP_AND_SWAP",
- "OP_FETCH_AND_ADD",
- "OP_RECEIVE",
- "OP_RECEIVE_IMM",
- "OP_BIND_MW"
- };
- return ((op < 0 || op > 9) ? "Invalid CQE OP?" : optable[op]);
-}
-
-static _INLINE_ char *
-dapls_cqe_op_str(IN ib_work_completion_t *cqe_ptr)
-{
- return dapls_dto_op_str(DAPL_GET_CQE_OPTYPE(cqe_ptr));
-}
-
-#define DAPL_GET_CQE_OP_STR(cqe) dapls_cqe_op_str(cqe)
-
-#endif /* _DAPL_IB_DTO_H_ */
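
Note on dapl_ib_dto.h above: the post paths fold the DAT completion flags into
ibv_send_wr.send_flags and set IBV_SEND_INLINE for small SEND/RDMA_WRITE
payloads. A minimal sketch of that translation as a standalone helper
(illustration only; it uses the raw verbs opcodes instead of the provider's
OP_* aliases, takes max_inline as a parameter instead of reading
ib_trans.max_inline_send, and assumes the DAT 2.0 header path dat2/dat.h from
the dat2/dat_ib_extensions.h include above):

#include <infiniband/verbs.h>
#include <dat2/dat.h>

static unsigned int dto_send_flags(DAT_COMPLETION_FLAGS flags,
				   enum ibv_wr_opcode op,
				   int total_len, int max_inline)
{
	unsigned int sf = 0;

	/* completions are signaled unless the consumer suppresses them */
	if (!(flags & DAT_COMPLETION_SUPPRESS_FLAG))
		sf |= IBV_SEND_SIGNALED;
	if (flags & DAT_COMPLETION_BARRIER_FENCE_FLAG)
		sf |= IBV_SEND_FENCE;
	if (flags & DAT_COMPLETION_SOLICITED_WAIT_FLAG)
		sf |= IBV_SEND_SOLICITED;
	/* inline only SEND and RDMA_WRITE payloads under the HCA limit */
	if (total_len <= max_inline &&
	    (op == IBV_WR_SEND || op == IBV_WR_RDMA_WRITE))
		sf |= IBV_SEND_INLINE;
	return sf;
}
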
diff --git a/dapl/openib_cma/dapl_ib_extensions.c b/dapl/openib_cma/dapl_ib_extensions.c
deleted file mode 100755
index 3bcde58..0000000
--- a/dapl/openib_cma/dapl_ib_extensions.c
+++ /dev/null
@@ -1,329 +0,0 @@
-/*
- * Copyright (c) 2007 Intel Corporation. All rights reserved.
- *
- * This Software is licensed under one of the following licenses:
- *
- * 1) under the terms of the "Common Public License 1.0" a copy of which is
- * available from the Open Source Initiative, see
- * http://www.opensource.org/licenses/cpl.php.
- *
- * 2) under the terms of the "The BSD License" a copy of which is
- * available from the Open Source Initiative, see
- * http://www.opensource.org/licenses/bsd-license.php.
- *
- * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
- * copy of which is available from the Open Source Initiative, see
- * http://www.opensource.org/licenses/gpl-license.php.
- *
- * Licensee has the right to choose one of the above licenses.
- *
- * Redistributions of source code must retain the above copyright
- * notice and one of the license notices.
- *
- * Redistributions in binary form must reproduce both the above copyright
- * notice, one of the license notices in the documentation
- * and/or other materials provided with the distribution.
- */
-
-/**********************************************************************
- *
- * MODULE: dapl_ib_extensions.c
- *
- * PURPOSE: Extensions routines for OpenIB uCMA provider
- *
- * $Id: $
- *
- **********************************************************************/
-
-#include "dapl.h"
-#include "dapl_adapter_util.h"
-#include "dapl_evd_util.h"
-#include "dapl_ib_util.h"
-#include "dapl_ep_util.h"
-#include "dapl_cookie.h"
-#include <stdarg.h>
-
-DAT_RETURN
-dapli_post_ext(IN DAT_EP_HANDLE ep_handle,
- IN DAT_UINT64 cmp_add,
- IN DAT_UINT64 swap,
- IN DAT_UINT32 immed_data,
- IN DAT_COUNT segments,
- IN DAT_LMR_TRIPLET * local_iov,
- IN DAT_DTO_COOKIE user_cookie,
- IN const DAT_RMR_TRIPLET * remote_iov,
- IN int op_type, IN DAT_COMPLETION_FLAGS flags);
-
-/*
- * dapl_extensions
- *
- * Process extension requests
- *
- * Input:
- * ext_type,
- * ...
- *
- * Output:
- * Depends....
- *
- * Returns:
- * DAT_SUCCESS
- * DAT_NOT_IMPLEMENTED
- * .....
- *
- */
-DAT_RETURN
-dapl_extensions(IN DAT_HANDLE dat_handle,
- IN DAT_EXTENDED_OP ext_op, IN va_list args)
-{
- DAT_EP_HANDLE ep;
- DAT_LMR_TRIPLET *lmr_p;
- DAT_DTO_COOKIE cookie;
- const DAT_RMR_TRIPLET *rmr_p;
- DAT_UINT64 dat_uint64a, dat_uint64b;
- DAT_UINT32 dat_uint32;
- DAT_COUNT segments = 1;
- DAT_COMPLETION_FLAGS comp_flags;
- DAT_RETURN status = DAT_NOT_IMPLEMENTED;
-
- dapl_dbg_log(DAPL_DBG_TYPE_API,
- "dapl_extensions(hdl %p operation %d, ...)\n",
- dat_handle, ext_op);
-
- switch ((int)ext_op) {
-
- case DAT_IB_RDMA_WRITE_IMMED_OP:
- dapl_dbg_log(DAPL_DBG_TYPE_RTN,
- " WRITE_IMMED_DATA extension call\n");
-
- ep = dat_handle; /* ep_handle */
- segments = va_arg(args, DAT_COUNT); /* num segments */
- lmr_p = va_arg(args, DAT_LMR_TRIPLET *);
- cookie = va_arg(args, DAT_DTO_COOKIE);
- rmr_p = va_arg(args, const DAT_RMR_TRIPLET *);
- dat_uint32 = va_arg(args, DAT_UINT32); /* immed data */
- comp_flags = va_arg(args, DAT_COMPLETION_FLAGS);
-
- status = dapli_post_ext(ep, 0, 0, dat_uint32, segments, lmr_p,
- cookie, rmr_p, OP_RDMA_WRITE_IMM,
- comp_flags);
- break;
-
- case DAT_IB_CMP_AND_SWAP_OP:
- dapl_dbg_log(DAPL_DBG_TYPE_RTN,
- " CMP_AND_SWAP extension call\n");
-
- ep = dat_handle; /* ep_handle */
- dat_uint64a = va_arg(args, DAT_UINT64); /* cmp_value */
- dat_uint64b = va_arg(args, DAT_UINT64); /* swap_value */
- lmr_p = va_arg(args, DAT_LMR_TRIPLET *);
- cookie = va_arg(args, DAT_DTO_COOKIE);
- rmr_p = va_arg(args, const DAT_RMR_TRIPLET *);
- comp_flags = va_arg(args, DAT_COMPLETION_FLAGS);
-
- status = dapli_post_ext(ep, dat_uint64a, dat_uint64b,
- 0, segments, lmr_p, cookie, rmr_p,
- OP_COMP_AND_SWAP, comp_flags);
- break;
-
- case DAT_IB_FETCH_AND_ADD_OP:
- dapl_dbg_log(DAPL_DBG_TYPE_RTN,
- " FETCH_AND_ADD extension call\n");
-
- ep = dat_handle; /* ep_handle */
- dat_uint64a = va_arg(args, DAT_UINT64); /* add value */
- lmr_p = va_arg(args, DAT_LMR_TRIPLET *);
- cookie = va_arg(args, DAT_DTO_COOKIE);
- rmr_p = va_arg(args, const DAT_RMR_TRIPLET *);
- comp_flags = va_arg(args, DAT_COMPLETION_FLAGS);
-
- status = dapli_post_ext(ep, dat_uint64a, 0, 0, segments,
- lmr_p, cookie, rmr_p,
- OP_FETCH_AND_ADD, comp_flags);
-
- break;
-
-#ifdef DAPL_COUNTERS
- case DAT_QUERY_COUNTERS_OP:
- {
- int cntr, reset;
- DAT_UINT64 *p_cntr_out;
-
- dapl_dbg_log(DAPL_DBG_TYPE_RTN,
- " Query counter extension call\n");
-
- cntr = va_arg(args, int);
- p_cntr_out = va_arg(args, DAT_UINT64 *);
- reset = va_arg(args, int);
-
- status = dapl_query_counter(dat_handle, cntr,
- p_cntr_out, reset);
- break;
- }
- case DAT_PRINT_COUNTERS_OP:
- {
- int cntr, reset;
-
- dapl_dbg_log(DAPL_DBG_TYPE_RTN,
- " Print counter extension call\n");
-
- cntr = va_arg(args, int);
- reset = va_arg(args, int);
-
- dapl_print_counter(dat_handle, cntr, reset);
- status = DAT_SUCCESS;
- break;
- }
-#endif /* DAPL_COUNTERS */
-
- default:
- dapl_dbg_log(DAPL_DBG_TYPE_ERR,
- "unsupported extension(%d)\n", (int)ext_op);
- }
-
- return (status);
-}
-
-DAT_RETURN
-dapli_post_ext(IN DAT_EP_HANDLE ep_handle,
- IN DAT_UINT64 cmp_add,
- IN DAT_UINT64 swap,
- IN DAT_UINT32 immed_data,
- IN DAT_COUNT segments,
- IN DAT_LMR_TRIPLET * local_iov,
- IN DAT_DTO_COOKIE user_cookie,
- IN const DAT_RMR_TRIPLET * remote_iov,
- IN int op_type, IN DAT_COMPLETION_FLAGS flags)
-{
- DAPL_EP *ep_ptr;
- ib_qp_handle_t qp_ptr;
- DAPL_COOKIE *cookie = NULL;
- DAT_RETURN dat_status = DAT_SUCCESS;
-
- dapl_dbg_log(DAPL_DBG_TYPE_API,
- " post_ext_op: ep %p cmp_val %d "
- "swap_val %d cookie 0x%x, r_iov %p, flags 0x%x\n",
- ep_handle, (unsigned)cmp_add, (unsigned)swap,
- (unsigned)user_cookie.as_64, remote_iov, flags);
-
- if (DAPL_BAD_HANDLE(ep_handle, DAPL_MAGIC_EP))
- return (DAT_ERROR(DAT_INVALID_HANDLE, DAT_INVALID_HANDLE_EP));
-
- ep_ptr = (DAPL_EP *) ep_handle;
- qp_ptr = ep_ptr->qp_handle;
-
- /*
- * Synchronization ok since this buffer is only used for send
- * requests, which aren't allowed to race with each other.
- */
- dat_status = dapls_dto_cookie_alloc(&ep_ptr->req_buffer,
- DAPL_DTO_TYPE_EXTENSION,
- user_cookie, &cookie);
- if (dat_status != DAT_SUCCESS)
- goto bail;
-
- /*
- * Take reference before posting to avoid race conditions with
- * completions
- */
- dapl_os_atomic_inc(&ep_ptr->req_count);
-
- /*
- * Invoke provider specific routine to post DTO
- */
- dat_status = dapls_ib_post_ext_send(ep_ptr, op_type, cookie, segments, /* data segments */
- local_iov, remote_iov, immed_data, /* immed data */
- cmp_add, /* compare or add */
- swap, /* swap */
- flags);
-
- if (dat_status != DAT_SUCCESS) {
- dapl_os_atomic_dec(&ep_ptr->req_count);
- dapls_cookie_dealloc(&ep_ptr->req_buffer, cookie);
- }
-
- bail:
- return dat_status;
-
-}
-
-/*
- * New provider routine to process extended DTO events
- */
-void
-dapls_cqe_to_event_extension(IN DAPL_EP * ep_ptr,
- IN DAPL_COOKIE * cookie,
- IN ib_work_completion_t * cqe_ptr,
- IN DAT_EVENT * event_ptr)
-{
- uint32_t ibtype;
- DAT_DTO_COMPLETION_EVENT_DATA *dto =
- &event_ptr->event_data.dto_completion_event_data;
- DAT_IB_EXTENSION_EVENT_DATA *ext_data = (DAT_IB_EXTENSION_EVENT_DATA *)
- & event_ptr->event_extension_data[0];
- DAT_DTO_COMPLETION_STATUS dto_status;
-
- /* Get status from cqe */
- dto_status = dapls_ib_get_dto_status(cqe_ptr);
-
- dapl_dbg_log(DAPL_DBG_TYPE_EVD,
- " cqe_to_event_ext: dto_ptr %p ext_ptr %p status %d\n",
- dto, ext_data, dto_status);
-
- event_ptr->event_number = DAT_IB_DTO_EVENT;
- dto->ep_handle = cookie->ep;
- dto->user_cookie = cookie->val.dto.cookie;
- dto->operation = DAPL_GET_CQE_DTOS_OPTYPE(cqe_ptr); /* new for 2.0 */
- dto->status = ext_data->status = dto_status;
-
- if (dto_status != DAT_DTO_SUCCESS)
- return;
-
- /*
- * Get operation type from CQ work completion entry and
- * if extended operation then set extended event data
- */
- ibtype = DAPL_GET_CQE_OPTYPE(cqe_ptr);
-
- switch (ibtype) {
-
- case OP_RDMA_WRITE_IMM:
- dapl_dbg_log(DAPL_DBG_TYPE_EVD,
- " cqe_to_event_ext: OP_RDMA_WRITE_IMMED\n");
-
- /* type and outbound rdma write transfer size */
- dto->transfered_length = cookie->val.dto.size;
- ext_data->type = DAT_IB_RDMA_WRITE_IMMED;
- break;
- case OP_RECEIVE_IMM:
- dapl_dbg_log(DAPL_DBG_TYPE_EVD,
- " cqe_to_event_ext: OP_RECEIVE_RDMA_IMMED\n");
-
- /* immed recvd, type and inbound rdma write transfer size */
- dto->transfered_length = DAPL_GET_CQE_BYTESNUM(cqe_ptr);
- ext_data->type = DAT_IB_RDMA_WRITE_IMMED_DATA;
- ext_data->val.immed.data = DAPL_GET_CQE_IMMED_DATA(cqe_ptr);
- break;
- case OP_COMP_AND_SWAP:
- dapl_dbg_log(DAPL_DBG_TYPE_EVD,
- " cqe_to_event_ext: COMP_AND_SWAP_RESP\n");
-
- /* original data is returned in LMR provided with post */
- ext_data->type = DAT_IB_CMP_AND_SWAP;
- dto->transfered_length = DAPL_GET_CQE_BYTESNUM(cqe_ptr);
- break;
- case OP_FETCH_AND_ADD:
- dapl_dbg_log(DAPL_DBG_TYPE_EVD,
- " cqe_to_event_ext: FETCH_AND_ADD_RESP\n");
-
- /* original data is returned in LMR provided with post */
- ext_data->type = DAT_IB_FETCH_AND_ADD;
- dto->transfered_length = DAPL_GET_CQE_BYTESNUM(cqe_ptr);
- break;
- default:
- /* not extended operation */
- ext_data->status = DAT_IB_OP_ERR;
- dto->status = DAT_DTO_ERR_TRANSPORT;
- break;
- }
-}
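
On the completion side, the extension event data filled in above is what the consumer sees after dequeuing the DTO EVD. A minimal sketch, assuming the dat2 headers, of pulling the 32-bit immediate out of a DAT_IB_RDMA_WRITE_IMMED_DATA completion:

#include <dat2/dat.h>
#include <dat2/dat_ib_extensions.h>

/* Sketch only: returns 1 and fills *immed when the dequeued event is a
 * successful inbound rdma-write-with-immediate, 0 otherwise. */
static int get_immed_data(DAT_EVD_HANDLE dto_evd, DAT_UINT32 *immed)
{
        DAT_EVENT event;
        DAT_IB_EXTENSION_EVENT_DATA *ext;

        if (dat_evd_dequeue(dto_evd, &event) != DAT_SUCCESS)
                return 0;
        if (event.event_number != DAT_IB_DTO_EVENT)
                return 0;       /* not an extended DTO completion */

        ext = (DAT_IB_EXTENSION_EVENT_DATA *)&event.event_extension_data[0];
        if (event.event_data.dto_completion_event_data.status != DAT_DTO_SUCCESS ||
            ext->type != DAT_IB_RDMA_WRITE_IMMED_DATA)
                return 0;

        *immed = ext->val.immed.data;
        return 1;
}
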
diff --git a/dapl/openib_cma/dapl_ib_mem.c b/dapl/openib_cma/dapl_ib_mem.c
deleted file mode 100755
index 7e73044..0000000
--- a/dapl/openib_cma/dapl_ib_mem.c
+++ /dev/null
@@ -1,380 +0,0 @@
-/*
- * Copyright (c) 2005-2007 Intel Corporation. All rights reserved.
- *
- * This Software is licensed under one of the following licenses:
- *
- * 1) under the terms of the "Common Public License 1.0" a copy of which is
- * available from the Open Source Initiative, see
- * http://www.opensource.org/licenses/cpl.php.
- *
- * 2) under the terms of the "The BSD License" a copy of which is
- * available from the Open Source Initiative, see
- * http://www.opensource.org/licenses/bsd-license.php.
- *
- * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
- * copy of which is available from the Open Source Initiative, see
- * http://www.opensource.org/licenses/gpl-license.php.
- *
- * Licensee has the right to choose one of the above licenses.
- *
- * Redistributions of source code must retain the above copyright
- * notice and one of the license notices.
- *
- * Redistributions in binary form must reproduce both the above copyright
- * notice, one of the license notices in the documentation
- * and/or other materials provided with the distribution.
- */
-
-/**********************************************************************
- *
- * MODULE: dapl_ib_mem.c
- *
- * PURPOSE: Memory windows, registration, and protection domain
- *
- * $Id:$
- *
- **********************************************************************/
-
-#include "dapl.h"
-#include "dapl_adapter_util.h"
-#include "dapl_lmr_util.h"
-
-/*
- * dapls_convert_privileges
- *
- * Convert LMR privileges to provider
- *
- * Input:
- * DAT_MEM_PRIV_FLAGS
- *
- * Output:
- * none
- *
- * Returns:
- * ibv_access_flags
- *
- */
-STATIC _INLINE_ int dapls_convert_privileges(IN DAT_MEM_PRIV_FLAGS privileges)
-{
- int access = 0;
-
- /*
- * if (DAT_MEM_PRIV_LOCAL_READ_FLAG & privileges) do nothing
- */
- if (DAT_MEM_PRIV_LOCAL_WRITE_FLAG & privileges)
- access |= IBV_ACCESS_LOCAL_WRITE;
- if (DAT_MEM_PRIV_REMOTE_WRITE_FLAG & privileges)
- access |= IBV_ACCESS_REMOTE_WRITE;
- if (DAT_MEM_PRIV_REMOTE_READ_FLAG & privileges)
- access |= IBV_ACCESS_REMOTE_READ;
-#ifdef DAT_EXTENSIONS
- if (DAT_IB_MEM_PRIV_REMOTE_ATOMIC & privileges)
- access |= IBV_ACCESS_REMOTE_ATOMIC;
-#endif
-
- return access;
-}
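
The access flags built above are handed straight to ibv_reg_mr(); a standalone sketch, assuming a valid PD and buffer, of the registration the provider performs for an LMR that also allows remote atomics:

#include <stddef.h>
#include <infiniband/verbs.h>

/* Sketch only: same flag set dapls_convert_privileges() produces for
 * local write + remote write + DAT_IB_MEM_PRIV_REMOTE_ATOMIC. */
static struct ibv_mr *reg_atomic_buf(struct ibv_pd *pd, void *buf, size_t len)
{
        int access = IBV_ACCESS_LOCAL_WRITE |
                     IBV_ACCESS_REMOTE_WRITE |
                     IBV_ACCESS_REMOTE_ATOMIC;

        return ibv_reg_mr(pd, buf, len, access);        /* NULL on failure */
}
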
-
-/*
- * dapl_ib_pd_alloc
- *
- * Alloc a PD
- *
- * Input:
- * ia_handle IA handle
- * pz pointer to PZ struct
- *
- * Output:
- * none
- *
- * Returns:
- * DAT_SUCCESS
- * DAT_INSUFFICIENT_RESOURCES
- *
- */
-DAT_RETURN dapls_ib_pd_alloc(IN DAPL_IA * ia_ptr, IN DAPL_PZ * pz)
-{
- /* get a protection domain */
- pz->pd_handle = ibv_alloc_pd(ia_ptr->hca_ptr->ib_hca_handle);
- if (!pz->pd_handle)
- return (dapl_convert_errno(ENOMEM, "alloc_pd"));
-
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- " pd_alloc: pd_handle=%p\n", pz->pd_handle);
-
- return DAT_SUCCESS;
-}
-
-/*
- * dapl_ib_pd_free
- *
- * Free a PD
- *
- * Input:
- * ia_handle IA handle
- * PZ_ptr pointer to PZ struct
- *
- * Output:
- * none
- *
- * Returns:
- * DAT_SUCCESS
- * DAT_INVALID_STATE
- *
- */
-DAT_RETURN dapls_ib_pd_free(IN DAPL_PZ * pz)
-{
- if (pz->pd_handle != IB_INVALID_HANDLE) {
- if (ibv_dealloc_pd(pz->pd_handle))
- return (dapl_convert_errno(errno, "dealloc_pd"));
- pz->pd_handle = IB_INVALID_HANDLE;
- }
- return DAT_SUCCESS;
-}
-
-/*
- * dapl_ib_mr_register
- *
- * Register a virtual memory region
- *
- * Input:
- * ia_handle IA handle
- * lmr pointer to dapl_lmr struct
- * virt_addr virtual address of beginning of mem region
- * length length of memory region
- *
- * Output:
- * none
- *
- * Returns:
- * DAT_SUCCESS
- * DAT_INSUFFICIENT_RESOURCES
- *
- */
-DAT_RETURN
-dapls_ib_mr_register(IN DAPL_IA * ia_ptr,
- IN DAPL_LMR * lmr,
- IN DAT_PVOID virt_addr,
- IN DAT_VLEN length,
- IN DAT_MEM_PRIV_FLAGS privileges, IN DAT_VA_TYPE va_type)
-{
- ib_pd_handle_t ib_pd_handle;
-
- ib_pd_handle = ((DAPL_PZ *) lmr->param.pz_handle)->pd_handle;
-
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- " mr_register: ia=%p, lmr=%p va=%p ln=%d pv=0x%x\n",
- ia_ptr, lmr, virt_addr, length, privileges);
-
- /* TODO: shared memory */
- if (lmr->param.mem_type == DAT_MEM_TYPE_SHARED_VIRTUAL) {
- dapl_dbg_log(DAPL_DBG_TYPE_ERR,
- " mr_register_shared: NOT IMPLEMENTED\n");
- return DAT_ERROR(DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);
- }
-
- /* IB verbs does not support zero-based virtual addressing */
- if (va_type == DAT_VA_TYPE_ZB) {
- dapl_dbg_log(DAPL_DBG_TYPE_ERR,
- " va_type == DAT_VA_TYPE_ZB: NOT SUPPORTED\n");
- return DAT_ERROR(DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);
- }
-
- /* local read is default on IB */
- lmr->mr_handle =
- ibv_reg_mr(((DAPL_PZ *) lmr->param.pz_handle)->pd_handle,
- virt_addr, length, dapls_convert_privileges(privileges));
-
- if (!lmr->mr_handle)
- return (dapl_convert_errno(ENOMEM, "reg_mr"));
-
- lmr->param.lmr_context = lmr->mr_handle->lkey;
- lmr->param.rmr_context = lmr->mr_handle->rkey;
- lmr->param.registered_size = length;
- lmr->param.registered_address = (DAT_VADDR) (uintptr_t) virt_addr;
-
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- " mr_register: mr=%p addr=%p pd %p ctx %p "
- "lkey=0x%x rkey=0x%x priv=%x\n",
- lmr->mr_handle, lmr->mr_handle->addr,
- lmr->mr_handle->pd, lmr->mr_handle->context,
- lmr->mr_handle->lkey, lmr->mr_handle->rkey,
- length, dapls_convert_privileges(privileges));
-
- return DAT_SUCCESS;
-}
-
-/*
- * dapl_ib_mr_deregister
- *
- * Free a memory region
- *
- * Input:
- * lmr pointer to dapl_lmr struct
- *
- * Output:
- * none
- *
- * Returns:
- * DAT_SUCCESS
- * DAT_INVALID_STATE
- *
- */
-DAT_RETURN dapls_ib_mr_deregister(IN DAPL_LMR * lmr)
-{
- if (lmr->mr_handle != IB_INVALID_HANDLE) {
- if (ibv_dereg_mr(lmr->mr_handle))
- return (dapl_convert_errno(errno, "dereg_pd"));
- lmr->mr_handle = IB_INVALID_HANDLE;
- }
- return DAT_SUCCESS;
-}
-
-/*
- * dapl_ib_mr_register_shared
- *
- * Register a shared virtual memory region
- *
- * Input:
- * ia_ptr IA handle
- * lmr pointer to dapl_lmr struct
- * virt_addr virtual address of beginning of mem region
- * length length of memory region
- *
- * Output:
- * none
- *
- * Returns:
- * DAT_SUCCESS
- * DAT_INSUFFICIENT_RESOURCES
- *
- */
-DAT_RETURN
-dapls_ib_mr_register_shared(IN DAPL_IA * ia_ptr,
- IN DAPL_LMR * lmr,
- IN DAT_MEM_PRIV_FLAGS privileges,
- IN DAT_VA_TYPE va_type)
-{
- dapl_dbg_log(DAPL_DBG_TYPE_ERR,
- " mr_register_shared: NOT IMPLEMENTED\n");
-
- return DAT_ERROR(DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);
-}
-
-/*
- * dapls_ib_mw_alloc
- *
- * Allocate a memory window associated with a protection domain
- *
- * Input:
- * rmr Initialized rmr to hold binding handles
- *
- * Output:
- * none
- *
- * Returns:
- * DAT_SUCCESS
- * DAT_INSUFFICIENT_RESOURCES
- *
- */
-DAT_RETURN dapls_ib_mw_alloc(IN DAPL_RMR * rmr)
-{
-
- dapl_dbg_log(DAPL_DBG_TYPE_ERR, " mw_alloc: NOT IMPLEMENTED\n");
-
- return DAT_ERROR(DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);
-}
-
-/*
- * dapls_ib_mw_free
- *
- * Free a memory window
- *
- * Input:
- * rmr Initialized rmr to hold binding handles
- *
- * Output:
- * none
- *
- * Returns:
- * DAT_SUCCESS
- * DAT_INVALID_STATE
- *
- */
-DAT_RETURN dapls_ib_mw_free(IN DAPL_RMR * rmr)
-{
- dapl_dbg_log(DAPL_DBG_TYPE_ERR, " mw_free: NOT IMPLEMENTED\n");
-
- return DAT_ERROR(DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);
-}
-
-/*
- * dapls_ib_mw_bind
- *
- * Bind a protection domain to a memory window
- *
- * Input:
- * rmr Initialized rmr to hold binding handles
- *
- * Output:
- * none
- *
- * Returns:
- * DAT_SUCCESS
- * DAT_INVALID_PARAMETER;
- * DAT_INSUFFICIENT_RESOURCES
- *
- */
-DAT_RETURN
-dapls_ib_mw_bind(IN DAPL_RMR * rmr,
- IN DAPL_LMR * lmr,
- IN DAPL_EP * ep,
- IN DAPL_COOKIE * cookie,
- IN DAT_VADDR virtual_address,
- IN DAT_VLEN length,
- IN DAT_MEM_PRIV_FLAGS mem_priv, IN DAT_BOOLEAN is_signaled)
-{
- dapl_dbg_log(DAPL_DBG_TYPE_ERR, " mw_bind: NOT IMPLEMENTED\n");
-
- return DAT_ERROR(DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);
-}
-
-/*
- * dapls_ib_mw_unbind
- *
- * Unbind a protection domain from a memory window
- *
- * Input:
- * rmr Initialized rmr to hold binding handles
- *
- * Output:
- * none
- *
- * Returns:
- * DAT_SUCCESS
- * DAT_INVALID_PARAMETER;
- * DAT_INVALID_STATE;
- * DAT_INSUFFICIENT_RESOURCES
- *
- */
-DAT_RETURN
-dapls_ib_mw_unbind(IN DAPL_RMR * rmr,
- IN DAPL_EP * ep,
- IN DAPL_COOKIE * cookie, IN DAT_BOOLEAN is_signaled)
-{
- dapl_dbg_log(DAPL_DBG_TYPE_ERR, " mw_unbind: NOT IMPLEMENTED\n");
-
- return DAT_ERROR(DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);
-}
-
-/*
- * Local variables:
- * c-indent-level: 4
- * c-basic-offset: 4
- * tab-width: 8
- * End:
- */
diff --git a/dapl/openib_cma/dapl_ib_qp.c b/dapl/openib_cma/dapl_ib_qp.c
deleted file mode 100755
index c9a61c3..0000000
--- a/dapl/openib_cma/dapl_ib_qp.c
+++ /dev/null
@@ -1,331 +0,0 @@
-/*
- * Copyright (c) 2005-2007 Intel Corporation. All rights reserved.
- *
- * This Software is licensed under one of the following licenses:
- *
- * 1) under the terms of the "Common Public License 1.0" a copy of which is
- * available from the Open Source Initiative, see
- * http://www.opensource.org/licenses/cpl.php.
- *
- * 2) under the terms of the "The BSD License" a copy of which is
- * available from the Open Source Initiative, see
- * http://www.opensource.org/licenses/bsd-license.php.
- *
- * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
- * copy of which is available from the Open Source Initiative, see
- * http://www.opensource.org/licenses/gpl-license.php.
- *
- * Licensee has the right to choose one of the above licenses.
- *
- * Redistributions of source code must retain the above copyright
- * notice and one of the license notices.
- *
- * Redistributions in binary form must reproduce both the above copyright
- * notice, one of the license notices in the documentation
- * and/or other materials provided with the distribution.
- */
-
-/**********************************************************************
- *
- * MODULE: dapl_ib_qp.c
- *
- * PURPOSE: QP routines for access to OFED IB Verbs
- *
- * $Id: $
- *
- **********************************************************************/
-
-#include "dapl.h"
-#include "dapl_adapter_util.h"
-
-extern struct rdma_event_channel *g_cm_events;
-
-/*
- * dapl_ib_qp_alloc
- *
- * Alloc a QP
- *
- * Input:
- * *ep_ptr pointer to EP INFO
- * ib_hca_handle provider HCA handle
- * ib_pd_handle provider protection domain handle
- * cq_recv provider recv CQ handle
- * cq_send provider send CQ handle
- *
- * Output:
- * none
- *
- * Returns:
- * DAT_SUCCESS
- * DAT_INSUFFICIENT_RESOURCES
- * DAT_INTERNAL_ERROR
- *
- */
-DAT_RETURN dapls_ib_qp_alloc(IN DAPL_IA * ia_ptr,
- IN DAPL_EP * ep_ptr, IN DAPL_EP * ep_ctx_ptr)
-{
- DAT_EP_ATTR *attr;
- DAPL_EVD *rcv_evd, *req_evd;
- ib_cq_handle_t rcv_cq, req_cq;
- ib_pd_handle_t ib_pd_handle;
- struct ibv_qp_init_attr qp_create;
- dp_ib_cm_handle_t conn;
- struct rdma_cm_id *cm_id;
-
- dapl_dbg_log(DAPL_DBG_TYPE_EP,
- " qp_alloc: ia_ptr %p ep_ptr %p ep_ctx_ptr %p\n",
- ia_ptr, ep_ptr, ep_ctx_ptr);
-
- attr = &ep_ptr->param.ep_attr;
- ib_pd_handle = ((DAPL_PZ *) ep_ptr->param.pz_handle)->pd_handle;
- rcv_evd = (DAPL_EVD *) ep_ptr->param.recv_evd_handle;
- req_evd = (DAPL_EVD *) ep_ptr->param.request_evd_handle;
-
- /*
- * DAT allows usage model of EP's with no EVD's but IB does not.
- * Create a zero-entry CQ under the covers to satisfy verbs and
- * catch any invalid posting.
- */
- if (rcv_evd != DAT_HANDLE_NULL)
- rcv_cq = rcv_evd->ib_cq_handle;
- else if (ia_ptr->hca_ptr->ib_trans.ib_cq_empty)
- rcv_cq = ia_ptr->hca_ptr->ib_trans.ib_cq_empty;
- else {
- struct ibv_comp_channel *channel =
- ia_ptr->hca_ptr->ib_trans.ib_cq;
-#ifdef CQ_WAIT_OBJECT
- if (rcv_evd && rcv_evd->cq_wait_obj_handle)
- channel = rcv_evd->cq_wait_obj_handle->events;
-#endif
- /* Call IB verbs to create CQ */
- rcv_cq = ibv_create_cq(ia_ptr->hca_ptr->ib_hca_handle,
- 0, NULL, channel, 0);
-
- if (rcv_cq == IB_INVALID_HANDLE)
- return (dapl_convert_errno(ENOMEM, "create_cq"));
-
- ia_ptr->hca_ptr->ib_trans.ib_cq_empty = rcv_cq;
- }
- if (req_evd != DAT_HANDLE_NULL)
- req_cq = req_evd->ib_cq_handle;
- else
- req_cq = ia_ptr->hca_ptr->ib_trans.ib_cq_empty;
-
- /*
- * IMPLEMENTATION NOTE:
- * uDAPL allows consumers to post buffers on the EP after creation
- * and before a connect request (outbound and inbound). This forces
- * a binding to a device during the hca_open call and requires the
- * consumer to predetermine which device to listen on or connect from.
- * This restriction eliminates any option of listening or connecting
- * over multiple devices. uDAPL should add API's to resolve addresses
- * and bind to the device at the appropriate time (before connect
- * and after CR arrives). Discovery should happen at connection time
- * based on addressing and not on static configuration during open.
- */
-
- /* Allocate CM and initialize lock */
- if ((conn = dapl_os_alloc(sizeof(*conn))) == NULL)
- return (dapl_convert_errno(ENOMEM, "create_cq"));
-
- dapl_os_memzero(conn, sizeof(*conn));
- dapl_os_lock_init(&conn->lock);
-
- /* create CM_ID, bind to local device, create QP */
- if (rdma_create_id(g_cm_events, &cm_id, (void *)conn, RDMA_PS_TCP)) {
- dapl_os_free(conn, sizeof(*conn));
- return (dapl_convert_errno(errno, "create_qp"));
- }
-
- /* open identifies the local device; per DAT specification */
- if (rdma_bind_addr(cm_id,
- (struct sockaddr *)&ia_ptr->hca_ptr->hca_address))
- goto bail;
-
- /* Setup attributes and create qp */
- dapl_os_memzero((void *)&qp_create, sizeof(qp_create));
- qp_create.cap.max_send_wr = attr->max_request_dtos;
- qp_create.cap.max_send_sge = attr->max_request_iov;
- qp_create.cap.max_inline_data =
- ia_ptr->hca_ptr->ib_trans.max_inline_send;
- qp_create.send_cq = req_cq;
-
- /* ibv assumes rcv_cq is never NULL, set to req_cq */
- if (rcv_cq == NULL) {
- qp_create.recv_cq = req_cq;
- qp_create.cap.max_recv_wr = 0;
- qp_create.cap.max_recv_sge = 0;
- } else {
- qp_create.recv_cq = rcv_cq;
- qp_create.cap.max_recv_wr = attr->max_recv_dtos;
- qp_create.cap.max_recv_sge = attr->max_recv_iov;
- }
- qp_create.qp_type = IBV_QPT_RC;
- qp_create.qp_context = (void *)ep_ptr;
-
- /* Let uCMA transition QP states */
- if (rdma_create_qp(cm_id, ib_pd_handle, &qp_create))
- goto bail;
-
- conn->cm_id = cm_id;
- conn->ep = ep_ptr;
- conn->hca = ia_ptr->hca_ptr;
-
- /* setup timers for address and route resolution */
- conn->arp_timeout = dapl_os_get_env_val("DAPL_CM_ARP_TIMEOUT_MS",
- IB_ARP_TIMEOUT);
- conn->arp_retries = dapl_os_get_env_val("DAPL_CM_ARP_RETRY_COUNT",
- IB_ARP_RETRY_COUNT);
- conn->route_timeout = dapl_os_get_env_val("DAPL_CM_ROUTE_TIMEOUT_MS",
- IB_ROUTE_TIMEOUT);
- conn->route_retries = dapl_os_get_env_val("DAPL_CM_ROUTE_RETRY_COUNT",
- IB_ROUTE_RETRY_COUNT);
-
- /* setup up ep->param to reference the bound local address and port */
- ep_ptr->param.local_ia_address_ptr = &cm_id->route.addr.src_addr;
- ep_ptr->param.local_port_qual = rdma_get_src_port(cm_id);
-
- ep_ptr->qp_handle = conn;
- ep_ptr->qp_state = IB_QP_STATE_INIT;
-
- dapl_dbg_log(DAPL_DBG_TYPE_EP,
- " qp_alloc: qpn %p sq %d,%d rq %d,%d port=%d\n",
- ep_ptr->qp_handle->cm_id->qp->qp_num,
- qp_create.cap.max_send_wr, qp_create.cap.max_send_sge,
- qp_create.cap.max_recv_wr, qp_create.cap.max_recv_sge,
- ep_ptr->param.local_port_qual);
-
- return DAT_SUCCESS;
- bail:
- rdma_destroy_id(cm_id);
- dapl_os_free(conn, sizeof(*conn));
- return (dapl_convert_errno(errno, "create_qp"));
-}
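
The ARP and route resolution knobs read above are plain environment variables, so a consumer can tune them per process before opening the IA. An illustrative sketch (values are examples only; dat_ia_open() per the DAT 2.0 API):

#include <stdlib.h>
#include <dat2/dat.h>

/* Sketch only: stretch CM address/route resolution for a lossy fabric,
 * then open the IA.  Defaults live in dapl_ib_util.h. */
static DAT_RETURN open_ia_with_cm_tuning(char *ia_name,
                                         DAT_EVD_HANDLE *async_evd,
                                         DAT_IA_HANDLE *ia)
{
        setenv("DAPL_CM_ARP_TIMEOUT_MS", "8000", 1);
        setenv("DAPL_CM_ARP_RETRY_COUNT", "25", 1);
        setenv("DAPL_CM_ROUTE_TIMEOUT_MS", "8000", 1);
        setenv("DAPL_CM_ROUTE_RETRY_COUNT", "25", 1);

        *async_evd = DAT_HANDLE_NULL;   /* let dat_ia_open create one */
        return dat_ia_open(ia_name, 8 /* async EVD qlen */, async_evd, ia);
}
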
-
-/*
- * dapl_ib_qp_free
- *
- * Free a QP
- *
- * Input:
- * ia_handle IA handle
- * *ep_ptr pointer to EP INFO
- *
- * Output:
- * none
- *
- * Returns:
- * DAT_SUCCESS
- * dapl_convert_errno
- *
- */
-DAT_RETURN dapls_ib_qp_free(IN DAPL_IA * ia_ptr, IN DAPL_EP * ep_ptr)
-{
- dapl_dbg_log(DAPL_DBG_TYPE_EP, " qp_free: ep_ptr %p qp %p\n",
- ep_ptr, ep_ptr->qp_handle);
-
- if (ep_ptr->qp_handle != IB_INVALID_HANDLE) {
- /* qp_handle is conn object with reference to cm_id and qp */
- dapli_destroy_conn(ep_ptr->qp_handle);
- ep_ptr->qp_handle = IB_INVALID_HANDLE;
- ep_ptr->qp_state = IB_QP_STATE_ERROR;
- }
- return DAT_SUCCESS;
-}
-
-/*
- * dapl_ib_qp_modify
- *
- * Set the QP to the parameters specified in an EP_PARAM
- *
- * The EP_PARAM structure that is provided has been
- * sanitized such that only non-zero values are valid.
- *
- * Input:
- * ib_hca_handle HCA handle
- * qp_handle QP handle
- * ep_attr Sanitized EP Params
- *
- * Output:
- * none
- *
- * Returns:
- * DAT_SUCCESS
- * DAT_INSUFFICIENT_RESOURCES
- * DAT_INVALID_PARAMETER
- *
- */
-DAT_RETURN dapls_ib_qp_modify(IN DAPL_IA * ia_ptr,
- IN DAPL_EP * ep_ptr, IN DAT_EP_ATTR * attr)
-{
- struct ibv_qp_attr qp_attr;
-
- if (ep_ptr->qp_handle == IB_INVALID_HANDLE)
- return DAT_INVALID_PARAMETER;
-
- /*
- * Check if we have the right qp_state to modify attributes
- */
- if ((ep_ptr->qp_handle->cm_id->qp->state != IBV_QPS_RTR) &&
- (ep_ptr->qp_handle->cm_id->qp->state != IBV_QPS_RTS))
- return DAT_INVALID_STATE;
-
- /* Adjust to current EP attributes */
- dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr));
- qp_attr.cap.max_send_wr = attr->max_request_dtos;
- qp_attr.cap.max_recv_wr = attr->max_recv_dtos;
- qp_attr.cap.max_send_sge = attr->max_request_iov;
- qp_attr.cap.max_recv_sge = attr->max_recv_iov;
-
- dapl_dbg_log(DAPL_DBG_TYPE_EP,
- "modify_qp: qp %p sq %d,%d, rq %d,%d\n",
- ep_ptr->qp_handle->cm_id->qp,
- qp_attr.cap.max_send_wr, qp_attr.cap.max_send_sge,
- qp_attr.cap.max_recv_wr, qp_attr.cap.max_recv_sge);
-
- if (ibv_modify_qp(ep_ptr->qp_handle->cm_id->qp, &qp_attr, IBV_QP_CAP)) {
- dapl_dbg_log(DAPL_DBG_TYPE_ERR,
- "modify_qp: modify ep %p qp %p failed\n",
- ep_ptr, ep_ptr->qp_handle->cm_id->qp);
- return (dapl_convert_errno(errno, "modify_qp_state"));
- }
-
- return DAT_SUCCESS;
-}
-
-/*
- * dapls_ib_reinit_ep
- *
- * Move the QP to INIT state again.
- *
- * Input:
- * ep_ptr DAPL_EP
- *
- * Output:
- * none
- *
- * Returns:
- * void
- *
- */
-void dapls_ib_reinit_ep(IN DAPL_EP * ep_ptr)
-{
- /* uCMA does not allow reuse of CM_ID, destroy and create new one */
- if (ep_ptr->qp_handle != IB_INVALID_HANDLE) {
-
- /* destroy */
- dapli_destroy_conn(ep_ptr->qp_handle);
-
- /* create new CM_ID and QP */
- ep_ptr->qp_handle = IB_INVALID_HANDLE;
- dapls_ib_qp_alloc(ep_ptr->header.owner_ia, ep_ptr, ep_ptr);
- }
-}
-
-/*
- * Local variables:
- * c-indent-level: 4
- * c-basic-offset: 4
- * tab-width: 8
- * End:
- */
diff --git a/dapl/openib_cma/dapl_ib_util.c b/dapl/openib_cma/dapl_ib_util.c
deleted file mode 100755
index bf23d43..0000000
--- a/dapl/openib_cma/dapl_ib_util.c
+++ /dev/null
@@ -1,1134 +0,0 @@
-/*
- * Copyright (c) 2005-2008 Intel Corporation. All rights reserved.
- *
- * This Software is licensed under one of the following licenses:
- *
- * 1) under the terms of the "Common Public License 1.0" a copy of which is
- * available from the Open Source Initiative, see
- * http://www.opensource.org/licenses/cpl.php.
- *
- * 2) under the terms of the "The BSD License" a copy of which is
- * available from the Open Source Initiative, see
- * http://www.opensource.org/licenses/bsd-license.php.
- *
- * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
- * copy of which is available from the Open Source Initiative, see
- * http://www.opensource.org/licenses/gpl-license.php.
- *
- * Licensee has the right to choose one of the above licenses.
- *
- * Redistributions of source code must retain the above copyright
- * notice and one of the license notices.
- *
- * Redistributions in binary form must reproduce both the above copyright
- * notice, one of the license notices in the documentation
- * and/or other materials provided with the distribution.
- */
-
-/**********************************************************************
- *
- * MODULE: dapl_ib_util.c
- *
- * PURPOSE: OFED provider - init, open, close, utilities, work thread
- *
- * $Id:$
- *
- **********************************************************************/
-
-#ifdef RCSID
-static const char rcsid[] = "$Id: $";
-#endif
-
-#include "openib_osd.h"
-#include "dapl.h"
-#include "dapl_adapter_util.h"
-#include "dapl_ib_util.h"
-#include "dapl_osd.h"
-
-#include <stdlib.h>
-
-int g_dapl_loopback_connection = 0;
-struct rdma_event_channel *g_cm_events = NULL;
-ib_thread_state_t g_ib_thread_state = 0;
-DAPL_OS_THREAD g_ib_thread;
-DAPL_OS_LOCK g_hca_lock;
-struct dapl_llist_entry *g_hca_list;
-
-#if defined(_WIN64) || defined(_WIN32)
-#include "..\..\..\..\..\etc\user\comp_channel.cpp"
-#include "..\..\..\..\..\etc\user\dlist.c"
-#include <rdma\winverbs.h>
-
-struct ibvw_windata windata;
-
-static int getipaddr_netdev(char *name, char *addr, int addr_len)
-{
- IWVProvider *prov;
- WV_DEVICE_ADDRESS devaddr;
- struct addrinfo *res, *ai;
- HRESULT hr;
- int index;
-
- if (strncmp(name, "rdma_dev", 8)) {
- return EINVAL;
- }
-
- index = atoi(name + 8);
-
- hr = WvGetObject(&IID_IWVProvider, (LPVOID *) &prov);
- if (FAILED(hr)) {
- return hr;
- }
-
- hr = getaddrinfo("..localmachine", NULL, NULL, &res);
- if (hr) {
- goto release;
- }
-
- for (ai = res; ai; ai = ai->ai_next) {
- hr = prov->lpVtbl->TranslateAddress(prov, ai->ai_addr, &devaddr);
- if (SUCCEEDED(hr) && (ai->ai_addrlen <= addr_len) && (index-- == 0)) {
- memcpy(addr, ai->ai_addr, ai->ai_addrlen);
- goto free;
- }
- }
- hr = ENODEV;
-
-free:
- freeaddrinfo(res);
-release:
- prov->lpVtbl->Release(prov);
- return hr;
-}
-
-static int dapls_os_init(void)
-{
- return ibvw_get_windata(&windata, IBVW_WINDATA_VERSION);
-}
-
-static void dapls_os_release(void)
-{
- if (windata.comp_mgr)
- ibvw_release_windata(&windata, IBVW_WINDATA_VERSION);
- windata.comp_mgr = NULL;
-}
-
-static int dapls_config_comp_channel(struct ibv_comp_channel *channel)
-{
- channel->comp_channel.Milliseconds = 0;
- return 0;
-}
-
-static int dapls_config_cm_channel(struct rdma_event_channel *channel)
-{
- channel->channel.Milliseconds = 0;
- return 0;
-}
-
-static int dapls_config_verbs(struct ibv_context *verbs)
-{
- verbs->channel.Milliseconds = 0;
- return 0;
-}
-
-static int dapls_thread_signal(void)
-{
- CompManagerCancel(windata.comp_mgr);
- return 0;
-}
-#else // _WIN64 || WIN32
-int g_ib_pipe[2];
-
-static int dapls_os_init(void)
-{
- /* create pipe for waking up work thread */
- return pipe(g_ib_pipe);
-}
-
-static void dapls_os_release(void)
-{
- /* close pipe? */
-}
-
-/* Get IP address using network device name */
-static int getipaddr_netdev(char *name, char *addr, int addr_len)
-{
- struct ifreq ifr;
- int skfd, ret, len;
-
- /* Fill in the structure */
- snprintf(ifr.ifr_name, IFNAMSIZ, "%s", name);
- ifr.ifr_hwaddr.sa_family = ARPHRD_INFINIBAND;
-
- /* Create a socket fd */
- skfd = socket(PF_INET, SOCK_STREAM, 0);
- ret = ioctl(skfd, SIOCGIFADDR, &ifr);
- if (ret)
- goto bail;
-
- switch (ifr.ifr_addr.sa_family) {
-#ifdef AF_INET6
- case AF_INET6:
- len = sizeof(struct sockaddr_in6);
- break;
-#endif
- case AF_INET:
- default:
- len = sizeof(struct sockaddr);
- break;
- }
-
- if (len <= addr_len)
- memcpy(addr, &ifr.ifr_addr, len);
- else
- ret = EINVAL;
-
- bail:
- close(skfd);
- return ret;
-}
-
-static int dapls_config_fd(int fd)
-{
- int opts;
-
- opts = fcntl(fd, F_GETFL);
- if (opts < 0 || fcntl(fd, F_SETFL, opts | O_NONBLOCK) < 0) {
- dapl_log(DAPL_DBG_TYPE_ERR,
- " dapls_config_fd: fcntl on fd %d ERR %d %s\n",
- fd, opts, strerror(errno));
- return errno;
- }
-
- return 0;
-}
-
-static int dapls_config_comp_channel(struct ibv_comp_channel *channel)
-{
- return dapls_config_fd(channel->fd);
-}
-
-static int dapls_config_cm_channel(struct rdma_event_channel *channel)
-{
- return dapls_config_fd(channel->fd);
-}
-
-static int dapls_config_verbs(struct ibv_context *verbs)
-{
- return dapls_config_fd(verbs->async_fd);
-}
-
-static int dapls_thread_signal(void)
-{
- return write(g_ib_pipe[1], "w", sizeof "w");
-}
-#endif
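
The non-Windows branch above relies on the usual self-pipe trick to bounce the work thread out of poll(); condensed into one place (independent of the dapl globals), the pattern behind g_ib_pipe, dapls_thread_signal() and the poll loop further down looks like this:

#include <poll.h>
#include <unistd.h>

static int wake_pipe[2];        /* pipe(wake_pipe) once at init;
                                 * [0] polled by the worker, [1] written by signalers */

static int wakeup_worker(void)
{
        return write(wake_pipe[1], "w", 1);     /* any byte wakes poll() */
}

static void worker_wait(int event_fd)
{
        struct pollfd ufds[2];
        char drain[2];

        ufds[0].fd = wake_pipe[0];  ufds[0].events = POLLIN;  ufds[0].revents = 0;
        ufds[1].fd = event_fd;      ufds[1].events = POLLIN;  ufds[1].revents = 0;

        if (poll(ufds, 2, -1) > 0 && (ufds[0].revents & POLLIN))
                read(wake_pipe[0], drain, sizeof(drain));  /* drain, then re-check state */
}
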
-
-/* Get IP address using network name, address, or device name */
-static int getipaddr(char *name, char *addr, int len)
-{
- struct addrinfo *res;
-
- /* assume netdev for first attempt, then network and address type */
- if (getipaddr_netdev(name, addr, len)) {
- if (getaddrinfo(name, NULL, NULL, &res)) {
- dapl_log(DAPL_DBG_TYPE_ERR,
- " open_hca: getaddr_netdev ERROR:"
- " %s. Is %s configured?\n",
- strerror(errno), name);
- return 1;
- } else {
- if (len >= res->ai_addrlen)
- memcpy(addr, res->ai_addr, res->ai_addrlen);
- else {
- freeaddrinfo(res);
- return 1;
- }
- freeaddrinfo(res);
- }
- }
-
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- " getipaddr: family %d port %d addr %d.%d.%d.%d\n",
- ((struct sockaddr_in *)addr)->sin_family,
- ((struct sockaddr_in *)addr)->sin_port,
- ((struct sockaddr_in *)addr)->sin_addr.s_addr >> 0 & 0xff,
- ((struct sockaddr_in *)addr)->sin_addr.s_addr >> 8 & 0xff,
- ((struct sockaddr_in *)addr)->sin_addr.s_addr >> 16 & 0xff,
- ((struct sockaddr_in *)addr)->sin_addr.s_addr >> 24 & 0xff);
-
- return 0;
-}
-
-/*
- * dapls_ib_init, dapls_ib_release
- *
- * Initialize Verb related items for device open
- *
- * Input:
- * none
- *
- * Output:
- * none
- *
- * Returns:
- * 0 success, -1 error
- *
- */
-int32_t dapls_ib_init(void)
-{
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " dapl_ib_init: \n");
-
- /* initialize hca_list lock */
- dapl_os_lock_init(&g_hca_lock);
-
- /* initialize hca list for CQ events */
- dapl_llist_init_head(&g_hca_list);
-
- if (dapls_os_init())
- return 1;
-
- return 0;
-}
-
-int32_t dapls_ib_release(void)
-{
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " dapl_ib_release: \n");
- dapli_ib_thread_destroy();
- if (g_cm_events != NULL)
- rdma_destroy_event_channel(g_cm_events);
- dapls_os_release();
- return 0;
-}
-
-/*
- * dapls_ib_open_hca
- *
- * Open HCA
- *
- * Input:
- * *hca_name pointer to provider device name
- * *ib_hca_handle_p pointer to provide HCA handle
- *
- * Output:
- * none
- *
- * Return:
- * DAT_SUCCESS
- * dapl_convert_errno
- *
- */
-DAT_RETURN dapls_ib_open_hca(IN IB_HCA_NAME hca_name, IN DAPL_HCA * hca_ptr)
-{
- struct rdma_cm_id *cm_id = NULL;
- union ibv_gid *gid;
- int ret;
- DAT_RETURN dat_status;
-
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- " open_hca: %s - %p\n", hca_name, hca_ptr);
-
- /* Setup the global cm event channel */
- dapl_os_lock(&g_hca_lock);
- if (g_cm_events == NULL) {
- g_cm_events = rdma_create_event_channel();
- if (g_cm_events == NULL) {
- dapl_dbg_log(DAPL_DBG_TYPE_ERR,
- " open_hca: ERR - RDMA channel %s\n",
- strerror(errno));
- return DAT_INTERNAL_ERROR;
- }
- }
- dapl_os_unlock(&g_hca_lock);
-
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- " open_hca: RDMA channel created (%p)\n", g_cm_events);
-
- dat_status = dapli_ib_thread_init();
- if (dat_status != DAT_SUCCESS)
- return dat_status;
-
- /* HCA name will be hostname or IP address */
- if (getipaddr((char *)hca_name,
- (char *)&hca_ptr->hca_address, sizeof(DAT_SOCK_ADDR6)))
- return DAT_INVALID_ADDRESS;
-
- /* cm_id will bind local device/GID based on IP address */
- if (rdma_create_id(g_cm_events, &cm_id, (void *)hca_ptr, RDMA_PS_TCP)) {
- dapl_log(DAPL_DBG_TYPE_ERR,
- " open_hca: rdma_create_id ERR %s\n", strerror(errno));
- return DAT_INTERNAL_ERROR;
- }
- ret = rdma_bind_addr(cm_id, (struct sockaddr *)&hca_ptr->hca_address);
- if ((ret) || (cm_id->verbs == NULL)) {
- rdma_destroy_id(cm_id);
- dapl_log(DAPL_DBG_TYPE_ERR,
- " open_hca: rdma_bind ERR %s."
- " Is %s configured?\n", strerror(errno), hca_name);
- return DAT_INVALID_ADDRESS;
- }
-
- /* keep reference to IB device and cm_id */
- hca_ptr->ib_trans.cm_id = cm_id;
- hca_ptr->ib_hca_handle = cm_id->verbs;
- dapls_config_verbs(cm_id->verbs);
- hca_ptr->port_num = cm_id->port_num;
- gid = &cm_id->route.addr.addr.ibaddr.sgid;
-
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- " open_hca: ctx=%p port=%d GID subnet %016llx id %016llx\n",
- cm_id->verbs, cm_id->port_num,
- (unsigned long long)ntohll(gid->global.subnet_prefix),
- (unsigned long long)ntohll(gid->global.interface_id));
-
- /* set inline max with env or default, get local lid and gid 0 */
- if (hca_ptr->ib_hca_handle->device->transport_type
- == IBV_TRANSPORT_IWARP)
- hca_ptr->ib_trans.max_inline_send =
- dapl_os_get_env_val("DAPL_MAX_INLINE",
- INLINE_SEND_IWARP_DEFAULT);
- else
- hca_ptr->ib_trans.max_inline_send =
- dapl_os_get_env_val("DAPL_MAX_INLINE",
- INLINE_SEND_IB_DEFAULT);
-
- /* set CM timer defaults */
- hca_ptr->ib_trans.max_cm_timeout =
- dapl_os_get_env_val("DAPL_MAX_CM_RESPONSE_TIME",
- IB_CM_RESPONSE_TIMEOUT);
- hca_ptr->ib_trans.max_cm_retries =
- dapl_os_get_env_val("DAPL_MAX_CM_RETRIES", IB_CM_RETRIES);
-
- /* EVD events without direct CQ channels, non-blocking */
- hca_ptr->ib_trans.ib_cq =
- ibv_create_comp_channel(hca_ptr->ib_hca_handle);
- if (hca_ptr->ib_trans.ib_cq == NULL) {
- dapl_log(DAPL_DBG_TYPE_ERR,
- " open_hca: ibv_create_comp_channel ERR %s\n",
- strerror(errno));
- goto bail;
- }
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " open_hca: CQ channel created\n");
-
- if (dapls_config_comp_channel(hca_ptr->ib_trans.ib_cq)) {
- goto bail;
- }
-
- /*
- * Put new hca_transport on list for async and CQ event processing
- * Wakeup work thread to add to polling list
- */
- dapl_llist_init_entry((DAPL_LLIST_ENTRY *) & hca_ptr->ib_trans.entry);
- dapl_os_lock(&g_hca_lock);
- dapl_llist_add_tail(&g_hca_list,
- (DAPL_LLIST_ENTRY *) & hca_ptr->ib_trans.entry,
- &hca_ptr->ib_trans.entry);
- if (dapls_thread_signal() == -1)
- dapl_log(DAPL_DBG_TYPE_UTIL,
- " open_hca: thread wakeup error = %s\n",
- strerror(errno));
- dapl_os_unlock(&g_hca_lock);
-
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- " open_hca: %s, %s %d.%d.%d.%d INLINE_MAX=%d\n", hca_name,
- ((struct sockaddr_in *)
- &hca_ptr->hca_address)->sin_family == AF_INET ?
- "AF_INET" : "AF_INET6", ((struct sockaddr_in *)
- &hca_ptr->hca_address)->sin_addr.
- s_addr >> 0 & 0xff, ((struct sockaddr_in *)
- &hca_ptr->hca_address)->sin_addr.
- s_addr >> 8 & 0xff, ((struct sockaddr_in *)
- &hca_ptr->hca_address)->sin_addr.
- s_addr >> 16 & 0xff, ((struct sockaddr_in *)
- &hca_ptr->hca_address)->sin_addr.
- s_addr >> 24 & 0xff, hca_ptr->ib_trans.max_inline_send);
-
- hca_ptr->ib_trans.d_hca = hca_ptr;
- return DAT_SUCCESS;
- bail:
- rdma_destroy_id(hca_ptr->ib_trans.cm_id);
- hca_ptr->ib_hca_handle = IB_INVALID_HANDLE;
- return DAT_INTERNAL_ERROR;
-}
-
-/*
- * dapls_ib_close_hca
- *
- * Close HCA
- *
- * Input:
- * DAPL_HCA provide CA handle
- *
- * Output:
- * none
- *
- * Return:
- * DAT_SUCCESS
- * dapl_convert_errno
- *
- */
-DAT_RETURN dapls_ib_close_hca(IN DAPL_HCA * hca_ptr)
-{
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " close_hca: %p->%p\n",
- hca_ptr, hca_ptr->ib_hca_handle);
-
- if (hca_ptr->ib_hca_handle != IB_INVALID_HANDLE) {
- if (rdma_destroy_id(hca_ptr->ib_trans.cm_id))
- return (dapl_convert_errno(errno, "ib_close_device"));
- hca_ptr->ib_hca_handle = IB_INVALID_HANDLE;
- }
-
- dapl_os_lock(&g_hca_lock);
- if (g_ib_thread_state != IB_THREAD_RUN) {
- dapl_os_unlock(&g_hca_lock);
- goto bail;
- }
- dapl_os_unlock(&g_hca_lock);
-
- /*
- * Remove hca from async and CQ event processing list
- * Wakeup work thread to remove from polling list
- */
- hca_ptr->ib_trans.destroy = 1;
- if (dapls_thread_signal() == -1)
- dapl_log(DAPL_DBG_TYPE_UTIL,
- " destroy: thread wakeup error = %s\n",
- strerror(errno));
-
- /* wait for thread to remove HCA references */
- while (hca_ptr->ib_trans.destroy != 2) {
- if (dapls_thread_signal() == -1)
- dapl_log(DAPL_DBG_TYPE_UTIL,
- " destroy: thread wakeup error = %s\n",
- strerror(errno));
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- " ib_thread_destroy: wait on hca %p destroy\n");
- dapl_os_sleep_usec(10000);
- }
- bail:
- return (DAT_SUCCESS);
-}
-
-/*
- * dapls_ib_query_hca
- *
- * Query the hca attribute
- *
- * Input:
- * hca_handl hca handle
- * ia_attr attribute of the ia
- * ep_attr attribute of the ep
- * ip_addr ip address of DET NIC
- *
- * Output:
- * none
- *
- * Returns:
- * DAT_SUCCESS
- * DAT_INVALID_HANDLE
- */
-
-DAT_RETURN dapls_ib_query_hca(IN DAPL_HCA * hca_ptr,
- OUT DAT_IA_ATTR * ia_attr,
- OUT DAT_EP_ATTR * ep_attr,
- OUT DAT_SOCK_ADDR6 * ip_addr)
-{
- struct ibv_device_attr dev_attr;
- struct ibv_port_attr port_attr;
-
- if (hca_ptr->ib_hca_handle == NULL) {
- dapl_dbg_log(DAPL_DBG_TYPE_ERR, " query_hca: BAD handle\n");
- return (DAT_INVALID_HANDLE);
- }
-
- /* local IP address of device, set during ia_open */
- if (ip_addr != NULL)
- memcpy(ip_addr, &hca_ptr->hca_address, sizeof(DAT_SOCK_ADDR6));
-
- if (ia_attr == NULL && ep_attr == NULL)
- return DAT_SUCCESS;
-
- /* query verbs for this device and port attributes */
- if (ibv_query_device(hca_ptr->ib_hca_handle, &dev_attr) ||
- ibv_query_port(hca_ptr->ib_hca_handle,
- hca_ptr->port_num, &port_attr))
- return (dapl_convert_errno(errno, "ib_query_hca"));
-
- /*
- * There is no query for inline data so there is no way to
- * calculate the impact on sge nor the max inline send. Most
- * implementations consume 1 or none so just reduce by 1 until
- * we are provided with a query mechanism from verbs.
- */
- if (hca_ptr->ib_trans.max_inline_send)
- dev_attr.max_sge--;
-
- if (ia_attr != NULL) {
- (void)dapl_os_memzero(ia_attr, sizeof(*ia_attr));
- ia_attr->adapter_name[DAT_NAME_MAX_LENGTH - 1] = '\0';
- ia_attr->vendor_name[DAT_NAME_MAX_LENGTH - 1] = '\0';
- ia_attr->ia_address_ptr =
- (DAT_IA_ADDRESS_PTR) & hca_ptr->hca_address;
-
- dapl_log(DAPL_DBG_TYPE_UTIL,
- "dapl_query_hca: %s %s %s\n", hca_ptr->name,
- ((struct sockaddr_in *)
- ia_attr->ia_address_ptr)->sin_family == AF_INET ?
- "AF_INET" : "AF_INET6",
- inet_ntoa(((struct sockaddr_in *)
- ia_attr->ia_address_ptr)->sin_addr));
-
- ia_attr->hardware_version_major = dev_attr.hw_ver;
- ia_attr->max_eps = dev_attr.max_qp;
- ia_attr->max_dto_per_ep = dev_attr.max_qp_wr;
- ia_attr->max_rdma_read_in = dev_attr.max_res_rd_atom;
- ia_attr->max_rdma_read_out = dev_attr.max_qp_init_rd_atom;
- ia_attr->max_rdma_read_per_ep_in = dev_attr.max_qp_rd_atom;
- ia_attr->max_rdma_read_per_ep_out =
- dev_attr.max_qp_init_rd_atom;
- ia_attr->max_rdma_read_per_ep_in_guaranteed = DAT_TRUE;
- ia_attr->max_rdma_read_per_ep_out_guaranteed = DAT_TRUE;
- ia_attr->max_evds = dev_attr.max_cq;
- ia_attr->max_evd_qlen = dev_attr.max_cqe;
- ia_attr->max_iov_segments_per_dto = dev_attr.max_sge;
- ia_attr->max_lmrs = dev_attr.max_mr;
- /* 32bit attribute from 64bit, 4G-1 limit, DAT v2 needs fix */
- ia_attr->max_lmr_block_size =
- (dev_attr.max_mr_size >> 32) ? ~0 : dev_attr.max_mr_size;
- ia_attr->max_rmrs = dev_attr.max_mw;
- ia_attr->max_lmr_virtual_address = dev_attr.max_mr_size;
- ia_attr->max_rmr_target_address = dev_attr.max_mr_size;
- ia_attr->max_pzs = dev_attr.max_pd;
- ia_attr->max_mtu_size = port_attr.max_msg_sz;
- ia_attr->max_rdma_size = port_attr.max_msg_sz;
- ia_attr->num_transport_attr = 0;
- ia_attr->transport_attr = NULL;
- ia_attr->num_vendor_attr = 0;
- ia_attr->vendor_attr = NULL;
- /* iWARP spec. - 1 sge for RDMA reads */
- if (hca_ptr->ib_hca_handle->device->transport_type
- == IBV_TRANSPORT_IWARP)
- ia_attr->max_iov_segments_per_rdma_read = 1;
- else
- ia_attr->max_iov_segments_per_rdma_read =
- dev_attr.max_sge;
-
- ia_attr->max_iov_segments_per_rdma_write = dev_attr.max_sge;
- /* save rd_atom for peer validation during connect requests */
- hca_ptr->ib_trans.max_rdma_rd_in = dev_attr.max_qp_rd_atom;
- hca_ptr->ib_trans.max_rdma_rd_out =
- dev_attr.max_qp_init_rd_atom;
-#ifdef DAT_EXTENSIONS
- ia_attr->extension_supported = DAT_EXTENSION_IB;
- ia_attr->extension_version = DAT_IB_EXTENSION_VERSION;
-#endif
- dapl_log(DAPL_DBG_TYPE_UTIL,
- "dapl_query_hca: (ver=%x) ep's %d ep_q %d"
- " evd's %d evd_q %d mr %u\n",
- ia_attr->hardware_version_major,
- ia_attr->max_eps, ia_attr->max_dto_per_ep,
- ia_attr->max_evds, ia_attr->max_evd_qlen,
- ia_attr->max_lmr_block_size);
- dapl_log(DAPL_DBG_TYPE_UTIL,
- "dapl_query_hca: msg %llu rdma %llu iov's %d"
- " lmr %d rmr %d rd_in,out %d,%d inline=%d\n",
- ia_attr->max_mtu_size, ia_attr->max_rdma_size,
- ia_attr->max_iov_segments_per_dto, ia_attr->max_lmrs,
- ia_attr->max_rmrs, ia_attr->max_rdma_read_per_ep_in,
- ia_attr->max_rdma_read_per_ep_out,
- hca_ptr->ib_trans.max_inline_send);
- }
-
- if (ep_attr != NULL) {
- (void)dapl_os_memzero(ep_attr, sizeof(*ep_attr));
- ep_attr->max_mtu_size = port_attr.max_msg_sz;
- ep_attr->max_rdma_size = port_attr.max_msg_sz;
- ep_attr->max_recv_dtos = dev_attr.max_qp_wr;
- ep_attr->max_request_dtos = dev_attr.max_qp_wr;
- ep_attr->max_recv_iov = dev_attr.max_sge;
- ep_attr->max_request_iov = dev_attr.max_sge;
- ep_attr->max_rdma_read_in = dev_attr.max_qp_rd_atom;
- ep_attr->max_rdma_read_out = dev_attr.max_qp_init_rd_atom;
- /* iWARP spec. - 1 sge for RDMA reads */
- if (hca_ptr->ib_hca_handle->device->transport_type
- == IBV_TRANSPORT_IWARP)
- ep_attr->max_rdma_read_iov = 1;
- else
- ep_attr->max_rdma_read_iov = dev_attr.max_sge;
-
- ep_attr->max_rdma_write_iov = dev_attr.max_sge;
- dapl_log(DAPL_DBG_TYPE_UTIL,
- "dapl_query_hca: MAX msg %llu dto %d iov %d"
- " rdma i%d,o%d\n",
- ep_attr->max_mtu_size,
- ep_attr->max_recv_dtos, ep_attr->max_recv_iov,
- ep_attr->max_rdma_read_in, ep_attr->max_rdma_read_out);
- }
- return DAT_SUCCESS;
-}
-
-/*
- * dapls_ib_setup_async_callback
- *
- * Set up an asynchronous callbacks of various kinds
- *
- * Input:
- * ia_handle IA handle
- * handler_type type of handler to set up
- * callback_handle handle param for completion callbacks
- * callback callback routine pointer
- * context argument for callback routine
- *
- * Output:
- * none
- *
- * Returns:
- * DAT_SUCCESS
- * DAT_INSUFFICIENT_RESOURCES
- * DAT_INVALID_PARAMETER
- *
- */
-DAT_RETURN dapls_ib_setup_async_callback(IN DAPL_IA * ia_ptr,
- IN DAPL_ASYNC_HANDLER_TYPE type,
- IN DAPL_EVD * evd_ptr,
- IN ib_async_handler_t callback,
- IN void *context)
-{
- ib_hca_transport_t *hca_ptr;
-
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- " setup_async_cb: ia %p type %d hdl %p cb %p ctx %p\n",
- ia_ptr, type, evd_ptr, callback, context);
-
- hca_ptr = &ia_ptr->hca_ptr->ib_trans;
- switch (type) {
- case DAPL_ASYNC_UNAFILIATED:
- hca_ptr->async_unafiliated = (ib_async_handler_t) callback;
- hca_ptr->async_un_ctx = context;
- break;
- case DAPL_ASYNC_CQ_ERROR:
- hca_ptr->async_cq_error = (ib_async_cq_handler_t) callback;
- break;
- case DAPL_ASYNC_CQ_COMPLETION:
- hca_ptr->async_cq = (ib_async_dto_handler_t) callback;
- break;
- case DAPL_ASYNC_QP_ERROR:
- hca_ptr->async_qp_error = (ib_async_qp_handler_t) callback;
- break;
- default:
- break;
- }
- return DAT_SUCCESS;
-}
-
-DAT_RETURN dapli_ib_thread_init(void)
-{
- DAT_RETURN dat_status;
-
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- " ib_thread_init(%d)\n", dapl_os_getpid());
-
- dapl_os_lock(&g_hca_lock);
- if (g_ib_thread_state != IB_THREAD_INIT) {
- dapl_os_unlock(&g_hca_lock);
- return DAT_SUCCESS;
- }
-
- /* uCMA events non-blocking */
- if (dapls_config_cm_channel(g_cm_events)) {
- dapl_os_unlock(&g_hca_lock);
- return (dapl_convert_errno(errno, "create_thread ERR: cm_fd"));
- }
-
- g_ib_thread_state = IB_THREAD_CREATE;
- dapl_os_unlock(&g_hca_lock);
-
- /* create thread to process inbound connect request */
- dat_status = dapl_os_thread_create(dapli_thread, NULL, &g_ib_thread);
- if (dat_status != DAT_SUCCESS)
- return (dapl_convert_errno(errno,
- "create_thread ERR:"
- " check resource limits"));
-
- /* wait for thread to start */
- dapl_os_lock(&g_hca_lock);
- while (g_ib_thread_state != IB_THREAD_RUN) {
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- " ib_thread_init: waiting for ib_thread\n");
- dapl_os_unlock(&g_hca_lock);
- dapl_os_sleep_usec(2000);
- dapl_os_lock(&g_hca_lock);
- }
- dapl_os_unlock(&g_hca_lock);
-
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- " ib_thread_init(%d) exit\n", dapl_os_getpid());
-
- return DAT_SUCCESS;
-}
-
-void dapli_ib_thread_destroy(void)
-{
- int retries = 10;
-
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- " ib_thread_destroy(%d)\n", dapl_os_getpid());
- /*
- * wait for async thread to terminate.
- * pthread_join would be the correct method
- * but some applications have some issues
- */
-
- /* destroy ib_thread, wait for termination, if not already */
- dapl_os_lock(&g_hca_lock);
- if (g_ib_thread_state != IB_THREAD_RUN)
- goto bail;
-
- g_ib_thread_state = IB_THREAD_CANCEL;
- if (dapls_thread_signal() == -1)
- dapl_log(DAPL_DBG_TYPE_UTIL,
- " destroy: thread wakeup error = %s\n",
- strerror(errno));
- while ((g_ib_thread_state != IB_THREAD_EXIT) && (retries--)) {
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- " ib_thread_destroy: waiting for ib_thread\n");
- if (dapls_thread_signal() == -1)
- dapl_log(DAPL_DBG_TYPE_UTIL,
- " destroy: thread wakeup error = %s\n",
- strerror(errno));
- dapl_os_unlock(&g_hca_lock);
- dapl_os_sleep_usec(2000);
- dapl_os_lock(&g_hca_lock);
- }
-
- bail:
- dapl_os_unlock(&g_hca_lock);
-
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- " ib_thread_destroy(%d) exit\n", dapl_os_getpid());
-}
-
-void dapli_async_event_cb(struct _ib_hca_transport *hca)
-{
- struct ibv_async_event event;
-
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " async_event(%p)\n", hca);
-
- if (hca->destroy)
- return;
-
- if (!ibv_get_async_event(hca->cm_id->verbs, &event)) {
-
- switch (event.event_type) {
- case IBV_EVENT_CQ_ERR:
- {
- struct dapl_ep *evd_ptr =
- event.element.cq->cq_context;
-
- dapl_log(DAPL_DBG_TYPE_ERR,
- "dapl async_event CQ (%p) ERR %d\n",
- evd_ptr, event.event_type);
-
- /* report up if async callback still setup */
- if (hca->async_cq_error)
- hca->async_cq_error(hca->cm_id->verbs,
- event.element.cq,
- &event,
- (void *)evd_ptr);
- break;
- }
- case IBV_EVENT_COMM_EST:
- {
- /* Received msgs on connected QP before RTU */
- dapl_log(DAPL_DBG_TYPE_UTIL,
- " async_event COMM_EST(%p) rdata beat RTU\n",
- event.element.qp);
-
- break;
- }
- case IBV_EVENT_QP_FATAL:
- case IBV_EVENT_QP_REQ_ERR:
- case IBV_EVENT_QP_ACCESS_ERR:
- case IBV_EVENT_QP_LAST_WQE_REACHED:
- case IBV_EVENT_SRQ_ERR:
- case IBV_EVENT_SRQ_LIMIT_REACHED:
- case IBV_EVENT_SQ_DRAINED:
- {
- struct dapl_ep *ep_ptr =
- event.element.qp->qp_context;
-
- dapl_log(DAPL_DBG_TYPE_ERR,
- "dapl async_event QP (%p) ERR %d\n",
- ep_ptr, event.event_type);
-
- /* report up if async callback still setup */
- if (hca->async_qp_error)
- hca->async_qp_error(hca->cm_id->verbs,
- ep_ptr->qp_handle,
- &event,
- (void *)ep_ptr);
- break;
- }
- case IBV_EVENT_PATH_MIG:
- case IBV_EVENT_PATH_MIG_ERR:
- case IBV_EVENT_DEVICE_FATAL:
- case IBV_EVENT_PORT_ACTIVE:
- case IBV_EVENT_PORT_ERR:
- case IBV_EVENT_LID_CHANGE:
- case IBV_EVENT_PKEY_CHANGE:
- case IBV_EVENT_SM_CHANGE:
- {
- dapl_log(DAPL_DBG_TYPE_WARN,
- "dapl async_event: DEV ERR %d\n",
- event.event_type);
-
- /* report up if async callback still setup */
- if (hca->async_unafiliated)
- hca->async_unafiliated(hca->cm_id->verbs,
- &event, hca->async_un_ctx);
- break;
- }
- case IBV_EVENT_CLIENT_REREGISTER:
- /* no need to report this event this time */
- dapl_log(DAPL_DBG_TYPE_UTIL,
- " async_event: IBV_EVENT_CLIENT_REREGISTER\n");
- break;
-
- default:
- dapl_log(DAPL_DBG_TYPE_WARN,
- "dapl async_event: %d UNKNOWN\n",
- event.event_type);
- break;
-
- }
- ibv_ack_async_event(&event);
- }
-}
-
-#if defined(_WIN64) || defined(_WIN32)
-/* work thread for uAT, uCM, CQ, and async events */
-void dapli_thread(void *arg)
-{
- struct _ib_hca_transport *hca;
- struct _ib_hca_transport *uhca[8];
- COMP_CHANNEL *channel;
- int ret, idx, cnt;
-
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " ib_thread(%d,0x%x): ENTER: \n",
- dapl_os_getpid(), g_ib_thread);
-
- dapl_os_lock(&g_hca_lock);
- for (g_ib_thread_state = IB_THREAD_RUN;
- g_ib_thread_state == IB_THREAD_RUN; dapl_os_lock(&g_hca_lock)) {
-
- idx = 0;
- hca = dapl_llist_is_empty(&g_hca_list) ? NULL :
- dapl_llist_peek_head(&g_hca_list);
-
- while (hca) {
- uhca[idx++] = hca;
- hca = dapl_llist_next_entry(&g_hca_list,
- (DAPL_LLIST_ENTRY *) & hca->
- entry);
- }
- cnt = idx;
-
- dapl_os_unlock(&g_hca_lock);
- ret = CompManagerPoll(windata.comp_mgr, INFINITE, &channel);
-
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- " ib_thread(%d) poll_event 0x%x\n",
- dapl_os_getpid(), ret);
-
- dapli_cma_event_cb();
-
- /* check and process CQ and ASYNC events, per device */
- for (idx = 0; idx < cnt; idx++) {
- if (uhca[idx]->destroy == 1) {
- dapl_os_lock(&g_hca_lock);
- dapl_llist_remove_entry(&g_hca_list,
- (DAPL_LLIST_ENTRY *) &
- uhca[idx]->entry);
- dapl_os_unlock(&g_hca_lock);
- uhca[idx]->destroy = 2;
- } else {
- dapli_cq_event_cb(uhca[idx]);
- dapli_async_event_cb(uhca[idx]);
- }
- }
- }
-
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " ib_thread(%d) EXIT\n",
- dapl_os_getpid());
- g_ib_thread_state = IB_THREAD_EXIT;
- dapl_os_unlock(&g_hca_lock);
-}
-#else // _WIN64 || WIN32
-/* work thread for uAT, uCM, CQ, and async events */
-void dapli_thread(void *arg)
-{
- struct pollfd ufds[__FD_SETSIZE];
- struct _ib_hca_transport *uhca[__FD_SETSIZE] = { NULL };
- struct _ib_hca_transport *hca;
- int ret, idx, fds;
- char rbuf[2];
-
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- " ib_thread(%d,0x%x): ENTER: pipe %d ucma %d\n",
- dapl_os_getpid(), g_ib_thread, g_ib_pipe[0],
- g_cm_events->fd);
-
- /* Poll across pipe, CM, AT never changes */
- dapl_os_lock(&g_hca_lock);
- g_ib_thread_state = IB_THREAD_RUN;
-
- ufds[0].fd = g_ib_pipe[0]; /* pipe */
- ufds[0].events = POLLIN;
- ufds[1].fd = g_cm_events->fd; /* uCMA */
- ufds[1].events = POLLIN;
-
- while (g_ib_thread_state == IB_THREAD_RUN) {
-
- /* build ufds after pipe and uCMA events */
- ufds[0].revents = 0;
- ufds[1].revents = 0;
- idx = 1;
-
- /* Walk HCA list and setup async and CQ events */
- if (!dapl_llist_is_empty(&g_hca_list))
- hca = dapl_llist_peek_head(&g_hca_list);
- else
- hca = NULL;
-
- while (hca) {
-
- /* uASYNC events */
- ufds[++idx].fd = hca->cm_id->verbs->async_fd;
- ufds[idx].events = POLLIN;
- ufds[idx].revents = 0;
- uhca[idx] = hca;
-
- /* uCQ, non-direct events */
- ufds[++idx].fd = hca->ib_cq->fd;
- ufds[idx].events = POLLIN;
- ufds[idx].revents = 0;
- uhca[idx] = hca;
-
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- " ib_thread(%d) poll_fd: hca[%d]=%p, async=%d"
- " pipe=%d cm=%d cq=d\n",
- dapl_os_getpid(), hca, ufds[idx - 1].fd,
- ufds[0].fd, ufds[1].fd, ufds[idx].fd);
-
- hca = dapl_llist_next_entry(&g_hca_list,
- (DAPL_LLIST_ENTRY *) & hca->
- entry);
- }
-
- /* unlock, and setup poll */
- fds = idx + 1;
- dapl_os_unlock(&g_hca_lock);
- ret = poll(ufds, fds, -1);
- if (ret <= 0) {
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- " ib_thread(%d): ERR %s poll\n",
- dapl_os_getpid(), strerror(errno));
- dapl_os_lock(&g_hca_lock);
- continue;
- }
-
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- " ib_thread(%d) poll_event: "
- " async=0x%x pipe=0x%x cm=0x%x cq=0x%x\n",
- dapl_os_getpid(), ufds[idx - 1].revents,
- ufds[0].revents, ufds[1].revents,
- ufds[idx].revents);
-
- /* uCMA events */
- if (ufds[1].revents == POLLIN)
- dapli_cma_event_cb();
-
- /* check and process CQ and ASYNC events, per device */
- for (idx = 2; idx < fds; idx++) {
- if (ufds[idx].revents == POLLIN) {
- dapli_cq_event_cb(uhca[idx]);
- dapli_async_event_cb(uhca[idx]);
- }
- }
-
- /* check and process user events, PIPE */
- if (ufds[0].revents == POLLIN) {
- if (read(g_ib_pipe[0], rbuf, 2) == -1)
- dapl_log(DAPL_DBG_TYPE_UTIL,
- " cr_thread: pipe rd err= %s\n",
- strerror(errno));
-
- /* cleanup any device on list marked for destroy */
- for (idx = 3; idx < fds; idx++) {
- if (uhca[idx] && uhca[idx]->destroy == 1) {
- dapl_os_lock(&g_hca_lock);
- dapl_llist_remove_entry(&g_hca_list,
- (DAPL_LLIST_ENTRY *)&uhca[idx]->entry);
- dapl_os_unlock(&g_hca_lock);
- uhca[idx]->destroy = 2;
- }
- }
- }
- dapl_os_lock(&g_hca_lock);
- }
-
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " ib_thread(%d) EXIT\n",
- dapl_os_getpid());
- g_ib_thread_state = IB_THREAD_EXIT;
- dapl_os_unlock(&g_hca_lock);
-}
-#endif
-
-/*
- * dapls_set_provider_specific_attr
- *
- * Input:
- * attr_ptr Pointer provider specific attributes
- *
- * Output:
- * none
- *
- * Returns:
- * void
- */
-DAT_NAMED_ATTR ib_attrs[] = {
-#ifdef DAT_EXTENSIONS
- {"DAT_EXTENSION_INTERFACE", "TRUE"},
- {DAT_IB_ATTR_FETCH_AND_ADD, "TRUE"},
- {DAT_IB_ATTR_CMP_AND_SWAP, "TRUE"},
- {DAT_IB_ATTR_IMMED_DATA, "TRUE"},
-#ifdef DAPL_COUNTERS
- {DAT_ATTR_COUNTERS, "TRUE"},
-#endif /* DAPL_COUNTERS */
-#endif
-};
-
-#define SPEC_ATTR_SIZE( x ) (sizeof( x ) / sizeof( DAT_NAMED_ATTR))
-
-void dapls_query_provider_specific_attr(IN DAPL_IA * ia_ptr,
- IN DAT_PROVIDER_ATTR * attr_ptr)
-{
- attr_ptr->num_provider_specific_attr = SPEC_ATTR_SIZE(ib_attrs);
- attr_ptr->provider_specific_attr = ib_attrs;
-}
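
Nothing in the provider consumes ib_attrs[] directly; it is surfaced to applications through dat_ia_query(). A sketch, assuming the dat2/dat.h entry point and mask names (adjust to your headers), of checking for the extension interface before using atomics:

#include <string.h>
#include <dat2/dat.h>

/* Sketch only: returns 1 when the opened IA advertises
 * "DAT_EXTENSION_INTERFACE" among its provider-specific attributes. */
static int has_ib_extensions(DAT_IA_HANDLE ia)
{
        DAT_IA_ATTR ia_attr;
        DAT_PROVIDER_ATTR pattr;
        DAT_EVD_HANDLE async_evd;
        DAT_COUNT i;

        if (dat_ia_query(ia, &async_evd, 0, &ia_attr,
                         DAT_PROVIDER_FIELD_ALL, &pattr) != DAT_SUCCESS)
                return 0;

        for (i = 0; i < pattr.num_provider_specific_attr; i++)
                if (!strcmp(pattr.provider_specific_attr[i].name,
                            "DAT_EXTENSION_INTERFACE"))
                        return 1;
        return 0;
}
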
diff --git a/dapl/openib_cma/dapl_ib_util.h b/dapl/openib_cma/dapl_ib_util.h
index dde5fac..f466c06 100755
--- a/dapl/openib_cma/dapl_ib_util.h
+++ b/dapl/openib_cma/dapl_ib_util.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2005-2008 Intel Corporation. All rights reserved.
+ * Copyright (c) 2005-2009 Intel Corporation. All rights reserved.
*
* This Software is licensed under one of the following licenses:
*
@@ -24,35 +24,18 @@
* notice, one of the license notices in the documentation
* and/or other materials provided with the distribution.
*/
-
-/**********************************************************************
- *
- * MODULE: dapl_ib_util.h
- *
- * PURPOSE: The OFED provider - definitions, prototypes,
- *
- * $Id: $
- *
- **********************************************************************/
-
+/*
+ * Definitions specific to OpenIB CMA provider.
+ * Connection manager - rdma_cma, provided in separate library.
+ */
#ifndef _DAPL_IB_UTIL_H_
#define _DAPL_IB_UTIL_H_
+#define _OPENIB_CMA_
-#include "openib_osd.h"
#include <infiniband/verbs.h>
#include <rdma/rdma_cma.h>
-
-/* Typedefs to map common DAPL provider types to IB verbs */
-typedef struct dapl_cm_id *ib_qp_handle_t;
-typedef struct ibv_cq *ib_cq_handle_t;
-typedef struct ibv_pd *ib_pd_handle_t;
-typedef struct ibv_mr *ib_mr_handle_t;
-typedef struct ibv_mw *ib_mw_handle_t;
-typedef struct ibv_wc ib_work_completion_t;
-
-/* HCA context type maps to IB verbs */
-typedef struct ibv_context *ib_hca_handle_t;
-typedef ib_hca_handle_t dapl_ibal_ca_t;
+#include "openib_osd.h"
+#include "dapl_ib_common.h"
#define IB_RC_RETRY_COUNT 7
#define IB_RNR_RETRY_COUNT 7
@@ -64,56 +47,6 @@ typedef ib_hca_handle_t dapl_ibal_ca_t;
#define IB_ROUTE_RETRY_COUNT 15 /* 60 sec total */
#define IB_MAX_AT_RETRY 3
-typedef enum {
- IB_CME_CONNECTED,
- IB_CME_DISCONNECTED,
- IB_CME_DISCONNECTED_ON_LINK_DOWN,
- IB_CME_CONNECTION_REQUEST_PENDING,
- IB_CME_CONNECTION_REQUEST_PENDING_PRIVATE_DATA,
- IB_CME_CONNECTION_REQUEST_ACKED,
- IB_CME_DESTINATION_REJECT,
- IB_CME_DESTINATION_REJECT_PRIVATE_DATA,
- IB_CME_DESTINATION_UNREACHABLE,
- IB_CME_TOO_MANY_CONNECTION_REQUESTS,
- IB_CME_LOCAL_FAILURE,
- IB_CME_BROKEN,
- IB_CME_TIMEOUT
-} ib_cm_events_t;
-
-/* CQ notifications */
-typedef enum
-{
- IB_NOTIFY_ON_NEXT_COMP,
- IB_NOTIFY_ON_SOLIC_COMP
-
-} ib_notification_type_t;
-
-/* other mappings */
-typedef int ib_bool_t;
-typedef union ibv_gid GID;
-typedef char *IB_HCA_NAME;
-typedef uint16_t ib_hca_port_t;
-typedef uint32_t ib_comp_handle_t;
-
-#ifdef CQ_WAIT_OBJECT
-
-/* CQ event channel, plus pipe to enable consumer wakeup */
-typedef struct _ib_wait_obj_handle
-{
- struct ibv_comp_channel *events;
- int pipe[2];
-
-} *ib_wait_obj_handle_t;
-
-#endif
-
-/* Definitions */
-#define IB_INVALID_HANDLE NULL
-
-/* inline send rdma threshold */
-#define INLINE_SEND_IWARP_DEFAULT 64
-#define INLINE_SEND_IB_DEFAULT 200
-
/* CMA private data areas */
#define CMA_PDATA_HDR 36
#define IB_MAX_REQ_PDATA_SIZE (92-CMA_PDATA_HDR)
@@ -123,38 +56,6 @@ typedef struct _ib_wait_obj_handle
#define IB_MAX_DREP_PDATA_SIZE (224-CMA_PDATA_HDR)
#define IWARP_MAX_PDATA_SIZE (512-CMA_PDATA_HDR)
-/* DTO OPs, ordered for DAPL ENUM definitions */
-#define OP_RDMA_WRITE IBV_WR_RDMA_WRITE
-#define OP_RDMA_WRITE_IMM IBV_WR_RDMA_WRITE_WITH_IMM
-#define OP_SEND IBV_WR_SEND
-#define OP_SEND_IMM IBV_WR_SEND_WITH_IMM
-#define OP_RDMA_READ IBV_WR_RDMA_READ
-#define OP_COMP_AND_SWAP IBV_WR_ATOMIC_CMP_AND_SWP
-#define OP_FETCH_AND_ADD IBV_WR_ATOMIC_FETCH_AND_ADD
-#define OP_RECEIVE 7 /* internal op */
-#define OP_RECEIVE_IMM 8 /* internal op */
-#define OP_BIND_MW 9 /* internal op */
-#define OP_INVALID 0xff
-
-/* Definitions to map QP state */
-#define IB_QP_STATE_RESET IBV_QPS_RESET
-#define IB_QP_STATE_INIT IBV_QPS_INIT
-#define IB_QP_STATE_RTR IBV_QPS_RTR
-#define IB_QP_STATE_RTS IBV_QPS_RTS
-#define IB_QP_STATE_SQD IBV_QPS_SQD
-#define IB_QP_STATE_SQE IBV_QPS_SQE
-#define IB_QP_STATE_ERROR IBV_QPS_ERR
-
-typedef enum
-{
- IB_THREAD_INIT,
- IB_THREAD_CREATE,
- IB_THREAD_RUN,
- IB_THREAD_CANCEL,
- IB_THREAD_EXIT
-
-} ib_thread_state_t;
-
struct dapl_cm_id {
DAPL_OS_LOCK lock;
int destroy;
@@ -171,67 +72,13 @@ struct dapl_cm_id {
DAT_SOCK_ADDR6 r_addr;
int p_len;
unsigned char p_data[256]; /* dapl max private data size */
+ ib_qp_cm_t dst; /* dapls_modify_qp_state */
+ struct ibv_ah *ah; /* dapls_modify_qp_state */
};
typedef struct dapl_cm_id *dp_ib_cm_handle_t;
typedef struct dapl_cm_id *ib_cm_srvc_handle_t;
-/* Operation and state mappings */
-typedef int ib_send_op_type_t;
-typedef struct ibv_sge ib_data_segment_t;
-typedef enum ibv_qp_state ib_qp_state_t;
-typedef enum ibv_event_type ib_async_event_type;
-typedef struct ibv_async_event ib_error_record_t;
-
-/* Definitions for ibverbs/mthca return codes, should be defined in verbs.h */
-/* some are errno and some are -n values */
-
-/**
- * ibv_get_device_name - Return kernel device name
- * ibv_get_device_guid - Return device's node GUID
- * ibv_open_device - Return ibv_context or NULL
- * ibv_close_device - Return 0, (errno?)
- * ibv_get_async_event - Return 0, -1
- * ibv_alloc_pd - Return ibv_pd, NULL
- * ibv_dealloc_pd - Return 0, errno
- * ibv_reg_mr - Return ibv_mr, NULL
- * ibv_dereg_mr - Return 0, errno
- * ibv_create_cq - Return ibv_cq, NULL
- * ibv_destroy_cq - Return 0, errno
- * ibv_get_cq_event - Return 0 & ibv_cq/context, int
- * ibv_poll_cq - Return n & ibv_wc, 0 ok, -1 empty, -2 error
- * ibv_req_notify_cq - Return 0 (void?)
- * ibv_create_qp - Return ibv_qp, NULL
- * ibv_modify_qp - Return 0, errno
- * ibv_destroy_qp - Return 0, errno
- * ibv_post_send - Return 0, -1 & bad_wr
- * ibv_post_recv - Return 0, -1 & bad_wr
- */
-
-/* async handlers for DTO, CQ, QP, and unafiliated */
-typedef void (*ib_async_dto_handler_t)(
- IN ib_hca_handle_t ib_hca_handle,
- IN ib_error_record_t *err_code,
- IN void *context);
-
-typedef void (*ib_async_cq_handler_t)(
- IN ib_hca_handle_t ib_hca_handle,
- IN ib_cq_handle_t ib_cq_handle,
- IN ib_error_record_t *err_code,
- IN void *context);
-
-typedef void (*ib_async_qp_handler_t)(
- IN ib_hca_handle_t ib_hca_handle,
- IN ib_qp_handle_t ib_qp_handle,
- IN ib_error_record_t *err_code,
- IN void *context);
-
-typedef void (*ib_async_handler_t)(
- IN ib_hca_handle_t ib_hca_handle,
- IN ib_error_record_t *err_code,
- IN void *context);
-
-
/* ib_hca_transport_t, specific to this implementation */
typedef struct _ib_hca_transport
{
@@ -250,79 +97,38 @@ typedef struct _ib_hca_transport
uint8_t max_cm_timeout;
uint8_t max_cm_retries;
/* device attributes */
- int max_rdma_rd_in;
- int max_rdma_rd_out;
+ int rd_atom_in;
+ int rd_atom_out;
+ struct ibv_device *ib_dev;
+ /* dapls_modify_qp_state */
+ uint16_t lid;
+ uint8_t ack_timer;
+ uint8_t ack_retry;
+ uint8_t rnr_timer;
+ uint8_t rnr_retry;
+ uint8_t global;
+ uint8_t hop_limit;
+ uint8_t tclass;
+ uint8_t mtu;
+ DAT_NAMED_ATTR named_attr;
} ib_hca_transport_t;
-/* provider specfic fields for shared memory support */
-typedef uint32_t ib_shm_transport_t;
-
/* prototypes */
-int32_t dapls_ib_init (void);
-int32_t dapls_ib_release (void);
void dapli_thread(void *arg);
DAT_RETURN dapli_ib_thread_init(void);
void dapli_ib_thread_destroy(void);
void dapli_cma_event_cb(void);
-void dapli_cq_event_cb(struct _ib_hca_transport *hca);
void dapli_async_event_cb(struct _ib_hca_transport *hca);
-void dapli_destroy_conn(struct dapl_cm_id *conn);
-
-DAT_RETURN
-dapls_modify_qp_state ( IN ib_qp_handle_t qp_handle,
- IN ib_qp_state_t qp_state,
- IN struct dapl_cm_id *conn );
-
-/* inline functions */
-STATIC _INLINE_ IB_HCA_NAME dapl_ib_convert_name (IN char *name)
-{
- /* use ascii; name of local device */
- return dapl_os_strdup(name);
-}
-
-STATIC _INLINE_ void dapl_ib_release_name (IN IB_HCA_NAME name)
-{
- return;
-}
+dp_ib_cm_handle_t dapls_ib_cm_create(DAPL_EP *ep);
+void dapls_ib_cm_free(dp_ib_cm_handle_t cm, DAPL_EP *ep);
+DAT_RETURN dapls_modify_qp_state(IN ib_qp_handle_t qp_handle,
+ IN ib_qp_state_t qp_state,
+ IN dp_ib_cm_handle_t cm);
STATIC _INLINE_ void dapls_print_cm_list(IN DAPL_IA * ia_ptr)
{
return;
}
-/*
- * Convert errno to DAT_RETURN values
- */
-STATIC _INLINE_ DAT_RETURN
-dapl_convert_errno( IN int err, IN const char *str )
-{
- if (!err) return DAT_SUCCESS;
-
-#if DAPL_DBG
- if ((err != EAGAIN) && (err != ETIMEDOUT))
- dapl_dbg_log (DAPL_DBG_TYPE_ERR," %s %s\n", str, strerror(err));
-#endif
-
- switch( err )
- {
- case EOVERFLOW : return DAT_LENGTH_ERROR;
- case EACCES : return DAT_PRIVILEGES_VIOLATION;
- case EPERM : return DAT_PROTECTION_VIOLATION;
- case EINVAL : return DAT_INVALID_HANDLE;
- case EISCONN : return DAT_INVALID_STATE | DAT_INVALID_STATE_EP_CONNECTED;
- case ECONNREFUSED : return DAT_INVALID_STATE | DAT_INVALID_STATE_EP_NOTREADY;
- case ETIMEDOUT : return DAT_TIMEOUT_EXPIRED;
- case ENETUNREACH: return DAT_INVALID_ADDRESS | DAT_INVALID_ADDRESS_UNREACHABLE;
- case EADDRINUSE : return DAT_CONN_QUAL_IN_USE;
- case EALREADY : return DAT_INVALID_STATE | DAT_INVALID_STATE_EP_ACTCONNPENDING;
- case ENOMEM : return DAT_INSUFFICIENT_RESOURCES;
- case EAGAIN : return DAT_QUEUE_EMPTY;
- case EINTR : return DAT_INTERRUPTED_CALL;
- case EAFNOSUPPORT : return DAT_INVALID_ADDRESS | DAT_INVALID_ADDRESS_MALFORMED;
- case EFAULT :
- default : return DAT_INTERNAL_ERROR;
- }
- }
-
#endif /* _DAPL_IB_UTIL_H_ */
diff --git a/dapl/openib_cma/device.c b/dapl/openib_cma/device.c
new file mode 100644
index 0000000..0e974f6
--- /dev/null
+++ b/dapl/openib_cma/device.c
@@ -0,0 +1,847 @@
+/*
+ * Copyright (c) 2005-2008 Intel Corporation. All rights reserved.
+ *
+ * This Software is licensed under one of the following licenses:
+ *
+ * 1) under the terms of the "Common Public License 1.0" a copy of which is
+ * available from the Open Source Initiative, see
+ * http://www.opensource.org/licenses/cpl.php.
+ *
+ * 2) under the terms of the "The BSD License" a copy of which is
+ * available from the Open Source Initiative, see
+ * http://www.opensource.org/licenses/bsd-license.php.
+ *
+ * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
+ * copy of which is available from the Open Source Initiative, see
+ * http://www.opensource.org/licenses/gpl-license.php.
+ *
+ * Licensee has the right to choose one of the above licenses.
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice and one of the license notices.
+ *
+ * Redistributions in binary form must reproduce both the above copyright
+ * notice, one of the license notices in the documentation
+ * and/or other materials provided with the distribution.
+ */
+
+/**********************************************************************
+ *
+ * MODULE: dapl_ib_util.c
+ *
+ * PURPOSE: OFED provider - init, open, close, utilities, work thread
+ *
+ * $Id:$
+ *
+ **********************************************************************/
+
+#ifdef RCSID
+static const char rcsid[] = "$Id: $";
+#endif
+
+#include "openib_osd.h"
+#include "dapl.h"
+#include "dapl_adapter_util.h"
+#include "dapl_ib_util.h"
+#include "dapl_osd.h"
+
+#include <stdlib.h>
+
+struct rdma_event_channel *g_cm_events = NULL;
+ib_thread_state_t g_ib_thread_state = 0;
+DAPL_OS_THREAD g_ib_thread;
+DAPL_OS_LOCK g_hca_lock;
+struct dapl_llist_entry *g_hca_list;
+
+#if defined(_WIN64) || defined(_WIN32)
+#include "..\..\..\..\..\etc\user\comp_channel.cpp"
+#include "..\..\..\..\..\etc\user\dlist.c"
+#include <rdma\winverbs.h>
+
+struct ibvw_windata windata;
+
+static int getipaddr_netdev(char *name, char *addr, int addr_len)
+{
+ IWVProvider *prov;
+ WV_DEVICE_ADDRESS devaddr;
+ struct addrinfo *res, *ai;
+ HRESULT hr;
+ int index;
+
+ if (strncmp(name, "rdma_dev", 8)) {
+ return EINVAL;
+ }
+
+ index = atoi(name + 8);
+
+ hr = WvGetObject(&IID_IWVProvider, (LPVOID *) &prov);
+ if (FAILED(hr)) {
+ return hr;
+ }
+
+ hr = getaddrinfo("..localmachine", NULL, NULL, &res);
+ if (hr) {
+ goto release;
+ }
+
+ for (ai = res; ai; ai = ai->ai_next) {
+ hr = prov->lpVtbl->TranslateAddress(prov, ai->ai_addr, &devaddr);
+ if (SUCCEEDED(hr) && (ai->ai_addrlen <= addr_len) && (index-- == 0)) {
+ memcpy(addr, ai->ai_addr, ai->ai_addrlen);
+ goto free;
+ }
+ }
+ hr = ENODEV;
+
+free:
+ freeaddrinfo(res);
+release:
+ prov->lpVtbl->Release(prov);
+ return hr;
+}
+
+static int dapls_os_init(void)
+{
+ return ibvw_get_windata(&windata, IBVW_WINDATA_VERSION);
+}
+
+static void dapls_os_release(void)
+{
+ if (windata.comp_mgr)
+ ibvw_release_windata(&windata, IBVW_WINDATA_VERSION);
+ windata.comp_mgr = NULL;
+}
+
+static int dapls_config_cm_channel(struct rdma_event_channel *channel)
+{
+ channel->channel.Milliseconds = 0;
+ return 0;
+}
+
+static int dapls_config_verbs(struct ibv_context *verbs)
+{
+ verbs->channel.Milliseconds = 0;
+ return 0;
+}
+
+static int dapls_thread_signal(void)
+{
+ CompManagerCancel(windata.comp_mgr);
+ return 0;
+}
+#else // _WIN64 || WIN32
+int g_ib_pipe[2];
+
+static int dapls_os_init(void)
+{
+ /* create pipe for waking up work thread */
+ return pipe(g_ib_pipe);
+}
+
+static void dapls_os_release(void)
+{
+ /* close pipe? */
+}
+
+/* Get IP address using network device name */
+static int getipaddr_netdev(char *name, char *addr, int addr_len)
+{
+ struct ifreq ifr;
+ int skfd, ret, len;
+
+ /* Fill in the structure */
+ snprintf(ifr.ifr_name, IFNAMSIZ, "%s", name);
+ ifr.ifr_hwaddr.sa_family = ARPHRD_INFINIBAND;
+
+ /* Create a socket fd */
+ skfd = socket(PF_INET, SOCK_STREAM, 0);
+ ret = ioctl(skfd, SIOCGIFADDR, &ifr);
+ if (ret)
+ goto bail;
+
+ switch (ifr.ifr_addr.sa_family) {
+#ifdef AF_INET6
+ case AF_INET6:
+ len = sizeof(struct sockaddr_in6);
+ break;
+#endif
+ case AF_INET:
+ default:
+ len = sizeof(struct sockaddr);
+ break;
+ }
+
+ if (len <= addr_len)
+ memcpy(addr, &ifr.ifr_addr, len);
+ else
+ ret = EINVAL;
+
+ bail:
+ close(skfd);
+ return ret;
+}
+
+static int dapls_config_fd(int fd)
+{
+ int opts;
+
+ opts = fcntl(fd, F_GETFL);
+ if (opts < 0 || fcntl(fd, F_SETFL, opts | O_NONBLOCK) < 0) {
+ dapl_log(DAPL_DBG_TYPE_ERR,
+ " dapls_config_fd: fcntl on fd %d ERR %d %s\n",
+ fd, opts, strerror(errno));
+ return errno;
+ }
+
+ return 0;
+}
+
+static int dapls_config_cm_channel(struct rdma_event_channel *channel)
+{
+ return dapls_config_fd(channel->fd);
+}
+
+static int dapls_config_verbs(struct ibv_context *verbs)
+{
+ return dapls_config_fd(verbs->async_fd);
+}
+
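+/* wake the work thread: dapli_thread() polls g_ib_pipe[0] and drains it on POLLIN */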
+static int dapls_thread_signal(void)
+{
+ return write(g_ib_pipe[1], "w", sizeof "w");
+}
+#endif
+
+/* Get IP address using network name, address, or device name */
+static int getipaddr(char *name, char *addr, int len)
+{
+ struct addrinfo *res;
+
+ /* assume netdev for first attempt, then network and address type */
+ if (getipaddr_netdev(name, addr, len)) {
+ if (getaddrinfo(name, NULL, NULL, &res)) {
+ dapl_log(DAPL_DBG_TYPE_ERR,
+ " open_hca: getaddr_netdev ERROR:"
+ " %s. Is %s configured?\n",
+ strerror(errno), name);
+ return 1;
+ } else {
+ if (len >= res->ai_addrlen)
+ memcpy(addr, res->ai_addr, res->ai_addrlen);
+ else {
+ freeaddrinfo(res);
+ return 1;
+ }
+ freeaddrinfo(res);
+ }
+ }
+
+ dapl_dbg_log(
+ DAPL_DBG_TYPE_UTIL,
+ " getipaddr: family %d port %d addr %d.%d.%d.%d\n",
+ ((struct sockaddr_in *)addr)->sin_family,
+ ((struct sockaddr_in *)addr)->sin_port,
+ ((struct sockaddr_in *)addr)->sin_addr.s_addr >> 0 & 0xff,
+ ((struct sockaddr_in *)addr)->sin_addr.s_addr >> 8 & 0xff,
+ ((struct sockaddr_in *)addr)->sin_addr.s_addr >> 16 & 0xff,
+ ((struct sockaddr_in *)addr)->sin_addr.
+ s_addr >> 24 & 0xff);
+
+ return 0;
+}
+
+/*
+ * dapls_ib_init, dapls_ib_release
+ *
+ * Initialize Verb related items for device open
+ *
+ * Input:
+ * none
+ *
+ * Output:
+ * none
+ *
+ * Returns:
+ * 0 success, -1 error
+ *
+ */
+int32_t dapls_ib_init(void)
+{
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " dapl_ib_init: \n");
+
+ /* initialize hca_list lock */
+ dapl_os_lock_init(&g_hca_lock);
+
+ /* initialize hca list for CQ events */
+ dapl_llist_init_head(&g_hca_list);
+
+ if (dapls_os_init())
+ return 1;
+
+ return 0;
+}
+
+int32_t dapls_ib_release(void)
+{
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " dapl_ib_release: \n");
+ dapli_ib_thread_destroy();
+ if (g_cm_events != NULL)
+ rdma_destroy_event_channel(g_cm_events);
+ dapls_os_release();
+ return 0;
+}
+
+/*
+ * dapls_ib_open_hca
+ *
+ * Open HCA
+ *
+ * Input:
+ *	hca_name	pointer to provider device name
+ *	hca_ptr		pointer to DAPL HCA structure
+ *
+ * Output:
+ * none
+ *
+ * Return:
+ * DAT_SUCCESS
+ * dapl_convert_errno
+ *
+ */
+DAT_RETURN dapls_ib_open_hca(IN IB_HCA_NAME hca_name, IN DAPL_HCA * hca_ptr)
+{
+ struct rdma_cm_id *cm_id = NULL;
+ union ibv_gid *gid;
+ int ret;
+ DAT_RETURN dat_status;
+
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+ " open_hca: %s - %p\n", hca_name, hca_ptr);
+
+ /* Setup the global cm event channel */
+ dapl_os_lock(&g_hca_lock);
+ if (g_cm_events == NULL) {
+ g_cm_events = rdma_create_event_channel();
+ if (g_cm_events == NULL) {
+ dapl_dbg_log(DAPL_DBG_TYPE_ERR,
+ " open_hca: ERR - RDMA channel %s\n",
+ strerror(errno));
+ return DAT_INTERNAL_ERROR;
+ }
+ }
+ dapl_os_unlock(&g_hca_lock);
+
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+ " open_hca: RDMA channel created (%p)\n", g_cm_events);
+
+ dat_status = dapli_ib_thread_init();
+ if (dat_status != DAT_SUCCESS)
+ return dat_status;
+
+ /* HCA name will be hostname or IP address */
+ if (getipaddr((char *)hca_name,
+ (char *)&hca_ptr->hca_address,
+ sizeof(DAT_SOCK_ADDR6)))
+ return DAT_INVALID_ADDRESS;
+
+ /* cm_id will bind local device/GID based on IP address */
+ if (rdma_create_id(g_cm_events, &cm_id,
+ (void *)hca_ptr, RDMA_PS_TCP)) {
+ dapl_log(DAPL_DBG_TYPE_ERR,
+ " open_hca: rdma_create ERR %s\n", strerror(errno));
+ return DAT_INTERNAL_ERROR;
+ }
+ ret = rdma_bind_addr(cm_id, (struct sockaddr *)&hca_ptr->hca_address);
+ if ((ret) || (cm_id->verbs == NULL)) {
+ rdma_destroy_id(cm_id);
+ dapl_log(DAPL_DBG_TYPE_ERR,
+ " open_hca: rdma_bind ERR %s."
+ " Is %s configured?\n", strerror(errno), hca_name);
+ return DAT_INVALID_ADDRESS;
+ }
+
+ /* keep reference to IB device and cm_id */
+ hca_ptr->ib_trans.cm_id = cm_id;
+ hca_ptr->ib_hca_handle = cm_id->verbs;
+ dapls_config_verbs(cm_id->verbs);
+ hca_ptr->port_num = cm_id->port_num;
+ hca_ptr->ib_trans.ib_dev = cm_id->verbs->device;
+ gid = &cm_id->route.addr.addr.ibaddr.sgid;
+
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+ " open_hca: ctx=%p port=%d GID subnet %016llx"
+ " id %016llx\n", cm_id->verbs, cm_id->port_num,
+ (unsigned long long)ntohll(gid->global.subnet_prefix),
+ (unsigned long long)ntohll(gid->global.interface_id));
+
+ /* set inline max with env or default, get local lid and gid 0 */
+ if (hca_ptr->ib_hca_handle->device->transport_type
+ == IBV_TRANSPORT_IWARP)
+ hca_ptr->ib_trans.max_inline_send =
+ dapl_os_get_env_val("DAPL_MAX_INLINE",
+ INLINE_SEND_IWARP_DEFAULT);
+ else
+ hca_ptr->ib_trans.max_inline_send =
+ dapl_os_get_env_val("DAPL_MAX_INLINE",
+ INLINE_SEND_IB_DEFAULT);
+
+ /* set CM timer defaults */
+ hca_ptr->ib_trans.max_cm_timeout =
+ dapl_os_get_env_val("DAPL_MAX_CM_RESPONSE_TIME",
+ IB_CM_RESPONSE_TIMEOUT);
+ hca_ptr->ib_trans.max_cm_retries =
+ dapl_os_get_env_val("DAPL_MAX_CM_RETRIES", IB_CM_RETRIES);
+
+ /* set default IB MTU */
+ hca_ptr->ib_trans.mtu = dapl_ib_mtu(2048);
+
+ /*
+ * Put new hca_transport on list for async and CQ event processing
+ * Wakeup work thread to add to polling list
+ */
+ dapl_llist_init_entry((DAPL_LLIST_ENTRY *) & hca_ptr->ib_trans.entry);
+ dapl_os_lock(&g_hca_lock);
+ dapl_llist_add_tail(&g_hca_list,
+ (DAPL_LLIST_ENTRY *) & hca_ptr->ib_trans.entry,
+ &hca_ptr->ib_trans.entry);
+ if (dapls_thread_signal() == -1)
+ dapl_log(DAPL_DBG_TYPE_UTIL,
+ " open_hca: thread wakeup error = %s\n",
+ strerror(errno));
+ dapl_os_unlock(&g_hca_lock);
+
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+ " open_hca: %s, %s %d.%d.%d.%d INLINE_MAX=%d\n", hca_name,
+ ((struct sockaddr_in *)
+ &hca_ptr->hca_address)->sin_family == AF_INET ?
+ "AF_INET" : "AF_INET6",
+ ((struct sockaddr_in *)
+ &hca_ptr->hca_address)->sin_addr.s_addr >> 0 & 0xff,
+ ((struct sockaddr_in *)
+ &hca_ptr->hca_address)->sin_addr.s_addr >> 8 & 0xff,
+ ((struct sockaddr_in *)
+ &hca_ptr->hca_address)->sin_addr.s_addr >> 16 & 0xff,
+ ((struct sockaddr_in *)
+ &hca_ptr->hca_address)->sin_addr.s_addr >> 24 & 0xff,
+ hca_ptr->ib_trans.max_inline_send);
+
+ hca_ptr->ib_trans.d_hca = hca_ptr;
+ return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_close_hca
+ *
+ *	Close HCA
+ *
+ * Input:
+ *	DAPL_HCA	provider HCA handle
+ *
+ * Output:
+ * none
+ *
+ * Return:
+ * DAT_SUCCESS
+ * dapl_convert_errno
+ *
+ */
+DAT_RETURN dapls_ib_close_hca(IN DAPL_HCA * hca_ptr)
+{
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " close_hca: %p->%p\n",
+ hca_ptr, hca_ptr->ib_hca_handle);
+
+ if (hca_ptr->ib_hca_handle != IB_INVALID_HANDLE) {
+ if (rdma_destroy_id(hca_ptr->ib_trans.cm_id))
+ return (dapl_convert_errno(errno, "ib_close_device"));
+ hca_ptr->ib_hca_handle = IB_INVALID_HANDLE;
+ }
+
+ dapl_os_lock(&g_hca_lock);
+ if (g_ib_thread_state != IB_THREAD_RUN) {
+ dapl_os_unlock(&g_hca_lock);
+ goto bail;
+ }
+ dapl_os_unlock(&g_hca_lock);
+
+ /*
+ * Remove hca from async event processing list
+ * Wakeup work thread to remove from polling list
+ */
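+	/* destroy == 1 requests removal; the work thread sets it to 2 when done */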
+ hca_ptr->ib_trans.destroy = 1;
+ if (dapls_thread_signal() == -1)
+ dapl_log(DAPL_DBG_TYPE_UTIL,
+ " destroy: thread wakeup error = %s\n",
+ strerror(errno));
+
+ /* wait for thread to remove HCA references */
+ while (hca_ptr->ib_trans.destroy != 2) {
+ if (dapls_thread_signal() == -1)
+ dapl_log(DAPL_DBG_TYPE_UTIL,
+ " destroy: thread wakeup error = %s\n",
+ strerror(errno));
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+			     " ib_thread_destroy: wait on hca %p destroy\n",
+			     hca_ptr);
+ dapl_os_sleep_usec(1000);
+ }
+bail:
+ return (DAT_SUCCESS);
+}
+
+
+DAT_RETURN dapli_ib_thread_init(void)
+{
+ DAT_RETURN dat_status;
+
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+ " ib_thread_init(%d)\n", dapl_os_getpid());
+
+ dapl_os_lock(&g_hca_lock);
+ if (g_ib_thread_state != IB_THREAD_INIT) {
+ dapl_os_unlock(&g_hca_lock);
+ return DAT_SUCCESS;
+ }
+
+ /* uCMA events non-blocking */
+ if (dapls_config_cm_channel(g_cm_events)) {
+ dapl_os_unlock(&g_hca_lock);
+ return (dapl_convert_errno(errno, "create_thread ERR: cm_fd"));
+ }
+
+ g_ib_thread_state = IB_THREAD_CREATE;
+ dapl_os_unlock(&g_hca_lock);
+
+ /* create thread to process inbound connect request */
+ dat_status = dapl_os_thread_create(dapli_thread, NULL, &g_ib_thread);
+ if (dat_status != DAT_SUCCESS)
+ return (dapl_convert_errno(errno,
+ "create_thread ERR:"
+ " check resource limits"));
+
+ /* wait for thread to start */
+ dapl_os_lock(&g_hca_lock);
+ while (g_ib_thread_state != IB_THREAD_RUN) {
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+ " ib_thread_init: waiting for ib_thread\n");
+ dapl_os_unlock(&g_hca_lock);
+ dapl_os_sleep_usec(1000);
+ dapl_os_lock(&g_hca_lock);
+ }
+ dapl_os_unlock(&g_hca_lock);
+
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+ " ib_thread_init(%d) exit\n", dapl_os_getpid());
+
+ return DAT_SUCCESS;
+}
+
+void dapli_ib_thread_destroy(void)
+{
+ int retries = 10;
+
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+ " ib_thread_destroy(%d)\n", dapl_os_getpid());
+ /*
+ * wait for async thread to terminate.
+ * pthread_join would be the correct method
+ * but some applications have some issues
+ */
+
+ /* destroy ib_thread, wait for termination, if not already */
+ dapl_os_lock(&g_hca_lock);
+ if (g_ib_thread_state != IB_THREAD_RUN)
+ goto bail;
+
+ g_ib_thread_state = IB_THREAD_CANCEL;
+ if (dapls_thread_signal() == -1)
+ dapl_log(DAPL_DBG_TYPE_UTIL,
+ " destroy: thread wakeup error = %s\n",
+ strerror(errno));
+ while ((g_ib_thread_state != IB_THREAD_EXIT) && (retries--)) {
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+ " ib_thread_destroy: waiting for ib_thread\n");
+ if (dapls_thread_signal() == -1)
+ dapl_log(DAPL_DBG_TYPE_UTIL,
+ " destroy: thread wakeup error = %s\n",
+ strerror(errno));
+ dapl_os_unlock(&g_hca_lock);
+ dapl_os_sleep_usec(2000);
+ dapl_os_lock(&g_hca_lock);
+ }
+bail:
+ dapl_os_unlock(&g_hca_lock);
+
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+ " ib_thread_destroy(%d) exit\n", dapl_os_getpid());
+}
+
+void dapli_async_event_cb(struct _ib_hca_transport *hca)
+{
+ struct ibv_async_event event;
+
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " async_event(%p)\n", hca);
+
+ if (hca->destroy)
+ return;
+
+ if (!ibv_get_async_event(hca->cm_id->verbs, &event)) {
+
+ switch (event.event_type) {
+ case IBV_EVENT_CQ_ERR:
+ {
+ struct dapl_ep *evd_ptr =
+ event.element.cq->cq_context;
+
+ dapl_log(DAPL_DBG_TYPE_ERR,
+ "dapl async_event CQ (%p) ERR %d\n",
+ evd_ptr, event.event_type);
+
+ /* report up if async callback still setup */
+ if (hca->async_cq_error)
+ hca->async_cq_error(hca->cm_id->verbs,
+ event.element.cq,
+ &event,
+ (void *)evd_ptr);
+ break;
+ }
+ case IBV_EVENT_COMM_EST:
+ {
+ /* Received msgs on connected QP before RTU */
+ dapl_log(DAPL_DBG_TYPE_UTIL,
+ " async_event COMM_EST(%p) rdata beat RTU\n",
+ event.element.qp);
+
+ break;
+ }
+ case IBV_EVENT_QP_FATAL:
+ case IBV_EVENT_QP_REQ_ERR:
+ case IBV_EVENT_QP_ACCESS_ERR:
+ case IBV_EVENT_QP_LAST_WQE_REACHED:
+ case IBV_EVENT_SRQ_ERR:
+ case IBV_EVENT_SRQ_LIMIT_REACHED:
+ case IBV_EVENT_SQ_DRAINED:
+ {
+ struct dapl_ep *ep_ptr =
+ event.element.qp->qp_context;
+
+ dapl_log(DAPL_DBG_TYPE_ERR,
+ "dapl async_event QP (%p) ERR %d\n",
+ ep_ptr, event.event_type);
+
+ /* report up if async callback still setup */
+ if (hca->async_qp_error)
+ hca->async_qp_error(hca->cm_id->verbs,
+ ep_ptr->qp_handle,
+ &event,
+ (void *)ep_ptr);
+ break;
+ }
+ case IBV_EVENT_PATH_MIG:
+ case IBV_EVENT_PATH_MIG_ERR:
+ case IBV_EVENT_DEVICE_FATAL:
+ case IBV_EVENT_PORT_ACTIVE:
+ case IBV_EVENT_PORT_ERR:
+ case IBV_EVENT_LID_CHANGE:
+ case IBV_EVENT_PKEY_CHANGE:
+ case IBV_EVENT_SM_CHANGE:
+ {
+ dapl_log(DAPL_DBG_TYPE_WARN,
+ "dapl async_event: DEV ERR %d\n",
+ event.event_type);
+
+ /* report up if async callback still setup */
+ if (hca->async_unafiliated)
+					hca->async_unafiliated(
+						hca->cm_id->verbs, &event,
+						hca->async_un_ctx);
+ break;
+ }
+ case IBV_EVENT_CLIENT_REREGISTER:
+ /* no need to report this event this time */
+ dapl_log(DAPL_DBG_TYPE_UTIL,
+ " async_event: IBV_CLIENT_REREGISTER\n");
+ break;
+
+ default:
+ dapl_log(DAPL_DBG_TYPE_WARN,
+ "dapl async_event: %d UNKNOWN\n",
+ event.event_type);
+ break;
+
+ }
+ ibv_ack_async_event(&event);
+ }
+}
+
+#if defined(_WIN64) || defined(_WIN32)
+/* work thread for uAT, uCM, CQ, and async events */
+void dapli_thread(void *arg)
+{
+ struct _ib_hca_transport *hca;
+ struct _ib_hca_transport *uhca[8];
+ COMP_CHANNEL *channel;
+ int ret, idx, cnt;
+
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " ib_thread(%d,0x%x): ENTER: \n",
+ dapl_os_getpid(), g_ib_thread);
+
+ dapl_os_lock(&g_hca_lock);
+ for (g_ib_thread_state = IB_THREAD_RUN;
+ g_ib_thread_state == IB_THREAD_RUN;
+ dapl_os_lock(&g_hca_lock)) {
+
+ idx = 0;
+ hca = dapl_llist_is_empty(&g_hca_list) ? NULL :
+ dapl_llist_peek_head(&g_hca_list);
+
+ while (hca) {
+ uhca[idx++] = hca;
+ hca = dapl_llist_next_entry(&g_hca_list,
+ (DAPL_LLIST_ENTRY *)
+ &hca->entry);
+ }
+ cnt = idx;
+
+ dapl_os_unlock(&g_hca_lock);
+ ret = CompManagerPoll(windata.comp_mgr, INFINITE, &channel);
+
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+ " ib_thread(%d) poll_event 0x%x\n",
+ dapl_os_getpid(), ret);
+
+ dapli_cma_event_cb();
+
+ /* check and process ASYNC events, per device */
+ for (idx = 0; idx < cnt; idx++) {
+ if (uhca[idx]->destroy == 1) {
+ dapl_os_lock(&g_hca_lock);
+ dapl_llist_remove_entry(&g_hca_list,
+ (DAPL_LLIST_ENTRY *)
+ &uhca[idx]->entry);
+ dapl_os_unlock(&g_hca_lock);
+ uhca[idx]->destroy = 2;
+ } else {
+ dapli_async_event_cb(uhca[idx]);
+ }
+ }
+ }
+
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " ib_thread(%d) EXIT\n",
+ dapl_os_getpid());
+ g_ib_thread_state = IB_THREAD_EXIT;
+ dapl_os_unlock(&g_hca_lock);
+}
+#else // _WIN64 || WIN32
+/* work thread for uAT, uCM, CQ, and async events */
+void dapli_thread(void *arg)
+{
+ struct pollfd ufds[__FD_SETSIZE];
+ struct _ib_hca_transport *uhca[__FD_SETSIZE] = { NULL };
+ struct _ib_hca_transport *hca;
+ int ret, idx, fds;
+ char rbuf[2];
+
+ dapl_dbg_log(DAPL_DBG_TYPE_THREAD,
+ " ib_thread(%d,0x%x): ENTER: pipe %d ucma %d\n",
+ dapl_os_getpid(), g_ib_thread, g_ib_pipe[0],
+ g_cm_events->fd);
+
+ /* Poll across pipe, CM, AT never changes */
+ dapl_os_lock(&g_hca_lock);
+ g_ib_thread_state = IB_THREAD_RUN;
+
+ ufds[0].fd = g_ib_pipe[0]; /* pipe */
+ ufds[0].events = POLLIN;
+ ufds[1].fd = g_cm_events->fd; /* uCMA */
+ ufds[1].events = POLLIN;
+
+ while (g_ib_thread_state == IB_THREAD_RUN) {
+
+ /* build ufds after pipe and uCMA events */
+ ufds[0].revents = 0;
+ ufds[1].revents = 0;
+ idx = 1;
+
+ /* Walk HCA list and setup async and CQ events */
+ if (!dapl_llist_is_empty(&g_hca_list))
+ hca = dapl_llist_peek_head(&g_hca_list);
+ else
+ hca = NULL;
+
+ while (hca) {
+
+ /* uASYNC events */
+ ufds[++idx].fd = hca->cm_id->verbs->async_fd;
+ ufds[idx].events = POLLIN;
+ ufds[idx].revents = 0;
+ uhca[idx] = hca;
+
+ dapl_dbg_log(DAPL_DBG_TYPE_THREAD,
+ " ib_thread(%d) poll_fd: hca[%d]=%p,"
+ " async=%d pipe=%d cm=%d \n",
+				     dapl_os_getpid(), idx, hca,
+				     ufds[idx].fd, ufds[0].fd, ufds[1].fd);
+
+ hca = dapl_llist_next_entry(&g_hca_list,
+ (DAPL_LLIST_ENTRY *)
+ &hca->entry);
+ }
+
+ /* unlock, and setup poll */
+ fds = idx + 1;
+ dapl_os_unlock(&g_hca_lock);
+ ret = poll(ufds, fds, -1);
+ if (ret <= 0) {
+ dapl_dbg_log(DAPL_DBG_TYPE_THREAD,
+ " ib_thread(%d): ERR %s poll\n",
+ dapl_os_getpid(), strerror(errno));
+ dapl_os_lock(&g_hca_lock);
+ continue;
+ }
+
+ dapl_dbg_log(DAPL_DBG_TYPE_THREAD,
+ " ib_thread(%d) poll_event: "
+ " async=0x%x pipe=0x%x cm=0x%x \n",
+ dapl_os_getpid(), ufds[idx].revents,
+ ufds[0].revents, ufds[1].revents);
+
+ /* uCMA events */
+ if (ufds[1].revents == POLLIN)
+ dapli_cma_event_cb();
+
+ /* check and process ASYNC events, per device */
+ for (idx = 2; idx < fds; idx++) {
+ if (ufds[idx].revents == POLLIN) {
+ dapli_async_event_cb(uhca[idx]);
+ }
+ }
+
+ /* check and process user events, PIPE */
+ if (ufds[0].revents == POLLIN) {
+ if (read(g_ib_pipe[0], rbuf, 2) == -1)
+ dapl_log(DAPL_DBG_TYPE_THREAD,
+ " cr_thread: pipe rd err= %s\n",
+ strerror(errno));
+
+ /* cleanup any device on list marked for destroy */
+ for (idx = 2; idx < fds; idx++) {
+ if (uhca[idx] && uhca[idx]->destroy == 1) {
+ dapl_os_lock(&g_hca_lock);
+ dapl_llist_remove_entry(
+ &g_hca_list,
+ (DAPL_LLIST_ENTRY*)
+ &uhca[idx]->entry);
+ dapl_os_unlock(&g_hca_lock);
+ uhca[idx]->destroy = 2;
+ }
+ }
+ }
+ dapl_os_lock(&g_hca_lock);
+ }
+
+ dapl_dbg_log(DAPL_DBG_TYPE_THREAD, " ib_thread(%d) EXIT\n",
+ dapl_os_getpid());
+ g_ib_thread_state = IB_THREAD_EXIT;
+ dapl_os_unlock(&g_hca_lock);
+}
+#endif
diff --git a/dapl/openib_common/cq.c b/dapl/openib_common/cq.c
new file mode 100644
index 0000000..74a5940
--- /dev/null
+++ b/dapl/openib_common/cq.c
@@ -0,0 +1,491 @@
+/*
+ * Copyright (c) 2009 Intel Corporation. All rights reserved.
+ *
+ * This Software is licensed under one of the following licenses:
+ *
+ * 1) under the terms of the "Common Public License 1.0" a copy of which is
+ * available from the Open Source Initiative, see
+ * http://www.opensource.org/licenses/cpl.php.
+ *
+ * 2) under the terms of the "The BSD License" a copy of which is
+ * available from the Open Source Initiative, see
+ * http://www.opensource.org/licenses/bsd-license.php.
+ *
+ * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
+ * copy of which is available from the Open Source Initiative, see
+ * http://www.opensource.org/licenses/gpl-license.php.
+ *
+ * Licensee has the right to choose one of the above licenses.
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice and one of the license notices.
+ *
+ * Redistributions in binary form must reproduce both the above copyright
+ * notice, one of the license notices in the documentation
+ * and/or other materials provided with the distribution.
+ */
+#include "openib_osd.h"
+#include "dapl.h"
+#include "dapl_adapter_util.h"
+#include "dapl_lmr_util.h"
+#include "dapl_evd_util.h"
+#include "dapl_ring_buffer_util.h"
+
+/*
+ * Map all verbs DTO completion codes to the DAT equivalent.
+ *
+ * Not returned by verbs: DAT_DTO_ERR_PARTIAL_PACKET
+ */
+static struct ib_status_map {
+ int ib_status;
+ DAT_DTO_COMPLETION_STATUS dat_status;
+} ib_status_map[] = {
+/* 00 */ {IBV_WC_SUCCESS, DAT_DTO_SUCCESS},
+/* 01 */ {IBV_WC_LOC_LEN_ERR, DAT_DTO_ERR_LOCAL_LENGTH},
+/* 02 */ {IBV_WC_LOC_QP_OP_ERR, DAT_DTO_ERR_LOCAL_EP},
+/* 03 */ {IBV_WC_LOC_EEC_OP_ERR, DAT_DTO_ERR_TRANSPORT},
+/* 04 */ {IBV_WC_LOC_PROT_ERR, DAT_DTO_ERR_LOCAL_PROTECTION},
+/* 05 */ {IBV_WC_WR_FLUSH_ERR, DAT_DTO_ERR_FLUSHED},
+/* 06 */ {IBV_WC_MW_BIND_ERR, DAT_RMR_OPERATION_FAILED},
+/* 07 */ {IBV_WC_BAD_RESP_ERR, DAT_DTO_ERR_BAD_RESPONSE},
+/* 08 */ {IBV_WC_LOC_ACCESS_ERR, DAT_DTO_ERR_LOCAL_PROTECTION},
+/* 09 */ {IBV_WC_REM_INV_REQ_ERR, DAT_DTO_ERR_REMOTE_RESPONDER},
+/* 10 */ {IBV_WC_REM_ACCESS_ERR, DAT_DTO_ERR_REMOTE_ACCESS},
+/* 11 */ {IBV_WC_REM_OP_ERR, DAT_DTO_ERR_REMOTE_RESPONDER},
+/* 12 */ {IBV_WC_RETRY_EXC_ERR, DAT_DTO_ERR_TRANSPORT},
+/* 13 */ {IBV_WC_RNR_RETRY_EXC_ERR, DAT_DTO_ERR_RECEIVER_NOT_READY},
+/* 14 */ {IBV_WC_LOC_RDD_VIOL_ERR, DAT_DTO_ERR_LOCAL_PROTECTION},
+/* 15 */ {IBV_WC_REM_INV_RD_REQ_ERR, DAT_DTO_ERR_REMOTE_RESPONDER},
+/* 16 */ {IBV_WC_REM_ABORT_ERR, DAT_DTO_ERR_REMOTE_RESPONDER},
+/* 17 */ {IBV_WC_INV_EECN_ERR, DAT_DTO_ERR_TRANSPORT},
+/* 18 */ {IBV_WC_INV_EEC_STATE_ERR, DAT_DTO_ERR_TRANSPORT},
+/* 19 */ {IBV_WC_FATAL_ERR, DAT_DTO_ERR_TRANSPORT},
+/* 20 */ {IBV_WC_RESP_TIMEOUT_ERR, DAT_DTO_ERR_RECEIVER_NOT_READY},
+/* 21 */ {IBV_WC_GENERAL_ERR, DAT_DTO_ERR_TRANSPORT},
+};
+
+/*
+ * dapls_ib_get_dto_status
+ *
+ * Return the DAT status of a DTO operation
+ *
+ * Input:
+ * cqe_ptr pointer to completion queue entry
+ *
+ * Output:
+ * none
+ *
+ * Returns:
+ * Value from ib_status_map table above
+ */
+
+DAT_DTO_COMPLETION_STATUS
+dapls_ib_get_dto_status(IN ib_work_completion_t * cqe_ptr)
+{
+ uint32_t ib_status;
+ int i;
+
+ ib_status = DAPL_GET_CQE_STATUS(cqe_ptr);
+
+ /*
+ * Due to the implementation of verbs completion code, we need to
+ * search the table for the correct value rather than assuming
+ * linear distribution.
+ */
+ for (i = 0; i <= IBV_WC_GENERAL_ERR; i++) {
+ if (ib_status == ib_status_map[i].ib_status) {
+ if (ib_status != IBV_WC_SUCCESS) {
+ dapl_log(DAPL_DBG_TYPE_DTO_COMP_ERR,
+ " DTO completion ERROR: %d:"
+ " op %#x\n",
+ ib_status,
+ DAPL_GET_CQE_OPTYPE(cqe_ptr));
+ }
+ return ib_status_map[i].dat_status;
+ }
+ }
+
+ return DAT_DTO_FAILURE;
+}
+
+DAT_RETURN dapls_ib_get_async_event(IN ib_error_record_t * err_record,
+ OUT DAT_EVENT_NUMBER * async_event)
+{
+ DAT_RETURN dat_status = DAT_SUCCESS;
+ int err_code = err_record->event_type;
+
+ switch (err_code) {
+ /* OVERFLOW error */
+ case IBV_EVENT_CQ_ERR:
+ *async_event = DAT_ASYNC_ERROR_EVD_OVERFLOW;
+ break;
+ /* INTERNAL errors */
+ case IBV_EVENT_DEVICE_FATAL:
+ *async_event = DAT_ASYNC_ERROR_PROVIDER_INTERNAL_ERROR;
+ break;
+ /* CATASTROPHIC errors */
+ case IBV_EVENT_PORT_ERR:
+ *async_event = DAT_ASYNC_ERROR_IA_CATASTROPHIC;
+ break;
+ /* BROKEN QP error */
+ case IBV_EVENT_SQ_DRAINED:
+ case IBV_EVENT_QP_FATAL:
+ case IBV_EVENT_QP_REQ_ERR:
+ case IBV_EVENT_QP_ACCESS_ERR:
+ *async_event = DAT_ASYNC_ERROR_EP_BROKEN;
+ break;
+
+ /* connection completion */
+ case IBV_EVENT_COMM_EST:
+ *async_event = DAT_CONNECTION_EVENT_ESTABLISHED;
+ break;
+
+ /* TODO: process HW state changes */
+ case IBV_EVENT_PATH_MIG:
+ case IBV_EVENT_PATH_MIG_ERR:
+ case IBV_EVENT_PORT_ACTIVE:
+ case IBV_EVENT_LID_CHANGE:
+ case IBV_EVENT_PKEY_CHANGE:
+ case IBV_EVENT_SM_CHANGE:
+ default:
+ dat_status = DAT_ERROR(DAT_NOT_IMPLEMENTED, 0);
+ }
+ return dat_status;
+}
+
+/*
+ * dapl_ib_cq_alloc
+ *
+ * Alloc a CQ
+ *
+ * Input:
+ * ia_handle IA handle
+ * evd_ptr pointer to EVD struct
+ * cqlen minimum QLen
+ *
+ * Output:
+ * none
+ *
+ * Returns:
+ * DAT_SUCCESS
+ * DAT_INSUFFICIENT_RESOURCES
+ *
+ */
+DAT_RETURN
+dapls_ib_cq_alloc(IN DAPL_IA * ia_ptr,
+ IN DAPL_EVD * evd_ptr, IN DAT_COUNT * cqlen)
+{
+ struct ibv_comp_channel *channel = evd_ptr->cq_wait_obj_handle;
+
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+ "dapls_ib_cq_alloc: evd %p cqlen=%d \n", evd_ptr, *cqlen);
+
+ /* Call IB verbs to create CQ */
+ evd_ptr->ib_cq_handle = ibv_create_cq(ia_ptr->hca_ptr->ib_hca_handle,
+ *cqlen, evd_ptr, channel, 0);
+
+ if (evd_ptr->ib_cq_handle == IB_INVALID_HANDLE)
+ return DAT_INSUFFICIENT_RESOURCES;
+
+ /* arm cq for events */
+ dapls_set_cq_notify(ia_ptr, evd_ptr);
+
+ /* update with returned cq entry size */
+ *cqlen = evd_ptr->ib_cq_handle->cqe;
+
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+ "dapls_ib_cq_alloc: new_cq %p cqlen=%d \n",
+ evd_ptr->ib_cq_handle, *cqlen);
+
+ return DAT_SUCCESS;
+}
+
+/*
+ * dapl_ib_cq_resize
+ *
+ * Resize a CQ
+ *
+ * Input:
+ * ia_handle IA handle
+ * evd_ptr pointer to EVD struct
+ * cqlen minimum QLen
+ *
+ * Output:
+ * none
+ *
+ * Returns:
+ * DAT_SUCCESS
+ * DAT_INVALID_PARAMETER
+ *
+ */
+DAT_RETURN
+dapls_ib_cq_resize(IN DAPL_IA * ia_ptr,
+ IN DAPL_EVD * evd_ptr, IN DAT_COUNT * cqlen)
+{
+ ib_cq_handle_t new_cq;
+ struct ibv_comp_channel *channel = evd_ptr->cq_wait_obj_handle;
+
+	/* TODO: IB verbs does support CQ resize; rework this to use it.
+	 * For now, re-create the CQ with the new size. This can only be
+	 * done when no QP is attached (ibv_destroy_cq returns EBUSY if
+	 * one still is).
+	 */
+
+ /* Call IB verbs to create CQ */
+ new_cq = ibv_create_cq(ia_ptr->hca_ptr->ib_hca_handle, *cqlen,
+ evd_ptr, channel, 0);
+
+ if (new_cq == IB_INVALID_HANDLE)
+ return DAT_INSUFFICIENT_RESOURCES;
+
+ /* destroy the original and replace if successful */
+ if (ibv_destroy_cq(evd_ptr->ib_cq_handle)) {
+ ibv_destroy_cq(new_cq);
+ return (dapl_convert_errno(errno, "resize_cq"));
+ }
+
+ /* update EVD with new cq handle and size */
+ evd_ptr->ib_cq_handle = new_cq;
+ *cqlen = new_cq->cqe;
+
+ /* arm cq for events */
+ dapls_set_cq_notify(ia_ptr, evd_ptr);
+
+ return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_cq_free
+ *
+ * destroy a CQ
+ *
+ * Input:
+ * ia_handle IA handle
+ * evd_ptr pointer to EVD struct
+ *
+ * Output:
+ * none
+ *
+ * Returns:
+ * DAT_SUCCESS
+ * DAT_INVALID_PARAMETER
+ *
+ */
+DAT_RETURN dapls_ib_cq_free(IN DAPL_IA * ia_ptr, IN DAPL_EVD * evd_ptr)
+{
+ DAT_EVENT event;
+ ib_work_completion_t wc;
+
+ if (evd_ptr->ib_cq_handle != IB_INVALID_HANDLE) {
+ /* pull off CQ and EVD entries and toss */
+ while (ibv_poll_cq(evd_ptr->ib_cq_handle, 1, &wc) == 1) ;
+ while (dapl_evd_dequeue(evd_ptr, &event) == DAT_SUCCESS) ;
+ if (ibv_destroy_cq(evd_ptr->ib_cq_handle))
+ return (dapl_convert_errno(errno, "ibv_destroy_cq"));
+ evd_ptr->ib_cq_handle = IB_INVALID_HANDLE;
+ }
+ return DAT_SUCCESS;
+}
+
+/*
+ * dapls_set_cq_notify
+ *
+ * Set the CQ notification for next
+ *
+ * Input:
+ * hca_handl hca handle
+ * DAPL_EVD evd handle
+ *
+ * Output:
+ * none
+ *
+ * Returns:
+ * DAT_SUCCESS
+ * dapl_convert_errno
+ */
+DAT_RETURN dapls_set_cq_notify(IN DAPL_IA * ia_ptr, IN DAPL_EVD * evd_ptr)
+{
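+	/* solicited_only == 0: request an event on the next completion of any type */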
+ if (ibv_req_notify_cq(evd_ptr->ib_cq_handle, 0))
+ return (dapl_convert_errno(errno, "notify_cq"));
+ else
+ return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_completion_notify
+ *
+ * Set the CQ notification type
+ *
+ * Input:
+ * hca_handl hca handle
+ * evd_ptr evd handle
+ * type notification type
+ *
+ * Output:
+ * none
+ *
+ * Returns:
+ * DAT_SUCCESS
+ * dapl_convert_errno
+ */
+DAT_RETURN dapls_ib_completion_notify(IN ib_hca_handle_t hca_handle,
+ IN DAPL_EVD * evd_ptr,
+ IN ib_notification_type_t type)
+{
+ if (ibv_req_notify_cq(evd_ptr->ib_cq_handle, type))
+ return (dapl_convert_errno(errno, "notify_cq_type"));
+ else
+ return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_completion_poll
+ *
+ * CQ poll for completions
+ *
+ * Input:
+ * hca_handl hca handle
+ * evd_ptr evd handle
+ * wc_ptr work completion
+ *
+ * Output:
+ * none
+ *
+ * Returns:
+ * DAT_SUCCESS
+ * DAT_QUEUE_EMPTY
+ *
+ */
+DAT_RETURN dapls_ib_completion_poll(IN DAPL_HCA * hca_ptr,
+ IN DAPL_EVD * evd_ptr,
+ IN ib_work_completion_t * wc_ptr)
+{
+ int ret;
+
+ ret = ibv_poll_cq(evd_ptr->ib_cq_handle, 1, wc_ptr);
+ if (ret == 1)
+ return DAT_SUCCESS;
+
+ return DAT_QUEUE_EMPTY;
+}
+
+/* NEW common wait objects for providers with direct CQ wait objects */
+DAT_RETURN
+dapls_ib_wait_object_create(IN DAPL_EVD * evd_ptr,
+ IN ib_wait_obj_handle_t * p_cq_wait_obj_handle)
+{
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+ " cq_object_create: (%p,%p)\n",
+ evd_ptr, p_cq_wait_obj_handle);
+
+ /* set cq_wait object to evd_ptr */
+ *p_cq_wait_obj_handle =
+ ibv_create_comp_channel(evd_ptr->header.owner_ia->hca_ptr->
+ ib_hca_handle);
+
+ return DAT_SUCCESS;
+}
+
+DAT_RETURN
+dapls_ib_wait_object_destroy(IN ib_wait_obj_handle_t p_cq_wait_obj_handle)
+{
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+ " cq_object_destroy: wait_obj=%p\n", p_cq_wait_obj_handle);
+
+ ibv_destroy_comp_channel(p_cq_wait_obj_handle);
+
+ return DAT_SUCCESS;
+}
+
+DAT_RETURN
+dapls_ib_wait_object_wakeup(IN ib_wait_obj_handle_t p_cq_wait_obj_handle)
+{
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+ " cq_object_wakeup: wait_obj=%p\n", p_cq_wait_obj_handle);
+
+ /* no wake up mechanism */
+ return DAT_SUCCESS;
+}
+
+#if defined(_WIN32) || defined(_WIN64)
+DAT_RETURN
+dapls_ib_wait_object_wait(IN ib_wait_obj_handle_t p_cq_wait_obj_handle,
+ IN uint32_t timeout)
+{
+ struct dapl_evd *evd_ptr;
+ struct ibv_cq *ibv_cq = NULL;
+ int status = 0;
+
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+ " cq_object_wait: CQ channel %p time %d\n",
+ p_cq_wait_obj_handle, timeout);
+
+ /* uDAPL timeout values in usecs */
+ p_cq_wait_obj_handle->comp_channel.Milliseconds = timeout / 1000;
+
+ /* returned event */
+ status = ibv_get_cq_event(p_cq_wait_obj_handle, &ibv_cq,
+ (void *)&evd_ptr);
+ if (status == 0) {
+ ibv_ack_cq_events(ibv_cq, 1);
+ }
+
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+ " cq_object_wait: RET evd %p ibv_cq %p %s\n",
+ evd_ptr, ibv_cq, strerror(errno));
+
+ return (dapl_convert_errno(status, "cq_wait_object_wait"));
+}
+#else //_WIN32 || _WIN64
+DAT_RETURN
+dapls_ib_wait_object_wait(IN ib_wait_obj_handle_t p_cq_wait_obj_handle,
+ IN uint32_t timeout)
+{
+ struct dapl_evd *evd_ptr;
+ struct ibv_cq *ibv_cq = NULL;
+ int status = 0;
+ int timeout_ms = -1;
+ struct pollfd cq_fd = {
+ .fd = p_cq_wait_obj_handle->fd,
+ .events = POLLIN,
+ .revents = 0
+ };
+
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+ " cq_object_wait: CQ channel %p time %d\n",
+ p_cq_wait_obj_handle, timeout);
+
+ /* uDAPL timeout values in usecs */
+ if (timeout != DAT_TIMEOUT_INFINITE)
+ timeout_ms = timeout / 1000;
+
+ status = poll(&cq_fd, 1, timeout_ms);
+
+ /* returned event */
+ if (status > 0) {
+ if (!ibv_get_cq_event(p_cq_wait_obj_handle,
+ &ibv_cq, (void *)&evd_ptr)) {
+ ibv_ack_cq_events(ibv_cq, 1);
+ }
+ status = 0;
+
+ /* timeout */
+ } else if (status == 0)
+ status = ETIMEDOUT;
+
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+ " cq_object_wait: RET evd %p ibv_cq %p %s\n",
+ evd_ptr, ibv_cq, strerror(errno));
+
+ return (dapl_convert_errno(status, "cq_wait_object_wait"));
+
+}
+#endif //_WIN32 || _WIN64
+
+/*
+ * Local variables:
+ * c-indent-level: 4
+ * c-basic-offset: 4
+ * tab-width: 8
+ * End:
+ */
diff --git a/dapl/openib_common/dapl_ib_common.h b/dapl/openib_common/dapl_ib_common.h
new file mode 100644
index 0000000..b61e50e
--- /dev/null
+++ b/dapl/openib_common/dapl_ib_common.h
@@ -0,0 +1,299 @@
+/*
+ * Copyright (c) 2009 Intel Corporation. All rights reserved.
+ *
+ * This Software is licensed under one of the following licenses:
+ *
+ * 1) under the terms of the "Common Public License 1.0" a copy of which is
+ * available from the Open Source Initiative, see
+ * http://www.opensource.org/licenses/cpl.php.
+ *
+ * 2) under the terms of the "The BSD License" a copy of which is
+ * available from the Open Source Initiative, see
+ * http://www.opensource.org/licenses/bsd-license.php.
+ *
+ * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
+ * copy of which is available from the Open Source Initiative, see
+ * http://www.opensource.org/licenses/gpl-license.php.
+ *
+ * Licensee has the right to choose one of the above licenses.
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice and one of the license notices.
+ *
+ * Redistributions in binary form must reproduce both the above copyright
+ * notice, one of the license notices in the documentation
+ * and/or other materials provided with the distribution.
+ */
+
+/*
+ * Definitions common to all OpenIB providers, cma, scm, ucm
+ */
+
+#ifndef _DAPL_IB_COMMON_H_
+#define _DAPL_IB_COMMON_H_
+
+#include <infiniband/verbs.h>
+
+#ifdef DAT_EXTENSIONS
+#include <dat2/dat_ib_extensions.h>
+#endif
+
+#ifndef __cplusplus
+#define false 0
+#define true 1
+#endif /*__cplusplus */
+
+/* Typedefs to map common DAPL provider types to IB verbs */
+typedef struct ibv_qp *ib_qp_handle_t;
+typedef struct ibv_cq *ib_cq_handle_t;
+typedef struct ibv_pd *ib_pd_handle_t;
+typedef struct ibv_mr *ib_mr_handle_t;
+typedef struct ibv_mw *ib_mw_handle_t;
+typedef struct ibv_wc ib_work_completion_t;
+
+/* HCA context type maps to IB verbs */
+typedef struct ibv_context *ib_hca_handle_t;
+typedef ib_hca_handle_t dapl_ibal_ca_t;
+
+/* QP info to exchange, wire protocol version for these CM's */
+#define DCM_VER 4
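+/* peers exchange this struct during connection setup; ver must match on both sides */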
+typedef struct _ib_qp_cm
+{
+ uint16_t ver;
+ uint16_t rej;
+ uint16_t lid;
+ uint16_t port;
+ uint32_t qpn;
+ uint32_t p_size;
+ union ibv_gid gid;
+ DAT_SOCK_ADDR6 ia_address;
+ uint16_t qp_type;
+} ib_qp_cm_t;
+
+/* CM events */
+typedef enum {
+ IB_CME_CONNECTED,
+ IB_CME_DISCONNECTED,
+ IB_CME_DISCONNECTED_ON_LINK_DOWN,
+ IB_CME_CONNECTION_REQUEST_PENDING,
+ IB_CME_CONNECTION_REQUEST_PENDING_PRIVATE_DATA,
+ IB_CME_CONNECTION_REQUEST_ACKED,
+ IB_CME_DESTINATION_REJECT,
+ IB_CME_DESTINATION_REJECT_PRIVATE_DATA,
+ IB_CME_DESTINATION_UNREACHABLE,
+ IB_CME_TOO_MANY_CONNECTION_REQUESTS,
+ IB_CME_LOCAL_FAILURE,
+ IB_CME_BROKEN,
+ IB_CME_TIMEOUT
+} ib_cm_events_t;
+
+/* Operation and state mappings */
+typedef int ib_send_op_type_t;
+typedef struct ibv_sge ib_data_segment_t;
+typedef enum ibv_qp_state ib_qp_state_t;
+typedef enum ibv_event_type ib_async_event_type;
+typedef struct ibv_async_event ib_error_record_t;
+
+/* CQ notifications */
+typedef enum
+{
+ IB_NOTIFY_ON_NEXT_COMP,
+ IB_NOTIFY_ON_SOLIC_COMP
+
+} ib_notification_type_t;
+
+/* other mappings */
+typedef int ib_bool_t;
+typedef union ibv_gid GID;
+typedef char *IB_HCA_NAME;
+typedef uint16_t ib_hca_port_t;
+typedef uint32_t ib_comp_handle_t;
+
+typedef struct ibv_comp_channel *ib_wait_obj_handle_t;
+
+/* Definitions */
+#define IB_INVALID_HANDLE NULL
+
+/* inline send rdma threshold */
+#define INLINE_SEND_IWARP_DEFAULT 64
+#define INLINE_SEND_IB_DEFAULT 200
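+/* sends at or below max_inline_send are posted with IBV_SEND_INLINE (see dapl_ib_dto.h) */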
+
+/* qkey for UD QP's */
+#define DAT_UD_QKEY 0x78654321
+
+/* DTO OPs, ordered for DAPL ENUM definitions */
+#define OP_RDMA_WRITE IBV_WR_RDMA_WRITE
+#define OP_RDMA_WRITE_IMM IBV_WR_RDMA_WRITE_WITH_IMM
+#define OP_SEND IBV_WR_SEND
+#define OP_SEND_IMM IBV_WR_SEND_WITH_IMM
+#define OP_RDMA_READ IBV_WR_RDMA_READ
+#define OP_COMP_AND_SWAP IBV_WR_ATOMIC_CMP_AND_SWP
+#define OP_FETCH_AND_ADD IBV_WR_ATOMIC_FETCH_AND_ADD
+#define OP_RECEIVE 7 /* internal op */
+#define OP_RECEIVE_IMM 8	/* rdma write with immed, internal op */
+#define OP_RECEIVE_MSG_IMM 9	/* recv msg with immed, internal op */
+#define OP_BIND_MW 10 /* internal op */
+#define OP_SEND_UD 11 /* internal op */
+#define OP_RECV_UD 12 /* internal op */
+#define OP_INVALID 0xff
+
+/* Definitions to map QP state */
+#define IB_QP_STATE_RESET IBV_QPS_RESET
+#define IB_QP_STATE_INIT IBV_QPS_INIT
+#define IB_QP_STATE_RTR IBV_QPS_RTR
+#define IB_QP_STATE_RTS IBV_QPS_RTS
+#define IB_QP_STATE_SQD IBV_QPS_SQD
+#define IB_QP_STATE_SQE IBV_QPS_SQE
+#define IB_QP_STATE_ERROR IBV_QPS_ERR
+
+/* Definitions for ibverbs/mthca return codes, should be defined in verbs.h */
+/* some are errno and some are -n values */
+
+/**
+ * ibv_get_device_name - Return kernel device name
+ * ibv_get_device_guid - Return device's node GUID
+ * ibv_open_device - Return ibv_context or NULL
+ * ibv_close_device - Return 0, (errno?)
+ * ibv_get_async_event - Return 0, -1
+ * ibv_alloc_pd - Return ibv_pd, NULL
+ * ibv_dealloc_pd - Return 0, errno
+ * ibv_reg_mr - Return ibv_mr, NULL
+ * ibv_dereg_mr - Return 0, errno
+ * ibv_create_cq - Return ibv_cq, NULL
+ * ibv_destroy_cq - Return 0, errno
+ * ibv_get_cq_event - Return 0 & ibv_cq/context, int
+ * ibv_poll_cq - Return n & ibv_wc, 0 ok, -1 empty, -2 error
+ * ibv_req_notify_cq - Return 0 (void?)
+ * ibv_create_qp - Return ibv_qp, NULL
+ * ibv_modify_qp - Return 0, errno
+ * ibv_destroy_qp - Return 0, errno
+ * ibv_post_send - Return 0, -1 & bad_wr
+ * ibv_post_recv - Return 0, -1 & bad_wr
+ */
+
+/* async handlers for DTO, CQ, QP, and unaffiliated events */
+typedef void (*ib_async_dto_handler_t)(
+ IN ib_hca_handle_t ib_hca_handle,
+ IN ib_error_record_t *err_code,
+ IN void *context);
+
+typedef void (*ib_async_cq_handler_t)(
+ IN ib_hca_handle_t ib_hca_handle,
+ IN ib_cq_handle_t ib_cq_handle,
+ IN ib_error_record_t *err_code,
+ IN void *context);
+
+typedef void (*ib_async_qp_handler_t)(
+ IN ib_hca_handle_t ib_hca_handle,
+ IN ib_qp_handle_t ib_qp_handle,
+ IN ib_error_record_t *err_code,
+ IN void *context);
+
+typedef void (*ib_async_handler_t)(
+ IN ib_hca_handle_t ib_hca_handle,
+ IN ib_error_record_t *err_code,
+ IN void *context);
+
+typedef enum
+{
+ IB_THREAD_INIT,
+ IB_THREAD_CREATE,
+ IB_THREAD_RUN,
+ IB_THREAD_CANCEL,
+ IB_THREAD_EXIT
+
+} ib_thread_state_t;
+
+
+/* provider specific fields for shared memory support */
+typedef uint32_t ib_shm_transport_t;
+
+/* prototypes */
+int32_t dapls_ib_init(void);
+int32_t dapls_ib_release(void);
+enum ibv_mtu dapl_ib_mtu(int mtu);
+char *dapl_ib_mtu_str(enum ibv_mtu mtu);
+DAT_RETURN getlocalipaddr(DAT_SOCK_ADDR *addr, int addr_len);
+
+/* inline functions */
+STATIC _INLINE_ IB_HCA_NAME dapl_ib_convert_name (IN char *name)
+{
+ /* use ascii; name of local device */
+ return dapl_os_strdup(name);
+}
+
+STATIC _INLINE_ void dapl_ib_release_name (IN IB_HCA_NAME name)
+{
+ return;
+}
+
+/*
+ * Convert errno to DAT_RETURN values
+ */
+STATIC _INLINE_ DAT_RETURN
+dapl_convert_errno( IN int err, IN const char *str )
+{
+ if (!err) return DAT_SUCCESS;
+
+#if DAPL_DBG
+ if ((err != EAGAIN) && (err != ETIMEDOUT))
+ dapl_dbg_log (DAPL_DBG_TYPE_ERR," %s %s\n", str, strerror(err));
+#endif
+
+ switch( err )
+ {
+ case EOVERFLOW : return DAT_LENGTH_ERROR;
+ case EACCES : return DAT_PRIVILEGES_VIOLATION;
+ case EPERM : return DAT_PROTECTION_VIOLATION;
+ case EINVAL : return DAT_INVALID_HANDLE;
+ case EISCONN : return DAT_INVALID_STATE | DAT_INVALID_STATE_EP_CONNECTED;
+ case ECONNREFUSED : return DAT_INVALID_STATE | DAT_INVALID_STATE_EP_NOTREADY;
+ case ETIMEDOUT : return DAT_TIMEOUT_EXPIRED;
+ case ENETUNREACH: return DAT_INVALID_ADDRESS | DAT_INVALID_ADDRESS_UNREACHABLE;
+ case EADDRINUSE : return DAT_CONN_QUAL_IN_USE;
+ case EALREADY : return DAT_INVALID_STATE | DAT_INVALID_STATE_EP_ACTCONNPENDING;
+ case ENOMEM : return DAT_INSUFFICIENT_RESOURCES;
+ case EAGAIN : return DAT_QUEUE_EMPTY;
+ case EINTR : return DAT_INTERRUPTED_CALL;
+ case EAFNOSUPPORT : return DAT_INVALID_ADDRESS | DAT_INVALID_ADDRESS_MALFORMED;
+ case EFAULT :
+ default : return DAT_INTERNAL_ERROR;
+ }
+ }
+
+typedef enum dapl_cm_state
+{
+ DCM_INIT,
+ DCM_LISTEN,
+ DCM_CONN_PENDING,
+ DCM_RTU_PENDING,
+ DCM_ACCEPTING,
+ DCM_ACCEPTING_DATA,
+ DCM_ACCEPTED,
+ DCM_REJECTED,
+ DCM_CONNECTED,
+ DCM_RELEASED,
+ DCM_DISCONNECTED,
+ DCM_DESTROY
+} DAPL_CM_STATE;
+
+STATIC _INLINE_ char * dapl_cm_state_str(IN int st)
+{
+ static char *state[] = {
+ "CM_INIT",
+ "CM_LISTEN",
+ "CM_CONN_PENDING",
+ "CM_RTU_PENDING",
+ "CM_ACCEPTING",
+ "CM_ACCEPTING_DATA",
+ "CM_ACCEPTED",
+ "CM_REJECTED",
+ "CM_CONNECTED",
+ "CM_RELEASED",
+ "CM_DISCONNECTED",
+ "CM_DESTROY"
+ };
+ return ((st < 0 || st > 11) ? "Invalid CM state?" : state[st]);
+}
+
+#endif /* _DAPL_IB_COMMON_H_ */
diff --git a/dapl/openib_common/dapl_ib_dto.h b/dapl/openib_common/dapl_ib_dto.h
new file mode 100644
index 0000000..e6c03b2
--- /dev/null
+++ b/dapl/openib_common/dapl_ib_dto.h
@@ -0,0 +1,504 @@
+/*
+ * Copyright (c) 2009 Intel Corporation. All rights reserved.
+ *
+ * This Software is licensed under one of the following licenses:
+ *
+ * 1) under the terms of the "Common Public License 1.0" a copy of which is
+ * available from the Open Source Initiative, see
+ * http://www.opensource.org/licenses/cpl.php.
+ *
+ * 2) under the terms of the "The BSD License" a copy of which is
+ * available from the Open Source Initiative, see
+ * http://www.opensource.org/licenses/bsd-license.php.
+ *
+ * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
+ * copy of which is available from the Open Source Initiative, see
+ * http://www.opensource.org/licenses/gpl-license.php.
+ *
+ * Licensee has the right to choose one of the above licenses.
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice and one of the license notices.
+ *
+ * Redistributions in binary form must reproduce both the above copyright
+ * notice, one of the license notices in the documentation
+ * and/or other materials provided with the distribution.
+ */
+#ifndef _DAPL_IB_DTO_H_
+#define _DAPL_IB_DTO_H_
+
+#include "dapl_ib_util.h"
+
+#ifdef DAT_EXTENSIONS
+#include <dat2/dat_ib_extensions.h>
+#endif
+
+STATIC _INLINE_ int dapls_cqe_opcode(ib_work_completion_t *cqe_p);
+
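+/* wr_id carries the DAPL_COOKIE pointer; its EP's QP type identifies UD completions */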
+#define CQE_WR_TYPE_UD(id) \
+ (((DAPL_COOKIE *)(uintptr_t)id)->ep->qp_handle->qp_type == IBV_QPT_UD)
+
+/*
+ * dapls_ib_post_recv
+ *
+ * Provider specific Post RECV function
+ */
+STATIC _INLINE_ DAT_RETURN
+dapls_ib_post_recv (
+ IN DAPL_EP *ep_ptr,
+ IN DAPL_COOKIE *cookie,
+ IN DAT_COUNT segments,
+ IN DAT_LMR_TRIPLET *local_iov )
+{
+ struct ibv_recv_wr wr;
+ struct ibv_recv_wr *bad_wr;
+ ib_data_segment_t *ds = (ib_data_segment_t *)local_iov;
+ DAT_COUNT i, total_len;
+ int ret;
+
+ dapl_dbg_log(DAPL_DBG_TYPE_EP,
+ " post_rcv: ep %p cookie %p segs %d l_iov %p\n",
+ ep_ptr, cookie, segments, local_iov);
+
+ /* setup work request */
+ total_len = 0;
+ wr.next = 0;
+ wr.num_sge = segments;
+ wr.wr_id = (uint64_t)(uintptr_t)cookie;
+ wr.sg_list = ds;
+
+ if (cookie != NULL) {
+ for (i = 0; i < segments; i++) {
+ dapl_dbg_log(DAPL_DBG_TYPE_EP,
+ " post_rcv: l_key 0x%x va %p len %d\n",
+ ds->lkey, ds->addr, ds->length );
+ total_len += ds->length;
+ ds++;
+ }
+ cookie->val.dto.size = total_len;
+ }
+
+ ret = ibv_post_recv(ep_ptr->qp_handle, &wr, &bad_wr);
+
+ if (ret)
+ return(dapl_convert_errno(errno,"ibv_recv"));
+
+ DAPL_CNTR(ep_ptr, DCNT_EP_POST_RECV);
+ DAPL_CNTR_DATA(ep_ptr, DCNT_EP_POST_RECV_DATA, total_len);
+
+ return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_post_send
+ *
+ * Provider specific Post SEND function
+ */
+STATIC _INLINE_ DAT_RETURN
+dapls_ib_post_send (
+ IN DAPL_EP *ep_ptr,
+ IN ib_send_op_type_t op_type,
+ IN DAPL_COOKIE *cookie,
+ IN DAT_COUNT segments,
+ IN DAT_LMR_TRIPLET *local_iov,
+ IN const DAT_RMR_TRIPLET *remote_iov,
+ IN DAT_COMPLETION_FLAGS completion_flags)
+{
+ struct ibv_send_wr wr;
+ struct ibv_send_wr *bad_wr;
+ ib_data_segment_t *ds = (ib_data_segment_t *)local_iov;
+ ib_hca_transport_t *ibt_ptr =
+ &ep_ptr->header.owner_ia->hca_ptr->ib_trans;
+ DAT_COUNT i, total_len;
+ int ret;
+
+ dapl_dbg_log(DAPL_DBG_TYPE_EP,
+		     " post_snd: ep %p op %d ck %p sgs %d"
+		     " l_iov %p r_iov %p f %d\n",
+ ep_ptr, op_type, cookie, segments, local_iov,
+ remote_iov, completion_flags);
+
+#ifdef DAT_EXTENSIONS
+ if (ep_ptr->qp_handle->qp_type != IBV_QPT_RC)
+ return(DAT_ERROR(DAT_INVALID_HANDLE, DAT_INVALID_HANDLE_EP));
+#endif
+ /* setup the work request */
+ wr.next = 0;
+ wr.opcode = op_type;
+ wr.num_sge = segments;
+ wr.send_flags = 0;
+ wr.wr_id = (uint64_t)(uintptr_t)cookie;
+ wr.sg_list = ds;
+ total_len = 0;
+
+ if (cookie != NULL) {
+ for (i = 0; i < segments; i++ ) {
+ dapl_dbg_log(DAPL_DBG_TYPE_EP,
+ " post_snd: lkey 0x%x va %p len %d\n",
+ ds->lkey, ds->addr, ds->length );
+ total_len += ds->length;
+ ds++;
+ }
+ cookie->val.dto.size = total_len;
+ }
+
+ if (wr.num_sge &&
+ (op_type == OP_RDMA_WRITE || op_type == OP_RDMA_READ)) {
+ wr.wr.rdma.remote_addr = remote_iov->virtual_address;
+ wr.wr.rdma.rkey = remote_iov->rmr_context;
+ dapl_dbg_log(DAPL_DBG_TYPE_EP,
+ " post_snd_rdma: rkey 0x%x va %#016Lx\n",
+ wr.wr.rdma.rkey, wr.wr.rdma.remote_addr);
+ }
+
+
+ /* inline data for send or write ops */
+ if ((total_len <= ibt_ptr->max_inline_send) &&
+ ((op_type == OP_SEND) || (op_type == OP_RDMA_WRITE)))
+ wr.send_flags |= IBV_SEND_INLINE;
+
+ /* set completion flags in work request */
+ wr.send_flags |= (DAT_COMPLETION_SUPPRESS_FLAG &
+ completion_flags) ? 0 : IBV_SEND_SIGNALED;
+ wr.send_flags |= (DAT_COMPLETION_BARRIER_FENCE_FLAG &
+ completion_flags) ? IBV_SEND_FENCE : 0;
+ wr.send_flags |= (DAT_COMPLETION_SOLICITED_WAIT_FLAG &
+ completion_flags) ? IBV_SEND_SOLICITED : 0;
+
+ dapl_dbg_log(DAPL_DBG_TYPE_EP,
+ " post_snd: op 0x%x flags 0x%x sglist %p, %d\n",
+ wr.opcode, wr.send_flags, wr.sg_list, wr.num_sge);
+
+ ret = ibv_post_send(ep_ptr->qp_handle, &wr, &bad_wr);
+
+ if (ret)
+ return(dapl_convert_errno(errno,"ibv_send"));
+
+#ifdef DAPL_COUNTERS
+ switch (op_type) {
+ case OP_SEND:
+ DAPL_CNTR(ep_ptr, DCNT_EP_POST_SEND);
+ DAPL_CNTR_DATA(ep_ptr, DCNT_EP_POST_SEND_DATA,total_len);
+ break;
+ case OP_RDMA_WRITE:
+ DAPL_CNTR(ep_ptr, DCNT_EP_POST_WRITE);
+ DAPL_CNTR_DATA(ep_ptr, DCNT_EP_POST_WRITE_DATA,total_len);
+ break;
+ case OP_RDMA_READ:
+ DAPL_CNTR(ep_ptr, DCNT_EP_POST_READ);
+ DAPL_CNTR_DATA(ep_ptr, DCNT_EP_POST_READ_DATA,total_len);
+ break;
+ default:
+ break;
+ }
+#endif /* DAPL_COUNTERS */
+
+ dapl_dbg_log(DAPL_DBG_TYPE_EP," post_snd: returned\n");
+ return DAT_SUCCESS;
+}
+
+/* map Work Completions to DAPL WR operations */
+STATIC _INLINE_ DAT_DTOS dapls_cqe_dtos_opcode(ib_work_completion_t *cqe_p)
+{
+ switch (cqe_p->opcode) {
+
+ case IBV_WC_SEND:
+#ifdef DAT_EXTENSIONS
+ if (CQE_WR_TYPE_UD(cqe_p->wr_id))
+ return (DAT_IB_DTO_SEND_UD);
+ else
+#endif
+ return (DAT_DTO_SEND);
+ case IBV_WC_RDMA_READ:
+ return (DAT_DTO_RDMA_READ);
+ case IBV_WC_BIND_MW:
+ return (DAT_DTO_BIND_MW);
+#ifdef DAT_EXTENSIONS
+ case IBV_WC_RDMA_WRITE:
+ if (cqe_p->wc_flags & IBV_WC_WITH_IMM)
+ return (DAT_IB_DTO_RDMA_WRITE_IMMED);
+ else
+ return (DAT_DTO_RDMA_WRITE);
+ case IBV_WC_COMP_SWAP:
+ return (DAT_IB_DTO_CMP_SWAP);
+ case IBV_WC_FETCH_ADD:
+ return (DAT_IB_DTO_FETCH_ADD);
+ case IBV_WC_RECV_RDMA_WITH_IMM:
+ return (DAT_IB_DTO_RECV_IMMED);
+#else
+ case IBV_WC_RDMA_WRITE:
+ return (DAT_DTO_RDMA_WRITE);
+#endif
+ case IBV_WC_RECV:
+#ifdef DAT_EXTENSIONS
+ if (CQE_WR_TYPE_UD(cqe_p->wr_id))
+ return (DAT_IB_DTO_RECV_UD);
+ else if (cqe_p->wc_flags & IBV_WC_WITH_IMM)
+ return (DAT_IB_DTO_RECV_MSG_IMMED);
+ else
+#endif
+ return (DAT_DTO_RECEIVE);
+ default:
+ return (0xff);
+ }
+}
+#define DAPL_GET_CQE_DTOS_OPTYPE(cqe_p) dapls_cqe_dtos_opcode(cqe_p)
+
+
+#ifdef DAT_EXTENSIONS
+/*
+ * dapls_ib_post_ext_send
+ *
+ * Provider specific extended Post SEND function for atomics
+ * OP_COMP_AND_SWAP and OP_FETCH_AND_ADD
+ */
+STATIC _INLINE_ DAT_RETURN
+dapls_ib_post_ext_send (
+ IN DAPL_EP *ep_ptr,
+ IN ib_send_op_type_t op_type,
+ IN DAPL_COOKIE *cookie,
+ IN DAT_COUNT segments,
+ IN DAT_LMR_TRIPLET *local_iov,
+ IN const DAT_RMR_TRIPLET *remote_iov,
+ IN DAT_UINT32 immed_data,
+ IN DAT_UINT64 compare_add,
+ IN DAT_UINT64 swap,
+ IN DAT_COMPLETION_FLAGS completion_flags,
+ IN DAT_IB_ADDR_HANDLE *remote_ah)
+{
+ struct ibv_send_wr wr;
+ struct ibv_send_wr *bad_wr;
+ ib_data_segment_t *ds = (ib_data_segment_t *)local_iov;
+ DAT_COUNT i, total_len;
+ int ret;
+
+ dapl_dbg_log(DAPL_DBG_TYPE_EP,
+		     " post_ext_snd: ep %p op %d ck %p sgs %d"
+		     " l_iov %p r_iov %p f %d ah %p\n",
+ ep_ptr, op_type, cookie, segments, local_iov,
+ remote_iov, completion_flags, remote_ah);
+
+ /* setup the work request */
+ wr.next = 0;
+ wr.opcode = op_type;
+ wr.num_sge = segments;
+ wr.send_flags = 0;
+ wr.wr_id = (uint64_t)(uintptr_t)cookie;
+ wr.sg_list = ds;
+ total_len = 0;
+
+ if (cookie != NULL) {
+ for (i = 0; i < segments; i++ ) {
+ dapl_dbg_log(DAPL_DBG_TYPE_EP,
+ " post_snd: lkey 0x%x va %p len %d\n",
+ ds->lkey, ds->addr, ds->length );
+ total_len += ds->length;
+ ds++;
+ }
+ cookie->val.dto.size = total_len;
+ }
+
+ switch (op_type) {
+ case OP_RDMA_WRITE_IMM:
+		/* OP_RDMA_WRITE_IMM has a direct IB wr_type mapping */
+ dapl_dbg_log(DAPL_DBG_TYPE_EP,
+ " post_ext: rkey 0x%x va %#016Lx immed=0x%x\n",
+ remote_iov?remote_iov->rmr_context:0,
+ remote_iov?remote_iov->virtual_address:0,
+ immed_data);
+
+ wr.imm_data = immed_data;
+ if (wr.num_sge) {
+ wr.wr.rdma.remote_addr = remote_iov->virtual_address;
+ wr.wr.rdma.rkey = remote_iov->rmr_context;
+ }
+ break;
+ case OP_COMP_AND_SWAP:
+ /* OP_COMP_AND_SWAP has direct IB wr_type mapping */
+ dapl_dbg_log(DAPL_DBG_TYPE_EP,
+ " post_ext: OP_COMP_AND_SWAP=%lx,"
+ "%lx rkey 0x%x va %#016Lx\n",
+ compare_add, swap, remote_iov->rmr_context,
+ remote_iov->virtual_address);
+
+ wr.wr.atomic.compare_add = compare_add;
+ wr.wr.atomic.swap = swap;
+ wr.wr.atomic.remote_addr = remote_iov->virtual_address;
+ wr.wr.atomic.rkey = remote_iov->rmr_context;
+ break;
+ case OP_FETCH_AND_ADD:
+ /* OP_FETCH_AND_ADD has direct IB wr_type mapping */
+ dapl_dbg_log(DAPL_DBG_TYPE_EP,
+			     " post_ext: OP_FETCH_AND_ADD=%lx"
+			     " rkey 0x%x va %#016Lx\n",
+ compare_add, remote_iov->rmr_context,
+ remote_iov->virtual_address);
+
+ wr.wr.atomic.compare_add = compare_add;
+ wr.wr.atomic.remote_addr = remote_iov->virtual_address;
+ wr.wr.atomic.rkey = remote_iov->rmr_context;
+ break;
+ case OP_SEND_UD:
+ /* post must be on EP with service_type of UD */
+ if (ep_ptr->qp_handle->qp_type != IBV_QPT_UD)
+ return(DAT_ERROR(DAT_INVALID_HANDLE, DAT_INVALID_HANDLE_EP));
+
+ dapl_dbg_log(DAPL_DBG_TYPE_EP,
+ " post_ext: OP_SEND_UD ah=%p"
+ " qp_num=0x%x\n",
+ remote_ah, remote_ah->qpn);
+
+ wr.opcode = OP_SEND;
+ wr.wr.ud.ah = remote_ah->ah;
+ wr.wr.ud.remote_qpn = remote_ah->qpn;
+ wr.wr.ud.remote_qkey = DAT_UD_QKEY;
+ break;
+ default:
+ break;
+ }
+
+ /* set completion flags in work request */
+ wr.send_flags |= (DAT_COMPLETION_SUPPRESS_FLAG &
+ completion_flags) ? 0 : IBV_SEND_SIGNALED;
+ wr.send_flags |= (DAT_COMPLETION_BARRIER_FENCE_FLAG &
+ completion_flags) ? IBV_SEND_FENCE : 0;
+ wr.send_flags |= (DAT_COMPLETION_SOLICITED_WAIT_FLAG &
+ completion_flags) ? IBV_SEND_SOLICITED : 0;
+
+ dapl_dbg_log(DAPL_DBG_TYPE_EP,
+ " post_snd: op 0x%x flags 0x%x sglist %p, %d\n",
+ wr.opcode, wr.send_flags, wr.sg_list, wr.num_sge);
+
+ ret = ibv_post_send(ep_ptr->qp_handle, &wr, &bad_wr);
+
+ if (ret)
+ return( dapl_convert_errno(errno,"ibv_send") );
+
+#ifdef DAPL_COUNTERS
+ switch (op_type) {
+ case OP_RDMA_WRITE_IMM:
+ DAPL_CNTR(ep_ptr, DCNT_EP_POST_WRITE_IMM);
+ DAPL_CNTR_DATA(ep_ptr,
+ DCNT_EP_POST_WRITE_IMM_DATA, total_len);
+ break;
+ case OP_COMP_AND_SWAP:
+ DAPL_CNTR(ep_ptr, DCNT_EP_POST_CMP_SWAP);
+ break;
+ case OP_FETCH_AND_ADD:
+ DAPL_CNTR(ep_ptr, DCNT_EP_POST_FETCH_ADD);
+ break;
+ case OP_SEND_UD:
+ DAPL_CNTR(ep_ptr, DCNT_EP_POST_SEND_UD);
+ DAPL_CNTR_DATA(ep_ptr, DCNT_EP_POST_SEND_UD_DATA, total_len);
+ break;
+ default:
+ break;
+ }
+#endif /* DAPL_COUNTERS */
+
+ dapl_dbg_log(DAPL_DBG_TYPE_EP," post_snd: returned\n");
+ return DAT_SUCCESS;
+}
+#endif
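
For readers less familiar with the flag translation done just before the post, the three conditional ORs can be exercised in isolation. The sketch below is a minimal standalone version: the X_COMPLETION_* values are illustrative stand-ins (the real values come from the DAT headers), while the IBV_SEND_* constants are the actual verbs ones.

/* Minimal, standalone sketch of the DAT -> IBV send-flag mapping above.
 * The X_COMPLETION_* values are illustrative stand-ins so this compiles
 * without the DAT headers; IBV_SEND_* come from <infiniband/verbs.h>.
 */
#include <stdio.h>
#include <infiniband/verbs.h>

#define X_COMPLETION_SUPPRESS_FLAG        0x01  /* illustrative value */
#define X_COMPLETION_SOLICITED_WAIT_FLAG  0x02  /* illustrative value */
#define X_COMPLETION_BARRIER_FENCE_FLAG   0x04  /* illustrative value */

static int map_completion_flags(int completion_flags)
{
	int send_flags = 0;

	/* signal unless the consumer asked to suppress the completion */
	send_flags |= (X_COMPLETION_SUPPRESS_FLAG & completion_flags) ?
		      0 : IBV_SEND_SIGNALED;
	/* fence the request if a barrier was requested */
	send_flags |= (X_COMPLETION_BARRIER_FENCE_FLAG & completion_flags) ?
		      IBV_SEND_FENCE : 0;
	/* raise a solicited event at the remote side if requested */
	send_flags |= (X_COMPLETION_SOLICITED_WAIT_FLAG & completion_flags) ?
		      IBV_SEND_SOLICITED : 0;
	return send_flags;
}

int main(void)
{
	printf("default flags: 0x%x\n", map_completion_flags(0));
	printf("suppressed:    0x%x\n",
	       map_completion_flags(X_COMPLETION_SUPPRESS_FLAG));
	return 0;
}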
+
+STATIC _INLINE_ DAT_RETURN
+dapls_ib_optional_prv_dat(
+ IN DAPL_CR *cr_ptr,
+ IN const void *event_data,
+ OUT DAPL_CR **cr_pp)
+{
+ return DAT_SUCCESS;
+}
+
+
+/* map Work Completions to DAPL WR operations */
+STATIC _INLINE_ int dapls_cqe_opcode(ib_work_completion_t *cqe_p)
+{
+#ifdef DAPL_COUNTERS
+ DAPL_COOKIE *cookie = (DAPL_COOKIE *)(uintptr_t)cqe_p->wr_id;
+#endif /* DAPL_COUNTERS */
+
+ switch (cqe_p->opcode) {
+ case IBV_WC_SEND:
+ if (CQE_WR_TYPE_UD(cqe_p->wr_id))
+ return(OP_SEND_UD);
+ else
+ return (OP_SEND);
+ case IBV_WC_RDMA_WRITE:
+ if (cqe_p->wc_flags & IBV_WC_WITH_IMM)
+ return (OP_RDMA_WRITE_IMM);
+ else
+ return (OP_RDMA_WRITE);
+ case IBV_WC_RDMA_READ:
+ return (OP_RDMA_READ);
+ case IBV_WC_COMP_SWAP:
+ return (OP_COMP_AND_SWAP);
+ case IBV_WC_FETCH_ADD:
+ return (OP_FETCH_AND_ADD);
+ case IBV_WC_BIND_MW:
+ return (OP_BIND_MW);
+ case IBV_WC_RECV:
+ if (CQE_WR_TYPE_UD(cqe_p->wr_id)) {
+ DAPL_CNTR(cookie->ep, DCNT_EP_RECV_UD);
+ DAPL_CNTR_DATA(cookie->ep, DCNT_EP_RECV_UD_DATA,
+ cqe_p->byte_len);
+ return (OP_RECV_UD);
+ }
+ else if (cqe_p->wc_flags & IBV_WC_WITH_IMM) {
+ DAPL_CNTR(cookie->ep, DCNT_EP_RECV_IMM);
+ DAPL_CNTR_DATA(cookie->ep, DCNT_EP_RECV_IMM_DATA,
+ cqe_p->byte_len);
+ return (OP_RECEIVE_IMM);
+ } else {
+ DAPL_CNTR(cookie->ep, DCNT_EP_RECV);
+ DAPL_CNTR_DATA(cookie->ep, DCNT_EP_RECV_DATA,
+ cqe_p->byte_len);
+ return (OP_RECEIVE);
+ }
+ case IBV_WC_RECV_RDMA_WITH_IMM:
+ DAPL_CNTR(cookie->ep, DCNT_EP_RECV_RDMA_IMM);
+ DAPL_CNTR_DATA(cookie->ep, DCNT_EP_RECV_RDMA_IMM_DATA,
+ cqe_p->byte_len);
+ return (OP_RECEIVE_IMM);
+ default:
+ return (OP_INVALID);
+ }
+}
+
+#define DAPL_GET_CQE_OPTYPE(cqe_p) dapls_cqe_opcode(cqe_p)
+#define DAPL_GET_CQE_WRID(cqe_p) ((ib_work_completion_t*)cqe_p)->wr_id
+#define DAPL_GET_CQE_STATUS(cqe_p) ((ib_work_completion_t*)cqe_p)->status
+#define DAPL_GET_CQE_VENDOR_ERR(cqe_p) ((ib_work_completion_t*)cqe_p)->vendor_err
+#define DAPL_GET_CQE_BYTESNUM(cqe_p) ((ib_work_completion_t*)cqe_p)->byte_len
+#define DAPL_GET_CQE_IMMED_DATA(cqe_p) ((ib_work_completion_t*)cqe_p)->imm_data
+
+STATIC _INLINE_ char * dapls_dto_op_str(int op)
+{
+ static char *optable[] =
+ {
+ "OP_RDMA_WRITE",
+ "OP_RDMA_WRITE_IMM",
+ "OP_SEND",
+ "OP_SEND_IMM",
+ "OP_RDMA_READ",
+ "OP_COMP_AND_SWAP",
+ "OP_FETCH_AND_ADD",
+ "OP_RECEIVE",
+ "OP_RECEIVE_MSG_IMM",
+ "OP_RECEIVE_RDMA_IMM",
+ "OP_BIND_MW"
+ "OP_SEND_UD"
+ "OP_RECV_UD"
+ };
+ return ((op < 0 || op > 12) ? "Invalid CQE OP?" : optable[op]);
+}
+
+static _INLINE_ char *
+dapls_cqe_op_str(IN ib_work_completion_t *cqe_ptr)
+{
+ return dapls_dto_op_str(DAPL_GET_CQE_OPTYPE(cqe_ptr));
+}
+
+#define DAPL_GET_CQE_OP_STR(cqe) dapls_cqe_op_str(cqe)
+
+#endif /* _DAPL_IB_DTO_H_ */
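
The DAPL_GET_CQE_* accessors and dapls_cqe_opcode() above are thin wrappers over the libibverbs work-completion fields. A standalone sketch of the same classification, using only the public ibv_wc members (opcode, wc_flags, byte_len); the returned strings are illustrative labels, not the provider's OP_* codes.

/* Standalone sketch of the work-completion classification done by
 * dapls_cqe_opcode(), driven by the public struct ibv_wc fields.
 */
#include <stdio.h>
#include <infiniband/verbs.h>

static const char *classify_wc(const struct ibv_wc *wc)
{
	switch (wc->opcode) {
	case IBV_WC_SEND:
		return "send";
	case IBV_WC_RDMA_WRITE:
		return (wc->wc_flags & IBV_WC_WITH_IMM) ?
		       "rdma write with immediate" : "rdma write";
	case IBV_WC_RDMA_READ:
		return "rdma read";
	case IBV_WC_COMP_SWAP:
		return "compare and swap";
	case IBV_WC_FETCH_ADD:
		return "fetch and add";
	case IBV_WC_BIND_MW:
		return "memory window bind";
	case IBV_WC_RECV:
		return (wc->wc_flags & IBV_WC_WITH_IMM) ?
		       "receive with immediate" : "receive";
	case IBV_WC_RECV_RDMA_WITH_IMM:
		return "receive of rdma write with immediate";
	default:
		return "unknown";
	}
}

int main(void)
{
	struct ibv_wc wc = { .opcode = IBV_WC_RDMA_WRITE,
			     .wc_flags = IBV_WC_WITH_IMM };

	printf("%s, %u bytes\n", classify_wc(&wc), wc.byte_len);
	return 0;
}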
diff --git a/dapl/openib_common/ib_extensions.c b/dapl/openib_common/ib_extensions.c
new file mode 100644
index 0000000..3c418e1
--- /dev/null
+++ b/dapl/openib_common/ib_extensions.c
@@ -0,0 +1,360 @@
+/*
+ * Copyright (c) 2007-2009 Intel Corporation. All rights reserved.
+ *
+ * This Software is licensed under one of the following licenses:
+ *
+ * 1) under the terms of the "Common Public License 1.0" a copy of which is
+ * available from the Open Source Initiative, see
+ * http://www.opensource.org/licenses/cpl.php.
+ *
+ * 2) under the terms of the "The BSD License" a copy of which is
+ * available from the Open Source Initiative, see
+ * http://www.opensource.org/licenses/bsd-license.php.
+ *
+ * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
+ * copy of which is available from the Open Source Initiative, see
+ * http://www.opensource.org/licenses/gpl-license.php.
+ *
+ * Licensee has the right to choose one of the above licenses.
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice and one of the license notices.
+ *
+ * Redistributions in binary form must reproduce both the above copyright
+ * notice, one of the license notices in the documentation
+ * and/or other materials provided with the distribution.
+ */
+#include "dapl.h"
+#include "dapl_adapter_util.h"
+#include "dapl_evd_util.h"
+#include "dapl_ib_util.h"
+#include "dapl_ep_util.h"
+#include "dapl_cookie.h"
+#include <stdarg.h>
+
+DAT_RETURN
+dapli_post_ext(IN DAT_EP_HANDLE ep_handle,
+ IN DAT_UINT64 cmp_add,
+ IN DAT_UINT64 swap,
+ IN DAT_UINT32 immed_data,
+ IN DAT_COUNT segments,
+ IN DAT_LMR_TRIPLET * local_iov,
+ IN DAT_DTO_COOKIE user_cookie,
+ IN const DAT_RMR_TRIPLET * remote_iov,
+ IN int op_type,
+ IN DAT_COMPLETION_FLAGS flags, IN DAT_IB_ADDR_HANDLE * ah);
+
+/*
+ * dapl_extensions
+ *
+ * Process extension requests
+ *
+ * Input:
+ * ext_type,
+ * ...
+ *
+ * Output:
+ * Depends....
+ *
+ * Returns:
+ * DAT_SUCCESS
+ * DAT_NOT_IMPLEMENTED
+ * .....
+ *
+ */
+DAT_RETURN
+dapl_extensions(IN DAT_HANDLE dat_handle,
+ IN DAT_EXTENDED_OP ext_op, IN va_list args)
+{
+ DAT_EP_HANDLE ep;
+ DAT_IB_ADDR_HANDLE *ah = NULL;
+ DAT_LMR_TRIPLET *lmr_p;
+ DAT_DTO_COOKIE cookie;
+ const DAT_RMR_TRIPLET *rmr_p;
+ DAT_UINT64 dat_uint64a, dat_uint64b;
+ DAT_UINT32 dat_uint32;
+ DAT_COUNT segments = 1;
+ DAT_COMPLETION_FLAGS comp_flags;
+ DAT_RETURN status = DAT_NOT_IMPLEMENTED;
+
+ dapl_dbg_log(DAPL_DBG_TYPE_API,
+ "dapl_extensions(hdl %p operation %d, ...)\n",
+ dat_handle, ext_op);
+
+ switch ((int)ext_op) {
+
+ case DAT_IB_RDMA_WRITE_IMMED_OP:
+ dapl_dbg_log(DAPL_DBG_TYPE_RTN,
+ " WRITE_IMMED_DATA extension call\n");
+
+ ep = dat_handle; /* ep_handle */
+ segments = va_arg(args, DAT_COUNT); /* num segments */
+ lmr_p = va_arg(args, DAT_LMR_TRIPLET *);
+ cookie = va_arg(args, DAT_DTO_COOKIE);
+ rmr_p = va_arg(args, const DAT_RMR_TRIPLET *);
+ dat_uint32 = va_arg(args, DAT_UINT32); /* immed data */
+ comp_flags = va_arg(args, DAT_COMPLETION_FLAGS);
+
+ status = dapli_post_ext(ep, 0, 0, dat_uint32, segments, lmr_p,
+ cookie, rmr_p, OP_RDMA_WRITE_IMM,
+ comp_flags, ah);
+ break;
+
+ case DAT_IB_CMP_AND_SWAP_OP:
+ dapl_dbg_log(DAPL_DBG_TYPE_RTN,
+ " CMP_AND_SWAP extension call\n");
+
+ ep = dat_handle; /* ep_handle */
+ dat_uint64a = va_arg(args, DAT_UINT64); /* cmp_value */
+ dat_uint64b = va_arg(args, DAT_UINT64); /* swap_value */
+ lmr_p = va_arg(args, DAT_LMR_TRIPLET *);
+ cookie = va_arg(args, DAT_DTO_COOKIE);
+ rmr_p = va_arg(args, const DAT_RMR_TRIPLET *);
+ comp_flags = va_arg(args, DAT_COMPLETION_FLAGS);
+
+ status = dapli_post_ext(ep, dat_uint64a, dat_uint64b,
+ 0, segments, lmr_p, cookie, rmr_p,
+ OP_COMP_AND_SWAP, comp_flags, ah);
+ break;
+
+ case DAT_IB_FETCH_AND_ADD_OP:
+ dapl_dbg_log(DAPL_DBG_TYPE_RTN,
+ " FETCH_AND_ADD extension call\n");
+
+ ep = dat_handle; /* ep_handle */
+ dat_uint64a = va_arg(args, DAT_UINT64); /* add value */
+ lmr_p = va_arg(args, DAT_LMR_TRIPLET *);
+ cookie = va_arg(args, DAT_DTO_COOKIE);
+ rmr_p = va_arg(args, const DAT_RMR_TRIPLET *);
+ comp_flags = va_arg(args, DAT_COMPLETION_FLAGS);
+
+ status = dapli_post_ext(ep, dat_uint64a, 0, 0, segments,
+ lmr_p, cookie, rmr_p,
+ OP_FETCH_AND_ADD, comp_flags, ah);
+ break;
+
+ case DAT_IB_UD_SEND_OP:
+ dapl_dbg_log(DAPL_DBG_TYPE_RTN,
+ " UD post_send extension call\n");
+
+ ep = dat_handle; /* ep_handle */
+ segments = va_arg(args, DAT_COUNT); /* segments */
+ lmr_p = va_arg(args, DAT_LMR_TRIPLET *);
+ ah = va_arg(args, DAT_IB_ADDR_HANDLE *);
+ cookie = va_arg(args, DAT_DTO_COOKIE);
+ comp_flags = va_arg(args, DAT_COMPLETION_FLAGS);
+
+ status = dapli_post_ext(ep, 0, 0, 0, segments,
+ lmr_p, cookie, NULL,
+ OP_SEND_UD, comp_flags, ah);
+ break;
+
+#ifdef DAPL_COUNTERS
+ case DAT_QUERY_COUNTERS_OP:
+ {
+ int cntr, reset;
+ DAT_UINT64 *p_cntr_out;
+
+ dapl_dbg_log(DAPL_DBG_TYPE_RTN,
+ " Query counter extension call\n");
+
+ cntr = va_arg(args, int);
+ p_cntr_out = va_arg(args, DAT_UINT64 *);
+ reset = va_arg(args, int);
+
+ status = dapl_query_counter(dat_handle, cntr,
+ p_cntr_out, reset);
+ break;
+ }
+ case DAT_PRINT_COUNTERS_OP:
+ {
+ int cntr, reset;
+
+ dapl_dbg_log(DAPL_DBG_TYPE_RTN,
+ " Print counter extension call\n");
+
+ cntr = va_arg(args, int);
+ reset = va_arg(args, int);
+
+ dapl_print_counter(dat_handle, cntr, reset);
+ status = DAT_SUCCESS;
+ break;
+ }
+#endif /* DAPL_COUNTERS */
+
+ default:
+ dapl_dbg_log(DAPL_DBG_TYPE_ERR,
+ "unsupported extension(%d)\n", (int)ext_op);
+ }
+
+ return (status);
+}
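
The va_arg sequence in each case above must match the order in which the DAT extension front-end pushed the caller's arguments. A minimal sketch of that dispatch pattern, with simplified stand-in types and an illustrative operation code rather than the DAT ones:

/* Sketch of the variadic dispatch used by dapl_extensions(): a front-end
 * collects the caller's arguments into a va_list and the handler pulls
 * them back out with va_arg in the agreed order.
 */
#include <stdarg.h>
#include <stdio.h>
#include <stdint.h>

enum { EX_OP_CMP_AND_SWAP = 1 };   /* illustrative op code */

static int handle_extension(int op, va_list args)
{
	switch (op) {
	case EX_OP_CMP_AND_SWAP: {
		/* must match the order the caller pushed the arguments */
		uint64_t cmp  = va_arg(args, uint64_t);
		uint64_t swap = va_arg(args, uint64_t);
		void *target  = va_arg(args, void *);

		printf("cmp=%llu swap=%llu target=%p\n",
		       (unsigned long long)cmp,
		       (unsigned long long)swap, target);
		return 0;
	}
	default:
		return -1;
	}
}

static int extension_op(int op, ...)
{
	va_list args;
	int ret;

	va_start(args, op);
	ret = handle_extension(op, args);
	va_end(args);
	return ret;
}

int main(void)
{
	uint64_t word = 0;

	return extension_op(EX_OP_CMP_AND_SWAP,
			    (uint64_t)0, (uint64_t)1, (void *)&word);
}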
+
+DAT_RETURN
+dapli_post_ext(IN DAT_EP_HANDLE ep_handle,
+ IN DAT_UINT64 cmp_add,
+ IN DAT_UINT64 swap,
+ IN DAT_UINT32 immed_data,
+ IN DAT_COUNT segments,
+ IN DAT_LMR_TRIPLET * local_iov,
+ IN DAT_DTO_COOKIE user_cookie,
+ IN const DAT_RMR_TRIPLET * remote_iov,
+ IN int op_type,
+ IN DAT_COMPLETION_FLAGS flags, IN DAT_IB_ADDR_HANDLE * ah)
+{
+ DAPL_EP *ep_ptr;
+ ib_qp_handle_t qp_ptr;
+ DAPL_COOKIE *cookie = NULL;
+ DAT_RETURN dat_status = DAT_SUCCESS;
+
+ dapl_dbg_log(DAPL_DBG_TYPE_API,
+ " post_ext_op: ep %p cmp_val %d "
+ "swap_val %d cookie 0x%x, r_iov %p, flags 0x%x, ah %p\n",
+ ep_handle, (unsigned)cmp_add, (unsigned)swap,
+ (unsigned)user_cookie.as_64, remote_iov, flags, ah);
+
+ if (DAPL_BAD_HANDLE(ep_handle, DAPL_MAGIC_EP))
+ return (DAT_ERROR(DAT_INVALID_HANDLE, DAT_INVALID_HANDLE_EP));
+
+ ep_ptr = (DAPL_EP *) ep_handle;
+ qp_ptr = ep_ptr->qp_handle;
+
+ /*
+ * Synchronization ok since this buffer is only used for send
+ * requests, which aren't allowed to race with each other.
+ */
+ dat_status = dapls_dto_cookie_alloc(&ep_ptr->req_buffer,
+ DAPL_DTO_TYPE_EXTENSION,
+ user_cookie, &cookie);
+ if (dat_status != DAT_SUCCESS)
+ goto bail;
+
+ /*
+ * Take reference before posting to avoid race conditions with
+ * completions
+ */
+ dapl_os_atomic_inc(&ep_ptr->req_count);
+
+ /*
+ * Invoke provider specific routine to post DTO
+ */
+ dat_status = dapls_ib_post_ext_send(ep_ptr, op_type, cookie, segments, /* data segments */
+ local_iov, remote_iov, immed_data, /* immed data */
+ cmp_add, /* compare or add */
+ swap, /* swap */
+ flags, ah);
+
+ if (dat_status != DAT_SUCCESS) {
+ dapl_os_atomic_dec(&ep_ptr->req_count);
+ dapls_cookie_dealloc(&ep_ptr->req_buffer, cookie);
+ }
+
+ bail:
+ return dat_status;
+
+}
+
+/*
+ * New provider routine to process extended DTO events
+ */
+void
+dapls_cqe_to_event_extension(IN DAPL_EP * ep_ptr,
+ IN DAPL_COOKIE * cookie,
+ IN ib_work_completion_t * cqe_ptr,
+ IN DAT_EVENT * event_ptr)
+{
+ uint32_t ibtype;
+ DAT_DTO_COMPLETION_EVENT_DATA *dto =
+ &event_ptr->event_data.dto_completion_event_data;
+ DAT_IB_EXTENSION_EVENT_DATA *ext_data = (DAT_IB_EXTENSION_EVENT_DATA *)
+ & event_ptr->event_extension_data[0];
+ DAT_DTO_COMPLETION_STATUS dto_status;
+
+ /* Get status from cqe */
+ dto_status = dapls_ib_get_dto_status(cqe_ptr);
+
+ dapl_dbg_log(DAPL_DBG_TYPE_EVD,
+ " cqe_to_event_ext: dto_ptr %p ext_ptr %p status %d\n",
+ dto, ext_data, dto_status);
+
+ event_ptr->event_number = DAT_IB_DTO_EVENT;
+ dto->ep_handle = cookie->ep;
+ dto->user_cookie = cookie->val.dto.cookie;
+ dto->operation = DAPL_GET_CQE_DTOS_OPTYPE(cqe_ptr); /* new for 2.0 */
+ dto->status = ext_data->status = dto_status;
+
+ if (dto_status != DAT_DTO_SUCCESS)
+ return;
+
+ /*
+ * Get operation type from CQ work completion entry and
+ * if extended operation then set extended event data
+ */
+ ibtype = DAPL_GET_CQE_OPTYPE(cqe_ptr);
+
+ switch (ibtype) {
+
+ case OP_RDMA_WRITE_IMM:
+ dapl_dbg_log(DAPL_DBG_TYPE_EVD,
+ " cqe_to_event_ext: OP_RDMA_WRITE_IMMED\n");
+
+ /* type and outbound rdma write transfer size */
+ dto->transfered_length = cookie->val.dto.size;
+ ext_data->type = DAT_IB_RDMA_WRITE_IMMED;
+ break;
+ case OP_RECEIVE_IMM:
+ dapl_dbg_log(DAPL_DBG_TYPE_EVD,
+ " cqe_to_event_ext: OP_RECEIVE_RDMA_IMMED\n");
+
+ /* immed recvd, type and inbound rdma write transfer size */
+ dto->transfered_length = DAPL_GET_CQE_BYTESNUM(cqe_ptr);
+ ext_data->type = DAT_IB_RDMA_WRITE_IMMED_DATA;
+ ext_data->val.immed.data = DAPL_GET_CQE_IMMED_DATA(cqe_ptr);
+ break;
+ case OP_RECEIVE_MSG_IMM:
+ dapl_dbg_log(DAPL_DBG_TYPE_EVD,
+ " cqe_to_event_ext: OP_RECEIVE_MSG_IMMED\n");
+
+ /* immed recvd, type and inbound recv message transfer size */
+ dto->transfered_length = DAPL_GET_CQE_BYTESNUM(cqe_ptr);
+ ext_data->type = DAT_IB_RECV_IMMED_DATA;
+ ext_data->val.immed.data = DAPL_GET_CQE_IMMED_DATA(cqe_ptr);
+ break;
+ case OP_COMP_AND_SWAP:
+ dapl_dbg_log(DAPL_DBG_TYPE_EVD,
+ " cqe_to_event_ext: COMP_AND_SWAP_RESP\n");
+
+ /* original data is returned in LMR provided with post */
+ ext_data->type = DAT_IB_CMP_AND_SWAP;
+ dto->transfered_length = DAPL_GET_CQE_BYTESNUM(cqe_ptr);
+ break;
+ case OP_FETCH_AND_ADD:
+ dapl_dbg_log(DAPL_DBG_TYPE_EVD,
+ " cqe_to_event_ext: FETCH_AND_ADD_RESP\n");
+
+ /* original data is returned in LMR provided with post */
+ ext_data->type = DAT_IB_FETCH_AND_ADD;
+ dto->transfered_length = DAPL_GET_CQE_BYTESNUM(cqe_ptr);
+ break;
+ case OP_SEND_UD:
+ dapl_dbg_log(DAPL_DBG_TYPE_EVD, " cqe_to_event_ext: UD_SEND\n");
+
+ /* type and outbound send transfer size */
+ ext_data->type = DAT_IB_UD_SEND;
+ dto->transfered_length = cookie->val.dto.size;
+ break;
+ case OP_RECV_UD:
+ dapl_dbg_log(DAPL_DBG_TYPE_EVD, " cqe_to_event_ext: UD_RECV\n");
+
+ /* type and inbound recv message transfer size */
+ ext_data->type = DAT_IB_UD_RECV;
+ dto->transfered_length = DAPL_GET_CQE_BYTESNUM(cqe_ptr);
+ break;
+
+ default:
+ /* not extended operation */
+ ext_data->status = DAT_IB_OP_ERR;
+ dto->status = DAT_DTO_ERR_TRANSPORT;
+ break;
+ }
+}
diff --git a/dapl/openib_common/mem.c b/dapl/openib_common/mem.c
new file mode 100644
index 0000000..8a3e152
--- /dev/null
+++ b/dapl/openib_common/mem.c
@@ -0,0 +1,370 @@
+/*
+ * Copyright (c) 2005-2007 Intel Corporation. All rights reserved.
+ *
+ * This Software is licensed under one of the following licenses:
+ * 1) under the terms of the "Common Public License 1.0" a copy of which is
+ * available from the Open Source Initiative, see
+ * http://www.opensource.org/licenses/cpl.php.
+ *
+ * 2) under the terms of the "The BSD License" a copy of which is
+ * available from the Open Source Initiative, see
+ * http://www.opensource.org/licenses/bsd-license.php.
+ *
+ * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
+ * copy of which is available from the Open Source Initiative, see
+ * http://www.opensource.org/licenses/gpl-license.php.
+ *
+ * Licensee has the right to choose one of the above licenses.
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice and one of the license notices.
+ *
+ * Redistributions in binary form must reproduce both the above copyright
+ * notice, one of the license notices in the documentation
+ * and/or other materials provided with the distribution.
+ */
+#include "dapl.h"
+#include "dapl_adapter_util.h"
+#include "dapl_lmr_util.h"
+
+/*
+ * dapls_convert_privileges
+ *
+ * Convert LMR privileges to provider
+ *
+ * Input:
+ * DAT_MEM_PRIV_FLAGS
+ *
+ * Output:
+ * none
+ *
+ * Returns:
+ * ibv_access_flags
+ *
+ */
+STATIC _INLINE_ int dapls_convert_privileges(IN DAT_MEM_PRIV_FLAGS privileges)
+{
+ int access = 0;
+
+ /*
+ * if (DAT_MEM_PRIV_LOCAL_READ_FLAG & privileges) do nothing
+ */
+ if (DAT_MEM_PRIV_LOCAL_WRITE_FLAG & privileges)
+ access |= IBV_ACCESS_LOCAL_WRITE;
+ if (DAT_MEM_PRIV_REMOTE_WRITE_FLAG & privileges)
+ access |= IBV_ACCESS_REMOTE_WRITE;
+ if (DAT_MEM_PRIV_REMOTE_READ_FLAG & privileges)
+ access |= IBV_ACCESS_REMOTE_READ;
+#ifdef DAT_EXTENSIONS
+ if (DAT_IB_MEM_PRIV_REMOTE_ATOMIC & privileges)
+ access |= IBV_ACCESS_REMOTE_ATOMIC;
+#endif
+
+ return access;
+}
+
+/*
+ * dapl_ib_pd_alloc
+ *
+ * Alloc a PD
+ *
+ * Input:
+ * ia_handle IA handle
+ * pz pointer to PZ struct
+ *
+ * Output:
+ * none
+ *
+ * Returns:
+ * DAT_SUCCESS
+ * DAT_INSUFFICIENT_RESOURCES
+ *
+ */
+DAT_RETURN dapls_ib_pd_alloc(IN DAPL_IA * ia_ptr, IN DAPL_PZ * pz)
+{
+ /* get a protection domain */
+ pz->pd_handle = ibv_alloc_pd(ia_ptr->hca_ptr->ib_hca_handle);
+ if (!pz->pd_handle)
+ return (dapl_convert_errno(ENOMEM, "alloc_pd"));
+
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+ " pd_alloc: pd_handle=%p\n", pz->pd_handle);
+
+ return DAT_SUCCESS;
+}
+
+/*
+ * dapl_ib_pd_free
+ *
+ * Free a PD
+ *
+ * Input:
+ * ia_handle IA handle
+ * PZ_ptr pointer to PZ struct
+ *
+ * Output:
+ * none
+ *
+ * Returns:
+ * DAT_SUCCESS
+ * DAT_INVALID_STATE
+ *
+ */
+DAT_RETURN dapls_ib_pd_free(IN DAPL_PZ * pz)
+{
+ if (pz->pd_handle != IB_INVALID_HANDLE) {
+ if (ibv_dealloc_pd(pz->pd_handle))
+ return (dapl_convert_errno(errno, "ibv_dealloc_pd"));
+ pz->pd_handle = IB_INVALID_HANDLE;
+ }
+ return DAT_SUCCESS;
+}
+
+/*
+ * dapl_ib_mr_register
+ *
+ * Register a virtual memory region
+ *
+ * Input:
+ * ia_handle IA handle
+ * lmr pointer to dapl_lmr struct
+ * virt_addr virtual address of beginning of mem region
+ * length length of memory region
+ *
+ * Output:
+ * none
+ *
+ * Returns:
+ * DAT_SUCCESS
+ * DAT_INSUFFICIENT_RESOURCES
+ *
+ */
+DAT_RETURN
+dapls_ib_mr_register(IN DAPL_IA * ia_ptr,
+ IN DAPL_LMR * lmr,
+ IN DAT_PVOID virt_addr,
+ IN DAT_VLEN length,
+ IN DAT_MEM_PRIV_FLAGS privileges, IN DAT_VA_TYPE va_type)
+{
+ ib_pd_handle_t ib_pd_handle;
+ struct ibv_device *ibv_dev = ia_ptr->hca_ptr->ib_hca_handle->device;
+
+ ib_pd_handle = ((DAPL_PZ *) lmr->param.pz_handle)->pd_handle;
+
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+ " mr_register: ia=%p, lmr=%p va=%p ln=%d pv=0x%x\n",
+ ia_ptr, lmr, virt_addr, length, privileges);
+
+ /* TODO: shared memory */
+ if (lmr->param.mem_type == DAT_MEM_TYPE_SHARED_VIRTUAL) {
+ dapl_dbg_log(DAPL_DBG_TYPE_ERR,
+ " mr_register_shared: NOT IMPLEMENTED\n");
+ return DAT_ERROR(DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);
+ }
+
+ /* iWARP only support */
+ if ((va_type == DAT_VA_TYPE_ZB) &&
+ (ibv_dev->transport_type != IBV_TRANSPORT_IWARP)) {
+ dapl_dbg_log(DAPL_DBG_TYPE_ERR,
+ " va_type == DAT_VA_TYPE_ZB: NOT SUPPORTED\n");
+ return DAT_ERROR(DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);
+ }
+
+ /* local read is default on IB */
+ lmr->mr_handle =
+ ibv_reg_mr(((DAPL_PZ *) lmr->param.pz_handle)->pd_handle,
+ virt_addr, length, dapls_convert_privileges(privileges));
+
+ if (!lmr->mr_handle)
+ return (dapl_convert_errno(ENOMEM, "reg_mr"));
+
+ lmr->param.lmr_context = lmr->mr_handle->lkey;
+ lmr->param.rmr_context = lmr->mr_handle->rkey;
+ lmr->param.registered_size = length;
+ lmr->param.registered_address = (DAT_VADDR) (uintptr_t) virt_addr;
+
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+ " mr_register: mr=%p addr=%p pd %p ctx %p "
+ "lkey=0x%x rkey=0x%x len=%d priv=%x\n",
+ lmr->mr_handle, lmr->mr_handle->addr,
+ lmr->mr_handle->pd, lmr->mr_handle->context,
+ lmr->mr_handle->lkey, lmr->mr_handle->rkey,
+ length, dapls_convert_privileges(privileges));
+
+ return DAT_SUCCESS;
+}
+
+/*
+ * dapl_ib_mr_deregister
+ *
+ * Free a memory region
+ *
+ * Input:
+ * lmr pointer to dapl_lmr struct
+ *
+ * Output:
+ * none
+ *
+ * Returns:
+ * DAT_SUCCESS
+ * DAT_INVALID_STATE
+ *
+ */
+DAT_RETURN dapls_ib_mr_deregister(IN DAPL_LMR * lmr)
+{
+ if (lmr->mr_handle != IB_INVALID_HANDLE) {
+ if (ibv_dereg_mr(lmr->mr_handle))
+ return (dapl_convert_errno(errno, "dereg_mr"));
+ lmr->mr_handle = IB_INVALID_HANDLE;
+ }
+ return DAT_SUCCESS;
+}
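
The register/deregister pair above reduces to a handful of libibverbs calls. A hedged, standalone sketch (first device, one page buffer, local/remote read-write access; error handling trimmed to early returns):

/* Standalone sketch of the verbs calls wrapped by dapls_ib_pd_alloc,
 * dapls_ib_mr_register and dapls_ib_mr_deregister: open the first device,
 * allocate a PD, register a buffer, then tear everything down.
 */
#include <stdio.h>
#include <stdlib.h>
#include <infiniband/verbs.h>

int main(void)
{
	struct ibv_device **list = ibv_get_device_list(NULL);
	struct ibv_context *ctx;
	struct ibv_pd *pd;
	struct ibv_mr *mr;
	void *buf;

	if (!list || !list[0])
		return 1;

	ctx = ibv_open_device(list[0]);
	if (!ctx)
		return 1;

	pd = ibv_alloc_pd(ctx);
	if (!pd)
		return 1;

	buf = malloc(4096);
	if (!buf)
		return 1;

	mr = ibv_reg_mr(pd, buf, 4096,
			IBV_ACCESS_LOCAL_WRITE |
			IBV_ACCESS_REMOTE_WRITE |
			IBV_ACCESS_REMOTE_READ);
	if (!mr)
		return 1;

	printf("registered: lkey=0x%x rkey=0x%x\n", mr->lkey, mr->rkey);

	ibv_dereg_mr(mr);
	free(buf);
	ibv_dealloc_pd(pd);
	ibv_close_device(ctx);
	ibv_free_device_list(list);
	return 0;
}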
+
+/*
+ * dapl_ib_mr_register_shared
+ *
+ * Register a virtual memory region
+ *
+ * Input:
+ * ia_ptr IA handle
+ * lmr pointer to dapl_lmr struct
+ * privileges
+ * va_type
+ *
+ * Output:
+ * none
+ *
+ * Returns:
+ * DAT_SUCCESS
+ * DAT_INSUFFICIENT_RESOURCES
+ *
+ */
+DAT_RETURN
+dapls_ib_mr_register_shared(IN DAPL_IA * ia_ptr,
+ IN DAPL_LMR * lmr,
+ IN DAT_MEM_PRIV_FLAGS privileges,
+ IN DAT_VA_TYPE va_type)
+{
+ dapl_dbg_log(DAPL_DBG_TYPE_ERR,
+ " mr_register_shared: NOT IMPLEMENTED\n");
+
+ return DAT_ERROR(DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);
+}
+
+/*
+ * dapls_ib_mw_alloc
+ *
+ * Bind a protection domain to a memory window
+ *
+ * Input:
+ * rmr Initialized rmr to hold binding handles
+ *
+ * Output:
+ * none
+ *
+ * Returns:
+ * DAT_SUCCESS
+ * DAT_INSUFFICIENT_RESOURCES
+ *
+ */
+DAT_RETURN dapls_ib_mw_alloc(IN DAPL_RMR * rmr)
+{
+
+ dapl_dbg_log(DAPL_DBG_TYPE_ERR, " mw_alloc: NOT IMPLEMENTED\n");
+
+ return DAT_ERROR(DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);
+}
+
+/*
+ * dapls_ib_mw_free
+ *
+ * Release bindings of a protection domain to a memory window
+ *
+ * Input:
+ * rmr Initialized rmr to hold binding handles
+ *
+ * Output:
+ * none
+ *
+ * Returns:
+ * DAT_SUCCESS
+ * DAT_INVALID_STATE
+ *
+ */
+DAT_RETURN dapls_ib_mw_free(IN DAPL_RMR * rmr)
+{
+ dapl_dbg_log(DAPL_DBG_TYPE_ERR, " mw_free: NOT IMPLEMENTED\n");
+
+ return DAT_ERROR(DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);
+}
+
+/*
+ * dapls_ib_mw_bind
+ *
+ * Bind a protection domain to a memory window
+ *
+ * Input:
+ * rmr Initialized rmr to hold binding handles
+ *
+ * Output:
+ * none
+ *
+ * Returns:
+ * DAT_SUCCESS
+ * DAT_INVALID_PARAMETER;
+ * DAT_INSUFFICIENT_RESOURCES
+ *
+ */
+DAT_RETURN
+dapls_ib_mw_bind(IN DAPL_RMR * rmr,
+ IN DAPL_LMR * lmr,
+ IN DAPL_EP * ep,
+ IN DAPL_COOKIE * cookie,
+ IN DAT_VADDR virtual_address,
+ IN DAT_VLEN length,
+ IN DAT_MEM_PRIV_FLAGS mem_priv, IN DAT_BOOLEAN is_signaled)
+{
+ dapl_dbg_log(DAPL_DBG_TYPE_ERR, " mw_bind: NOT IMPLEMENTED\n");
+
+ return DAT_ERROR(DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);
+}
+
+/*
+ * dapls_ib_mw_unbind
+ *
+ * Unbind a protection domain from a memory window
+ *
+ * Input:
+ * rmr Initialized rmr to hold binding handles
+ *
+ * Output:
+ * none
+ *
+ * Returns:
+ * DAT_SUCCESS
+ * DAT_INVALID_PARAMETER;
+ * DAT_INVALID_STATE;
+ * DAT_INSUFFICIENT_RESOURCES
+ *
+ */
+DAT_RETURN
+dapls_ib_mw_unbind(IN DAPL_RMR * rmr,
+ IN DAPL_EP * ep,
+ IN DAPL_COOKIE * cookie, IN DAT_BOOLEAN is_signaled)
+{
+ dapl_dbg_log(DAPL_DBG_TYPE_ERR, " mw_unbind: NOT IMPLEMENTED\n");
+
+ return DAT_ERROR(DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);
+}
+
+/*
+ * Local variables:
+ * c-indent-level: 4
+ * c-basic-offset: 4
+ * tab-width: 8
+ * End:
+ */
diff --git a/dapl/openib_common/qp.c b/dapl/openib_common/qp.c
new file mode 100644
index 0000000..9fb7c96
--- /dev/null
+++ b/dapl/openib_common/qp.c
@@ -0,0 +1,515 @@
+/*
+ * This Software is licensed under one of the following licenses:
+ *
+ * 1) under the terms of the "Common Public License 1.0" a copy of which is
+ * available from the Open Source Initiative, see
+ * http://www.opensource.org/licenses/cpl.php.
+ *
+ * 2) under the terms of the "The BSD License" a copy of which is
+ * available from the Open Source Initiative, see
+ * http://www.opensource.org/licenses/bsd-license.php.
+ *
+ * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
+ * copy of which is available from the Open Source Initiative, see
+ * http://www.opensource.org/licenses/gpl-license.php.
+ *
+ * Licensee has the right to choose one of the above licenses.
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice and one of the license notices.
+ *
+ * Redistributions in binary form must reproduce both the above copyright
+ * notice, one of the license notices in the documentation
+ * and/or other materials provided with the distribution.
+ */
+#include "dapl.h"
+#include "dapl_adapter_util.h"
+
+/*
+ * dapl_ib_qp_alloc
+ *
+ * Alloc a QP
+ *
+ * Input:
+ * *ep_ptr pointer to EP INFO
+ * ib_hca_handle provider HCA handle
+ * ib_pd_handle provider protection domain handle
+ * cq_recv provider recv CQ handle
+ * cq_send provider send CQ handle
+ *
+ * Output:
+ * none
+ *
+ * Returns:
+ * DAT_SUCCESS
+ * DAT_INSUFFICIENT_RESOURCES
+ * DAT_INTERNAL_ERROR
+ *
+ */
+DAT_RETURN
+dapls_ib_qp_alloc(IN DAPL_IA * ia_ptr,
+ IN DAPL_EP * ep_ptr, IN DAPL_EP * ep_ctx_ptr)
+{
+ DAT_EP_ATTR *attr;
+ DAPL_EVD *rcv_evd, *req_evd;
+ ib_cq_handle_t rcv_cq, req_cq;
+ ib_pd_handle_t ib_pd_handle;
+ struct ibv_qp_init_attr qp_create;
+#ifdef _OPENIB_CMA_
+ dp_ib_cm_handle_t conn;
+#endif
+ dapl_dbg_log(DAPL_DBG_TYPE_EP,
+ " qp_alloc: ia_ptr %p ep_ptr %p ep_ctx_ptr %p\n",
+ ia_ptr, ep_ptr, ep_ctx_ptr);
+
+ attr = &ep_ptr->param.ep_attr;
+ ib_pd_handle = ((DAPL_PZ *) ep_ptr->param.pz_handle)->pd_handle;
+ rcv_evd = (DAPL_EVD *) ep_ptr->param.recv_evd_handle;
+ req_evd = (DAPL_EVD *) ep_ptr->param.request_evd_handle;
+
+ /*
+ * DAT allows usage model of EP's with no EVD's but IB does not.
+ * Create a CQ with zero entries under the covers to support and
+ * catch any invalid posting.
+ */
+ if (rcv_evd != DAT_HANDLE_NULL)
+ rcv_cq = rcv_evd->ib_cq_handle;
+ else if (!ia_ptr->hca_ptr->ib_trans.ib_cq_empty)
+ rcv_cq = ia_ptr->hca_ptr->ib_trans.ib_cq_empty;
+ else {
+ struct ibv_comp_channel *channel =
+ rcv_evd->cq_wait_obj_handle;
+
+ /* Call IB verbs to create CQ */
+ rcv_cq = ibv_create_cq(ia_ptr->hca_ptr->ib_hca_handle,
+ 0, NULL, channel, 0);
+
+ if (rcv_cq == IB_INVALID_HANDLE)
+ return (dapl_convert_errno(ENOMEM, "create_cq"));
+
+ ia_ptr->hca_ptr->ib_trans.ib_cq_empty = rcv_cq;
+ }
+ if (req_evd != DAT_HANDLE_NULL)
+ req_cq = req_evd->ib_cq_handle;
+ else
+ req_cq = ia_ptr->hca_ptr->ib_trans.ib_cq_empty;
+
+ /*
+ * IMPLEMENTATION NOTE:
+ * uDAPL allows consumers to post buffers on the EP after creation
+ * and before a connect request (outbound and inbound). This forces
+ * a binding to a device during the hca_open call and requires the
+ * consumer to predetermine which device to listen on or connect from.
+ * This restriction eliminates any option of listening or connecting
+ * over multiple devices. uDAPL should add API's to resolve addresses
+ * and bind to the device at the appropriate time (before connect
+ * and after CR arrives). Discovery should happen at connection time
+ * based on addressing and not on static configuration during open.
+ */
+
+#ifdef _OPENIB_CMA_
+ /* Allocate CM and initialize lock */
+ if ((conn = dapls_ib_cm_create(ep_ptr)) == NULL)
+ return (dapl_convert_errno(ENOMEM, "cm_create"));
+
+ /* open identifies the local device; per DAT specification */
+ if (rdma_bind_addr(conn->cm_id,
+ (struct sockaddr *)&ia_ptr->hca_ptr->hca_address))
+ return (dapl_convert_errno(EAFNOSUPPORT, "rdma_bind_addr"));
+#endif
+ /* Setup attributes and create qp */
+ dapl_os_memzero((void *)&qp_create, sizeof(qp_create));
+ qp_create.send_cq = req_cq;
+ qp_create.cap.max_send_wr = attr->max_request_dtos;
+ qp_create.cap.max_send_sge = attr->max_request_iov;
+ qp_create.cap.max_inline_data =
+ ia_ptr->hca_ptr->ib_trans.max_inline_send;
+ qp_create.qp_type = IBV_QPT_RC;
+ qp_create.qp_context = (void *)ep_ptr;
+
+#ifdef DAT_EXTENSIONS
+ if (attr->service_type == DAT_IB_SERVICE_TYPE_UD) {
+#ifdef _OPENIB_CMA_
+ return (DAT_NOT_IMPLEMENTED);
+#endif
+ qp_create.qp_type = IBV_QPT_UD;
+ if (attr->max_message_size >
+ (128 << ia_ptr->hca_ptr->ib_trans.mtu)) {
+ return (DAT_INVALID_PARAMETER | DAT_INVALID_ARG6);
+ }
+ }
+#endif
+
+ /* ibv assumes rcv_cq is never NULL, set to req_cq */
+ if (rcv_cq == NULL) {
+ qp_create.recv_cq = req_cq;
+ qp_create.cap.max_recv_wr = 0;
+ qp_create.cap.max_recv_sge = 0;
+ } else {
+ qp_create.recv_cq = rcv_cq;
+ qp_create.cap.max_recv_wr = attr->max_recv_dtos;
+ qp_create.cap.max_recv_sge = attr->max_recv_iov;
+ }
+
+#ifdef _OPENIB_CMA_
+ if (rdma_create_qp(conn->cm_id, ib_pd_handle, &qp_create)) {
+ dapls_ib_cm_free(conn, ep_ptr);
+ return (dapl_convert_errno(errno, "create_qp"));
+ }
+ ep_ptr->qp_handle = conn->cm_id->qp;
+ ep_ptr->cm_handle = conn;
+ ep_ptr->qp_state = IBV_QPS_INIT;
+
+ /* setup up ep->param to reference the bound local address and port */
+ ep_ptr->param.local_ia_address_ptr =
+ &conn->cm_id->route.addr.src_addr;
+ ep_ptr->param.local_port_qual = rdma_get_src_port(conn->cm_id);
+#else
+ ep_ptr->qp_handle = ibv_create_qp(ib_pd_handle, &qp_create);
+ if (!ep_ptr->qp_handle)
+ return (dapl_convert_errno(ENOMEM, "create_qp"));
+
+ /* Setup QP attributes for INIT state on the way out */
+ if (dapls_modify_qp_state(ep_ptr->qp_handle,
+ IBV_QPS_INIT, NULL) != DAT_SUCCESS) {
+ ibv_destroy_qp(ep_ptr->qp_handle);
+ ep_ptr->qp_handle = IB_INVALID_HANDLE;
+ return DAT_INTERNAL_ERROR;
+ }
+#endif
+ dapl_dbg_log(DAPL_DBG_TYPE_EP,
+ " qp_alloc: qpn %p sq %d,%d rq %d,%d\n",
+ ep_ptr->qp_handle->qp_num,
+ qp_create.cap.max_send_wr, qp_create.cap.max_send_sge,
+ qp_create.cap.max_recv_wr, qp_create.cap.max_recv_sge);
+
+ return DAT_SUCCESS;
+}
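
On the non-CMA path the QP creation above boils down to a single ibv_create_qp() call. A minimal sketch, with illustrative capacities and assuming an existing protection domain and completion queue:

/* Minimal sketch of the ibv_create_qp() call made by dapls_ib_qp_alloc()
 * on the non-CMA path (capacities are illustrative).
 */
#include <string.h>
#include <infiniband/verbs.h>

static struct ibv_qp *create_rc_qp(struct ibv_pd *pd, struct ibv_cq *cq)
{
	struct ibv_qp_init_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.send_cq = cq;
	attr.recv_cq = cq;          /* verbs requires a non-NULL recv CQ */
	attr.qp_type = IBV_QPT_RC;
	attr.cap.max_send_wr  = 64; /* illustrative capacities */
	attr.cap.max_recv_wr  = 64;
	attr.cap.max_send_sge = 4;
	attr.cap.max_recv_sge = 4;

	return ibv_create_qp(pd, &attr);
}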
+
+/*
+ * dapl_ib_qp_free
+ *
+ * Free a QP
+ *
+ * Input:
+ * ia_handle IA handle
+ * *ep_ptr pointer to EP INFO
+ *
+ * Output:
+ * none
+ *
+ * Returns:
+ * DAT_SUCCESS
+ * dapl_convert_errno
+ *
+ */
+DAT_RETURN dapls_ib_qp_free(IN DAPL_IA * ia_ptr, IN DAPL_EP * ep_ptr)
+{
+ dapl_dbg_log(DAPL_DBG_TYPE_EP, " qp_free: ep_ptr %p qp %p\n",
+ ep_ptr, ep_ptr->qp_handle);
+
+ if (ep_ptr->cm_handle != NULL) {
+ dapls_ib_cm_free(ep_ptr->cm_handle, ep_ptr);
+ }
+
+ if (ep_ptr->qp_handle != NULL) {
+ /* force error state to flush queue, then destroy */
+ dapls_modify_qp_state(ep_ptr->qp_handle, IBV_QPS_ERR, NULL);
+
+ if (ibv_destroy_qp(ep_ptr->qp_handle))
+ return (dapl_convert_errno(errno, "destroy_qp"));
+
+ ep_ptr->qp_handle = NULL;
+ }
+
+#ifdef DAT_EXTENSIONS
+ /* UD endpoints can have many CR associations and will not
+ * set ep->cm_handle. Call provider with cm_ptr null to indicate
+ * UD type multi CR's for this EP. It will parse internal list
+ * and cleanup all associations.
+ */
+ if (ep_ptr->param.ep_attr.service_type == DAT_IB_SERVICE_TYPE_UD)
+ dapls_ib_cm_free(NULL, ep_ptr);
+#endif
+
+ return DAT_SUCCESS;
+}
+
+/*
+ * dapl_ib_qp_modify
+ *
+ * Set the QP to the parameters specified in an EP_PARAM
+ *
+ * The EP_PARAM structure that is provided has been
+ * sanitized such that only non-zero values are valid.
+ *
+ * Input:
+ * ib_hca_handle HCA handle
+ * qp_handle QP handle
+ * ep_attr Sanitized EP Params
+ *
+ * Output:
+ * none
+ *
+ * Returns:
+ * DAT_SUCCESS
+ * DAT_INSUFFICIENT_RESOURCES
+ * DAT_INVALID_PARAMETER
+ *
+ */
+DAT_RETURN
+dapls_ib_qp_modify(IN DAPL_IA * ia_ptr,
+ IN DAPL_EP * ep_ptr, IN DAT_EP_ATTR * attr)
+{
+ struct ibv_qp_attr qp_attr;
+
+ if (ep_ptr->qp_handle == IB_INVALID_HANDLE)
+ return DAT_INVALID_PARAMETER;
+
+ /*
+ * EP state, qp_handle state should be an indication
+ * of current state but the only way to be sure is with
+ * a user mode ibv_query_qp call which is NOT available
+ */
+
+ /* move to error state if necessary */
+ if ((ep_ptr->qp_state == IB_QP_STATE_ERROR) &&
+ (ep_ptr->qp_handle->state != IBV_QPS_ERR)) {
+ return (dapls_modify_qp_state(ep_ptr->qp_handle,
+ IBV_QPS_ERR, NULL));
+ }
+
+ /*
+ * Check if we have the right qp_state to modify attributes
+ */
+ if ((ep_ptr->qp_handle->state != IBV_QPS_RTR) &&
+ (ep_ptr->qp_handle->state != IBV_QPS_RTS))
+ return DAT_INVALID_STATE;
+
+ /* Adjust to current EP attributes */
+ dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr));
+ qp_attr.cap.max_send_wr = attr->max_request_dtos;
+ qp_attr.cap.max_recv_wr = attr->max_recv_dtos;
+ qp_attr.cap.max_send_sge = attr->max_request_iov;
+ qp_attr.cap.max_recv_sge = attr->max_recv_iov;
+
+ dapl_dbg_log(DAPL_DBG_TYPE_EP,
+ "modify_qp: qp %p sq %d,%d, rq %d,%d\n",
+ ep_ptr->qp_handle,
+ qp_attr.cap.max_send_wr, qp_attr.cap.max_send_sge,
+ qp_attr.cap.max_recv_wr, qp_attr.cap.max_recv_sge);
+
+ if (ibv_modify_qp(ep_ptr->qp_handle, &qp_attr, IBV_QP_CAP)) {
+ dapl_dbg_log(DAPL_DBG_TYPE_ERR,
+ "modify_qp: modify ep %p qp %p failed\n",
+ ep_ptr, ep_ptr->qp_handle);
+ return (dapl_convert_errno(errno, "modify_qp_state"));
+ }
+
+ return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_reinit_ep
+ *
+ * Move the QP to INIT state again.
+ *
+ * Input:
+ * ep_ptr DAPL_EP
+ *
+ * Output:
+ * none
+ *
+ * Returns:
+ * void
+ *
+ */
+#if defined(_WIN32) || defined(_WIN64)
+void dapls_ib_reinit_ep(IN DAPL_EP * ep_ptr)
+{
+ /* work around bug in low level driver - 3/24/09 */
+ /* RTS -> RESET -> INIT -> ERROR QP transition crashes system */
+ if (ep_ptr->qp_handle != IB_INVALID_HANDLE) {
+ dapls_ib_qp_free(ep_ptr->header.owner_ia, ep_ptr);
+ dapls_ib_qp_alloc(ep_ptr->header.owner_ia, ep_ptr, ep_ptr);
+ }
+}
+#else // _WIN32 || _WIN64
+void dapls_ib_reinit_ep(IN DAPL_EP * ep_ptr)
+{
+ if (ep_ptr->qp_handle != IB_INVALID_HANDLE &&
+ ep_ptr->qp_handle->qp_type != IBV_QPT_UD) {
+ /* move to RESET state and then to INIT */
+ dapls_modify_qp_state(ep_ptr->qp_handle, IBV_QPS_RESET, 0);
+ dapls_modify_qp_state(ep_ptr->qp_handle, IBV_QPS_INIT, 0);
+ }
+}
+#endif // _WIN32 || _WIN64
+
+/*
+ * Generic QP modify for init, reset, error, RTS, RTR
+ * For UD, create_ah on RTR, qkey on INIT
+ */
+DAT_RETURN
+dapls_modify_qp_state(IN ib_qp_handle_t qp_handle,
+ IN ib_qp_state_t qp_state,
+ IN dp_ib_cm_handle_t cm_ptr)
+{
+ struct ibv_qp_attr qp_attr;
+ enum ibv_qp_attr_mask mask = IBV_QP_STATE;
+ DAPL_EP *ep_ptr = (DAPL_EP *) qp_handle->qp_context;
+ DAPL_IA *ia_ptr = ep_ptr->header.owner_ia;
+ ib_qp_cm_t *qp_cm = &cm_ptr->dst;
+ int ret;
+
+ dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr));
+ qp_attr.qp_state = qp_state;
+ switch (qp_state) {
+ /* additional attributes with RTR and RTS */
+ case IBV_QPS_RTR:
+ {
+ dapl_dbg_log(DAPL_DBG_TYPE_EP,
+ " QPS_RTR: type %d state %d qpn %x lid %x"
+ " port %x ep %p qp_state %d\n",
+ qp_handle->qp_type, qp_handle->state,
+ qp_cm->qpn, qp_cm->lid, qp_cm->port,
+ ep_ptr, ep_ptr->qp_state);
+
+ mask |= IBV_QP_AV |
+ IBV_QP_PATH_MTU |
+ IBV_QP_DEST_QPN |
+ IBV_QP_RQ_PSN |
+ IBV_QP_MAX_DEST_RD_ATOMIC | IBV_QP_MIN_RNR_TIMER;
+
+ qp_attr.dest_qp_num = qp_cm->qpn;
+ qp_attr.rq_psn = 1;
+ qp_attr.path_mtu = ia_ptr->hca_ptr->ib_trans.mtu;
+ qp_attr.max_dest_rd_atomic =
+ ep_ptr->param.ep_attr.max_rdma_read_out;
+ qp_attr.min_rnr_timer =
+ ia_ptr->hca_ptr->ib_trans.rnr_timer;
+
+ /* address handle. RC and UD */
+ qp_attr.ah_attr.dlid = qp_cm->lid;
+ if (ia_ptr->hca_ptr->ib_trans.global) {
+ qp_attr.ah_attr.is_global = 1;
+ qp_attr.ah_attr.grh.dgid = qp_cm->gid;
+ qp_attr.ah_attr.grh.hop_limit =
+ ia_ptr->hca_ptr->ib_trans.hop_limit;
+ qp_attr.ah_attr.grh.traffic_class =
+ ia_ptr->hca_ptr->ib_trans.tclass;
+ }
+ qp_attr.ah_attr.sl = 0;
+ qp_attr.ah_attr.src_path_bits = 0;
+ qp_attr.ah_attr.port_num = ia_ptr->hca_ptr->port_num;
+#ifdef DAT_EXTENSIONS
+ /* UD: create AH for remote side */
+ if (qp_handle->qp_type == IBV_QPT_UD) {
+ ib_pd_handle_t pz;
+ pz = ((DAPL_PZ *)
+ ep_ptr->param.pz_handle)->pd_handle;
+ mask = IBV_QP_STATE;
+ cm_ptr->ah = ibv_create_ah(pz,
+ &qp_attr.ah_attr);
+ if (!cm_ptr->ah)
+ return (dapl_convert_errno(errno,
+ "ibv_ah"));
+
+ /* already RTR, multi remote AH's on QP */
+ if (ep_ptr->qp_state == IBV_QPS_RTR ||
+ ep_ptr->qp_state == IBV_QPS_RTS)
+ return DAT_SUCCESS;
+ }
+#endif
+ break;
+ }
+ case IBV_QPS_RTS:
+ {
+ /* RC only */
+ if (qp_handle->qp_type == IBV_QPT_RC) {
+ mask |= IBV_QP_SQ_PSN |
+ IBV_QP_TIMEOUT |
+ IBV_QP_RETRY_CNT |
+ IBV_QP_RNR_RETRY | IBV_QP_MAX_QP_RD_ATOMIC;
+ qp_attr.timeout =
+ ia_ptr->hca_ptr->ib_trans.ack_timer;
+ qp_attr.retry_cnt =
+ ia_ptr->hca_ptr->ib_trans.ack_retry;
+ qp_attr.rnr_retry =
+ ia_ptr->hca_ptr->ib_trans.rnr_retry;
+ qp_attr.max_rd_atomic =
+ ep_ptr->param.ep_attr.max_rdma_read_out;
+ }
+ /* RC and UD */
+ qp_attr.qp_state = IBV_QPS_RTS;
+ qp_attr.sq_psn = 1;
+
+ dapl_dbg_log(DAPL_DBG_TYPE_EP,
+ " QPS_RTS: psn %x rd_atomic %d ack %d "
+ " retry %d rnr_retry %d ep %p qp_state %d\n",
+ qp_attr.sq_psn, qp_attr.max_rd_atomic,
+ qp_attr.timeout, qp_attr.retry_cnt,
+ qp_attr.rnr_retry, ep_ptr,
+ ep_ptr->qp_state);
+#ifdef DAT_EXTENSIONS
+ if (qp_handle->qp_type == IBV_QPT_UD) {
+ /* already RTS, multi remote AH's on QP */
+ if (ep_ptr->qp_state == IBV_QPS_RTS)
+ return DAT_SUCCESS;
+ else
+ mask = IBV_QP_STATE | IBV_QP_SQ_PSN;
+ }
+#endif
+ break;
+ }
+ case IBV_QPS_INIT:
+ {
+ mask |= IBV_QP_PKEY_INDEX | IBV_QP_PORT;
+ if (qp_handle->qp_type == IBV_QPT_RC) {
+ mask |= IBV_QP_ACCESS_FLAGS;
+ qp_attr.qp_access_flags =
+ IBV_ACCESS_LOCAL_WRITE |
+ IBV_ACCESS_REMOTE_WRITE |
+ IBV_ACCESS_REMOTE_READ |
+ IBV_ACCESS_REMOTE_ATOMIC |
+ IBV_ACCESS_MW_BIND;
+ }
+#ifdef DAT_EXTENSIONS
+ if (qp_handle->qp_type == IBV_QPT_UD) {
+ /* already INIT, multi remote AH's on QP */
+ if (ep_ptr->qp_state == IBV_QPS_INIT)
+ return DAT_SUCCESS;
+ mask |= IBV_QP_QKEY;
+ qp_attr.qkey = DAT_UD_QKEY;
+ }
+#endif
+ qp_attr.pkey_index = 0;
+ qp_attr.port_num = ia_ptr->hca_ptr->port_num;
+
+ dapl_dbg_log(DAPL_DBG_TYPE_EP,
+ " QPS_INIT: pi %x port %x acc %x qkey 0x%x\n",
+ qp_attr.pkey_index, qp_attr.port_num,
+ qp_attr.qp_access_flags, qp_attr.qkey);
+ break;
+ }
+ default:
+ break;
+
+ }
+
+ ret = ibv_modify_qp(qp_handle, &qp_attr, mask);
+ if (ret == 0) {
+ ep_ptr->qp_state = qp_state;
+ return DAT_SUCCESS;
+ } else {
+ return (dapl_convert_errno(errno, "modify_qp_state"));
+ }
+}
+
+/*
+ * Local variables:
+ * c-indent-level: 4
+ * c-basic-offset: 4
+ * tab-width: 8
+ * End:
+ */
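
The attribute masks assembled in dapls_modify_qp_state() follow the usual RC bring-up sequence. A standalone sketch of that sequence (INIT, then RTR, then RTS), with placeholder remote QPN/LID/PSN values that a real CM exchange, such as the socket exchange in openib_scm, would supply:

/* Hedged sketch of the RC state machine driven above.  The timeout, retry
 * and rd_atomic values are illustrative defaults.
 */
#include <string.h>
#include <stdint.h>
#include <infiniband/verbs.h>

static int rc_qp_to_rts(struct ibv_qp *qp, uint8_t port,
			uint32_t remote_qpn, uint16_t remote_lid)
{
	struct ibv_qp_attr a;

	memset(&a, 0, sizeof(a));
	a.qp_state = IBV_QPS_INIT;
	a.pkey_index = 0;
	a.port_num = port;
	a.qp_access_flags = IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE;
	if (ibv_modify_qp(qp, &a, IBV_QP_STATE | IBV_QP_PKEY_INDEX |
				  IBV_QP_PORT | IBV_QP_ACCESS_FLAGS))
		return -1;

	memset(&a, 0, sizeof(a));
	a.qp_state = IBV_QPS_RTR;
	a.path_mtu = IBV_MTU_1024;
	a.dest_qp_num = remote_qpn;     /* placeholder: from CM exchange */
	a.rq_psn = 1;
	a.max_dest_rd_atomic = 1;
	a.min_rnr_timer = 12;
	a.ah_attr.dlid = remote_lid;    /* placeholder: from CM exchange */
	a.ah_attr.sl = 0;
	a.ah_attr.src_path_bits = 0;
	a.ah_attr.port_num = port;
	if (ibv_modify_qp(qp, &a, IBV_QP_STATE | IBV_QP_AV | IBV_QP_PATH_MTU |
				  IBV_QP_DEST_QPN | IBV_QP_RQ_PSN |
				  IBV_QP_MAX_DEST_RD_ATOMIC |
				  IBV_QP_MIN_RNR_TIMER))
		return -1;

	memset(&a, 0, sizeof(a));
	a.qp_state = IBV_QPS_RTS;
	a.sq_psn = 1;
	a.timeout = 14;
	a.retry_cnt = 7;
	a.rnr_retry = 7;
	a.max_rd_atomic = 1;
	return ibv_modify_qp(qp, &a, IBV_QP_STATE | IBV_QP_SQ_PSN |
				     IBV_QP_TIMEOUT | IBV_QP_RETRY_CNT |
				     IBV_QP_RNR_RETRY |
				     IBV_QP_MAX_QP_RD_ATOMIC);
}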
diff --git a/dapl/openib_common/util.c b/dapl/openib_common/util.c
new file mode 100644
index 0000000..da913c5
--- /dev/null
+++ b/dapl/openib_common/util.c
@@ -0,0 +1,375 @@
+/*
+ * This Software is licensed under one of the following licenses:
+ *
+ * 1) under the terms of the "Common Public License 1.0" a copy of which is
+ * available from the Open Source Initiative, see
+ * http://www.opensource.org/licenses/cpl.php.
+ *
+ * 2) under the terms of the "The BSD License" a copy of which is
+ * available from the Open Source Initiative, see
+ * http://www.opensource.org/licenses/bsd-license.php.
+ *
+ * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
+ * copy of which is available from the Open Source Initiative, see
+ * http://www.opensource.org/licenses/gpl-license.php.
+ *
+ * Licensee has the right to choose one of the above licenses.
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice and one of the license notices.
+ *
+ * Redistributions in binary form must reproduce both the above copyright
+ * notice, one of the license notices in the documentation
+ * and/or other materials provided with the distribution.
+ */
+#include "dapl.h"
+#include "dapl_adapter_util.h"
+#include "dapl_ib_util.h"
+#include "dapl_osd.h"
+
+#include <stdlib.h>
+
+int g_dapl_loopback_connection = 0;
+
+enum ibv_mtu dapl_ib_mtu(int mtu)
+{
+ switch (mtu) {
+ case 256:
+ return IBV_MTU_256;
+ case 512:
+ return IBV_MTU_512;
+ case 1024:
+ return IBV_MTU_1024;
+ case 2048:
+ return IBV_MTU_2048;
+ case 4096:
+ return IBV_MTU_4096;
+ default:
+ return IBV_MTU_1024;
+ }
+}
+
+char *dapl_ib_mtu_str(enum ibv_mtu mtu)
+{
+ switch (mtu) {
+ case IBV_MTU_256:
+ return "256";
+ case IBV_MTU_512:
+ return "512";
+ case IBV_MTU_1024:
+ return "1024";
+ case IBV_MTU_2048:
+ return "2048";
+ case IBV_MTU_4096:
+ return "4096";
+ default:
+ return "1024";
+ }
+}
+
+DAT_RETURN getlocalipaddr(DAT_SOCK_ADDR * addr, int addr_len)
+{
+ struct sockaddr_in *sin;
+ struct addrinfo *res, hint, *ai;
+ int ret;
+ char hostname[256];
+
+ if (addr_len < sizeof(*sin)) {
+ return DAT_INTERNAL_ERROR;
+ }
+
+ ret = gethostname(hostname, 256);
+ if (ret)
+ return dapl_convert_errno(ret, "gethostname");
+
+ memset(&hint, 0, sizeof hint);
+ hint.ai_flags = AI_PASSIVE;
+ hint.ai_family = AF_INET;
+ hint.ai_socktype = SOCK_STREAM;
+ hint.ai_protocol = IPPROTO_TCP;
+
+ ret = getaddrinfo(hostname, NULL, &hint, &res);
+ if (ret) {
+ dapl_log(DAPL_DBG_TYPE_ERR,
+ " getaddrinfo ERR: %d %s\n", ret, gai_strerror(ret));
+ return DAT_INVALID_ADDRESS;
+ }
+
+ ret = DAT_INVALID_ADDRESS;
+ for (ai = res; ai; ai = ai->ai_next) {
+ sin = (struct sockaddr_in *)ai->ai_addr;
+ if (*((uint32_t *) & sin->sin_addr) != htonl(0x7f000001)) {
+ *((struct sockaddr_in *)addr) = *sin;
+ ret = DAT_SUCCESS;
+ break;
+ }
+ }
+
+ freeaddrinfo(res);
+ return ret;
+}
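
getlocalipaddr() above is plain getaddrinfo() plumbing. A standalone equivalent that prints the first non-loopback IPv4 address of the local host:

/* Standalone version of the getlocalipaddr() logic: resolve the host name
 * and take the first IPv4 address that is not 127.0.0.1.
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <netdb.h>
#include <arpa/inet.h>

int main(void)
{
	char host[256];
	struct addrinfo hint, *res, *ai;

	if (gethostname(host, sizeof(host)))
		return 1;

	memset(&hint, 0, sizeof(hint));
	hint.ai_flags = AI_PASSIVE;
	hint.ai_family = AF_INET;
	hint.ai_socktype = SOCK_STREAM;

	if (getaddrinfo(host, NULL, &hint, &res))
		return 1;

	for (ai = res; ai; ai = ai->ai_next) {
		struct sockaddr_in *sin = (struct sockaddr_in *)ai->ai_addr;

		if (sin->sin_addr.s_addr != htonl(INADDR_LOOPBACK)) {
			printf("local address: %s\n",
			       inet_ntoa(sin->sin_addr));
			break;
		}
	}
	freeaddrinfo(res);
	return 0;
}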
+
+/*
+ * dapls_ib_query_hca
+ *
+ * Query the hca attribute
+ *
+ * Input:
+ * hca_handl hca handle
+ * ia_attr attribute of the ia
+ * ep_attr attribute of the ep
+ * ip_addr ip address of DET NIC
+ *
+ * Output:
+ * none
+ *
+ * Returns:
+ * DAT_SUCCESS
+ * DAT_INVALID_HANDLE
+ */
+
+DAT_RETURN dapls_ib_query_hca(IN DAPL_HCA * hca_ptr,
+ OUT DAT_IA_ATTR * ia_attr,
+ OUT DAT_EP_ATTR * ep_attr,
+ OUT DAT_SOCK_ADDR6 * ip_addr)
+{
+ struct ibv_device_attr dev_attr;
+ struct ibv_port_attr port_attr;
+
+ if (hca_ptr->ib_hca_handle == NULL) {
+ dapl_dbg_log(DAPL_DBG_TYPE_ERR, " query_hca: BAD handle\n");
+ return (DAT_INVALID_HANDLE);
+ }
+
+ /* local IP address of device, set during ia_open */
+ if (ip_addr != NULL)
+ memcpy(ip_addr, &hca_ptr->hca_address, sizeof(DAT_SOCK_ADDR6));
+
+ if (ia_attr == NULL && ep_attr == NULL)
+ return DAT_SUCCESS;
+
+ /* query verbs for this device and port attributes */
+ if (ibv_query_device(hca_ptr->ib_hca_handle, &dev_attr) ||
+ ibv_query_port(hca_ptr->ib_hca_handle,
+ hca_ptr->port_num, &port_attr))
+ return (dapl_convert_errno(errno, "ib_query_hca"));
+
+ if (ia_attr != NULL) {
+ (void)dapl_os_memzero(ia_attr, sizeof(*ia_attr));
+ ia_attr->adapter_name[DAT_NAME_MAX_LENGTH - 1] = '\0';
+ ia_attr->vendor_name[DAT_NAME_MAX_LENGTH - 1] = '\0';
+ ia_attr->ia_address_ptr =
+ (DAT_IA_ADDRESS_PTR) & hca_ptr->hca_address;
+
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+ " query_hca: %s %s \n",
+ ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
+ inet_ntoa(((struct sockaddr_in *)
+ &hca_ptr->hca_address)->sin_addr));
+
+ ia_attr->hardware_version_major = dev_attr.hw_ver;
+ /* ia_attr->hardware_version_minor = dev_attr.fw_ver; */
+ ia_attr->max_eps = dev_attr.max_qp;
+ ia_attr->max_dto_per_ep = dev_attr.max_qp_wr;
+ ia_attr->max_rdma_read_in = dev_attr.max_qp_rd_atom;
+ ia_attr->max_rdma_read_out = dev_attr.max_qp_init_rd_atom;
+ ia_attr->max_rdma_read_per_ep_in = dev_attr.max_qp_rd_atom;
+ ia_attr->max_rdma_read_per_ep_out =
+ dev_attr.max_qp_init_rd_atom;
+ ia_attr->max_rdma_read_per_ep_in_guaranteed = DAT_TRUE;
+ ia_attr->max_rdma_read_per_ep_out_guaranteed = DAT_TRUE;
+ ia_attr->max_evds = dev_attr.max_cq;
+ ia_attr->max_evd_qlen = dev_attr.max_cqe;
+ ia_attr->max_iov_segments_per_dto = dev_attr.max_sge;
+ ia_attr->max_lmrs = dev_attr.max_mr;
+ /* 32bit attribute from 64bit, 4G-1 limit, DAT v2 needs fix */
+ ia_attr->max_lmr_block_size =
+ (dev_attr.max_mr_size >> 32) ? ~0 : dev_attr.max_mr_size;
+ ia_attr->max_rmrs = dev_attr.max_mw;
+ ia_attr->max_lmr_virtual_address = dev_attr.max_mr_size;
+ ia_attr->max_rmr_target_address = dev_attr.max_mr_size;
+ ia_attr->max_pzs = dev_attr.max_pd;
+ ia_attr->max_message_size = port_attr.max_msg_sz;
+ ia_attr->max_rdma_size = port_attr.max_msg_sz;
+ /* iWARP spec. - 1 sge for RDMA reads */
+ if (hca_ptr->ib_hca_handle->device->transport_type
+ == IBV_TRANSPORT_IWARP)
+ ia_attr->max_iov_segments_per_rdma_read = 1;
+ else
+ ia_attr->max_iov_segments_per_rdma_read =
+ dev_attr.max_sge;
+ ia_attr->max_iov_segments_per_rdma_write = dev_attr.max_sge;
+ ia_attr->num_transport_attr = 0;
+ ia_attr->transport_attr = NULL;
+ ia_attr->num_vendor_attr = 0;
+ ia_attr->vendor_attr = NULL;
+#ifdef DAT_EXTENSIONS
+ ia_attr->extension_supported = DAT_EXTENSION_IB;
+ ia_attr->extension_version = DAT_IB_EXTENSION_VERSION;
+#endif
+ /* save key device attributes for CM exchange */
+ hca_ptr->ib_trans.rd_atom_in = dev_attr.max_qp_rd_atom;
+ hca_ptr->ib_trans.rd_atom_out = dev_attr.max_qp_init_rd_atom;
+
+ hca_ptr->ib_trans.mtu = DAPL_MIN(port_attr.active_mtu,
+ hca_ptr->ib_trans.mtu);
+ hca_ptr->ib_trans.ack_timer =
+ DAPL_MAX(dev_attr.local_ca_ack_delay,
+ hca_ptr->ib_trans.ack_timer);
+
+ /* set MTU in transport specific named attribute */
+ hca_ptr->ib_trans.named_attr.name = "DAT_IB_TRANSPORT_MTU";
+ hca_ptr->ib_trans.named_attr.value =
+ dapl_ib_mtu_str(hca_ptr->ib_trans.mtu);
+
+ dapl_log(DAPL_DBG_TYPE_UTIL,
+ " query_hca: (%x.%x) eps %d, sz %d evds %d,"
+ " sz %d mtu %d\n",
+ ia_attr->hardware_version_major,
+ ia_attr->hardware_version_minor,
+ ia_attr->max_eps, ia_attr->max_dto_per_ep,
+ ia_attr->max_evds, ia_attr->max_evd_qlen,
+ 128 << hca_ptr->ib_trans.mtu);
+
+ dapl_log(DAPL_DBG_TYPE_UTIL,
+ " query_hca: msg %llu rdma %llu iov %d lmr %d rmr %d"
+ " ack_time %d mr %u\n",
+ ia_attr->max_message_size, ia_attr->max_rdma_size,
+ ia_attr->max_iov_segments_per_dto,
+ ia_attr->max_lmrs, ia_attr->max_rmrs,
+ hca_ptr->ib_trans.ack_timer,
+ ia_attr->max_lmr_block_size);
+ }
+
+ if (ep_attr != NULL) {
+ (void)dapl_os_memzero(ep_attr, sizeof(*ep_attr));
+ ep_attr->max_message_size = port_attr.max_msg_sz;
+ ep_attr->max_rdma_size = port_attr.max_msg_sz;
+ ep_attr->max_recv_dtos = dev_attr.max_qp_wr;
+ ep_attr->max_request_dtos = dev_attr.max_qp_wr;
+ ep_attr->max_recv_iov = dev_attr.max_sge;
+ ep_attr->max_request_iov = dev_attr.max_sge;
+ ep_attr->max_rdma_read_in = dev_attr.max_qp_rd_atom;
+ ep_attr->max_rdma_read_out = dev_attr.max_qp_init_rd_atom;
+ ep_attr->max_rdma_read_iov = dev_attr.max_sge;
+ ep_attr->max_rdma_write_iov = dev_attr.max_sge;
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+ " query_hca: MAX msg %llu mtu %d qsz %d iov %d"
+ " rdma i%d,o%d\n",
+ ep_attr->max_message_size,
+ 128 << hca_ptr->ib_trans.mtu,
+ ep_attr->max_recv_dtos,
+ ep_attr->max_recv_iov,
+ ep_attr->max_rdma_read_in,
+ ep_attr->max_rdma_read_out);
+ }
+ return DAT_SUCCESS;
+}
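
The attribute fill in dapls_ib_query_hca() is driven entirely by ibv_query_device() and ibv_query_port(). A minimal standalone query of the first device, port 1 (the port number is illustrative):

/* Minimal sketch of the device/port query pair used above. */
#include <stdio.h>
#include <infiniband/verbs.h>

int main(void)
{
	struct ibv_device **list = ibv_get_device_list(NULL);
	struct ibv_context *ctx;
	struct ibv_device_attr dev_attr;
	struct ibv_port_attr port_attr;

	if (!list || !list[0])
		return 1;

	ctx = ibv_open_device(list[0]);
	if (!ctx)
		return 1;

	if (ibv_query_device(ctx, &dev_attr) ||
	    ibv_query_port(ctx, 1, &port_attr))
		return 1;

	printf("max_qp=%d max_qp_wr=%d max_cq=%d max_mr=%d active_mtu=%d\n",
	       dev_attr.max_qp, dev_attr.max_qp_wr, dev_attr.max_cq,
	       dev_attr.max_mr, port_attr.active_mtu);

	ibv_close_device(ctx);
	ibv_free_device_list(list);
	return 0;
}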
+
+/*
+ * dapls_ib_setup_async_callback
+ *
+ * Set up an asynchronous callbacks of various kinds
+ *
+ * Input:
+ * ia_handle IA handle
+ * handler_type type of handler to set up
+ * callback_handle handle param for completion callbacks
+ * callback callback routine pointer
+ * context argument for callback routine
+ *
+ * Output:
+ * none
+ *
+ * Returns:
+ * DAT_SUCCESS
+ * DAT_INSUFFICIENT_RESOURCES
+ * DAT_INVALID_PARAMETER
+ *
+ */
+DAT_RETURN dapls_ib_setup_async_callback(IN DAPL_IA * ia_ptr,
+ IN DAPL_ASYNC_HANDLER_TYPE
+ handler_type, IN DAPL_EVD * evd_ptr,
+ IN ib_async_handler_t callback,
+ IN void *context)
+{
+ ib_hca_transport_t *hca_ptr;
+
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+ " setup_async_cb: ia %p type %d handle %p cb %p ctx %p\n",
+ ia_ptr, handler_type, evd_ptr, callback, context);
+
+ hca_ptr = &ia_ptr->hca_ptr->ib_trans;
+ switch (handler_type) {
+ case DAPL_ASYNC_UNAFILIATED:
+ hca_ptr->async_unafiliated = (ib_async_handler_t) callback;
+ hca_ptr->async_un_ctx = context;
+ break;
+ case DAPL_ASYNC_CQ_ERROR:
+ hca_ptr->async_cq_error = (ib_async_cq_handler_t) callback;
+ break;
+ case DAPL_ASYNC_CQ_COMPLETION:
+ hca_ptr->async_cq = (ib_async_dto_handler_t) callback;
+ break;
+ case DAPL_ASYNC_QP_ERROR:
+ hca_ptr->async_qp_error = (ib_async_qp_handler_t) callback;
+ break;
+ default:
+ break;
+ }
+ return DAT_SUCCESS;
+}
+
+/*
+ * dapls_set_provider_specific_attr
+ *
+ * Input:
+ * attr_ptr Pointer provider specific attributes
+ *
+ * Output:
+ * none
+ *
+ * Returns:
+ * void
+ */
+DAT_NAMED_ATTR ib_attrs[] = {
+ {
+ "DAT_IB_TRANSPORT_MTU", "2048"}
+ ,
+#ifdef DAT_EXTENSIONS
+ {
+ "DAT_EXTENSION_INTERFACE", "TRUE"}
+ ,
+ {
+ DAT_IB_ATTR_FETCH_AND_ADD, "TRUE"}
+ ,
+ {
+ DAT_IB_ATTR_CMP_AND_SWAP, "TRUE"}
+ ,
+ {
+ DAT_IB_ATTR_IMMED_DATA, "TRUE"}
+ ,
+#ifndef _OPENIB_CMA_
+ {
+ DAT_IB_ATTR_UD, "TRUE"}
+ ,
+#endif
+#ifdef DAPL_COUNTERS
+ {
+ DAT_ATTR_COUNTERS, "TRUE"}
+ ,
+#endif /* DAPL_COUNTERS */
+#endif
+};
+
+#define SPEC_ATTR_SIZE( x ) (sizeof( x ) / sizeof( DAT_NAMED_ATTR))
+
+void dapls_query_provider_specific_attr(IN DAPL_IA * ia_ptr,
+ IN DAT_PROVIDER_ATTR * attr_ptr)
+{
+ attr_ptr->num_provider_specific_attr = SPEC_ATTR_SIZE(ib_attrs);
+ attr_ptr->provider_specific_attr = ib_attrs;
+
+ /* set MTU to actual settings */
+ ib_attrs[0].value = ia_ptr->hca_ptr->ib_trans.named_attr.value;
+}
diff --git a/dapl/openib_scm/SOURCES b/dapl/openib_scm/SOURCES
index f9204d9..5714aa3 100644
--- a/dapl/openib_scm/SOURCES
+++ b/dapl/openib_scm/SOURCES
@@ -18,16 +18,17 @@ USE_MSVCRT = 1
SOURCES = \
udapl.rc \
- ..\dapl_common_src.c \
- ..\dapl_udapl_src.c \
- dapl_ib_cq.c \
- dapl_ib_extensions.c \
- dapl_ib_mem.c \
- dapl_ib_qp.c \
- dapl_ib_util.c \
- dapl_ib_cm.c
-
-INCLUDES = ..\include;..\common;windows;..\..\dat\include;\
+ ..\dapl_common_src.c \
+ ..\dapl_udapl_src.c \
+ ..\openib_common\mem.c \
+ ..\openib_common\util.c \
+ ..\openib_common\cq.c \
+ ..\openib_common\qp.c \
+ ..\openib_common\ib_extensions.c \
+ device.c \
+ cm.c
+
+INCLUDES = ..\include;..\openib_common\;..\common;windows;..\..\dat\include;\
..\..\dat\udat\windows;..\udapl\windows;\
..\..\..\..\inc;..\..\..\..\inc\user;..\..\..\libibverbs\include
diff --git a/dapl/openib_scm/cm.c b/dapl/openib_scm/cm.c
new file mode 100644
index 0000000..5708214
--- /dev/null
+++ b/dapl/openib_scm/cm.c
@@ -0,0 +1,1839 @@
+/*
+ * This Software is licensed under one of the following licenses:
+ *
+ * 1) under the terms of the "Common Public License 1.0" a copy of which is
+ * available from the Open Source Initiative, see
+ * http://www.opensource.org/licenses/cpl.php.
+ *
+ * 2) under the terms of the "The BSD License" a copy of which is
+ * available from the Open Source Initiative, see
+ * http://www.opensource.org/licenses/bsd-license.php.
+ *
+ * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
+ * copy of which is available from the Open Source Initiative, see
+ * http://www.opensource.org/licenses/gpl-license.php.
+ *
+ * Licensee has the right to choose one of the above licenses.
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice and one of the license notices.
+ *
+ * Redistributions in binary form must reproduce both the above copyright
+ * notice, one of the license notices in the documentation
+ * and/or other materials provided with the distribution.
+ */
+
+/***************************************************************************
+ *
+ * Module: uDAPL
+ *
+ * Filename: dapl_ib_cm.c
+ *
+ * Author: Arlin Davis
+ *
+ * Created: 3/10/2005
+ *
+ * Description:
+ *
+ * The uDAPL openib provider - connection management
+ *
+ ****************************************************************************
+ * Source Control System Information
+ *
+ * $Id: $
+ *
+ * Copyright (c) 2005 Intel Corporation. All rights reserved.
+ *
+ **************************************************************************/
+
+#include "dapl.h"
+#include "dapl_adapter_util.h"
+#include "dapl_evd_util.h"
+#include "dapl_cr_util.h"
+#include "dapl_name_service.h"
+#include "dapl_ib_util.h"
+#include "dapl_osd.h"
+
+#if defined(_WIN32) || defined(_WIN64)
+enum DAPL_FD_EVENTS {
+ DAPL_FD_READ = 0x1,
+ DAPL_FD_WRITE = 0x2,
+ DAPL_FD_ERROR = 0x4
+};
+
+static int dapl_config_socket(DAPL_SOCKET s)
+{
+ unsigned long nonblocking = 1;
+ return ioctlsocket(s, FIONBIO, &nonblocking);
+}
+
+static int dapl_connect_socket(DAPL_SOCKET s, struct sockaddr *addr,
+ int addrlen)
+{
+ int err;
+
+ err = connect(s, addr, addrlen);
+ if (err == SOCKET_ERROR)
+ err = WSAGetLastError();
+ return (err == WSAEWOULDBLOCK) ? EAGAIN : err;
+}
+
+struct dapl_fd_set {
+ struct fd_set set[3];
+};
+
+static struct dapl_fd_set *dapl_alloc_fd_set(void)
+{
+ return dapl_os_alloc(sizeof(struct dapl_fd_set));
+}
+
+static void dapl_fd_zero(struct dapl_fd_set *set)
+{
+ FD_ZERO(&set->set[0]);
+ FD_ZERO(&set->set[1]);
+ FD_ZERO(&set->set[2]);
+}
+
+static int dapl_fd_set(DAPL_SOCKET s, struct dapl_fd_set *set,
+ enum DAPL_FD_EVENTS event)
+{
+ FD_SET(s, &set->set[(event == DAPL_FD_READ) ? 0 : 1]);
+ FD_SET(s, &set->set[2]);
+ return 0;
+}
+
+static enum DAPL_FD_EVENTS dapl_poll(DAPL_SOCKET s, enum DAPL_FD_EVENTS event)
+{
+ struct fd_set rw_fds;
+ struct fd_set err_fds;
+ struct timeval tv;
+ int ret;
+
+ FD_ZERO(&rw_fds);
+ FD_ZERO(&err_fds);
+ FD_SET(s, &rw_fds);
+ FD_SET(s, &err_fds);
+
+ tv.tv_sec = 0;
+ tv.tv_usec = 0;
+
+ if (event == DAPL_FD_READ)
+ ret = select(1, &rw_fds, NULL, &err_fds, &tv);
+ else
+ ret = select(1, NULL, &rw_fds, &err_fds, &tv);
+
+ if (ret == 0)
+ return 0;
+ else if (ret == SOCKET_ERROR)
+ return WSAGetLastError();
+ else if (FD_ISSET(s, &rw_fds))
+ return event;
+ else
+ return DAPL_FD_ERROR;
+}
+
+static int dapl_select(struct dapl_fd_set *set)
+{
+ int ret;
+
+ dapl_dbg_log(DAPL_DBG_TYPE_CM, " dapl_select: sleep\n");
+ ret = select(0, &set->set[0], &set->set[1], &set->set[2], NULL);
+ dapl_dbg_log(DAPL_DBG_TYPE_CM, " dapl_select: wakeup\n");
+
+ if (ret == SOCKET_ERROR)
+ dapl_dbg_log(DAPL_DBG_TYPE_CM,
+ " dapl_select: error 0x%x\n", WSAGetLastError());
+
+ return ret;
+}
+#else // _WIN32 || _WIN64
+enum DAPL_FD_EVENTS {
+ DAPL_FD_READ = POLLIN,
+ DAPL_FD_WRITE = POLLOUT,
+ DAPL_FD_ERROR = POLLERR
+};
+
+static int dapl_config_socket(DAPL_SOCKET s)
+{
+ int ret;
+
+ ret = fcntl(s, F_GETFL);
+ if (ret >= 0)
+ ret = fcntl(s, F_SETFL, ret | O_NONBLOCK);
+ return ret;
+}
+
+static int dapl_connect_socket(DAPL_SOCKET s, struct sockaddr *addr,
+ int addrlen)
+{
+ int ret;
+
+ ret = connect(s, addr, addrlen);
+
+ return (errno == EINPROGRESS) ? EAGAIN : ret;
+}
+
+struct dapl_fd_set {
+ int index;
+ struct pollfd set[DAPL_FD_SETSIZE];
+};
+
+static struct dapl_fd_set *dapl_alloc_fd_set(void)
+{
+ return dapl_os_alloc(sizeof(struct dapl_fd_set));
+}
+
+static void dapl_fd_zero(struct dapl_fd_set *set)
+{
+ set->index = 0;
+}
+
+static int dapl_fd_set(DAPL_SOCKET s, struct dapl_fd_set *set,
+ enum DAPL_FD_EVENTS event)
+{
+ if (set->index == DAPL_FD_SETSIZE - 1) {
+ dapl_log(DAPL_DBG_TYPE_ERR,
+ "SCM ERR: cm_thread exceeded FD_SETSIZE %d\n",
+ set->index + 1);
+ return -1;
+ }
+
+ set->set[set->index].fd = s;
+ set->set[set->index].revents = 0;
+ set->set[set->index++].events = event;
+ return 0;
+}
+
+static enum DAPL_FD_EVENTS dapl_poll(DAPL_SOCKET s, enum DAPL_FD_EVENTS event)
+{
+ struct pollfd fds;
+ int ret;
+
+ fds.fd = s;
+ fds.events = event;
+ fds.revents = 0;
+ ret = poll(&fds, 1, 0);
+ dapl_log(DAPL_DBG_TYPE_CM, " dapl_poll: fd=%d ret=%d, evnts=0x%x\n",
+ s, ret, fds.revents);
+ if (ret == 0)
+ return 0;
+ else if (fds.revents & (POLLERR | POLLHUP | POLLNVAL))
+ return DAPL_FD_ERROR;
+ else
+ return fds.revents;
+}
+
+static int dapl_select(struct dapl_fd_set *set)
+{
+ int ret;
+
+ dapl_dbg_log(DAPL_DBG_TYPE_CM, " dapl_select: sleep, fds=%d\n",
+ set->index);
+ ret = poll(set->set, set->index, -1);
+ dapl_dbg_log(DAPL_DBG_TYPE_CM, " dapl_select: wakeup, ret=0x%x\n", ret);
+ return ret;
+}
+#endif
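
The Linux helpers above implement the usual non-blocking connect pattern: set O_NONBLOCK, start the connect, then poll the socket for writability. A standalone sketch (the loopback address and port are illustrative):

/* Standalone sketch of the dapl_config_socket/dapl_connect_socket/dapl_poll
 * pattern used by the SCM provider.
 */
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>
#include <poll.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <sys/socket.h>

int main(void)
{
	struct sockaddr_in addr;
	struct pollfd pfd;
	int s, flags, ret;

	s = socket(AF_INET, SOCK_STREAM, 0);
	if (s < 0)
		return 1;

	/* dapl_config_socket(): make the socket non-blocking */
	flags = fcntl(s, F_GETFL);
	if (flags < 0 || fcntl(s, F_SETFL, flags | O_NONBLOCK) < 0)
		return 1;

	memset(&addr, 0, sizeof(addr));
	addr.sin_family = AF_INET;
	addr.sin_port = htons(7174);               /* illustrative port */
	inet_pton(AF_INET, "127.0.0.1", &addr.sin_addr);

	/* dapl_connect_socket(): EINPROGRESS means "try again via poll" */
	ret = connect(s, (struct sockaddr *)&addr, sizeof(addr));
	if (ret && errno != EINPROGRESS) {
		perror("connect");
		close(s);
		return 1;
	}

	/* dapl_poll(): wait for the connect to complete or fail */
	pfd.fd = s;
	pfd.events = POLLOUT;
	ret = poll(&pfd, 1, 1000);
	printf("poll ret=%d revents=0x%x\n", ret, pfd.revents);

	close(s);
	return 0;
}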
+
+dp_ib_cm_handle_t dapls_ib_cm_create(DAPL_EP *ep)
+{
+ dp_ib_cm_handle_t cm_ptr;
+
+ /* Allocate CM, init lock, and initialize */
+ if ((cm_ptr = dapl_os_alloc(sizeof(*cm_ptr))) == NULL)
+ return NULL;
+
+ (void)dapl_os_memzero(cm_ptr, sizeof(*cm_ptr));
+ if (dapl_os_lock_init(&cm_ptr->lock))
+ goto bail;
+
+ cm_ptr->dst.ver = htons(DCM_VER);
+ cm_ptr->socket = DAPL_INVALID_SOCKET;
+ cm_ptr->ep = ep;
+ return cm_ptr;
+bail:
+ dapl_os_free(cm_ptr, sizeof(*cm_ptr));
+ return NULL;
+}
+
+/* mark for destroy, remove all references, schedule cleanup */
+/* cm_ptr == NULL (UD), then multi CR's, kill all associated with EP */
+void dapls_ib_cm_free(dp_ib_cm_handle_t cm_ptr, DAPL_EP *ep)
+{
+ DAPL_IA *ia_ptr;
+ DAPL_HCA *hca_ptr = NULL;
+ dp_ib_cm_handle_t cr, next_cr;
+
+ dapl_dbg_log(DAPL_DBG_TYPE_CM,
+ " cm_destroy: cm %p ep %p\n", cm_ptr, ep);
+
+ if (cm_ptr == NULL)
+ goto multi_cleanup;
+
+ /* to notify cleanup thread */
+ hca_ptr = cm_ptr->hca;
+
+ /* cleanup, never made it to work queue */
+ if (cm_ptr->state == DCM_INIT) {
+ if (cm_ptr->socket != DAPL_INVALID_SOCKET) {
+ shutdown(cm_ptr->socket, SHUT_RDWR);
+ closesocket(cm_ptr->socket);
+ }
+ dapl_os_free(cm_ptr, sizeof(*cm_ptr));
+ return;
+ }
+
+ dapl_os_lock(&cm_ptr->lock);
+ cm_ptr->state = DCM_DESTROY;
+ if ((cm_ptr->ep) && (cm_ptr->ep->cm_handle == cm_ptr)) {
+ cm_ptr->ep->cm_handle = IB_INVALID_HANDLE;
+ cm_ptr->ep = NULL;
+ }
+
+ /* close socket if still active */
+ if (cm_ptr->socket != DAPL_INVALID_SOCKET) {
+ shutdown(cm_ptr->socket, SHUT_RDWR);
+ closesocket(cm_ptr->socket);
+ cm_ptr->socket = DAPL_INVALID_SOCKET;
+ }
+ dapl_os_unlock(&cm_ptr->lock);
+ goto notify_thread;
+
+multi_cleanup:
+
+ /*
+ * UD CR objects are kept active because of direct private data references
+ * from CONN events. The cr->socket is closed and marked inactive but the
+ * object remains allocated and queued on the CR resource list. There can
+ * be multiple CR's associated with a given EP. There is no way to determine
+ * when consumer is finished with event until the dat_ep_free.
+ *
+ * Schedule destruction for all CR's associated with this EP, cr_thread will
+ * complete the cleanup with state == DCM_DESTROY.
+ */
+ ia_ptr = ep->header.owner_ia;
+ dapl_os_lock(&ia_ptr->hca_ptr->ib_trans.lock);
+ if (!dapl_llist_is_empty((DAPL_LLIST_HEAD*)
+ &ia_ptr->hca_ptr->ib_trans.list))
+ next_cr = dapl_llist_peek_head((DAPL_LLIST_HEAD*)
+ &ia_ptr->hca_ptr->ib_trans.list);
+ else
+ next_cr = NULL;
+
+ while (next_cr) {
+ cr = next_cr;
+ next_cr = dapl_llist_next_entry((DAPL_LLIST_HEAD*)
+ &ia_ptr->hca_ptr->ib_trans.list,
+ (DAPL_LLIST_ENTRY*)&cr->entry);
+ if (cr->ep == ep) {
+ dapl_dbg_log(DAPL_DBG_TYPE_EP,
+ " qp_free CR: ep %p cr %p\n", ep, cr);
+ dapli_socket_disconnect(cr);
+ dapl_os_lock(&cr->lock);
+ hca_ptr = cr->hca;
+ cr->ep = NULL;
+ cr->state = DCM_DESTROY;
+ dapl_os_unlock(&cr->lock);
+ }
+ }
+ dapl_os_unlock(&ia_ptr->hca_ptr->ib_trans.lock);
+
+notify_thread:
+
+ /* wake up work thread if something was destroyed */
+ if (hca_ptr != NULL) {
+ if (send(hca_ptr->ib_trans.scm[1],
+ "w", sizeof "w", 0) == -1)
+ dapl_log(DAPL_DBG_TYPE_CM,
+ " cm_destroy: thread wakeup error = %s\n",
+ strerror(errno));
+ }
+}
+
+/* queue socket for processing CM work */
+static void dapli_cm_queue(struct ib_cm_handle *cm_ptr)
+{
+ /* add to work queue for cr thread processing */
+ dapl_llist_init_entry((DAPL_LLIST_ENTRY *) & cm_ptr->entry);
+ dapl_os_lock(&cm_ptr->hca->ib_trans.lock);
+ dapl_llist_add_tail(&cm_ptr->hca->ib_trans.list,
+ (DAPL_LLIST_ENTRY *) & cm_ptr->entry, cm_ptr);
+ dapl_os_unlock(&cm_ptr->hca->ib_trans.lock);
+
+ /* wakeup CM work thread */
+ if (send(cm_ptr->hca->ib_trans.scm[1], "w", sizeof "w", 0) == -1)
+ dapl_log(DAPL_DBG_TYPE_CM,
+ " cm_queue: thread wakeup error = %s\n",
+ strerror(errno));
+}
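
dapli_cm_queue() and dapls_ib_cm_free() wake the CR thread by send()ing a byte on ib_trans.scm[1]; cr_thread keeps scm[0] in its poll set and drains it after wakeup. A self-contained sketch of that socketpair wakeup pattern (only the scm[] name follows the provider code, the rest is illustrative):

#include <sys/socket.h>

static int scm[2];      /* scm[1]: any thread writes, scm[0]: CR thread polls */

static int wakeup_init(void)
{
        return socketpair(AF_UNIX, SOCK_STREAM, 0, scm);
}

static void wakeup_thread(void)
{
        (void)send(scm[1], "w", 1, 0);          /* any byte unblocks poll() */
}

static void drain_wakeup(void)
{
        char buf[2];

        while (recv(scm[0], buf, sizeof(buf), MSG_DONTWAIT) > 0)
                ;                               /* consume all pending wakeups */
}
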
+
+/*
+ * ACTIVE/PASSIVE: called from CR thread or consumer via ep_disconnect
+ * or from ep_free
+ */
+DAT_RETURN dapli_socket_disconnect(dp_ib_cm_handle_t cm_ptr)
+{
+ DAPL_EP *ep_ptr = cm_ptr->ep;
+ DAT_UINT32 disc_data = htonl(0xdead);
+
+ if (ep_ptr == NULL)
+ return DAT_SUCCESS;
+
+ dapl_os_lock(&cm_ptr->lock);
+ if ((cm_ptr->state == DCM_INIT) ||
+ (cm_ptr->state == DCM_DISCONNECTED) ||
+ (cm_ptr->state == DCM_DESTROY)) {
+ dapl_os_unlock(&cm_ptr->lock);
+ return DAT_SUCCESS;
+ } else {
+ /* send disc data, close socket, schedule destroy */
+ if (cm_ptr->socket != DAPL_INVALID_SOCKET) {
+ if (send(cm_ptr->socket, (char *)&disc_data,
+ sizeof(disc_data), 0) == -1)
+ dapl_log(DAPL_DBG_TYPE_WARN,
+ " cm_disc: write error = %s\n",
+ strerror(errno));
+ shutdown(cm_ptr->socket, SHUT_RDWR);
+ closesocket(cm_ptr->socket);
+ cm_ptr->socket = DAPL_INVALID_SOCKET;
+ }
+ cm_ptr->state = DCM_DISCONNECTED;
+ }
+ dapl_os_unlock(&cm_ptr->lock);
+
+ /* disconnect events for RC's only */
+ if (ep_ptr->param.ep_attr.service_type == DAT_SERVICE_TYPE_RC) {
+ if (ep_ptr->cr_ptr) {
+ dapls_cr_callback(cm_ptr,
+ IB_CME_DISCONNECTED,
+ NULL,
+ ((DAPL_CR *) ep_ptr->cr_ptr)->sp_ptr);
+ } else {
+ dapl_evd_connection_callback(ep_ptr->cm_handle,
+ IB_CME_DISCONNECTED,
+ NULL, ep_ptr);
+ }
+ }
+
+ /* scheduled destroy via disconnect clean in callback */
+ return DAT_SUCCESS;
+}
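
The disconnect path writes a 4-byte sentinel (0xdead in network order) before shutdown/close, so the peer's CR thread sees the socket become readable and runs its DCM_CONNECTED disconnect handling. A small standalone sketch of that send-then-close step, assuming a plain int socket descriptor:

#include <sys/socket.h>
#include <arpa/inet.h>
#include <unistd.h>
#include <stdint.h>

/* Send a sentinel so the peer's poll() sees readable data, then close. */
static void disc_socket(int *sock)
{
        uint32_t marker = htonl(0xdead);        /* same sentinel value as above */

        if (*sock < 0)
                return;
        (void)send(*sock, &marker, sizeof(marker), 0);
        shutdown(*sock, SHUT_RDWR);
        close(*sock);
        *sock = -1;
}
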
+
+/*
+ * ACTIVE: socket connected, send QP information to peer
+ */
+static void dapli_socket_connected(dp_ib_cm_handle_t cm_ptr, int err)
+{
+ int len, opt = 1;
+ struct iovec iov[2];
+ struct dapl_ep *ep_ptr = cm_ptr->ep;
+
+ if (err) {
+ dapl_log(DAPL_DBG_TYPE_ERR,
+ " CONN_PENDING: %s ERR %s -> %s %d\n",
+ err == -1 ? "POLL" : "SOCKOPT",
+ err == -1 ? strerror(errno) : strerror(err),
+ inet_ntoa(((struct sockaddr_in *)
+ ep_ptr->param.
+ remote_ia_address_ptr)->sin_addr),
+ ntohs(((struct sockaddr_in *)
+ &cm_ptr->dst.ia_address)->sin_port));
+ goto bail;
+ }
+ dapl_dbg_log(DAPL_DBG_TYPE_EP,
+ " socket connected, write QP and private data\n");
+
+ /* no delay for small packets */
+ setsockopt(cm_ptr->socket, IPPROTO_TCP, TCP_NODELAY,
+ (char *)&opt, sizeof(opt));
+
+ /* send qp info and pdata to remote peer */
+ iov[0].iov_base = (void *)&cm_ptr->dst;
+ iov[0].iov_len = sizeof(ib_qp_cm_t);
+ if (cm_ptr->dst.p_size) {
+ iov[1].iov_base = cm_ptr->p_data;
+ iov[1].iov_len = ntohl(cm_ptr->dst.p_size);
+ len = writev(cm_ptr->socket, iov, 2);
+ } else {
+ len = writev(cm_ptr->socket, iov, 1);
+ }
+
+ if (len != (ntohl(cm_ptr->dst.p_size) + sizeof(ib_qp_cm_t))) {
+ dapl_log(DAPL_DBG_TYPE_ERR,
+ " CONN_PENDING write: ERR %s, wcnt=%d -> %s\n",
+ strerror(errno), len, inet_ntoa(((struct sockaddr_in *)
+ ep_ptr->param.
+ remote_ia_address_ptr)->
+ sin_addr));
+ goto bail;
+ }
+ dapl_dbg_log(DAPL_DBG_TYPE_CM,
+ " connected: sending SRC port=0x%x lid=0x%x,"
+ " qpn=0x%x, psize=%d\n",
+ ntohs(cm_ptr->dst.port), ntohs(cm_ptr->dst.lid),
+ ntohl(cm_ptr->dst.qpn), ntohl(cm_ptr->dst.p_size));
+ dapl_dbg_log(DAPL_DBG_TYPE_CM,
+ " connected: sending SRC GID subnet %016llx id %016llx\n",
+ (unsigned long long)
+ htonll(cm_ptr->dst.gid.global.subnet_prefix),
+ (unsigned long long)
+ htonll(cm_ptr->dst.gid.global.interface_id));
+
+ /* queue up to work thread to avoid blocking consumer */
+ cm_ptr->state = DCM_RTU_PENDING;
+ return;
+ bail:
+ /* close socket, free cm structure and post error event */
+ dapls_ib_cm_free(cm_ptr, cm_ptr->ep);
+ dapl_evd_connection_callback(NULL, IB_CME_LOCAL_FAILURE, NULL, ep_ptr);
+}
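
The QP exchange writes the fixed ib_qp_cm_t header plus optional private data with a single writev() and then checks the total byte count. A hedged standalone sketch of the same gather-write pattern; wire_hdr here is an illustrative stand-in, not the provider's structure:

#include <sys/types.h>
#include <sys/uio.h>
#include <stdint.h>

struct wire_hdr {               /* illustrative stand-in for ib_qp_cm_t */
        uint16_t ver;
        uint32_t payload_len;   /* network byte order on the wire */
};

/* Gather-write header plus optional payload; 0 on success, -1 on short write. */
static int send_hdr_and_payload(int sock, struct wire_hdr *hdr,
                                void *payload, uint32_t len)
{
        struct iovec iov[2];

        iov[0].iov_base = hdr;
        iov[0].iov_len = sizeof(*hdr);
        iov[1].iov_base = payload;
        iov[1].iov_len = len;

        return (writev(sock, iov, len ? 2 : 1) ==
                (ssize_t)(sizeof(*hdr) + len)) ? 0 : -1;
}
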
+
+/*
+ * ACTIVE: Create socket, connect, defer exchange QP information to CR thread
+ * to avoid blocking.
+ */
+DAT_RETURN
+dapli_socket_connect(DAPL_EP * ep_ptr,
+ DAT_IA_ADDRESS_PTR r_addr,
+ DAT_CONN_QUAL r_qual, DAT_COUNT p_size, DAT_PVOID p_data)
+{
+ dp_ib_cm_handle_t cm_ptr;
+ int ret;
+ DAPL_IA *ia_ptr = ep_ptr->header.owner_ia;
+ struct sockaddr_in addr;
+
+ dapl_dbg_log(DAPL_DBG_TYPE_EP, " connect: r_qual %d p_size=%d\n",
+ r_qual, p_size);
+
+ cm_ptr = dapls_ib_cm_create(ep_ptr);
+ if (cm_ptr == NULL)
+ return DAT_INSUFFICIENT_RESOURCES;
+
+ /* create, connect, sockopt, and exchange QP information */
+ if ((cm_ptr->socket =
+ socket(AF_INET, SOCK_STREAM, 0)) == DAPL_INVALID_SOCKET) {
+ dapl_os_free(cm_ptr, sizeof(*cm_ptr));
+ return DAT_INSUFFICIENT_RESOURCES;
+ }
+
+ ret = dapl_config_socket(cm_ptr->socket);
+ if (ret < 0) {
+ dapl_log(DAPL_DBG_TYPE_ERR,
+ " socket connect: config socket %d ERR %d %s\n",
+ cm_ptr->socket, ret, strerror(errno));
+ goto bail;
+ }
+
+ dapl_os_memcpy(&addr, r_addr, sizeof(addr));
+ addr.sin_port = htons(r_qual);
+ ret = dapl_connect_socket(cm_ptr->socket, (struct sockaddr *)&addr,
+ sizeof(addr));
+ if (ret && ret != EAGAIN) {
+ dapl_log(DAPL_DBG_TYPE_ERR,
+ " socket connect ERROR: %s -> %s r_qual %d\n",
+ strerror(errno),
+ inet_ntoa(addr.sin_addr), (unsigned int)r_qual);
+ dapls_ib_cm_free(cm_ptr, cm_ptr->ep);
+ return DAT_INVALID_ADDRESS;
+ }
+
+ /* Send QP info, IA address, and private data */
+ cm_ptr->dst.qpn = htonl(ep_ptr->qp_handle->qp_num);
+#ifdef DAT_EXTENSIONS
+ cm_ptr->dst.qp_type = htons(ep_ptr->qp_handle->qp_type);
+#endif
+ cm_ptr->dst.port = htons(ia_ptr->hca_ptr->port_num);
+ cm_ptr->dst.lid = ia_ptr->hca_ptr->ib_trans.lid;
+ cm_ptr->dst.gid = ia_ptr->hca_ptr->ib_trans.gid;
+
+ /* save references */
+ cm_ptr->hca = ia_ptr->hca_ptr;
+ cm_ptr->ep = ep_ptr;
+ cm_ptr->dst.ia_address = ia_ptr->hca_ptr->hca_address;
+ ((struct sockaddr_in *)
+ &cm_ptr->dst.ia_address)->sin_port = ntohs(r_qual);
+
+ if (p_size) {
+ cm_ptr->dst.p_size = htonl(p_size);
+ dapl_os_memcpy(cm_ptr->p_data, p_data, p_size);
+ }
+
+ /* connected or pending, either way results via async event */
+ if (ret == 0)
+ dapli_socket_connected(cm_ptr, 0);
+ else
+ cm_ptr->state = DCM_CONN_PENDING;
+
+ dapl_dbg_log(DAPL_DBG_TYPE_EP,
+ " connect: socket %d to %s r_qual %d pending\n",
+ cm_ptr->socket,
+ inet_ntoa(addr.sin_addr), (unsigned int)r_qual);
+
+ dapli_cm_queue(cm_ptr);
+ return DAT_SUCCESS;
+ bail:
+ dapl_log(DAPL_DBG_TYPE_ERR,
+ " socket connect ERROR: %s query lid(0x%x)/gid"
+ " -> %s r_qual %d\n",
+ strerror(errno), ntohs(cm_ptr->dst.lid),
+ inet_ntoa(((struct sockaddr_in *)r_addr)->sin_addr),
+ (unsigned int)r_qual);
+
+ /* close socket, free cm structure */
+ dapls_ib_cm_free(cm_ptr, cm_ptr->ep);
+ return DAT_INTERNAL_ERROR;
+}
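
dapli_socket_connect() depends on dapl_config_socket()/dapl_connect_socket(): the socket is made non-blocking, a connect() that returns EINPROGRESS is treated as pending, and the CR thread later polls for writability and reads SO_ERROR for the result. A minimal POSIX sketch of that non-blocking connect sequence, with illustrative names:

#include <sys/socket.h>
#include <fcntl.h>
#include <errno.h>

/* Start a non-blocking connect: 0 if connected, 1 if pending, -1 on error. */
static int connect_nonblock(int s, const struct sockaddr *addr, socklen_t alen)
{
        int flags = fcntl(s, F_GETFL);

        if (flags < 0 || fcntl(s, F_SETFL, flags | O_NONBLOCK) < 0)
                return -1;
        if (connect(s, addr, alen) == 0)
                return 0;
        return (errno == EINPROGRESS) ? 1 : -1;
}

/* After poll() reports POLLOUT, fetch the final connect status. */
static int connect_result(int s)
{
        int err = 0;
        socklen_t len = sizeof(err);

        if (getsockopt(s, SOL_SOCKET, SO_ERROR, &err, &len) < 0)
                return errno;
        return err;     /* 0 == connected, otherwise the errno of the failure */
}
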
+
+/*
+ * ACTIVE: exchange QP information, called from CR thread
+ */
+static void dapli_socket_connect_rtu(dp_ib_cm_handle_t cm_ptr)
+{
+ DAPL_EP *ep_ptr = cm_ptr->ep;
+ int len;
+ short rtu_data = htons(0x0E0F);
+ ib_cm_events_t event = IB_CME_DESTINATION_REJECT;
+
+ /* read DST information into cm_ptr, overwrite SRC info */
+ dapl_dbg_log(DAPL_DBG_TYPE_EP, " connect_rtu: recv peer QP data\n");
+
+ len = recv(cm_ptr->socket, (char *)&cm_ptr->dst, sizeof(ib_qp_cm_t), 0);
+ if (len != sizeof(ib_qp_cm_t) || ntohs(cm_ptr->dst.ver) != DCM_VER) {
+ dapl_log(DAPL_DBG_TYPE_ERR,
+ " CONN_RTU read: ERR %s, rcnt=%d, ver=%d -> %s\n",
+ strerror(errno), len, cm_ptr->dst.ver,
+ inet_ntoa(((struct sockaddr_in *)
+ ep_ptr->param.remote_ia_address_ptr)->
+ sin_addr));
+ goto bail;
+ }
+
+ /* convert peer response values to host order */
+ cm_ptr->dst.port = ntohs(cm_ptr->dst.port);
+ cm_ptr->dst.lid = ntohs(cm_ptr->dst.lid);
+ cm_ptr->dst.qpn = ntohl(cm_ptr->dst.qpn);
+#ifdef DAT_EXTENSIONS
+ cm_ptr->dst.qp_type = ntohs(cm_ptr->dst.qp_type);
+#endif
+ cm_ptr->dst.p_size = ntohl(cm_ptr->dst.p_size);
+
+ /* save remote address information */
+ dapl_os_memcpy(&ep_ptr->remote_ia_address,
+ &cm_ptr->dst.ia_address,
+ sizeof(ep_ptr->remote_ia_address));
+
+ dapl_dbg_log(DAPL_DBG_TYPE_EP,
+ " CONN_RTU: DST %s port=0x%x lid=0x%x,"
+ " qpn=0x%x, qp_type=%d, psize=%d\n",
+ inet_ntoa(((struct sockaddr_in *)
+ &cm_ptr->dst.ia_address)->sin_addr),
+ cm_ptr->dst.port, cm_ptr->dst.lid,
+ cm_ptr->dst.qpn, cm_ptr->dst.qp_type, cm_ptr->dst.p_size);
+
+ /* validate private data size before reading */
+ if (cm_ptr->dst.p_size > IB_MAX_REP_PDATA_SIZE) {
+ dapl_log(DAPL_DBG_TYPE_ERR,
+ " CONN_RTU read: psize (%d) wrong -> %s\n",
+ cm_ptr->dst.p_size, inet_ntoa(((struct sockaddr_in *)
+ ep_ptr->param.
+ remote_ia_address_ptr)->
+ sin_addr));
+ goto bail;
+ }
+
+ /* read private data into cm_handle if any present */
+ dapl_dbg_log(DAPL_DBG_TYPE_EP,
+ " socket connected, read private data\n");
+ if (cm_ptr->dst.p_size) {
+ len =
+ recv(cm_ptr->socket, cm_ptr->p_data, cm_ptr->dst.p_size, 0);
+ if (len != cm_ptr->dst.p_size) {
+ dapl_log(DAPL_DBG_TYPE_ERR,
+ " CONN_RTU read pdata: ERR %s, rcnt=%d -> %s\n",
+ strerror(errno), len,
+ inet_ntoa(((struct sockaddr_in *)
+ ep_ptr->param.
+ remote_ia_address_ptr)->sin_addr));
+ goto bail;
+ }
+ }
+
+ /* check for consumer reject */
+ if (cm_ptr->dst.rej) {
+ dapl_log(DAPL_DBG_TYPE_CM,
+ " CONN_RTU read: PEER REJ reason=0x%x -> %s\n",
+ ntohs(cm_ptr->dst.rej),
+ inet_ntoa(((struct sockaddr_in *)
+ ep_ptr->param.remote_ia_address_ptr)->
+ sin_addr));
+ event = IB_CME_DESTINATION_REJECT_PRIVATE_DATA;
+#ifdef DAT_EXTENSIONS
+ if (cm_ptr->dst.qp_type == IBV_QPT_UD)
+ goto ud_bail;
+ else
+#endif
+ goto bail;
+ }
+
+ /* modify QP to RTR and then to RTS with remote info */
+ dapl_os_lock(&ep_ptr->header.lock);
+ if (dapls_modify_qp_state(ep_ptr->qp_handle,
+ IBV_QPS_RTR, cm_ptr) != DAT_SUCCESS) {
+ dapl_log(DAPL_DBG_TYPE_ERR,
+ " CONN_RTU: QPS_RTR ERR %s -> %s\n",
+ strerror(errno), inet_ntoa(((struct sockaddr_in *)
+ ep_ptr->param.
+ remote_ia_address_ptr)->
+ sin_addr));
+ dapl_os_unlock(&ep_ptr->header.lock);
+ goto bail;
+ }
+ if (dapls_modify_qp_state(ep_ptr->qp_handle,
+ IBV_QPS_RTS, cm_ptr) != DAT_SUCCESS) {
+ dapl_log(DAPL_DBG_TYPE_ERR,
+ " CONN_RTU: QPS_RTS ERR %s -> %s\n",
+ strerror(errno), inet_ntoa(((struct sockaddr_in *)
+ ep_ptr->param.
+ remote_ia_address_ptr)->
+ sin_addr));
+ dapl_os_unlock(&ep_ptr->header.lock);
+ goto bail;
+ }
+ dapl_os_unlock(&ep_ptr->header.lock);
+ dapl_dbg_log(DAPL_DBG_TYPE_EP, " connect_rtu: send RTU\n");
+
+ /* complete handshake after final QP state change */
+ if (send(cm_ptr->socket, (char *)&rtu_data, sizeof(rtu_data), 0) == -1) {
+ dapl_log(DAPL_DBG_TYPE_ERR,
+ " CONN_RTU: write error = %s\n", strerror(errno));
+ goto bail;
+ }
+ /* init cm_handle and post the event with private data */
+ cm_ptr->state = DCM_CONNECTED;
+ event = IB_CME_CONNECTED;
+ dapl_dbg_log(DAPL_DBG_TYPE_EP, " ACTIVE: connected!\n");
+
+#ifdef DAT_EXTENSIONS
+ud_bail:
+ if (cm_ptr->dst.qp_type == IBV_QPT_UD) {
+ DAT_IB_EXTENSION_EVENT_DATA xevent;
+
+ /* post EVENT, modify_qp created ah */
+ xevent.status = 0;
+ xevent.type = DAT_IB_UD_REMOTE_AH;
+ xevent.remote_ah.ah = cm_ptr->ah;
+ xevent.remote_ah.qpn = cm_ptr->dst.qpn;
+ dapl_os_memcpy(&xevent.remote_ah.ia_addr,
+ &cm_ptr->dst.ia_address,
+ sizeof(cm_ptr->dst.ia_address));
+
+ if (event == IB_CME_CONNECTED)
+ event = DAT_IB_UD_CONNECTION_EVENT_ESTABLISHED;
+ else
+ event = DAT_IB_UD_CONNECTION_REJECT_EVENT;
+
+ dapls_evd_post_connection_event_ext((DAPL_EVD *) ep_ptr->param.
+ connect_evd_handle,
+ event,
+ (DAT_EP_HANDLE) ep_ptr,
+ (DAT_COUNT) cm_ptr->dst.p_size,
+ (DAT_PVOID *) cm_ptr->p_data,
+ (DAT_PVOID *) &xevent);
+
+ /* done with socket, don't destroy cm_ptr, need pdata */
+ closesocket(cm_ptr->socket);
+ cm_ptr->socket = DAPL_INVALID_SOCKET;
+ cm_ptr->state = DCM_RELEASED;
+ } else
+#endif
+ {
+ ep_ptr->cm_handle = cm_ptr; /* only RC, multi CR's on UD */
+ dapl_evd_connection_callback(cm_ptr,
+ IB_CME_CONNECTED,
+ cm_ptr->p_data, ep_ptr);
+ }
+ return;
+
+bail:
+ /* close socket, and post error event */
+ dapls_ib_reinit_ep(ep_ptr); /* reset QP state */
+ closesocket(cm_ptr->socket);
+ cm_ptr->socket = DAPL_INVALID_SOCKET;
+ dapl_evd_connection_callback(NULL, event, cm_ptr->p_data, ep_ptr);
+}
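
connect_rtu receives the fixed-size header, checks length and version, converts the multi-byte fields with ntohs()/ntohl(), and bounds-checks p_size before reading the private data. A compact sketch of that receive, validate, convert sequence; the struct and limits are illustrative, and MSG_WAITALL is used here only to keep the sketch short:

#include <sys/types.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <stdint.h>

#define MY_VER          1
#define MAX_PDATA       64

struct wire_hdr {               /* illustrative, not the provider's ib_qp_cm_t */
        uint16_t ver;           /* network order on the wire */
        uint32_t p_size;        /* network order on the wire */
};

/* Receive header, validate version and size, then read the private data. */
static int recv_and_validate(int sock, struct wire_hdr *hdr, char *pdata)
{
        ssize_t len = recv(sock, hdr, sizeof(*hdr), MSG_WAITALL);

        if (len != (ssize_t)sizeof(*hdr) || ntohs(hdr->ver) != MY_VER)
                return -1;                      /* short read or version mismatch */

        hdr->p_size = ntohl(hdr->p_size);       /* convert before any use */
        if (hdr->p_size > MAX_PDATA)
                return -1;                      /* never trust the peer's length */

        if (hdr->p_size &&
            recv(sock, pdata, hdr->p_size, MSG_WAITALL) != (ssize_t)hdr->p_size)
                return -1;
        return 0;
}
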
+
+/*
+ * PASSIVE: Create socket, listen, accept, exchange QP information
+ */
+DAT_RETURN
+dapli_socket_listen(DAPL_IA * ia_ptr, DAT_CONN_QUAL serviceID, DAPL_SP * sp_ptr)
+{
+ struct sockaddr_in addr;
+ ib_cm_srvc_handle_t cm_ptr = NULL;
+ int opt = 1;
+ DAT_RETURN dat_status = DAT_SUCCESS;
+
+ dapl_dbg_log(DAPL_DBG_TYPE_EP,
+ " listen(ia_ptr %p ServiceID %d sp_ptr %p)\n",
+ ia_ptr, serviceID, sp_ptr);
+
+ cm_ptr = dapls_ib_cm_create(NULL);
+ if (cm_ptr == NULL)
+ return DAT_INSUFFICIENT_RESOURCES;
+
+ cm_ptr->sp = sp_ptr;
+ cm_ptr->hca = ia_ptr->hca_ptr;
+
+ /* bind, listen, set sockopt, accept, exchange data */
+ if ((cm_ptr->socket =
+ socket(AF_INET, SOCK_STREAM, 0)) == DAPL_INVALID_SOCKET) {
+ dapl_log(DAPL_DBG_TYPE_ERR, " ERR: listen socket create: %s\n",
+ strerror(errno));
+ dat_status = DAT_INSUFFICIENT_RESOURCES;
+ goto bail;
+ }
+
+ setsockopt(cm_ptr->socket, SOL_SOCKET, SO_REUSEADDR,
+ (char *)&opt, sizeof(opt));
+ addr.sin_port = htons(serviceID);
+ addr.sin_family = AF_INET;
+ addr.sin_addr.s_addr = INADDR_ANY;
+
+ if ((bind(cm_ptr->socket, (struct sockaddr *)&addr, sizeof(addr)) < 0)
+ || (listen(cm_ptr->socket, 128) < 0)) {
+ dapl_dbg_log(DAPL_DBG_TYPE_CM,
+ " listen: ERROR %s on conn_qual 0x%x\n",
+ strerror(errno), serviceID);
+ if (errno == EADDRINUSE)
+ dat_status = DAT_CONN_QUAL_IN_USE;
+ else
+ dat_status = DAT_CONN_QUAL_UNAVAILABLE;
+ goto bail;
+ }
+
+ /* set cm_handle for this service point, save listen socket */
+ sp_ptr->cm_srvc_handle = cm_ptr;
+
+ /* queue up listen socket to process inbound CR's */
+ cm_ptr->state = DCM_LISTEN;
+ dapli_cm_queue(cm_ptr);
+
+ dapl_dbg_log(DAPL_DBG_TYPE_CM,
+ " listen: qual 0x%x cr %p s_fd %d\n",
+ ntohs(serviceID), cm_ptr, cm_ptr->socket);
+
+ return dat_status;
+ bail:
+ dapl_dbg_log(DAPL_DBG_TYPE_CM,
+ " listen: ERROR on conn_qual 0x%x\n", serviceID);
+ dapls_ib_cm_free(cm_ptr, cm_ptr->ep);
+ return dat_status;
+}
+
+/*
+ * PASSIVE: accept socket
+ */
+static void dapli_socket_accept(ib_cm_srvc_handle_t cm_ptr)
+{
+ dp_ib_cm_handle_t acm_ptr;
+ int len;
+
+ dapl_dbg_log(DAPL_DBG_TYPE_EP, " socket_accept\n");
+
+ /*
+ * Accept all CR's on this port to avoid half-connection (SYN_RCV)
+ * stalls with many-to-one connection storms
+ */
+ do {
+ /* Allocate accept CM and initialize */
+ if ((acm_ptr = dapls_ib_cm_create(NULL)) == NULL)
+ return;
+
+ acm_ptr->sp = cm_ptr->sp;
+ acm_ptr->hca = cm_ptr->hca;
+
+ len = sizeof(acm_ptr->dst.ia_address);
+ acm_ptr->socket = accept(cm_ptr->socket,
+ (struct sockaddr *)
+ &acm_ptr->dst.ia_address,
+ (socklen_t *) & len);
+ if (acm_ptr->socket == DAPL_INVALID_SOCKET) {
+ dapl_log(DAPL_DBG_TYPE_ERR,
+ " accept: ERR %s on FD %d l_cr %p\n",
+ strerror(errno), cm_ptr->socket, cm_ptr);
+ dapls_ib_cm_free(acm_ptr, acm_ptr->ep);
+ return;
+ }
+
+ acm_ptr->state = DCM_ACCEPTING;
+ dapli_cm_queue(acm_ptr);
+
+ } while (dapl_poll(cm_ptr->socket, DAPL_FD_READ) == DAPL_FD_READ);
+}
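
The accept loop keeps calling accept() while dapl_poll() still reports the listen socket readable, so a burst of many-to-one connection requests is drained in one wakeup. A standalone sketch of that drain pattern; handle_conn() is an illustrative placeholder:

#include <sys/socket.h>
#include <poll.h>

/* Illustrative placeholder: queue the new fd for the CM work thread. */
static void handle_conn(int fd)
{
        (void)fd;
}

/* Accept every pending connection while the listen socket stays readable. */
static void drain_accept(int listen_fd)
{
        struct pollfd p = { .fd = listen_fd, .events = POLLIN, .revents = 0 };

        do {
                int fd = accept(listen_fd, NULL, NULL);

                if (fd < 0)
                        return;         /* nothing left (or error); retry on next wakeup */
                handle_conn(fd);
        } while (poll(&p, 1, 0) > 0 && (p.revents & POLLIN));
}
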
+
+/*
+ * PASSIVE: receive peer QP information, private data, post cr_event
+ */
+static void dapli_socket_accept_data(ib_cm_srvc_handle_t acm_ptr)
+{
+ int len;
+ void *p_data = NULL;
+
+ dapl_dbg_log(DAPL_DBG_TYPE_EP, " socket accepted, read QP data\n");
+
+ /* read in DST QP info, IA address. check for private data */
+ len =
+ recv(acm_ptr->socket, (char *)&acm_ptr->dst, sizeof(ib_qp_cm_t), 0);
+ if (len != sizeof(ib_qp_cm_t) || ntohs(acm_ptr->dst.ver) != DCM_VER) {
+ dapl_log(DAPL_DBG_TYPE_ERR,
+ " accept read: ERR %s, rcnt=%d, ver=%d\n",
+ strerror(errno), len, ntohs(acm_ptr->dst.ver));
+ goto bail;
+ }
+
+ /* convert accepted values to host order */
+ acm_ptr->dst.port = ntohs(acm_ptr->dst.port);
+ acm_ptr->dst.lid = ntohs(acm_ptr->dst.lid);
+ acm_ptr->dst.qpn = ntohl(acm_ptr->dst.qpn);
+#ifdef DAT_EXTENSIONS
+ acm_ptr->dst.qp_type = ntohs(acm_ptr->dst.qp_type);
+#endif
+ acm_ptr->dst.p_size = ntohl(acm_ptr->dst.p_size);
+
+ dapl_dbg_log(DAPL_DBG_TYPE_EP,
+ " accept: DST %s port=0x%x lid=0x%x, qpn=0x%x, psize=%d\n",
+ inet_ntoa(((struct sockaddr_in *)&acm_ptr->dst.
+ ia_address)->sin_addr), acm_ptr->dst.port,
+ acm_ptr->dst.lid, acm_ptr->dst.qpn, acm_ptr->dst.p_size);
+
+ /* validate private data size before reading */
+ if (acm_ptr->dst.p_size > IB_MAX_REQ_PDATA_SIZE) {
+ dapl_dbg_log(DAPL_DBG_TYPE_ERR,
+ " accept read: psize (%d) wrong\n",
+ acm_ptr->dst.p_size);
+ goto bail;
+ }
+
+ dapl_dbg_log(DAPL_DBG_TYPE_EP, " socket accepted, read private data\n");
+
+ /* read private data into cm_handle if any present */
+ if (acm_ptr->dst.p_size) {
+ len =
+ recv(acm_ptr->socket, acm_ptr->p_data, acm_ptr->dst.p_size,
+ 0);
+ if (len != acm_ptr->dst.p_size) {
+ dapl_log(DAPL_DBG_TYPE_ERR,
+ " accept read pdata: ERR %s, rcnt=%d\n",
+ strerror(errno), len);
+ goto bail;
+ }
+ dapl_dbg_log(DAPL_DBG_TYPE_EP, " accept: psize=%d read\n", len);
+ p_data = acm_ptr->p_data;
+ }
+
+ acm_ptr->state = DCM_ACCEPTING_DATA;
+
+#ifdef DAT_EXTENSIONS
+ if (acm_ptr->dst.qp_type == IBV_QPT_UD) {
+ DAT_IB_EXTENSION_EVENT_DATA xevent;
+
+ /* post EVENT, modify_qp created ah */
+ xevent.status = 0;
+ xevent.type = DAT_IB_UD_CONNECT_REQUEST;
+
+ dapls_evd_post_cr_event_ext(acm_ptr->sp,
+ DAT_IB_UD_CONNECTION_REQUEST_EVENT,
+ acm_ptr,
+ (DAT_COUNT) acm_ptr->dst.p_size,
+ (DAT_PVOID *) acm_ptr->p_data,
+ (DAT_PVOID *) & xevent);
+ } else
+#endif
+ /* trigger CR event and return SUCCESS */
+ dapls_cr_callback(acm_ptr,
+ IB_CME_CONNECTION_REQUEST_PENDING,
+ p_data, acm_ptr->sp);
+ return;
+ bail:
+ /* close socket, free cm structure, active will see socket close as reject */
+ dapls_ib_cm_free(acm_ptr, acm_ptr->ep);
+ return;
+}
+
+/*
+ * PASSIVE: consumer accept, send local QP information, private data,
+ * queue on work thread to receive RTU information to avoid blocking
+ * user thread.
+ */
+DAT_RETURN
+dapli_socket_accept_usr(DAPL_EP * ep_ptr,
+ DAPL_CR * cr_ptr, DAT_COUNT p_size, DAT_PVOID p_data)
+{
+ DAPL_IA *ia_ptr = ep_ptr->header.owner_ia;
+ dp_ib_cm_handle_t cm_ptr = cr_ptr->ib_cm_handle;
+ ib_qp_cm_t local;
+ struct iovec iov[2];
+ int len;
+
+ if (p_size > IB_MAX_REP_PDATA_SIZE)
+ return DAT_LENGTH_ERROR;
+
+ /* must have an accepted socket */
+ if (cm_ptr->socket == DAPL_INVALID_SOCKET)
+ return DAT_INTERNAL_ERROR;
+
+ dapl_dbg_log(DAPL_DBG_TYPE_EP,
+ " ACCEPT_USR: remote port=0x%x lid=0x%x"
+ " qpn=0x%x qp_type %d, psize=%d\n",
+ cm_ptr->dst.port, cm_ptr->dst.lid,
+ cm_ptr->dst.qpn, cm_ptr->dst.qp_type, cm_ptr->dst.p_size);
+
+#ifdef DAT_EXTENSIONS
+ if (cm_ptr->dst.qp_type == IBV_QPT_UD &&
+ ep_ptr->qp_handle->qp_type != IBV_QPT_UD) {
+ dapl_dbg_log(DAPL_DBG_TYPE_ERR,
+ " ACCEPT_USR: ERR remote QP is UD,"
+ ", but local QP is not\n");
+ return (DAT_INVALID_HANDLE | DAT_INVALID_HANDLE_EP);
+ }
+#endif
+
+ /* modify QP to RTR and then to RTS with remote info already read */
+ dapl_os_lock(&ep_ptr->header.lock);
+ if (dapls_modify_qp_state(ep_ptr->qp_handle,
+ IBV_QPS_RTR, cm_ptr) != DAT_SUCCESS) {
+ dapl_log(DAPL_DBG_TYPE_ERR,
+ " ACCEPT_USR: QPS_RTR ERR %s -> %s\n",
+ strerror(errno), inet_ntoa(((struct sockaddr_in *)
+ &cm_ptr->dst.ia_address)->
+ sin_addr));
+ dapl_os_unlock(&ep_ptr->header.lock);
+ goto bail;
+ }
+ if (dapls_modify_qp_state(ep_ptr->qp_handle,
+ IBV_QPS_RTS, cm_ptr) != DAT_SUCCESS) {
+ dapl_log(DAPL_DBG_TYPE_ERR,
+ " ACCEPT_USR: QPS_RTS ERR %s -> %s\n",
+ strerror(errno), inet_ntoa(((struct sockaddr_in *)
+ &cm_ptr->dst.ia_address)->
+ sin_addr));
+ dapl_os_unlock(&ep_ptr->header.lock);
+ goto bail;
+ }
+ dapl_os_unlock(&ep_ptr->header.lock);
+
+ /* save remote address information */
+ dapl_os_memcpy(&ep_ptr->remote_ia_address,
+ &cm_ptr->dst.ia_address,
+ sizeof(ep_ptr->remote_ia_address));
+
+ /* send our QP info, IA address, pdata. Don't overwrite dst data */
+ local.ver = htons(DCM_VER);
+ local.rej = 0;
+ local.qpn = htonl(ep_ptr->qp_handle->qp_num);
+ local.qp_type = htons(ep_ptr->qp_handle->qp_type);
+ local.port = htons(ia_ptr->hca_ptr->port_num);
+ local.lid = ia_ptr->hca_ptr->ib_trans.lid;
+ local.gid = ia_ptr->hca_ptr->ib_trans.gid;
+ local.ia_address = ia_ptr->hca_ptr->hca_address;
+ ((struct sockaddr_in *)&local.ia_address)->sin_port =
+ ntohs(cm_ptr->sp->conn_qual);
+
+ local.p_size = htonl(p_size);
+ iov[0].iov_base = (void *)&local;
+ iov[0].iov_len = sizeof(ib_qp_cm_t);
+ if (p_size) {
+ iov[1].iov_base = p_data;
+ iov[1].iov_len = p_size;
+ len = writev(cm_ptr->socket, iov, 2);
+ } else {
+ len = writev(cm_ptr->socket, iov, 1);
+ }
+
+ if (len != (p_size + sizeof(ib_qp_cm_t))) {
+ dapl_log(DAPL_DBG_TYPE_ERR,
+ " ACCEPT_USR: ERR %s, wcnt=%d -> %s\n",
+ strerror(errno), len, inet_ntoa(((struct sockaddr_in *)
+ &cm_ptr->dst.
+ ia_address)->
+ sin_addr));
+ goto bail;
+ }
+
+ dapl_dbg_log(DAPL_DBG_TYPE_CM,
+ " ACCEPT_USR: local port=0x%x lid=0x%x"
+ " qpn=0x%x psize=%d\n",
+ ntohs(local.port), ntohs(local.lid),
+ ntohl(local.qpn), ntohl(local.p_size));
+ dapl_dbg_log(DAPL_DBG_TYPE_CM,
+ " ACCEPT_USR SRC GID subnet %016llx id %016llx\n",
+ (unsigned long long)
+ htonll(local.gid.global.subnet_prefix),
+ (unsigned long long)
+ htonll(local.gid.global.interface_id));
+
+ /* save state and reference to EP, queue for RTU data */
+ cm_ptr->ep = ep_ptr;
+ cm_ptr->hca = ia_ptr->hca_ptr;
+ cm_ptr->state = DCM_ACCEPTED;
+
+ dapl_dbg_log(DAPL_DBG_TYPE_EP, " PASSIVE: accepted!\n");
+ return DAT_SUCCESS;
+ bail:
+ dapls_ib_cm_free(cm_ptr, cm_ptr->ep);
+ dapls_ib_reinit_ep(ep_ptr); /* reset QP state */
+ return DAT_INTERNAL_ERROR;
+}
+
+/*
+ * PASSIVE: read RTU from active peer, post CONN event
+ */
+void dapli_socket_accept_rtu(dp_ib_cm_handle_t cm_ptr)
+{
+ int len;
+ short rtu_data = 0;
+
+ /* complete handshake after final QP state change */
+ len = recv(cm_ptr->socket, (char *)&rtu_data, sizeof(rtu_data), 0);
+ if (len != sizeof(rtu_data) || ntohs(rtu_data) != 0x0e0f) {
+ dapl_log(DAPL_DBG_TYPE_ERR,
+ " ACCEPT_RTU: ERR %s, rcnt=%d rdata=%x\n",
+ strerror(errno), len, ntohs(rtu_data),
+ inet_ntoa(((struct sockaddr_in *)
+ &cm_ptr->dst.ia_address)->sin_addr));
+ goto bail;
+ }
+
+ /* save state and reference to EP, queue for disc event */
+ cm_ptr->state = DCM_CONNECTED;
+
+ /* final data exchange if remote QP state is good to go */
+ dapl_dbg_log(DAPL_DBG_TYPE_EP, " PASSIVE: connected!\n");
+
+#ifdef DAT_EXTENSIONS
+ if (cm_ptr->dst.qp_type == IBV_QPT_UD) {
+ DAT_IB_EXTENSION_EVENT_DATA xevent;
+
+ /* post EVENT, modify_qp created ah */
+ xevent.status = 0;
+ xevent.type = DAT_IB_UD_PASSIVE_REMOTE_AH;
+ xevent.remote_ah.ah = cm_ptr->ah;
+ xevent.remote_ah.qpn = cm_ptr->dst.qpn;
+ dapl_os_memcpy(&xevent.remote_ah.ia_addr,
+ &cm_ptr->dst.ia_address,
+ sizeof(cm_ptr->dst.ia_address));
+
+ dapls_evd_post_connection_event_ext((DAPL_EVD *) cm_ptr->ep->
+ param.connect_evd_handle,
+ DAT_IB_UD_CONNECTION_EVENT_ESTABLISHED,
+ (DAT_EP_HANDLE) cm_ptr->ep,
+ (DAT_COUNT) cm_ptr->dst.p_size,
+ (DAT_PVOID *) cm_ptr->p_data,
+ (DAT_PVOID *) &xevent);
+
+ /* done with socket, don't destroy cm_ptr, need pdata */
+ closesocket(cm_ptr->socket);
+ cm_ptr->socket = DAPL_INVALID_SOCKET;
+ cm_ptr->state = DCM_RELEASED;
+ } else
+#endif
+ {
+ cm_ptr->ep->cm_handle = cm_ptr; /* only RC, multi CR's on UD */
+ dapls_cr_callback(cm_ptr, IB_CME_CONNECTED, NULL, cm_ptr->sp);
+ }
+ return;
+
+bail:
+ dapls_ib_reinit_ep(cm_ptr->ep); /* reset QP state */
+ dapls_ib_cm_free(cm_ptr, cm_ptr->ep);
+ dapls_cr_callback(cm_ptr, IB_CME_DESTINATION_REJECT, NULL, cm_ptr->sp);
+}
+
+/*
+ * dapls_ib_connect
+ *
+ * Initiate a connection with the passive listener on another node
+ *
+ * Input:
+ * ep_handle,
+ * remote_ia_address,
+ * remote_conn_qual,
+ * prd_size size of private data and structure
+ *	prd_ptr			pointer to private data structure
+ *
+ * Output:
+ * none
+ *
+ * Returns:
+ * DAT_SUCCESS
+ * DAT_INSUFFICIENT_RESOURCES
+ * DAT_INVALID_PARAMETER
+ *
+ */
+DAT_RETURN
+dapls_ib_connect(IN DAT_EP_HANDLE ep_handle,
+ IN DAT_IA_ADDRESS_PTR remote_ia_address,
+ IN DAT_CONN_QUAL remote_conn_qual,
+ IN DAT_COUNT private_data_size, IN void *private_data)
+{
+ DAPL_EP *ep_ptr;
+ ib_qp_handle_t qp_ptr;
+
+ dapl_dbg_log(DAPL_DBG_TYPE_EP,
+ " connect(ep_handle %p ....)\n", ep_handle);
+
+ ep_ptr = (DAPL_EP *) ep_handle;
+ qp_ptr = ep_ptr->qp_handle;
+
+ return (dapli_socket_connect(ep_ptr, remote_ia_address,
+ remote_conn_qual,
+ private_data_size, private_data));
+}
+
+/*
+ * dapls_ib_disconnect
+ *
+ * Disconnect an EP
+ *
+ * Input:
+ * ep_handle,
+ * disconnect_flags
+ *
+ * Output:
+ * none
+ *
+ * Returns:
+ * DAT_SUCCESS
+ */
+DAT_RETURN
+dapls_ib_disconnect(IN DAPL_EP * ep_ptr, IN DAT_CLOSE_FLAGS close_flags)
+{
+ dapl_dbg_log(DAPL_DBG_TYPE_EP,
+ "dapls_ib_disconnect(ep_handle %p ....)\n", ep_ptr);
+
+ /* reinit to modify QP state */
+ dapls_ib_reinit_ep(ep_ptr);
+
+ if (ep_ptr->cm_handle == NULL ||
+ ep_ptr->param.ep_state == DAT_EP_STATE_DISCONNECTED)
+ return DAT_SUCCESS;
+ else
+ return (dapli_socket_disconnect(ep_ptr->cm_handle));
+}
+
+/*
+ * dapls_ib_disconnect_clean
+ *
+ * Clean up outstanding connection data. This routine is invoked
+ * after the final disconnect callback has occurred, and only on the
+ * ACTIVE side of a connection. It is also called if dat_ep_connect
+ * times out using the consumer-supplied timeout value.
+ *
+ * Input:
+ * ep_ptr DAPL_EP
+ * active Indicates active side of connection
+ *
+ * Output:
+ * none
+ *
+ * Returns:
+ * void
+ *
+ */
+void
+dapls_ib_disconnect_clean(IN DAPL_EP * ep_ptr,
+ IN DAT_BOOLEAN active,
+ IN const ib_cm_events_t ib_cm_event)
+{
+ /* NOTE: SCM will only initialize cm_handle with RC type
+ *
+ * For UD there can be many in-flight CR's, so you
+ * cannot clean up timed-out CR's with the EP reference
+ * alone since they share the same EP. The common
+ * code that handles connection timeout logic needs
+ * to be updated for UD support.
+ */
+ if (ep_ptr->cm_handle)
+ dapls_ib_cm_free(ep_ptr->cm_handle, ep_ptr);
+
+ return;
+}
+
+/*
+ * dapl_ib_setup_conn_listener
+ *
+ * Have the CM set up a connection listener.
+ *
+ * Input:
+ *	ia_ptr			IA handle
+ *	ServiceID		IB Channel Service ID
+ *	sp_ptr			service point (listen) handle
+ *
+ * Output:
+ * none
+ *
+ * Returns:
+ * DAT_SUCCESS
+ * DAT_INSUFFICIENT_RESOURCES
+ * DAT_INTERNAL_ERROR
+ * 	DAT_CONN_QUAL_UNAVAILABLE
+ * DAT_CONN_QUAL_IN_USE
+ *
+ */
+DAT_RETURN
+dapls_ib_setup_conn_listener(IN DAPL_IA * ia_ptr,
+ IN DAT_UINT64 ServiceID, IN DAPL_SP * sp_ptr)
+{
+ return (dapli_socket_listen(ia_ptr, ServiceID, sp_ptr));
+}
+
+/*
+ * dapl_ib_remove_conn_listener
+ *
+ * Have the CM remove a connection listener.
+ *
+ * Input:
+ * ia_handle IA handle
+ * ServiceID IB Channel Service ID
+ *
+ * Output:
+ * none
+ *
+ * Returns:
+ * DAT_SUCCESS
+ * DAT_INVALID_STATE
+ *
+ */
+DAT_RETURN
+dapls_ib_remove_conn_listener(IN DAPL_IA * ia_ptr, IN DAPL_SP * sp_ptr)
+{
+ ib_cm_srvc_handle_t cm_ptr = sp_ptr->cm_srvc_handle;
+
+ dapl_dbg_log(DAPL_DBG_TYPE_EP,
+ "dapls_ib_remove_conn_listener(ia_ptr %p sp_ptr %p cm_ptr %p)\n",
+ ia_ptr, sp_ptr, cm_ptr);
+
+ /* close accepted socket, free cm_srvc_handle and return */
+ if (cm_ptr != NULL) {
+ if (cm_ptr->socket != DAPL_INVALID_SOCKET) {
+ shutdown(cm_ptr->socket, SHUT_RDWR);
+ closesocket(cm_ptr->socket);
+ cm_ptr->socket = DAPL_INVALID_SOCKET;
+ }
+ /* cr_thread will free */
+ cm_ptr->state = DCM_DESTROY;
+ sp_ptr->cm_srvc_handle = NULL;
+ if (send(cm_ptr->hca->ib_trans.scm[1],
+ "w", sizeof "w", 0) == -1)
+ dapl_log(DAPL_DBG_TYPE_CM,
+ " cm_destroy: thread wakeup error = %s\n",
+ strerror(errno));
+ }
+ return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_accept_connection
+ *
+ * Perform necessary steps to accept a connection
+ *
+ * Input:
+ * cr_handle
+ * ep_handle
+ * private_data_size
+ * private_data
+ *
+ * Output:
+ * none
+ *
+ * Returns:
+ * DAT_SUCCESS
+ * DAT_INSUFFICIENT_RESOURCES
+ * DAT_INTERNAL_ERROR
+ *
+ */
+DAT_RETURN
+dapls_ib_accept_connection(IN DAT_CR_HANDLE cr_handle,
+ IN DAT_EP_HANDLE ep_handle,
+ IN DAT_COUNT p_size, IN const DAT_PVOID p_data)
+{
+ DAPL_CR *cr_ptr;
+ DAPL_EP *ep_ptr;
+
+ dapl_dbg_log(DAPL_DBG_TYPE_EP,
+ "dapls_ib_accept_connection(cr %p ep %p prd %p,%d)\n",
+ cr_handle, ep_handle, p_data, p_size);
+
+ cr_ptr = (DAPL_CR *) cr_handle;
+ ep_ptr = (DAPL_EP *) ep_handle;
+
+ /* allocate and attach a QP if necessary */
+ if (ep_ptr->qp_state == DAPL_QP_STATE_UNATTACHED) {
+ DAT_RETURN status;
+ status = dapls_ib_qp_alloc(ep_ptr->header.owner_ia,
+ ep_ptr, ep_ptr);
+ if (status != DAT_SUCCESS)
+ return status;
+ }
+ return (dapli_socket_accept_usr(ep_ptr, cr_ptr, p_size, p_data));
+}
+
+/*
+ * dapls_ib_reject_connection
+ *
+ * Reject a connection
+ *
+ * Input:
+ * cr_handle
+ *
+ * Output:
+ * none
+ *
+ * Returns:
+ * DAT_SUCCESS
+ * DAT_INTERNAL_ERROR
+ *
+ */
+DAT_RETURN
+dapls_ib_reject_connection(IN dp_ib_cm_handle_t cm_ptr,
+ IN int reason,
+ IN DAT_COUNT psize, IN const DAT_PVOID pdata)
+{
+ struct iovec iov[2];
+
+ dapl_dbg_log(DAPL_DBG_TYPE_EP,
+ " reject(cm %p reason %x, pdata %p, psize %d)\n",
+ cm_ptr, reason, pdata, psize);
+
+ if (psize > IB_MAX_REJ_PDATA_SIZE)
+ return DAT_LENGTH_ERROR;
+
+ /* write reject data to indicate reject */
+ if (cm_ptr->socket != DAPL_INVALID_SOCKET) {
+ cm_ptr->dst.rej = (uint16_t) reason;
+ cm_ptr->dst.rej = htons(cm_ptr->dst.rej);
+ cm_ptr->dst.p_size = htonl(psize);
+ /* get qp_type from request */
+ cm_ptr->dst.qp_type = ntohs(cm_ptr->dst.qp_type);
+
+ iov[0].iov_base = (void *)&cm_ptr->dst;
+ iov[0].iov_len = sizeof(ib_qp_cm_t);
+ if (psize) {
+ iov[1].iov_base = pdata;
+ iov[1].iov_len = psize;
+ writev(cm_ptr->socket, iov, 2);
+ } else {
+ writev(cm_ptr->socket, iov, 1);
+ }
+
+ shutdown(cm_ptr->socket, SHUT_RDWR);
+ closesocket(cm_ptr->socket);
+ cm_ptr->socket = DAPL_INVALID_SOCKET;
+ }
+
+ /* cr_thread will destroy CR */
+ cm_ptr->state = DCM_DESTROY;
+ if (send(cm_ptr->hca->ib_trans.scm[1], "w", sizeof "w", 0) == -1)
+ dapl_log(DAPL_DBG_TYPE_CM,
+ " cm_destroy: thread wakeup error = %s\n",
+ strerror(errno));
+ return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_cm_remote_addr
+ *
+ * Obtain the remote IP address given a connection
+ *
+ * Input:
+ * cr_handle
+ *
+ * Output:
+ * remote_ia_address: where to place the remote address
+ *
+ * Returns:
+ * DAT_SUCCESS
+ * DAT_INVALID_HANDLE
+ *
+ */
+DAT_RETURN
+dapls_ib_cm_remote_addr(IN DAT_HANDLE dat_handle,
+ OUT DAT_SOCK_ADDR6 * remote_ia_address)
+{
+ DAPL_HEADER *header;
+ dp_ib_cm_handle_t ib_cm_handle;
+
+ dapl_dbg_log(DAPL_DBG_TYPE_EP,
+ "dapls_ib_cm_remote_addr(dat_handle %p, ....)\n",
+ dat_handle);
+
+ header = (DAPL_HEADER *) dat_handle;
+
+ if (header->magic == DAPL_MAGIC_EP)
+ ib_cm_handle = ((DAPL_EP *) dat_handle)->cm_handle;
+ else if (header->magic == DAPL_MAGIC_CR)
+ ib_cm_handle = ((DAPL_CR *) dat_handle)->ib_cm_handle;
+ else
+ return DAT_INVALID_HANDLE;
+
+ dapl_os_memcpy(remote_ia_address,
+ &ib_cm_handle->dst.ia_address, sizeof(DAT_SOCK_ADDR6));
+
+ return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_private_data_size
+ *
+ * Return the size of private data given a connection op type
+ *
+ * Input:
+ * prd_ptr private data pointer
+ * conn_op connection operation type
+ *
+ * If prd_ptr is NULL, this is a query for the max size supported by
+ * the provider, otherwise it is the actual size of the private data
+ * contained in prd_ptr.
+ *
+ *
+ * Output:
+ * None
+ *
+ * Returns:
+ * length of private data
+ *
+ */
+int dapls_ib_private_data_size(IN DAPL_PRIVATE * prd_ptr,
+ IN DAPL_PDATA_OP conn_op, IN DAPL_HCA * hca_ptr)
+{
+ int size;
+
+ switch (conn_op) {
+ case DAPL_PDATA_CONN_REQ:
+ {
+ size = IB_MAX_REQ_PDATA_SIZE;
+ break;
+ }
+ case DAPL_PDATA_CONN_REP:
+ {
+ size = IB_MAX_REP_PDATA_SIZE;
+ break;
+ }
+ case DAPL_PDATA_CONN_REJ:
+ {
+ size = IB_MAX_REJ_PDATA_SIZE;
+ break;
+ }
+ case DAPL_PDATA_CONN_DREQ:
+ {
+ size = IB_MAX_DREQ_PDATA_SIZE;
+ break;
+ }
+ case DAPL_PDATA_CONN_DREP:
+ {
+ size = IB_MAX_DREP_PDATA_SIZE;
+ break;
+ }
+ default:
+ {
+ size = 0;
+ }
+
+ } /* end switch */
+
+ return size;
+}
+
+/*
+ * Map all socket CM event codes to the DAT equivalent.
+ */
+#define DAPL_IB_EVENT_CNT 10
+
+static struct ib_cm_event_map {
+ const ib_cm_events_t ib_cm_event;
+ DAT_EVENT_NUMBER dat_event_num;
+} ib_cm_event_map[DAPL_IB_EVENT_CNT] = {
+/* 00 */ {IB_CME_CONNECTED,
+ DAT_CONNECTION_EVENT_ESTABLISHED},
+/* 01 */ {IB_CME_DISCONNECTED,
+ DAT_CONNECTION_EVENT_DISCONNECTED},
+/* 02 */ {IB_CME_DISCONNECTED_ON_LINK_DOWN,
+ DAT_CONNECTION_EVENT_DISCONNECTED},
+/* 03 */ {IB_CME_CONNECTION_REQUEST_PENDING,
+ DAT_CONNECTION_REQUEST_EVENT},
+/* 04 */ {IB_CME_CONNECTION_REQUEST_PENDING_PRIVATE_DATA,
+ DAT_CONNECTION_REQUEST_EVENT},
+/* 05 */ {IB_CME_DESTINATION_REJECT,
+ DAT_CONNECTION_EVENT_NON_PEER_REJECTED},
+/* 06 */ {IB_CME_DESTINATION_REJECT_PRIVATE_DATA,
+ DAT_CONNECTION_EVENT_PEER_REJECTED},
+/* 07 */ {IB_CME_DESTINATION_UNREACHABLE,
+ DAT_CONNECTION_EVENT_UNREACHABLE},
+/* 08 */ {IB_CME_TOO_MANY_CONNECTION_REQUESTS,
+ DAT_CONNECTION_EVENT_NON_PEER_REJECTED},
+/* 09 */ {IB_CME_LOCAL_FAILURE,
+ DAT_CONNECTION_EVENT_BROKEN}
+};
+
+/*
+ * dapls_ib_get_dat_event
+ *
+ * Return a DAT connection event given a provider CM event.
+ *
+ * Input:
+ *	ib_cm_event	event provided to the dapl callback routine
+ *	active		switch indicating active or passive connection
+ *
+ * Output:
+ * 	none
+ *
+ * Returns:
+ * 	DAT_EVENT_NUMBER of the translated provider value
+ */
+DAT_EVENT_NUMBER
+dapls_ib_get_dat_event(IN const ib_cm_events_t ib_cm_event,
+ IN DAT_BOOLEAN active)
+{
+ DAT_EVENT_NUMBER dat_event_num;
+ int i;
+
+ active = active;
+
+ if (ib_cm_event > IB_CME_LOCAL_FAILURE)
+ return (DAT_EVENT_NUMBER) 0;
+
+ dat_event_num = 0;
+ for (i = 0; i < DAPL_IB_EVENT_CNT; i++) {
+ if (ib_cm_event == ib_cm_event_map[i].ib_cm_event) {
+ dat_event_num = ib_cm_event_map[i].dat_event_num;
+ break;
+ }
+ }
+ dapl_dbg_log(DAPL_DBG_TYPE_CALLBACK,
+ "dapls_ib_get_dat_event: event translate(%s) ib=0x%x dat=0x%x\n",
+ active ? "active" : "passive", ib_cm_event, dat_event_num);
+
+ return dat_event_num;
+}
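
Both translation routines walk the shared ib_cm_event_map[] table, one in each direction. A small self-contained sketch of the same table-driven forward and reverse enum mapping, using generic names rather than the DAT/IB values:

enum wire_ev { WIRE_CONN, WIRE_DISC, WIRE_REJ };        /* illustrative only */
enum api_ev  { API_ESTABLISHED, API_DISCONNECTED, API_REJECTED };

static const struct ev_pair { enum wire_ev w; enum api_ev a; } ev_map[] = {
        { WIRE_CONN, API_ESTABLISHED },
        { WIRE_DISC, API_DISCONNECTED },
        { WIRE_REJ,  API_REJECTED },
};

#define EV_MAP_CNT (sizeof(ev_map) / sizeof(ev_map[0]))

static enum api_ev to_api(enum wire_ev w)
{
        unsigned i;

        for (i = 0; i < EV_MAP_CNT; i++)
                if (ev_map[i].w == w)
                        return ev_map[i].a;
        return API_ESTABLISHED;         /* unreachable if the table is complete */
}

static enum wire_ev to_wire(enum api_ev a)
{
        unsigned i;

        for (i = 0; i < EV_MAP_CNT; i++)
                if (ev_map[i].a == a)
                        return ev_map[i].w;
        return WIRE_CONN;
}
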
+
+/*
+ * dapls_ib_get_cm_event
+ *
+ * Return a provider CM event given a DAT connection event.
+ *
+ * Input:
+ *	dat_event_num	DAT event we need an equivalent CM event for
+ *
+ * Output:
+ *	none
+ *
+ * Returns:
+ *	ib_cm_event of the translated DAT value
+ */
+ib_cm_events_t dapls_ib_get_cm_event(IN DAT_EVENT_NUMBER dat_event_num)
+{
+ ib_cm_events_t ib_cm_event;
+ int i;
+
+ ib_cm_event = 0;
+ for (i = 0; i < DAPL_IB_EVENT_CNT; i++) {
+ if (dat_event_num == ib_cm_event_map[i].dat_event_num) {
+ ib_cm_event = ib_cm_event_map[i].ib_cm_event;
+ break;
+ }
+ }
+ return ib_cm_event;
+}
+
+/* outbound/inbound CR processing thread to avoid blocking applications */
+void cr_thread(void *arg)
+{
+ struct dapl_hca *hca_ptr = arg;
+ dp_ib_cm_handle_t cr, next_cr;
+ int opt, ret;
+ socklen_t opt_len;
+ char rbuf[2];
+ struct dapl_fd_set *set;
+ enum DAPL_FD_EVENTS event;
+
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " cr_thread: ENTER hca %p\n", hca_ptr);
+ set = dapl_alloc_fd_set();
+ if (!set)
+ goto out;
+
+ dapl_os_lock(&hca_ptr->ib_trans.lock);
+ hca_ptr->ib_trans.cr_state = IB_THREAD_RUN;
+
+ while (1) {
+ dapl_fd_zero(set);
+ dapl_fd_set(hca_ptr->ib_trans.scm[0], set, DAPL_FD_READ);
+
+ if (!dapl_llist_is_empty(&hca_ptr->ib_trans.list))
+ next_cr = dapl_llist_peek_head(&hca_ptr->ib_trans.list);
+ else
+ next_cr = NULL;
+
+ while (next_cr) {
+ cr = next_cr;
+ next_cr = dapl_llist_next_entry(&hca_ptr->ib_trans.list,
+ (DAPL_LLIST_ENTRY *) &
+ cr->entry);
+ if (cr->state == DCM_DESTROY
+ || hca_ptr->ib_trans.cr_state != IB_THREAD_RUN) {
+ dapl_llist_remove_entry(&hca_ptr->ib_trans.list,
+ (DAPL_LLIST_ENTRY *) &
+ cr->entry);
+ dapl_dbg_log(DAPL_DBG_TYPE_CM,
+ " CR FREE: %p ep=%p st=%d sock=%d\n",
+ cr, cr->ep, cr->state, cr->socket);
+ dapl_os_free(cr, sizeof(*cr));
+ continue;
+ }
+ if (cr->socket == DAPL_INVALID_SOCKET)
+ continue;
+
+ event = (cr->state == DCM_CONN_PENDING) ?
+ DAPL_FD_WRITE : DAPL_FD_READ;
+ if (dapl_fd_set(cr->socket, set, event)) {
+ dapl_log(DAPL_DBG_TYPE_ERR,
+ " cr_thread: DESTROY CR st=%d fd %d"
+ " -> %s\n", cr->state, cr->socket,
+ inet_ntoa(((struct sockaddr_in *)
+ &cr->dst.ia_address)->
+ sin_addr));
+ dapls_ib_cm_free(cr, cr->ep);
+ continue;
+ }
+
+ dapl_dbg_log(DAPL_DBG_TYPE_CM,
+ " poll cr=%p, socket=%d\n", cr,
+ cr->socket);
+ dapl_os_unlock(&hca_ptr->ib_trans.lock);
+
+ ret = dapl_poll(cr->socket, event);
+
+ dapl_dbg_log(DAPL_DBG_TYPE_CM,
+ " poll ret=0x%x cr->state=%d socket=%d\n",
+ ret, cr->state, cr->socket);
+
+ /* data on listen, qp exchange, and on disc req */
+ if (ret == DAPL_FD_READ) {
+ if (cr->socket != DAPL_INVALID_SOCKET) {
+ switch (cr->state) {
+ case DCM_LISTEN:
+ dapli_socket_accept(cr);
+ break;
+ case DCM_ACCEPTING:
+ dapli_socket_accept_data(cr);
+ break;
+ case DCM_ACCEPTED:
+ dapli_socket_accept_rtu(cr);
+ break;
+ case DCM_RTU_PENDING:
+ dapli_socket_connect_rtu(cr);
+ break;
+ case DCM_CONNECTED:
+ dapli_socket_disconnect(cr);
+ break;
+ default:
+ break;
+ }
+ }
+ /* connect socket is writable, check status */
+ } else if (ret == DAPL_FD_WRITE ||
+ (cr->state == DCM_CONN_PENDING &&
+ ret == DAPL_FD_ERROR)) {
+ opt = 0;
+ opt_len = sizeof(opt);
+ ret = getsockopt(cr->socket, SOL_SOCKET,
+ SO_ERROR, (char *)&opt,
+ &opt_len);
+ if (!ret)
+ dapli_socket_connected(cr, opt);
+ else
+ dapli_socket_connected(cr, errno);
+
+ /* POLLHUP, ERR, NVAL, or poll error - DISC */
+ } else if (ret < 0 || ret == DAPL_FD_ERROR) {
+ dapl_log(DAPL_DBG_TYPE_CM,
+ " poll=%d cr->st=%s sk=%d ep %p, %d\n",
+ ret, dapl_cm_state_str(cr->state),
+ cr->socket, cr->ep,
+ cr->ep ? cr->ep->param.ep_state:0);
+ dapli_socket_disconnect(cr);
+ }
+ dapl_os_lock(&hca_ptr->ib_trans.lock);
+ }
+
+ /* set to exit and all resources destroyed */
+ if ((hca_ptr->ib_trans.cr_state != IB_THREAD_RUN) &&
+ (dapl_llist_is_empty(&hca_ptr->ib_trans.list)))
+ break;
+
+ dapl_os_unlock(&hca_ptr->ib_trans.lock);
+ dapl_select(set);
+
+ /* if pipe used to wakeup, consume */
+ while (dapl_poll(hca_ptr->ib_trans.scm[0],
+ DAPL_FD_READ) == DAPL_FD_READ) {
+ if (recv(hca_ptr->ib_trans.scm[0], rbuf, 2, 0) == -1)
+ dapl_log(DAPL_DBG_TYPE_CM,
+ " cr_thread: read pipe error = %s\n",
+ strerror(errno));
+ }
+ dapl_os_lock(&hca_ptr->ib_trans.lock);
+
+ /* set to exit and all resources destroyed */
+ if ((hca_ptr->ib_trans.cr_state != IB_THREAD_RUN) &&
+ (dapl_llist_is_empty(&hca_ptr->ib_trans.list)))
+ break;
+ }
+
+ dapl_os_unlock(&hca_ptr->ib_trans.lock);
+ free(set);
+ out:
+ hca_ptr->ib_trans.cr_state = IB_THREAD_EXIT;
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " cr_thread(hca %p) exit\n", hca_ptr);
+}
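
cr_thread rebuilds its pollfd set on every pass: the wakeup socket first, then one entry per in-process CR (write interest for pending connects, read interest otherwise), blocks in dapl_select(), dispatches by connection state, and drains the wakeup socket. A condensed standalone sketch of that rebuild, poll, dispatch loop; the conn array and handler are illustrative:

#include <poll.h>
#include <sys/socket.h>

#define MAX_CONNS 64

struct conn {
        int fd;
        int want_write;         /* non-blocking connect still in progress */
};

/* Illustrative placeholder: dispatch by connection state (accept, RTU, disc). */
static void handle_ready(struct conn *c, short revents)
{
        (void)c; (void)revents;
}

/* One pass of the CM work loop: rebuild the set, sleep in poll, dispatch. */
static void work_loop_once(int wakeup_fd, struct conn *conns, int nconns)
{
        struct pollfd set[MAX_CONNS + 1];
        char buf[8];
        int i, n = 0;

        set[n].fd = wakeup_fd;                  /* slot 0: wakeup socket */
        set[n].events = POLLIN;
        set[n++].revents = 0;

        for (i = 0; i < nconns && n <= MAX_CONNS; i++) {
                set[n].fd = conns[i].fd;
                set[n].events = conns[i].want_write ? POLLOUT : POLLIN;
                set[n++].revents = 0;
        }

        if (poll(set, n, -1) <= 0)              /* block until something is ready */
                return;

        for (i = 1; i < n; i++)                 /* set[i] pairs with conns[i - 1] */
                if (set[i].revents)
                        handle_ready(&conns[i - 1], set[i].revents);

        if (set[0].revents & POLLIN)            /* drain pending wakeup bytes */
                (void)recv(wakeup_fd, buf, sizeof(buf), MSG_DONTWAIT);
}
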
+
+
+#ifdef DAPL_COUNTERS
+/* Debug aid: List all Connections in process and state */
+void dapls_print_cm_list(IN DAPL_IA *ia_ptr)
+{
+ /* Print in process CR's for this IA, if debug type set */
+ int i = 0;
+ dp_ib_cm_handle_t cr, next_cr;
+
+ dapl_os_lock(&ia_ptr->hca_ptr->ib_trans.lock);
+ if (!dapl_llist_is_empty((DAPL_LLIST_HEAD*)
+ &ia_ptr->hca_ptr->ib_trans.list))
+ next_cr = dapl_llist_peek_head((DAPL_LLIST_HEAD*)
+ &ia_ptr->hca_ptr->ib_trans.list);
+ else
+ next_cr = NULL;
+
+ printf("\n DAPL IA CONNECTIONS IN PROCESS:\n");
+ while (next_cr) {
+ cr = next_cr;
+ next_cr = dapl_llist_next_entry((DAPL_LLIST_HEAD*)
+ &ia_ptr->hca_ptr->ib_trans.list,
+ (DAPL_LLIST_ENTRY*)&cr->entry);
+
+ printf( " CONN[%d]: sp %p ep %p sock %d %s %s %s %s %d\n",
+ i, cr->sp, cr->ep, cr->socket,
+ cr->dst.qp_type == IBV_QPT_RC ? "RC" : "UD",
+ dapl_cm_state_str(cr->state),
+ cr->sp ? "<-" : "->",
+ cr->state == DCM_LISTEN ?
+ inet_ntoa(((struct sockaddr_in *)
+ &ia_ptr->hca_ptr->hca_address)->sin_addr) :
+ inet_ntoa(((struct sockaddr_in *)
+ &cr->dst.ia_address)->sin_addr),
+ cr->sp ? (int)cr->sp->conn_qual :
+ ntohs(((struct sockaddr_in *)
+ &cr->dst.ia_address)->sin_port));
+ i++;
+ }
+ printf("\n");
+ dapl_os_unlock(&ia_ptr->hca_ptr->ib_trans.lock);
+}
+#endif
diff --git a/dapl/openib_scm/dapl_ib_cm.c b/dapl/openib_scm/dapl_ib_cm.c
deleted file mode 100644
index 90d6d27..0000000
--- a/dapl/openib_scm/dapl_ib_cm.c
+++ /dev/null
@@ -1,1786 +0,0 @@
-/*
- * This Software is licensed under one of the following licenses:
- *
- * 1) under the terms of the "Common Public License 1.0" a copy of which is
- * available from the Open Source Initiative, see
- * http://www.opensource.org/licenses/cpl.php.
- *
- * 2) under the terms of the "The BSD License" a copy of which is
- * available from the Open Source Initiative, see
- * http://www.opensource.org/licenses/bsd-license.php.
- *
- * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
- * copy of which is available from the Open Source Initiative, see
- * http://www.opensource.org/licenses/gpl-license.php.
- *
- * Licensee has the right to choose one of the above licenses.
- *
- * Redistributions of source code must retain the above copyright
- * notice and one of the license notices.
- *
- * Redistributions in binary form must reproduce both the above copyright
- * notice, one of the license notices in the documentation
- * and/or other materials provided with the distribution.
- */
-
-/***************************************************************************
- *
- * Module: uDAPL
- *
- * Filename: dapl_ib_cm.c
- *
- * Author: Arlin Davis
- *
- * Created: 3/10/2005
- *
- * Description:
- *
- * The uDAPL openib provider - connection management
- *
- ****************************************************************************
- * Source Control System Information
- *
- * $Id: $
- *
- * Copyright (c) 2005 Intel Corporation. All rights reserved.
- *
- **************************************************************************/
-
-#include "dapl.h"
-#include "dapl_adapter_util.h"
-#include "dapl_evd_util.h"
-#include "dapl_cr_util.h"
-#include "dapl_name_service.h"
-#include "dapl_ib_util.h"
-#include "dapl_osd.h"
-
-#if defined(_WIN32) || defined(_WIN64)
-enum DAPL_FD_EVENTS {
- DAPL_FD_READ = 0x1,
- DAPL_FD_WRITE = 0x2,
- DAPL_FD_ERROR = 0x4
-};
-
-static int dapl_config_socket(DAPL_SOCKET s)
-{
- unsigned long nonblocking = 1;
- return ioctlsocket(s, FIONBIO, &nonblocking);
-}
-
-static int dapl_connect_socket(DAPL_SOCKET s, struct sockaddr *addr,
- int addrlen)
-{
- int err;
-
- err = connect(s, addr, addrlen);
- if (err == SOCKET_ERROR)
- err = WSAGetLastError();
- return (err == WSAEWOULDBLOCK) ? EAGAIN : err;
-}
-
-struct dapl_fd_set {
- struct fd_set set[3];
-};
-
-static struct dapl_fd_set *dapl_alloc_fd_set(void)
-{
- return dapl_os_alloc(sizeof(struct dapl_fd_set));
-}
-
-static void dapl_fd_zero(struct dapl_fd_set *set)
-{
- FD_ZERO(&set->set[0]);
- FD_ZERO(&set->set[1]);
- FD_ZERO(&set->set[2]);
-}
-
-static int dapl_fd_set(DAPL_SOCKET s, struct dapl_fd_set *set,
- enum DAPL_FD_EVENTS event)
-{
- FD_SET(s, &set->set[(event == DAPL_FD_READ) ? 0 : 1]);
- FD_SET(s, &set->set[2]);
- return 0;
-}
-
-static enum DAPL_FD_EVENTS dapl_poll(DAPL_SOCKET s, enum DAPL_FD_EVENTS event)
-{
- struct fd_set rw_fds;
- struct fd_set err_fds;
- struct timeval tv;
- int ret;
-
- FD_ZERO(&rw_fds);
- FD_ZERO(&err_fds);
- FD_SET(s, &rw_fds);
- FD_SET(s, &err_fds);
-
- tv.tv_sec = 0;
- tv.tv_usec = 0;
-
- if (event == DAPL_FD_READ)
- ret = select(1, &rw_fds, NULL, &err_fds, &tv);
- else
- ret = select(1, NULL, &rw_fds, &err_fds, &tv);
-
- if (ret == 0)
- return 0;
- else if (ret == SOCKET_ERROR)
- return WSAGetLastError();
- else if (FD_ISSET(s, &rw_fds))
- return event;
- else
- return DAPL_FD_ERROR;
-}
-
-static int dapl_select(struct dapl_fd_set *set)
-{
- int ret;
-
- dapl_dbg_log(DAPL_DBG_TYPE_CM, " dapl_select: sleep\n");
- ret = select(0, &set->set[0], &set->set[1], &set->set[2], NULL);
- dapl_dbg_log(DAPL_DBG_TYPE_CM, " dapl_select: wakeup\n");
-
- if (ret == SOCKET_ERROR)
- dapl_dbg_log(DAPL_DBG_TYPE_CM,
- " dapl_select: error 0x%x\n", WSAGetLastError());
-
- return ret;
-}
-#else // _WIN32 || _WIN64
-enum DAPL_FD_EVENTS {
- DAPL_FD_READ = POLLIN,
- DAPL_FD_WRITE = POLLOUT,
- DAPL_FD_ERROR = POLLERR
-};
-
-static int dapl_config_socket(DAPL_SOCKET s)
-{
- int ret;
-
- ret = fcntl(s, F_GETFL);
- if (ret >= 0)
- ret = fcntl(s, F_SETFL, ret | O_NONBLOCK);
- return ret;
-}
-
-static int dapl_connect_socket(DAPL_SOCKET s, struct sockaddr *addr,
- int addrlen)
-{
- int ret;
-
- ret = connect(s, addr, addrlen);
-
- return (errno == EINPROGRESS) ? EAGAIN : ret;
-}
-
-struct dapl_fd_set {
- int index;
- struct pollfd set[DAPL_FD_SETSIZE];
-};
-
-static struct dapl_fd_set *dapl_alloc_fd_set(void)
-{
- return dapl_os_alloc(sizeof(struct dapl_fd_set));
-}
-
-static void dapl_fd_zero(struct dapl_fd_set *set)
-{
- set->index = 0;
-}
-
-static int dapl_fd_set(DAPL_SOCKET s, struct dapl_fd_set *set,
- enum DAPL_FD_EVENTS event)
-{
- if (set->index == DAPL_FD_SETSIZE - 1) {
- dapl_log(DAPL_DBG_TYPE_ERR,
- "SCM ERR: cm_thread exceeded FD_SETSIZE %d\n",
- set->index + 1);
- return -1;
- }
-
- set->set[set->index].fd = s;
- set->set[set->index].revents = 0;
- set->set[set->index++].events = event;
- return 0;
-}
-
-static enum DAPL_FD_EVENTS dapl_poll(DAPL_SOCKET s, enum DAPL_FD_EVENTS event)
-{
- struct pollfd fds;
- int ret;
-
- fds.fd = s;
- fds.events = event;
- fds.revents = 0;
- ret = poll(&fds, 1, 0);
- dapl_log(DAPL_DBG_TYPE_CM, " dapl_poll: fd=%d ret=%d, evnts=0x%x\n",
- s, ret, fds.revents);
- if (ret == 0)
- return 0;
- else if (fds.revents & (POLLERR | POLLHUP | POLLNVAL))
- return DAPL_FD_ERROR;
- else
- return fds.revents;
-}
-
-static int dapl_select(struct dapl_fd_set *set)
-{
- int ret;
-
- dapl_dbg_log(DAPL_DBG_TYPE_CM, " dapl_select: sleep, fds=%d\n",
- set->index);
- ret = poll(set->set, set->index, -1);
- dapl_dbg_log(DAPL_DBG_TYPE_CM, " dapl_select: wakeup, ret=0x%x\n", ret);
- return ret;
-}
-#endif
-
-static struct ib_cm_handle *dapli_cm_create(void)
-{
- struct ib_cm_handle *cm_ptr;
-
- /* Allocate CM, init lock, and initialize */
- if ((cm_ptr = dapl_os_alloc(sizeof(*cm_ptr))) == NULL)
- return NULL;
-
- (void)dapl_os_memzero(cm_ptr, sizeof(*cm_ptr));
- if (dapl_os_lock_init(&cm_ptr->lock))
- goto bail;
-
- cm_ptr->dst.ver = htons(DSCM_VER);
- cm_ptr->socket = DAPL_INVALID_SOCKET;
- return cm_ptr;
- bail:
- dapl_os_free(cm_ptr, sizeof(*cm_ptr));
- return NULL;
-}
-
-/* mark for destroy, remove all references, schedule cleanup */
-static void dapli_cm_destroy(struct ib_cm_handle *cm_ptr)
-{
- dapl_dbg_log(DAPL_DBG_TYPE_CM,
- " cm_destroy: cm %p ep %p\n", cm_ptr, cm_ptr->ep);
-
- /* cleanup, never made it to work queue */
- if (cm_ptr->state == SCM_INIT) {
- if (cm_ptr->socket != DAPL_INVALID_SOCKET) {
- shutdown(cm_ptr->socket, SHUT_RDWR);
- closesocket(cm_ptr->socket);
- }
- dapl_os_free(cm_ptr, sizeof(*cm_ptr));
- return;
- }
-
- dapl_os_lock(&cm_ptr->lock);
- cm_ptr->state = SCM_DESTROY;
- if ((cm_ptr->ep) && (cm_ptr->ep->cm_handle == cm_ptr)) {
- cm_ptr->ep->cm_handle = IB_INVALID_HANDLE;
- cm_ptr->ep = NULL;
- }
-
- /* close socket if still active */
- if (cm_ptr->socket != DAPL_INVALID_SOCKET) {
- shutdown(cm_ptr->socket, SHUT_RDWR);
- closesocket(cm_ptr->socket);
- cm_ptr->socket = DAPL_INVALID_SOCKET;
- }
- dapl_os_unlock(&cm_ptr->lock);
-
- /* wakeup work thread */
- if (send(cm_ptr->hca->ib_trans.scm[1], "w", sizeof "w", 0) == -1)
- dapl_log(DAPL_DBG_TYPE_CM,
- " cm_destroy: thread wakeup error = %s\n",
- strerror(errno));
-}
-
-/* queue socket for processing CM work */
-static void dapli_cm_queue(struct ib_cm_handle *cm_ptr)
-{
- /* add to work queue for cr thread processing */
- dapl_llist_init_entry((DAPL_LLIST_ENTRY *) & cm_ptr->entry);
- dapl_os_lock(&cm_ptr->hca->ib_trans.lock);
- dapl_llist_add_tail(&cm_ptr->hca->ib_trans.list,
- (DAPL_LLIST_ENTRY *) & cm_ptr->entry, cm_ptr);
- dapl_os_unlock(&cm_ptr->hca->ib_trans.lock);
-
- /* wakeup CM work thread */
- if (send(cm_ptr->hca->ib_trans.scm[1], "w", sizeof "w", 0) == -1)
- dapl_log(DAPL_DBG_TYPE_CM,
- " cm_queue: thread wakeup error = %s\n",
- strerror(errno));
-}
-
-/*
- * ACTIVE/PASSIVE: called from CR thread or consumer via ep_disconnect
- * or from ep_free
- */
-DAT_RETURN dapli_socket_disconnect(dp_ib_cm_handle_t cm_ptr)
-{
- DAPL_EP *ep_ptr = cm_ptr->ep;
- DAT_UINT32 disc_data = htonl(0xdead);
-
- if (ep_ptr == NULL)
- return DAT_SUCCESS;
-
- dapl_os_lock(&cm_ptr->lock);
- if ((cm_ptr->state == SCM_INIT) ||
- (cm_ptr->state == SCM_DISCONNECTED) ||
- (cm_ptr->state == SCM_DESTROY)) {
- dapl_os_unlock(&cm_ptr->lock);
- return DAT_SUCCESS;
- } else {
- /* send disc date, close socket, schedule destroy */
- if (cm_ptr->socket != DAPL_INVALID_SOCKET) {
- if (send(cm_ptr->socket, (char *)&disc_data,
- sizeof(disc_data), 0) == -1)
- dapl_log(DAPL_DBG_TYPE_WARN,
- " cm_disc: write error = %s\n",
- strerror(errno));
- shutdown(cm_ptr->socket, SHUT_RDWR);
- closesocket(cm_ptr->socket);
- cm_ptr->socket = DAPL_INVALID_SOCKET;
- }
- cm_ptr->state = SCM_DISCONNECTED;
- }
- dapl_os_unlock(&cm_ptr->lock);
-
- /* disconnect events for RC's only */
- if (ep_ptr->param.ep_attr.service_type == DAT_SERVICE_TYPE_RC) {
- if (ep_ptr->cr_ptr) {
- dapls_cr_callback(cm_ptr,
- IB_CME_DISCONNECTED,
- NULL,
- ((DAPL_CR *) ep_ptr->cr_ptr)->sp_ptr);
- } else {
- dapl_evd_connection_callback(ep_ptr->cm_handle,
- IB_CME_DISCONNECTED,
- NULL, ep_ptr);
- }
- }
-
- /* scheduled destroy via disconnect clean in callback */
- return DAT_SUCCESS;
-}
-
-/*
- * ACTIVE: socket connected, send QP information to peer
- */
-static void dapli_socket_connected(dp_ib_cm_handle_t cm_ptr, int err)
-{
- int len, opt = 1;
- struct iovec iov[2];
- struct dapl_ep *ep_ptr = cm_ptr->ep;
-
- if (err) {
- dapl_log(DAPL_DBG_TYPE_ERR,
- " CONN_PENDING: %s ERR %s -> %s %d\n",
- err == -1 ? "POLL" : "SOCKOPT",
- err == -1 ? strerror(errno) : strerror(err),
- inet_ntoa(((struct sockaddr_in *)
- ep_ptr->param.
- remote_ia_address_ptr)->sin_addr),
- ntohs(((struct sockaddr_in *)
- &cm_ptr->dst.ia_address)->sin_port));
- goto bail;
- }
- dapl_dbg_log(DAPL_DBG_TYPE_EP,
- " socket connected, write QP and private data\n");
-
- /* no delay for small packets */
- setsockopt(cm_ptr->socket, IPPROTO_TCP, TCP_NODELAY,
- (char *)&opt, sizeof(opt));
-
- /* send qp info and pdata to remote peer */
- iov[0].iov_base = (void *)&cm_ptr->dst;
- iov[0].iov_len = sizeof(ib_qp_cm_t);
- if (cm_ptr->dst.p_size) {
- iov[1].iov_base = cm_ptr->p_data;
- iov[1].iov_len = ntohl(cm_ptr->dst.p_size);
- len = writev(cm_ptr->socket, iov, 2);
- } else {
- len = writev(cm_ptr->socket, iov, 1);
- }
-
- if (len != (ntohl(cm_ptr->dst.p_size) + sizeof(ib_qp_cm_t))) {
- dapl_log(DAPL_DBG_TYPE_ERR,
- " CONN_PENDING write: ERR %s, wcnt=%d -> %s\n",
- strerror(errno), len, inet_ntoa(((struct sockaddr_in *)
- ep_ptr->param.
- remote_ia_address_ptr)->
- sin_addr));
- goto bail;
- }
- dapl_dbg_log(DAPL_DBG_TYPE_CM,
- " connected: sending SRC port=0x%x lid=0x%x,"
- " qpn=0x%x, psize=%d\n",
- ntohs(cm_ptr->dst.port), ntohs(cm_ptr->dst.lid),
- ntohl(cm_ptr->dst.qpn), ntohl(cm_ptr->dst.p_size));
- dapl_dbg_log(DAPL_DBG_TYPE_CM,
- " connected: sending SRC GID subnet %016llx id %016llx\n",
- (unsigned long long)
- htonll(cm_ptr->dst.gid.global.subnet_prefix),
- (unsigned long long)
- htonll(cm_ptr->dst.gid.global.interface_id));
-
- /* queue up to work thread to avoid blocking consumer */
- cm_ptr->state = SCM_RTU_PENDING;
- return;
- bail:
- /* close socket, free cm structure and post error event */
- dapli_cm_destroy(cm_ptr);
- dapl_evd_connection_callback(NULL, IB_CME_LOCAL_FAILURE, NULL, ep_ptr);
-}
-
-/*
- * ACTIVE: Create socket, connect, defer exchange QP information to CR thread
- * to avoid blocking.
- */
-DAT_RETURN
-dapli_socket_connect(DAPL_EP * ep_ptr,
- DAT_IA_ADDRESS_PTR r_addr,
- DAT_CONN_QUAL r_qual, DAT_COUNT p_size, DAT_PVOID p_data)
-{
- dp_ib_cm_handle_t cm_ptr;
- int ret;
- DAPL_IA *ia_ptr = ep_ptr->header.owner_ia;
- struct sockaddr_in addr;
-
- dapl_dbg_log(DAPL_DBG_TYPE_EP, " connect: r_qual %d p_size=%d\n",
- r_qual, p_size);
-
- cm_ptr = dapli_cm_create();
- if (cm_ptr == NULL)
- return DAT_INSUFFICIENT_RESOURCES;
-
- /* create, connect, sockopt, and exchange QP information */
- if ((cm_ptr->socket =
- socket(AF_INET, SOCK_STREAM, 0)) == DAPL_INVALID_SOCKET) {
- dapl_os_free(cm_ptr, sizeof(*cm_ptr));
- return DAT_INSUFFICIENT_RESOURCES;
- }
-
- ret = dapl_config_socket(cm_ptr->socket);
- if (ret < 0) {
- dapl_log(DAPL_DBG_TYPE_ERR,
- " socket connect: config socket %d ERR %d %s\n",
- cm_ptr->socket, ret, strerror(errno));
- goto bail;
- }
-
- dapl_os_memcpy(&addr, r_addr, sizeof(addr));
- addr.sin_port = htons(r_qual);
- ret = dapl_connect_socket(cm_ptr->socket, (struct sockaddr *)&addr,
- sizeof(addr));
- if (ret && ret != EAGAIN) {
- dapl_log(DAPL_DBG_TYPE_ERR,
- " socket connect ERROR: %s -> %s r_qual %d\n",
- strerror(errno),
- inet_ntoa(addr.sin_addr), (unsigned int)r_qual);
- dapli_cm_destroy(cm_ptr);
- return DAT_INVALID_ADDRESS;
- }
-
- /* Send QP info, IA address, and private data */
- cm_ptr->dst.qpn = htonl(ep_ptr->qp_handle->qp_num);
-#ifdef DAT_EXTENSIONS
- cm_ptr->dst.qp_type = htons(ep_ptr->qp_handle->qp_type);
-#endif
- cm_ptr->dst.port = htons(ia_ptr->hca_ptr->port_num);
- cm_ptr->dst.lid = ia_ptr->hca_ptr->ib_trans.lid;
- cm_ptr->dst.gid = ia_ptr->hca_ptr->ib_trans.gid;
-
- /* save references */
- cm_ptr->hca = ia_ptr->hca_ptr;
- cm_ptr->ep = ep_ptr;
- cm_ptr->dst.ia_address = ia_ptr->hca_ptr->hca_address;
- ((struct sockaddr_in *)
- &cm_ptr->dst.ia_address)->sin_port = ntohs(r_qual);
-
- if (p_size) {
- cm_ptr->dst.p_size = htonl(p_size);
- dapl_os_memcpy(cm_ptr->p_data, p_data, p_size);
- }
-
- /* connected or pending, either way results via async event */
- if (ret == 0)
- dapli_socket_connected(cm_ptr, 0);
- else
- cm_ptr->state = SCM_CONN_PENDING;
-
- dapl_dbg_log(DAPL_DBG_TYPE_EP,
- " connect: socket %d to %s r_qual %d pending\n",
- cm_ptr->socket,
- inet_ntoa(addr.sin_addr), (unsigned int)r_qual);
-
- dapli_cm_queue(cm_ptr);
- return DAT_SUCCESS;
- bail:
- dapl_log(DAPL_DBG_TYPE_ERR,
- " socket connect ERROR: %s query lid(0x%x)/gid"
- " -> %s r_qual %d\n",
- strerror(errno), ntohs(cm_ptr->dst.lid),
- inet_ntoa(((struct sockaddr_in *)r_addr)->sin_addr),
- (unsigned int)r_qual);
-
- /* close socket, free cm structure */
- dapli_cm_destroy(cm_ptr);
- return DAT_INTERNAL_ERROR;
-}
-
-/*
- * ACTIVE: exchange QP information, called from CR thread
- */
-static void dapli_socket_connect_rtu(dp_ib_cm_handle_t cm_ptr)
-{
- DAPL_EP *ep_ptr = cm_ptr->ep;
- int len;
- short rtu_data = htons(0x0E0F);
- ib_cm_events_t event = IB_CME_DESTINATION_REJECT;
-
- /* read DST information into cm_ptr, overwrite SRC info */
- dapl_dbg_log(DAPL_DBG_TYPE_EP, " connect_rtu: recv peer QP data\n");
-
- len = recv(cm_ptr->socket, (char *)&cm_ptr->dst, sizeof(ib_qp_cm_t), 0);
- if (len != sizeof(ib_qp_cm_t) || ntohs(cm_ptr->dst.ver) != DSCM_VER) {
- dapl_log(DAPL_DBG_TYPE_ERR,
- " CONN_RTU read: ERR %s, rcnt=%d, ver=%d -> %s\n",
- strerror(errno), len, cm_ptr->dst.ver,
- inet_ntoa(((struct sockaddr_in *)
- ep_ptr->param.remote_ia_address_ptr)->
- sin_addr));
- goto bail;
- }
-
- /* convert peer response values to host order */
- cm_ptr->dst.port = ntohs(cm_ptr->dst.port);
- cm_ptr->dst.lid = ntohs(cm_ptr->dst.lid);
- cm_ptr->dst.qpn = ntohl(cm_ptr->dst.qpn);
-#ifdef DAT_EXTENSIONS
- cm_ptr->dst.qp_type = ntohs(cm_ptr->dst.qp_type);
-#endif
- cm_ptr->dst.p_size = ntohl(cm_ptr->dst.p_size);
-
- /* save remote address information */
- dapl_os_memcpy(&ep_ptr->remote_ia_address,
- &cm_ptr->dst.ia_address,
- sizeof(ep_ptr->remote_ia_address));
-
- dapl_dbg_log(DAPL_DBG_TYPE_EP,
- " CONN_RTU: DST %s port=0x%x lid=0x%x,"
- " qpn=0x%x, qp_type=%d, psize=%d\n",
- inet_ntoa(((struct sockaddr_in *)
- &cm_ptr->dst.ia_address)->sin_addr),
- cm_ptr->dst.port, cm_ptr->dst.lid,
- cm_ptr->dst.qpn, cm_ptr->dst.qp_type, cm_ptr->dst.p_size);
-
- /* validate private data size before reading */
- if (cm_ptr->dst.p_size > IB_MAX_REP_PDATA_SIZE) {
- dapl_log(DAPL_DBG_TYPE_ERR,
- " CONN_RTU read: psize (%d) wrong -> %s\n",
- cm_ptr->dst.p_size, inet_ntoa(((struct sockaddr_in *)
- ep_ptr->param.
- remote_ia_address_ptr)->
- sin_addr));
- goto bail;
- }
-
- /* read private data into cm_handle if any present */
- dapl_dbg_log(DAPL_DBG_TYPE_EP,
- " socket connected, read private data\n");
- if (cm_ptr->dst.p_size) {
- len =
- recv(cm_ptr->socket, cm_ptr->p_data, cm_ptr->dst.p_size, 0);
- if (len != cm_ptr->dst.p_size) {
- dapl_log(DAPL_DBG_TYPE_ERR,
- " CONN_RTU read pdata: ERR %s, rcnt=%d -> %s\n",
- strerror(errno), len,
- inet_ntoa(((struct sockaddr_in *)
- ep_ptr->param.
- remote_ia_address_ptr)->sin_addr));
- goto bail;
- }
- }
-
- /* check for consumer reject */
- if (cm_ptr->dst.rej) {
- dapl_log(DAPL_DBG_TYPE_CM,
- " CONN_RTU read: PEER REJ reason=0x%x -> %s\n",
- ntohs(cm_ptr->dst.rej),
- inet_ntoa(((struct sockaddr_in *)
- ep_ptr->param.remote_ia_address_ptr)->
- sin_addr));
- event = IB_CME_DESTINATION_REJECT_PRIVATE_DATA;
-#ifdef DAT_EXTENSIONS
- if (cm_ptr->dst.qp_type == IBV_QPT_UD)
- goto ud_bail;
- else
-#endif
- goto bail;
- }
-
- /* modify QP to RTR and then to RTS with remote info */
- dapl_os_lock(&ep_ptr->header.lock);
- if (dapls_modify_qp_state(ep_ptr->qp_handle,
- IBV_QPS_RTR, cm_ptr) != DAT_SUCCESS) {
- dapl_log(DAPL_DBG_TYPE_ERR,
- " CONN_RTU: QPS_RTR ERR %s -> %s\n",
- strerror(errno), inet_ntoa(((struct sockaddr_in *)
- ep_ptr->param.
- remote_ia_address_ptr)->
- sin_addr));
- dapl_os_unlock(&ep_ptr->header.lock);
- goto bail;
- }
- if (dapls_modify_qp_state(ep_ptr->qp_handle,
- IBV_QPS_RTS, cm_ptr) != DAT_SUCCESS) {
- dapl_log(DAPL_DBG_TYPE_ERR,
- " CONN_RTU: QPS_RTS ERR %s -> %s\n",
- strerror(errno), inet_ntoa(((struct sockaddr_in *)
- ep_ptr->param.
- remote_ia_address_ptr)->
- sin_addr));
- dapl_os_unlock(&ep_ptr->header.lock);
- goto bail;
- }
- dapl_os_unlock(&ep_ptr->header.lock);
- dapl_dbg_log(DAPL_DBG_TYPE_EP, " connect_rtu: send RTU\n");
-
- /* complete handshake after final QP state change */
- if (send(cm_ptr->socket, (char *)&rtu_data, sizeof(rtu_data), 0) == -1) {
- dapl_log(DAPL_DBG_TYPE_ERR,
- " CONN_RTU: write error = %s\n", strerror(errno));
- goto bail;
- }
- /* init cm_handle and post the event with private data */
- cm_ptr->state = SCM_CONNECTED;
- event = IB_CME_CONNECTED;
- dapl_dbg_log(DAPL_DBG_TYPE_EP, " ACTIVE: connected!\n");
-
-#ifdef DAT_EXTENSIONS
-ud_bail:
- if (cm_ptr->dst.qp_type == IBV_QPT_UD) {
- DAT_IB_EXTENSION_EVENT_DATA xevent;
-
- /* post EVENT, modify_qp created ah */
- xevent.status = 0;
- xevent.type = DAT_IB_UD_REMOTE_AH;
- xevent.remote_ah.ah = cm_ptr->ah;
- xevent.remote_ah.qpn = cm_ptr->dst.qpn;
- dapl_os_memcpy(&xevent.remote_ah.ia_addr,
- &cm_ptr->dst.ia_address,
- sizeof(cm_ptr->dst.ia_address));
-
- if (event == IB_CME_CONNECTED)
- event = DAT_IB_UD_CONNECTION_EVENT_ESTABLISHED;
- else
- event = DAT_IB_UD_CONNECTION_REJECT_EVENT;
-
- dapls_evd_post_connection_event_ext((DAPL_EVD *) ep_ptr->param.
- connect_evd_handle,
- event,
- (DAT_EP_HANDLE) ep_ptr,
- (DAT_COUNT) cm_ptr->dst.p_size,
- (DAT_PVOID *) cm_ptr->p_data,
- (DAT_PVOID *) &xevent);
-
- /* done with socket, don't destroy cm_ptr, need pdata */
- closesocket(cm_ptr->socket);
- cm_ptr->socket = DAPL_INVALID_SOCKET;
- cm_ptr->state = SCM_RELEASED;
- } else
-#endif
- {
- ep_ptr->cm_handle = cm_ptr; /* only RC, multi CR's on UD */
- dapl_evd_connection_callback(cm_ptr,
- IB_CME_CONNECTED,
- cm_ptr->p_data, ep_ptr);
- }
- return;
-
-bail:
- /* close socket, and post error event */
- dapls_ib_reinit_ep(ep_ptr); /* reset QP state */
- closesocket(cm_ptr->socket);
- cm_ptr->socket = DAPL_INVALID_SOCKET;
- dapl_evd_connection_callback(NULL, event, cm_ptr->p_data, ep_ptr);
-}
-
-/*
- * PASSIVE: Create socket, listen, accept, exchange QP information
- */
-DAT_RETURN
-dapli_socket_listen(DAPL_IA * ia_ptr, DAT_CONN_QUAL serviceID, DAPL_SP * sp_ptr)
-{
- struct sockaddr_in addr;
- ib_cm_srvc_handle_t cm_ptr = NULL;
- int opt = 1;
- DAT_RETURN dat_status = DAT_SUCCESS;
-
- dapl_dbg_log(DAPL_DBG_TYPE_EP,
- " listen(ia_ptr %p ServiceID %d sp_ptr %p)\n",
- ia_ptr, serviceID, sp_ptr);
-
- cm_ptr = dapli_cm_create();
- if (cm_ptr == NULL)
- return DAT_INSUFFICIENT_RESOURCES;
-
- cm_ptr->sp = sp_ptr;
- cm_ptr->hca = ia_ptr->hca_ptr;
-
- /* bind, listen, set sockopt, accept, exchange data */
- if ((cm_ptr->socket =
- socket(AF_INET, SOCK_STREAM, 0)) == DAPL_INVALID_SOCKET) {
- dapl_log(DAPL_DBG_TYPE_ERR, " ERR: listen socket create: %s\n",
- strerror(errno));
- dat_status = DAT_INSUFFICIENT_RESOURCES;
- goto bail;
- }
-
- setsockopt(cm_ptr->socket, SOL_SOCKET, SO_REUSEADDR,
- (char *)&opt, sizeof(opt));
- addr.sin_port = htons(serviceID);
- addr.sin_family = AF_INET;
- addr.sin_addr.s_addr = INADDR_ANY;
-
- if ((bind(cm_ptr->socket, (struct sockaddr *)&addr, sizeof(addr)) < 0)
- || (listen(cm_ptr->socket, 128) < 0)) {
- dapl_dbg_log(DAPL_DBG_TYPE_CM,
- " listen: ERROR %s on conn_qual 0x%x\n",
- strerror(errno), serviceID);
- if (errno == EADDRINUSE)
- dat_status = DAT_CONN_QUAL_IN_USE;
- else
- dat_status = DAT_CONN_QUAL_UNAVAILABLE;
- goto bail;
- }
-
- /* set cm_handle for this service point, save listen socket */
- sp_ptr->cm_srvc_handle = cm_ptr;
-
- /* queue up listen socket to process inbound CR's */
- cm_ptr->state = SCM_LISTEN;
- dapli_cm_queue(cm_ptr);
-
- dapl_dbg_log(DAPL_DBG_TYPE_CM,
- " listen: qual 0x%x cr %p s_fd %d\n",
- ntohs(serviceID), cm_ptr, cm_ptr->socket);
-
- return dat_status;
- bail:
- dapl_dbg_log(DAPL_DBG_TYPE_CM,
- " listen: ERROR on conn_qual 0x%x\n", serviceID);
- dapli_cm_destroy(cm_ptr);
- return dat_status;
-}
-
-/*
- * PASSIVE: accept socket
- */
-static void dapli_socket_accept(ib_cm_srvc_handle_t cm_ptr)
-{
- dp_ib_cm_handle_t acm_ptr;
- int len;
-
- dapl_dbg_log(DAPL_DBG_TYPE_EP, " socket_accept\n");
-
- /*
- * Accept all CR's on this port to avoid half-connection (SYN_RCV)
-	 * stalls with many-to-one connection storms
- */
- do {
- /* Allocate accept CM and initialize */
- if ((acm_ptr = dapli_cm_create()) == NULL)
- return;
-
- acm_ptr->sp = cm_ptr->sp;
- acm_ptr->hca = cm_ptr->hca;
-
- len = sizeof(acm_ptr->dst.ia_address);
- acm_ptr->socket = accept(cm_ptr->socket,
- (struct sockaddr *)
- &acm_ptr->dst.ia_address,
- (socklen_t *) & len);
- if (acm_ptr->socket == DAPL_INVALID_SOCKET) {
- dapl_log(DAPL_DBG_TYPE_ERR,
- " accept: ERR %s on FD %d l_cr %p\n",
- strerror(errno), cm_ptr->socket, cm_ptr);
- dapli_cm_destroy(acm_ptr);
- return;
- }
-
- acm_ptr->state = SCM_ACCEPTING;
- dapli_cm_queue(acm_ptr);
-
- } while (dapl_poll(cm_ptr->socket, DAPL_FD_READ) == DAPL_FD_READ);
-}
-
-/*
- * PASSIVE: receive peer QP information, private data, post cr_event
- */
-static void dapli_socket_accept_data(ib_cm_srvc_handle_t acm_ptr)
-{
- int len;
- void *p_data = NULL;
-
- dapl_dbg_log(DAPL_DBG_TYPE_EP, " socket accepted, read QP data\n");
-
- /* read in DST QP info, IA address. check for private data */
- len =
- recv(acm_ptr->socket, (char *)&acm_ptr->dst, sizeof(ib_qp_cm_t), 0);
- if (len != sizeof(ib_qp_cm_t) || ntohs(acm_ptr->dst.ver) != DSCM_VER) {
- dapl_log(DAPL_DBG_TYPE_ERR,
- " accept read: ERR %s, rcnt=%d, ver=%d\n",
- strerror(errno), len, ntohs(acm_ptr->dst.ver));
- goto bail;
- }
-
- /* convert accepted values to host order */
- acm_ptr->dst.port = ntohs(acm_ptr->dst.port);
- acm_ptr->dst.lid = ntohs(acm_ptr->dst.lid);
- acm_ptr->dst.qpn = ntohl(acm_ptr->dst.qpn);
-#ifdef DAT_EXTENSIONS
- acm_ptr->dst.qp_type = ntohs(acm_ptr->dst.qp_type);
-#endif
- acm_ptr->dst.p_size = ntohl(acm_ptr->dst.p_size);
-
- dapl_dbg_log(DAPL_DBG_TYPE_EP,
- " accept: DST %s port=0x%x lid=0x%x, qpn=0x%x, psize=%d\n",
- inet_ntoa(((struct sockaddr_in *)&acm_ptr->dst.
- ia_address)->sin_addr), acm_ptr->dst.port,
- acm_ptr->dst.lid, acm_ptr->dst.qpn, acm_ptr->dst.p_size);
-
- /* validate private data size before reading */
- if (acm_ptr->dst.p_size > IB_MAX_REQ_PDATA_SIZE) {
- dapl_dbg_log(DAPL_DBG_TYPE_ERR,
- " accept read: psize (%d) wrong\n",
- acm_ptr->dst.p_size);
- goto bail;
- }
-
- dapl_dbg_log(DAPL_DBG_TYPE_EP, " socket accepted, read private data\n");
-
- /* read private data into cm_handle if any present */
- if (acm_ptr->dst.p_size) {
- len =
- recv(acm_ptr->socket, acm_ptr->p_data, acm_ptr->dst.p_size,
- 0);
- if (len != acm_ptr->dst.p_size) {
- dapl_log(DAPL_DBG_TYPE_ERR,
- " accept read pdata: ERR %s, rcnt=%d\n",
- strerror(errno), len);
- goto bail;
- }
- dapl_dbg_log(DAPL_DBG_TYPE_EP, " accept: psize=%d read\n", len);
- p_data = acm_ptr->p_data;
- }
-
- acm_ptr->state = SCM_ACCEPTING_DATA;
-
-#ifdef DAT_EXTENSIONS
- if (acm_ptr->dst.qp_type == IBV_QPT_UD) {
- DAT_IB_EXTENSION_EVENT_DATA xevent;
-
- /* post EVENT, modify_qp created ah */
- xevent.status = 0;
- xevent.type = DAT_IB_UD_CONNECT_REQUEST;
-
- dapls_evd_post_cr_event_ext(acm_ptr->sp,
- DAT_IB_UD_CONNECTION_REQUEST_EVENT,
- acm_ptr,
- (DAT_COUNT) acm_ptr->dst.p_size,
- (DAT_PVOID *) acm_ptr->p_data,
- (DAT_PVOID *) & xevent);
- } else
-#endif
- /* trigger CR event and return SUCCESS */
- dapls_cr_callback(acm_ptr,
- IB_CME_CONNECTION_REQUEST_PENDING,
- p_data, acm_ptr->sp);
- return;
- bail:
- /* close socket, free cm structure, active will see socket close as reject */
- dapli_cm_destroy(acm_ptr);
- return;
-}
-
-/*
- * PASSIVE: consumer accept, send local QP information, private data,
- * queue on work thread to receive RTU information to avoid blocking
- * user thread.
- */
-DAT_RETURN
-dapli_socket_accept_usr(DAPL_EP * ep_ptr,
- DAPL_CR * cr_ptr, DAT_COUNT p_size, DAT_PVOID p_data)
-{
- DAPL_IA *ia_ptr = ep_ptr->header.owner_ia;
- dp_ib_cm_handle_t cm_ptr = cr_ptr->ib_cm_handle;
- ib_qp_cm_t local;
- struct iovec iov[2];
- int len;
-
- if (p_size > IB_MAX_REP_PDATA_SIZE)
- return DAT_LENGTH_ERROR;
-
-	/* must have an accepted socket */
- if (cm_ptr->socket == DAPL_INVALID_SOCKET)
- return DAT_INTERNAL_ERROR;
-
- dapl_dbg_log(DAPL_DBG_TYPE_EP,
- " ACCEPT_USR: remote port=0x%x lid=0x%x"
- " qpn=0x%x qp_type %d, psize=%d\n",
- cm_ptr->dst.port, cm_ptr->dst.lid,
- cm_ptr->dst.qpn, cm_ptr->dst.qp_type, cm_ptr->dst.p_size);
-
-#ifdef DAT_EXTENSIONS
- if (cm_ptr->dst.qp_type == IBV_QPT_UD &&
- ep_ptr->qp_handle->qp_type != IBV_QPT_UD) {
- dapl_dbg_log(DAPL_DBG_TYPE_ERR,
- " ACCEPT_USR: ERR remote QP is UD,"
-			    " but local QP is not\n");
- return (DAT_INVALID_HANDLE | DAT_INVALID_HANDLE_EP);
- }
-#endif
-
- /* modify QP to RTR and then to RTS with remote info already read */
- dapl_os_lock(&ep_ptr->header.lock);
- if (dapls_modify_qp_state(ep_ptr->qp_handle,
- IBV_QPS_RTR, cm_ptr) != DAT_SUCCESS) {
- dapl_log(DAPL_DBG_TYPE_ERR,
- " ACCEPT_USR: QPS_RTR ERR %s -> %s\n",
- strerror(errno), inet_ntoa(((struct sockaddr_in *)
- &cm_ptr->dst.ia_address)->
- sin_addr));
- dapl_os_unlock(&ep_ptr->header.lock);
- goto bail;
- }
- if (dapls_modify_qp_state(ep_ptr->qp_handle,
- IBV_QPS_RTS, cm_ptr) != DAT_SUCCESS) {
- dapl_log(DAPL_DBG_TYPE_ERR,
- " ACCEPT_USR: QPS_RTS ERR %s -> %s\n",
- strerror(errno), inet_ntoa(((struct sockaddr_in *)
- &cm_ptr->dst.ia_address)->
- sin_addr));
- dapl_os_unlock(&ep_ptr->header.lock);
- goto bail;
- }
- dapl_os_unlock(&ep_ptr->header.lock);
-
- /* save remote address information */
- dapl_os_memcpy(&ep_ptr->remote_ia_address,
- &cm_ptr->dst.ia_address,
- sizeof(ep_ptr->remote_ia_address));
-
- /* send our QP info, IA address, pdata. Don't overwrite dst data */
- local.ver = htons(DSCM_VER);
- local.rej = 0;
- local.qpn = htonl(ep_ptr->qp_handle->qp_num);
- local.qp_type = htons(ep_ptr->qp_handle->qp_type);
- local.port = htons(ia_ptr->hca_ptr->port_num);
- local.lid = ia_ptr->hca_ptr->ib_trans.lid;
- local.gid = ia_ptr->hca_ptr->ib_trans.gid;
- local.ia_address = ia_ptr->hca_ptr->hca_address;
- ((struct sockaddr_in *)&local.ia_address)->sin_port =
- ntohs(cm_ptr->sp->conn_qual);
-
- local.p_size = htonl(p_size);
- iov[0].iov_base = (void *)&local;
- iov[0].iov_len = sizeof(ib_qp_cm_t);
- if (p_size) {
- iov[1].iov_base = p_data;
- iov[1].iov_len = p_size;
- len = writev(cm_ptr->socket, iov, 2);
- } else {
- len = writev(cm_ptr->socket, iov, 1);
- }
-
- if (len != (p_size + sizeof(ib_qp_cm_t))) {
- dapl_log(DAPL_DBG_TYPE_ERR,
- " ACCEPT_USR: ERR %s, wcnt=%d -> %s\n",
- strerror(errno), len, inet_ntoa(((struct sockaddr_in *)
- &cm_ptr->dst.
- ia_address)->
- sin_addr));
- goto bail;
- }
-
- dapl_dbg_log(DAPL_DBG_TYPE_CM,
- " ACCEPT_USR: local port=0x%x lid=0x%x"
- " qpn=0x%x psize=%d\n",
- ntohs(local.port), ntohs(local.lid),
- ntohl(local.qpn), ntohl(local.p_size));
- dapl_dbg_log(DAPL_DBG_TYPE_CM,
- " ACCEPT_USR SRC GID subnet %016llx id %016llx\n",
- (unsigned long long)
- htonll(local.gid.global.subnet_prefix),
- (unsigned long long)
- htonll(local.gid.global.interface_id));
-
- /* save state and reference to EP, queue for RTU data */
- cm_ptr->ep = ep_ptr;
- cm_ptr->hca = ia_ptr->hca_ptr;
- cm_ptr->state = SCM_ACCEPTED;
-
- dapl_dbg_log(DAPL_DBG_TYPE_EP, " PASSIVE: accepted!\n");
- return DAT_SUCCESS;
- bail:
- dapli_cm_destroy(cm_ptr);
- dapls_ib_reinit_ep(ep_ptr); /* reset QP state */
- return DAT_INTERNAL_ERROR;
-}
-
-/*
- * PASSIVE: read RTU from active peer, post CONN event
- */
-void dapli_socket_accept_rtu(dp_ib_cm_handle_t cm_ptr)
-{
- int len;
- short rtu_data = 0;
-
- /* complete handshake after final QP state change */
- len = recv(cm_ptr->socket, (char *)&rtu_data, sizeof(rtu_data), 0);
- if (len != sizeof(rtu_data) || ntohs(rtu_data) != 0x0e0f) {
- dapl_log(DAPL_DBG_TYPE_ERR,
-			 " ACCEPT_RTU: ERR %s, rcnt=%d rdata=%x -> %s\n",
- strerror(errno), len, ntohs(rtu_data),
- inet_ntoa(((struct sockaddr_in *)
- &cm_ptr->dst.ia_address)->sin_addr));
- goto bail;
- }
-
- /* save state and reference to EP, queue for disc event */
- cm_ptr->state = SCM_CONNECTED;
-
- /* final data exchange if remote QP state is good to go */
- dapl_dbg_log(DAPL_DBG_TYPE_EP, " PASSIVE: connected!\n");
-
-#ifdef DAT_EXTENSIONS
- if (cm_ptr->dst.qp_type == IBV_QPT_UD) {
- DAT_IB_EXTENSION_EVENT_DATA xevent;
-
- /* post EVENT, modify_qp created ah */
- xevent.status = 0;
- xevent.type = DAT_IB_UD_PASSIVE_REMOTE_AH;
- xevent.remote_ah.ah = cm_ptr->ah;
- xevent.remote_ah.qpn = cm_ptr->dst.qpn;
- dapl_os_memcpy(&xevent.remote_ah.ia_addr,
- &cm_ptr->dst.ia_address,
- sizeof(cm_ptr->dst.ia_address));
-
- dapls_evd_post_connection_event_ext((DAPL_EVD *) cm_ptr->ep->
- param.connect_evd_handle,
- DAT_IB_UD_CONNECTION_EVENT_ESTABLISHED,
- (DAT_EP_HANDLE) cm_ptr->ep,
- (DAT_COUNT) cm_ptr->dst.p_size,
- (DAT_PVOID *) cm_ptr->p_data,
- (DAT_PVOID *) &xevent);
-
- /* done with socket, don't destroy cm_ptr, need pdata */
- closesocket(cm_ptr->socket);
- cm_ptr->socket = DAPL_INVALID_SOCKET;
- cm_ptr->state = SCM_RELEASED;
- } else
-#endif
- dapls_cr_callback(cm_ptr, IB_CME_CONNECTED, NULL, cm_ptr->sp);
- return;
- bail:
- dapls_ib_reinit_ep(cm_ptr->ep); /* reset QP state */
- dapli_cm_destroy(cm_ptr);
- dapls_cr_callback(cm_ptr, IB_CME_DESTINATION_REJECT, NULL, cm_ptr->sp);
-}
-
-/*
- * dapls_ib_connect
- *
- * Initiate a connection with the passive listener on another node
- *
- * Input:
- * ep_handle,
- * remote_ia_address,
- * remote_conn_qual,
- * prd_size size of private data and structure
- * prd_prt pointer to private data structure
- *
- * Output:
- * none
- *
- * Returns:
- * DAT_SUCCESS
- * DAT_INSUFFICIENT_RESOURCES
- * DAT_INVALID_PARAMETER
- *
- */
-DAT_RETURN
-dapls_ib_connect(IN DAT_EP_HANDLE ep_handle,
- IN DAT_IA_ADDRESS_PTR remote_ia_address,
- IN DAT_CONN_QUAL remote_conn_qual,
- IN DAT_COUNT private_data_size, IN void *private_data)
-{
- DAPL_EP *ep_ptr;
- ib_qp_handle_t qp_ptr;
-
- dapl_dbg_log(DAPL_DBG_TYPE_EP,
- " connect(ep_handle %p ....)\n", ep_handle);
-
- ep_ptr = (DAPL_EP *) ep_handle;
- qp_ptr = ep_ptr->qp_handle;
-
- return (dapli_socket_connect(ep_ptr, remote_ia_address,
- remote_conn_qual,
- private_data_size, private_data));
-}
-
-/*
- * dapls_ib_disconnect
- *
- * Disconnect an EP
- *
- * Input:
- * ep_handle,
- * disconnect_flags
- *
- * Output:
- * none
- *
- * Returns:
- * DAT_SUCCESS
- */
-DAT_RETURN
-dapls_ib_disconnect(IN DAPL_EP * ep_ptr, IN DAT_CLOSE_FLAGS close_flags)
-{
- dapl_dbg_log(DAPL_DBG_TYPE_EP,
- "dapls_ib_disconnect(ep_handle %p ....)\n", ep_ptr);
-
- /* reinit to modify QP state */
- dapls_ib_reinit_ep(ep_ptr);
-
- if (ep_ptr->cm_handle == NULL ||
- ep_ptr->param.ep_state == DAT_EP_STATE_DISCONNECTED)
- return DAT_SUCCESS;
- else
- return (dapli_socket_disconnect(ep_ptr->cm_handle));
-}
-
-/*
- * dapls_ib_disconnect_clean
- *
- * Clean up outstanding connection data. This routine is invoked
- * after the final disconnect callback has occurred, but only on the
- * ACTIVE side of a connection. It is also called if dat_ep_connect
- * times out using the consumer-supplied timeout value.
- *
- * Input:
- * ep_ptr DAPL_EP
- * active Indicates active side of connection
- *
- * Output:
- * none
- *
- * Returns:
- * void
- *
- */
-void
-dapls_ib_disconnect_clean(IN DAPL_EP * ep_ptr,
- IN DAT_BOOLEAN active,
- IN const ib_cm_events_t ib_cm_event)
-{
- /* NOTE: SCM will only initialize cm_handle with RC type
- *
-	 * For UD there can be many in-flight CR's, so you
-	 * cannot clean up timed-out CR's with the EP reference
-	 * alone since they share the same EP. The common
-	 * code that handles connection timeout logic needs
-	 * to be updated for UD support.
- */
- if (ep_ptr->cm_handle)
- dapli_cm_destroy(ep_ptr->cm_handle);
-
- return;
-}
-
-/*
- * dapl_ib_setup_conn_listener
- *
- * Have the CM set up a connection listener.
- *
- * Input:
- * ibm_hca_handle HCA handle
- * qp_handle QP handle
- *
- * Output:
- * none
- *
- * Returns:
- * DAT_SUCCESS
- * DAT_INSUFFICIENT_RESOURCES
- * DAT_INTERNAL_ERROR
- *	DAT_CONN_QUAL_UNAVAILABLE
- * DAT_CONN_QUAL_IN_USE
- *
- */
-DAT_RETURN
-dapls_ib_setup_conn_listener(IN DAPL_IA * ia_ptr,
- IN DAT_UINT64 ServiceID, IN DAPL_SP * sp_ptr)
-{
- return (dapli_socket_listen(ia_ptr, ServiceID, sp_ptr));
-}
-
-/*
- * dapl_ib_remove_conn_listener
- *
- * Have the CM remove a connection listener.
- *
- * Input:
- * ia_handle IA handle
- * ServiceID IB Channel Service ID
- *
- * Output:
- * none
- *
- * Returns:
- * DAT_SUCCESS
- * DAT_INVALID_STATE
- *
- */
-DAT_RETURN
-dapls_ib_remove_conn_listener(IN DAPL_IA * ia_ptr, IN DAPL_SP * sp_ptr)
-{
- ib_cm_srvc_handle_t cm_ptr = sp_ptr->cm_srvc_handle;
-
- dapl_dbg_log(DAPL_DBG_TYPE_EP,
- "dapls_ib_remove_conn_listener(ia_ptr %p sp_ptr %p cm_ptr %p)\n",
- ia_ptr, sp_ptr, cm_ptr);
-
-	/* close listen socket, mark cm_srvc_handle for destroy and return */
- if (cm_ptr != NULL) {
- if (cm_ptr->socket != DAPL_INVALID_SOCKET) {
- shutdown(cm_ptr->socket, SHUT_RDWR);
- closesocket(cm_ptr->socket);
- cm_ptr->socket = DAPL_INVALID_SOCKET;
- }
- /* cr_thread will free */
- cm_ptr->state = SCM_DESTROY;
- sp_ptr->cm_srvc_handle = NULL;
- if (send(cm_ptr->hca->ib_trans.scm[1],
- "w", sizeof "w", 0) == -1)
- dapl_log(DAPL_DBG_TYPE_CM,
- " cm_destroy: thread wakeup error = %s\n",
- strerror(errno));
- }
- return DAT_SUCCESS;
-}
-
-/*
- * dapls_ib_accept_connection
- *
- * Perform necessary steps to accept a connection
- *
- * Input:
- * cr_handle
- * ep_handle
- * private_data_size
- * private_data
- *
- * Output:
- * none
- *
- * Returns:
- * DAT_SUCCESS
- * DAT_INSUFFICIENT_RESOURCES
- * DAT_INTERNAL_ERROR
- *
- */
-DAT_RETURN
-dapls_ib_accept_connection(IN DAT_CR_HANDLE cr_handle,
- IN DAT_EP_HANDLE ep_handle,
- IN DAT_COUNT p_size, IN const DAT_PVOID p_data)
-{
- DAPL_CR *cr_ptr;
- DAPL_EP *ep_ptr;
-
- dapl_dbg_log(DAPL_DBG_TYPE_EP,
- "dapls_ib_accept_connection(cr %p ep %p prd %p,%d)\n",
- cr_handle, ep_handle, p_data, p_size);
-
- cr_ptr = (DAPL_CR *) cr_handle;
- ep_ptr = (DAPL_EP *) ep_handle;
-
- /* allocate and attach a QP if necessary */
- if (ep_ptr->qp_state == DAPL_QP_STATE_UNATTACHED) {
- DAT_RETURN status;
- status = dapls_ib_qp_alloc(ep_ptr->header.owner_ia,
- ep_ptr, ep_ptr);
- if (status != DAT_SUCCESS)
- return status;
- }
- return (dapli_socket_accept_usr(ep_ptr, cr_ptr, p_size, p_data));
-}
-
-/*
- * dapls_ib_reject_connection
- *
- * Reject a connection
- *
- * Input:
- * cr_handle
- *
- * Output:
- * none
- *
- * Returns:
- * DAT_SUCCESS
- * DAT_INTERNAL_ERROR
- *
- */
-DAT_RETURN
-dapls_ib_reject_connection(IN dp_ib_cm_handle_t cm_ptr,
- IN int reason,
- IN DAT_COUNT psize, IN const DAT_PVOID pdata)
-{
- struct iovec iov[2];
-
- dapl_dbg_log(DAPL_DBG_TYPE_EP,
- " reject(cm %p reason %x, pdata %p, psize %d)\n",
- cm_ptr, reason, pdata, psize);
-
- if (psize > IB_MAX_REJ_PDATA_SIZE)
- return DAT_LENGTH_ERROR;
-
- /* write reject data to indicate reject */
- if (cm_ptr->socket != DAPL_INVALID_SOCKET) {
- cm_ptr->dst.rej = (uint16_t) reason;
- cm_ptr->dst.rej = htons(cm_ptr->dst.rej);
- cm_ptr->dst.p_size = htonl(psize);
- /* get qp_type from request */
- cm_ptr->dst.qp_type = ntohs(cm_ptr->dst.qp_type);
-
- iov[0].iov_base = (void *)&cm_ptr->dst;
- iov[0].iov_len = sizeof(ib_qp_cm_t);
- if (psize) {
- iov[1].iov_base = pdata;
- iov[1].iov_len = psize;
- writev(cm_ptr->socket, iov, 2);
- } else {
- writev(cm_ptr->socket, iov, 1);
- }
-
- shutdown(cm_ptr->socket, SHUT_RDWR);
- closesocket(cm_ptr->socket);
- cm_ptr->socket = DAPL_INVALID_SOCKET;
- }
-
- /* cr_thread will destroy CR */
- cm_ptr->state = SCM_DESTROY;
- if (send(cm_ptr->hca->ib_trans.scm[1], "w", sizeof "w", 0) == -1)
- dapl_log(DAPL_DBG_TYPE_CM,
- " cm_destroy: thread wakeup error = %s\n",
- strerror(errno));
- return DAT_SUCCESS;
-}
-
-/*
- * dapls_ib_cm_remote_addr
- *
- * Obtain the remote IP address given a connection
- *
- * Input:
- * cr_handle
- *
- * Output:
- * remote_ia_address: where to place the remote address
- *
- * Returns:
- * DAT_SUCCESS
- * DAT_INVALID_HANDLE
- *
- */
-DAT_RETURN
-dapls_ib_cm_remote_addr(IN DAT_HANDLE dat_handle,
- OUT DAT_SOCK_ADDR6 * remote_ia_address)
-{
- DAPL_HEADER *header;
- dp_ib_cm_handle_t ib_cm_handle;
-
- dapl_dbg_log(DAPL_DBG_TYPE_EP,
- "dapls_ib_cm_remote_addr(dat_handle %p, ....)\n",
- dat_handle);
-
- header = (DAPL_HEADER *) dat_handle;
-
- if (header->magic == DAPL_MAGIC_EP)
- ib_cm_handle = ((DAPL_EP *) dat_handle)->cm_handle;
- else if (header->magic == DAPL_MAGIC_CR)
- ib_cm_handle = ((DAPL_CR *) dat_handle)->ib_cm_handle;
- else
- return DAT_INVALID_HANDLE;
-
- dapl_os_memcpy(remote_ia_address,
- &ib_cm_handle->dst.ia_address, sizeof(DAT_SOCK_ADDR6));
-
- return DAT_SUCCESS;
-}
-
-/*
- * dapls_ib_private_data_size
- *
- * Return the size of private data given a connection op type
- *
- * Input:
- * prd_ptr private data pointer
- * conn_op connection operation type
- *
- * If prd_ptr is NULL, this is a query for the max size supported by
- * the provider, otherwise it is the actual size of the private data
- * contained in prd_ptr.
- *
- *
- * Output:
- * None
- *
- * Returns:
- * length of private data
- *
- */
-int dapls_ib_private_data_size(IN DAPL_PRIVATE * prd_ptr,
- IN DAPL_PDATA_OP conn_op, IN DAPL_HCA * hca_ptr)
-{
- int size;
-
- switch (conn_op) {
- case DAPL_PDATA_CONN_REQ:
- {
- size = IB_MAX_REQ_PDATA_SIZE;
- break;
- }
- case DAPL_PDATA_CONN_REP:
- {
- size = IB_MAX_REP_PDATA_SIZE;
- break;
- }
- case DAPL_PDATA_CONN_REJ:
- {
- size = IB_MAX_REJ_PDATA_SIZE;
- break;
- }
- case DAPL_PDATA_CONN_DREQ:
- {
- size = IB_MAX_DREQ_PDATA_SIZE;
- break;
- }
- case DAPL_PDATA_CONN_DREP:
- {
- size = IB_MAX_DREP_PDATA_SIZE;
- break;
- }
- default:
- {
- size = 0;
- }
-
- } /* end case */
-
- return size;
-}
-
-/*
- * Map all socket CM event codes to the DAT equivalent.
- */
-#define DAPL_IB_EVENT_CNT 11
-
-static struct ib_cm_event_map {
- const ib_cm_events_t ib_cm_event;
- DAT_EVENT_NUMBER dat_event_num;
-} ib_cm_event_map[DAPL_IB_EVENT_CNT] = {
- /* 00 */ {
- IB_CME_CONNECTED, DAT_CONNECTION_EVENT_ESTABLISHED},
- /* 01 */ {
- IB_CME_DISCONNECTED, DAT_CONNECTION_EVENT_DISCONNECTED},
- /* 02 */ {
- IB_CME_DISCONNECTED_ON_LINK_DOWN,
- DAT_CONNECTION_EVENT_DISCONNECTED},
- /* 03 */ {
- IB_CME_CONNECTION_REQUEST_PENDING, DAT_CONNECTION_REQUEST_EVENT},
- /* 04 */ {
- IB_CME_CONNECTION_REQUEST_PENDING_PRIVATE_DATA,
- DAT_CONNECTION_REQUEST_EVENT},
- /* 05 */ {
- IB_CME_DESTINATION_REJECT,
- DAT_CONNECTION_EVENT_NON_PEER_REJECTED},
- /* 06 */ {
- IB_CME_DESTINATION_REJECT_PRIVATE_DATA,
- DAT_CONNECTION_EVENT_PEER_REJECTED},
- /* 07 */ {
- IB_CME_DESTINATION_UNREACHABLE, DAT_CONNECTION_EVENT_UNREACHABLE},
- /* 08 */ {
- IB_CME_TOO_MANY_CONNECTION_REQUESTS,
- DAT_CONNECTION_EVENT_NON_PEER_REJECTED},
- /* 09 */ {
- IB_CME_LOCAL_FAILURE, DAT_CONNECTION_EVENT_BROKEN},
- /* 10 */ {
- IB_CM_LOCAL_FAILURE, DAT_CONNECTION_EVENT_BROKEN}
-};
-
-/*
- * dapls_ib_get_dat_event
- *
- * Return a DAT connection event given a provider CM event.
- *
- * Input:
- *	ib_cm_event	event provided to the dapl callback routine
- *	active		switch indicating active or passive connection
- *
- * Output:
- *	none
- *
- * Returns:
- *	DAT_EVENT_NUMBER of translated provider value
- */
-DAT_EVENT_NUMBER
-dapls_ib_get_dat_event(IN const ib_cm_events_t ib_cm_event,
- IN DAT_BOOLEAN active)
-{
- DAT_EVENT_NUMBER dat_event_num;
- int i;
-
- active = active;
-
- if (ib_cm_event > IB_CM_LOCAL_FAILURE)
- return (DAT_EVENT_NUMBER) 0;
-
- dat_event_num = 0;
- for (i = 0; i < DAPL_IB_EVENT_CNT; i++) {
- if (ib_cm_event == ib_cm_event_map[i].ib_cm_event) {
- dat_event_num = ib_cm_event_map[i].dat_event_num;
- break;
- }
- }
- dapl_dbg_log(DAPL_DBG_TYPE_CALLBACK,
- "dapls_ib_get_dat_event: event translate(%s) ib=0x%x dat=0x%x\n",
- active ? "active" : "passive", ib_cm_event, dat_event_num);
-
- return dat_event_num;
-}
-
-/*
- * dapls_ib_get_cm_event
- *
- * Return a provider CM event given a DAT connection event.
- *
- * Input:
- *	dat_event_num	DAT event we need an equivalent CM event for
- *
- * Output:
- *	none
- *
- * Returns:
- *	ib_cm_event of translated DAPL value
- */
-ib_cm_events_t dapls_ib_get_cm_event(IN DAT_EVENT_NUMBER dat_event_num)
-{
- ib_cm_events_t ib_cm_event;
- int i;
-
- ib_cm_event = 0;
- for (i = 0; i < DAPL_IB_EVENT_CNT; i++) {
- if (dat_event_num == ib_cm_event_map[i].dat_event_num) {
- ib_cm_event = ib_cm_event_map[i].ib_cm_event;
- break;
- }
- }
- return ib_cm_event;
-}
-
-/* outbound/inbound CR processing thread to avoid blocking applications */
-void cr_thread(void *arg)
-{
- struct dapl_hca *hca_ptr = arg;
- dp_ib_cm_handle_t cr, next_cr;
- int opt, ret;
- socklen_t opt_len;
- char rbuf[2];
- struct dapl_fd_set *set;
- enum DAPL_FD_EVENTS event;
-
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " cr_thread: ENTER hca %p\n", hca_ptr);
- set = dapl_alloc_fd_set();
- if (!set)
- goto out;
-
- dapl_os_lock(&hca_ptr->ib_trans.lock);
- hca_ptr->ib_trans.cr_state = IB_THREAD_RUN;
-
- while (1) {
- dapl_fd_zero(set);
- dapl_fd_set(hca_ptr->ib_trans.scm[0], set, DAPL_FD_READ);
-
- if (!dapl_llist_is_empty(&hca_ptr->ib_trans.list))
- next_cr = dapl_llist_peek_head(&hca_ptr->ib_trans.list);
- else
- next_cr = NULL;
-
- while (next_cr) {
- cr = next_cr;
- next_cr = dapl_llist_next_entry(&hca_ptr->ib_trans.list,
- (DAPL_LLIST_ENTRY *) &
- cr->entry);
- if (cr->state == SCM_DESTROY
- || hca_ptr->ib_trans.cr_state != IB_THREAD_RUN) {
- dapl_llist_remove_entry(&hca_ptr->ib_trans.list,
- (DAPL_LLIST_ENTRY *) &
- cr->entry);
- dapl_dbg_log(DAPL_DBG_TYPE_CM,
- " CR FREE: %p ep=%p st=%d sock=%d\n",
- cr, cr->ep, cr->state, cr->socket);
- dapl_os_free(cr, sizeof(*cr));
- continue;
- }
- if (cr->socket == DAPL_INVALID_SOCKET)
- continue;
-
- event = (cr->state == SCM_CONN_PENDING) ?
- DAPL_FD_WRITE : DAPL_FD_READ;
- if (dapl_fd_set(cr->socket, set, event)) {
- dapl_log(DAPL_DBG_TYPE_ERR,
- " cr_thread: DESTROY CR st=%d fd %d"
- " -> %s\n", cr->state, cr->socket,
- inet_ntoa(((struct sockaddr_in *)
- &cr->dst.ia_address)->
- sin_addr));
- dapli_cm_destroy(cr);
- continue;
- }
-
- dapl_dbg_log(DAPL_DBG_TYPE_CM,
- " poll cr=%p, socket=%d\n", cr,
- cr->socket);
- dapl_os_unlock(&hca_ptr->ib_trans.lock);
-
- ret = dapl_poll(cr->socket, event);
-
- dapl_dbg_log(DAPL_DBG_TYPE_CM,
- " poll ret=0x%x cr->state=%d socket=%d\n",
- ret, cr->state, cr->socket);
-
- /* data on listen, qp exchange, and on disc req */
- if (ret == DAPL_FD_READ) {
- if (cr->socket != DAPL_INVALID_SOCKET) {
- switch (cr->state) {
- case SCM_LISTEN:
- dapli_socket_accept(cr);
- break;
- case SCM_ACCEPTING:
- dapli_socket_accept_data(cr);
- break;
- case SCM_ACCEPTED:
- dapli_socket_accept_rtu(cr);
- break;
- case SCM_RTU_PENDING:
- dapli_socket_connect_rtu(cr);
- break;
- case SCM_CONNECTED:
- dapli_socket_disconnect(cr);
- break;
- default:
- break;
- }
- }
- /* connect socket is writable, check status */
- } else if (ret == DAPL_FD_WRITE ||
- (cr->state == SCM_CONN_PENDING &&
- ret == DAPL_FD_ERROR)) {
- opt = 0;
- opt_len = sizeof(opt);
- ret = getsockopt(cr->socket, SOL_SOCKET,
- SO_ERROR, (char *)&opt,
- &opt_len);
- if (!ret)
- dapli_socket_connected(cr, opt);
- else
- dapli_socket_connected(cr, errno);
-
-			/* POLLHUP, ERR, NVAL, or poll error - DISC */
- } else if (ret < 0 || ret == DAPL_FD_ERROR) {
- dapl_log(DAPL_DBG_TYPE_WARN,
- " poll=%d cr->st=%s sk=%d ep %p, %d\n",
- ret, dapl_cm_state_str(cr->state),
- cr->socket, cr->ep,
- cr->ep ? cr->ep->param.ep_state:0);
- dapli_socket_disconnect(cr);
- }
- dapl_os_lock(&hca_ptr->ib_trans.lock);
- }
-
- /* set to exit and all resources destroyed */
- if ((hca_ptr->ib_trans.cr_state != IB_THREAD_RUN) &&
- (dapl_llist_is_empty(&hca_ptr->ib_trans.list)))
- break;
-
- dapl_os_unlock(&hca_ptr->ib_trans.lock);
- dapl_select(set);
-
- /* if pipe used to wakeup, consume */
- while (dapl_poll(hca_ptr->ib_trans.scm[0],
- DAPL_FD_READ) == DAPL_FD_READ) {
- if (recv(hca_ptr->ib_trans.scm[0], rbuf, 2, 0) == -1)
- dapl_log(DAPL_DBG_TYPE_CM,
- " cr_thread: read pipe error = %s\n",
- strerror(errno));
- }
- dapl_os_lock(&hca_ptr->ib_trans.lock);
-
- /* set to exit and all resources destroyed */
- if ((hca_ptr->ib_trans.cr_state != IB_THREAD_RUN) &&
- (dapl_llist_is_empty(&hca_ptr->ib_trans.list)))
- break;
- }
-
- dapl_os_unlock(&hca_ptr->ib_trans.lock);
- free(set);
- out:
- hca_ptr->ib_trans.cr_state = IB_THREAD_EXIT;
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " cr_thread(hca %p) exit\n", hca_ptr);
-}
-
-
-#ifdef DAPL_COUNTERS
-/* Debug aid: List all Connections in process and state */
-void dapls_print_cm_list(IN DAPL_IA *ia_ptr)
-{
- /* Print in process CR's for this IA, if debug type set */
- int i = 0;
- dp_ib_cm_handle_t cr, next_cr;
-
- dapl_os_lock(&ia_ptr->hca_ptr->ib_trans.lock);
- if (!dapl_llist_is_empty((DAPL_LLIST_HEAD*)
- &ia_ptr->hca_ptr->ib_trans.list))
- next_cr = dapl_llist_peek_head((DAPL_LLIST_HEAD*)
- &ia_ptr->hca_ptr->ib_trans.list);
- else
- next_cr = NULL;
-
- printf("\n DAPL IA CONNECTIONS IN PROCESS:\n");
- while (next_cr) {
- cr = next_cr;
- next_cr = dapl_llist_next_entry((DAPL_LLIST_HEAD*)
- &ia_ptr->hca_ptr->ib_trans.list,
- (DAPL_LLIST_ENTRY*)&cr->entry);
-
- printf( " CONN[%d]: sp %p ep %p sock %d %s %s %s %s %d\n",
- i, cr->sp, cr->ep, cr->socket,
- cr->dst.qp_type == IBV_QPT_RC ? "RC" : "UD",
- dapl_cm_state_str(cr->state),
- cr->sp ? "<-" : "->",
- cr->state == SCM_LISTEN ?
- inet_ntoa(((struct sockaddr_in *)
- &ia_ptr->hca_ptr->hca_address)->sin_addr) :
- inet_ntoa(((struct sockaddr_in *)
- &cr->dst.ia_address)->sin_addr),
- cr->sp ? (int)cr->sp->conn_qual :
- ntohs(((struct sockaddr_in *)
- &cr->dst.ia_address)->sin_port));
- i++;
- }
- printf("\n");
- dapl_os_unlock(&ia_ptr->hca_ptr->ib_trans.lock);
-}
-#endif
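
Aside: the socket-CM handshake implemented above boils down to pushing a
fixed-size QP descriptor (ib_qp_cm_t) over TCP in network byte order, reading
the peer's descriptor back, and closing with a two-byte RTU once the QP
reaches RTS. A minimal sketch of that wire pattern follows; scm_msg,
scm_send_qp and scm_recv_qp are illustrative names only, not the provider's
actual types:

    #include <stdint.h>
    #include <string.h>
    #include <sys/types.h>
    #include <sys/socket.h>
    #include <arpa/inet.h>

    /* stand-in for ib_qp_cm_t: every field crosses the wire in network order */
    struct scm_msg {
            uint16_t ver;
            uint16_t lid;
            uint32_t qpn;
            uint32_t p_size;
    };

    /* active side: send local QP attributes on a connected TCP socket */
    static int scm_send_qp(int fd, uint16_t lid, uint32_t qpn)
    {
            struct scm_msg m;

            memset(&m, 0, sizeof(m));
            m.ver = htons(1);
            m.lid = htons(lid);
            m.qpn = htonl(qpn);
            return (send(fd, &m, sizeof(m), 0) == (ssize_t)sizeof(m)) ? 0 : -1;
    }

    /* either side: read the peer descriptor and convert to host order */
    static int scm_recv_qp(int fd, struct scm_msg *m)
    {
            if (recv(fd, m, sizeof(*m), 0) != (ssize_t)sizeof(*m))
                    return -1;
            m->lid = ntohs(m->lid);
            m->qpn = ntohl(m->qpn);
            m->p_size = ntohl(m->p_size);
            return 0;
    }

The real exchange above also carries the GID, port, QP type and any private
data, and the active side only sends the 0x0E0F RTU after its QP has been
moved to RTR/RTS, as in dapli_socket_connect_rtu().
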
diff --git a/dapl/openib_scm/dapl_ib_cq.c b/dapl/openib_scm/dapl_ib_cq.c
deleted file mode 100644
index 2af1889..0000000
--- a/dapl/openib_scm/dapl_ib_cq.c
+++ /dev/null
@@ -1,705 +0,0 @@
-/*
- * This Software is licensed under one of the following licenses:
- *
- * 1) under the terms of the "Common Public License 1.0" a copy of which is
- * available from the Open Source Initiative, see
- * http://www.opensource.org/licenses/cpl.php.
- *
- * 2) under the terms of the "The BSD License" a copy of which is
- * available from the Open Source Initiative, see
- * http://www.opensource.org/licenses/bsd-license.php.
- *
- * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
- * copy of which is available from the Open Source Initiative, see
- * http://www.opensource.org/licenses/gpl-license.php.
- *
- * Licensee has the right to choose one of the above licenses.
- *
- * Redistributions of source code must retain the above copyright
- * notice and one of the license notices.
- *
- * Redistributions in binary form must reproduce both the above copyright
- * notice, one of the license notices in the documentation
- * and/or other materials provided with the distribution.
- */
-
-/***************************************************************************
- *
- * Module: uDAPL
- *
- * Filename: dapl_ib_cq.c
- *
- * Author: Arlin Davis
- *
- * Created: 3/10/2005
- *
- * Description:
- *
- * The uDAPL openib provider - completion queue
- *
- ****************************************************************************
- * Source Control System Information
- *
- * $Id: $
- *
- * Copyright (c) 2005 Intel Corporation. All rights reserved.
- *
- **************************************************************************/
-
-#include "openib_osd.h"
-#include "dapl.h"
-#include "dapl_adapter_util.h"
-#include "dapl_lmr_util.h"
-#include "dapl_evd_util.h"
-#include "dapl_ring_buffer_util.h"
-
-#if defined(_WIN64) || defined(_WIN32)
-#include "..\..\..\..\..\etc\user\comp_channel.cpp"
-#include "..\..\..\..\..\etc\user\dlist.c"
-
-void dapli_cq_thread_destroy(struct dapl_hca *hca_ptr)
-{
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " cq_thread_destroy(%p)\n", hca_ptr);
-
- if (hca_ptr->ib_trans.cq_state != IB_THREAD_RUN)
- return;
-
-	/* signal cq_thread to exit and wait for it */
- hca_ptr->ib_trans.cq_state = IB_THREAD_CANCEL;
- CompChannelCancel(&hca_ptr->ib_trans.ib_cq->comp_channel);
- dapl_dbg_log(DAPL_DBG_TYPE_CM, " cq_thread_destroy(%p) cancel\n",
- hca_ptr);
- while (hca_ptr->ib_trans.cq_state != IB_THREAD_EXIT) {
- dapl_os_sleep_usec(20000);
- }
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " cq_thread_destroy(%d) exit\n",
- dapl_os_getpid());
-}
-
-static void cq_thread(void *arg)
-{
- struct dapl_hca *hca_ptr = arg;
- struct dapl_evd *evd_ptr;
- struct ibv_cq *ibv_cq = NULL;
-
- hca_ptr->ib_trans.cq_state = IB_THREAD_RUN;
-
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " cq_thread: ENTER hca %p\n", hca_ptr);
-
- /* wait on DTO event, or signal to abort */
- while (hca_ptr->ib_trans.cq_state == IB_THREAD_RUN) {
- if (!ibv_get_cq_event
- (hca_ptr->ib_trans.ib_cq, &ibv_cq, (void *)&evd_ptr)) {
-
- if (DAPL_BAD_HANDLE(evd_ptr, DAPL_MAGIC_EVD)) {
- ibv_ack_cq_events(ibv_cq, 1);
- return;
- }
-
- /* process DTO event via callback */
- dapl_evd_dto_callback(hca_ptr->ib_hca_handle,
- evd_ptr->ib_cq_handle,
- (void *)evd_ptr);
-
- ibv_ack_cq_events(ibv_cq, 1);
- }
- }
- hca_ptr->ib_trans.cq_state = IB_THREAD_EXIT;
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " cq_thread: EXIT: hca %p \n",
- hca_ptr);
-}
-
-#else // _WIN32 || _WIN64
-
-void dapli_cq_thread_destroy(struct dapl_hca *hca_ptr)
-{
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " cq_thread_destroy(%p)\n", hca_ptr);
-
- if (hca_ptr->ib_trans.cq_state != IB_THREAD_RUN)
- return;
-
-	/* signal cq_thread to exit and wait for it */
- hca_ptr->ib_trans.cq_state = IB_THREAD_CANCEL;
- pthread_kill(hca_ptr->ib_trans.cq_thread, SIGUSR1);
- dapl_dbg_log(DAPL_DBG_TYPE_CM, " cq_thread_destroy(%p) cancel\n",
- hca_ptr);
- while (hca_ptr->ib_trans.cq_state != IB_THREAD_EXIT) {
- dapl_os_sleep_usec(20000);
- }
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " cq_thread_destroy(%d) exit\n",
- dapl_os_getpid());
-}
-
-/* catch the signal */
-static void ib_cq_handler(int signum)
-{
- return;
-}
-
-static void cq_thread(void *arg)
-{
- struct dapl_hca *hca_ptr = arg;
- struct dapl_evd *evd_ptr;
- struct ibv_cq *ibv_cq = NULL;
- sigset_t sigset;
-
- sigemptyset(&sigset);
- sigaddset(&sigset, SIGUSR1);
- pthread_sigmask(SIG_UNBLOCK, &sigset, NULL);
- signal(SIGUSR1, ib_cq_handler);
-
- hca_ptr->ib_trans.cq_state = IB_THREAD_RUN;
-
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " cq_thread: ENTER hca %p\n", hca_ptr);
-
- /* wait on DTO event, or signal to abort */
- while (hca_ptr->ib_trans.cq_state == IB_THREAD_RUN) {
- struct pollfd cq_fd = {
- .fd = hca_ptr->ib_trans.ib_cq->fd,
- .events = POLLIN,
- .revents = 0
- };
- if ((poll(&cq_fd, 1, -1) == 1) &&
- (!ibv_get_cq_event
- (hca_ptr->ib_trans.ib_cq, &ibv_cq, (void *)&evd_ptr))) {
-
- if (DAPL_BAD_HANDLE(evd_ptr, DAPL_MAGIC_EVD)) {
- ibv_ack_cq_events(ibv_cq, 1);
- return;
- }
-
- /* process DTO event via callback */
- dapl_evd_dto_callback(hca_ptr->ib_hca_handle,
- evd_ptr->ib_cq_handle,
- (void *)evd_ptr);
-
- ibv_ack_cq_events(ibv_cq, 1);
- }
- }
- hca_ptr->ib_trans.cq_state = IB_THREAD_EXIT;
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " cq_thread: EXIT: hca %p \n",
- hca_ptr);
-}
-
-#endif // _WIN32 || _WIN64
-
-int dapli_cq_thread_init(struct dapl_hca *hca_ptr)
-{
- DAT_RETURN dat_status;
-
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " cq_thread_init(%p)\n", hca_ptr);
-
-	/* create thread to process CQ completion events */
- hca_ptr->ib_trans.cq_state = IB_THREAD_INIT;
- dat_status =
- dapl_os_thread_create(cq_thread, (void *)hca_ptr,
- &hca_ptr->ib_trans.cq_thread);
- if (dat_status != DAT_SUCCESS) {
- dapl_dbg_log(DAPL_DBG_TYPE_ERR,
- " cq_thread_init: failed to create thread\n");
- return 1;
- }
-
- /* wait for thread to start */
- while (hca_ptr->ib_trans.cq_state != IB_THREAD_RUN) {
- dapl_os_sleep_usec(20000);
- }
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " cq_thread_init(%d) exit\n",
- dapl_os_getpid());
- return 0;
-}
-
-/*
- * Map all verbs DTO completion codes to the DAT equivalent.
- *
- * Not returned by verbs: DAT_DTO_ERR_PARTIAL_PACKET
- */
-static struct ib_status_map {
- int ib_status;
- DAT_DTO_COMPLETION_STATUS dat_status;
-} ib_status_map[] = {
- /* 00 */ {
- IBV_WC_SUCCESS, DAT_DTO_SUCCESS},
- /* 01 */ {
- IBV_WC_LOC_LEN_ERR, DAT_DTO_ERR_LOCAL_LENGTH},
- /* 02 */ {
- IBV_WC_LOC_QP_OP_ERR, DAT_DTO_ERR_LOCAL_EP},
- /* 03 */ {
- IBV_WC_LOC_EEC_OP_ERR, DAT_DTO_ERR_TRANSPORT},
- /* 04 */ {
- IBV_WC_LOC_PROT_ERR, DAT_DTO_ERR_LOCAL_PROTECTION},
- /* 05 */ {
- IBV_WC_WR_FLUSH_ERR, DAT_DTO_ERR_FLUSHED},
- /* 06 */ {
- IBV_WC_MW_BIND_ERR, DAT_RMR_OPERATION_FAILED},
- /* 07 */ {
- IBV_WC_BAD_RESP_ERR, DAT_DTO_ERR_BAD_RESPONSE},
- /* 08 */ {
- IBV_WC_LOC_ACCESS_ERR, DAT_DTO_ERR_LOCAL_PROTECTION},
- /* 09 */ {
- IBV_WC_REM_INV_REQ_ERR, DAT_DTO_ERR_REMOTE_RESPONDER},
- /* 10 */ {
- IBV_WC_REM_ACCESS_ERR, DAT_DTO_ERR_REMOTE_ACCESS},
- /* 11 */ {
- IBV_WC_REM_OP_ERR, DAT_DTO_ERR_REMOTE_RESPONDER},
- /* 12 */ {
- IBV_WC_RETRY_EXC_ERR, DAT_DTO_ERR_TRANSPORT},
- /* 13 */ {
- IBV_WC_RNR_RETRY_EXC_ERR, DAT_DTO_ERR_RECEIVER_NOT_READY},
- /* 14 */ {
- IBV_WC_LOC_RDD_VIOL_ERR, DAT_DTO_ERR_LOCAL_PROTECTION},
- /* 15 */ {
- IBV_WC_REM_INV_RD_REQ_ERR, DAT_DTO_ERR_REMOTE_RESPONDER},
- /* 16 */ {
- IBV_WC_REM_ABORT_ERR, DAT_DTO_ERR_REMOTE_RESPONDER},
- /* 17 */ {
- IBV_WC_INV_EECN_ERR, DAT_DTO_ERR_TRANSPORT},
- /* 18 */ {
- IBV_WC_INV_EEC_STATE_ERR, DAT_DTO_ERR_TRANSPORT},
- /* 19 */ {
- IBV_WC_FATAL_ERR, DAT_DTO_ERR_TRANSPORT},
- /* 20 */ {
- IBV_WC_RESP_TIMEOUT_ERR, DAT_DTO_ERR_RECEIVER_NOT_READY},
- /* 21 */ {
-	IBV_WC_GENERAL_ERR, DAT_DTO_ERR_TRANSPORT},
-};
-
-/*
- * dapls_ib_get_dto_status
- *
- * Return the DAT status of a DTO operation
- *
- * Input:
- * cqe_ptr pointer to completion queue entry
- *
- * Output:
- * none
- *
- * Returns:
- * Value from ib_status_map table above
- */
-
-DAT_DTO_COMPLETION_STATUS
-dapls_ib_get_dto_status(IN ib_work_completion_t * cqe_ptr)
-{
- uint32_t ib_status;
- int i;
-
- ib_status = DAPL_GET_CQE_STATUS(cqe_ptr);
-
- /*
- * Due to the implementation of verbs completion code, we need to
- * search the table for the correct value rather than assuming
- * linear distribution.
- */
- for (i = 0; i <= IBV_WC_GENERAL_ERR; i++) {
- if (ib_status == ib_status_map[i].ib_status) {
- if (ib_status != IBV_WC_SUCCESS) {
- dapl_dbg_log(DAPL_DBG_TYPE_DTO_COMP_ERR,
- " DTO completion ERROR: %d: op %#x\n",
- ib_status,
- DAPL_GET_CQE_OPTYPE(cqe_ptr));
- }
- return ib_status_map[i].dat_status;
- }
- }
-
- dapl_dbg_log(DAPL_DBG_TYPE_DTO_COMP_ERR,
- " DTO completion ERROR: %d: op %#x\n",
- ib_status, DAPL_GET_CQE_OPTYPE(cqe_ptr));
-
- return DAT_DTO_FAILURE;
-}
-
-DAT_RETURN dapls_ib_get_async_event(IN ib_error_record_t * err_record,
- OUT DAT_EVENT_NUMBER * async_event)
-{
- DAT_RETURN dat_status = DAT_SUCCESS;
- int err_code = err_record->event_type;
-
- switch (err_code) {
- /* OVERFLOW error */
- case IBV_EVENT_CQ_ERR:
- *async_event = DAT_ASYNC_ERROR_EVD_OVERFLOW;
- break;
- /* INTERNAL errors */
- case IBV_EVENT_DEVICE_FATAL:
- *async_event = DAT_ASYNC_ERROR_PROVIDER_INTERNAL_ERROR;
- break;
- /* CATASTROPHIC errors */
- case IBV_EVENT_PORT_ERR:
- *async_event = DAT_ASYNC_ERROR_IA_CATASTROPHIC;
- break;
- /* BROKEN QP error */
- case IBV_EVENT_SQ_DRAINED:
- case IBV_EVENT_QP_FATAL:
- case IBV_EVENT_QP_REQ_ERR:
- case IBV_EVENT_QP_ACCESS_ERR:
- *async_event = DAT_ASYNC_ERROR_EP_BROKEN;
- break;
-
- /* connection completion */
- case IBV_EVENT_COMM_EST:
- *async_event = DAT_CONNECTION_EVENT_ESTABLISHED;
- break;
-
- /* TODO: process HW state changes */
- case IBV_EVENT_PATH_MIG:
- case IBV_EVENT_PATH_MIG_ERR:
- case IBV_EVENT_PORT_ACTIVE:
- case IBV_EVENT_LID_CHANGE:
- case IBV_EVENT_PKEY_CHANGE:
- case IBV_EVENT_SM_CHANGE:
- default:
- dat_status = DAT_ERROR(DAT_NOT_IMPLEMENTED, 0);
- }
- return dat_status;
-}
-
-/*
- * dapl_ib_cq_alloc
- *
- * Alloc a CQ
- *
- * Input:
- * ia_handle IA handle
- * evd_ptr pointer to EVD struct
- * cqlen minimum QLen
- *
- * Output:
- * none
- *
- * Returns:
- * DAT_SUCCESS
- * DAT_INSUFFICIENT_RESOURCES
- *
- */
-DAT_RETURN
-dapls_ib_cq_alloc(IN DAPL_IA * ia_ptr,
- IN DAPL_EVD * evd_ptr, IN DAT_COUNT * cqlen)
-{
- struct ibv_comp_channel *channel = ia_ptr->hca_ptr->ib_trans.ib_cq;
-
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- "dapls_ib_cq_alloc: evd %p cqlen=%d \n", evd_ptr, *cqlen);
-
-#ifdef CQ_WAIT_OBJECT
- if (evd_ptr->cq_wait_obj_handle)
- channel = evd_ptr->cq_wait_obj_handle;
-#endif
-
- /* Call IB verbs to create CQ */
- evd_ptr->ib_cq_handle = ibv_create_cq(ia_ptr->hca_ptr->ib_hca_handle,
- *cqlen, evd_ptr, channel, 0);
-
- if (evd_ptr->ib_cq_handle == IB_INVALID_HANDLE)
- return DAT_INSUFFICIENT_RESOURCES;
-
- /* arm cq for events */
- dapls_set_cq_notify(ia_ptr, evd_ptr);
-
- /* update with returned cq entry size */
- *cqlen = evd_ptr->ib_cq_handle->cqe;
-
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- "dapls_ib_cq_alloc: new_cq %p cqlen=%d \n",
- evd_ptr->ib_cq_handle, *cqlen);
-
- return DAT_SUCCESS;
-}
-
-/*
- * dapl_ib_cq_resize
- *
- * Resize a CQ
- *
- * Input:
- * ia_handle IA handle
- * evd_ptr pointer to EVD struct
- * cqlen minimum QLen
- *
- * Output:
- * none
- *
- * Returns:
- * DAT_SUCCESS
- * DAT_INVALID_PARAMETER
- *
- */
-DAT_RETURN
-dapls_ib_cq_resize(IN DAPL_IA * ia_ptr,
- IN DAPL_EVD * evd_ptr, IN DAT_COUNT * cqlen)
-{
- ib_cq_handle_t new_cq;
- struct ibv_comp_channel *channel = ia_ptr->hca_ptr->ib_trans.ib_cq;
-
-	/* IB verbs does not support resize. Try to re-create the CQ
-	 * with the new size. This can only be done if no QP is attached;
-	 * ibv_destroy_cq returns EBUSY while a QP is still attached.
- */
-
-#ifdef CQ_WAIT_OBJECT
- if (evd_ptr->cq_wait_obj_handle)
- channel = evd_ptr->cq_wait_obj_handle;
-#endif
-
- /* Call IB verbs to create CQ */
- new_cq = ibv_create_cq(ia_ptr->hca_ptr->ib_hca_handle, *cqlen,
- evd_ptr, channel, 0);
-
- if (new_cq == IB_INVALID_HANDLE)
- return DAT_INSUFFICIENT_RESOURCES;
-
- /* destroy the original and replace if successful */
- if (ibv_destroy_cq(evd_ptr->ib_cq_handle)) {
- ibv_destroy_cq(new_cq);
- return (dapl_convert_errno(errno, "resize_cq"));
- }
-
- /* update EVD with new cq handle and size */
- evd_ptr->ib_cq_handle = new_cq;
- *cqlen = new_cq->cqe;
-
- /* arm cq for events */
- dapls_set_cq_notify(ia_ptr, evd_ptr);
-
- return DAT_SUCCESS;
-}
-
-/*
- * dapls_ib_cq_free
- *
- * destroy a CQ
- *
- * Input:
- * ia_handle IA handle
- * evd_ptr pointer to EVD struct
- *
- * Output:
- * none
- *
- * Returns:
- * DAT_SUCCESS
- * DAT_INVALID_PARAMETER
- *
- */
-DAT_RETURN dapls_ib_cq_free(IN DAPL_IA * ia_ptr, IN DAPL_EVD * evd_ptr)
-{
- DAT_EVENT event;
- ib_work_completion_t wc;
-
- if (evd_ptr->ib_cq_handle != IB_INVALID_HANDLE) {
- /* pull off CQ and EVD entries and toss */
- while (ibv_poll_cq(evd_ptr->ib_cq_handle, 1, &wc) == 1) ;
- while (dapl_evd_dequeue(evd_ptr, &event) == DAT_SUCCESS) ;
- if (ibv_destroy_cq(evd_ptr->ib_cq_handle))
- return (dapl_convert_errno(errno, "ibv_destroy_cq"));
- evd_ptr->ib_cq_handle = IB_INVALID_HANDLE;
- }
- return DAT_SUCCESS;
-}
-
-/*
- * dapls_set_cq_notify
- *
- * Arm the CQ to notify on the next completion
- *
- * Input:
- * hca_handl hca handle
- * DAPL_EVD evd handle
- *
- * Output:
- * none
- *
- * Returns:
- * DAT_SUCCESS
- * dapl_convert_errno
- */
-DAT_RETURN dapls_set_cq_notify(IN DAPL_IA * ia_ptr, IN DAPL_EVD * evd_ptr)
-{
- if (ibv_req_notify_cq(evd_ptr->ib_cq_handle, 0))
- return (dapl_convert_errno(errno, "notify_cq"));
- else
- return DAT_SUCCESS;
-}
-
-/*
- * dapls_ib_completion_notify
- *
- * Set the CQ notification type
- *
- * Input:
- * hca_handl hca handle
- * evd_ptr evd handle
- * type notification type
- *
- * Output:
- * none
- *
- * Returns:
- * DAT_SUCCESS
- * dapl_convert_errno
- */
-DAT_RETURN dapls_ib_completion_notify(IN ib_hca_handle_t hca_handle,
- IN DAPL_EVD * evd_ptr,
- IN ib_notification_type_t type)
-{
- if (ibv_req_notify_cq(evd_ptr->ib_cq_handle, type))
- return (dapl_convert_errno(errno, "notify_cq_type"));
- else
- return DAT_SUCCESS;
-}
-
-/*
- * dapls_ib_completion_poll
- *
- * CQ poll for completions
- *
- * Input:
- * hca_handl hca handle
- * evd_ptr evd handle
- * wc_ptr work completion
- *
- * Output:
- * none
- *
- * Returns:
- * DAT_SUCCESS
- * DAT_QUEUE_EMPTY
- *
- */
-DAT_RETURN dapls_ib_completion_poll(IN DAPL_HCA * hca_ptr,
- IN DAPL_EVD * evd_ptr,
- IN ib_work_completion_t * wc_ptr)
-{
- int ret;
-
- ret = ibv_poll_cq(evd_ptr->ib_cq_handle, 1, wc_ptr);
- if (ret == 1)
- return DAT_SUCCESS;
-
- return DAT_QUEUE_EMPTY;
-}
-
-#ifdef CQ_WAIT_OBJECT
-
-/* NEW common wait objects for providers with direct CQ wait objects */
-DAT_RETURN
-dapls_ib_wait_object_create(IN DAPL_EVD * evd_ptr,
- IN ib_wait_obj_handle_t * p_cq_wait_obj_handle)
-{
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- " cq_object_create: (%p,%p)\n",
- evd_ptr, p_cq_wait_obj_handle);
-
-	/* create a completion channel as the CQ wait object for this EVD */
- *p_cq_wait_obj_handle =
- ibv_create_comp_channel(evd_ptr->header.owner_ia->hca_ptr->
- ib_hca_handle);
-
- return DAT_SUCCESS;
-}
-
-DAT_RETURN
-dapls_ib_wait_object_destroy(IN ib_wait_obj_handle_t p_cq_wait_obj_handle)
-{
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- " cq_object_destroy: wait_obj=%p\n", p_cq_wait_obj_handle);
-
- ibv_destroy_comp_channel(p_cq_wait_obj_handle);
-
- return DAT_SUCCESS;
-}
-
-DAT_RETURN
-dapls_ib_wait_object_wakeup(IN ib_wait_obj_handle_t p_cq_wait_obj_handle)
-{
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- " cq_object_wakeup: wait_obj=%p\n", p_cq_wait_obj_handle);
-
- /* no wake up mechanism */
- return DAT_SUCCESS;
-}
-
-#if defined(_WIN32) || defined(_WIN64)
-DAT_RETURN
-dapls_ib_wait_object_wait(IN ib_wait_obj_handle_t p_cq_wait_obj_handle,
- IN uint32_t timeout)
-{
- struct dapl_evd *evd_ptr;
- struct ibv_cq *ibv_cq = NULL;
- int status = 0;
-
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- " cq_object_wait: CQ channel %p time %d\n",
- p_cq_wait_obj_handle, timeout);
-
- /* uDAPL timeout values in usecs */
- p_cq_wait_obj_handle->comp_channel.Milliseconds = timeout / 1000;
-
- /* returned event */
- status = ibv_get_cq_event(p_cq_wait_obj_handle, &ibv_cq,
- (void *)&evd_ptr);
- if (status == 0) {
- ibv_ack_cq_events(ibv_cq, 1);
- }
-
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- " cq_object_wait: RET evd %p ibv_cq %p %s\n",
- evd_ptr, ibv_cq, strerror(errno));
-
- return (dapl_convert_errno(status, "cq_wait_object_wait"));
-}
-#else //_WIN32 || _WIN64
-DAT_RETURN
-dapls_ib_wait_object_wait(IN ib_wait_obj_handle_t p_cq_wait_obj_handle,
- IN uint32_t timeout)
-{
- struct dapl_evd *evd_ptr;
- struct ibv_cq *ibv_cq = NULL;
- int status = 0;
- int timeout_ms = -1;
- struct pollfd cq_fd = {
- .fd = p_cq_wait_obj_handle->fd,
- .events = POLLIN,
- .revents = 0
- };
-
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- " cq_object_wait: CQ channel %p time %d\n",
- p_cq_wait_obj_handle, timeout);
-
- /* uDAPL timeout values in usecs */
- if (timeout != DAT_TIMEOUT_INFINITE)
- timeout_ms = timeout / 1000;
-
- status = poll(&cq_fd, 1, timeout_ms);
-
- /* returned event */
- if (status > 0) {
- if (!ibv_get_cq_event(p_cq_wait_obj_handle,
- &ibv_cq, (void *)&evd_ptr)) {
- ibv_ack_cq_events(ibv_cq, 1);
- }
- status = 0;
-
- /* timeout */
- } else if (status == 0)
- status = ETIMEDOUT;
-
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- " cq_object_wait: RET evd %p ibv_cq %p %s\n",
- evd_ptr, ibv_cq, strerror(errno));
-
- return (dapl_convert_errno(status, "cq_wait_object_wait"));
-
-}
-#endif //_WIN32 || _WIN64
-#endif // CQ_WAIT_OBJECT
-
-/*
- * Local variables:
- * c-indent-level: 4
- * c-basic-offset: 4
- * tab-width: 8
- * End:
- */
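
Aside: cq_thread() and the CQ wait-object calls above all follow the standard
libibverbs completion-channel pattern: block on the channel's fd, take the
event with ibv_get_cq_event(), ack it, re-arm the CQ, then drain it with
ibv_poll_cq(). A minimal sketch of one iteration (error handling trimmed;
wait_and_drain is an illustrative name, not a provider entry point):

    #include <poll.h>
    #include <infiniband/verbs.h>

    static int wait_and_drain(struct ibv_comp_channel *ch)
    {
            struct ibv_cq *cq;
            void *ctx;
            struct ibv_wc wc;
            struct pollfd pfd = { .fd = ch->fd, .events = POLLIN, .revents = 0 };

            if (poll(&pfd, 1, -1) != 1)             /* block until the CQ signals */
                    return -1;
            if (ibv_get_cq_event(ch, &cq, &ctx))    /* consume the channel event */
                    return -1;
            ibv_ack_cq_events(cq, 1);               /* every event must be acked */
            if (ibv_req_notify_cq(cq, 0))           /* re-arm before polling */
                    return -1;
            while (ibv_poll_cq(cq, 1, &wc) > 0)
                    ;                               /* dispatch wc to the EVD here */
            return 0;
    }

Re-arming with ibv_req_notify_cq() before draining avoids missing completions
that arrive between the drain and the next wait.
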
diff --git a/dapl/openib_scm/dapl_ib_dto.h b/dapl/openib_scm/dapl_ib_dto.h
deleted file mode 100644
index 9118b2e..0000000
--- a/dapl/openib_scm/dapl_ib_dto.h
+++ /dev/null
@@ -1,527 +0,0 @@
-/*
- * This Software is licensed under one of the following licenses:
- *
- * 1) under the terms of the "Common Public License 1.0" a copy of which is
- * available from the Open Source Initiative, see
- * http://www.opensource.org/licenses/cpl.php.
- *
- * 2) under the terms of the "The BSD License" a copy of which is
- * available from the Open Source Initiative, see
- * http://www.opensource.org/licenses/bsd-license.php.
- *
- * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
- * copy of which is available from the Open Source Initiative, see
- * http://www.opensource.org/licenses/gpl-license.php.
- *
- * Licensee has the right to choose one of the above licenses.
- *
- * Redistributions of source code must retain the above copyright
- * notice and one of the license notices.
- *
- * Redistributions in binary form must reproduce both the above copyright
- * notice, one of the license notices in the documentation
- * and/or other materials provided with the distribution.
- */
-
-/***************************************************************************
- *
- * Module: uDAPL
- *
- * Filename: dapl_ib_dto.h
- *
- * Author: Arlin Davis
- *
- * Created: 3/10/2005
- *
- * Description:
- *
- * The uDAPL openib SCM provider - DTO operations and CQE macros
- *
- ****************************************************************************
- * Source Control System Information
- *
- * $Id: $
- *
- * Copyright (c) 2005 Intel Corporation. All rights reserved.
- *
- **************************************************************************/
-#ifndef _DAPL_IB_DTO_H_
-#define _DAPL_IB_DTO_H_
-
-#include "dapl_ib_util.h"
-
-#ifdef DAT_EXTENSIONS
-#include <dat2/dat_ib_extensions.h>
-#endif
-
-#define DEFAULT_DS_ENTRIES 8
-
-STATIC _INLINE_ int dapls_cqe_opcode(ib_work_completion_t *cqe_p);
-
-#define CQE_WR_TYPE_UD(id) \
- (((DAPL_COOKIE *)(uintptr_t)id)->ep->qp_handle->qp_type == IBV_QPT_UD)
-
-/*
- * dapls_ib_post_recv
- *
- * Provider specific Post RECV function
- */
-STATIC _INLINE_ DAT_RETURN
-dapls_ib_post_recv (
- IN DAPL_EP *ep_ptr,
- IN DAPL_COOKIE *cookie,
- IN DAT_COUNT segments,
- IN DAT_LMR_TRIPLET *local_iov )
-{
- struct ibv_recv_wr wr;
- struct ibv_recv_wr *bad_wr;
- ib_data_segment_t *ds = (ib_data_segment_t *)local_iov;
- DAT_COUNT i, total_len;
- int ret;
-
- dapl_dbg_log(DAPL_DBG_TYPE_EP,
- " post_rcv: ep %p cookie %p segs %d l_iov %p\n",
- ep_ptr, cookie, segments, local_iov);
-
- /* setup work request */
- total_len = 0;
- wr.next = 0;
- wr.num_sge = segments;
- wr.wr_id = (uint64_t)(uintptr_t)cookie;
- wr.sg_list = ds;
-
- if (cookie != NULL) {
- for (i = 0; i < segments; i++) {
- dapl_dbg_log(DAPL_DBG_TYPE_EP,
- " post_rcv: l_key 0x%x va %p len %d\n",
- ds->lkey, ds->addr, ds->length );
- total_len += ds->length;
- ds++;
- }
- cookie->val.dto.size = total_len;
- }
-
- ret = ibv_post_recv(ep_ptr->qp_handle, &wr, &bad_wr);
-
- if (ret)
- return(dapl_convert_errno(errno,"ibv_recv"));
-
- DAPL_CNTR(ep_ptr, DCNT_EP_POST_RECV);
- DAPL_CNTR_DATA(ep_ptr, DCNT_EP_POST_RECV_DATA, total_len);
-
- return DAT_SUCCESS;
-}
-
-/*
- * dapls_ib_post_send
- *
- * Provider specific Post SEND function
- */
-STATIC _INLINE_ DAT_RETURN
-dapls_ib_post_send (
- IN DAPL_EP *ep_ptr,
- IN ib_send_op_type_t op_type,
- IN DAPL_COOKIE *cookie,
- IN DAT_COUNT segments,
- IN DAT_LMR_TRIPLET *local_iov,
- IN const DAT_RMR_TRIPLET *remote_iov,
- IN DAT_COMPLETION_FLAGS completion_flags)
-{
- struct ibv_send_wr wr;
- struct ibv_send_wr *bad_wr;
- ib_data_segment_t *ds = (ib_data_segment_t *)local_iov;
- ib_hca_transport_t *ibt_ptr =
- &ep_ptr->header.owner_ia->hca_ptr->ib_trans;
- DAT_COUNT i, total_len;
- int ret;
-
- dapl_dbg_log(DAPL_DBG_TYPE_EP,
-		     " post_snd: ep %p op %d ck %p sgs"
-		     " %d l_iov %p r_iov %p f %d\n",
- ep_ptr, op_type, cookie, segments, local_iov,
- remote_iov, completion_flags);
-
-#ifdef DAT_EXTENSIONS
- if (ep_ptr->qp_handle->qp_type != IBV_QPT_RC)
- return(DAT_ERROR(DAT_INVALID_HANDLE, DAT_INVALID_HANDLE_EP));
-#endif
- /* setup the work request */
- wr.next = 0;
- wr.opcode = op_type;
- wr.num_sge = segments;
- wr.send_flags = 0;
- wr.wr_id = (uint64_t)(uintptr_t)cookie;
- wr.sg_list = ds;
- total_len = 0;
-
- if (cookie != NULL) {
- for (i = 0; i < segments; i++ ) {
- dapl_dbg_log(DAPL_DBG_TYPE_EP,
- " post_snd: lkey 0x%x va %p len %d\n",
- ds->lkey, ds->addr, ds->length );
- total_len += ds->length;
- ds++;
- }
- cookie->val.dto.size = total_len;
- }
-
- if (wr.num_sge &&
- (op_type == OP_RDMA_WRITE || op_type == OP_RDMA_READ)) {
- wr.wr.rdma.remote_addr = remote_iov->virtual_address;
- wr.wr.rdma.rkey = remote_iov->rmr_context;
- dapl_dbg_log(DAPL_DBG_TYPE_EP,
- " post_snd_rdma: rkey 0x%x va %#016Lx\n",
- wr.wr.rdma.rkey, wr.wr.rdma.remote_addr);
- }
-
-
- /* inline data for send or write ops */
- if ((total_len <= ibt_ptr->max_inline_send) &&
- ((op_type == OP_SEND) || (op_type == OP_RDMA_WRITE)))
- wr.send_flags |= IBV_SEND_INLINE;
-
- /* set completion flags in work request */
- wr.send_flags |= (DAT_COMPLETION_SUPPRESS_FLAG &
- completion_flags) ? 0 : IBV_SEND_SIGNALED;
- wr.send_flags |= (DAT_COMPLETION_BARRIER_FENCE_FLAG &
- completion_flags) ? IBV_SEND_FENCE : 0;
- wr.send_flags |= (DAT_COMPLETION_SOLICITED_WAIT_FLAG &
- completion_flags) ? IBV_SEND_SOLICITED : 0;
-
- dapl_dbg_log(DAPL_DBG_TYPE_EP,
- " post_snd: op 0x%x flags 0x%x sglist %p, %d\n",
- wr.opcode, wr.send_flags, wr.sg_list, wr.num_sge);
-
- ret = ibv_post_send(ep_ptr->qp_handle, &wr, &bad_wr);
-
- if (ret)
- return(dapl_convert_errno(errno,"ibv_send"));
-
-#ifdef DAPL_COUNTERS
- switch (op_type) {
- case OP_SEND:
- DAPL_CNTR(ep_ptr, DCNT_EP_POST_SEND);
- DAPL_CNTR_DATA(ep_ptr, DCNT_EP_POST_SEND_DATA,total_len);
- break;
- case OP_RDMA_WRITE:
- DAPL_CNTR(ep_ptr, DCNT_EP_POST_WRITE);
- DAPL_CNTR_DATA(ep_ptr, DCNT_EP_POST_WRITE_DATA,total_len);
- break;
- case OP_RDMA_READ:
- DAPL_CNTR(ep_ptr, DCNT_EP_POST_READ);
- DAPL_CNTR_DATA(ep_ptr, DCNT_EP_POST_READ_DATA,total_len);
- break;
- default:
- break;
- }
-#endif /* DAPL_COUNTERS */
-
- dapl_dbg_log(DAPL_DBG_TYPE_EP," post_snd: returned\n");
- return DAT_SUCCESS;
-}
-
-/* map Work Completions to DAPL WR operations */
-STATIC _INLINE_ DAT_DTOS dapls_cqe_dtos_opcode(ib_work_completion_t *cqe_p)
-{
- switch (cqe_p->opcode) {
-
- case IBV_WC_SEND:
-#ifdef DAT_EXTENSIONS
- if (CQE_WR_TYPE_UD(cqe_p->wr_id))
- return (DAT_IB_DTO_SEND_UD);
- else
-#endif
- return (DAT_DTO_SEND);
- case IBV_WC_RDMA_READ:
- return (DAT_DTO_RDMA_READ);
- case IBV_WC_BIND_MW:
- return (DAT_DTO_BIND_MW);
-#ifdef DAT_EXTENSIONS
- case IBV_WC_RDMA_WRITE:
- if (cqe_p->wc_flags & IBV_WC_WITH_IMM)
- return (DAT_IB_DTO_RDMA_WRITE_IMMED);
- else
- return (DAT_DTO_RDMA_WRITE);
- case IBV_WC_COMP_SWAP:
- return (DAT_IB_DTO_CMP_SWAP);
- case IBV_WC_FETCH_ADD:
- return (DAT_IB_DTO_FETCH_ADD);
- case IBV_WC_RECV_RDMA_WITH_IMM:
- return (DAT_IB_DTO_RECV_IMMED);
-#else
- case IBV_WC_RDMA_WRITE:
- return (DAT_DTO_RDMA_WRITE);
-#endif
- case IBV_WC_RECV:
-#ifdef DAT_EXTENSIONS
- if (CQE_WR_TYPE_UD(cqe_p->wr_id))
- return (DAT_IB_DTO_RECV_UD);
- else if (cqe_p->wc_flags & IBV_WC_WITH_IMM)
- return (DAT_IB_DTO_RECV_MSG_IMMED);
- else
-#endif
- return (DAT_DTO_RECEIVE);
- default:
- return (0xff);
- }
-}
-#define DAPL_GET_CQE_DTOS_OPTYPE(cqe_p) dapls_cqe_dtos_opcode(cqe_p)
-
-
-#ifdef DAT_EXTENSIONS
-/*
- * dapls_ib_post_ext_send
- *
- * Provider specific extended Post SEND function for atomics
- * OP_COMP_AND_SWAP and OP_FETCH_AND_ADD
- */
-STATIC _INLINE_ DAT_RETURN
-dapls_ib_post_ext_send (
- IN DAPL_EP *ep_ptr,
- IN ib_send_op_type_t op_type,
- IN DAPL_COOKIE *cookie,
- IN DAT_COUNT segments,
- IN DAT_LMR_TRIPLET *local_iov,
- IN const DAT_RMR_TRIPLET *remote_iov,
- IN DAT_UINT32 immed_data,
- IN DAT_UINT64 compare_add,
- IN DAT_UINT64 swap,
- IN DAT_COMPLETION_FLAGS completion_flags,
- IN DAT_IB_ADDR_HANDLE *remote_ah)
-{
- struct ibv_send_wr wr;
- struct ibv_send_wr *bad_wr;
- ib_data_segment_t *ds = (ib_data_segment_t *)local_iov;
- DAT_COUNT i, total_len;
- int ret;
-
- dapl_dbg_log(DAPL_DBG_TYPE_EP,
-		     " post_ext_snd: ep %p op %d ck %p sgs"
-		     " %d l_iov %p r_iov %p f %d ah %p\n",
- ep_ptr, op_type, cookie, segments, local_iov,
- remote_iov, completion_flags, remote_ah);
-
- /* setup the work request */
- wr.next = 0;
- wr.opcode = op_type;
- wr.num_sge = segments;
- wr.send_flags = 0;
- wr.wr_id = (uint64_t)(uintptr_t)cookie;
- wr.sg_list = ds;
- total_len = 0;
-
- if (cookie != NULL) {
- for (i = 0; i < segments; i++ ) {
- dapl_dbg_log(DAPL_DBG_TYPE_EP,
- " post_snd: lkey 0x%x va %p len %d\n",
- ds->lkey, ds->addr, ds->length );
- total_len += ds->length;
- ds++;
- }
- cookie->val.dto.size = total_len;
- }
-
- switch (op_type) {
- case OP_RDMA_WRITE_IMM:
-		/* OP_RDMA_WRITE_IMM has direct IB wr_type mapping */
- dapl_dbg_log(DAPL_DBG_TYPE_EP,
- " post_ext: rkey 0x%x va %#016Lx immed=0x%x\n",
- remote_iov?remote_iov->rmr_context:0,
- remote_iov?remote_iov->virtual_address:0,
- immed_data);
-
- wr.imm_data = immed_data;
- if (wr.num_sge) {
- wr.wr.rdma.remote_addr = remote_iov->virtual_address;
- wr.wr.rdma.rkey = remote_iov->rmr_context;
- }
- break;
- case OP_COMP_AND_SWAP:
- /* OP_COMP_AND_SWAP has direct IB wr_type mapping */
- dapl_dbg_log(DAPL_DBG_TYPE_EP,
- " post_ext: OP_COMP_AND_SWAP=%lx,"
- "%lx rkey 0x%x va %#016Lx\n",
- compare_add, swap, remote_iov->rmr_context,
- remote_iov->virtual_address);
-
- wr.wr.atomic.compare_add = compare_add;
- wr.wr.atomic.swap = swap;
- wr.wr.atomic.remote_addr = remote_iov->virtual_address;
- wr.wr.atomic.rkey = remote_iov->rmr_context;
- break;
- case OP_FETCH_AND_ADD:
- /* OP_FETCH_AND_ADD has direct IB wr_type mapping */
-		dapl_dbg_log(DAPL_DBG_TYPE_EP,
-			     " post_ext: OP_FETCH_AND_ADD=%lx"
-			     " rkey 0x%x va %#016Lx\n",
-			     compare_add, remote_iov->rmr_context,
-			     remote_iov->virtual_address);
-
- wr.wr.atomic.compare_add = compare_add;
- wr.wr.atomic.remote_addr = remote_iov->virtual_address;
- wr.wr.atomic.rkey = remote_iov->rmr_context;
- break;
- case OP_SEND_UD:
- /* post must be on EP with service_type of UD */
- if (ep_ptr->qp_handle->qp_type != IBV_QPT_UD)
- return(DAT_ERROR(DAT_INVALID_HANDLE, DAT_INVALID_HANDLE_EP));
-
- dapl_dbg_log(DAPL_DBG_TYPE_EP,
- " post_ext: OP_SEND_UD ah=%p"
- " qp_num=0x%x\n",
- remote_ah, remote_ah->qpn);
-
- wr.opcode = OP_SEND;
- wr.wr.ud.ah = remote_ah->ah;
- wr.wr.ud.remote_qpn = remote_ah->qpn;
- wr.wr.ud.remote_qkey = SCM_UD_QKEY;
- break;
- default:
- break;
- }
-
- /* set completion flags in work request */
- wr.send_flags |= (DAT_COMPLETION_SUPPRESS_FLAG &
- completion_flags) ? 0 : IBV_SEND_SIGNALED;
- wr.send_flags |= (DAT_COMPLETION_BARRIER_FENCE_FLAG &
- completion_flags) ? IBV_SEND_FENCE : 0;
- wr.send_flags |= (DAT_COMPLETION_SOLICITED_WAIT_FLAG &
- completion_flags) ? IBV_SEND_SOLICITED : 0;
-
- dapl_dbg_log(DAPL_DBG_TYPE_EP,
- " post_snd: op 0x%x flags 0x%x sglist %p, %d\n",
- wr.opcode, wr.send_flags, wr.sg_list, wr.num_sge);
-
- ret = ibv_post_send(ep_ptr->qp_handle, &wr, &bad_wr);
-
- if (ret)
- return( dapl_convert_errno(errno,"ibv_send") );
-
-#ifdef DAPL_COUNTERS
- switch (op_type) {
- case OP_RDMA_WRITE_IMM:
- DAPL_CNTR(ep_ptr, DCNT_EP_POST_WRITE_IMM);
- DAPL_CNTR_DATA(ep_ptr,
- DCNT_EP_POST_WRITE_IMM_DATA, total_len);
- break;
- case OP_COMP_AND_SWAP:
- DAPL_CNTR(ep_ptr, DCNT_EP_POST_CMP_SWAP);
- break;
- case OP_FETCH_AND_ADD:
- DAPL_CNTR(ep_ptr, DCNT_EP_POST_FETCH_ADD);
- break;
- case OP_SEND_UD:
- DAPL_CNTR(ep_ptr, DCNT_EP_POST_SEND_UD);
- DAPL_CNTR_DATA(ep_ptr, DCNT_EP_POST_SEND_UD_DATA, total_len);
- break;
- default:
- break;
- }
-#endif /* DAPL_COUNTERS */
-
- dapl_dbg_log(DAPL_DBG_TYPE_EP," post_snd: returned\n");
- return DAT_SUCCESS;
-}
-#endif
-
-STATIC _INLINE_ DAT_RETURN
-dapls_ib_optional_prv_dat(
- IN DAPL_CR *cr_ptr,
- IN const void *event_data,
- OUT DAPL_CR **cr_pp)
-{
- return DAT_SUCCESS;
-}
-
-
-/* map Work Completions to DAPL WR operations */
-STATIC _INLINE_ int dapls_cqe_opcode(ib_work_completion_t *cqe_p)
-{
-#ifdef DAPL_COUNTERS
- DAPL_COOKIE *cookie = (DAPL_COOKIE *)(uintptr_t)cqe_p->wr_id;
-#endif /* DAPL_COUNTERS */
-
- switch (cqe_p->opcode) {
- case IBV_WC_SEND:
- if (CQE_WR_TYPE_UD(cqe_p->wr_id))
- return(OP_SEND_UD);
- else
- return (OP_SEND);
- case IBV_WC_RDMA_WRITE:
- if (cqe_p->wc_flags & IBV_WC_WITH_IMM)
- return (OP_RDMA_WRITE_IMM);
- else
- return (OP_RDMA_WRITE);
- case IBV_WC_RDMA_READ:
- return (OP_RDMA_READ);
- case IBV_WC_COMP_SWAP:
- return (OP_COMP_AND_SWAP);
- case IBV_WC_FETCH_ADD:
- return (OP_FETCH_AND_ADD);
- case IBV_WC_BIND_MW:
- return (OP_BIND_MW);
- case IBV_WC_RECV:
- if (CQE_WR_TYPE_UD(cqe_p->wr_id)) {
- DAPL_CNTR(cookie->ep, DCNT_EP_RECV_UD);
- DAPL_CNTR_DATA(cookie->ep, DCNT_EP_RECV_UD_DATA,
- cqe_p->byte_len);
- return (OP_RECV_UD);
- }
- else if (cqe_p->wc_flags & IBV_WC_WITH_IMM) {
- DAPL_CNTR(cookie->ep, DCNT_EP_RECV_IMM);
- DAPL_CNTR_DATA(cookie->ep, DCNT_EP_RECV_IMM_DATA,
- cqe_p->byte_len);
- return (OP_RECEIVE_IMM);
- } else {
- DAPL_CNTR(cookie->ep, DCNT_EP_RECV);
- DAPL_CNTR_DATA(cookie->ep, DCNT_EP_RECV_DATA,
- cqe_p->byte_len);
- return (OP_RECEIVE);
- }
- case IBV_WC_RECV_RDMA_WITH_IMM:
- DAPL_CNTR(cookie->ep, DCNT_EP_RECV_RDMA_IMM);
- DAPL_CNTR_DATA(cookie->ep, DCNT_EP_RECV_RDMA_IMM_DATA,
- cqe_p->byte_len);
- return (OP_RECEIVE_IMM);
- default:
- return (OP_INVALID);
- }
-}
-
-#define DAPL_GET_CQE_OPTYPE(cqe_p) dapls_cqe_opcode(cqe_p)
-#define DAPL_GET_CQE_WRID(cqe_p) ((ib_work_completion_t*)cqe_p)->wr_id
-#define DAPL_GET_CQE_STATUS(cqe_p) ((ib_work_completion_t*)cqe_p)->status
-#define DAPL_GET_CQE_VENDOR_ERR(cqe_p) ((ib_work_completion_t*)cqe_p)->vendor_err
-#define DAPL_GET_CQE_BYTESNUM(cqe_p) ((ib_work_completion_t*)cqe_p)->byte_len
-#define DAPL_GET_CQE_IMMED_DATA(cqe_p) ((ib_work_completion_t*)cqe_p)->imm_data
-
-STATIC _INLINE_ char * dapls_dto_op_str(int op)
-{
- static char *optable[] =
- {
- "OP_RDMA_WRITE",
- "OP_RDMA_WRITE_IMM",
- "OP_SEND",
- "OP_SEND_IMM",
- "OP_RDMA_READ",
- "OP_COMP_AND_SWAP",
- "OP_FETCH_AND_ADD",
- "OP_RECEIVE",
- "OP_RECEIVE_MSG_IMM",
- "OP_RECEIVE_RDMA_IMM",
-		"OP_BIND_MW",
-		"OP_SEND_UD",
-		"OP_RECV_UD"
- };
- return ((op < 0 || op > 12) ? "Invalid CQE OP?" : optable[op]);
-}
-
-static _INLINE_ char *
-dapls_cqe_op_str(IN ib_work_completion_t *cqe_ptr)
-{
- return dapls_dto_op_str(DAPL_GET_CQE_OPTYPE(cqe_ptr));
-}
-
-#define DAPL_GET_CQE_OP_STR(cqe) dapls_cqe_op_str(cqe)
-
-#endif /* _DAPL_IB_DTO_H_ */
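
The completion-flag handling repeated in both post paths above reduces to a
three-way mapping from DAT_COMPLETION_* flags onto ibv_send_wr.send_flags.
A minimal sketch of just that mapping, for reference; the helper name is
illustrative and not part of the patch, and it assumes the DAT and libibverbs
headers already pulled in by dapl_ib_dto.h:

/* illustrative only: DAT completion flags -> libibverbs send flags */
static inline int dapl_to_ibv_send_flags(DAT_COMPLETION_FLAGS flags)
{
	int sf = 0;

	/* completions are signaled unless the consumer suppresses them */
	if (!(DAT_COMPLETION_SUPPRESS_FLAG & flags))
		sf |= IBV_SEND_SIGNALED;
	/* fence orders this WR behind prior RDMA reads/atomics */
	if (DAT_COMPLETION_BARRIER_FENCE_FLAG & flags)
		sf |= IBV_SEND_FENCE;
	/* ask for a solicited event at the remote CQ */
	if (DAT_COMPLETION_SOLICITED_WAIT_FLAG & flags)
		sf |= IBV_SEND_SOLICITED;

	return sf;
}
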
diff --git a/dapl/openib_scm/dapl_ib_extensions.c b/dapl/openib_scm/dapl_ib_extensions.c
deleted file mode 100755
index 98a07ec..0000000
--- a/dapl/openib_scm/dapl_ib_extensions.c
+++ /dev/null
@@ -1,371 +0,0 @@
-/*
- * Copyright (c) 2007 Intel Corporation. All rights reserved.
- *
- * This Software is licensed under one of the following licenses:
- *
- * 1) under the terms of the "Common Public License 1.0" a copy of which is
- * available from the Open Source Initiative, see
- * http://www.opensource.org/licenses/cpl.php.
- *
- * 2) under the terms of the "The BSD License" a copy of which is
- * available from the Open Source Initiative, see
- * http://www.opensource.org/licenses/bsd-license.php.
- *
- * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
- * copy of which is available from the Open Source Initiative, see
- * http://www.opensource.org/licenses/gpl-license.php.
- *
- * Licensee has the right to choose one of the above licenses.
- *
- * Redistributions of source code must retain the above copyright
- * notice and one of the license notices.
- *
- * Redistributions in binary form must reproduce both the above copyright
- * notice, one of the license notices in the documentation
- * and/or other materials provided with the distribution.
- */
-
-/**********************************************************************
- *
- * MODULE: dapl_ib_extensions.c
- *
- * PURPOSE: Extension routines for OpenIB SCM provider
- *
- * $Id: $
- *
- **********************************************************************/
-
-#include "dapl.h"
-#include "dapl_adapter_util.h"
-#include "dapl_evd_util.h"
-#include "dapl_ib_util.h"
-#include "dapl_ep_util.h"
-#include "dapl_cookie.h"
-#include <stdarg.h>
-
-DAT_RETURN
-dapli_post_ext(IN DAT_EP_HANDLE ep_handle,
- IN DAT_UINT64 cmp_add,
- IN DAT_UINT64 swap,
- IN DAT_UINT32 immed_data,
- IN DAT_COUNT segments,
- IN DAT_LMR_TRIPLET * local_iov,
- IN DAT_DTO_COOKIE user_cookie,
- IN const DAT_RMR_TRIPLET * remote_iov,
- IN int op_type,
- IN DAT_COMPLETION_FLAGS flags, IN DAT_IB_ADDR_HANDLE * ah);
-
-/*
- * dapl_extensions
- *
- * Process extension requests
- *
- * Input:
- * ext_type,
- * ...
- *
- * Output:
- * Depends....
- *
- * Returns:
- * DAT_SUCCESS
- * DAT_NOT_IMPLEMENTED
- * .....
- *
- */
-DAT_RETURN
-dapl_extensions(IN DAT_HANDLE dat_handle,
- IN DAT_EXTENDED_OP ext_op, IN va_list args)
-{
- DAT_EP_HANDLE ep;
- DAT_IB_ADDR_HANDLE *ah = NULL;
- DAT_LMR_TRIPLET *lmr_p;
- DAT_DTO_COOKIE cookie;
- const DAT_RMR_TRIPLET *rmr_p;
- DAT_UINT64 dat_uint64a, dat_uint64b;
- DAT_UINT32 dat_uint32;
- DAT_COUNT segments = 1;
- DAT_COMPLETION_FLAGS comp_flags;
- DAT_RETURN status = DAT_NOT_IMPLEMENTED;
-
- dapl_dbg_log(DAPL_DBG_TYPE_API,
- "dapl_extensions(hdl %p operation %d, ...)\n",
- dat_handle, ext_op);
-
- switch ((int)ext_op) {
-
- case DAT_IB_RDMA_WRITE_IMMED_OP:
- dapl_dbg_log(DAPL_DBG_TYPE_RTN,
- " WRITE_IMMED_DATA extension call\n");
-
- ep = dat_handle; /* ep_handle */
- segments = va_arg(args, DAT_COUNT); /* num segments */
- lmr_p = va_arg(args, DAT_LMR_TRIPLET *);
- cookie = va_arg(args, DAT_DTO_COOKIE);
- rmr_p = va_arg(args, const DAT_RMR_TRIPLET *);
- dat_uint32 = va_arg(args, DAT_UINT32); /* immed data */
- comp_flags = va_arg(args, DAT_COMPLETION_FLAGS);
-
- status = dapli_post_ext(ep, 0, 0, dat_uint32, segments, lmr_p,
- cookie, rmr_p, OP_RDMA_WRITE_IMM,
- comp_flags, ah);
- break;
-
- case DAT_IB_CMP_AND_SWAP_OP:
- dapl_dbg_log(DAPL_DBG_TYPE_RTN,
- " CMP_AND_SWAP extension call\n");
-
- ep = dat_handle; /* ep_handle */
- dat_uint64a = va_arg(args, DAT_UINT64); /* cmp_value */
- dat_uint64b = va_arg(args, DAT_UINT64); /* swap_value */
- lmr_p = va_arg(args, DAT_LMR_TRIPLET *);
- cookie = va_arg(args, DAT_DTO_COOKIE);
- rmr_p = va_arg(args, const DAT_RMR_TRIPLET *);
- comp_flags = va_arg(args, DAT_COMPLETION_FLAGS);
-
- status = dapli_post_ext(ep, dat_uint64a, dat_uint64b,
- 0, segments, lmr_p, cookie, rmr_p,
- OP_COMP_AND_SWAP, comp_flags, ah);
- break;
-
- case DAT_IB_FETCH_AND_ADD_OP:
- dapl_dbg_log(DAPL_DBG_TYPE_RTN,
- " FETCH_AND_ADD extension call\n");
-
- ep = dat_handle; /* ep_handle */
- dat_uint64a = va_arg(args, DAT_UINT64); /* add value */
- lmr_p = va_arg(args, DAT_LMR_TRIPLET *);
- cookie = va_arg(args, DAT_DTO_COOKIE);
- rmr_p = va_arg(args, const DAT_RMR_TRIPLET *);
- comp_flags = va_arg(args, DAT_COMPLETION_FLAGS);
-
- status = dapli_post_ext(ep, dat_uint64a, 0, 0, segments,
- lmr_p, cookie, rmr_p,
- OP_FETCH_AND_ADD, comp_flags, ah);
- break;
-
- case DAT_IB_UD_SEND_OP:
- dapl_dbg_log(DAPL_DBG_TYPE_RTN,
- " UD post_send extension call\n");
-
- ep = dat_handle; /* ep_handle */
- segments = va_arg(args, DAT_COUNT); /* segments */
- lmr_p = va_arg(args, DAT_LMR_TRIPLET *);
- ah = va_arg(args, DAT_IB_ADDR_HANDLE *);
- cookie = va_arg(args, DAT_DTO_COOKIE);
- comp_flags = va_arg(args, DAT_COMPLETION_FLAGS);
-
- status = dapli_post_ext(ep, 0, 0, 0, segments,
- lmr_p, cookie, NULL,
- OP_SEND_UD, comp_flags, ah);
- break;
-
-#ifdef DAPL_COUNTERS
- case DAT_QUERY_COUNTERS_OP:
- {
- int cntr, reset;
- DAT_UINT64 *p_cntr_out;
-
- dapl_dbg_log(DAPL_DBG_TYPE_RTN,
- " Query counter extension call\n");
-
- cntr = va_arg(args, int);
- p_cntr_out = va_arg(args, DAT_UINT64 *);
- reset = va_arg(args, int);
-
- status = dapl_query_counter(dat_handle, cntr,
- p_cntr_out, reset);
- break;
- }
- case DAT_PRINT_COUNTERS_OP:
- {
- int cntr, reset;
-
- dapl_dbg_log(DAPL_DBG_TYPE_RTN,
- " Print counter extension call\n");
-
- cntr = va_arg(args, int);
- reset = va_arg(args, int);
-
- dapl_print_counter(dat_handle, cntr, reset);
- status = DAT_SUCCESS;
- break;
- }
-#endif /* DAPL_COUNTERS */
-
- default:
- dapl_dbg_log(DAPL_DBG_TYPE_ERR,
- "unsupported extension(%d)\n", (int)ext_op);
- }
-
- return (status);
-}
-
-DAT_RETURN
-dapli_post_ext(IN DAT_EP_HANDLE ep_handle,
- IN DAT_UINT64 cmp_add,
- IN DAT_UINT64 swap,
- IN DAT_UINT32 immed_data,
- IN DAT_COUNT segments,
- IN DAT_LMR_TRIPLET * local_iov,
- IN DAT_DTO_COOKIE user_cookie,
- IN const DAT_RMR_TRIPLET * remote_iov,
- IN int op_type,
- IN DAT_COMPLETION_FLAGS flags, IN DAT_IB_ADDR_HANDLE * ah)
-{
- DAPL_EP *ep_ptr;
- ib_qp_handle_t qp_ptr;
- DAPL_COOKIE *cookie = NULL;
- DAT_RETURN dat_status = DAT_SUCCESS;
-
- dapl_dbg_log(DAPL_DBG_TYPE_API,
- " post_ext_op: ep %p cmp_val %d "
- "swap_val %d cookie 0x%x, r_iov %p, flags 0x%x, ah %p\n",
- ep_handle, (unsigned)cmp_add, (unsigned)swap,
- (unsigned)user_cookie.as_64, remote_iov, flags, ah);
-
- if (DAPL_BAD_HANDLE(ep_handle, DAPL_MAGIC_EP))
- return (DAT_ERROR(DAT_INVALID_HANDLE, DAT_INVALID_HANDLE_EP));
-
- ep_ptr = (DAPL_EP *) ep_handle;
- qp_ptr = ep_ptr->qp_handle;
-
- /*
- * Synchronization ok since this buffer is only used for send
- * requests, which aren't allowed to race with each other.
- */
- dat_status = dapls_dto_cookie_alloc(&ep_ptr->req_buffer,
- DAPL_DTO_TYPE_EXTENSION,
- user_cookie, &cookie);
- if (dat_status != DAT_SUCCESS)
- goto bail;
-
- /*
- * Take reference before posting to avoid race conditions with
- * completions
- */
- dapl_os_atomic_inc(&ep_ptr->req_count);
-
- /*
- * Invoke provider specific routine to post DTO
- */
- dat_status = dapls_ib_post_ext_send(ep_ptr, op_type, cookie, segments, /* data segments */
- local_iov, remote_iov, immed_data, /* immed data */
- cmp_add, /* compare or add */
- swap, /* swap */
- flags, ah);
-
- if (dat_status != DAT_SUCCESS) {
- dapl_os_atomic_dec(&ep_ptr->req_count);
- dapls_cookie_dealloc(&ep_ptr->req_buffer, cookie);
- }
-
- bail:
- return dat_status;
-
-}
-
-/*
- * New provider routine to process extended DTO events
- */
-void
-dapls_cqe_to_event_extension(IN DAPL_EP * ep_ptr,
- IN DAPL_COOKIE * cookie,
- IN ib_work_completion_t * cqe_ptr,
- IN DAT_EVENT * event_ptr)
-{
- uint32_t ibtype;
- DAT_DTO_COMPLETION_EVENT_DATA *dto =
- &event_ptr->event_data.dto_completion_event_data;
- DAT_IB_EXTENSION_EVENT_DATA *ext_data = (DAT_IB_EXTENSION_EVENT_DATA *)
- & event_ptr->event_extension_data[0];
- DAT_DTO_COMPLETION_STATUS dto_status;
-
- /* Get status from cqe */
- dto_status = dapls_ib_get_dto_status(cqe_ptr);
-
- dapl_dbg_log(DAPL_DBG_TYPE_EVD,
- " cqe_to_event_ext: dto_ptr %p ext_ptr %p status %d\n",
- dto, ext_data, dto_status);
-
- event_ptr->event_number = DAT_IB_DTO_EVENT;
- dto->ep_handle = cookie->ep;
- dto->user_cookie = cookie->val.dto.cookie;
- dto->operation = DAPL_GET_CQE_DTOS_OPTYPE(cqe_ptr); /* new for 2.0 */
- dto->status = ext_data->status = dto_status;
-
- if (dto_status != DAT_DTO_SUCCESS)
- return;
-
- /*
-	 * Get operation type from CQ work completion entry and,
-	 * if it is an extended operation, set the extended event data
- */
- ibtype = DAPL_GET_CQE_OPTYPE(cqe_ptr);
-
- switch (ibtype) {
-
- case OP_RDMA_WRITE_IMM:
- dapl_dbg_log(DAPL_DBG_TYPE_EVD,
- " cqe_to_event_ext: OP_RDMA_WRITE_IMMED\n");
-
- /* type and outbound rdma write transfer size */
- dto->transfered_length = cookie->val.dto.size;
- ext_data->type = DAT_IB_RDMA_WRITE_IMMED;
- break;
- case OP_RECEIVE_IMM:
- dapl_dbg_log(DAPL_DBG_TYPE_EVD,
- " cqe_to_event_ext: OP_RECEIVE_RDMA_IMMED\n");
-
- /* immed recvd, type and inbound rdma write transfer size */
- dto->transfered_length = DAPL_GET_CQE_BYTESNUM(cqe_ptr);
- ext_data->type = DAT_IB_RDMA_WRITE_IMMED_DATA;
- ext_data->val.immed.data = DAPL_GET_CQE_IMMED_DATA(cqe_ptr);
- break;
- case OP_RECEIVE_MSG_IMM:
- dapl_dbg_log(DAPL_DBG_TYPE_EVD,
- " cqe_to_event_ext: OP_RECEIVE_MSG_IMMED\n");
-
- /* immed recvd, type and inbound recv message transfer size */
- dto->transfered_length = DAPL_GET_CQE_BYTESNUM(cqe_ptr);
- ext_data->type = DAT_IB_RECV_IMMED_DATA;
- ext_data->val.immed.data = DAPL_GET_CQE_IMMED_DATA(cqe_ptr);
- break;
- case OP_COMP_AND_SWAP:
- dapl_dbg_log(DAPL_DBG_TYPE_EVD,
- " cqe_to_event_ext: COMP_AND_SWAP_RESP\n");
-
- /* original data is returned in LMR provided with post */
- ext_data->type = DAT_IB_CMP_AND_SWAP;
- dto->transfered_length = DAPL_GET_CQE_BYTESNUM(cqe_ptr);
- break;
- case OP_FETCH_AND_ADD:
- dapl_dbg_log(DAPL_DBG_TYPE_EVD,
- " cqe_to_event_ext: FETCH_AND_ADD_RESP\n");
-
- /* original data is returned in LMR provided with post */
- ext_data->type = DAT_IB_FETCH_AND_ADD;
- dto->transfered_length = DAPL_GET_CQE_BYTESNUM(cqe_ptr);
- break;
- case OP_SEND_UD:
- dapl_dbg_log(DAPL_DBG_TYPE_EVD, " cqe_to_event_ext: UD_SEND\n");
-
- /* type and outbound send transfer size */
- ext_data->type = DAT_IB_UD_SEND;
- dto->transfered_length = cookie->val.dto.size;
- break;
- case OP_RECV_UD:
- dapl_dbg_log(DAPL_DBG_TYPE_EVD, " cqe_to_event_ext: UD_RECV\n");
-
- /* type and inbound recv message transfer size */
- ext_data->type = DAT_IB_UD_RECV;
- dto->transfered_length = DAPL_GET_CQE_BYTESNUM(cqe_ptr);
- break;
-
- default:
- /* not extended operation */
- ext_data->status = DAT_IB_OP_ERR;
- dto->status = DAT_DTO_ERR_TRANSPORT;
- break;
- }
-}
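
For context on what dapls_cqe_to_event_extension() above hands back to the
consumer, a hypothetical event handler could read the extended data as in the
sketch below. The handler name and the printf are illustrative; the event
fields and DAT_IB_* type codes are the ones set in the function above, and the
DAT 2.0 headers are assumed to be included:

#include <stdio.h>

static void handle_ext_dto_event(DAT_EVENT *event)
{
	DAT_IB_EXTENSION_EVENT_DATA *ext = (DAT_IB_EXTENSION_EVENT_DATA *)
	    &event->event_extension_data[0];
	DAT_DTO_COMPLETION_EVENT_DATA *dto =
	    &event->event_data.dto_completion_event_data;

	if (event->event_number != DAT_IB_DTO_EVENT ||
	    dto->status != DAT_DTO_SUCCESS)
		return;

	switch (ext->type) {
	case DAT_IB_RDMA_WRITE_IMMED_DATA:	/* peer rdma write with immed */
	case DAT_IB_RECV_IMMED_DATA:		/* peer send with immed */
		printf("immed 0x%x, %d bytes\n", ext->val.immed.data,
		       (int)dto->transfered_length);
		break;
	case DAT_IB_CMP_AND_SWAP:
	case DAT_IB_FETCH_AND_ADD:
		/* original remote value is in the LMR supplied at post time */
		break;
	default:
		break;
	}
}
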
diff --git a/dapl/openib_scm/dapl_ib_mem.c b/dapl/openib_scm/dapl_ib_mem.c
deleted file mode 100644
index e45a2b3..0000000
--- a/dapl/openib_scm/dapl_ib_mem.c
+++ /dev/null
@@ -1,382 +0,0 @@
- /*
- * Copyright (c) 2005-2007 Intel Corporation. All rights reserved.
- *
- * This Software is licensed under one of the following licenses:
- *
- * 1) under the terms of the "Common Public License 1.0" a copy of which is
- * available from the Open Source Initiative, see
- * http://www.opensource.org/licenses/cpl.php.
- *
- * 2) under the terms of the "The BSD License" a copy of which is
- * available from the Open Source Initiative, see
- * http://www.opensource.org/licenses/bsd-license.php.
- *
- * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
- * copy of which is available from the Open Source Initiative, see
- * http://www.opensource.org/licenses/gpl-license.php.
- *
- * Licensee has the right to choose one of the above licenses.
- *
- * Redistributions of source code must retain the above copyright
- * notice and one of the license notices.
- *
- * Redistributions in binary form must reproduce both the above copyright
- * notice, one of the license notices in the documentation
- * and/or other materials provided with the distribution.
- */
-
-/**********************************************************************
- *
- * MODULE: dapl_ib_mem.c
- *
- * PURPOSE: Memory windows, registration, and protection domain
- *
- * $Id:$
- *
- **********************************************************************/
-
-#include "dapl.h"
-#include "dapl_adapter_util.h"
-#include "dapl_lmr_util.h"
-
-/*
- * dapls_convert_privileges
- *
- * Convert LMR privileges to provider
- *
- * Input:
- * DAT_MEM_PRIV_FLAGS
- *
- * Output:
- * none
- *
- * Returns:
- * ibv_access_flags
- *
- */
-STATIC _INLINE_ int dapls_convert_privileges(IN DAT_MEM_PRIV_FLAGS privileges)
-{
- int access = 0;
-
- /*
- * if (DAT_MEM_PRIV_LOCAL_READ_FLAG & privileges) do nothing
- */
- if (DAT_MEM_PRIV_LOCAL_WRITE_FLAG & privileges)
- access |= IBV_ACCESS_LOCAL_WRITE;
- if (DAT_MEM_PRIV_REMOTE_WRITE_FLAG & privileges)
- access |= IBV_ACCESS_REMOTE_WRITE;
-	if (DAT_MEM_PRIV_REMOTE_READ_FLAG & privileges)
-		access |= IBV_ACCESS_REMOTE_READ;
-#ifdef DAT_EXTENSIONS
- if (DAT_IB_MEM_PRIV_REMOTE_ATOMIC & privileges)
- access |= IBV_ACCESS_REMOTE_ATOMIC;
-#endif
-
- return access;
-}
-
-/*
- * dapl_ib_pd_alloc
- *
- * Alloc a PD
- *
- * Input:
- * ia_handle IA handle
- * pz pointer to PZ struct
- *
- * Output:
- * none
- *
- * Returns:
- * DAT_SUCCESS
- * DAT_INSUFFICIENT_RESOURCES
- *
- */
-DAT_RETURN dapls_ib_pd_alloc(IN DAPL_IA * ia_ptr, IN DAPL_PZ * pz)
-{
- /* get a protection domain */
- pz->pd_handle = ibv_alloc_pd(ia_ptr->hca_ptr->ib_hca_handle);
- if (!pz->pd_handle)
- return (dapl_convert_errno(ENOMEM, "alloc_pd"));
-
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- " pd_alloc: pd_handle=%p\n", pz->pd_handle);
-
- return DAT_SUCCESS;
-}
-
-/*
- * dapl_ib_pd_free
- *
- * Free a PD
- *
- * Input:
- * ia_handle IA handle
- * PZ_ptr pointer to PZ struct
- *
- * Output:
- * none
- *
- * Returns:
- * DAT_SUCCESS
- * DAT_INVALID_STATE
- *
- */
-DAT_RETURN dapls_ib_pd_free(IN DAPL_PZ * pz)
-{
- if (pz->pd_handle != IB_INVALID_HANDLE) {
- if (ibv_dealloc_pd(pz->pd_handle))
- return (dapl_convert_errno(errno, "ibv_dealloc_pd"));
- pz->pd_handle = IB_INVALID_HANDLE;
- }
- return DAT_SUCCESS;
-}
-
-/*
- * dapl_ib_mr_register
- *
- * Register a virtual memory region
- *
- * Input:
- * ia_handle IA handle
- * lmr pointer to dapl_lmr struct
- * virt_addr virtual address of beginning of mem region
- * length length of memory region
- *
- * Output:
- * none
- *
- * Returns:
- * DAT_SUCCESS
- * DAT_INSUFFICIENT_RESOURCES
- *
- */
-DAT_RETURN
-dapls_ib_mr_register(IN DAPL_IA * ia_ptr,
- IN DAPL_LMR * lmr,
- IN DAT_PVOID virt_addr,
- IN DAT_VLEN length,
- IN DAT_MEM_PRIV_FLAGS privileges, IN DAT_VA_TYPE va_type)
-{
- ib_pd_handle_t ib_pd_handle;
- struct ibv_device *ibv_dev = ia_ptr->hca_ptr->ib_hca_handle->device;
-
- ib_pd_handle = ((DAPL_PZ *) lmr->param.pz_handle)->pd_handle;
-
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- " mr_register: ia=%p, lmr=%p va=%p ln=%d pv=0x%x\n",
- ia_ptr, lmr, virt_addr, length, privileges);
-
- /* TODO: shared memory */
- if (lmr->param.mem_type == DAT_MEM_TYPE_SHARED_VIRTUAL) {
- dapl_dbg_log(DAPL_DBG_TYPE_ERR,
- " mr_register_shared: NOT IMPLEMENTED\n");
- return DAT_ERROR(DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);
- }
-
- /* iWARP only support */
- if ((va_type == DAT_VA_TYPE_ZB) &&
- (ibv_dev->transport_type != IBV_TRANSPORT_IWARP)) {
- dapl_dbg_log(DAPL_DBG_TYPE_ERR,
- " va_type == DAT_VA_TYPE_ZB: NOT SUPPORTED\n");
- return DAT_ERROR(DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);
- }
-
- /* local read is default on IB */
- lmr->mr_handle =
- ibv_reg_mr(((DAPL_PZ *) lmr->param.pz_handle)->pd_handle,
- virt_addr, length, dapls_convert_privileges(privileges));
-
- if (!lmr->mr_handle)
- return (dapl_convert_errno(ENOMEM, "reg_mr"));
-
- lmr->param.lmr_context = lmr->mr_handle->lkey;
- lmr->param.rmr_context = lmr->mr_handle->rkey;
- lmr->param.registered_size = length;
- lmr->param.registered_address = (DAT_VADDR) (uintptr_t) virt_addr;
-
-	dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-		     " mr_register: mr=%p addr=%p pd %p ctx %p "
-		     "lkey=0x%x rkey=0x%x len=%d priv=%x\n",
-		     lmr->mr_handle, lmr->mr_handle->addr,
-		     lmr->mr_handle->pd, lmr->mr_handle->context,
-		     lmr->mr_handle->lkey, lmr->mr_handle->rkey,
-		     length, dapls_convert_privileges(privileges));
-
- return DAT_SUCCESS;
-}
-
-/*
- * dapl_ib_mr_deregister
- *
- * Free a memory region
- *
- * Input:
- * lmr pointer to dapl_lmr struct
- *
- * Output:
- * none
- *
- * Returns:
- * DAT_SUCCESS
- * DAT_INVALID_STATE
- *
- */
-DAT_RETURN dapls_ib_mr_deregister(IN DAPL_LMR * lmr)
-{
- if (lmr->mr_handle != IB_INVALID_HANDLE) {
- if (ibv_dereg_mr(lmr->mr_handle))
-			return (dapl_convert_errno(errno, "dereg_mr"));
- lmr->mr_handle = IB_INVALID_HANDLE;
- }
- return DAT_SUCCESS;
-}
-
-/*
- * dapl_ib_mr_register_shared
- *
- * Register a virtual memory region
- *
- * Input:
- * ia_ptr IA handle
- * lmr pointer to dapl_lmr struct
- * privileges
- * va_type
- *
- * Output:
- * none
- *
- * Returns:
- * DAT_SUCCESS
- * DAT_INSUFFICIENT_RESOURCES
- *
- */
-DAT_RETURN
-dapls_ib_mr_register_shared(IN DAPL_IA * ia_ptr,
- IN DAPL_LMR * lmr,
- IN DAT_MEM_PRIV_FLAGS privileges,
- IN DAT_VA_TYPE va_type)
-{
- dapl_dbg_log(DAPL_DBG_TYPE_ERR,
- " mr_register_shared: NOT IMPLEMENTED\n");
-
- return DAT_ERROR(DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);
-}
-
-/*
- * dapls_ib_mw_alloc
- *
- * Bind a protection domain to a memory window
- *
- * Input:
- * rmr Initialized rmr to hold binding handles
- *
- * Output:
- * none
- *
- * Returns:
- * DAT_SUCCESS
- * DAT_INSUFFICIENT_RESOURCES
- *
- */
-DAT_RETURN dapls_ib_mw_alloc(IN DAPL_RMR * rmr)
-{
-
- dapl_dbg_log(DAPL_DBG_TYPE_ERR, " mw_alloc: NOT IMPLEMENTED\n");
-
- return DAT_ERROR(DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);
-}
-
-/*
- * dapls_ib_mw_free
- *
- * Release bindings of a protection domain to a memory window
- *
- * Input:
- * rmr Initialized rmr to hold binding handles
- *
- * Output:
- * none
- *
- * Returns:
- * DAT_SUCCESS
- * DAT_INVALID_STATE
- *
- */
-DAT_RETURN dapls_ib_mw_free(IN DAPL_RMR * rmr)
-{
- dapl_dbg_log(DAPL_DBG_TYPE_ERR, " mw_free: NOT IMPLEMENTED\n");
-
- return DAT_ERROR(DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);
-}
-
-/*
- * dapls_ib_mw_bind
- *
- * Bind a protection domain to a memory window
- *
- * Input:
- * rmr Initialized rmr to hold binding handles
- *
- * Output:
- * none
- *
- * Returns:
- * DAT_SUCCESS
- * DAT_INVALID_PARAMETER;
- * DAT_INSUFFICIENT_RESOURCES
- *
- */
-DAT_RETURN
-dapls_ib_mw_bind(IN DAPL_RMR * rmr,
- IN DAPL_LMR * lmr,
- IN DAPL_EP * ep,
- IN DAPL_COOKIE * cookie,
- IN DAT_VADDR virtual_address,
- IN DAT_VLEN length,
- IN DAT_MEM_PRIV_FLAGS mem_priv, IN DAT_BOOLEAN is_signaled)
-{
- dapl_dbg_log(DAPL_DBG_TYPE_ERR, " mw_bind: NOT IMPLEMENTED\n");
-
- return DAT_ERROR(DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);
-}
-
-/*
- * dapls_ib_mw_unbind
- *
- * Unbind a protection domain from a memory window
- *
- * Input:
- * rmr Initialized rmr to hold binding handles
- *
- * Output:
- * none
- *
- * Returns:
- * DAT_SUCCESS
- * DAT_INVALID_PARAMETER;
- * DAT_INVALID_STATE;
- * DAT_INSUFFICIENT_RESOURCES
- *
- */
-DAT_RETURN
-dapls_ib_mw_unbind(IN DAPL_RMR * rmr,
- IN DAPL_EP * ep,
- IN DAPL_COOKIE * cookie, IN DAT_BOOLEAN is_signaled)
-{
- dapl_dbg_log(DAPL_DBG_TYPE_ERR, " mw_unbind: NOT IMPLEMENTED\n");
-
- return DAT_ERROR(DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);
-}
-
-/*
- * Local variables:
- * c-indent-level: 4
- * c-basic-offset: 4
- * tab-width: 8
- * End:
- */
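
The only policy in that file is the privilege conversion at the top; restated
as a table for reference (illustrative form only, the provider keeps the
if-chain, and local read needs no IBV access bit since it is implicit for
ibv_reg_mr):

/* illustrative restatement of dapls_convert_privileges() */
static const struct {
	DAT_MEM_PRIV_FLAGS dat_flag;
	int ibv_access;
} priv_map[] = {
	{ DAT_MEM_PRIV_LOCAL_WRITE_FLAG,  IBV_ACCESS_LOCAL_WRITE },
	{ DAT_MEM_PRIV_REMOTE_WRITE_FLAG, IBV_ACCESS_REMOTE_WRITE },
	{ DAT_MEM_PRIV_REMOTE_READ_FLAG,  IBV_ACCESS_REMOTE_READ },
#ifdef DAT_EXTENSIONS
	{ DAT_IB_MEM_PRIV_REMOTE_ATOMIC,  IBV_ACCESS_REMOTE_ATOMIC },
#endif
};
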
diff --git a/dapl/openib_scm/dapl_ib_qp.c b/dapl/openib_scm/dapl_ib_qp.c
deleted file mode 100644
index f943ff8..0000000
--- a/dapl/openib_scm/dapl_ib_qp.c
+++ /dev/null
@@ -1,513 +0,0 @@
-/*
- * This Software is licensed under one of the following licenses:
- *
- * 1) under the terms of the "Common Public License 1.0" a copy of which is
- * available from the Open Source Initiative, see
- * http://www.opensource.org/licenses/cpl.php.
- *
- * 2) under the terms of the "The BSD License" a copy of which is
- * available from the Open Source Initiative, see
- * http://www.opensource.org/licenses/bsd-license.php.
- *
- * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
- * copy of which is available from the Open Source Initiative, see
- * http://www.opensource.org/licenses/gpl-license.php.
- *
- * Licensee has the right to choose one of the above licenses.
- *
- * Redistributions of source code must retain the above copyright
- * notice and one of the license notices.
- *
- * Redistributions in binary form must reproduce both the above copyright
- * notice, one of the license notices in the documentation
- * and/or other materials provided with the distribution.
- */
-
-/**********************************************************************
- *
- * MODULE: dapl_ib_qp.c
- *
- * PURPOSE: QP routines for access to ofa rdma verbs
- *
- * $Id: $
- **********************************************************************/
-
-#include "dapl.h"
-#include "dapl_adapter_util.h"
-
-/*
- * dapl_ib_qp_alloc
- *
- * Alloc a QP
- *
- * Input:
- * *ep_ptr pointer to EP INFO
- * ib_hca_handle provider HCA handle
- * ib_pd_handle provider protection domain handle
- * cq_recv provider recv CQ handle
- * cq_send provider send CQ handle
- *
- * Output:
- * none
- *
- * Returns:
- * DAT_SUCCESS
- * DAT_INSUFFICIENT_RESOURCES
- * DAT_INTERNAL_ERROR
- *
- */
-DAT_RETURN
-dapls_ib_qp_alloc(IN DAPL_IA * ia_ptr,
- IN DAPL_EP * ep_ptr, IN DAPL_EP * ep_ctx_ptr)
-{
- DAT_EP_ATTR *attr;
- DAPL_EVD *rcv_evd, *req_evd;
- ib_cq_handle_t rcv_cq, req_cq;
- ib_pd_handle_t ib_pd_handle;
- struct ibv_qp_init_attr qp_create;
-
- dapl_dbg_log(DAPL_DBG_TYPE_EP,
- " qp_alloc: ia_ptr %p ep_ptr %p ep_ctx_ptr %p\n",
- ia_ptr, ep_ptr, ep_ctx_ptr);
-
- attr = &ep_ptr->param.ep_attr;
- ib_pd_handle = ((DAPL_PZ *) ep_ptr->param.pz_handle)->pd_handle;
- rcv_evd = (DAPL_EVD *) ep_ptr->param.recv_evd_handle;
- req_evd = (DAPL_EVD *) ep_ptr->param.request_evd_handle;
-
- /*
- * DAT allows usage model of EP's with no EVD's but IB does not.
- * Create a CQ with zero entries under the covers to support and
- * catch any invalid posting.
- */
- if (rcv_evd != DAT_HANDLE_NULL)
- rcv_cq = rcv_evd->ib_cq_handle;
- else if (!ia_ptr->hca_ptr->ib_trans.ib_cq_empty)
- rcv_cq = ia_ptr->hca_ptr->ib_trans.ib_cq_empty;
- else {
- struct ibv_comp_channel *channel =
- ia_ptr->hca_ptr->ib_trans.ib_cq;
-#ifdef CQ_WAIT_OBJECT
- if (rcv_evd->cq_wait_obj_handle)
- channel = rcv_evd->cq_wait_obj_handle;
-#endif
- /* Call IB verbs to create CQ */
- rcv_cq = ibv_create_cq(ia_ptr->hca_ptr->ib_hca_handle,
- 0, NULL, channel, 0);
-
- if (rcv_cq == IB_INVALID_HANDLE)
- return (dapl_convert_errno(ENOMEM, "create_cq"));
-
- ia_ptr->hca_ptr->ib_trans.ib_cq_empty = rcv_cq;
- }
- if (req_evd != DAT_HANDLE_NULL)
- req_cq = req_evd->ib_cq_handle;
- else
- req_cq = ia_ptr->hca_ptr->ib_trans.ib_cq_empty;
-
- /* Setup attributes and create qp */
- dapl_os_memzero((void *)&qp_create, sizeof(qp_create));
- qp_create.send_cq = req_cq;
- qp_create.cap.max_send_wr = attr->max_request_dtos;
- qp_create.cap.max_send_sge = attr->max_request_iov;
- qp_create.cap.max_inline_data =
- ia_ptr->hca_ptr->ib_trans.max_inline_send;
- qp_create.qp_type = IBV_QPT_RC;
-
-#ifdef DAT_EXTENSIONS
- if (attr->service_type == DAT_IB_SERVICE_TYPE_UD) {
- qp_create.qp_type = IBV_QPT_UD;
- if (attr->max_message_size >
- (128 << ia_ptr->hca_ptr->ib_trans.mtu)) {
- return (DAT_INVALID_PARAMETER | DAT_INVALID_ARG6);
- }
- }
-#endif
- qp_create.qp_context = (void *)ep_ptr;
-
- /* ibv assumes rcv_cq is never NULL, set to req_cq */
- if (rcv_cq == NULL) {
- qp_create.recv_cq = req_cq;
- qp_create.cap.max_recv_wr = 0;
- qp_create.cap.max_recv_sge = 0;
- } else {
- qp_create.recv_cq = rcv_cq;
- qp_create.cap.max_recv_wr = attr->max_recv_dtos;
- qp_create.cap.max_recv_sge = attr->max_recv_iov;
- }
-
- ep_ptr->qp_handle = ibv_create_qp(ib_pd_handle, &qp_create);
- if (!ep_ptr->qp_handle)
- return (dapl_convert_errno(ENOMEM, "create_qp"));
-
- dapl_dbg_log(DAPL_DBG_TYPE_EP,
-		     " qp_alloc: qpn 0x%x sq %d,%d rq %d,%d\n",
- ep_ptr->qp_handle->qp_num,
- qp_create.cap.max_send_wr, qp_create.cap.max_send_sge,
- qp_create.cap.max_recv_wr, qp_create.cap.max_recv_sge);
-
- /* Setup QP attributes for INIT state on the way out */
- if (dapls_modify_qp_state(ep_ptr->qp_handle,
- IBV_QPS_INIT, NULL) != DAT_SUCCESS) {
- ibv_destroy_qp(ep_ptr->qp_handle);
- ep_ptr->qp_handle = IB_INVALID_HANDLE;
- return DAT_INTERNAL_ERROR;
- }
-
- return DAT_SUCCESS;
-}
-
-/*
- * dapl_ib_qp_free
- *
- * Free a QP
- *
- * Input:
- * ia_handle IA handle
- * *ep_ptr pointer to EP INFO
- *
- * Output:
- * none
- *
- * Returns:
- * DAT_SUCCESS
- * dapl_convert_errno
- *
- */
-DAT_RETURN dapls_ib_qp_free(IN DAPL_IA * ia_ptr, IN DAPL_EP * ep_ptr)
-{
- dapl_dbg_log(DAPL_DBG_TYPE_EP, " qp_free: ep_ptr %p qp %p\n",
- ep_ptr, ep_ptr->qp_handle);
-
- if (ep_ptr->qp_handle != IB_INVALID_HANDLE) {
- /* force error state to flush queue, then destroy */
- dapls_modify_qp_state(ep_ptr->qp_handle, IBV_QPS_ERR, NULL);
-
- if (ibv_destroy_qp(ep_ptr->qp_handle))
- return (dapl_convert_errno(errno, "destroy_qp"));
-
- ep_ptr->qp_handle = IB_INVALID_HANDLE;
- }
-
-#ifdef DAT_EXTENSIONS
-{
- dp_ib_cm_handle_t cr, next_cr;
-
- /*
- * UD CR objects are kept active because of direct private data references
- * from CONN events. The cr->socket is closed and marked inactive but the
- * object remains allocated and queued on the CR resource list. There can
-	 * be multiple CR's associated with a given EP. There is no way to determine
-	 * when the consumer is finished with an event until dat_ep_free.
- *
- * Schedule destruction for all CR's associated with this EP, cr_thread will
- * complete the cleanup with state == SCM_DESTROY.
- */
- dapl_os_lock(&ia_ptr->hca_ptr->ib_trans.lock);
- if (!dapl_llist_is_empty((DAPL_LLIST_HEAD*)
- &ia_ptr->hca_ptr->ib_trans.list))
- next_cr = dapl_llist_peek_head((DAPL_LLIST_HEAD*)
- &ia_ptr->hca_ptr->ib_trans.list);
- else
- next_cr = NULL;
-
- while (next_cr) {
- cr = next_cr;
- next_cr = dapl_llist_next_entry((DAPL_LLIST_HEAD*)
- &ia_ptr->hca_ptr->ib_trans.list,
- (DAPL_LLIST_ENTRY*)&cr->entry);
- if (cr->ep == ep_ptr) {
- dapl_dbg_log(DAPL_DBG_TYPE_EP,
- " qp_free CR: ep %p cr %p\n", ep_ptr, cr);
- dapli_socket_disconnect(cr);
- dapl_os_lock(&cr->lock);
- cr->ep = NULL;
- cr->state = SCM_DESTROY;
- dapl_os_unlock(&cr->lock);
- }
- }
- dapl_os_unlock(&ia_ptr->hca_ptr->ib_trans.lock);
- send(ia_ptr->hca_ptr->ib_trans.scm[1], "w", sizeof "w", 0);
-}
-#endif
-
- return DAT_SUCCESS;
-}
-
-/*
- * dapl_ib_qp_modify
- *
- * Set the QP to the parameters specified in an EP_PARAM
- *
- * The EP_PARAM structure that is provided has been
- * sanitized such that only non-zero values are valid.
- *
- * Input:
- * ib_hca_handle HCA handle
- * qp_handle QP handle
- * ep_attr Sanitized EP Params
- *
- * Output:
- * none
- *
- * Returns:
- * DAT_SUCCESS
- * DAT_INSUFFICIENT_RESOURCES
- * DAT_INVALID_PARAMETER
- *
- */
-DAT_RETURN
-dapls_ib_qp_modify(IN DAPL_IA * ia_ptr,
- IN DAPL_EP * ep_ptr, IN DAT_EP_ATTR * attr)
-{
- struct ibv_qp_attr qp_attr;
-
- if (ep_ptr->qp_handle == IB_INVALID_HANDLE)
- return DAT_INVALID_PARAMETER;
-
- /*
- * EP state, qp_handle state should be an indication
- * of current state but the only way to be sure is with
- * a user mode ibv_query_qp call which is NOT available
- */
-
- /* move to error state if necessary */
- if ((ep_ptr->qp_state == IB_QP_STATE_ERROR) &&
- (ep_ptr->qp_handle->state != IBV_QPS_ERR)) {
- return (dapls_modify_qp_state(ep_ptr->qp_handle,
- IBV_QPS_ERR, NULL));
- }
-
- /*
- * Check if we have the right qp_state to modify attributes
- */
- if ((ep_ptr->qp_handle->state != IBV_QPS_RTR) &&
- (ep_ptr->qp_handle->state != IBV_QPS_RTS))
- return DAT_INVALID_STATE;
-
- /* Adjust to current EP attributes */
- dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr));
- qp_attr.cap.max_send_wr = attr->max_request_dtos;
- qp_attr.cap.max_recv_wr = attr->max_recv_dtos;
- qp_attr.cap.max_send_sge = attr->max_request_iov;
- qp_attr.cap.max_recv_sge = attr->max_recv_iov;
-
- dapl_dbg_log(DAPL_DBG_TYPE_EP,
- "modify_qp: qp %p sq %d,%d, rq %d,%d\n",
- ep_ptr->qp_handle,
- qp_attr.cap.max_send_wr, qp_attr.cap.max_send_sge,
- qp_attr.cap.max_recv_wr, qp_attr.cap.max_recv_sge);
-
- if (ibv_modify_qp(ep_ptr->qp_handle, &qp_attr, IBV_QP_CAP)) {
- dapl_dbg_log(DAPL_DBG_TYPE_ERR,
- "modify_qp: modify ep %p qp %p failed\n",
- ep_ptr, ep_ptr->qp_handle);
- return (dapl_convert_errno(errno, "modify_qp_state"));
- }
-
- return DAT_SUCCESS;
-}
-
-/*
- * dapls_ib_reinit_ep
- *
- * Move the QP to INIT state again.
- *
- * Input:
- * ep_ptr DAPL_EP
- *
- * Output:
- * none
- *
- * Returns:
- * void
- *
- */
-#if defined(_WIN32) || defined(_WIN64)
-void dapls_ib_reinit_ep(IN DAPL_EP * ep_ptr)
-{
- /* work around bug in low level driver - 3/24/09 */
- /* RTS -> RESET -> INIT -> ERROR QP transition crashes system */
- if (ep_ptr->qp_handle != IB_INVALID_HANDLE) {
- dapls_ib_qp_free(ep_ptr->header.owner_ia, ep_ptr);
- dapls_ib_qp_alloc(ep_ptr->header.owner_ia, ep_ptr, ep_ptr);
- }
-}
-#else // _WIN32 || _WIN64
-void dapls_ib_reinit_ep(IN DAPL_EP * ep_ptr)
-{
- if (ep_ptr->qp_handle != IB_INVALID_HANDLE &&
- ep_ptr->qp_handle->qp_type != IBV_QPT_UD) {
- /* move to RESET state and then to INIT */
- dapls_modify_qp_state(ep_ptr->qp_handle, IBV_QPS_RESET, 0);
- dapls_modify_qp_state(ep_ptr->qp_handle, IBV_QPS_INIT, 0);
- }
-}
-#endif // _WIN32 || _WIN64
-
-/*
- * Generic QP modify for init, reset, error, RTS, RTR
- * For UD, create_ah on RTR, qkey on INIT
- */
-DAT_RETURN
-dapls_modify_qp_state(IN ib_qp_handle_t qp_handle,
- IN ib_qp_state_t qp_state, IN struct ib_cm_handle *cm_ptr)
-{
- struct ibv_qp_attr qp_attr;
- enum ibv_qp_attr_mask mask = IBV_QP_STATE;
- DAPL_EP *ep_ptr = (DAPL_EP *) qp_handle->qp_context;
- DAPL_IA *ia_ptr = ep_ptr->header.owner_ia;
- ib_qp_cm_t *qp_cm = &cm_ptr->dst;
- int ret;
-
- dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr));
- qp_attr.qp_state = qp_state;
- switch (qp_state) {
- /* additional attributes with RTR and RTS */
- case IBV_QPS_RTR:
- {
- dapl_dbg_log(DAPL_DBG_TYPE_EP,
- " QPS_RTR: type %d state %d qpn %x lid %x"
- " port %x ep %p qp_state %d\n",
-			    qp_handle->qp_type, qp_handle->state,
- qp_cm->qpn, qp_cm->lid, qp_cm->port,
- ep_ptr, ep_ptr->qp_state);
-
- mask |= IBV_QP_AV |
- IBV_QP_PATH_MTU |
- IBV_QP_DEST_QPN |
- IBV_QP_RQ_PSN |
- IBV_QP_MAX_DEST_RD_ATOMIC | IBV_QP_MIN_RNR_TIMER;
-
- qp_attr.dest_qp_num = qp_cm->qpn;
- qp_attr.rq_psn = 1;
- qp_attr.path_mtu = ia_ptr->hca_ptr->ib_trans.mtu;
- qp_attr.max_dest_rd_atomic =
- ep_ptr->param.ep_attr.max_rdma_read_out;
- qp_attr.min_rnr_timer =
- ia_ptr->hca_ptr->ib_trans.rnr_timer;
-
- /* address handle. RC and UD */
- qp_attr.ah_attr.dlid = qp_cm->lid;
- if (ia_ptr->hca_ptr->ib_trans.global) {
- qp_attr.ah_attr.is_global = 1;
- qp_attr.ah_attr.grh.dgid = qp_cm->gid;
- qp_attr.ah_attr.grh.hop_limit =
- ia_ptr->hca_ptr->ib_trans.hop_limit;
- qp_attr.ah_attr.grh.traffic_class =
- ia_ptr->hca_ptr->ib_trans.tclass;
- }
- qp_attr.ah_attr.sl = 0;
- qp_attr.ah_attr.src_path_bits = 0;
- qp_attr.ah_attr.port_num = ia_ptr->hca_ptr->port_num;
-#ifdef DAT_EXTENSIONS
- /* UD: create AH for remote side */
- if (qp_handle->qp_type == IBV_QPT_UD) {
- ib_pd_handle_t pz;
- pz = ((DAPL_PZ *)
- ep_ptr->param.pz_handle)->pd_handle;
- mask = IBV_QP_STATE;
- cm_ptr->ah = ibv_create_ah(pz,
- &qp_attr.ah_attr);
- if (!cm_ptr->ah)
- return (dapl_convert_errno(errno,
- "ibv_ah"));
-
- /* already RTR, multi remote AH's on QP */
- if (ep_ptr->qp_state == IBV_QPS_RTR ||
- ep_ptr->qp_state == IBV_QPS_RTS)
- return DAT_SUCCESS;
- }
-#endif
- break;
- }
- case IBV_QPS_RTS:
- {
- /* RC only */
- if (qp_handle->qp_type == IBV_QPT_RC) {
- mask |= IBV_QP_SQ_PSN |
- IBV_QP_TIMEOUT |
- IBV_QP_RETRY_CNT |
- IBV_QP_RNR_RETRY | IBV_QP_MAX_QP_RD_ATOMIC;
- qp_attr.timeout =
- ia_ptr->hca_ptr->ib_trans.ack_timer;
- qp_attr.retry_cnt =
- ia_ptr->hca_ptr->ib_trans.ack_retry;
- qp_attr.rnr_retry =
- ia_ptr->hca_ptr->ib_trans.rnr_retry;
- qp_attr.max_rd_atomic =
- ep_ptr->param.ep_attr.max_rdma_read_out;
- }
- /* RC and UD */
- qp_attr.qp_state = IBV_QPS_RTS;
- qp_attr.sq_psn = 1;
-
- dapl_dbg_log(DAPL_DBG_TYPE_EP,
- " QPS_RTS: psn %x rd_atomic %d ack %d "
- " retry %d rnr_retry %d ep %p qp_state %d\n",
- qp_attr.sq_psn, qp_attr.max_rd_atomic,
- qp_attr.timeout, qp_attr.retry_cnt,
- qp_attr.rnr_retry, ep_ptr,
- ep_ptr->qp_state);
-#ifdef DAT_EXTENSIONS
- if (qp_handle->qp_type == IBV_QPT_UD) {
- /* already RTS, multi remote AH's on QP */
- if (ep_ptr->qp_state == IBV_QPS_RTS)
- return DAT_SUCCESS;
- else
- mask = IBV_QP_STATE | IBV_QP_SQ_PSN;
- }
-#endif
- break;
- }
- case IBV_QPS_INIT:
- {
- mask |= IBV_QP_PKEY_INDEX | IBV_QP_PORT;
- if (qp_handle->qp_type == IBV_QPT_RC) {
- mask |= IBV_QP_ACCESS_FLAGS;
- qp_attr.qp_access_flags =
- IBV_ACCESS_LOCAL_WRITE |
- IBV_ACCESS_REMOTE_WRITE |
- IBV_ACCESS_REMOTE_READ |
- IBV_ACCESS_REMOTE_ATOMIC |
- IBV_ACCESS_MW_BIND;
- }
-#ifdef DAT_EXTENSIONS
- if (qp_handle->qp_type == IBV_QPT_UD) {
- /* already INIT, multi remote AH's on QP */
- if (ep_ptr->qp_state == IBV_QPS_INIT)
- return DAT_SUCCESS;
- mask |= IBV_QP_QKEY;
- qp_attr.qkey = SCM_UD_QKEY;
- }
-#endif
- qp_attr.pkey_index = 0;
- qp_attr.port_num = ia_ptr->hca_ptr->port_num;
-
- dapl_dbg_log(DAPL_DBG_TYPE_EP,
- " QPS_INIT: pi %x port %x acc %x qkey 0x%x\n",
- qp_attr.pkey_index, qp_attr.port_num,
- qp_attr.qp_access_flags, qp_attr.qkey);
- break;
- }
- default:
- break;
-
- }
-
- ret = ibv_modify_qp(qp_handle, &qp_attr, mask);
- if (ret == 0) {
- ep_ptr->qp_state = qp_state;
- return DAT_SUCCESS;
- } else {
- return (dapl_convert_errno(errno, "modify_qp_state"));
- }
-}
-
-/*
- * Local variables:
- * c-indent-level: 4
- * c-basic-offset: 4
- * tab-width: 8
- * End:
- */
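
dapls_modify_qp_state() above is the whole RC state machine for the socket CM
path; stripped of the DAPL plumbing it is the usual INIT -> RTR -> RTS
sequence. A bare-verbs sketch follows, with placeholder constants standing in
for the values the SCM exchange and the DAPL_* environment overrides normally
supply:

#include <errno.h>
#include <string.h>
#include <infiniband/verbs.h>

/* illustrative RC bring-up; 'dlid'/'dqpn' come from the peer exchange */
static int rc_bringup(struct ibv_qp *qp, uint8_t port, uint16_t dlid,
		      uint32_t dqpn, enum ibv_mtu mtu)
{
	struct ibv_qp_attr a;
	int ret;

	memset(&a, 0, sizeof(a));
	a.qp_state = IBV_QPS_INIT;
	a.pkey_index = 0;
	a.port_num = port;
	a.qp_access_flags = IBV_ACCESS_LOCAL_WRITE |
	    IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ;
	ret = ibv_modify_qp(qp, &a, IBV_QP_STATE | IBV_QP_PKEY_INDEX |
			    IBV_QP_PORT | IBV_QP_ACCESS_FLAGS);
	if (ret)
		return ret;

	memset(&a, 0, sizeof(a));
	a.qp_state = IBV_QPS_RTR;
	a.path_mtu = mtu;
	a.dest_qp_num = dqpn;
	a.rq_psn = 1;
	a.max_dest_rd_atomic = 4;	/* cf. max_rdma_read_out above */
	a.min_rnr_timer = 12;		/* cf. DAPL_RNR_TIMER */
	a.ah_attr.dlid = dlid;
	a.ah_attr.port_num = port;
	ret = ibv_modify_qp(qp, &a, IBV_QP_STATE | IBV_QP_AV |
			    IBV_QP_PATH_MTU | IBV_QP_DEST_QPN | IBV_QP_RQ_PSN |
			    IBV_QP_MAX_DEST_RD_ATOMIC | IBV_QP_MIN_RNR_TIMER);
	if (ret)
		return ret;

	memset(&a, 0, sizeof(a));
	a.qp_state = IBV_QPS_RTS;
	a.sq_psn = 1;
	a.timeout = 16;			/* cf. DAPL_ACK_TIMER */
	a.retry_cnt = 7;		/* cf. DAPL_ACK_RETRY */
	a.rnr_retry = 7;		/* cf. DAPL_RNR_RETRY */
	a.max_rd_atomic = 4;
	return ibv_modify_qp(qp, &a, IBV_QP_STATE | IBV_QP_SQ_PSN |
			     IBV_QP_TIMEOUT | IBV_QP_RETRY_CNT |
			     IBV_QP_RNR_RETRY | IBV_QP_MAX_QP_RD_ATOMIC);
}
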
diff --git a/dapl/openib_scm/dapl_ib_util.c b/dapl/openib_scm/dapl_ib_util.c
deleted file mode 100644
index ad30f73..0000000
--- a/dapl/openib_scm/dapl_ib_util.c
+++ /dev/null
@@ -1,743 +0,0 @@
-/*
- * This Software is licensed under one of the following licenses:
- *
- * 1) under the terms of the "Common Public License 1.0" a copy of which is
- * available from the Open Source Initiative, see
- * http://www.opensource.org/licenses/cpl.php.
- *
- * 2) under the terms of the "The BSD License" a copy of which is
- * available from the Open Source Initiative, see
- * http://www.opensource.org/licenses/bsd-license.php.
- *
- * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
- * copy of which is available from the Open Source Initiative, see
- * http://www.opensource.org/licenses/gpl-license.php.
- *
- * Licensee has the right to choose one of the above licenses.
- *
- * Redistributions of source code must retain the above copyright
- * notice and one of the license notices.
- *
- * Redistributions in binary form must reproduce both the above copyright
- * notice, one of the license notices in the documentation
- * and/or other materials provided with the distribution.
- */
-
-/***************************************************************************
- *
- * Module: uDAPL
- *
- * Filename: dapl_ib_util.c
- *
- * Author: Arlin Davis
- *
- * Created: 3/10/2005
- *
- * Description:
- *
- * The uDAPL openib provider - init, open, close, utilities
- *
- ****************************************************************************
- * Source Control System Information
- *
- * $Id: $
- *
- * Copyright (c) 2005 Intel Corporation. All rights reserved.
- *
- **************************************************************************/
-#ifdef RCSID
-static const char rcsid[] = "$Id: $";
-#endif
-
-#include "openib_osd.h"
-#include "dapl.h"
-#include "dapl_adapter_util.h"
-#include "dapl_ib_util.h"
-#include "dapl_osd.h"
-
-#include <stdlib.h>
-
-int g_dapl_loopback_connection = 0;
-
-enum ibv_mtu dapl_ib_mtu(int mtu)
-{
- switch (mtu) {
- case 256:
- return IBV_MTU_256;
- case 512:
- return IBV_MTU_512;
- case 1024:
- return IBV_MTU_1024;
- case 2048:
- return IBV_MTU_2048;
- case 4096:
- return IBV_MTU_4096;
- default:
- return IBV_MTU_1024;
- }
-}
-
-char *dapl_ib_mtu_str(enum ibv_mtu mtu)
-{
- switch (mtu) {
- case IBV_MTU_256:
- return "256";
- case IBV_MTU_512:
- return "512";
- case IBV_MTU_1024:
- return "1024";
- case IBV_MTU_2048:
- return "2048";
- case IBV_MTU_4096:
- return "4096";
- default:
- return "1024";
- }
-}
-
-static DAT_RETURN getlocalipaddr(DAT_SOCK_ADDR * addr, int addr_len)
-{
- struct sockaddr_in *sin;
- struct addrinfo *res, hint, *ai;
- int ret;
- char hostname[256];
-
- if (addr_len < sizeof(*sin)) {
- return DAT_INTERNAL_ERROR;
- }
-
- ret = gethostname(hostname, 256);
- if (ret)
- return dapl_convert_errno(ret, "gethostname");
-
- memset(&hint, 0, sizeof hint);
- hint.ai_flags = AI_PASSIVE;
- hint.ai_family = AF_INET;
- hint.ai_socktype = SOCK_STREAM;
- hint.ai_protocol = IPPROTO_TCP;
-
- ret = getaddrinfo(hostname, NULL, &hint, &res);
- if (ret) {
- dapl_log(DAPL_DBG_TYPE_ERR,
- " getaddrinfo ERR: %d %s\n", ret, gai_strerror(ret));
- return DAT_INVALID_ADDRESS;
- }
-
- ret = DAT_INVALID_ADDRESS;
- for (ai = res; ai; ai = ai->ai_next) {
- sin = (struct sockaddr_in *)ai->ai_addr;
- if (*((uint32_t *) & sin->sin_addr) != htonl(0x7f000001)) {
- *((struct sockaddr_in *)addr) = *sin;
- ret = DAT_SUCCESS;
- break;
- }
- }
-
- freeaddrinfo(res);
- return ret;
-}
-
-static int32_t create_cr_pipe(IN DAPL_HCA * hca_ptr)
-{
- DAPL_SOCKET listen_socket;
- struct sockaddr_in addr;
- socklen_t addrlen = sizeof(addr);
- int ret;
-
- listen_socket = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
- if (listen_socket == DAPL_INVALID_SOCKET)
- return 1;
-
- memset(&addr, 0, sizeof addr);
- addr.sin_family = AF_INET;
- addr.sin_addr.s_addr = htonl(0x7f000001);
- ret = bind(listen_socket, (struct sockaddr *)&addr, sizeof addr);
- if (ret)
- goto err1;
-
- ret = getsockname(listen_socket, (struct sockaddr *)&addr, &addrlen);
- if (ret)
- goto err1;
-
- ret = listen(listen_socket, 0);
- if (ret)
- goto err1;
-
- hca_ptr->ib_trans.scm[1] = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
- if (hca_ptr->ib_trans.scm[1] == DAPL_INVALID_SOCKET)
- goto err1;
-
- ret = connect(hca_ptr->ib_trans.scm[1],
- (struct sockaddr *)&addr, sizeof(addr));
- if (ret)
- goto err2;
-
- hca_ptr->ib_trans.scm[0] = accept(listen_socket, NULL, NULL);
- if (hca_ptr->ib_trans.scm[0] == DAPL_INVALID_SOCKET)
- goto err2;
-
- closesocket(listen_socket);
- return 0;
-
- err2:
- closesocket(hca_ptr->ib_trans.scm[1]);
- err1:
- closesocket(listen_socket);
- return 1;
-}
-
-static void destroy_cr_pipe(IN DAPL_HCA * hca_ptr)
-{
- closesocket(hca_ptr->ib_trans.scm[0]);
- closesocket(hca_ptr->ib_trans.scm[1]);
-}
-
-
-/*
- * dapls_ib_init, dapls_ib_release
- *
- * Initialize Verb related items for device open
- *
- * Input:
- * none
- *
- * Output:
- * none
- *
- * Returns:
- * 0 success, -1 error
- *
- */
-int32_t dapls_ib_init(void)
-{
- return 0;
-}
-
-int32_t dapls_ib_release(void)
-{
- return 0;
-}
-
-#if defined(_WIN64) || defined(_WIN32)
-int dapls_config_comp_channel(struct ibv_comp_channel *channel)
-{
- return 0;
-}
-#else // _WIN64 || WIN32
-int dapls_config_comp_channel(struct ibv_comp_channel *channel)
-{
- int opts;
-
- opts = fcntl(channel->fd, F_GETFL); /* uCQ */
- if (opts < 0 || fcntl(channel->fd, F_SETFL, opts | O_NONBLOCK) < 0) {
- dapl_log(DAPL_DBG_TYPE_ERR,
- " dapls_create_comp_channel: fcntl on ib_cq->fd %d ERR %d %s\n",
- channel->fd, opts, strerror(errno));
- return errno;
- }
-
- return 0;
-}
-#endif
-
-/*
- * dapls_ib_open_hca
- *
- * Open HCA
- *
- * Input:
- * *hca_name pointer to provider device name
- *	*ib_hca_handle_p	pointer to provider HCA handle
- *
- * Output:
- * none
- *
- * Return:
- * DAT_SUCCESS
- * dapl_convert_errno
- *
- */
-DAT_RETURN dapls_ib_open_hca(IN IB_HCA_NAME hca_name, IN DAPL_HCA * hca_ptr)
-{
- struct ibv_device **dev_list;
- struct ibv_port_attr port_attr;
- int i;
- DAT_RETURN dat_status;
-
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- " open_hca: %s - %p\n", hca_name, hca_ptr);
-
- /* get the IP address of the device */
- dat_status = getlocalipaddr((DAT_SOCK_ADDR *) & hca_ptr->hca_address,
- sizeof(DAT_SOCK_ADDR6));
- if (dat_status != DAT_SUCCESS)
- return dat_status;
-
- /* Get list of all IB devices, find match, open */
- dev_list = ibv_get_device_list(NULL);
- if (!dev_list) {
- dapl_dbg_log(DAPL_DBG_TYPE_ERR,
-			     " open_hca: ibv_get_device_list() failed for %s\n",
-			     hca_name);
- return DAT_INTERNAL_ERROR;
- }
-
- for (i = 0; dev_list[i]; ++i) {
- hca_ptr->ib_trans.ib_dev = dev_list[i];
- if (!strcmp(ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
- hca_name))
- goto found;
- }
-
- dapl_log(DAPL_DBG_TYPE_ERR,
- " open_hca: device %s not found\n", hca_name);
- goto err;
-
- found:
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " open_hca: Found dev %s %016llx\n",
- ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
- (unsigned long long)
- ntohll(ibv_get_device_guid(hca_ptr->ib_trans.ib_dev)));
-
- hca_ptr->ib_hca_handle = ibv_open_device(hca_ptr->ib_trans.ib_dev);
- if (!hca_ptr->ib_hca_handle) {
- dapl_log(DAPL_DBG_TYPE_ERR,
- " open_hca: dev open failed for %s, err=%s\n",
- ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
- strerror(errno));
- goto err;
- }
-
- /* get lid for this hca-port, network order */
- if (ibv_query_port(hca_ptr->ib_hca_handle,
- (uint8_t) hca_ptr->port_num, &port_attr)) {
- dapl_log(DAPL_DBG_TYPE_ERR,
- " open_hca: get lid ERR for %s, err=%s\n",
- ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
- strerror(errno));
- goto err;
- } else {
- hca_ptr->ib_trans.lid = htons(port_attr.lid);
- }
-
- /* get gid for this hca-port, network order */
- if (ibv_query_gid(hca_ptr->ib_hca_handle,
- (uint8_t) hca_ptr->port_num,
- 0, &hca_ptr->ib_trans.gid)) {
- dapl_log(DAPL_DBG_TYPE_ERR,
- " open_hca: query GID ERR for %s, err=%s\n",
- ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
- strerror(errno));
- goto err;
- }
-
-	/* set RC tunables via environment or default */
- hca_ptr->ib_trans.max_inline_send =
- dapl_os_get_env_val("DAPL_MAX_INLINE", INLINE_SEND_DEFAULT);
- hca_ptr->ib_trans.ack_retry =
- dapl_os_get_env_val("DAPL_ACK_RETRY", SCM_ACK_RETRY);
- hca_ptr->ib_trans.ack_timer =
- dapl_os_get_env_val("DAPL_ACK_TIMER", SCM_ACK_TIMER);
- hca_ptr->ib_trans.rnr_retry =
- dapl_os_get_env_val("DAPL_RNR_RETRY", SCM_RNR_RETRY);
- hca_ptr->ib_trans.rnr_timer =
- dapl_os_get_env_val("DAPL_RNR_TIMER", SCM_RNR_TIMER);
- hca_ptr->ib_trans.global =
- dapl_os_get_env_val("DAPL_GLOBAL_ROUTING", SCM_GLOBAL);
- hca_ptr->ib_trans.hop_limit =
- dapl_os_get_env_val("DAPL_HOP_LIMIT", SCM_HOP_LIMIT);
- hca_ptr->ib_trans.tclass =
- dapl_os_get_env_val("DAPL_TCLASS", SCM_TCLASS);
- hca_ptr->ib_trans.mtu =
- dapl_ib_mtu(dapl_os_get_env_val("DAPL_IB_MTU", SCM_IB_MTU));
-
-#ifndef CQ_WAIT_OBJECT
- /* initialize cq_lock */
- dat_status = dapl_os_lock_init(&hca_ptr->ib_trans.cq_lock);
- if (dat_status != DAT_SUCCESS) {
- dapl_log(DAPL_DBG_TYPE_ERR,
- " open_hca: failed to init cq_lock\n");
- goto bail;
- }
- /* EVD events without direct CQ channels, non-blocking */
- hca_ptr->ib_trans.ib_cq =
- ibv_create_comp_channel(hca_ptr->ib_hca_handle);
- if (hca_ptr->ib_trans.ib_cq == NULL) {
- dapl_log(DAPL_DBG_TYPE_ERR,
- " open_hca: ibv_create_comp_channel ERR %s\n",
- strerror(errno));
- goto bail;
- }
-
- if (dapls_config_comp_channel(hca_ptr->ib_trans.ib_cq)) {
- goto bail;
- }
-
- if (dapli_cq_thread_init(hca_ptr)) {
- dapl_log(DAPL_DBG_TYPE_ERR,
- " open_hca: cq_thread_init failed for %s\n",
- ibv_get_device_name(hca_ptr->ib_trans.ib_dev));
- goto bail;
- }
-#endif /* CQ_WAIT_OBJECT */
-
- /* initialize cr_list lock */
- dat_status = dapl_os_lock_init(&hca_ptr->ib_trans.lock);
- if (dat_status != DAT_SUCCESS) {
- dapl_log(DAPL_DBG_TYPE_ERR,
- " open_hca: failed to init cr_list lock\n");
- goto bail;
- }
-
- /* initialize CM list for listens on this HCA */
- dapl_llist_init_head(&hca_ptr->ib_trans.list);
-
- /* initialize pipe, user level wakeup on select */
- if (create_cr_pipe(hca_ptr)) {
- dapl_log(DAPL_DBG_TYPE_ERR,
- " open_hca: failed to init cr pipe - %s\n",
- strerror(errno));
- goto bail;
- }
-
- /* create thread to process inbound connect request */
- hca_ptr->ib_trans.cr_state = IB_THREAD_INIT;
- dat_status = dapl_os_thread_create(cr_thread,
- (void *)hca_ptr,
- &hca_ptr->ib_trans.thread);
- if (dat_status != DAT_SUCCESS) {
- dapl_log(DAPL_DBG_TYPE_ERR,
- " open_hca: failed to create thread\n");
- goto bail;
- }
-
- /* wait for thread */
- while (hca_ptr->ib_trans.cr_state != IB_THREAD_RUN) {
- dapl_os_sleep_usec(2000);
- }
-
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- " open_hca: devname %s, port %d, hostname_IP %s\n",
- ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
- hca_ptr->port_num, inet_ntoa(((struct sockaddr_in *)
- &hca_ptr->hca_address)->
- sin_addr));
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- " open_hca: LID 0x%x GID Subnet 0x" F64x " ID 0x" F64x
- "\n", ntohs(hca_ptr->ib_trans.lid), (unsigned long long)
- htonll(hca_ptr->ib_trans.gid.global.subnet_prefix),
- (unsigned long long)htonll(hca_ptr->ib_trans.gid.global.
- interface_id));
-
- ibv_free_device_list(dev_list);
- return dat_status;
-
- bail:
- ibv_close_device(hca_ptr->ib_hca_handle);
- hca_ptr->ib_hca_handle = IB_INVALID_HANDLE;
- err:
- ibv_free_device_list(dev_list);
- return DAT_INTERNAL_ERROR;
-}
-
-/*
- * dapls_ib_close_hca
- *
- * Close HCA
- *
- * Input:
- *	DAPL_HCA	provider HCA handle
- *
- * Output:
- * none
- *
- * Return:
- * DAT_SUCCESS
- * dapl_convert_errno
- *
- */
-DAT_RETURN dapls_ib_close_hca(IN DAPL_HCA * hca_ptr)
-{
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " close_hca: %p\n", hca_ptr);
-
-#ifndef CQ_WAIT_OBJECT
- dapli_cq_thread_destroy(hca_ptr);
- dapl_os_lock_destroy(&hca_ptr->ib_trans.cq_lock);
-#endif /* CQ_WAIT_OBJECT */
-
- if (hca_ptr->ib_hca_handle != IB_INVALID_HANDLE) {
- if (ibv_close_device(hca_ptr->ib_hca_handle))
- return (dapl_convert_errno(errno, "ib_close_device"));
- hca_ptr->ib_hca_handle = IB_INVALID_HANDLE;
- }
-
- /* destroy cr_thread and lock */
- hca_ptr->ib_trans.cr_state = IB_THREAD_CANCEL;
- if (send(hca_ptr->ib_trans.scm[1], "w", sizeof "w", 0) == -1)
- dapl_log(DAPL_DBG_TYPE_UTIL,
- " thread_destroy: thread wakeup err = %s\n",
- strerror(errno));
- while (hca_ptr->ib_trans.cr_state != IB_THREAD_EXIT) {
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- " close_hca: waiting for cr_thread\n");
- if (send(hca_ptr->ib_trans.scm[1], "w", sizeof "w", 0) == -1)
- dapl_log(DAPL_DBG_TYPE_UTIL,
- " thread_destroy: thread wakeup err = %s\n",
- strerror(errno));
- dapl_os_sleep_usec(2000);
- }
- dapl_os_lock_destroy(&hca_ptr->ib_trans.lock);
- destroy_cr_pipe(hca_ptr); /* no longer need pipe */
- return (DAT_SUCCESS);
-}
-
-/*
- * dapls_ib_query_hca
- *
- * Query the hca attribute
- *
- * Input:
- *	hca_handle	hca handle
- * ia_attr attribute of the ia
- * ep_attr attribute of the ep
- * ip_addr ip address of DET NIC
- *
- * Output:
- * none
- *
- * Returns:
- * DAT_SUCCESS
- * DAT_INVALID_HANDLE
- */
-
-DAT_RETURN dapls_ib_query_hca(IN DAPL_HCA * hca_ptr,
- OUT DAT_IA_ATTR * ia_attr,
- OUT DAT_EP_ATTR * ep_attr,
- OUT DAT_SOCK_ADDR6 * ip_addr)
-{
- struct ibv_device_attr dev_attr;
- struct ibv_port_attr port_attr;
-
- if (hca_ptr->ib_hca_handle == NULL) {
- dapl_dbg_log(DAPL_DBG_TYPE_ERR, " query_hca: BAD handle\n");
- return (DAT_INVALID_HANDLE);
- }
-
- /* local IP address of device, set during ia_open */
- if (ip_addr != NULL)
- memcpy(ip_addr, &hca_ptr->hca_address, sizeof(DAT_SOCK_ADDR6));
-
- if (ia_attr == NULL && ep_attr == NULL)
- return DAT_SUCCESS;
-
- /* query verbs for this device and port attributes */
- if (ibv_query_device(hca_ptr->ib_hca_handle, &dev_attr) ||
- ibv_query_port(hca_ptr->ib_hca_handle,
- hca_ptr->port_num, &port_attr))
- return (dapl_convert_errno(errno, "ib_query_hca"));
-
- if (ia_attr != NULL) {
- (void)dapl_os_memzero(ia_attr, sizeof(*ia_attr));
- ia_attr->adapter_name[DAT_NAME_MAX_LENGTH - 1] = '\0';
- ia_attr->vendor_name[DAT_NAME_MAX_LENGTH - 1] = '\0';
- ia_attr->ia_address_ptr =
- (DAT_IA_ADDRESS_PTR) & hca_ptr->hca_address;
-
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- " query_hca: %s %s \n",
- ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
- inet_ntoa(((struct sockaddr_in *)
- &hca_ptr->hca_address)->sin_addr));
-
- ia_attr->hardware_version_major = dev_attr.hw_ver;
- /* ia_attr->hardware_version_minor = dev_attr.fw_ver; */
- ia_attr->max_eps = dev_attr.max_qp;
- ia_attr->max_dto_per_ep = dev_attr.max_qp_wr;
- ia_attr->max_rdma_read_in = dev_attr.max_qp_rd_atom;
- ia_attr->max_rdma_read_out = dev_attr.max_qp_init_rd_atom;
- ia_attr->max_rdma_read_per_ep_in = dev_attr.max_qp_rd_atom;
- ia_attr->max_rdma_read_per_ep_out =
- dev_attr.max_qp_init_rd_atom;
- ia_attr->max_rdma_read_per_ep_in_guaranteed = DAT_TRUE;
- ia_attr->max_rdma_read_per_ep_out_guaranteed = DAT_TRUE;
- ia_attr->max_evds = dev_attr.max_cq;
- ia_attr->max_evd_qlen = dev_attr.max_cqe;
- ia_attr->max_iov_segments_per_dto = dev_attr.max_sge;
- ia_attr->max_lmrs = dev_attr.max_mr;
- /* 32bit attribute from 64bit, 4G-1 limit, DAT v2 needs fix */
- ia_attr->max_lmr_block_size =
- (dev_attr.max_mr_size >> 32) ? ~0 : dev_attr.max_mr_size;
- ia_attr->max_rmrs = dev_attr.max_mw;
- ia_attr->max_lmr_virtual_address = dev_attr.max_mr_size;
- ia_attr->max_rmr_target_address = dev_attr.max_mr_size;
- ia_attr->max_pzs = dev_attr.max_pd;
- ia_attr->max_message_size = port_attr.max_msg_sz;
- ia_attr->max_rdma_size = port_attr.max_msg_sz;
- ia_attr->max_iov_segments_per_rdma_read = dev_attr.max_sge;
- ia_attr->max_iov_segments_per_rdma_write = dev_attr.max_sge;
- ia_attr->num_transport_attr = 0;
- ia_attr->transport_attr = NULL;
- ia_attr->num_vendor_attr = 0;
- ia_attr->vendor_attr = NULL;
-#ifdef DAT_EXTENSIONS
- ia_attr->extension_supported = DAT_EXTENSION_IB;
- ia_attr->extension_version = DAT_IB_EXTENSION_VERSION;
-#endif
- hca_ptr->ib_trans.mtu = DAPL_MIN(port_attr.active_mtu,
- hca_ptr->ib_trans.mtu);
- hca_ptr->ib_trans.ack_timer =
- DAPL_MAX(dev_attr.local_ca_ack_delay,
- hca_ptr->ib_trans.ack_timer);
-
- /* set MTU in transport specific named attribute */
- hca_ptr->ib_trans.named_attr.name = "DAT_IB_TRANSPORT_MTU";
- hca_ptr->ib_trans.named_attr.value =
- dapl_ib_mtu_str(hca_ptr->ib_trans.mtu);
-
- dapl_log(DAPL_DBG_TYPE_UTIL,
- " query_hca: (%x.%x) ep %d ep_q %d evd %d"
- " evd_q %d mtu %d\n",
- ia_attr->hardware_version_major,
- ia_attr->hardware_version_minor,
- ia_attr->max_eps, ia_attr->max_dto_per_ep,
- ia_attr->max_evds, ia_attr->max_evd_qlen,
- 128 << hca_ptr->ib_trans.mtu);
-
- dapl_log(DAPL_DBG_TYPE_UTIL,
- " query_hca: msg %llu rdma %llu iov %d lmr %d rmr %d"
- " ack_time %d mr %u\n",
- ia_attr->max_message_size, ia_attr->max_rdma_size,
- ia_attr->max_iov_segments_per_dto,
- ia_attr->max_lmrs, ia_attr->max_rmrs,
- hca_ptr->ib_trans.ack_timer,
- ia_attr->max_lmr_block_size);
- }
-
- if (ep_attr != NULL) {
- (void)dapl_os_memzero(ep_attr, sizeof(*ep_attr));
- ep_attr->max_message_size = port_attr.max_msg_sz;
- ep_attr->max_rdma_size = port_attr.max_msg_sz;
- ep_attr->max_recv_dtos = dev_attr.max_qp_wr;
- ep_attr->max_request_dtos = dev_attr.max_qp_wr;
- ep_attr->max_recv_iov = dev_attr.max_sge;
- ep_attr->max_request_iov = dev_attr.max_sge;
- ep_attr->max_rdma_read_in = dev_attr.max_qp_rd_atom;
- ep_attr->max_rdma_read_out = dev_attr.max_qp_init_rd_atom;
- ep_attr->max_rdma_read_iov = dev_attr.max_sge;
- ep_attr->max_rdma_write_iov = dev_attr.max_sge;
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- " query_hca: MAX msg %llu mtu %d dto %d iov %d"
- " rdma i%d,o%d\n",
- ep_attr->max_message_size,
- ep_attr->max_recv_dtos, ep_attr->max_recv_iov,
- ep_attr->max_rdma_read_in,
- ep_attr->max_rdma_read_out);
- }
- return DAT_SUCCESS;
-}
-
-/*
- * dapls_ib_setup_async_callback
- *
- * Set up an asynchronous callbacks of various kinds
- *
- * Input:
- * ia_handle IA handle
- * handler_type type of handler to set up
- * callback_handle handle param for completion callbacks
- * callback callback routine pointer
- * context argument for callback routine
- *
- * Output:
- * none
- *
- * Returns:
- * DAT_SUCCESS
- * DAT_INSUFFICIENT_RESOURCES
- * DAT_INVALID_PARAMETER
- *
- */
-DAT_RETURN dapls_ib_setup_async_callback(IN DAPL_IA * ia_ptr,
- IN DAPL_ASYNC_HANDLER_TYPE
- handler_type, IN DAPL_EVD * evd_ptr,
- IN ib_async_handler_t callback,
- IN void *context)
-{
- ib_hca_transport_t *hca_ptr;
-
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- " setup_async_cb: ia %p type %d handle %p cb %p ctx %p\n",
- ia_ptr, handler_type, evd_ptr, callback, context);
-
- hca_ptr = &ia_ptr->hca_ptr->ib_trans;
- switch (handler_type) {
- case DAPL_ASYNC_UNAFILIATED:
- hca_ptr->async_unafiliated = (ib_async_handler_t) callback;
- hca_ptr->async_un_ctx = context;
- break;
- case DAPL_ASYNC_CQ_ERROR:
- hca_ptr->async_cq_error = (ib_async_cq_handler_t) callback;
- break;
- case DAPL_ASYNC_CQ_COMPLETION:
- hca_ptr->async_cq = (ib_async_dto_handler_t) callback;
- break;
- case DAPL_ASYNC_QP_ERROR:
- hca_ptr->async_qp_error = (ib_async_qp_handler_t) callback;
- break;
- default:
- break;
- }
- return DAT_SUCCESS;
-}
-
-/*
- * dapls_set_provider_specific_attr
- *
- * Input:
- * attr_ptr Pointer provider specific attributes
- *
- * Output:
- * none
- *
- * Returns:
- * void
- */
-DAT_NAMED_ATTR ib_attrs[] = {
- {
- "DAT_IB_TRANSPORT_MTU", "2048"}
- ,
-#ifdef DAT_EXTENSIONS
- {
- "DAT_EXTENSION_INTERFACE", "TRUE"}
- ,
- {
- DAT_IB_ATTR_FETCH_AND_ADD, "TRUE"}
- ,
- {
- DAT_IB_ATTR_CMP_AND_SWAP, "TRUE"}
- ,
- {
- DAT_IB_ATTR_IMMED_DATA, "TRUE"}
- ,
- {
- DAT_IB_ATTR_UD, "TRUE"}
- ,
-#ifdef DAPL_COUNTERS
- {
- DAT_ATTR_COUNTERS, "TRUE"}
- ,
-#endif /* DAPL_COUNTERS */
-#endif
-};
-
-#define SPEC_ATTR_SIZE( x ) (sizeof( x ) / sizeof( DAT_NAMED_ATTR))
-
-void dapls_query_provider_specific_attr(IN DAPL_IA * ia_ptr,
- IN DAT_PROVIDER_ATTR * attr_ptr)
-{
- attr_ptr->num_provider_specific_attr = SPEC_ATTR_SIZE(ib_attrs);
- attr_ptr->provider_specific_attr = ib_attrs;
-
- /* set MTU to actual settings */
- ib_attrs[0].value = ia_ptr->hca_ptr->ib_trans.named_attr.value;
-}
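
For context on the attribute table that just moved into common code: dapls_query_provider_specific_attr() hands the static ib_attrs[] array back to dapl common code and patches entry 0 with the MTU picked at open time. The fragment below is illustrative only and is not part of this patch; it sketches how a DAT 2.0 consumer would read those name/value pairs back through dat_ia_query(). The helper name dump_provider_attrs is hypothetical, the mask constants and header path are from memory of the dat2 headers, and error handling is trimmed.

/* illustrative consumer-side sketch, not part of the patch */
#include <stdio.h>
#include <dat2/udat.h>

static void dump_provider_attrs(DAT_IA_HANDLE ia)
{
	DAT_IA_ATTR ia_attr;
	DAT_PROVIDER_ATTR pr_attr;
	DAT_EVD_HANDLE async_evd;
	int i;

	/* DAT_PROVIDER_FIELD_ALL pulls in the provider_specific_attr table */
	if (dat_ia_query(ia, &async_evd, DAT_IA_FIELD_ALL, &ia_attr,
			 DAT_PROVIDER_FIELD_ALL, &pr_attr) != DAT_SUCCESS)
		return;

	/* prints entries such as "DAT_IB_TRANSPORT_MTU = 2048" */
	for (i = 0; i < pr_attr.num_provider_specific_attr; i++)
		printf("%s = %s\n",
		       pr_attr.provider_specific_attr[i].name,
		       pr_attr.provider_specific_attr[i].value);
}
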
diff --git a/dapl/openib_scm/dapl_ib_util.h b/dapl/openib_scm/dapl_ib_util.h
index a668af7..a5e734e 100644
--- a/dapl/openib_scm/dapl_ib_util.h
+++ b/dapl/openib_scm/dapl_ib_util.h
@@ -23,92 +23,19 @@
* and/or other materials provided with the distribution.
*/
-/***************************************************************************
- *
- * Module: uDAPL
- *
- * Filename: dapl_ib_util.h
- *
- * Author: Arlin Davis
- *
- * Created: 3/10/2005
- *
- * Description:
- *
- * The uDAPL openib provider - definitions, prototypes,
- *
- ****************************************************************************
- * Source Control System Information
- *
- * $Id: $
- *
- * Copyright (c) 2005 Intel Corporation. All rights reserved.
- *
- **************************************************************************/
-
#ifndef _DAPL_IB_UTIL_H_
#define _DAPL_IB_UTIL_H_
+#define _OPENIB_SCM_
-#include "openib_osd.h"
#include <infiniband/verbs.h>
-
-#ifdef DAT_EXTENSIONS
-#include <dat2/dat_ib_extensions.h>
-#endif
-
-#ifndef __cplusplus
-#define false 0
-#define true 1
-#endif /*__cplusplus */
-
-/* Typedefs to map common DAPL provider types to IB verbs */
-typedef struct ibv_qp *ib_qp_handle_t;
-typedef struct ibv_cq *ib_cq_handle_t;
-typedef struct ibv_pd *ib_pd_handle_t;
-typedef struct ibv_mr *ib_mr_handle_t;
-typedef struct ibv_mw *ib_mw_handle_t;
-typedef struct ibv_wc ib_work_completion_t;
-
-/* HCA context type maps to IB verbs */
-typedef struct ibv_context *ib_hca_handle_t;
-typedef ib_hca_handle_t dapl_ibal_ca_t;
-
-/* destination info to exchange, define wire protocol version */
-#define DSCM_VER 4
-typedef struct _ib_qp_cm
-{
- uint16_t ver;
- uint16_t rej;
- uint16_t lid;
- uint16_t port;
- uint32_t qpn;
- uint32_t p_size;
- union ibv_gid gid;
- DAT_SOCK_ADDR6 ia_address;
- uint16_t qp_type;
-} ib_qp_cm_t;
-
-typedef enum scm_state
-{
- SCM_INIT,
- SCM_LISTEN,
- SCM_CONN_PENDING,
- SCM_RTU_PENDING,
- SCM_ACCEPTING,
- SCM_ACCEPTING_DATA,
- SCM_ACCEPTED,
- SCM_REJECTED,
- SCM_CONNECTED,
- SCM_RELEASED,
- SCM_DISCONNECTED,
- SCM_DESTROY
-} SCM_STATE;
+#include "openib_osd.h"
+#include "dapl_ib_common.h"
struct ib_cm_handle
{
struct dapl_llist_entry entry;
DAPL_OS_LOCK lock;
- SCM_STATE state;
+ int state;
DAPL_SOCKET socket;
struct dapl_hca *hca;
struct dapl_sp *sp;
@@ -121,58 +48,12 @@ struct ib_cm_handle
typedef struct ib_cm_handle *dp_ib_cm_handle_t;
typedef dp_ib_cm_handle_t ib_cm_srvc_handle_t;
-/* CM events */
-typedef enum
-{
- IB_CME_CONNECTED,
- IB_CME_DISCONNECTED,
- IB_CME_DISCONNECTED_ON_LINK_DOWN,
- IB_CME_CONNECTION_REQUEST_PENDING,
- IB_CME_CONNECTION_REQUEST_PENDING_PRIVATE_DATA,
- IB_CME_DESTINATION_REJECT,
- IB_CME_DESTINATION_REJECT_PRIVATE_DATA,
- IB_CME_DESTINATION_UNREACHABLE,
- IB_CME_TOO_MANY_CONNECTION_REQUESTS,
- IB_CME_LOCAL_FAILURE,
- IB_CM_LOCAL_FAILURE
-
-} ib_cm_events_t;
-
-/* Operation and state mappings */
-typedef int ib_send_op_type_t;
-typedef struct ibv_sge ib_data_segment_t;
-typedef enum ibv_qp_state ib_qp_state_t;
-typedef enum ibv_event_type ib_async_event_type;
-typedef struct ibv_async_event ib_error_record_t;
-
-/* CQ notifications */
-typedef enum
-{
- IB_NOTIFY_ON_NEXT_COMP,
- IB_NOTIFY_ON_SOLIC_COMP
-
-} ib_notification_type_t;
-
-/* other mappings */
-typedef int ib_bool_t;
-typedef union ibv_gid GID;
-typedef char *IB_HCA_NAME;
-typedef uint16_t ib_hca_port_t;
-typedef uint32_t ib_comp_handle_t;
-
-#ifdef CQ_WAIT_OBJECT
-typedef struct ibv_comp_channel *ib_wait_obj_handle_t;
-#endif
-
/* Definitions */
#define IB_INVALID_HANDLE NULL
/* inline send rdma threshold */
#define INLINE_SEND_DEFAULT 200
-/* qkey for UD QP's */
-#define SCM_UD_QKEY 0x78654321
-
/* RC timer - retry count defaults */
#define SCM_ACK_TIMER 16 /* 5 bits, 4.096us*2^ack_timer. 16== 268ms */
#define SCM_ACK_RETRY 7 /* 3 bits, 7 * 268ms = 1.8 seconds */
@@ -193,87 +74,6 @@ typedef struct ibv_comp_channel *ib_wait_obj_handle_t;
#define IB_MAX_DREP_PDATA_SIZE 224
#define IB_MAX_RTU_PDATA_SIZE 224
-/* DTO OPs, ordered for DAPL ENUM definitions */
-#define OP_RDMA_WRITE IBV_WR_RDMA_WRITE
-#define OP_RDMA_WRITE_IMM IBV_WR_RDMA_WRITE_WITH_IMM
-#define OP_SEND IBV_WR_SEND
-#define OP_SEND_IMM IBV_WR_SEND_WITH_IMM
-#define OP_RDMA_READ IBV_WR_RDMA_READ
-#define OP_COMP_AND_SWAP IBV_WR_ATOMIC_CMP_AND_SWP
-#define OP_FETCH_AND_ADD IBV_WR_ATOMIC_FETCH_AND_ADD
-#define OP_RECEIVE 7 /* internal op */
-#define OP_RECEIVE_IMM 8 /* rdma write with immed, internel op */
-#define OP_RECEIVE_MSG_IMM 9 /* recv msg with immed, internel op */
-#define OP_BIND_MW 10 /* internal op */
-#define OP_SEND_UD 11 /* internal op */
-#define OP_RECV_UD 12 /* internal op */
-#define OP_INVALID 0xff
-
-/* Definitions to map QP state */
-#define IB_QP_STATE_RESET IBV_QPS_RESET
-#define IB_QP_STATE_INIT IBV_QPS_INIT
-#define IB_QP_STATE_RTR IBV_QPS_RTR
-#define IB_QP_STATE_RTS IBV_QPS_RTS
-#define IB_QP_STATE_SQD IBV_QPS_SQD
-#define IB_QP_STATE_SQE IBV_QPS_SQE
-#define IB_QP_STATE_ERROR IBV_QPS_ERR
-
-/* Definitions for ibverbs/mthca return codes, should be defined in verbs.h */
-/* some are errno and some are -n values */
-
-/**
- * ibv_get_device_name - Return kernel device name
- * ibv_get_device_guid - Return device's node GUID
- * ibv_open_device - Return ibv_context or NULL
- * ibv_close_device - Return 0, (errno?)
- * ibv_get_async_event - Return 0, -1
- * ibv_alloc_pd - Return ibv_pd, NULL
- * ibv_dealloc_pd - Return 0, errno
- * ibv_reg_mr - Return ibv_mr, NULL
- * ibv_dereg_mr - Return 0, errno
- * ibv_create_cq - Return ibv_cq, NULL
- * ibv_destroy_cq - Return 0, errno
- * ibv_get_cq_event - Return 0 & ibv_cq/context, int
- * ibv_poll_cq - Return n & ibv_wc, 0 ok, -1 empty, -2 error
- * ibv_req_notify_cq - Return 0 (void?)
- * ibv_create_qp - Return ibv_qp, NULL
- * ibv_modify_qp - Return 0, errno
- * ibv_destroy_qp - Return 0, errno
- * ibv_post_send - Return 0, -1 & bad_wr
- * ibv_post_recv - Return 0, -1 & bad_wr
- */
-
-/* async handler for DTO, CQ, QP, and unafiliated */
-typedef void (*ib_async_dto_handler_t)(
- IN ib_hca_handle_t ib_hca_handle,
- IN ib_error_record_t *err_code,
- IN void *context);
-
-typedef void (*ib_async_cq_handler_t)(
- IN ib_hca_handle_t ib_hca_handle,
- IN ib_cq_handle_t ib_cq_handle,
- IN ib_error_record_t *err_code,
- IN void *context);
-
-typedef void (*ib_async_qp_handler_t)(
- IN ib_hca_handle_t ib_hca_handle,
- IN ib_qp_handle_t ib_qp_handle,
- IN ib_error_record_t *err_code,
- IN void *context);
-
-typedef void (*ib_async_handler_t)(
- IN ib_hca_handle_t ib_hca_handle,
- IN ib_error_record_t *err_code,
- IN void *context);
-
-typedef enum
-{
- IB_THREAD_INIT,
- IB_THREAD_RUN,
- IB_THREAD_CANCEL,
- IB_THREAD_EXIT
-
-} ib_thread_state_t;
/* ib_hca_transport_t, specific to this implementation */
typedef struct _ib_hca_transport
@@ -295,6 +95,8 @@ typedef struct _ib_hca_transport
ib_async_cq_handler_t async_cq_error;
ib_async_dto_handler_t async_cq;
ib_async_qp_handler_t async_qp_error;
+ int rd_atom_in;
+ int rd_atom_out;
uint16_t lid;
uint8_t ack_timer;
uint8_t ack_retry;
@@ -308,96 +110,16 @@ typedef struct _ib_hca_transport
DAPL_SOCKET scm[2];
} ib_hca_transport_t;
-/* provider specfic fields for shared memory support */
-typedef uint32_t ib_shm_transport_t;
-
/* prototypes */
-int32_t dapls_ib_init (void);
-int32_t dapls_ib_release (void);
void cr_thread(void *arg);
int dapli_cq_thread_init(struct dapl_hca *hca_ptr);
void dapli_cq_thread_destroy(struct dapl_hca *hca_ptr);
DAT_RETURN dapli_socket_disconnect(dp_ib_cm_handle_t cm_ptr);
void dapls_print_cm_list(IN DAPL_IA *ia_ptr);
-
-DAT_RETURN
-dapls_modify_qp_state ( IN ib_qp_handle_t qp_handle,
- IN ib_qp_state_t qp_state,
- IN struct ib_cm_handle *cm_ptr );
-
-/* inline functions */
-STATIC _INLINE_ IB_HCA_NAME dapl_ib_convert_name (IN char *name)
-{
- /* use ascii; name of local device */
- return dapl_os_strdup(name);
-}
-
-STATIC _INLINE_ void dapl_ib_release_name (IN IB_HCA_NAME name)
-{
- return;
-}
-
-/*
- * Convert errno to DAT_RETURN values
- */
-STATIC _INLINE_ DAT_RETURN
-dapl_convert_errno( IN int err, IN const char *str )
-{
- if (!err) return DAT_SUCCESS;
-
-#if DAPL_DBG
- if ((err != EAGAIN) && (err != ETIMEDOUT))
- dapl_dbg_log (DAPL_DBG_TYPE_ERR," %s %s\n", str, strerror(err));
-#endif
-
- switch( err )
- {
- case EOVERFLOW : return DAT_LENGTH_ERROR;
- case EACCES : return DAT_PRIVILEGES_VIOLATION;
- case EPERM : return DAT_PROTECTION_VIOLATION;
- case EINVAL : return DAT_INVALID_HANDLE;
- case EISCONN : return DAT_INVALID_STATE | DAT_INVALID_STATE_EP_CONNECTED;
- case ECONNREFUSED : return DAT_INVALID_STATE | DAT_INVALID_STATE_EP_NOTREADY;
- case ETIMEDOUT : return DAT_TIMEOUT_EXPIRED;
- case ENETUNREACH: return DAT_INVALID_ADDRESS | DAT_INVALID_ADDRESS_UNREACHABLE;
- case EADDRINUSE : return DAT_CONN_QUAL_IN_USE;
- case EALREADY : return DAT_INVALID_STATE | DAT_INVALID_STATE_EP_ACTCONNPENDING;
- case ENOMEM : return DAT_INSUFFICIENT_RESOURCES;
- case EAGAIN : return DAT_QUEUE_EMPTY;
- case EINTR : return DAT_INTERRUPTED_CALL;
- case EAFNOSUPPORT : return DAT_INVALID_ADDRESS | DAT_INVALID_ADDRESS_MALFORMED;
- case EFAULT :
- default : return DAT_INTERNAL_ERROR;
- }
- }
-
-STATIC _INLINE_ char * dapl_cm_state_str(IN int st)
-{
- static char *cm_state[] = {
- "SCM_INIT",
- "SCM_LISTEN",
- "SCM_CONN_PENDING",
- "SCM_RTU_PENDING",
- "SCM_ACCEPTING",
- "SCM_ACCEPTING_DATA",
- "SCM_ACCEPTED",
- "SCM_REJECTED",
- "SCM_CONNECTED",
- "SCM_RELEASED",
- "SCM_DISCONNECTED",
- "SCM_DESTROY"
- };
- return ((st < 0 || st > 11) ? "Invalid CM state?" : cm_state[st]);
-}
-
-/*
- * Definitions required only for DAT 1.1 builds
- */
-#define IB_ACCESS_LOCAL_READ IBV_ACCESS_LOCAL_WRITE
-#define IB_ACCESS_LOCAL_WRITE IBV_ACCESS_LOCAL_WRITE
-#define IB_ACCESS_REMOTE_READ IBV_ACCESS_REMOTE_READ
-#define IB_ACCESS_REMOTE_WRITE IBV_ACCESS_REMOTE_WRITE
-#define IB_ACCESS_MW_BIND IBV_ACCESS_LOCAL_WRITE
-#define IB_ACCESS_ATOMIC
+dp_ib_cm_handle_t dapls_ib_cm_create(DAPL_EP *ep);
+void dapls_ib_cm_free(dp_ib_cm_handle_t cm, DAPL_EP *ep);
+DAT_RETURN dapls_modify_qp_state(IN ib_qp_handle_t qp_handle,
+ IN ib_qp_state_t qp_state,
+ IN dp_ib_cm_handle_t cm);
#endif /* _DAPL_IB_UTIL_H_ */
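
The header now exports dapls_ib_cm_create()/dapls_ib_cm_free() and a dapls_modify_qp_state() that takes the cm handle directly, matching the dapl_cr_accept change where the provider, not dapl common code, owns ep->cm_handle. The fragment below is only a hedged sketch of that life cycle, not code from this patch: the field names (ep->qp_handle, ep->cm_handle) follow the dapl common headers, the QP states shown are just an example, and example_accept_path is a made-up name.

/* hedged life-cycle sketch, not part of the patch */
static DAT_RETURN example_accept_path(DAPL_EP *ep)
{
	dp_ib_cm_handle_t cm;

	/* provider allocates and tracks its own cm object for this EP */
	cm = dapls_ib_cm_create(ep);
	if (cm == NULL)
		return DAT_INSUFFICIENT_RESOURCES;

	/* bring the QP up using peer info carried in the cm object */
	if (dapls_modify_qp_state(ep->qp_handle, IBV_QPS_RTR, cm) ||
	    dapls_modify_qp_state(ep->qp_handle, IBV_QPS_RTS, cm))
		goto err;

	/* common code no longer copies cr->cm_handle; the provider decides */
	ep->cm_handle = cm;
	return DAT_SUCCESS;
err:
	dapls_ib_cm_free(cm, ep);
	return DAT_INTERNAL_ERROR;
}
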
diff --git a/dapl/openib_scm/device.c b/dapl/openib_scm/device.c
new file mode 100644
index 0000000..d5089aa
--- /dev/null
+++ b/dapl/openib_scm/device.c
@@ -0,0 +1,412 @@
+/*
+ * This Software is licensed under one of the following licenses:
+ *
+ * 1) under the terms of the "Common Public License 1.0" a copy of which is
+ * available from the Open Source Initiative, see
+ * http://www.opensource.org/licenses/cpl.php.
+ *
+ * 2) under the terms of the "The BSD License" a copy of which is
+ * available from the Open Source Initiative, see
+ * http://www.opensource.org/licenses/bsd-license.php.
+ *
+ * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
+ * copy of which is available from the Open Source Initiative, see
+ * http://www.opensource.org/licenses/gpl-license.php.
+ *
+ * Licensee has the right to choose one of the above licenses.
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice and one of the license notices.
+ *
+ * Redistributions in binary form must reproduce both the above copyright
+ * notice, one of the license notices in the documentation
+ * and/or other materials provided with the distribution.
+ */
+
+/***************************************************************************
+ *
+ * Module: uDAPL
+ *
+ * Filename: dapl_ib_util.c
+ *
+ * Author: Arlin Davis
+ *
+ * Created: 3/10/2005
+ *
+ * Description:
+ *
+ * The uDAPL openib provider - init, open, close, utilities
+ *
+ ****************************************************************************
+ * Source Control System Information
+ *
+ * $Id: $
+ *
+ * Copyright (c) 2005 Intel Corporation. All rights reserved.
+ *
+ **************************************************************************/
+#ifdef RCSID
+static const char rcsid[] = "$Id: $";
+#endif
+
+#include "openib_osd.h"
+#include "dapl.h"
+#include "dapl_adapter_util.h"
+#include "dapl_ib_util.h"
+#include "dapl_osd.h"
+
+#include <stdlib.h>
+
+static int32_t create_cr_pipe(IN DAPL_HCA * hca_ptr)
+{
+ DAPL_SOCKET listen_socket;
+ struct sockaddr_in addr;
+ socklen_t addrlen = sizeof(addr);
+ int ret;
+
+ listen_socket = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
+ if (listen_socket == DAPL_INVALID_SOCKET)
+ return 1;
+
+ memset(&addr, 0, sizeof addr);
+ addr.sin_family = AF_INET;
+ addr.sin_addr.s_addr = htonl(0x7f000001);
+ ret = bind(listen_socket, (struct sockaddr *)&addr, sizeof addr);
+ if (ret)
+ goto err1;
+
+ ret = getsockname(listen_socket, (struct sockaddr *)&addr, &addrlen);
+ if (ret)
+ goto err1;
+
+ ret = listen(listen_socket, 0);
+ if (ret)
+ goto err1;
+
+ hca_ptr->ib_trans.scm[1] = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
+ if (hca_ptr->ib_trans.scm[1] == DAPL_INVALID_SOCKET)
+ goto err1;
+
+ ret = connect(hca_ptr->ib_trans.scm[1],
+ (struct sockaddr *)&addr, sizeof(addr));
+ if (ret)
+ goto err2;
+
+ hca_ptr->ib_trans.scm[0] = accept(listen_socket, NULL, NULL);
+ if (hca_ptr->ib_trans.scm[0] == DAPL_INVALID_SOCKET)
+ goto err2;
+
+ closesocket(listen_socket);
+ return 0;
+
+ err2:
+ closesocket(hca_ptr->ib_trans.scm[1]);
+ err1:
+ closesocket(listen_socket);
+ return 1;
+}
+
+static void destroy_cr_pipe(IN DAPL_HCA * hca_ptr)
+{
+ closesocket(hca_ptr->ib_trans.scm[0]);
+ closesocket(hca_ptr->ib_trans.scm[1]);
+}
+
+
+/*
+ * dapls_ib_init, dapls_ib_release
+ *
+ * Initialize Verb related items for device open
+ *
+ * Input:
+ * none
+ *
+ * Output:
+ * none
+ *
+ * Returns:
+ * 0 success, -1 error
+ *
+ */
+int32_t dapls_ib_init(void)
+{
+ return 0;
+}
+
+int32_t dapls_ib_release(void)
+{
+ return 0;
+}
+
+#if defined(_WIN64) || defined(_WIN32)
+int dapls_config_comp_channel(struct ibv_comp_channel *channel)
+{
+ return 0;
+}
+#else // _WIN64 || WIN32
+int dapls_config_comp_channel(struct ibv_comp_channel *channel)
+{
+ int opts;
+
+ opts = fcntl(channel->fd, F_GETFL); /* uCQ */
+ if (opts < 0 || fcntl(channel->fd, F_SETFL, opts | O_NONBLOCK) < 0) {
+ dapl_log(DAPL_DBG_TYPE_ERR,
+ " dapls_create_comp_channel: fcntl on ib_cq->fd %d ERR %d %s\n",
+ channel->fd, opts, strerror(errno));
+ return errno;
+ }
+
+ return 0;
+}
+#endif
+
+/*
+ * dapls_ib_open_hca
+ *
+ * Open HCA
+ *
+ * Input:
+ * *hca_name pointer to provider device name
+ * *hca_ptr pointer to DAPL HCA structure
+ *
+ * Output:
+ * none
+ *
+ * Return:
+ * DAT_SUCCESS
+ * dapl_convert_errno
+ *
+ */
+DAT_RETURN dapls_ib_open_hca(IN IB_HCA_NAME hca_name, IN DAPL_HCA * hca_ptr)
+{
+ struct ibv_device **dev_list;
+ struct ibv_port_attr port_attr;
+ int i;
+ DAT_RETURN dat_status;
+
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+ " open_hca: %s - %p\n", hca_name, hca_ptr);
+
+ /* get the IP address of the device */
+ dat_status = getlocalipaddr((DAT_SOCK_ADDR *) &hca_ptr->hca_address,
+ sizeof(DAT_SOCK_ADDR6));
+ if (dat_status != DAT_SUCCESS)
+ return dat_status;
+
+ /* Get list of all IB devices, find match, open */
+ dev_list = ibv_get_device_list(NULL);
+ if (!dev_list) {
+ dapl_dbg_log(DAPL_DBG_TYPE_ERR,
+ " open_hca: ibv_get_device_list() failed\n",
+ hca_name);
+ return DAT_INTERNAL_ERROR;
+ }
+
+ for (i = 0; dev_list[i]; ++i) {
+ hca_ptr->ib_trans.ib_dev = dev_list[i];
+ if (!strcmp(ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
+ hca_name))
+ goto found;
+ }
+
+ dapl_log(DAPL_DBG_TYPE_ERR,
+ " open_hca: device %s not found\n", hca_name);
+ goto err;
+
+ found:
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " open_hca: Found dev %s %016llx\n",
+ ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
+ (unsigned long long)
+ ntohll(ibv_get_device_guid(hca_ptr->ib_trans.ib_dev)));
+
+ hca_ptr->ib_hca_handle = ibv_open_device(hca_ptr->ib_trans.ib_dev);
+ if (!hca_ptr->ib_hca_handle) {
+ dapl_log(DAPL_DBG_TYPE_ERR,
+ " open_hca: dev open failed for %s, err=%s\n",
+ ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
+ strerror(errno));
+ goto err;
+ }
+
+ /* get lid for this hca-port, network order */
+ if (ibv_query_port(hca_ptr->ib_hca_handle,
+ (uint8_t) hca_ptr->port_num, &port_attr)) {
+ dapl_log(DAPL_DBG_TYPE_ERR,
+ " open_hca: get lid ERR for %s, err=%s\n",
+ ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
+ strerror(errno));
+ goto err;
+ } else {
+ hca_ptr->ib_trans.lid = htons(port_attr.lid);
+ }
+
+ /* get gid for this hca-port, network order */
+ if (ibv_query_gid(hca_ptr->ib_hca_handle,
+ (uint8_t) hca_ptr->port_num,
+ 0, &hca_ptr->ib_trans.gid)) {
+ dapl_log(DAPL_DBG_TYPE_ERR,
+ " open_hca: query GID ERR for %s, err=%s\n",
+ ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
+ strerror(errno));
+ goto err;
+ }
+
+ /* set RC tunables via environment or default */
+ hca_ptr->ib_trans.max_inline_send =
+ dapl_os_get_env_val("DAPL_MAX_INLINE", INLINE_SEND_DEFAULT);
+ hca_ptr->ib_trans.ack_retry =
+ dapl_os_get_env_val("DAPL_ACK_RETRY", SCM_ACK_RETRY);
+ hca_ptr->ib_trans.ack_timer =
+ dapl_os_get_env_val("DAPL_ACK_TIMER", SCM_ACK_TIMER);
+ hca_ptr->ib_trans.rnr_retry =
+ dapl_os_get_env_val("DAPL_RNR_RETRY", SCM_RNR_RETRY);
+ hca_ptr->ib_trans.rnr_timer =
+ dapl_os_get_env_val("DAPL_RNR_TIMER", SCM_RNR_TIMER);
+ hca_ptr->ib_trans.global =
+ dapl_os_get_env_val("DAPL_GLOBAL_ROUTING", SCM_GLOBAL);
+ hca_ptr->ib_trans.hop_limit =
+ dapl_os_get_env_val("DAPL_HOP_LIMIT", SCM_HOP_LIMIT);
+ hca_ptr->ib_trans.tclass =
+ dapl_os_get_env_val("DAPL_TCLASS", SCM_TCLASS);
+ hca_ptr->ib_trans.mtu =
+ dapl_ib_mtu(dapl_os_get_env_val("DAPL_IB_MTU", SCM_IB_MTU));
+
+#ifndef CQ_WAIT_OBJECT
+ /* initialize cq_lock */
+ dat_status = dapl_os_lock_init(&hca_ptr->ib_trans.cq_lock);
+ if (dat_status != DAT_SUCCESS) {
+ dapl_log(DAPL_DBG_TYPE_ERR,
+ " open_hca: failed to init cq_lock\n");
+ goto bail;
+ }
+ /* EVD events without direct CQ channels, non-blocking */
+ hca_ptr->ib_trans.ib_cq =
+ ibv_create_comp_channel(hca_ptr->ib_hca_handle);
+ if (hca_ptr->ib_trans.ib_cq == NULL) {
+ dapl_log(DAPL_DBG_TYPE_ERR,
+ " open_hca: ibv_create_comp_channel ERR %s\n",
+ strerror(errno));
+ goto bail;
+ }
+
+ if (dapls_config_comp_channel(hca_ptr->ib_trans.ib_cq)) {
+ goto bail;
+ }
+
+ if (dapli_cq_thread_init(hca_ptr)) {
+ dapl_log(DAPL_DBG_TYPE_ERR,
+ " open_hca: cq_thread_init failed for %s\n",
+ ibv_get_device_name(hca_ptr->ib_trans.ib_dev));
+ goto bail;
+ }
+#endif /* CQ_WAIT_OBJECT */
+
+ /* initialize cr_list lock */
+ dat_status = dapl_os_lock_init(&hca_ptr->ib_trans.lock);
+ if (dat_status != DAT_SUCCESS) {
+ dapl_log(DAPL_DBG_TYPE_ERR,
+ " open_hca: failed to init cr_list lock\n");
+ goto bail;
+ }
+
+ /* initialize CM list for listens on this HCA */
+ dapl_llist_init_head(&hca_ptr->ib_trans.list);
+
+ /* initialize pipe, user level wakeup on select */
+ if (create_cr_pipe(hca_ptr)) {
+ dapl_log(DAPL_DBG_TYPE_ERR,
+ " open_hca: failed to init cr pipe - %s\n",
+ strerror(errno));
+ goto bail;
+ }
+
+ /* create thread to process inbound connect request */
+ hca_ptr->ib_trans.cr_state = IB_THREAD_INIT;
+ dat_status = dapl_os_thread_create(cr_thread,
+ (void *)hca_ptr,
+ &hca_ptr->ib_trans.thread);
+ if (dat_status != DAT_SUCCESS) {
+ dapl_log(DAPL_DBG_TYPE_ERR,
+ " open_hca: failed to create thread\n");
+ goto bail;
+ }
+
+ /* wait for thread */
+ while (hca_ptr->ib_trans.cr_state != IB_THREAD_RUN) {
+ dapl_os_sleep_usec(2000);
+ }
+
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+ " open_hca: devname %s, port %d, hostname_IP %s\n",
+ ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
+ hca_ptr->port_num, inet_ntoa(((struct sockaddr_in *)
+ &hca_ptr->hca_address)->
+ sin_addr));
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+ " open_hca: LID 0x%x GID Subnet 0x" F64x " ID 0x" F64x
+ "\n", ntohs(hca_ptr->ib_trans.lid), (unsigned long long)
+ htonll(hca_ptr->ib_trans.gid.global.subnet_prefix),
+ (unsigned long long)htonll(hca_ptr->ib_trans.gid.global.
+ interface_id));
+
+ ibv_free_device_list(dev_list);
+ return dat_status;
+
+ bail:
+ ibv_close_device(hca_ptr->ib_hca_handle);
+ hca_ptr->ib_hca_handle = IB_INVALID_HANDLE;
+ err:
+ ibv_free_device_list(dev_list);
+ return DAT_INTERNAL_ERROR;
+}
+
+/*
+ * dapls_ib_close_hca
+ *
+ * Close HCA
+ *
+ * Input:
+ * DAPL_HCA provider HCA handle
+ *
+ * Output:
+ * none
+ *
+ * Return:
+ * DAT_SUCCESS
+ * dapl_convert_errno
+ *
+ */
+DAT_RETURN dapls_ib_close_hca(IN DAPL_HCA * hca_ptr)
+{
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " close_hca: %p\n", hca_ptr);
+
+#ifndef CQ_WAIT_OBJECT
+ dapli_cq_thread_destroy(hca_ptr);
+ dapl_os_lock_destroy(&hca_ptr->ib_trans.cq_lock);
+#endif /* CQ_WAIT_OBJECT */
+
+ if (hca_ptr->ib_hca_handle != IB_INVALID_HANDLE) {
+ if (ibv_close_device(hca_ptr->ib_hca_handle))
+ return (dapl_convert_errno(errno, "ib_close_device"));
+ hca_ptr->ib_hca_handle = IB_INVALID_HANDLE;
+ }
+
+ /* destroy cr_thread and lock */
+ hca_ptr->ib_trans.cr_state = IB_THREAD_CANCEL;
+ if (send(hca_ptr->ib_trans.scm[1], "w", sizeof "w", 0) == -1)
+ dapl_log(DAPL_DBG_TYPE_UTIL,
+ " thread_destroy: thread wakeup err = %s\n",
+ strerror(errno));
+ while (hca_ptr->ib_trans.cr_state != IB_THREAD_EXIT) {
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+ " close_hca: waiting for cr_thread\n");
+ if (send(hca_ptr->ib_trans.scm[1], "w", sizeof "w", 0) == -1)
+ dapl_log(DAPL_DBG_TYPE_UTIL,
+ " thread_destroy: thread wakeup err = %s\n",
+ strerror(errno));
+ dapl_os_sleep_usec(2000);
+ }
+ dapl_os_lock_destroy(&hca_ptr->ib_trans.lock);
+ destroy_cr_pipe(hca_ptr); /* no longer need pipe */
+ return (DAT_SUCCESS);
+}
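
device.c above replaces the old pipe() wakeup with a loopback TCP socket pair (create_cr_pipe), and dapls_ib_close_hca() wakes cr_thread by writing "w" into scm[1]. The fragment below sketches only the consuming end of that handshake, under the assumption that cr_thread select()s on scm[0]; the helper name and loop structure are illustrative and are not taken from cm.c.

/* illustrative consumer-side sketch, not part of the patch */
#include <sys/select.h>
#include <sys/socket.h>
#include "dapl.h"
#include "dapl_ib_util.h"

static void example_cr_wait(struct dapl_hca *hca_ptr)
{
	char rbuf[2];
	fd_set rfds;

	while (hca_ptr->ib_trans.cr_state == IB_THREAD_RUN) {
		FD_ZERO(&rfds);
		FD_SET(hca_ptr->ib_trans.scm[0], &rfds);
		/* per-listen CR sockets would be added to rfds here */

		if (select(hca_ptr->ib_trans.scm[0] + 1,
			   &rfds, NULL, NULL, NULL) <= 0)
			continue;

		/* drain the "w" byte sent by dapls_ib_close_hca() */
		if (FD_ISSET(hca_ptr->ib_trans.scm[0], &rfds))
			recv(hca_ptr->ib_trans.scm[0], rbuf, sizeof(rbuf), 0);
	}
	/* signal dapls_ib_close_hca() that the thread has exited */
	hca_ptr->ib_trans.cr_state = IB_THREAD_EXIT;
}
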
--
1.5.2.5