[openib-general] [PATCH] new uDAPL openIB provider using socket CM
Arlin Davis
arlin.r.davis at intel.com
Tue Oct 25 11:17:50 PDT 2005
James,
Here is a patch to add an optional openIB uDAPL provider that uses the socket CM for anyone having
problems scaling out with the uCM/uAT version. To build the new provider, simply "make
VERBS=openib_scm". This version does not require IPoIB, uCM, or uAT.
-arlin
Signed-off-by: Arlin Davis <ardavis at ichips.intel.com>
Index: dapl/udapl/Makefile
===================================================================
--- dapl/udapl/Makefile (revision 3848)
+++ dapl/udapl/Makefile (working copy)
@@ -139,6 +139,16 @@ CFLAGS += -I/usr/local/include/infinib
endif
#
+# OpenIB provider with Socket CM
+#
+ifeq ($(VERBS),openib_scm)
+PROVIDER = $(TOPDIR)/../openib_scm
+CFLAGS += -DOPENIB
+CFLAGS += -DCQ_WAIT_OBJECT
+CFLAGS += -I/usr/local/include/infiniband
+endif
+
+#
# If an implementation supports CM and DTO completions on the same EVD
# then DAPL_MERGE_CM_DTO should be set
# CFLAGS += -DDAPL_MERGE_CM_DTO=1
@@ -251,6 +261,13 @@ PROVIDER_SRCS = dapl_ib_util.c dapl_ib_
PROVIDER_SRCS += dapl_ib_cm.c dapl_ib_mem.c
endif
+ifeq ($(VERBS),openib_scm)
+LDFLAGS += -libverbs
+LDFLAGS += -rpath /usr/local/lib -L /usr/local/lib
+PROVIDER_SRCS = dapl_ib_util.c dapl_ib_cq.c dapl_ib_qp.c \
+ dapl_ib_cm.c dapl_ib_mem.c
+endif
+
UDAPL_SRCS = dapl_init.c \
dapl_evd_create.c \
dapl_evd_query.c \
Index: dapl/openib_scm/dapl_ib_dto.h
===================================================================
--- dapl/openib_scm/dapl_ib_dto.h (revision 0)
+++ dapl/openib_scm/dapl_ib_dto.h (revision 0)
@@ -0,0 +1,261 @@
+/*
+ * This Software is licensed under both of the following two licenses:
+ *
+ * 1) under the terms of the "Common Public License 1.0" a copy of which is
+ * in the file LICENSE.txt in the root directory. The license is also
+ * available from the Open Source Initiative, see
+ * http://www.opensource.org/licenses/cpl.php.
+ * OR
+ *
+ * 2) under the terms of the "The BSD License" a copy of which is in the file
+ * LICENSE2.txt in the root directory. The license is also available from
+ * the Open Source Initiative, see
+ * http://www.opensource.org/licenses/bsd-license.php.
+ *
+ * Licensee has the right to choose either one of the above two licenses.
+ *
+ * Redistributions of source code must retain both the above copyright
+ * notice and either one of the license notices.
+ *
+ * Redistributions in binary form must reproduce both the above copyright
+ * notice, either one of the license notices in the documentation
+ * and/or other materials provided with the distribution.
+ */
+
+/***************************************************************************
+ *
+ * Module: uDAPL
+ *
+ * Filename: dapl_ib_dto.h
+ *
+ * Author: Arlin Davis
+ *
+ * Created: 3/10/2005
+ *
+ * Description:
+ *
+ * The uDAPL openib provider - DTO operations and CQE macros
+ *
+ ****************************************************************************
+ * Source Control System Information
+ *
+ * $Id: $
+ *
+ * Copyright (c) 2005 Intel Corporation. All rights reserved.
+ *
+ **************************************************************************/
+#ifndef _DAPL_IB_DTO_H_
+#define _DAPL_IB_DTO_H_
+
+#include "dapl_ib_util.h"
+
+#define DEFAULT_DS_ENTRIES 8
+
+STATIC _INLINE_ int dapls_cqe_opcode(ib_work_completion_t *cqe_p);
+
+/*
+ * dapls_ib_post_recv
+ *
+ * Provider specific Post RECV function
+ *
+ * Builds a scatter/gather list from the non-empty entries of local_iov
+ * and posts one receive work request on the endpoint's QP.  The cookie
+ * pointer is used as the work request id and, when non-NULL, records
+ * the total length that was posted.
+ *
+ * Input:
+ *	ep_ptr		endpoint whose qp_handle receives the request
+ *	cookie		DTO cookie, may be NULL
+ *	segments	number of entries in local_iov
+ *	local_iov	local buffer triplets
+ *
+ * Returns:
+ *	DAT_SUCCESS
+ *	DAT_INSUFFICIENT_RESOURCES	segment array could not be allocated
+ *	dapl_convert_errno(EFAULT)	ibv_post_recv failed
+ */
+STATIC _INLINE_ DAT_RETURN
+dapls_ib_post_recv (
+    IN DAPL_EP *ep_ptr,
+    IN DAPL_COOKIE *cookie,
+    IN DAT_COUNT segments,
+    IN DAT_LMR_TRIPLET *local_iov )
+{
+    ib_data_segment_t ds_array[DEFAULT_DS_ENTRIES];
+    ib_data_segment_t *ds_array_start_p;
+    ib_data_segment_t *ds_array_p;
+    struct ibv_recv_wr wr;
+    struct ibv_recv_wr *bad_wr;
+    DAT_COUNT i, total_len;
+    int ret;
+
+    dapl_dbg_log (DAPL_DBG_TYPE_EP,
+                  " post_rcv: ep %p cookie %p segs %d l_iov %p\n",
+                  ep_ptr, cookie, segments, local_iov);
+
+    /* small lists live on the stack, larger ones come from the heap */
+    if ( segments <= DEFAULT_DS_ENTRIES )
+        ds_array_start_p = ds_array;
+    else
+        ds_array_start_p = dapl_os_alloc(segments * sizeof(ib_data_segment_t));
+
+    if (NULL == ds_array_start_p)
+        return (DAT_INSUFFICIENT_RESOURCES);
+
+    /* keep the start pointer intact; ds_array_p is only a fill cursor */
+    ds_array_p = ds_array_start_p;
+
+    /* setup work request */
+    total_len = 0;
+    wr.next = 0;
+    wr.num_sge = 0;
+    wr.wr_id = (uint64_t)(uintptr_t)cookie;
+    wr.sg_list = ds_array_start_p;
+
+    for (i = 0; i < segments; i++ ) {
+        /* zero length segments are not passed down to verbs */
+        if ( !local_iov[i].segment_length )
+            continue;
+
+        ds_array_p->addr = (uint64_t) local_iov[i].virtual_address;
+        ds_array_p->length = local_iov[i].segment_length;
+        ds_array_p->lkey = local_iov[i].lmr_context;
+
+        /* addr is a 64-bit integer; convert it before printing with %p */
+        dapl_dbg_log ( DAPL_DBG_TYPE_EP,
+                       " post_rcv: l_key 0x%x va %p len %d\n",
+                       ds_array_p->lkey,
+                       (void *)(uintptr_t)ds_array_p->addr,
+                       ds_array_p->length );
+
+        total_len += ds_array_p->length;
+        wr.num_sge++;
+        ds_array_p++;
+    }
+
+    if (cookie != NULL)
+        cookie->val.dto.size = total_len;
+
+    ret = ibv_post_recv(ep_ptr->qp_handle, &wr, &bad_wr);
+
+    /*
+     * The work request has been handed to verbs (or rejected), so a
+     * heap allocated segment list is no longer needed.  Previously it
+     * was leaked on every call with more than DEFAULT_DS_ENTRIES
+     * segments.
+     */
+    if (ds_array_start_p != ds_array)
+        dapl_os_free(ds_array_start_p, segments * sizeof(ib_data_segment_t));
+
+    if (ret)
+        return( dapl_convert_errno(EFAULT,"ibv_recv") );
+
+    return DAT_SUCCESS;
+}
+
+
+/*
+ * dapls_ib_post_send
+ *
+ * Provider specific Post SEND function
+ *
+ * Builds a scatter/gather list from the non-empty entries of local_iov
+ * and posts one send-queue work request (send, RDMA write or RDMA read)
+ * on the endpoint's QP.  The cookie pointer is used as the work request
+ * id and, when non-NULL, records the total length that was posted.
+ *
+ * Input:
+ *	ep_ptr			endpoint whose qp_handle receives the request
+ *	op_type			verbs opcode for the work request
+ *	cookie			DTO cookie, may be NULL
+ *	segments		number of entries in local_iov
+ *	local_iov		local buffer triplets
+ *	remote_iov		remote buffer, read only for RDMA read/write
+ *	completion_flags	DAT completion flags mapped to send flags
+ *
+ * Returns:
+ *	DAT_SUCCESS
+ *	DAT_INSUFFICIENT_RESOURCES	segment array could not be allocated
+ *	dapl_convert_errno(EFAULT)	ibv_post_send failed
+ */
+STATIC _INLINE_ DAT_RETURN
+dapls_ib_post_send (
+    IN DAPL_EP *ep_ptr,
+    IN ib_send_op_type_t op_type,
+    IN DAPL_COOKIE *cookie,
+    IN DAT_COUNT segments,
+    IN DAT_LMR_TRIPLET *local_iov,
+    IN const DAT_RMR_TRIPLET *remote_iov,
+    IN DAT_COMPLETION_FLAGS completion_flags)
+{
+    ib_data_segment_t ds_array[DEFAULT_DS_ENTRIES];
+    ib_data_segment_t *ds_array_start_p;
+    ib_data_segment_t *ds_array_p;
+    struct ibv_send_wr wr;
+    struct ibv_send_wr *bad_wr;
+    ib_hca_transport_t *ibt_ptr = &ep_ptr->header.owner_ia->hca_ptr->ib_trans;
+    DAT_COUNT i, total_len;
+    int ret;
+
+    dapl_dbg_log (DAPL_DBG_TYPE_EP,
+                  " post_snd: ep %p op %d ck %p sgs %d l_iov %p r_iov %p f %d\n",
+                  ep_ptr, op_type, cookie, segments, local_iov,
+                  remote_iov, completion_flags);
+
+    dapl_dbg_log (DAPL_DBG_TYPE_EP,
+                  " post_snd: ep %p cookie %p segs %d l_iov %p\n",
+                  ep_ptr, cookie, segments, local_iov);
+
+    /* small lists live on the stack, larger ones come from the heap */
+    if( segments <= DEFAULT_DS_ENTRIES )
+        ds_array_start_p = ds_array;
+    else
+        ds_array_start_p = dapl_os_alloc(segments * sizeof(ib_data_segment_t));
+
+    if (NULL == ds_array_start_p)
+        return (DAT_INSUFFICIENT_RESOURCES);
+
+    /* keep the start pointer intact; ds_array_p is only a fill cursor */
+    ds_array_p = ds_array_start_p;
+
+    /* setup the work request */
+    wr.next = 0;
+    wr.opcode = op_type;
+    wr.num_sge = 0;
+    wr.send_flags = 0;
+    wr.wr_id = (uint64_t)(uintptr_t)cookie;
+    wr.sg_list = ds_array_start_p;
+    total_len = 0;
+
+    for (i = 0; i < segments; i++ ) {
+        /* zero length segments are not passed down to verbs */
+        if ( !local_iov[i].segment_length )
+            continue;
+
+        ds_array_p->addr = (uint64_t) local_iov[i].virtual_address;
+        ds_array_p->length = local_iov[i].segment_length;
+        ds_array_p->lkey = local_iov[i].lmr_context;
+
+        /* addr is a 64-bit integer; convert it before printing with %p */
+        dapl_dbg_log ( DAPL_DBG_TYPE_EP,
+                       " post_snd: lkey 0x%x va %p len %d \n",
+                       ds_array_p->lkey,
+                       (void *)(uintptr_t)ds_array_p->addr,
+                       ds_array_p->length );
+
+        total_len += ds_array_p->length;
+        wr.num_sge++;
+        ds_array_p++;
+    }
+
+    if (cookie != NULL)
+        cookie->val.dto.size = total_len;
+
+    /* remote buffer description is only meaningful for RDMA operations */
+    if ((op_type == OP_RDMA_WRITE) || (op_type == OP_RDMA_READ)) {
+        wr.wr.rdma.remote_addr = remote_iov->target_address;
+        wr.wr.rdma.rkey = remote_iov->rmr_context;
+        dapl_dbg_log ( DAPL_DBG_TYPE_EP,
+                       " post_snd_rdma: rkey 0x%x va %#016llx\n",
+                       wr.wr.rdma.rkey,
+                       (unsigned long long)wr.wr.rdma.remote_addr );
+    }
+
+    /* inline data for send or write ops */
+    if ((total_len <= ibt_ptr->max_inline_send ) &&
+        ((op_type == OP_SEND) || (op_type == OP_RDMA_WRITE)))
+        wr.send_flags |= IBV_SEND_INLINE;
+
+    /* set completion flags in work request */
+    wr.send_flags |= (DAT_COMPLETION_SUPPRESS_FLAG &
+                      completion_flags) ? 0 : IBV_SEND_SIGNALED;
+    wr.send_flags |= (DAT_COMPLETION_BARRIER_FENCE_FLAG &
+                      completion_flags) ? IBV_SEND_FENCE : 0;
+    wr.send_flags |= (DAT_COMPLETION_SOLICITED_WAIT_FLAG &
+                      completion_flags) ? IBV_SEND_SOLICITED : 0;
+
+    dapl_dbg_log (DAPL_DBG_TYPE_EP,
+                  " post_snd: op 0x%x flags 0x%x sglist %p, %d\n",
+                  wr.opcode, wr.send_flags, wr.sg_list, wr.num_sge);
+
+    ret = ibv_post_send(ep_ptr->qp_handle, &wr, &bad_wr);
+
+    /*
+     * The work request has been handed to verbs (or rejected), so a
+     * heap allocated segment list is no longer needed.  Previously it
+     * was leaked on every call with more than DEFAULT_DS_ENTRIES
+     * segments.
+     */
+    if (ds_array_start_p != ds_array)
+        dapl_os_free(ds_array_start_p, segments * sizeof(ib_data_segment_t));
+
+    if (ret)
+        return( dapl_convert_errno(EFAULT,"ibv_send") );
+
+    dapl_dbg_log (DAPL_DBG_TYPE_EP," post_snd: returned\n");
+    return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_optional_prv_dat
+ *
+ * Stub: none of the arguments are examined and *cr_pp is not written.
+ * Always returns DAT_SUCCESS.
+ */
+STATIC _INLINE_ DAT_RETURN
+dapls_ib_optional_prv_dat (
+ IN DAPL_CR *cr_ptr,
+ IN const void *event_data,
+ OUT DAPL_CR **cr_pp)
+{
+ return DAT_SUCCESS;
+}
+
+/*
+ * dapls_cqe_opcode
+ *
+ * Translate the verbs opcode stored in a work completion into the
+ * matching provider OP_* value.  Opcodes without a mapping yield
+ * OP_INVALID.
+ */
+STATIC _INLINE_ int dapls_cqe_opcode(ib_work_completion_t *cqe_p)
+{
+    int dapl_op;
+
+    switch (cqe_p->opcode) {
+    case IBV_WC_SEND:
+        dapl_op = OP_SEND;
+        break;
+    case IBV_WC_RDMA_WRITE:
+        dapl_op = OP_RDMA_WRITE;
+        break;
+    case IBV_WC_RDMA_READ:
+        dapl_op = OP_RDMA_READ;
+        break;
+    case IBV_WC_COMP_SWAP:
+        dapl_op = OP_COMP_AND_SWAP;
+        break;
+    case IBV_WC_FETCH_ADD:
+        dapl_op = OP_FETCH_AND_ADD;
+        break;
+    case IBV_WC_BIND_MW:
+        dapl_op = OP_BIND_MW;
+        break;
+    case IBV_WC_RECV:
+        dapl_op = OP_RECEIVE;
+        break;
+    case IBV_WC_RECV_RDMA_WITH_IMM:
+        dapl_op = OP_RECEIVE_IMM;
+        break;
+    default:
+        dapl_op = OP_INVALID;
+        break;
+    }
+
+    return dapl_op;
+}
+
+/*
+ * Work completion accessors.  The argument and the whole expansion are
+ * parenthesized so the macros stay correct when passed an expression
+ * (e.g. "wc + 1") or when used inside a larger expression.
+ */
+#define DAPL_GET_CQE_OPTYPE(cqe_p) dapls_cqe_opcode(cqe_p)
+#define DAPL_GET_CQE_WRID(cqe_p) (((ib_work_completion_t*)(cqe_p))->wr_id)
+#define DAPL_GET_CQE_STATUS(cqe_p) (((ib_work_completion_t*)(cqe_p))->status)
+#define DAPL_GET_CQE_BYTESNUM(cqe_p) (((ib_work_completion_t*)(cqe_p))->byte_len)
+#define DAPL_GET_CQE_IMMED_DATA(cqe_p) (((ib_work_completion_t*)(cqe_p))->imm_data)
+
+#endif /* _DAPL_IB_DTO_H_ */
Index: dapl/openib_scm/dapl_ib_util.c
===================================================================
--- dapl/openib_scm/dapl_ib_util.c (revision 0)
+++ dapl/openib_scm/dapl_ib_util.c (revision 0)
@@ -0,0 +1,471 @@
+/*
+ * This Software is licensed under both of the following two licenses:
+ *
+ * 1) under the terms of the "Common Public License 1.0" a copy of which is
+ * in the file LICENSE.txt in the root directory. The license is also
+ * available from the Open Source Initiative, see
+ * http://www.opensource.org/licenses/cpl.php.
+ * OR
+ *
+ * 2) under the terms of the "The BSD License" a copy of which is in the file
+ * LICENSE2.txt in the root directory. The license is also available from
+ * the Open Source Initiative, see
+ * http://www.opensource.org/licenses/bsd-license.php.
+ *
+ * Licensee has the right to choose either one of the above two licenses.
+ *
+ * Redistributions of source code must retain both the above copyright
+ * notice and either one of the license notices.
+ *
+ * Redistributions in binary form must reproduce both the above copyright
+ * notice, either one of the license notices in the documentation
+ * and/or other materials provided with the distribution.
+ */
+
+/***************************************************************************
+ *
+ * Module: uDAPL
+ *
+ * Filename: dapl_ib_util.c
+ *
+ * Author: Arlin Davis
+ *
+ * Created: 3/10/2005
+ *
+ * Description:
+ *
+ * The uDAPL openib provider - init, open, close, utilities
+ *
+ ****************************************************************************
+ * Source Control System Information
+ *
+ * $Id: $
+ *
+ * Copyright (c) 2005 Intel Corporation. All rights reserved.
+ *
+ **************************************************************************/
+#ifdef RCSID
+static const char rcsid[] = "$Id: $";
+#endif
+
+#include "dapl.h"
+#include "dapl_adapter_util.h"
+#include "dapl_ib_util.h"
+
+#include <stdlib.h>
+#include <netinet/tcp.h>
+#include <sys/utsname.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+int g_dapl_loopback_connection = 0;
+
+/*
+ * getipaddr
+ *
+ * Look up the IPv4 address of the local node name (as reported by
+ * uname) and store it in the caller's buffer as a struct sockaddr_in.
+ * Only sin_family and sin_addr are written.
+ *
+ * Input:
+ *	addr		buffer receiving the socket address
+ *	addr_len	size of the buffer in bytes
+ *
+ * Returns:
+ *	DAT_SUCCESS
+ *	DAT_INTERNAL_ERROR	uname or the host lookup failed
+ *	DAT_INVALID_ADDRESS	buffer missing or too small, or the host
+ *				has no IPv4 address
+ */
+DAT_RETURN getipaddr( char *addr, int addr_len)
+{
+    struct sockaddr_in *ipv4_addr = (struct sockaddr_in*)addr;
+    struct hostent *h_ptr;
+    struct utsname ourname;
+
+    /* never write past a buffer that cannot hold a sockaddr_in */
+    if ( addr == NULL || addr_len < (int)sizeof(struct sockaddr_in) )
+        return DAT_INVALID_ADDRESS;
+
+    if ( uname( &ourname ) < 0 )
+        return DAT_INTERNAL_ERROR;
+
+    h_ptr = gethostbyname( ourname.nodename );
+    if ( h_ptr == NULL )
+        return DAT_INTERNAL_ERROR;
+
+    /* only IPv4 is handled, and the address list may be empty */
+    if ( h_ptr->h_addrtype != AF_INET || h_ptr->h_addr_list[0] == NULL )
+        return DAT_INVALID_ADDRESS;
+
+    ipv4_addr->sin_family = AF_INET;
+    dapl_os_memcpy( &ipv4_addr->sin_addr, h_ptr->h_addr_list[0],
+                    sizeof(ipv4_addr->sin_addr) );
+
+    return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_init, dapls_ib_release
+ *
+ * Provider wide init/release hooks.  In this provider both are empty
+ * stubs that perform no work and always report success.
+ *
+ * Input:
+ * none
+ *
+ * Output:
+ * none
+ *
+ * Returns:
+ * 0 success (the only value returned here)
+ *
+ */
+int32_t dapls_ib_init (void)
+{
+ return 0;
+}
+
+/* Counterpart of dapls_ib_init; empty stub that always returns 0. */
+int32_t dapls_ib_release (void)
+{
+ return 0;
+}
+
+/*
+ * dapls_ib_open_hca
+ *
+ * Open HCA
+ *
+ * Finds the verbs device whose name matches hca_name, opens it, creates
+ * the non-blocking completion channel and CQ thread, starts the thread
+ * that processes inbound connect requests and records the local IP
+ * address in hca_ptr->hca_address.
+ *
+ * Input:
+ *	hca_name	provider device name to match
+ *	hca_ptr		HCA object; ib_hca_handle and ib_trans are filled in
+ *
+ * Output:
+ *	none
+ *
+ * Return:
+ *	DAT_SUCCESS
+ *	DAT_INTERNAL_ERROR	device not found/opened or a setup step failed
+ *	status of getipaddr	when the local address lookup fails
+ *
+ * NOTE(review): the bail path only closes the device; a lock, channel
+ * or thread created before the failing step is not torn down here.
+ */
+DAT_RETURN dapls_ib_open_hca (
+    IN IB_HCA_NAME hca_name,
+    IN DAPL_HCA *hca_ptr)
+{
+    struct dlist *dev_list;
+    int opts;
+    DAT_RETURN dat_status = DAT_SUCCESS;
+
+    dapl_dbg_log (DAPL_DBG_TYPE_UTIL,
+                  " open_hca: %s - %p\n", hca_name, hca_ptr );
+
+    /* Get list of all IB devices, find match, open */
+    dev_list = ibv_get_devices();
+    dlist_start(dev_list);
+    dlist_for_each_data(dev_list,hca_ptr->ib_trans.ib_dev,struct ibv_device) {
+        if (!strcmp(ibv_get_device_name(hca_ptr->ib_trans.ib_dev),hca_name))
+            break;
+    }
+
+    if (!hca_ptr->ib_trans.ib_dev) {
+        dapl_dbg_log (DAPL_DBG_TYPE_ERR,
+                      " open_hca: IB device %s not found\n",
+                      hca_name);
+        return DAT_INTERNAL_ERROR;
+    }
+
+    dapl_dbg_log (DAPL_DBG_TYPE_UTIL," open_hca: Found dev %s %016llx\n",
+                  ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
+                  (unsigned long long)bswap_64(ibv_get_device_guid(hca_ptr->ib_trans.ib_dev)));
+
+    hca_ptr->ib_hca_handle = ibv_open_device(hca_ptr->ib_trans.ib_dev);
+    if (!hca_ptr->ib_hca_handle) {
+        dapl_dbg_log (DAPL_DBG_TYPE_ERR,
+                      " open_hca: IB dev open failed for %s\n",
+                      ibv_get_device_name(hca_ptr->ib_trans.ib_dev) );
+        return DAT_INTERNAL_ERROR;
+    }
+
+    /* set inline max with environment or default */
+    hca_ptr->ib_trans.max_inline_send =
+        dapl_os_get_env_val ( "DAPL_MAX_INLINE", INLINE_SEND_DEFAULT );
+
+    /* initialize cq_lock */
+    dat_status = dapl_os_lock_init(&hca_ptr->ib_trans.cq_lock);
+    if (dat_status != DAT_SUCCESS)
+    {
+        dapl_dbg_log (DAPL_DBG_TYPE_ERR,
+                      " open_hca: failed to init cq_lock\n");
+        goto bail;
+    }
+
+    /* EVD events without direct CQ channels, non-blocking */
+    hca_ptr->ib_trans.ib_cq =
+        ibv_create_comp_channel(hca_ptr->ib_hca_handle);
+
+    /* channel creation can fail; do not dereference a NULL channel */
+    if (hca_ptr->ib_trans.ib_cq == NULL) {
+        dapl_dbg_log (DAPL_DBG_TYPE_ERR,
+                      " open_hca: ERR creating CQ channel\n" );
+        goto bail;
+    }
+
+    opts = fcntl(hca_ptr->ib_trans.ib_cq->fd, F_GETFL); /* uCQ */
+    if (opts < 0 || fcntl(hca_ptr->ib_trans.ib_cq->fd,
+                          F_SETFL, opts | O_NONBLOCK) < 0) {
+        dapl_dbg_log (DAPL_DBG_TYPE_ERR,
+                      " open_hca: ERR with CQ FD\n" );
+        goto bail;
+    }
+
+    if (dapli_cq_thread_init(hca_ptr)) {
+        dapl_dbg_log (DAPL_DBG_TYPE_ERR,
+                      " open_hca: cq_thread_init failed for %s\n",
+                      ibv_get_device_name(hca_ptr->ib_trans.ib_dev) );
+        goto bail;
+    }
+
+    /* initialize cr_list lock */
+    dat_status = dapl_os_lock_init(&hca_ptr->ib_trans.lock);
+    if (dat_status != DAT_SUCCESS)
+    {
+        dapl_dbg_log (DAPL_DBG_TYPE_ERR,
+                      " open_hca: failed to init lock\n");
+        goto bail;
+    }
+
+    /* initialize CM list for listens on this HCA */
+    dapl_llist_init_head(&hca_ptr->ib_trans.list);
+
+    /* create thread to process inbound connect request */
+    hca_ptr->ib_trans.cr_state = IB_THREAD_INIT;
+    dat_status = dapl_os_thread_create(cr_thread,
+                                       (void*)hca_ptr,
+                                       &hca_ptr->ib_trans.thread );
+    if (dat_status != DAT_SUCCESS)
+    {
+        dapl_dbg_log (DAPL_DBG_TYPE_ERR,
+                      " open_hca: failed to create thread\n");
+        goto bail;
+    }
+
+    /* wait for thread: poll cr_state every 20 ms until it reports RUN */
+    while (hca_ptr->ib_trans.cr_state != IB_THREAD_RUN) {
+        struct timespec sleep, remain;
+        sleep.tv_sec = 0;
+        sleep.tv_nsec = 20000000; /* 20 ms */
+        dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+                     " open_hca: waiting for cr_thread\n");
+        nanosleep (&sleep, &remain);
+    }
+
+    /* get the IP address of the device */
+    dat_status = getipaddr((char*)&hca_ptr->hca_address,
+                           sizeof(DAT_SOCK_ADDR6) );
+    dapl_dbg_log (DAPL_DBG_TYPE_UTIL,
+                  " open_hca: %s, port %d, %s %d.%d.%d.%d\n",
+                  ibv_get_device_name(hca_ptr->ib_trans.ib_dev), hca_ptr->port_num,
+                  ((struct sockaddr_in *)&hca_ptr->hca_address)->sin_family == AF_INET ? "AF_INET":"AF_INET6",
+                  ((struct sockaddr_in *)&hca_ptr->hca_address)->sin_addr.s_addr >> 0 & 0xff,
+                  ((struct sockaddr_in *)&hca_ptr->hca_address)->sin_addr.s_addr >> 8 & 0xff,
+                  ((struct sockaddr_in *)&hca_ptr->hca_address)->sin_addr.s_addr >> 16 & 0xff,
+                  ((struct sockaddr_in *)&hca_ptr->hca_address)->sin_addr.s_addr >> 24 & 0xff );
+
+    return dat_status;
+bail:
+    ibv_close_device(hca_ptr->ib_hca_handle);
+    hca_ptr->ib_hca_handle = IB_INVALID_HANDLE;
+    return DAT_INTERNAL_ERROR;
+}
+
+
+/*
+ * dapls_ib_close_hca
+ *
+ * Close HCA
+ *
+ * Destroys the CQ thread, closes the verbs device if it is open, then
+ * asks the connect-request thread to stop and waits for it to exit
+ * before destroying the transport lock.
+ *
+ * Input:
+ * DAPL_HCA provide CA handle
+ *
+ * Output:
+ * none
+ *
+ * Return:
+ * DAT_SUCCESS
+ * dapl_convert_errno
+ *
+ */
+DAT_RETURN dapls_ib_close_hca ( IN DAPL_HCA *hca_ptr )
+{
+ dapl_dbg_log (DAPL_DBG_TYPE_UTIL," close_hca: %p\n",hca_ptr);
+
+ dapli_cq_thread_destroy(hca_ptr);
+
+ /*
+ * NOTE(review): a failing ibv_close_device returns from here, so the
+ * cq_lock, the cr_thread and the transport lock below are left as is.
+ */
+ if (hca_ptr->ib_hca_handle != IB_INVALID_HANDLE) {
+ if (ibv_close_device(hca_ptr->ib_hca_handle))
+ return(dapl_convert_errno(errno,"ib_close_device"));
+ hca_ptr->ib_hca_handle = IB_INVALID_HANDLE;
+ }
+
+ dapl_os_lock_destroy(&hca_ptr->ib_trans.cq_lock);
+
+ /* destroy cr_thread and lock */
+ /* request cancel, then poll every 20 ms until cr_state reads EXIT */
+ hca_ptr->ib_trans.cr_state = IB_THREAD_CANCEL;
+ while (hca_ptr->ib_trans.cr_state != IB_THREAD_EXIT) {
+ struct timespec sleep, remain;
+ sleep.tv_sec = 0;
+ sleep.tv_nsec = 20000000; /* 20 ms */
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+ " close_hca: waiting for cr_thread\n");
+ nanosleep (&sleep, &remain);
+ }
+ dapl_os_lock_destroy(&hca_ptr->ib_trans.lock);
+
+ return (DAT_SUCCESS);
+}
+
+/*
+ * dapls_ib_query_hca
+ *
+ * Query the hca attribute
+ *
+ * Copies the stored local address and, when requested, fills the IA
+ * and/or EP attribute structures from the verbs device and port
+ * attributes.  Each output pointer may be NULL to skip that result;
+ * verbs is only queried when ia_attr or ep_attr is supplied.
+ *
+ * Input:
+ *	hca_ptr		HCA to query
+ *
+ * Output:
+ *	ia_attr		attributes of the IA, may be NULL
+ *	ep_attr		attributes of the EP, may be NULL
+ *	ip_addr		local address recorded in hca_ptr, may be NULL
+ *
+ * Returns:
+ *	DAT_SUCCESS
+ *	DAT_INVALID_HANDLE	HCA has no device handle
+ *	dapl_convert_errno	device or port query failed
+ *
+ * NOTE(review): hardware_version_minor is logged below but not assigned
+ * in this function, and adapter_name/vendor_name are only terminated,
+ * not filled in.
+ */
+
+DAT_RETURN dapls_ib_query_hca (
+    IN DAPL_HCA *hca_ptr,
+    OUT DAT_IA_ATTR *ia_attr,
+    OUT DAT_EP_ATTR *ep_attr,
+    OUT DAT_SOCK_ADDR6 *ip_addr)
+{
+    struct ibv_device_attr dev_attr;
+    struct ibv_port_attr port_attr;
+
+    if (hca_ptr->ib_hca_handle == NULL) {
+        dapl_dbg_log (DAPL_DBG_TYPE_ERR," query_hca: BAD handle\n");
+        return (DAT_INVALID_HANDLE);
+    }
+
+    /* local IP address of device, set during ia_open */
+    if (ip_addr != NULL)
+        memcpy(ip_addr, &hca_ptr->hca_address, sizeof(DAT_SOCK_ADDR6));
+
+    if (ia_attr == NULL && ep_attr == NULL)
+        return DAT_SUCCESS;
+
+    /* query verbs for this device and port attributes */
+    if (ibv_query_device(hca_ptr->ib_hca_handle, &dev_attr) ||
+        ibv_query_port(hca_ptr->ib_hca_handle,
+                       hca_ptr->port_num, &port_attr))
+        return(dapl_convert_errno(errno,"ib_query_hca"));
+
+    if (ia_attr != NULL) {
+        ia_attr->adapter_name[DAT_NAME_MAX_LENGTH - 1] = '\0';
+        ia_attr->vendor_name[DAT_NAME_MAX_LENGTH - 1] = '\0';
+        ia_attr->ia_address_ptr = (DAT_IA_ADDRESS_PTR)&hca_ptr->hca_address;
+
+        dapl_dbg_log (DAPL_DBG_TYPE_UTIL,
+                      " query_hca: %s %s %d.%d.%d.%d\n",
+                      ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
+                      ((struct sockaddr_in *)ia_attr->ia_address_ptr)->sin_family == AF_INET ? "AF_INET":"AF_INET6",
+                      ((struct sockaddr_in *)ia_attr->ia_address_ptr)->sin_addr.s_addr >> 0 & 0xff,
+                      ((struct sockaddr_in *)ia_attr->ia_address_ptr)->sin_addr.s_addr >> 8 & 0xff,
+                      ((struct sockaddr_in *)ia_attr->ia_address_ptr)->sin_addr.s_addr >> 16 & 0xff,
+                      ((struct sockaddr_in *)ia_attr->ia_address_ptr)->sin_addr.s_addr >> 24 & 0xff );
+
+        ia_attr->hardware_version_major = dev_attr.hw_ver;
+        /* ia_attr->hardware_version_minor = dev_attr.fw_ver; */
+        ia_attr->max_eps = dev_attr.max_qp;
+        ia_attr->max_dto_per_ep = dev_attr.max_qp_wr;
+        ia_attr->max_rdma_read_per_ep = dev_attr.max_qp_rd_atom;
+        ia_attr->max_evds = dev_attr.max_cq;
+        ia_attr->max_evd_qlen = dev_attr.max_cqe;
+        ia_attr->max_iov_segments_per_dto = dev_attr.max_sge;
+        ia_attr->max_lmrs = dev_attr.max_mr;
+        ia_attr->max_lmr_block_size = dev_attr.max_mr_size;
+        ia_attr->max_rmrs = dev_attr.max_mw;
+        ia_attr->max_lmr_virtual_address = dev_attr.max_mr_size;
+        ia_attr->max_rmr_target_address = dev_attr.max_mr_size;
+        ia_attr->max_pzs = dev_attr.max_pd;
+        ia_attr->max_mtu_size = port_attr.max_msg_sz;
+        ia_attr->max_rdma_size = port_attr.max_msg_sz;
+        ia_attr->num_transport_attr = 0;
+        ia_attr->transport_attr = NULL;
+        ia_attr->num_vendor_attr = 0;
+        ia_attr->vendor_attr = NULL;
+
+        dapl_dbg_log (DAPL_DBG_TYPE_UTIL,
+                      " query_hca: (%x.%x) ep %d ep_q %d evd %d evd_q %d\n",
+                      ia_attr->hardware_version_major,
+                      ia_attr->hardware_version_minor,
+                      ia_attr->max_eps, ia_attr->max_dto_per_ep,
+                      ia_attr->max_evds, ia_attr->max_evd_qlen );
+        dapl_dbg_log (DAPL_DBG_TYPE_UTIL,
+                      " query_hca: msg %llu rdma %llu iov %d lmr %d rmr %d\n",
+                      ia_attr->max_mtu_size, ia_attr->max_rdma_size,
+                      ia_attr->max_iov_segments_per_dto, ia_attr->max_lmrs,
+                      ia_attr->max_rmrs );
+
+    }
+
+    if (ep_attr != NULL) {
+        ep_attr->max_mtu_size = port_attr.max_msg_sz;
+        ep_attr->max_rdma_size = port_attr.max_msg_sz;
+        ep_attr->max_recv_dtos = dev_attr.max_qp_wr;
+        ep_attr->max_request_dtos = dev_attr.max_qp_wr;
+        ep_attr->max_recv_iov = dev_attr.max_sge;
+        ep_attr->max_request_iov = dev_attr.max_sge;
+        ep_attr->max_rdma_read_in = dev_attr.max_qp_rd_atom;
+        ep_attr->max_rdma_read_out= dev_attr.max_qp_rd_atom;
+        dapl_dbg_log (DAPL_DBG_TYPE_UTIL,
+                      " query_hca: MAX msg %llu dto %d iov %d rdma i%d,o%d\n",
+                      ep_attr->max_mtu_size,
+                      ep_attr->max_recv_dtos, ep_attr->max_recv_iov,
+                      ep_attr->max_rdma_read_in, ep_attr->max_rdma_read_out);
+    }
+
+    return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_setup_async_callback
+ *
+ * Record an asynchronous event handler in the HCA transport data.
+ *
+ * Input:
+ *	ia_ptr		IA whose HCA transport stores the handler
+ *	handler_type	selects which handler slot is written
+ *	evd_ptr		only logged here
+ *	callback	callback routine pointer
+ *	context		stored for the unaffiliated handler only
+ *
+ * Output:
+ *	none
+ *
+ * Returns:
+ *	DAT_SUCCESS	always; an unrecognized handler_type stores nothing
+ *
+ */
+DAT_RETURN dapls_ib_setup_async_callback (
+    IN DAPL_IA *ia_ptr,
+    IN DAPL_ASYNC_HANDLER_TYPE handler_type,
+    IN DAPL_EVD *evd_ptr,
+    IN ib_async_handler_t callback,
+    IN void *context )
+
+{
+    ib_hca_transport_t *trans;
+
+    dapl_dbg_log (DAPL_DBG_TYPE_UTIL,
+                  " setup_async_cb: ia %p type %d handle %p cb %p ctx %p\n",
+                  ia_ptr, handler_type, evd_ptr, callback, context);
+
+    trans = &ia_ptr->hca_ptr->ib_trans;
+
+    if (handler_type == DAPL_ASYNC_UNAFILIATED) {
+        /* the only handler type that also keeps the caller's context */
+        trans->async_unafiliated = (ib_async_handler_t)callback;
+        trans->async_un_ctx = context;
+    } else if (handler_type == DAPL_ASYNC_CQ_ERROR) {
+        trans->async_cq_error = (ib_async_cq_handler_t)callback;
+    } else if (handler_type == DAPL_ASYNC_CQ_COMPLETION) {
+        trans->async_cq = (ib_async_dto_handler_t)callback;
+    } else if (handler_type == DAPL_ASYNC_QP_ERROR) {
+        trans->async_qp_error = (ib_async_qp_handler_t)callback;
+    }
+
+    return DAT_SUCCESS;
+}
+
Index: dapl/openib_scm/dapl_ib_mem.c
===================================================================
--- dapl/openib_scm/dapl_ib_mem.c (revision 0)
+++ dapl/openib_scm/dapl_ib_mem.c (revision 0)
@@ -0,0 +1,392 @@
+/*
+ * This Software is licensed under one of the following licenses:
+ *
+ * 1) under the terms of the "Common Public License 1.0" a copy of which is
+ * available from the Open Source Initiative, see
+ * http://www.opensource.org/licenses/cpl.php.
+ *
+ * 2) under the terms of the "The BSD License" a copy of which is
+ * available from the Open Source Initiative, see
+ * http://www.opensource.org/licenses/bsd-license.php.
+ *
+ * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
+ * copy of which is available from the Open Source Initiative, see
+ * http://www.opensource.org/licenses/gpl-license.php.
+ *
+ * Licensee has the right to choose one of the above licenses.
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice and one of the license notices.
+ *
+ * Redistributions in binary form must reproduce both the above copyright
+ * notice, one of the license notices in the documentation
+ * and/or other materials provided with the distribution.
+ */
+
+/**********************************************************************
+ *
+ * MODULE: dapl_ib_mem.c
+ *
+ * PURPOSE: uDAPL openib provider: Memory windows, registration,
+ * and protection domain
+ *
+ * $Id: $
+ *
+ **********************************************************************/
+
+#include <sys/ioctl.h> /* for IOCTL's */
+#include <sys/types.h> /* for socket(2) and related bits and pieces */
+#include <sys/socket.h> /* for socket(2) */
+#include <net/if.h> /* for struct ifreq */
+#include <net/if_arp.h> /* for ARPHRD_ETHER */
+#include <unistd.h> /* for _SC_CLK_TCK */
+
+#include "dapl.h"
+#include "dapl_adapter_util.h"
+#include "dapl_lmr_util.h"
+
+/*
+ * dapls_convert_privileges
+ *
+ * Convert LMR privileges to provider
+ *
+ * Local read has no verbs access flag, so only the local write, remote
+ * write and remote read privileges contribute to the result.
+ *
+ * Input:
+ *	DAT_MEM_PRIV_FLAGS
+ *
+ * Output:
+ *	none
+ *
+ * Returns:
+ *	ibv_access_flags
+ *
+ */
+STATIC _INLINE_ int
+dapls_convert_privileges (
+    IN DAT_MEM_PRIV_FLAGS privileges)
+{
+    int access = 0;
+
+    /*
+     * if (DAT_MEM_PRIV_LOCAL_READ_FLAG & privileges) do nothing
+     */
+    if (DAT_MEM_PRIV_LOCAL_WRITE_FLAG & privileges)
+        access |= IBV_ACCESS_LOCAL_WRITE;
+    if (DAT_MEM_PRIV_REMOTE_WRITE_FLAG & privileges)
+        access |= IBV_ACCESS_REMOTE_WRITE;
+    /* the remote read test used to be repeated three times */
+    if (DAT_MEM_PRIV_REMOTE_READ_FLAG & privileges)
+        access |= IBV_ACCESS_REMOTE_READ;
+
+    return access;
+}
+
+/*
+ * dapls_ib_pd_alloc
+ *
+ * Allocate a verbs protection domain on the IA's device and store it
+ * in the PZ.
+ *
+ * Input:
+ *	ia_ptr		IA whose HCA handle owns the new PD
+ *	pz		PZ receiving pd_handle
+ *
+ * Output:
+ *	none
+ *
+ * Returns:
+ *	DAT_SUCCESS
+ *	dapl_convert_errno(ENOMEM)	allocation failed
+ *
+ */
+DAT_RETURN
+dapls_ib_pd_alloc (
+    IN DAPL_IA *ia_ptr,
+    IN DAPL_PZ *pz )
+{
+    /* get a protection domain */
+    pz->pd_handle = ibv_alloc_pd(ia_ptr->hca_ptr->ib_hca_handle);
+
+    if (pz->pd_handle) {
+        dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " pd_alloc: pd_handle=%p\n",
+                     pz->pd_handle );
+        return DAT_SUCCESS;
+    }
+
+    return(dapl_convert_errno(ENOMEM,"alloc_pd"));
+}
+
+/*
+ * dapls_ib_pd_free
+ *
+ * Release the protection domain held by a PZ, if it holds one, and
+ * mark the handle invalid afterwards.
+ *
+ * Input:
+ *	pz		PZ whose pd_handle is released
+ *
+ * Output:
+ *	none
+ *
+ * Returns:
+ *	DAT_SUCCESS
+ *	dapl_convert_errno	ibv_dealloc_pd failed; handle left untouched
+ *
+ */
+DAT_RETURN
+dapls_ib_pd_free (
+    IN DAPL_PZ *pz )
+{
+    /* nothing to release */
+    if (pz->pd_handle == IB_INVALID_HANDLE)
+        return DAT_SUCCESS;
+
+    if (ibv_dealloc_pd(pz->pd_handle))
+        return(dapl_convert_errno(errno,"dealloc_pd"));
+
+    pz->pd_handle = IB_INVALID_HANDLE;
+    return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_mr_register
+ *
+ * Register a virtual memory region
+ *
+ * Registers [virt_addr, virt_addr + length) with the protection domain
+ * of the LMR's PZ and records the resulting keys, size and address in
+ * lmr->param.  Shared virtual memory is rejected as not implemented.
+ *
+ * Input:
+ *	ia_ptr		IA handle (only logged)
+ *	lmr		pointer to dapl_lmr struct
+ *	virt_addr	virtual address of beginning of mem region
+ *	length		length of memory region
+ *	privileges	DAT privileges converted to verbs access flags
+ *
+ * Output:
+ *	none
+ *
+ * Returns:
+ *	DAT_SUCCESS
+ *	DAT_NOT_IMPLEMENTED		shared virtual memory type
+ *	dapl_convert_errno(ENOMEM)	ibv_reg_mr failed
+ *
+ */
+DAT_RETURN
+dapls_ib_mr_register (
+    IN DAPL_IA *ia_ptr,
+    IN DAPL_LMR *lmr,
+    IN DAT_PVOID virt_addr,
+    IN DAT_VLEN length,
+    IN DAT_MEM_PRIV_FLAGS privileges)
+{
+    ib_pd_handle_t ib_pd_handle;
+
+    ib_pd_handle = ((DAPL_PZ *)lmr->param.pz_handle)->pd_handle;
+
+    /* length is printed as unsigned long long, as done for other
+     * DAT_VLEN values in this provider */
+    dapl_dbg_log ( DAPL_DBG_TYPE_UTIL,
+                   " mr_register: ia=%p, lmr=%p va=%p ln=%llu pv=0x%x\n",
+                   ia_ptr, lmr, virt_addr,
+                   (unsigned long long)length, privileges );
+
+    /* TODO: shared memory */
+    if (lmr->param.mem_type == DAT_MEM_TYPE_SHARED_VIRTUAL) {
+        dapl_dbg_log( DAPL_DBG_TYPE_ERR,
+                      " mr_register_shared: NOT IMPLEMENTED\n");
+        return DAT_ERROR (DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);
+    }
+
+    /* local read is default on IB */
+    lmr->mr_handle =
+        ibv_reg_mr(ib_pd_handle,
+                   virt_addr,
+                   length,
+                   dapls_convert_privileges(privileges));
+
+    if (!lmr->mr_handle)
+        return(dapl_convert_errno(ENOMEM,"reg_mr"));
+
+    lmr->param.lmr_context = lmr->mr_handle->lkey;
+    lmr->param.rmr_context = lmr->mr_handle->rkey;
+    lmr->param.registered_size = length;
+    lmr->param.registered_address = (DAT_VADDR)(uintptr_t) virt_addr;
+
+    /* one conversion per argument: the length used to have none, so it
+     * was printed in the priv= slot */
+    dapl_dbg_log ( DAPL_DBG_TYPE_UTIL,
+                   " mr_register: mr=%p h %x pd %p ctx %p ,lkey=0x%x, rkey=0x%x len=%llu priv=%x\n",
+                   lmr->mr_handle, lmr->mr_handle->handle,
+                   lmr->mr_handle->pd,
+                   lmr->mr_handle->context,
+                   lmr->mr_handle->lkey,
+                   lmr->mr_handle->rkey,
+                   (unsigned long long)length,
+                   dapls_convert_privileges(privileges) );
+
+    return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_mr_deregister
+ *
+ * Free a memory region
+ *
+ * Deregisters the LMR's memory region, if it has one, and marks the
+ * handle invalid afterwards.
+ *
+ * Input:
+ *	lmr		pointer to dapl_lmr struct
+ *
+ * Output:
+ *	none
+ *
+ * Returns:
+ *	DAT_SUCCESS
+ *	dapl_convert_errno	ibv_dereg_mr failed; handle left untouched
+ *
+ */
+DAT_RETURN
+dapls_ib_mr_deregister (
+    IN DAPL_LMR *lmr )
+{
+    if (lmr->mr_handle != IB_INVALID_HANDLE) {
+        /* label the failure after the call that failed (was "dereg_pd") */
+        if (ibv_dereg_mr(lmr->mr_handle))
+            return(dapl_convert_errno(errno,"dereg_mr"));
+        lmr->mr_handle = IB_INVALID_HANDLE;
+    }
+    return DAT_SUCCESS;
+}
+
+
+/*
+ * dapls_ib_mr_register_shared
+ *
+ * Register a shared virtual memory region.
+ * Not implemented in this provider: the arguments are not examined,
+ * an error is logged and DAT_NOT_IMPLEMENTED is returned.
+ *
+ * Input:
+ * ia_ptr IA handle
+ * lmr pointer to dapl_lmr struct
+ * privileges requested memory privileges
+ *
+ * Output:
+ * none
+ *
+ * Returns:
+ * DAT_ERROR (DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE)
+ *
+ */
+DAT_RETURN
+dapls_ib_mr_register_shared (
+ IN DAPL_IA *ia_ptr,
+ IN DAPL_LMR *lmr,
+ IN DAT_MEM_PRIV_FLAGS privileges )
+{
+ dapl_dbg_log(DAPL_DBG_TYPE_ERR," mr_register_shared: NOT IMPLEMENTED\n");
+ return DAT_ERROR (DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);
+}
+
+/*
+ * dapls_ib_mw_alloc
+ *
+ * Bind a protection domain to a memory window.
+ * Not implemented in this provider: rmr is not examined, an error is
+ * logged and DAT_NOT_IMPLEMENTED is returned.
+ *
+ * Input:
+ * rmr Initialized rmr to hold binding handles
+ *
+ * Output:
+ * none
+ *
+ * Returns:
+ * DAT_ERROR (DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE)
+ *
+ */
+DAT_RETURN
+dapls_ib_mw_alloc (
+ IN DAPL_RMR *rmr )
+{
+
+ dapl_dbg_log(DAPL_DBG_TYPE_ERR," mw_alloc: NOT IMPLEMENTED\n");
+ return DAT_ERROR (DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);
+}
+
+/*
+ * dapls_ib_mw_free
+ *
+ * Release bindings of a protection domain to a memory window.
+ * Not implemented in this provider: rmr is not examined, an error is
+ * logged and DAT_NOT_IMPLEMENTED is returned.
+ *
+ * Input:
+ * rmr Initialized rmr to hold binding handles
+ *
+ * Output:
+ * none
+ *
+ * Returns:
+ * DAT_ERROR (DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE)
+ *
+ */
+DAT_RETURN
+dapls_ib_mw_free (
+ IN DAPL_RMR *rmr )
+{
+ dapl_dbg_log(DAPL_DBG_TYPE_ERR," mw_free: NOT IMPLEMENTED\n");
+ return DAT_ERROR (DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);
+}
+
+/*
+ * dapls_ib_mw_bind
+ *
+ * Bind a protection domain to a memory window.
+ * Not implemented in this provider: the arguments are not examined,
+ * an error is logged and DAT_NOT_IMPLEMENTED is returned.
+ *
+ * Input:
+ * rmr Initialized rmr to hold binding handles
+ * lmr, ep, cookie, virtual_address, length, mem_priv, is_signaled
+ * accepted for interface compatibility, unused
+ *
+ * Output:
+ * none
+ *
+ * Returns:
+ * DAT_ERROR (DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE)
+ *
+ */
+DAT_RETURN
+dapls_ib_mw_bind (
+ IN DAPL_RMR *rmr,
+ IN DAPL_LMR *lmr,
+ IN DAPL_EP *ep,
+ IN DAPL_COOKIE *cookie,
+ IN DAT_VADDR virtual_address,
+ IN DAT_VLEN length,
+ IN DAT_MEM_PRIV_FLAGS mem_priv,
+ IN DAT_BOOLEAN is_signaled)
+{
+ dapl_dbg_log(DAPL_DBG_TYPE_ERR," mw_bind: NOT IMPLEMENTED\n");
+ return DAT_ERROR (DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);
+}
+
+/*
+ * dapls_ib_mw_unbind
+ *
+ * Unbind a protection domain from a memory window.
+ * Not implemented in this provider: the arguments are not examined,
+ * an error is logged and DAT_NOT_IMPLEMENTED is returned.
+ *
+ * Input:
+ * rmr Initialized rmr to hold binding handles
+ * ep, cookie, is_signaled
+ * accepted for interface compatibility, unused
+ *
+ * Output:
+ * none
+ *
+ * Returns:
+ * DAT_ERROR (DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE)
+ *
+ */
+DAT_RETURN
+dapls_ib_mw_unbind (
+ IN DAPL_RMR *rmr,
+ IN DAPL_EP *ep,
+ IN DAPL_COOKIE *cookie,
+ IN DAT_BOOLEAN is_signaled )
+{
+ dapl_dbg_log(DAPL_DBG_TYPE_ERR," mw_unbind: NOT IMPLEMENTED\n");
+ return DAT_ERROR (DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);
+}
+
+/*
+ * Local variables:
+ * c-indent-level: 4
+ * c-basic-offset: 4
+ * tab-width: 8
+ * End:
+ */
+
Index: dapl/openib_scm/dapl_ib_cm.c
===================================================================
--- dapl/openib_scm/dapl_ib_cm.c (revision 0)
+++ dapl/openib_scm/dapl_ib_cm.c (revision 0)
@@ -0,0 +1,1073 @@
+/*
+ * This Software is licensed under both of the following two licenses:
+ *
+ * 1) under the terms of the "Common Public License 1.0" a copy of which is
+ * in the file LICENSE.txt in the root directory. The license is also
+ * available from the Open Source Initiative, see
+ * http://www.opensource.org/licenses/cpl.php.
+ * OR
+ *
+ * 2) under the terms of the "The BSD License" a copy of which is in the file
+ * LICENSE2.txt in the root directory. The license is also available from
+ * the Open Source Initiative, see
+ * http://www.opensource.org/licenses/bsd-license.php.
+ *
+ * Licensee has the right to choose either one of the above two licenses.
+ *
+ * Redistributions of source code must retain both the above copyright
+ * notice and either one of the license notices.
+ *
+ * Redistributions in binary form must reproduce both the above copyright
+ * notice, either one of the license notices in the documentation
+ * and/or other materials provided with the distribution.
+ */
+
+/***************************************************************************
+ *
+ * Module: uDAPL
+ *
+ * Filename: dapl_ib_cm.c
+ *
+ * Author: Arlin Davis
+ *
+ * Created: 3/10/2005
+ *
+ * Description:
+ *
+ * The uDAPL openib provider - connection management
+ *
+ ****************************************************************************
+ * Source Control System Information
+ *
+ * $Id: $
+ *
+ * Copyright (c) 2005 Intel Corporation. All rights reserved.
+ *
+ **************************************************************************/
+
+#include "dapl.h"
+#include "dapl_adapter_util.h"
+#include "dapl_evd_util.h"
+#include "dapl_cr_util.h"
+#include "dapl_name_service.h"
+#include "dapl_ib_util.h"
+
+#include <stdio.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <netinet/tcp.h>
+#include <sysfs/libsysfs.h>
+
+/* prototypes for the file-local socket CM helpers defined below */
+static uint16_t dapli_get_lid( struct ibv_device *dev, int port );
+
+static DAT_RETURN dapli_socket_connect ( DAPL_EP *ep_ptr,
+ DAT_IA_ADDRESS_PTR r_addr,
+ DAT_CONN_QUAL r_qual,
+ DAT_COUNT p_size,
+ DAT_PVOID p_data );
+
+static DAT_RETURN dapli_socket_listen ( DAPL_IA *ia_ptr,
+ DAT_CONN_QUAL serviceID,
+ DAPL_SP *sp_ptr );
+
+static DAT_RETURN dapli_socket_accept( ib_cm_srvc_handle_t cm_ptr );
+
+static DAT_RETURN dapli_socket_accept_final( DAPL_EP *ep_ptr,
+ DAPL_CR *cr_ptr,
+ DAT_COUNT p_size,
+ DAT_PVOID p_data );
+
+/* XXX temporary hack to get lid: reads it from sysfs; returns 0 on failure */
+static uint16_t dapli_get_lid(IN struct ibv_device *dev, IN int port)
+{
+    char path[128], val[16], name[256] = "";
+    int n;
+
+    if (sysfs_get_mnt_path(path, sizeof path)) {
+        fprintf(stderr, "Couldn't find sysfs mount.\n");
+        return 0;
+    }
+    /* bounded format: a long mount path or device name must not overrun name[] */
+    n = snprintf(name, sizeof name, "%s/class/infiniband/%s/ports/%d/lid",
+                 path, ibv_get_device_name(dev), port);
+    if (n < 0 || (size_t)n >= sizeof name ||
+        sysfs_read_attribute_value(name, val, sizeof val)) {
+        fprintf(stderr, "Couldn't read LID at %s\n", name);
+        return 0;
+    }
+    return strtol(val, NULL, 0);
+}
+
+/*
+ * ACTIVE: Create socket, connect, and exchange QP information.
+ * Sends local QP info and private data, reads the peer's reply, moves
+ * the QP to RTR then RTS, sends the RTU token and posts CONNECTED.
+ * Failures after the TCP connect post IB_CME_LOCAL_FAILURE via bail.
+ */
+static DAT_RETURN
+dapli_socket_connect ( DAPL_EP *ep_ptr,
+                       DAT_IA_ADDRESS_PTR r_addr,
+                       DAT_CONN_QUAL r_qual,
+                       DAT_COUNT p_size,
+                       DAT_PVOID p_data )
+{
+    ib_cm_handle_t cm_ptr;
+    DAPL_IA *ia_ptr = ep_ptr->header.owner_ia;
+    int len, opt = 1;
+    struct iovec iovec[2];
+    short rtu_data = htons(0x0E0F);
+
+    dapl_dbg_log(DAPL_DBG_TYPE_EP, " connect: r_qual %d\n", (int)r_qual);
+
+    /* Allocate CM and initialize */
+    if ((cm_ptr = dapl_os_alloc(sizeof(*cm_ptr))) == NULL ) {
+        return DAT_INSUFFICIENT_RESOURCES;
+    }
+
+    (void) dapl_os_memzero( cm_ptr, sizeof( *cm_ptr ) );
+    cm_ptr->socket = -1;
+
+    /* create, connect, sockopt, and exchange QP information */
+    if ((cm_ptr->socket = socket(AF_INET,SOCK_STREAM,0)) < 0 ) {
+        dapl_os_free( cm_ptr, sizeof( *cm_ptr ) );
+        return DAT_INSUFFICIENT_RESOURCES;
+    }
+
+    ((struct sockaddr_in*)r_addr)->sin_port = htons(r_qual);
+
+    if ( connect(cm_ptr->socket, r_addr, sizeof(*r_addr)) < 0 ) {
+        dapl_dbg_log(DAPL_DBG_TYPE_ERR,
+                     " connect: %s on r_qual %d\n",
+                     strerror(errno), (unsigned int)r_qual);
+        close( cm_ptr->socket ); /* release the descriptor as well */
+        dapl_os_free( cm_ptr, sizeof( *cm_ptr ) );
+        return DAT_INVALID_ADDRESS;
+    }
+    setsockopt(cm_ptr->socket,IPPROTO_TCP,TCP_NODELAY,&opt,sizeof(opt));
+
+    /* Send QP info, IA address, and private data */
+    cm_ptr->dst.qpn = ep_ptr->qp_handle->qp_num;
+    cm_ptr->dst.port = ia_ptr->hca_ptr->port_num;
+    cm_ptr->dst.lid = dapli_get_lid( ia_ptr->hca_ptr->ib_trans.ib_dev,
+                                     ia_ptr->hca_ptr->port_num );
+    cm_ptr->dst.ia_address = ia_ptr->hca_ptr->hca_address;
+    cm_ptr->dst.p_size = p_size;
+    iovec[0].iov_base = &cm_ptr->dst;
+    iovec[0].iov_len = sizeof(ib_qp_cm_t);
+    if ( p_size ) {
+        iovec[1].iov_base = p_data;
+        iovec[1].iov_len = p_size;
+    }
+    len = writev( cm_ptr->socket, iovec, (p_size ? 2:1) );
+    if ( len != (p_size + sizeof(ib_qp_cm_t)) ) {
+        dapl_dbg_log(DAPL_DBG_TYPE_ERR,
+                     " connect write: ERR %s, wcnt=%d\n",
+                     strerror(errno), len);
+        goto bail;
+    }
+    dapl_dbg_log(DAPL_DBG_TYPE_EP,
+                 " connect: SRC port=0x%x lid=0x%x, qpn=0x%x, psize=%d\n",
+                 cm_ptr->dst.port, cm_ptr->dst.lid,
+                 cm_ptr->dst.qpn, cm_ptr->dst.p_size );
+
+    /* read DST information into cm_ptr, overwrite SRC info */
+    len = readv( cm_ptr->socket, iovec, 1 );
+    if ( len != sizeof(ib_qp_cm_t) ) {
+        dapl_dbg_log(DAPL_DBG_TYPE_ERR,
+                     " connect read: ERR %s, rcnt=%d\n",
+                     strerror(errno), len);
+        goto bail;
+    }
+    dapl_dbg_log(DAPL_DBG_TYPE_EP,
+                 " connect: DST port=0x%x lid=0x%x, qpn=0x%x, psize=%d\n",
+                 cm_ptr->dst.port, cm_ptr->dst.lid,
+                 cm_ptr->dst.qpn, cm_ptr->dst.p_size );
+
+    /* validate peer-supplied size; NOTE(review): < 0 assumes p_size is signed */
+    if ( cm_ptr->dst.p_size < 0 ||
+         cm_ptr->dst.p_size > IB_MAX_REP_PDATA_SIZE ) {
+        dapl_dbg_log(DAPL_DBG_TYPE_ERR,
+                     " connect read: psize (%d) wrong\n",
+                     cm_ptr->dst.p_size );
+        goto bail;
+    }
+
+    /* read private data into cm_handle if any present */
+    if ( cm_ptr->dst.p_size ) {
+        iovec[0].iov_base = cm_ptr->p_data;
+        iovec[0].iov_len = cm_ptr->dst.p_size;
+        len = readv( cm_ptr->socket, iovec, 1 );
+        if ( len != cm_ptr->dst.p_size ) {
+            dapl_dbg_log(DAPL_DBG_TYPE_ERR,
+                         " connect read pdata: ERR %s, rcnt=%d\n",
+                         strerror(errno), len);
+            goto bail;
+        }
+    }
+
+    /* modify QP to RTR and then to RTS with remote info */
+    if ( dapls_modify_qp_state( ep_ptr->qp_handle,
+                                IBV_QPS_RTR, &cm_ptr->dst ) != DAT_SUCCESS )
+        goto bail;
+
+    if ( dapls_modify_qp_state( ep_ptr->qp_handle,
+                                IBV_QPS_RTS, &cm_ptr->dst ) != DAT_SUCCESS )
+        goto bail;
+
+    ep_ptr->qp_state = IB_QP_STATE_RTS;
+
+    /* complete handshake after final QP state change; the peer waits for it */
+    if ( write(cm_ptr->socket, &rtu_data, sizeof(rtu_data)) != (ssize_t)sizeof(rtu_data) )
+        goto bail;
+
+    /* init cm_handle and post the event with private data */
+    ep_ptr->cm_handle = cm_ptr;
+    dapl_dbg_log( DAPL_DBG_TYPE_EP," ACTIVE: connected!\n" );
+    dapl_evd_connection_callback( ep_ptr->cm_handle, IB_CME_CONNECTED,
+                                  cm_ptr->p_data, ep_ptr );
+    return DAT_SUCCESS;
+
+bail:
+    /* close socket, free cm structure and post error event */
+    if ( cm_ptr->socket >= 0 )
+        close(cm_ptr->socket);
+    dapl_os_free( cm_ptr, sizeof( *cm_ptr ) );
+    dapls_ib_reinit_ep( ep_ptr ); /* reset QP state */
+
+    dapl_evd_connection_callback( ep_ptr->cm_handle, IB_CME_LOCAL_FAILURE,
+                                  NULL, ep_ptr );
+    return DAT_INTERNAL_ERROR;
+}
+
+
+/*
+ * PASSIVE: Create the listening socket for a service point, bind it to
+ * the conn qual (used as the TCP port) and queue the CM object on the
+ * HCA list that cr_thread polls for incoming connections.
+ */
+static DAT_RETURN
+dapli_socket_listen ( DAPL_IA *ia_ptr,
+                      DAT_CONN_QUAL serviceID,
+                      DAPL_SP *sp_ptr )
+{
+    struct sockaddr_in addr;
+    ib_cm_srvc_handle_t cm_ptr = NULL;
+    int opt = 1, err;
+    DAT_RETURN dat_status = DAT_SUCCESS;
+
+    dapl_dbg_log ( DAPL_DBG_TYPE_EP,
+                   " listen(ia_ptr %p ServiceID %d sp_ptr %p)\n",
+                   ia_ptr, (int)serviceID, sp_ptr);
+
+    /* Allocate CM and initialize */
+    if ((cm_ptr = dapl_os_alloc(sizeof(*cm_ptr))) == NULL)
+        return DAT_INSUFFICIENT_RESOURCES;
+
+    (void) dapl_os_memzero( cm_ptr, sizeof( *cm_ptr ) );
+    cm_ptr->socket = cm_ptr->l_socket = -1;
+    cm_ptr->sp = sp_ptr;
+    cm_ptr->hca_ptr = ia_ptr->hca_ptr;
+
+    /* bind, listen, set sockopt, accept, exchange data */
+    if ((cm_ptr->l_socket = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
+        dapl_dbg_log (DAPL_DBG_TYPE_ERR,
+                      "socket for listen returned %d\n", errno);
+        dat_status = DAT_INSUFFICIENT_RESOURCES;
+        goto bail;
+    }
+
+    setsockopt(cm_ptr->l_socket,SOL_SOCKET,SO_REUSEADDR,&opt,sizeof(opt));
+    (void) dapl_os_memzero( &addr, sizeof( addr ) ); /* no stale padding bytes */
+    addr.sin_port = htons(serviceID);
+    addr.sin_family = AF_INET;
+    addr.sin_addr.s_addr = INADDR_ANY;
+
+    if (( bind( cm_ptr->l_socket,(struct sockaddr*)&addr, sizeof(addr) ) < 0) ||
+        (listen( cm_ptr->l_socket, 128 ) < 0) ) {
+        err = errno; /* save it: the logging calls below may change errno */
+        dapl_dbg_log( DAPL_DBG_TYPE_ERR,
+                      " listen: ERROR %s on conn_qual 0x%x\n",
+                      strerror(err), (unsigned int)serviceID);
+        if ( err == EADDRINUSE )
+            dat_status = DAT_CONN_QUAL_IN_USE;
+        else
+            dat_status = DAT_CONN_QUAL_UNAVAILABLE;
+        goto bail;
+    }
+
+    /* set cm_handle for this service point, save listen socket */
+    sp_ptr->cm_srvc_handle = cm_ptr;
+
+    /* add to SP->CR thread list */
+    dapl_llist_init_entry((DAPL_LLIST_ENTRY*)&cm_ptr->entry);
+    dapl_os_lock( &cm_ptr->hca_ptr->ib_trans.lock );
+    dapl_llist_add_tail(&cm_ptr->hca_ptr->ib_trans.list,
+                        (DAPL_LLIST_ENTRY*)&cm_ptr->entry, cm_ptr);
+    dapl_os_unlock(&cm_ptr->hca_ptr->ib_trans.lock);
+
+    dapl_dbg_log( DAPL_DBG_TYPE_CM,
+                  " listen: qual 0x%x cr %p s_fd %d\n",
+                  (unsigned int)serviceID, cm_ptr, cm_ptr->l_socket );
+
+    return dat_status;
+bail:
+    dapl_dbg_log( DAPL_DBG_TYPE_ERR,
+                  " listen: ERROR on conn_qual 0x%x\n",(unsigned int)serviceID);
+    if ( cm_ptr->l_socket >= 0 )
+        close( cm_ptr->l_socket );
+    dapl_os_free( cm_ptr, sizeof( *cm_ptr ) );
+    return dat_status;
+}
+
+
+/*
+ * PASSIVE: accept a pending TCP connection on the listen socket, read
+ * the active side's QP information and private data into a new CM
+ * object, and post the connection request event to the service point.
+ */
+static DAT_RETURN
+dapli_socket_accept(ib_cm_srvc_handle_t cm_ptr)
+{
+    ib_cm_handle_t acm_ptr;
+    void *p_data = NULL;
+    int len;
+    socklen_t alen; /* accept() takes a socklen_t *, not an int * */
+    DAT_RETURN dat_status = DAT_SUCCESS;
+
+    /* Allocate accept CM and initialize */
+    if ((acm_ptr = dapl_os_alloc(sizeof(*acm_ptr))) == NULL)
+        return DAT_INSUFFICIENT_RESOURCES;
+
+    (void) dapl_os_memzero( acm_ptr, sizeof( *acm_ptr ) );
+
+    acm_ptr->socket = -1;
+    acm_ptr->sp = cm_ptr->sp;
+    acm_ptr->hca_ptr = cm_ptr->hca_ptr;
+
+    alen = sizeof(acm_ptr->dst.ia_address);
+    acm_ptr->socket = accept(cm_ptr->l_socket,
+                             (struct sockaddr*)&acm_ptr->dst.ia_address,
+                             &alen );
+
+    if ( acm_ptr->socket < 0 ) {
+        dapl_dbg_log(DAPL_DBG_TYPE_ERR,
+                     " accept: ERR %s on FD %d l_cr %p\n",
+                     strerror(errno),cm_ptr->l_socket,cm_ptr);
+        dat_status = DAT_INTERNAL_ERROR;
+        goto bail;
+    }
+
+    /* read in DST QP info, IA address. check for private data */
+    len = read( acm_ptr->socket, &acm_ptr->dst, sizeof(ib_qp_cm_t) );
+    if ( len != sizeof(ib_qp_cm_t) ) {
+        dapl_dbg_log(DAPL_DBG_TYPE_ERR,
+                     " accept read: ERR %s, rcnt=%d\n",
+                     strerror(errno), len);
+        dat_status = DAT_INTERNAL_ERROR;
+        goto bail;
+    }
+    dapl_dbg_log(DAPL_DBG_TYPE_EP,
+                 " accept: DST port=0x%x lid=0x%x, qpn=0x%x, psize=%d\n",
+                 acm_ptr->dst.port, acm_ptr->dst.lid,
+                 acm_ptr->dst.qpn, acm_ptr->dst.p_size );
+
+    /* validate peer-supplied size; NOTE(review): < 0 assumes p_size is signed */
+    if ( acm_ptr->dst.p_size < 0 ||
+         acm_ptr->dst.p_size > IB_MAX_REQ_PDATA_SIZE ) {
+        dapl_dbg_log(DAPL_DBG_TYPE_ERR,
+                     " accept read: psize (%d) wrong\n",
+                     acm_ptr->dst.p_size );
+        dat_status = DAT_INTERNAL_ERROR;
+        goto bail;
+    }
+
+    /* read private data into cm_handle if any present */
+    if ( acm_ptr->dst.p_size ) {
+        len = read( acm_ptr->socket,
+                    acm_ptr->p_data, acm_ptr->dst.p_size );
+        if ( len != acm_ptr->dst.p_size ) {
+            dapl_dbg_log(DAPL_DBG_TYPE_ERR,
+                         " accept read pdata: ERR %s, rcnt=%d\n",
+                         strerror(errno), len );
+            dat_status = DAT_INTERNAL_ERROR;
+            goto bail;
+        }
+        dapl_dbg_log(DAPL_DBG_TYPE_EP,
+                     " accept: psize=%d read\n",
+                     acm_ptr->dst.p_size);
+        p_data = acm_ptr->p_data;
+    }
+
+    /* trigger CR event and return SUCCESS */
+    dapls_cr_callback( acm_ptr, IB_CME_CONNECTION_REQUEST_PENDING,
+                       p_data, acm_ptr->sp );
+
+    return DAT_SUCCESS;
+
+bail:
+    if ( acm_ptr->socket >=0 )
+        close( acm_ptr->socket );
+    dapl_os_free( acm_ptr, sizeof( *acm_ptr ) );
+    return dat_status;
+}
+
+
+/*
+ * PASSIVE: finish an accept. Moves the QP to RTR then RTS using the peer
+ * info already read, sends local QP info and private data, then waits for
+ * the active side's RTU token (0x0e0f) before posting IB_CME_CONNECTED.
+ */
+static DAT_RETURN
+dapli_socket_accept_final( DAPL_EP *ep_ptr,
+                           DAPL_CR *cr_ptr,
+                           DAT_COUNT p_size,
+                           DAT_PVOID p_data )
+{
+    DAPL_IA *ia_ptr = ep_ptr->header.owner_ia;
+    ib_cm_handle_t cm_ptr = cr_ptr->ib_cm_handle;
+    ib_qp_cm_t qp_cm;
+    struct iovec iovec[2];
+    int len;
+    short rtu_data = 0;
+
+    if (p_size > IB_MAX_REP_PDATA_SIZE)
+        return DAT_LENGTH_ERROR;
+    /* must have a accepted socket */
+    if ( cm_ptr->socket < 0 )
+        return DAT_INTERNAL_ERROR;
+
+    /* modify QP to RTR and then to RTS with remote info already read */
+    if ( dapls_modify_qp_state( ep_ptr->qp_handle,
+                                IBV_QPS_RTR, &cm_ptr->dst ) != DAT_SUCCESS )
+        goto bail;
+    if ( dapls_modify_qp_state( ep_ptr->qp_handle,
+                                IBV_QPS_RTS, &cm_ptr->dst ) != DAT_SUCCESS )
+        goto bail;
+    ep_ptr->qp_state = IB_QP_STATE_RTS;
+
+    /* Send QP info, IA address, and private data */
+    qp_cm.qpn = ep_ptr->qp_handle->qp_num;
+    qp_cm.port = ia_ptr->hca_ptr->port_num;
+    qp_cm.lid = dapli_get_lid( ia_ptr->hca_ptr->ib_trans.ib_dev,
+                               ia_ptr->hca_ptr->port_num );
+    qp_cm.ia_address = ia_ptr->hca_ptr->hca_address;
+    qp_cm.p_size = p_size;
+    iovec[0].iov_base = &qp_cm;
+    iovec[0].iov_len = sizeof(ib_qp_cm_t);
+    if (p_size) {
+        iovec[1].iov_base = p_data;
+        iovec[1].iov_len = p_size;
+    }
+    len = writev( cm_ptr->socket, iovec, (p_size ? 2:1) );
+    if (len != (p_size + sizeof(ib_qp_cm_t))) {
+        dapl_dbg_log(DAPL_DBG_TYPE_ERR,
+                     " accept_final: ERR %s, wcnt=%d\n",
+                     strerror(errno), len);
+        goto bail;
+    }
+    dapl_dbg_log(DAPL_DBG_TYPE_EP,
+                 " accept_final: SRC port=0x%x lid=0x%x, qpn=0x%x, psize=%d\n",
+                 qp_cm.port, qp_cm.lid, qp_cm.qpn, qp_cm.p_size );
+
+    /* complete handshake after final QP state change */
+    len = read(cm_ptr->socket, &rtu_data, sizeof(rtu_data) );
+    if ( len != sizeof(rtu_data) || ntohs(rtu_data) != 0x0e0f ) {
+        dapl_dbg_log(DAPL_DBG_TYPE_ERR,
+                     " accept_final: ERR %s, rcnt=%d rdata=%x\n",
+                     strerror(errno), len, ntohs(rtu_data) );
+        goto bail;
+    }
+    /* final data exchange if remote QP state is good to go */
+    dapl_dbg_log( DAPL_DBG_TYPE_EP," PASSIVE: connected!\n" );
+    dapls_cr_callback ( cm_ptr, IB_CME_CONNECTED, NULL, cm_ptr->sp );
+    return DAT_SUCCESS;
+
+bail:
+    dapl_dbg_log( DAPL_DBG_TYPE_ERR," accept_final: ERR !QP_RTR_RTS \n");
+    if ( cm_ptr->socket >= 0 ) /* test the descriptor, not the pointer */
+        close( cm_ptr->socket );
+    dapl_os_free( cm_ptr, sizeof( *cm_ptr ) );
+    dapls_ib_reinit_ep( ep_ptr ); /* reset QP state */
+    return DAT_INTERNAL_ERROR;
+}
+
+
+/*
+ * dapls_ib_connect
+ *
+ * Initiate a connection with the passive listener on another node
+ *
+ * Input:
+ *     ep_handle,
+ *     remote_ia_address,
+ *     remote_conn_qual,
+ *     private_data_size       size of private data
+ *     private_data            pointer to private data
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     DAT_SUCCESS (also returned, without connecting, for a NULL ep_handle)
+ *     otherwise the status of dapli_socket_connect:
+ *     DAT_INSUFFICIENT_RESOURCES
+ *     DAT_INVALID_ADDRESS
+ *     DAT_INTERNAL_ERROR
+ *
+ */
+DAT_RETURN
+dapls_ib_connect (
+    IN DAT_EP_HANDLE ep_handle,
+    IN DAT_IA_ADDRESS_PTR remote_ia_address,
+    IN DAT_CONN_QUAL remote_conn_qual,
+    IN DAT_COUNT private_data_size,
+    IN void *private_data )
+{
+    DAPL_EP *ep_ptr;
+
+    dapl_dbg_log ( DAPL_DBG_TYPE_EP,
+                   " connect(ep_handle %p ....)\n", ep_handle);
+    /*
+     * Sanity check: a NULL handle is accepted without connecting.
+     * NOTE(review): confirm callers rely on DAT_SUCCESS here.
+     */
+    if ( NULL == ep_handle )
+        return DAT_SUCCESS;
+
+    ep_ptr = (DAPL_EP*)ep_handle;
+
+    return (dapli_socket_connect( ep_ptr, remote_ia_address,
+                                  remote_conn_qual,
+                                  private_data_size, private_data ));
+}
+
+/*
+ * dapls_ib_disconnect
+ *
+ * Disconnect an EP (no-op when the EP has no CM handle attached)
+ *
+ * Input:
+ *     ep_ptr,
+ *     close_flags
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     DAT_SUCCESS
+ *
+ */
+DAT_RETURN
+dapls_ib_disconnect (
+    IN DAPL_EP *ep_ptr,
+    IN DAT_CLOSE_FLAGS close_flags )
+{
+    ib_cm_handle_t cm_ptr = ep_ptr->cm_handle;
+
+    dapl_dbg_log (DAPL_DBG_TYPE_EP,
+                  "dapls_ib_disconnect(ep_handle %p ....)\n",
+                  ep_ptr);
+
+    /* the active path below clears cm_handle, so a repeat call lands here */
+    if ( cm_ptr == NULL )
+        return DAT_SUCCESS;
+
+    if ( cm_ptr->socket >= 0 ) {
+        close( cm_ptr->socket );
+        cm_ptr->socket = -1;
+    }
+
+    /* reinit to modify QP state */
+    dapls_ib_reinit_ep(ep_ptr);
+
+    if ( ep_ptr->cr_ptr ) {
+        dapls_cr_callback ( ep_ptr->cm_handle, IB_CME_DISCONNECTED,
+                            NULL, ((DAPL_CR *)ep_ptr->cr_ptr)->sp_ptr );
+    } else {
+        dapl_evd_connection_callback ( ep_ptr->cm_handle, IB_CME_DISCONNECTED,
+                                       NULL, ep_ptr );
+        ep_ptr->cm_handle = NULL;
+        dapl_os_free( cm_ptr, sizeof( *cm_ptr ) );
+    }
+    return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_disconnect_clean
+ *
+ * Clean up outstanding connection data. This routine is invoked
+ * after the final disconnect callback has occurred. Only on the
+ * ACTIVE side of a connection. This provider does no work here.
+ *
+ * Input:
+ *     ep_ptr          DAPL_EP
+ *     active          Indicates active side of connection
+ *     ib_cm_event     CM event code (unused)
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     void
+ */
+void
+dapls_ib_disconnect_clean (
+    IN DAPL_EP *ep_ptr,
+    IN DAT_BOOLEAN active,
+    IN const ib_cm_events_t ib_cm_event )
+{
+    /* intentionally empty */
+}
+
+/*
+ * dapls_ib_setup_conn_listener
+ *
+ * Have the CM set up a connection listener.
+ *
+ * Input:
+ *     ia_ptr          IA the listener is created for
+ *     ServiceID       conn qual handed to dapli_socket_listen
+ *     sp_ptr          service point that receives the CM handle
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     DAT_SUCCESS
+ *     DAT_INSUFFICIENT_RESOURCES
+ *     DAT_CONN_QUAL_UNAVAILABLE
+ *     DAT_CONN_QUAL_IN_USE
+ */
+DAT_RETURN
+dapls_ib_setup_conn_listener (
+    IN DAPL_IA *ia_ptr,
+    IN DAT_UINT64 ServiceID,
+    IN DAPL_SP *sp_ptr )
+{
+    DAT_RETURN status = dapli_socket_listen( ia_ptr, ServiceID, sp_ptr );
+    return status;
+}
+
+
+/*
+ * dapls_ib_remove_conn_listener
+ *
+ * Have the CM remove a connection listener. The listen socket is closed
+ * here; cr_thread frees the CM object once it sees l_socket == -1.
+ *
+ * Input:
+ *     ia_ptr          IA handle
+ *     sp_ptr          service point whose listener is removed
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     DAT_SUCCESS
+ *
+ */
+DAT_RETURN
+dapls_ib_remove_conn_listener (
+    IN DAPL_IA *ia_ptr,
+    IN DAPL_SP *sp_ptr )
+{
+    ib_cm_srvc_handle_t cm_ptr = sp_ptr->cm_srvc_handle;
+
+    dapl_dbg_log (DAPL_DBG_TYPE_EP,
+                  "dapls_ib_remove_conn_listener(ia_ptr %p sp_ptr %p cm_ptr %p)\n",
+                  ia_ptr, sp_ptr, cm_ptr );
+
+    /* close listen socket, detach cm_srvc_handle and return */
+    if ( cm_ptr != NULL ) {
+        if ( cm_ptr->l_socket >= 0 ) {
+            close( cm_ptr->l_socket );
+            cm_ptr->l_socket = -1; /* the field cr_thread tests before freeing */
+        }
+        /* cr_thread will free */
+        sp_ptr->cm_srvc_handle = NULL;
+    }
+    return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_accept_connection
+ *
+ * Perform necessary steps to accept a connection: make sure the EP
+ * has a QP (allocating one when it is still unattached) and then run
+ * the passive-side handshake in dapli_socket_accept_final.
+ *
+ * Input:
+ *     cr_handle
+ *     ep_handle
+ *     p_size          size of private data
+ *     p_data          private data sent to the active side
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     DAT_SUCCESS
+ *     status of dapls_ib_qp_alloc when QP allocation fails
+ *     DAT_LENGTH_ERROR
+ *     DAT_INTERNAL_ERROR
+ *
+ */
+DAT_RETURN
+dapls_ib_accept_connection (
+    IN DAT_CR_HANDLE cr_handle,
+    IN DAT_EP_HANDLE ep_handle,
+    IN DAT_COUNT p_size,
+    IN const DAT_PVOID p_data )
+{
+    DAPL_CR *cr_ptr = (DAPL_CR *) cr_handle;
+    DAPL_EP *ep_ptr = (DAPL_EP *) ep_handle;
+    DAT_RETURN status;
+
+    dapl_dbg_log (DAPL_DBG_TYPE_EP,
+                  "dapls_ib_accept_connection(cr %p ep %p prd %p,%d)\n",
+                  cr_handle, ep_handle, p_data, p_size );
+
+    /* allocate and attach a QP if necessary */
+    if ( ep_ptr->qp_state == DAPL_QP_STATE_UNATTACHED ) {
+        status = dapls_ib_qp_alloc( ep_ptr->header.owner_ia, ep_ptr, ep_ptr );
+        if ( status != DAT_SUCCESS )
+            return status;
+    }
+
+    status = dapli_socket_accept_final(ep_ptr, cr_ptr, p_size, p_data);
+    return status;
+}
+
+
+/*
+ * dapls_ib_reject_connection
+ *
+ * Reject a connection
+ *
+ * Input:
+ *     ib_cm_handle    CM handle of the pending connection request
+ *     reject_reason   only logged
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     DAT_SUCCESS
+ *
+ */
+DAT_RETURN
+dapls_ib_reject_connection (
+    IN ib_cm_handle_t ib_cm_handle,
+    IN int reject_reason )
+{
+    ib_cm_srvc_handle_t cm_ptr = ib_cm_handle;
+
+    dapl_dbg_log (DAPL_DBG_TYPE_EP,
+                  "dapls_ib_reject_connection(cm_handle %p reason %x)\n",
+                  ib_cm_handle, reject_reason );
+
+    /* just close the socket and return; 0 is a valid descriptor, -1 means none */
+    if ( cm_ptr->socket >= 0 ) {
+        close( cm_ptr->socket );
+        cm_ptr->socket = -1;
+    }
+    return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_cm_remote_addr
+ *
+ * Obtain the remote IP address given a connection
+ *
+ * Input:
+ *     dat_handle      EP or CR handle (told apart by the header magic)
+ *
+ * Output:
+ *     remote_ia_address: where to place the remote address
+ *
+ * Returns:
+ *     DAT_SUCCESS
+ *     DAT_INVALID_HANDLE      neither an EP nor a CR, or no CM handle
+ */
+DAT_RETURN
+dapls_ib_cm_remote_addr (
+    IN DAT_HANDLE dat_handle,
+    OUT DAT_SOCK_ADDR6 *remote_ia_address )
+{
+    DAPL_HEADER *header;
+    ib_cm_handle_t ib_cm_handle;
+
+    dapl_dbg_log (DAPL_DBG_TYPE_EP,
+                  "dapls_ib_cm_remote_addr(dat_handle %p, ....)\n",
+                  dat_handle );
+    header = (DAPL_HEADER *)dat_handle;
+    if (header->magic == DAPL_MAGIC_EP)
+        ib_cm_handle = ((DAPL_EP *) dat_handle)->cm_handle;
+    else if (header->magic == DAPL_MAGIC_CR)
+        ib_cm_handle = ((DAPL_CR *) dat_handle)->ib_cm_handle;
+    else
+        return DAT_INVALID_HANDLE;
+
+    /* without a CM handle there is no stored peer address to copy */
+    if ( ib_cm_handle == NULL )
+        return DAT_INVALID_HANDLE;
+
+    dapl_os_memcpy( remote_ia_address, &ib_cm_handle->dst.ia_address,
+                    sizeof(DAT_SOCK_ADDR6) );
+
+    return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_private_data_size
+ *
+ * Return the size of private data given a connection op type
+ *
+ * Input:
+ *     prd_ptr         private data pointer
+ *     conn_op         connection operation type
+ *
+ * If prd_ptr is NULL, this is a query for the max size supported by
+ * the provider, otherwise it is the actual size of the private data
+ * contained in prd_ptr.
+ *
+ * NOTE: this implementation never looks at prd_ptr; it always answers
+ * with the provider maximum for the given operation type:
+ *
+ *     DAPL_PDATA_CONN_REQ     -> IB_MAX_REQ_PDATA_SIZE
+ *     DAPL_PDATA_CONN_REP     -> IB_MAX_REP_PDATA_SIZE
+ *     DAPL_PDATA_CONN_REJ     -> IB_MAX_REJ_PDATA_SIZE
+ *     DAPL_PDATA_CONN_DREQ    -> IB_MAX_DREQ_PDATA_SIZE
+ *     DAPL_PDATA_CONN_DREP    -> IB_MAX_DREP_PDATA_SIZE
+ *     any other value         -> 0
+ *
+ * Output:
+ *     None
+ *
+ * Returns:
+ *     length of private data
+ *
+ */
+int dapls_ib_private_data_size (
+    IN DAPL_PRIVATE *prd_ptr,
+    IN DAPL_PDATA_OP conn_op)
+{
+    switch (conn_op)
+    {
+    case DAPL_PDATA_CONN_REQ:
+        /* connection request */
+        return IB_MAX_REQ_PDATA_SIZE;
+
+    case DAPL_PDATA_CONN_REP:
+        /* connection reply */
+        return IB_MAX_REP_PDATA_SIZE;
+
+    case DAPL_PDATA_CONN_REJ:
+        /* connection reject */
+        return IB_MAX_REJ_PDATA_SIZE;
+
+    case DAPL_PDATA_CONN_DREQ:
+        /* disconnect request */
+        return IB_MAX_DREQ_PDATA_SIZE;
+
+    case DAPL_PDATA_CONN_DREP:
+        /* disconnect reply */
+        return IB_MAX_DREP_PDATA_SIZE;
+
+    default:
+        /* unrecognized operation: falls through to the zero answer */
+        break;
+    }
+
+    return 0;
+}
+
+/*
+ * Map all socket CM event codes to the DAT equivalent.
+ */
+#define DAPL_IB_EVENT_CNT 11 /* number of rows in ib_cm_event_map */
+
+static struct ib_cm_event_map
+{
+ const ib_cm_events_t ib_cm_event;
+ DAT_EVENT_NUMBER dat_event_num;
+ } ib_cm_event_map[DAPL_IB_EVENT_CNT] = {
+ /* 00 */ { IB_CME_CONNECTED,
+ DAT_CONNECTION_EVENT_ESTABLISHED},
+ /* 01 */ { IB_CME_DISCONNECTED,
+ DAT_CONNECTION_EVENT_DISCONNECTED},
+ /* 02 */ { IB_CME_DISCONNECTED_ON_LINK_DOWN,
+ DAT_CONNECTION_EVENT_DISCONNECTED},
+ /* 03 */ { IB_CME_CONNECTION_REQUEST_PENDING,
+ DAT_CONNECTION_REQUEST_EVENT},
+ /* 04 */ { IB_CME_CONNECTION_REQUEST_PENDING_PRIVATE_DATA,
+ DAT_CONNECTION_REQUEST_EVENT},
+ /* 05 */ { IB_CME_DESTINATION_REJECT,
+ DAT_CONNECTION_EVENT_NON_PEER_REJECTED},
+ /* 06 */ { IB_CME_DESTINATION_REJECT_PRIVATE_DATA,
+ DAT_CONNECTION_EVENT_PEER_REJECTED},
+ /* 07 */ { IB_CME_DESTINATION_UNREACHABLE,
+ DAT_CONNECTION_EVENT_UNREACHABLE},
+ /* 08 */ { IB_CME_TOO_MANY_CONNECTION_REQUESTS,
+ DAT_CONNECTION_EVENT_NON_PEER_REJECTED},
+ /* 09 */ { IB_CME_LOCAL_FAILURE,
+ DAT_CONNECTION_EVENT_BROKEN},
+ /* 10 */ { IB_CM_LOCAL_FAILURE,
+ DAT_CONNECTION_EVENT_BROKEN}
+};
+
+/*
+ * dapls_ib_get_dat_event
+ *
+ * Return a DAT connection event given a provider CM event.
+ *
+ * Input:
+ *     ib_cm_event     event provided to the dapl callback routine
+ *     active          active/passive switch; only used in the debug log
+ *
+ * Returns:
+ *     DAT_EVENT_NUMBER of translated provider value,
+ *     0 when the CM event is out of range or has no table entry
+ */
+DAT_EVENT_NUMBER
+dapls_ib_get_dat_event (
+    IN const ib_cm_events_t ib_cm_event,
+    IN DAT_BOOLEAN active)
+{
+    DAT_EVENT_NUMBER translated = 0;
+    int idx;
+
+    (void) active; /* referenced only by the debug log below */
+
+    /* codes past the last known CM event are rejected without logging */
+    if (ib_cm_event > IB_CM_LOCAL_FAILURE)
+        return (DAT_EVENT_NUMBER) 0;
+
+    /* the first table row carrying this CM event wins */
+    for (idx = 0; idx < DAPL_IB_EVENT_CNT; idx++) {
+        if (ib_cm_event_map[idx].ib_cm_event != ib_cm_event)
+            continue;
+        translated = ib_cm_event_map[idx].dat_event_num;
+        break;
+    }
+    dapl_dbg_log (DAPL_DBG_TYPE_CALLBACK,
+                  "dapls_ib_get_dat_event: event translate(%s) ib=0x%x dat=0x%x\n",
+                  active ? "active" : "passive", ib_cm_event, translated);
+
+    return translated;
+}
+
+
+/*
+ * dapls_ib_get_cm_event
+ *
+ * Return a provider CM event given a DAT connection event.
+ *
+ * Input:
+ *     dat_event_num   DAT event we need an equivalent CM event for
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     ib_cm_event of the first table entry mapped to dat_event_num,
+ *     or 0 when the table has no such entry
+ */
+ib_cm_events_t
+dapls_ib_get_cm_event (
+    IN DAT_EVENT_NUMBER dat_event_num)
+{
+    int idx = 0;
+
+    /* linear scan; several CM events share a DAT event, first row wins */
+    while (idx < DAPL_IB_EVENT_CNT) {
+        if (ib_cm_event_map[idx].dat_event_num == dat_event_num)
+            return ib_cm_event_map[idx].ib_cm_event;
+        idx++;
+    }
+
+    /* no row matched */
+    return 0;
+}
+
+/* async CR processing thread to avoid blocking applications; polls every listen socket queued on the HCA list */
+void cr_thread(void *arg)
+{
+ struct dapl_hca *hca_ptr = arg;
+ ib_cm_srvc_handle_t cr, next_cr;
+ int max_fd;
+ fd_set rfd,rfds;
+ struct timeval to;
+
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL," cr_thread: ENTER hca %p\n",hca_ptr);
+
+ dapl_os_lock( &hca_ptr->ib_trans.lock );
+ hca_ptr->ib_trans.cr_state = IB_THREAD_RUN; /* main loop below runs while the state stays RUN */
+ while (hca_ptr->ib_trans.cr_state == IB_THREAD_RUN) {
+
+ FD_ZERO( &rfds );
+ max_fd = -1;
+
+ if (!dapl_llist_is_empty(&hca_ptr->ib_trans.list))
+ next_cr = dapl_llist_peek_head (&hca_ptr->ib_trans.list);
+ else
+ next_cr = NULL;
+
+ while (next_cr) {
+ cr = next_cr;
+ dapl_dbg_log (DAPL_DBG_TYPE_CM," thread: cm_ptr %p\n", cr );
+ if (cr->l_socket == -1 ||
+ hca_ptr->ib_trans.cr_state != IB_THREAD_RUN) { /* listener closed or thread stopping: unlink and free the entry */
+
+ dapl_dbg_log(DAPL_DBG_TYPE_CM," thread: Freeing %p\n", cr);
+ next_cr = dapl_llist_next_entry(&hca_ptr->ib_trans.list,
+ (DAPL_LLIST_ENTRY*)&cr->entry );
+ dapl_llist_remove_entry(&hca_ptr->ib_trans.list,
+ (DAPL_LLIST_ENTRY*)&cr->entry);
+ dapl_os_free( cr, sizeof(*cr) );
+ continue;
+ }
+
+ FD_SET( cr->l_socket, &rfds ); /* add to select set */
+ if ( cr->l_socket > max_fd )
+ max_fd = cr->l_socket;
+
+ /* individual select poll to check for work */
+ FD_ZERO(&rfd);
+ FD_SET(cr->l_socket, &rfd);
+ dapl_os_unlock(&hca_ptr->ib_trans.lock); /* poll and accept run without the list lock held */
+ to.tv_sec = 0;
+ to.tv_usec = 0;
+ if ( select(cr->l_socket + 1,&rfd, NULL, NULL, &to) < 0) {
+ dapl_dbg_log (DAPL_DBG_TYPE_CM,
+ " thread: ERR %s on cr %p sk %d\n",
+ strerror(errno), cr, cr->l_socket);
+ close(cr->l_socket);
+ cr->l_socket = -1;
+ } else if ( FD_ISSET(cr->l_socket, &rfd) &&
+ dapli_socket_accept(cr)) {
+ close(cr->l_socket); /* any non-zero accept status retires this listener */
+ cr->l_socket = -1;
+ }
+ dapl_os_lock( &hca_ptr->ib_trans.lock );
+ next_cr = dapl_llist_next_entry(&hca_ptr->ib_trans.list,
+ (DAPL_LLIST_ENTRY*)&cr->entry );
+ }
+ dapl_os_unlock( &hca_ptr->ib_trans.lock );
+ to.tv_sec = 0;
+ to.tv_usec = 100000; /* wakeup and check destroy */
+ select(max_fd + 1, &rfds, NULL, NULL, &to);
+ dapl_os_lock( &hca_ptr->ib_trans.lock );
+ }
+ dapl_os_unlock( &hca_ptr->ib_trans.lock );
+ hca_ptr->ib_trans.cr_state = IB_THREAD_EXIT;
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL," cr_thread(hca %p) exit\n",hca_ptr);
+}
+
+/*
+ * Local variables:
+ * c-indent-level: 4
+ * c-basic-offset: 4
+ * tab-width: 8
+ * End:
+ */
Index: dapl/openib_scm/dapl_ib_qp.c
===================================================================
--- dapl/openib_scm/dapl_ib_qp.c (revision 0)
+++ dapl/openib_scm/dapl_ib_qp.c (revision 0)
@@ -0,0 +1,399 @@
+/*
+ * Copyright (c) 2002-2003, Network Appliance, Inc. All rights reserved.
+ *
+ * This Software is licensed under either one of the following two licenses:
+ *
+ * 1) under the terms of the "Common Public License 1.0" a copy of which is
+ * in the file LICENSE.txt in the root directory. The license is also
+ * available from the Open Source Initiative, see
+ * http://www.opensource.org/licenses/cpl.php.
+ * OR
+ *
+ * 2) under the terms of the "The BSD License" a copy of which is in the file
+ * LICENSE2.txt in the root directory. The license is also available from
+ * the Open Source Initiative, see
+ * http://www.opensource.org/licenses/bsd-license.php.
+ *
+ * Licensee has the right to choose either one of the above two licenses.
+ *
+ * Redistributions of source code must retain both the above copyright
+ * notice and either one of the license notices.
+ *
+ * Redistributions in binary form must reproduce both the above copyright
+ * notice, either one of the license notices in the documentation
+ * and/or other materials provided with the distribution.
+ */
+
+/**********************************************************************
+ *
+ * MODULE: dapl_ib_qp.c
+ *
+ * PURPOSE: QP routines for access to DET Verbs
+ *
+ * $Id: $
+ **********************************************************************/
+
+#include "dapl.h"
+#include "dapl_adapter_util.h"
+
+/*
+ * dapli_qp_empty_cq
+ *
+ * Return the HCA-wide placeholder CQ used for endpoints that have no
+ * EVD on one of their work queues, creating it on first use.  The
+ * handle is cached in hca_ptr->ib_trans.ib_cq_empty so that every such
+ * endpoint shares a single CQ.  Returns IB_INVALID_HANDLE on failure.
+ */
+static ib_cq_handle_t
+dapli_qp_empty_cq ( IN DAPL_IA *ia_ptr )
+{
+    if (ia_ptr->hca_ptr->ib_trans.ib_cq_empty == IB_INVALID_HANDLE) {
+        /* no EVD means no per-EVD wait object: use the HCA channel */
+        ia_ptr->hca_ptr->ib_trans.ib_cq_empty =
+            ibv_create_cq(ia_ptr->hca_ptr->ib_hca_handle,
+                          0, NULL,
+                          ia_ptr->hca_ptr->ib_trans.ib_cq, 0);
+    }
+    return ia_ptr->hca_ptr->ib_trans.ib_cq_empty;
+}
+
+/*
+ * dapls_ib_qp_alloc
+ *
+ * Create an RC QP for an endpoint and move it to the INIT state.
+ *
+ * Input:
+ *	ia_ptr		IA whose HCA the QP is created on
+ *	ep_ptr		endpoint supplying the PZ, the recv/request EVDs
+ *			and the EP attributes; on success it receives
+ *			qp_handle and qp_state
+ *	ep_ctx_ptr	only written to the debug log by this routine
+ *
+ * Output:
+ *	none
+ *
+ * Returns:
+ *	DAT_SUCCESS
+ *	dapl_convert_errno(ENOMEM)	CQ or QP creation failed
+ *	DAT_INTERNAL_ERROR		QP could not be moved to INIT
+ *
+ */
+DAT_RETURN
+dapls_ib_qp_alloc (
+    IN DAPL_IA *ia_ptr,
+    IN DAPL_EP *ep_ptr,
+    IN DAPL_EP *ep_ctx_ptr )
+{
+    DAT_EP_ATTR *attr;
+    DAPL_EVD *rcv_evd, *req_evd;
+    ib_cq_handle_t rcv_cq, req_cq;
+    ib_cq_handle_t empty_cq = IB_INVALID_HANDLE;
+    ib_pd_handle_t ib_pd_handle;
+    struct ibv_qp_init_attr qp_create;
+
+    dapl_dbg_log (DAPL_DBG_TYPE_EP,
+                  " qp_alloc: ia_ptr %p ep_ptr %p ep_ctx_ptr %p\n",
+                  ia_ptr, ep_ptr, ep_ctx_ptr);
+
+    attr = &ep_ptr->param.ep_attr;
+    ib_pd_handle = ((DAPL_PZ *)ep_ptr->param.pz_handle)->pd_handle;
+    rcv_evd = (DAPL_EVD *) ep_ptr->param.recv_evd_handle;
+    req_evd = (DAPL_EVD *) ep_ptr->param.request_evd_handle;
+
+    /*
+     * DAT allows usage model of EP's with no EVD's but IB does not.
+     * Use a shared CQ with zero entries under the covers to support and
+     * catch any invalid posting.  It is needed when EITHER queue lacks
+     * an EVD, so look it up once for both.
+     */
+    if (rcv_evd == DAT_HANDLE_NULL || req_evd == DAT_HANDLE_NULL) {
+        empty_cq = dapli_qp_empty_cq(ia_ptr);
+        if (empty_cq == IB_INVALID_HANDLE)
+            return(dapl_convert_errno(ENOMEM, "create_cq"));
+    }
+
+    if (rcv_evd != DAT_HANDLE_NULL)
+        rcv_cq = rcv_evd->ib_cq_handle;
+    else
+        rcv_cq = empty_cq;
+
+    if (req_evd != DAT_HANDLE_NULL)
+        req_cq = req_evd->ib_cq_handle;
+    else
+        req_cq = empty_cq;
+
+    /* Setup attributes and create qp */
+    dapl_os_memzero((void*)&qp_create, sizeof(qp_create));
+    qp_create.send_cq = req_cq;
+    qp_create.recv_cq = rcv_cq;
+    qp_create.cap.max_send_wr = attr->max_request_dtos;
+    qp_create.cap.max_recv_wr = attr->max_recv_dtos;
+    qp_create.cap.max_send_sge = attr->max_request_iov;
+    qp_create.cap.max_recv_sge = attr->max_recv_iov;
+    qp_create.cap.max_inline_data = ia_ptr->hca_ptr->ib_trans.max_inline_send;
+    qp_create.qp_type = IBV_QPT_RC;
+    /* dapls_modify_qp_state() walks back from the QP to the EP via this */
+    qp_create.qp_context = (void*)ep_ptr;
+
+    ep_ptr->qp_handle = ibv_create_qp( ib_pd_handle, &qp_create);
+    if (!ep_ptr->qp_handle)
+        return(dapl_convert_errno(ENOMEM, "create_qp"));
+
+    /* qp_num is an integer, not a pointer: print it with %x */
+    dapl_dbg_log ( DAPL_DBG_TYPE_EP,
+                   " qp_alloc: qpn %x sq %d,%d rq %d,%d\n",
+                   ep_ptr->qp_handle->qp_num,
+                   qp_create.cap.max_send_wr,qp_create.cap.max_send_sge,
+                   qp_create.cap.max_recv_wr,qp_create.cap.max_recv_sge );
+
+    /* Setup QP attributes for INIT state on the way out */
+    if (dapls_modify_qp_state(ep_ptr->qp_handle,
+                              IBV_QPS_INIT,
+                              NULL ) != DAT_SUCCESS ) {
+        ibv_destroy_qp(ep_ptr->qp_handle);
+        ep_ptr->qp_handle = IB_INVALID_HANDLE;
+        return DAT_INTERNAL_ERROR;
+    }
+
+    ep_ptr->qp_state = IB_QP_STATE_INIT;
+    return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_qp_free
+ *
+ * Destroy the QP attached to an endpoint, if there is one.
+ *
+ * Input:
+ *	ia_ptr		IA pointer (not used by this routine)
+ *	ep_ptr		endpoint whose qp_handle is released
+ *
+ * Output:
+ *	none
+ *
+ * Returns:
+ *	DAT_SUCCESS			QP destroyed, or no QP was attached
+ *	dapl_convert_errno(errno)	ibv_destroy_qp failed; the EP keeps
+ *					its qp_handle in that case
+ *
+ */
+DAT_RETURN
+dapls_ib_qp_free (
+    IN DAPL_IA *ia_ptr,
+    IN DAPL_EP *ep_ptr )
+{
+    dapl_dbg_log (DAPL_DBG_TYPE_EP, " qp_free: ep_ptr %p qp %p\n",
+                  ep_ptr, ep_ptr->qp_handle);
+
+    /* nothing attached, nothing to do */
+    if (ep_ptr->qp_handle == IB_INVALID_HANDLE)
+        return DAT_SUCCESS;
+
+    /* push the QP into the error state first so outstanding work is flushed */
+    dapls_modify_qp_state(ep_ptr->qp_handle, IBV_QPS_ERR, NULL);
+
+    if (ibv_destroy_qp(ep_ptr->qp_handle) != 0)
+        return(dapl_convert_errno(errno,"destroy_qp"));
+
+    ep_ptr->qp_handle = IB_INVALID_HANDLE;
+    ep_ptr->qp_state = IB_QP_STATE_ERROR;
+
+    return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_qp_modify
+ *
+ * Apply the work-queue capacities of an EP attribute set to the QP,
+ * or push the QP to the error state when the EP is already marked
+ * as being in error.
+ *
+ * The EP_PARAM structure that is provided has been
+ * sanitized such that only non-zero values are valid.
+ *
+ * Input:
+ *	ia_ptr		IA pointer (not used by this routine)
+ *	ep_ptr		endpoint owning the QP
+ *	attr		sanitized EP attributes (send/recv depth and SGE counts)
+ *
+ * Output:
+ *	none
+ *
+ * Returns:
+ *	DAT_SUCCESS
+ *	DAT_INVALID_PARAMETER		EP has no QP
+ *	DAT_INVALID_STATE		QP is neither RTR nor RTS
+ *	dapl_convert_errno(errno)	ibv_modify_qp failed
+ *
+ */
+DAT_RETURN
+dapls_ib_qp_modify (
+    IN DAPL_IA *ia_ptr,
+    IN DAPL_EP *ep_ptr,
+    IN DAT_EP_ATTR *attr )
+{
+    struct ibv_qp_attr caps;
+    ib_qp_handle_t qp = ep_ptr->qp_handle;
+
+    if (qp == IB_INVALID_HANDLE)
+        return DAT_INVALID_PARAMETER;
+
+    /*
+     * The EP state and the state cached in the qp handle should agree,
+     * but the only way to be sure is a user mode ibv_query_qp call,
+     * which is NOT available.
+     */
+
+    /* EP says error but the QP has not been moved there yet: do it now */
+    if (ep_ptr->qp_state == IB_QP_STATE_ERROR &&
+        qp->state != IBV_QPS_ERR) {
+        ep_ptr->qp_state = IB_QP_STATE_ERROR;
+        return (dapls_modify_qp_state(qp, IBV_QPS_ERR, NULL));
+    }
+
+    /* capacities may only be changed while the QP is in RTR or RTS */
+    if (!(qp->state == IBV_QPS_RTR || qp->state == IBV_QPS_RTS))
+        return DAT_INVALID_STATE;
+
+    /* Adjust to current EP attributes */
+    dapl_os_memzero((void*)&caps, sizeof(caps));
+    caps.cap.max_send_wr = attr->max_request_dtos;
+    caps.cap.max_recv_wr = attr->max_recv_dtos;
+    caps.cap.max_send_sge = attr->max_request_iov;
+    caps.cap.max_recv_sge = attr->max_recv_iov;
+
+    dapl_dbg_log (DAPL_DBG_TYPE_EP,
+                  "modify_qp: qp %p sq %d,%d, rq %d,%d\n",
+                  qp,
+                  caps.cap.max_send_wr, caps.cap.max_send_sge,
+                  caps.cap.max_recv_wr, caps.cap.max_recv_sge );
+
+    if (ibv_modify_qp(qp, &caps, IBV_QP_CAP) != 0) {
+        dapl_dbg_log (DAPL_DBG_TYPE_ERR,
+                      "modify_qp: modify ep %p qp %p failed\n",
+                      ep_ptr, qp);
+        return(dapl_convert_errno(errno,"modify_qp_state"));
+    }
+
+    return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_reinit_ep
+ *
+ * Cycle the endpoint's QP through RESET and back to INIT so it can be
+ * used again.  The results of the two state changes are not checked;
+ * qp_state is set to INIT regardless.  An EP without a QP is left
+ * untouched.
+ *
+ * Input:
+ *	ep_ptr		DAPL_EP
+ *
+ * Output:
+ *	none
+ *
+ * Returns:
+ *	void
+ *
+ */
+void
+dapls_ib_reinit_ep (
+    IN DAPL_EP *ep_ptr)
+{
+    ib_qp_handle_t qp = ep_ptr->qp_handle;
+
+    /* TODO: When IB-CM is implemented then handle timewait before
+     * allowing re-use of this QP
+     */
+    if ( qp == IB_INVALID_HANDLE )
+        return;
+
+    /* move to RESET state and then to INIT */
+    dapls_modify_qp_state(qp, IBV_QPS_RESET, NULL);
+    dapls_modify_qp_state(qp, IBV_QPS_INIT, NULL);
+    ep_ptr->qp_state = IB_QP_STATE_INIT;
+}
+
+/*
+ * dapls_modify_qp_state
+ *
+ * Generic QP modify for init, reset, error, RTS, RTR.
+ *
+ * Input:
+ *	qp_handle	QP to modify
+ *	qp_state	target state
+ *	qp_cm		remote destination info (qpn, lid, port); read
+ *			only for the RTR transition, callers pass NULL
+ *			for the other states
+ *
+ * Returns:
+ *	DAT_SUCCESS
+ *	DAT_INVALID_PARAMETER		RTR requested without qp_cm
+ *	dapl_convert_errno(errno)	ibv_modify_qp failed
+ */
+DAT_RETURN
+dapls_modify_qp_state ( IN ib_qp_handle_t qp_handle,
+                        IN ib_qp_state_t qp_state,
+                        IN ib_qp_cm_t *qp_cm )
+{
+    struct ibv_qp_attr qp_attr;
+    enum ibv_qp_attr_mask mask = IBV_QP_STATE;
+
+    dapl_os_memzero((void*)&qp_attr, sizeof(qp_attr));
+    qp_attr.qp_state = qp_state;
+
+    switch (qp_state) {
+    /* additional attributes with RTR and RTS */
+    case IBV_QPS_RTR:
+    {
+        /* the address vector is built from qp_cm; refuse to go on without it */
+        if (qp_cm == NULL)
+            return DAT_INVALID_PARAMETER;
+
+        mask |= IBV_QP_AV |
+                IBV_QP_PATH_MTU |
+                IBV_QP_DEST_QPN |
+                IBV_QP_RQ_PSN |
+                IBV_QP_MAX_DEST_RD_ATOMIC |
+                IBV_QP_MIN_RNR_TIMER;
+        qp_attr.qp_state = IBV_QPS_RTR;
+        qp_attr.path_mtu = IBV_MTU_1024;
+        qp_attr.dest_qp_num = qp_cm->qpn;
+        qp_attr.rq_psn = 1;
+        qp_attr.max_dest_rd_atomic = 8;
+        qp_attr.min_rnr_timer = 12;
+        qp_attr.ah_attr.is_global = 0;
+        qp_attr.ah_attr.dlid = qp_cm->lid;
+        qp_attr.ah_attr.sl = 0;
+        qp_attr.ah_attr.src_path_bits = 0;
+        qp_attr.ah_attr.port_num = qp_cm->port;
+
+        dapl_dbg_log (DAPL_DBG_TYPE_EP,
+                      " modify_qp_rtr: qpn %x lid %x port %x\n",
+                      qp_cm->qpn,qp_cm->lid,qp_cm->port );
+        break;
+    }
+    case IBV_QPS_RTS:
+    {
+        mask |= IBV_QP_TIMEOUT |
+                IBV_QP_RETRY_CNT |
+                IBV_QP_RNR_RETRY |
+                IBV_QP_SQ_PSN |
+                IBV_QP_MAX_QP_RD_ATOMIC;
+        qp_attr.qp_state = IBV_QPS_RTS;
+        qp_attr.timeout = 14;
+        qp_attr.retry_cnt = 7;
+        qp_attr.rnr_retry = 7;
+        /* matches the rq_psn programmed for RTR above */
+        qp_attr.sq_psn = 1;
+        qp_attr.max_rd_atomic = 8;
+        dapl_dbg_log (DAPL_DBG_TYPE_EP,
+                      " modify_qp_rts: psn %x or %x\n",
+                      qp_attr.sq_psn, qp_attr.max_rd_atomic );
+        break;
+    }
+    case IBV_QPS_INIT:
+    {
+        DAPL_IA *ia_ptr;
+        DAPL_EP *ep_ptr;
+        /* need to find way back to port num */
+        ep_ptr = (DAPL_EP*)qp_handle->qp_context;
+        if (ep_ptr)
+            ia_ptr = ep_ptr->header.owner_ia;
+        else
+            break;	/* no EP context: only IBV_QP_STATE is requested */
+
+        mask |= IBV_QP_PKEY_INDEX |
+                IBV_QP_PORT |
+                IBV_QP_ACCESS_FLAGS;
+
+        qp_attr.pkey_index = 0;
+        qp_attr.port_num = ia_ptr->hca_ptr->port_num;
+        qp_attr.qp_access_flags =
+                IBV_ACCESS_LOCAL_WRITE |
+                IBV_ACCESS_REMOTE_WRITE |
+                IBV_ACCESS_REMOTE_READ |
+                IBV_ACCESS_REMOTE_ATOMIC;
+
+        dapl_dbg_log (DAPL_DBG_TYPE_EP,
+                      " modify_qp_init: pi %x port %x acc %x\n",
+                      qp_attr.pkey_index, qp_attr.port_num,
+                      qp_attr.qp_access_flags );
+        break;
+    }
+    default:
+        /* RESET, ERR and the rest need nothing beyond the state itself */
+        break;
+
+    }
+
+    if (ibv_modify_qp(qp_handle, &qp_attr, mask))
+        return(dapl_convert_errno(errno,"modify_qp_state"));
+
+    return DAT_SUCCESS;
+}
+
+/*
+ * Local variables:
+ * c-indent-level: 4
+ * c-basic-offset: 4
+ * tab-width: 8
+ * End:
+ */
Index: dapl/openib_scm/README
===================================================================
--- dapl/openib_scm/README (revision 0)
+++ dapl/openib_scm/README (revision 0)
@@ -0,0 +1,40 @@
+
+OpenIB uDAPL provider using socket-based CM, in lieu of uCM/uAT, to set up QP/channels.
+
+to build:
+
+cd dapl/udapl
+make VERBS=openib_scm clean
+make VERBS=openib_scm
+
+
+Modifications to common code:
+
+- added dapl/openib_scm directory
+
+ dapl/udapl/Makefile
+
+New files for openib_scm provider
+
+ dapl/openib_scm/dapl_ib_cq.c
+ dapl/openib_scm/dapl_ib_dto.h
+ dapl/openib_scm/dapl_ib_mem.c
+ dapl/openib_scm/dapl_ib_qp.c
+ dapl/openib_scm/dapl_ib_util.c
+ dapl/openib_scm/dapl_ib_util.h
+ dapl/openib_scm/dapl_ib_cm.c
+
+A simple dapl test just for openib_scm testing...
+
+ test/dtest/dtest.c
+ test/dtest/makefile
+
+ server: dtest -s
+ client: dtest -h hostname
+
+known issues:
+
+ no memory windows support in ibverbs, dat_create_rmr fails.
+
+
+
Index: dapl/openib_scm/dapl_ib_util.h
===================================================================
--- dapl/openib_scm/dapl_ib_util.h (revision 0)
+++ dapl/openib_scm/dapl_ib_util.h (revision 0)
@@ -0,0 +1,355 @@
+/*
+ * This Software is licensed under both of the following two licenses:
+ *
+ * 1) under the terms of the "Common Public License 1.0" a copy of which is
+ * in the file LICENSE.txt in the root directory. The license is also
+ * available from the Open Source Initiative, see
+ * http://www.opensource.org/licenses/cpl.php.
+ * OR
+ *
+ * 2) under the terms of the "The BSD License" a copy of which is in the file
+ * LICENSE2.txt in the root directory. The license is also available from
+ * the Open Source Initiative, see
+ * http://www.opensource.org/licenses/bsd-license.php.
+ *
+ * Licensee has the right to choose either one of the above two licenses.
+ *
+ * Redistributions of source code must retain both the above copyright
+ * notice and either one of the license notices.
+ *
+ * Redistributions in binary form must reproduce both the above copyright
+ * notice, either one of the license notices in the documentation
+ * and/or other materials provided with the distribution.
+ */
+
+/***************************************************************************
+ *
+ * Module: uDAPL
+ *
+ * Filename: dapl_ib_util.h
+ *
+ * Author: Arlin Davis
+ *
+ * Created: 3/10/2005
+ *
+ * Description:
+ *
+ * The uDAPL openib provider - definitions, prototypes,
+ *
+ ****************************************************************************
+ * Source Control System Information
+ *
+ * $Id: $
+ *
+ * Copyright (c) 2005 Intel Corporation. All rights reserved.
+ *
+ **************************************************************************/
+
+#ifndef _DAPL_IB_UTIL_H_
+#define _DAPL_IB_UTIL_H_
+
+#include "verbs.h"
+#include <byteswap.h>
+
+#ifndef __cplusplus
+#define false 0
+#define true 1
+#endif /*__cplusplus */
+
+/* Typedefs to map common DAPL provider types to IB verbs */
+typedef struct ibv_qp *ib_qp_handle_t;
+typedef struct ibv_cq *ib_cq_handle_t;
+typedef struct ibv_pd *ib_pd_handle_t;
+typedef struct ibv_mr *ib_mr_handle_t;
+typedef struct ibv_mw *ib_mw_handle_t;
+typedef struct ibv_wc ib_work_completion_t;
+
+/* HCA context type maps to IB verbs */
+typedef struct ibv_context *ib_hca_handle_t;
+typedef ib_hca_handle_t dapl_ibal_ca_t;
+
+/* CM mappings, user CM not complete use SOCKETS */
+
+/* destination info to exchange until real IB CM shows up */
+/*
+ * dapls_modify_qp_state() programs qpn, lid and port into the QP as
+ * dest_qp_num, ah_attr.dlid and ah_attr.port_num for the RTR transition.
+ * NOTE(review): p_size looks like a private-data length and ia_address
+ * like the peer IA address - confirm against dapl_ib_cm.c.
+ */
+typedef struct _ib_qp_cm
+{
+ uint32_t qpn;
+ uint16_t lid;
+ uint16_t port;
+ int p_size;
+ DAT_SOCK_ADDR6 ia_address;
+
+} ib_qp_cm_t;
+
+/*
+ * dapl_llist_entry in dapl.h but dapl.h depends on provider
+ * typedef's in this file first. move dapl_llist_entry out of dapl.h
+ *
+ * This stand-in is embedded in struct ib_cm_handle.
+ * NOTE(review): the cr thread casts a pointer to it to DAPL_LLIST_ENTRY*,
+ * so the fields presumably have to mirror struct dapl_llist_entry
+ * exactly - confirm against dapl.h.
+ */
+struct ib_llist_entry
+{
+ struct dapl_llist_entry *flink;
+ struct dapl_llist_entry *blink;
+ void *data;
+ struct dapl_llist_entry *list_head;
+};
+
+/*
+ * Per-connection / per-listen CM object.
+ *
+ * entry    - links the object on hca_ptr->ib_trans.list, which the cr
+ *            thread walks.
+ * l_socket - descriptor the cr thread select()s on and hands to
+ *            dapli_socket_accept(); the thread frees the object once
+ *            this is -1.
+ * p_data   - 256-byte buffer; NOTE(review): presumably connection
+ *            private data - confirm.
+ * NOTE(review): socket, cr, sp and dst are not used in the code visible
+ * here; see dapl_ib_cm.c for their roles.
+ */
+struct ib_cm_handle
+{
+ struct ib_llist_entry entry;
+ int socket;
+ int l_socket;
+ struct dapl_hca *hca_ptr;
+ DAT_HANDLE cr;
+ DAT_HANDLE sp;
+ ib_qp_cm_t dst;
+ unsigned char p_data[256];
+};
+
+typedef struct ib_cm_handle *ib_cm_handle_t;
+typedef ib_cm_handle_t ib_cm_srvc_handle_t;
+
+DAT_RETURN getipaddr(char *addr, int addr_len);
+
+/* CM events */
+/*
+ * NOTE(review): IB_CME_LOCAL_FAILURE and IB_CM_LOCAL_FAILURE are two
+ * distinct enumerators whose names differ only by the "E"; check whether
+ * both are really needed by the common code.
+ */
+typedef enum
+{
+ IB_CME_CONNECTED,
+ IB_CME_DISCONNECTED,
+ IB_CME_DISCONNECTED_ON_LINK_DOWN,
+ IB_CME_CONNECTION_REQUEST_PENDING,
+ IB_CME_CONNECTION_REQUEST_PENDING_PRIVATE_DATA,
+ IB_CME_DESTINATION_REJECT,
+ IB_CME_DESTINATION_REJECT_PRIVATE_DATA,
+ IB_CME_DESTINATION_UNREACHABLE,
+ IB_CME_TOO_MANY_CONNECTION_REQUESTS,
+ IB_CME_LOCAL_FAILURE,
+ IB_CM_LOCAL_FAILURE
+
+} ib_cm_events_t;
+
+/* prototype for cm thread */
+void cr_thread (void *arg);
+
+/* Operation and state mappings */
+typedef enum ibv_send_flags ib_send_op_type_t;
+typedef struct ibv_sge ib_data_segment_t;
+typedef enum ibv_qp_state ib_qp_state_t;
+typedef enum ibv_event_type ib_async_event_type;
+typedef struct ibv_async_event ib_error_record_t;
+
+/* CQ notifications */
+typedef enum
+{
+ IB_NOTIFY_ON_NEXT_COMP,
+ IB_NOTIFY_ON_SOLIC_COMP
+
+} ib_notification_type_t;
+
+/* other mappings */
+typedef int ib_bool_t;
+typedef union ibv_gid GID;
+typedef char *IB_HCA_NAME;
+typedef uint16_t ib_hca_port_t;
+typedef uint32_t ib_comp_handle_t;
+
+#ifdef CQ_WAIT_OBJECT
+typedef struct ibv_comp_channel *ib_wait_obj_handle_t;
+#endif
+
+/* Definitions */
+#define IB_INVALID_HANDLE NULL
+
+/* inline send rdma threshold */
+#define INLINE_SEND_DEFAULT 128
+
+/* CM private data areas */
+#define IB_MAX_REQ_PDATA_SIZE 92
+#define IB_MAX_REP_PDATA_SIZE 196
+#define IB_MAX_REJ_PDATA_SIZE 148
+#define IB_MAX_DREQ_PDATA_SIZE 220
+#define IB_MAX_DREP_PDATA_SIZE 224
+
+/* DTO OPs, ordered for DAPL ENUM definitions ???*/
+#define OP_RDMA_WRITE IBV_WR_RDMA_WRITE
+#define OP_RDMA_WRITE_IMM IBV_WR_RDMA_WRITE_WITH_IMM
+#define OP_SEND IBV_WR_SEND
+#define OP_SEND_IMM IBV_WR_SEND_WITH_IMM
+#define OP_RDMA_READ IBV_WR_RDMA_READ
+#define OP_COMP_AND_SWAP IBV_WR_ATOMIC_CMP_AND_SWP
+#define OP_FETCH_AND_ADD IBV_WR_ATOMIC_FETCH_AND_ADD
+#define OP_RECEIVE 7 /* internal op */
+#define OP_RECEIVE_IMM 8 /* internel op */
+#define OP_BIND_MW 9 /* internal op */
+#define OP_INVALID 0xff
+
+/* Definitions to map QP state */
+#define IB_QP_STATE_RESET IBV_QPS_RESET
+#define IB_QP_STATE_INIT IBV_QPS_INIT
+#define IB_QP_STATE_RTR IBV_QPS_RTR
+#define IB_QP_STATE_RTS IBV_QPS_RTS
+#define IB_QP_STATE_SQD IBV_QPS_SQD
+#define IB_QP_STATE_SQE IBV_QPS_SQE
+#define IB_QP_STATE_ERROR IBV_QPS_ERR
+
+/* Definitions for ibverbs/mthca return codes, should be defined in verbs.h */
+/* some are errno and some are -n values */
+
+/**
+ * ibv_get_device_name - Return kernel device name
+ * ibv_get_device_guid - Return device's node GUID
+ * ibv_open_device - Return ibv_context or NULL
+ * ibv_close_device - Return 0, (errno?)
+ * ibv_get_async_event - Return 0, -1
+ * ibv_alloc_pd - Return ibv_pd, NULL
+ * ibv_dealloc_pd - Return 0, errno
+ * ibv_reg_mr - Return ibv_mr, NULL
+ * ibv_dereg_mr - Return 0, errno
+ * ibv_create_cq - Return ibv_cq, NULL
+ * ibv_destroy_cq - Return 0, errno
+ * ibv_get_cq_event - Return 0 & ibv_cq/context, int
+ * ibv_poll_cq - Return n & ibv_wc, 0 ok, -1 empty, -2 error
+ * ibv_req_notify_cq - Return 0 (void?)
+ * ibv_create_qp - Return ibv_qp, NULL
+ * ibv_modify_qp - Return 0, errno
+ * ibv_destroy_qp - Return 0, errno
+ * ibv_post_send - Return 0, -1 & bad_wr
+ * ibv_post_recv - Return 0, -1 & bad_wr
+ */
+
+/* async handler for DTO, CQ, QP, and unafiliated */
+typedef void (*ib_async_dto_handler_t)(
+ IN ib_hca_handle_t ib_hca_handle,
+ IN ib_error_record_t *err_code,
+ IN void *context);
+
+typedef void (*ib_async_cq_handler_t)(
+ IN ib_hca_handle_t ib_hca_handle,
+ IN ib_cq_handle_t ib_cq_handle,
+ IN ib_error_record_t *err_code,
+ IN void *context);
+
+typedef void (*ib_async_qp_handler_t)(
+ IN ib_hca_handle_t ib_hca_handle,
+ IN ib_qp_handle_t ib_qp_handle,
+ IN ib_error_record_t *err_code,
+ IN void *context);
+
+typedef void (*ib_async_handler_t)(
+ IN ib_hca_handle_t ib_hca_handle,
+ IN ib_error_record_t *err_code,
+ IN void *context);
+
+/*
+ * Life cycle of a provider helper thread, as used for the cq thread:
+ *   INIT   - set by the creator just before the thread is started
+ *   RUN    - set by the thread itself once it is up; the creator polls
+ *            for this value
+ *   CANCEL - set by the destroy routine to ask the thread to leave its loop
+ *   EXIT   - set by the thread after it has left its loop; the destroy
+ *            routine polls for this value
+ */
+typedef enum
+{
+ IB_THREAD_INIT,
+ IB_THREAD_RUN,
+ IB_THREAD_CANCEL,
+ IB_THREAD_EXIT
+
+} ib_thread_state_t;
+
+/* ib_hca_transport_t, specific to this implementation */
+/*
+ * ib_cq_empty     - zero-entry CQ cached by dapls_ib_qp_alloc() for
+ *                   endpoints created without an EVD
+ * max_inline_send - copied into cap.max_inline_data of every new QP
+ * cq_state        - ib_thread_state_t handshake with cq_thread
+ * cq_thread       - handle of the completion thread
+ * ib_cq           - default completion channel: passed to ibv_create_cq
+ *                   and polled by cq_thread
+ * cr_state        - holds ib_thread_state_t values for the cr thread
+ *                   (declared as int)
+ * lock, list      - lock and list of CM objects walked by the cr thread
+ * NOTE(review): thread is presumably the cr thread handle; ib_dev,
+ * cq_lock and the async_* callbacks are not used in the code visible
+ * here - confirm in dapl_ib_util.c.
+ */
+typedef struct _ib_hca_transport
+{
+ struct ibv_device *ib_dev;
+ ib_cq_handle_t ib_cq_empty;
+ DAPL_OS_LOCK cq_lock;
+ int max_inline_send;
+ ib_thread_state_t cq_state;
+ DAPL_OS_THREAD cq_thread;
+ struct ibv_comp_channel *ib_cq;
+ int cr_state;
+ DAPL_OS_THREAD thread;
+ DAPL_OS_LOCK lock;
+ struct dapl_llist_entry *list;
+ ib_async_handler_t async_unafiliated;
+ void *async_un_ctx;
+ ib_async_cq_handler_t async_cq_error;
+ ib_async_dto_handler_t async_cq;
+ ib_async_qp_handler_t async_qp_error;
+
+} ib_hca_transport_t;
+
+/* provider specfic fields for shared memory support */
+typedef uint32_t ib_shm_transport_t;
+
+/* prototypes */
+int32_t dapls_ib_init (void);
+int32_t dapls_ib_release (void);
+void cq_thread (void *arg);
+void cr_thread(void *arg);
+int dapli_cq_thread_init(struct dapl_hca *hca_ptr);
+void dapli_cq_thread_destroy(struct dapl_hca *hca_ptr);
+
+
+DAT_RETURN
+dapls_modify_qp_state ( IN ib_qp_handle_t qp_handle,
+ IN ib_qp_state_t qp_state,
+ IN ib_qp_cm_t *qp_cm );
+
+/* inline functions */
+/*
+ * Return a dapl_os_strdup() copy of the device name.
+ * NOTE(review): dapl_ib_release_name() does not free this copy, so the
+ * caller presumably owns it - confirm who releases it.
+ */
+STATIC _INLINE_ IB_HCA_NAME dapl_ib_convert_name (IN char *name)
+{
+ /* use ascii; name of local device */
+ return dapl_os_strdup(name);
+}
+
+/* Counterpart of dapl_ib_convert_name(); a no-op here, name is not freed. */
+STATIC _INLINE_ void dapl_ib_release_name (IN IB_HCA_NAME name)
+{
+ return;
+}
+
+/*
+ * dapl_convert_errno
+ *
+ * Translate an errno value into the matching DAT_RETURN code.
+ *
+ * Input:
+ *	err	errno value; 0 means success
+ *	str	tag printed in front of the error text in debug builds
+ *
+ * Returns:
+ *	DAT_SUCCESS when err is 0, otherwise the DAT code from the switch
+ *	below; anything not listed becomes DAT_INTERNAL_ERROR.
+ */
+STATIC _INLINE_ DAT_RETURN
+dapl_convert_errno( IN int err, IN const char *str )
+{
+    DAT_RETURN status;
+
+    if (err == 0)
+        return DAT_SUCCESS;
+
+#if DAPL_DBG
+    /* EAGAIN, ETIME and ETIMEDOUT are not logged */
+    if (!(err == EAGAIN || err == ETIME || err == ETIMEDOUT))
+        dapl_dbg_log (DAPL_DBG_TYPE_ERR," %s %s\n", str, strerror(err));
+#endif
+
+    switch (err) {
+    case EOVERFLOW:
+        status = DAT_LENGTH_ERROR;
+        break;
+    case EACCES:
+        status = DAT_PRIVILEGES_VIOLATION;
+        break;
+    case ENXIO:
+    case ERANGE:
+    case EPERM:
+        status = DAT_PROTECTION_VIOLATION;
+        break;
+    case EINVAL:
+    case EBADF:
+    case ENOENT:
+    case ENOTSOCK:
+        status = DAT_INVALID_HANDLE;
+        break;
+    case EISCONN:
+        status = DAT_INVALID_STATE | DAT_INVALID_STATE_EP_CONNECTED;
+        break;
+    case ECONNREFUSED:
+        status = DAT_INVALID_STATE | DAT_INVALID_STATE_EP_NOTREADY;
+        break;
+    case ETIME:
+    case ETIMEDOUT:
+        status = DAT_TIMEOUT_EXPIRED;
+        break;
+    case ENETUNREACH:
+        status = DAT_INVALID_ADDRESS | DAT_INVALID_ADDRESS_UNREACHABLE;
+        break;
+    case EADDRINUSE:
+        status = DAT_CONN_QUAL_IN_USE;
+        break;
+    case EALREADY:
+        status = DAT_INVALID_STATE | DAT_INVALID_STATE_EP_ACTCONNPENDING;
+        break;
+    case ENOSPC:
+    case ENOMEM:
+    case E2BIG:
+    case EDQUOT:
+        status = DAT_INSUFFICIENT_RESOURCES;
+        break;
+    case EAGAIN:
+        status = DAT_QUEUE_EMPTY;
+        break;
+    case EINTR:
+        status = DAT_INTERRUPTED_CALL;
+        break;
+    case EAFNOSUPPORT:
+        status = DAT_INVALID_ADDRESS | DAT_INVALID_ADDRESS_MALFORMED;
+        break;
+    case EFAULT:
+    default:
+        status = DAT_INTERNAL_ERROR;
+        break;
+    }
+
+    return status;
+}
+
+/*
+ * Definitions required only for DAT 1.1 builds
+ *
+ * Note that IB_ACCESS_LOCAL_READ and IB_ACCESS_MW_BIND both map to
+ * IBV_ACCESS_LOCAL_WRITE, and that IB_ACCESS_ATOMIC expands to nothing.
+ * NOTE(review): an empty IB_ACCESS_ATOMIC breaks any expression that
+ * uses it as a value - confirm this is intended.
+ */
+#define IB_ACCESS_LOCAL_READ IBV_ACCESS_LOCAL_WRITE
+#define IB_ACCESS_LOCAL_WRITE IBV_ACCESS_LOCAL_WRITE
+#define IB_ACCESS_REMOTE_READ IBV_ACCESS_REMOTE_READ
+#define IB_ACCESS_REMOTE_WRITE IBV_ACCESS_REMOTE_WRITE
+#define IB_ACCESS_MW_BIND IBV_ACCESS_LOCAL_WRITE
+#define IB_ACCESS_ATOMIC
+
+#endif /* _DAPL_IB_UTIL_H_ */
Index: dapl/openib_scm/dapl_ib_cq.c
===================================================================
--- dapl/openib_scm/dapl_ib_cq.c (revision 0)
+++ dapl/openib_scm/dapl_ib_cq.c (revision 0)
@@ -0,0 +1,619 @@
+/*
+ * This Software is licensed under one of the following licenses:
+ *
+ * 1) under the terms of the "Common Public License 1.0" a copy of which is
+ * available from the Open Source Initiative, see
+ * http://www.opensource.org/licenses/cpl.php.
+ *
+ * 2) under the terms of the "The BSD License" a copy of which is
+ * available from the Open Source Initiative, see
+ * http://www.opensource.org/licenses/bsd-license.php.
+ *
+ * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
+ * copy of which is available from the Open Source Initiative, see
+ * http://www.opensource.org/licenses/gpl-license.php.
+ *
+ * Licensee has the right to choose one of the above licenses.
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice and one of the license notices.
+ *
+ * Redistributions in binary form must reproduce both the above copyright
+ * notice, one of the license notices in the documentation
+ * and/or other materials provided with the distribution.
+ */
+
+/***************************************************************************
+ *
+ * Module: uDAPL
+ *
+ * Filename: dapl_ib_cq.c
+ *
+ * Author: Arlin Davis
+ *
+ * Created: 3/10/2005
+ *
+ * Description:
+ *
+ * The uDAPL openib provider - completion queue
+ *
+ ****************************************************************************
+ * Source Control System Information
+ *
+ * $Id: $
+ *
+ * Copyright (c) 2005 Intel Corporation. All rights reserved.
+ *
+ **************************************************************************/
+
+#include "dapl.h"
+#include "dapl_adapter_util.h"
+#include "dapl_lmr_util.h"
+#include "dapl_evd_util.h"
+#include "dapl_ring_buffer_util.h"
+#include <sys/poll.h>
+#include <signal.h>
+
+/*
+ * dapli_cq_thread_init
+ *
+ * Start the completion (cq) thread for an HCA and wait, polling every
+ * 20 ms, until the thread reports IB_THREAD_RUN.
+ *
+ * Returns:
+ *	0	thread is running
+ *	1	thread could not be created
+ */
+int dapli_cq_thread_init(struct dapl_hca *hca_ptr)
+{
+    struct timespec delay, left;
+
+    dapl_dbg_log(DAPL_DBG_TYPE_UTIL," cq_thread_init(%p)\n", hca_ptr);
+
+    /* create thread to process DTO completion events */
+    hca_ptr->ib_trans.cq_state = IB_THREAD_INIT;
+    if (dapl_os_thread_create(cq_thread, (void*)hca_ptr,
+                              &hca_ptr->ib_trans.cq_thread) != DAT_SUCCESS) {
+        dapl_dbg_log(DAPL_DBG_TYPE_ERR,
+                     " cq_thread_init: failed to create thread\n");
+        return 1;
+    }
+
+    /* wait for thread to start */
+    delay.tv_sec = 0;
+    delay.tv_nsec = 20000000; /* 20 ms */
+    while (hca_ptr->ib_trans.cq_state != IB_THREAD_RUN) {
+        dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+                     " cq_thread_init: waiting for cq_thread\n");
+        nanosleep (&delay, &left);
+    }
+
+    dapl_dbg_log(DAPL_DBG_TYPE_UTIL," cq_thread_init(%d) exit\n",getpid());
+    return 0;
+}
+
+/*
+ * dapli_cq_thread_destroy
+ *
+ * Ask the cq thread to stop and wait, polling every 200 ms, until it
+ * reports IB_THREAD_EXIT.  Does nothing unless the thread is currently
+ * in the IB_THREAD_RUN state.
+ */
+void dapli_cq_thread_destroy(struct dapl_hca *hca_ptr)
+{
+    struct timespec delay, left;
+
+    dapl_dbg_log(DAPL_DBG_TYPE_UTIL," cq_thread_destroy(%p)\n", hca_ptr);
+
+    if (hca_ptr->ib_trans.cq_state != IB_THREAD_RUN)
+        return;
+
+    /* flag the cancel, then kick the thread out of poll() with a signal */
+    hca_ptr->ib_trans.cq_state = IB_THREAD_CANCEL;
+    pthread_kill(hca_ptr->ib_trans.cq_thread, SIGUSR1);
+    dapl_dbg_log(DAPL_DBG_TYPE_CM," cq_thread_destroy(%p) cancel\n",hca_ptr);
+
+    delay.tv_sec = 0;
+    delay.tv_nsec = 200000000; /* 200 ms */
+    while (hca_ptr->ib_trans.cq_state != IB_THREAD_EXIT) {
+        dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+                     " cq_thread_destroy: waiting for cq_thread\n");
+        nanosleep (&delay, &left);
+    }
+
+    dapl_dbg_log(DAPL_DBG_TYPE_UTIL," cq_thread_destroy(%d) exit\n",getpid());
+}
+
+/* catch the signal */
+/*
+ * Installed for SIGUSR1 by cq_thread().  It does nothing itself; the
+ * signal is sent by dapli_cq_thread_destroy() while the thread sits in
+ * poll().
+ */
+static void ib_cq_handler(int signum)
+{
+ return;
+}
+
+/*
+ * cq_thread
+ *
+ * Body of the completion thread: blocks on the HCA completion channel
+ * and hands every CQ event to dapl_evd_dto_callback() until cq_state
+ * leaves IB_THREAD_RUN.
+ *
+ * Input:
+ *	arg	struct dapl_hca * of the HCA to serve
+ *
+ * The thread always publishes IB_THREAD_EXIT before returning, because
+ * dapli_cq_thread_destroy() waits for that value.
+ */
+void cq_thread( void *arg )
+{
+    struct dapl_hca *hca_ptr = arg;
+    struct dapl_evd *evd_ptr;
+    struct ibv_cq *ibv_cq = NULL;
+    sigset_t sigset;
+
+    /* SIGUSR1 is what dapli_cq_thread_destroy() uses to interrupt poll() */
+    sigemptyset(&sigset);
+    sigaddset(&sigset,SIGUSR1);
+    pthread_sigmask(SIG_UNBLOCK, &sigset, NULL);
+    signal(SIGUSR1, ib_cq_handler);
+
+    hca_ptr->ib_trans.cq_state = IB_THREAD_RUN;
+
+    dapl_dbg_log(DAPL_DBG_TYPE_UTIL," cq_thread: ENTER hca %p\n",hca_ptr);
+
+    /* wait on DTO event, or signal to abort */
+    while (hca_ptr->ib_trans.cq_state == IB_THREAD_RUN) {
+        struct pollfd cq_fd = {
+            .fd = hca_ptr->ib_trans.ib_cq->fd,
+            .events = POLLIN,
+            .revents = 0
+        };
+
+        /* interrupted or failed wait: go back and re-check cq_state */
+        if (poll(&cq_fd, 1, -1) != 1)
+            continue;
+
+        if (ibv_get_cq_event(hca_ptr->ib_trans.ib_cq,
+                             &ibv_cq, (void*)&evd_ptr))
+            continue;
+
+        if (DAPL_BAD_HANDLE(evd_ptr, DAPL_MAGIC_EVD)) {
+            /*
+             * Event for something that is not a valid EVD: drop it
+             * and keep serving.  Returning here would leave cq_state
+             * at IB_THREAD_RUN and make dapli_cq_thread_destroy()
+             * wait forever for IB_THREAD_EXIT.
+             */
+            dapl_dbg_log(DAPL_DBG_TYPE_ERR,
+                         " cq_thread: bad evd %p on hca %p, event dropped\n",
+                         evd_ptr, hca_ptr);
+            ibv_ack_cq_events(ibv_cq, 1);
+            continue;
+        }
+
+        /* process DTO event via callback */
+        dapl_evd_dto_callback ( hca_ptr->ib_hca_handle,
+                                evd_ptr->ib_cq_handle,
+                                (void*)evd_ptr );
+
+        ibv_ack_cq_events(ibv_cq, 1);
+    }
+    hca_ptr->ib_trans.cq_state = IB_THREAD_EXIT;
+    dapl_dbg_log(DAPL_DBG_TYPE_UTIL," cq_thread: EXIT: hca %p \n", hca_ptr);
+}
+
+
+/*
+ * Map all verbs DTO completion codes to the DAT equivalent.
+ *
+ * Not returned by verbs: DAT_DTO_ERR_PARTIAL_PACKET
+ *
+ * dapls_ib_get_dto_status() searches this table by ib_status; the
+ * leading numeric comments are only the entry's position in the table.
+ */
+static struct ib_status_map
+{
+ int ib_status;
+ DAT_DTO_COMPLETION_STATUS dat_status;
+} ib_status_map[] = {
+ /* 00 */ { IBV_WC_SUCCESS, DAT_DTO_SUCCESS},
+ /* 01 */ { IBV_WC_LOC_LEN_ERR, DAT_DTO_ERR_LOCAL_LENGTH},
+ /* 02 */ { IBV_WC_LOC_QP_OP_ERR, DAT_DTO_ERR_LOCAL_EP},
+ /* 03 */ { IBV_WC_LOC_EEC_OP_ERR, DAT_DTO_ERR_TRANSPORT},
+ /* 04 */ { IBV_WC_LOC_PROT_ERR, DAT_DTO_ERR_LOCAL_PROTECTION},
+ /* 05 */ { IBV_WC_WR_FLUSH_ERR, DAT_DTO_ERR_FLUSHED},
+ /* 06 */ { IBV_WC_MW_BIND_ERR, DAT_RMR_OPERATION_FAILED},
+ /* 07 */ { IBV_WC_BAD_RESP_ERR, DAT_DTO_ERR_BAD_RESPONSE},
+ /* 08 */ { IBV_WC_LOC_ACCESS_ERR, DAT_DTO_ERR_LOCAL_PROTECTION},
+ /* 09 */ { IBV_WC_REM_INV_REQ_ERR, DAT_DTO_ERR_REMOTE_RESPONDER},
+ /* 10 */ { IBV_WC_REM_ACCESS_ERR, DAT_DTO_ERR_REMOTE_ACCESS},
+ /* 11 */ { IBV_WC_REM_OP_ERR, DAT_DTO_ERR_REMOTE_RESPONDER},
+ /* 12 */ { IBV_WC_RETRY_EXC_ERR, DAT_DTO_ERR_TRANSPORT},
+ /* 13 */ { IBV_WC_RNR_RETRY_EXC_ERR, DAT_DTO_ERR_RECEIVER_NOT_READY},
+ /* 14 */ { IBV_WC_LOC_RDD_VIOL_ERR, DAT_DTO_ERR_LOCAL_PROTECTION},
+ /* 15 */ { IBV_WC_REM_INV_RD_REQ_ERR, DAT_DTO_ERR_REMOTE_RESPONDER},
+ /* 16 */ { IBV_WC_REM_ABORT_ERR, DAT_DTO_ERR_REMOTE_RESPONDER},
+ /* 17 */ { IBV_WC_INV_EECN_ERR, DAT_DTO_ERR_TRANSPORT},
+ /* 18 */ { IBV_WC_INV_EEC_STATE_ERR, DAT_DTO_ERR_TRANSPORT},
+ /* 19 */ { IBV_WC_FATAL_ERR, DAT_DTO_ERR_TRANSPORT},
+ /* 20 */ { IBV_WC_RESP_TIMEOUT_ERR, DAT_DTO_ERR_RECEIVER_NOT_READY},
+ /* 21 */ { IBV_WC_GENERAL_ERR, DAT_DTO_ERR_TRANSPORT},
+};
+
+/*
+ * dapls_ib_get_dto_status
+ *
+ * Return the DAT status of a DTO operation
+ *
+ * Input:
+ *	cqe_ptr		pointer to completion queue entry
+ *
+ * Output:
+ *	none
+ *
+ * Returns:
+ *	Value from the ib_status_map table for the entry's verbs status,
+ *	or DAT_DTO_FAILURE when the status is not in the table.  Every
+ *	status other than IBV_WC_SUCCESS is also written to the debug log.
+ */
+
+DAT_DTO_COMPLETION_STATUS
+dapls_ib_get_dto_status (
+    IN ib_work_completion_t *cqe_ptr)
+{
+    DAT_DTO_COMPLETION_STATUS dat_status = DAT_DTO_FAILURE;
+    uint32_t ib_status;
+    int i;
+
+    ib_status = DAPL_GET_CQE_STATUS (cqe_ptr);
+
+    /*
+     * Due to the implementation of verbs completion code, we need to
+     * search the table for the correct value rather than assuming
+     * linear distribution.  The walk is bounded by the table's own
+     * size, so it stays in range whatever the verbs enum values are.
+     */
+    for (i = 0;
+         i < (int)(sizeof(ib_status_map) / sizeof(ib_status_map[0]));
+         i++) {
+        if (ib_status == (uint32_t)ib_status_map[i].ib_status) {
+            dat_status = ib_status_map[i].dat_status;
+            break;
+        }
+    }
+
+    /* IBV_WC_SUCCESS is in the table, so an unmapped status lands here too */
+    if ( ib_status != IBV_WC_SUCCESS ) {
+        dapl_dbg_log (DAPL_DBG_TYPE_DTO_COMP_ERR,
+                      " DTO completion ERROR: %d: op %#x\n",
+                      ib_status, DAPL_GET_CQE_OPTYPE (cqe_ptr));
+    }
+
+    return dat_status;
+}
+
+/*
+ * dapls_ib_get_async_event
+ *
+ * Translate a verbs async event into a DAT event number.
+ *
+ * Input:
+ *	err_record	verbs async event record
+ *
+ * Output:
+ *	async_event	DAT event number; written only on DAT_SUCCESS
+ *
+ * Returns:
+ *	DAT_SUCCESS
+ *	DAT_ERROR (DAT_NOT_IMPLEMENTED, 0) for events with no mapping
+ */
+DAT_RETURN dapls_ib_get_async_event (
+    IN ib_error_record_t *err_record,
+    OUT DAT_EVENT_NUMBER *async_event)
+{
+    int event = err_record->event_type;
+
+    switch (event) {
+    /* OVERFLOW error */
+    case IBV_EVENT_CQ_ERR:
+        *async_event = DAT_ASYNC_ERROR_EVD_OVERFLOW;
+        return DAT_SUCCESS;
+
+    /* INTERNAL errors */
+    case IBV_EVENT_DEVICE_FATAL:
+        *async_event = DAT_ASYNC_ERROR_PROVIDER_INTERNAL_ERROR;
+        return DAT_SUCCESS;
+
+    /* CATASTROPHIC errors */
+    case IBV_EVENT_PORT_ERR:
+        *async_event = DAT_ASYNC_ERROR_IA_CATASTROPHIC;
+        return DAT_SUCCESS;
+
+    /* BROKEN QP error */
+    case IBV_EVENT_SQ_DRAINED:
+    case IBV_EVENT_QP_FATAL:
+    case IBV_EVENT_QP_REQ_ERR:
+    case IBV_EVENT_QP_ACCESS_ERR:
+        *async_event = DAT_ASYNC_ERROR_EP_BROKEN;
+        return DAT_SUCCESS;
+
+    /* connection completion */
+    case IBV_EVENT_COMM_EST:
+        *async_event = DAT_CONNECTION_EVENT_ESTABLISHED;
+        return DAT_SUCCESS;
+
+    /* TODO: process HW state changes */
+    case IBV_EVENT_PATH_MIG:
+    case IBV_EVENT_PATH_MIG_ERR:
+    case IBV_EVENT_PORT_ACTIVE:
+    case IBV_EVENT_LID_CHANGE:
+    case IBV_EVENT_PKEY_CHANGE:
+    case IBV_EVENT_SM_CHANGE:
+    default:
+        break;
+    }
+
+    return DAT_ERROR (DAT_NOT_IMPLEMENTED, 0);
+}
+
+/*
+ * dapls_ib_cq_alloc
+ *
+ * Create the CQ behind an EVD and arm it for completion events.
+ *
+ * Input:
+ *	ia_ptr		IA whose HCA the CQ is created on
+ *	evd_ptr		EVD that receives the CQ handle and is registered
+ *			as the CQ context
+ *	cqlen		minimum queue length requested
+ *
+ * Output:
+ *	cqlen		queue length actually provided by the CQ
+ *
+ * Returns:
+ *	DAT_SUCCESS
+ *	DAT_INSUFFICIENT_RESOURCES
+ *
+ */
+DAT_RETURN
+dapls_ib_cq_alloc (
+    IN DAPL_IA *ia_ptr,
+    IN DAPL_EVD *evd_ptr,
+    IN DAT_COUNT *cqlen )
+{
+    struct ibv_comp_channel *channel;
+    ib_cq_handle_t cq;
+
+    dapl_dbg_log ( DAPL_DBG_TYPE_UTIL,
+                   "dapls_ib_cq_alloc: evd %p cqlen=%d \n", evd_ptr, *cqlen );
+
+    /* HCA-wide channel by default, the EVD's own wait object if it has one */
+    channel = ia_ptr->hca_ptr->ib_trans.ib_cq;
+#ifdef CQ_WAIT_OBJECT
+    if (evd_ptr->cq_wait_obj_handle)
+        channel = evd_ptr->cq_wait_obj_handle;
+#endif
+
+    /* Call IB verbs to create CQ */
+    cq = ibv_create_cq(ia_ptr->hca_ptr->ib_hca_handle,
+                       *cqlen, evd_ptr, channel, 0);
+    evd_ptr->ib_cq_handle = cq;
+    if (cq == IB_INVALID_HANDLE)
+        return DAT_INSUFFICIENT_RESOURCES;
+
+    /* arm cq for events */
+    dapls_set_cq_notify(ia_ptr, evd_ptr);
+
+    /* update with returned cq entry size */
+    *cqlen = cq->cqe;
+
+    dapl_dbg_log ( DAPL_DBG_TYPE_UTIL,
+                   "dapls_ib_cq_alloc: new_cq %p cqlen=%d \n",
+                   evd_ptr->ib_cq_handle, *cqlen );
+
+    return DAT_SUCCESS;
+}
+
+
+/*
+ * dapls_ib_cq_resize
+ *
+ * Resize the CQ behind an EVD by creating a replacement CQ of the new
+ * size and destroying the old one.
+ *
+ * Input:
+ *	ia_ptr		IA whose HCA the CQ lives on
+ *	evd_ptr		EVD whose CQ is replaced
+ *	cqlen		minimum queue length requested
+ *
+ * Output:
+ *	cqlen		queue length actually provided by the new CQ
+ *
+ * Returns:
+ *	DAT_SUCCESS
+ *	DAT_INSUFFICIENT_RESOURCES	replacement CQ could not be created
+ *	dapl_convert_errno(errno)	old CQ could not be destroyed; the
+ *					EVD keeps its original CQ
+ *
+ */
+DAT_RETURN
+dapls_ib_cq_resize (
+    IN DAPL_IA *ia_ptr,
+    IN DAPL_EVD *evd_ptr,
+    IN DAT_COUNT *cqlen )
+{
+    struct ibv_comp_channel *channel;
+    ib_cq_handle_t replacement;
+
+    /* IB verbs does not support resize. Try to re-create CQ
+     * with new size. Can only be done if QP is not attached.
+     * destroy EBUSY == QP still attached.
+     */
+
+    channel = ia_ptr->hca_ptr->ib_trans.ib_cq;
+#ifdef CQ_WAIT_OBJECT
+    if (evd_ptr->cq_wait_obj_handle)
+        channel = evd_ptr->cq_wait_obj_handle;
+#endif
+
+    /* Call IB verbs to create CQ */
+    replacement = ibv_create_cq(ia_ptr->hca_ptr->ib_hca_handle, *cqlen,
+                                evd_ptr, channel, 0);
+    if (replacement == IB_INVALID_HANDLE)
+        return DAT_INSUFFICIENT_RESOURCES;
+
+    /* the old CQ must go first; if it will not, drop the new one again */
+    if (ibv_destroy_cq(evd_ptr->ib_cq_handle) != 0) {
+        ibv_destroy_cq(replacement);
+        return(dapl_convert_errno(errno,"resize_cq"));
+    }
+
+    /* update EVD with new cq handle and size */
+    evd_ptr->ib_cq_handle = replacement;
+    *cqlen = replacement->cqe;
+
+    /* arm cq for events */
+    dapls_set_cq_notify (ia_ptr, evd_ptr);
+
+    return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_cq_free
+ *
+ * Destroy the CQ attached to an EVD, if there is one.
+ *
+ * Input:
+ *	ia_ptr		IA handle (not used by this routine)
+ *	evd_ptr		EVD whose CQ is destroyed
+ *
+ * Output:
+ *	none
+ *
+ * Returns:
+ *	DAT_SUCCESS		CQ destroyed, or the EVD had no CQ
+ *	dapl_convert_errno	ibv_destroy_cq() failed; the EVD keeps
+ *				its CQ handle
+ *
+ */
+DAT_RETURN dapls_ib_cq_free (
+    IN DAPL_IA *ia_ptr,
+    IN DAPL_EVD *evd_ptr)
+{
+    /* nothing to release */
+    if ( evd_ptr->ib_cq_handle == IB_INVALID_HANDLE )
+        return DAT_SUCCESS;
+
+    /* copy all entries on CQ to EVD before destroying */
+    dapls_evd_copy_cq(evd_ptr);
+
+    if (ibv_destroy_cq(evd_ptr->ib_cq_handle) != 0)
+        return dapl_convert_errno(errno, "destroy_cq");
+
+    /* mark the EVD as no longer owning a CQ */
+    evd_ptr->ib_cq_handle = IB_INVALID_HANDLE;
+    return DAT_SUCCESS;
+}
+
+/*
+ * dapls_set_cq_notify
+ *
+ * Request a completion notification on the CQ of an EVD. The second
+ * argument of ibv_req_notify_cq() is always passed as 0.
+ *
+ * Input:
+ *	ia_ptr		IA handle (not used by this routine)
+ *	evd_ptr		EVD whose CQ is armed
+ *
+ * Output:
+ *	none
+ *
+ * Returns:
+ *	DAT_SUCCESS
+ *	dapl_convert_errno	ibv_req_notify_cq() returned non-zero
+ */
+DAT_RETURN dapls_set_cq_notify (
+    IN DAPL_IA *ia_ptr,
+    IN DAPL_EVD *evd_ptr)
+{
+    if (ibv_req_notify_cq( evd_ptr->ib_cq_handle, 0 ) == 0)
+        return DAT_SUCCESS;
+
+    return dapl_convert_errno(errno, "notify_cq");
+}
+
+/*
+ * dapls_ib_completion_notify
+ *
+ * Request a completion notification on the CQ of an EVD, using the
+ * notification type chosen by the caller.
+ *
+ * Input:
+ *	hca_handle	HCA handle (not used by this routine)
+ *	evd_ptr		EVD whose CQ is armed
+ *	type		notification type, passed unchanged as the second
+ *			argument of ibv_req_notify_cq()
+ *
+ * Output:
+ *	none
+ *
+ * Returns:
+ *	DAT_SUCCESS
+ *	dapl_convert_errno	ibv_req_notify_cq() returned non-zero
+ */
+DAT_RETURN dapls_ib_completion_notify (
+    IN ib_hca_handle_t hca_handle,
+    IN DAPL_EVD *evd_ptr,
+    IN ib_notification_type_t type)
+{
+    if (ibv_req_notify_cq( evd_ptr->ib_cq_handle, type ) == 0)
+        return DAT_SUCCESS;
+
+    return dapl_convert_errno(errno, "notify_cq_type");
+}
+
+/*
+ * dapls_ib_completion_poll
+ *
+ * Poll the CQ of an EVD for a single work completion.
+ *
+ * Input:
+ *	hca_ptr		HCA (not used by this routine)
+ *	evd_ptr		EVD whose CQ is polled
+ *	wc_ptr		storage for one work completion
+ *
+ * Output:
+ *	wc_ptr		handed to ibv_poll_cq() to be filled in
+ *
+ * Returns:
+ *	DAT_SUCCESS	ibv_poll_cq() returned exactly one completion
+ *	DAT_QUEUE_EMPTY	any other ibv_poll_cq() result
+ *
+ */
+DAT_RETURN dapls_ib_completion_poll (
+    IN DAPL_HCA *hca_ptr,
+    IN DAPL_EVD *evd_ptr,
+    IN ib_work_completion_t *wc_ptr)
+{
+    int polled = ibv_poll_cq(evd_ptr->ib_cq_handle, 1, wc_ptr);
+
+    return (polled == 1) ? DAT_SUCCESS : DAT_QUEUE_EMPTY;
+}
+
+#ifdef CQ_WAIT_OBJECT
+
+/* NEW common wait objects for providers with direct CQ wait objects */
+
+/*
+ * dapls_ib_wait_object_create
+ *
+ * Create a completion channel to serve as the CQ wait object of an EVD.
+ *
+ * Input:
+ *	evd_ptr			EVD; the channel is created on the HCA
+ *				handle of its owning IA
+ *	p_cq_wait_obj_handle	where the new channel is stored
+ *
+ * Output:
+ *	p_cq_wait_obj_handle	new channel, or NULL on failure
+ *
+ * Returns:
+ *	DAT_SUCCESS
+ *	DAT_INSUFFICIENT_RESOURCES	ibv_create_comp_channel() failed
+ */
+DAT_RETURN
+dapls_ib_wait_object_create (
+    IN DAPL_EVD *evd_ptr,
+    IN ib_wait_obj_handle_t *p_cq_wait_obj_handle )
+{
+    dapl_dbg_log ( DAPL_DBG_TYPE_CM,
+                   " cq_object_create: (%p,%p)\n",
+                   evd_ptr, p_cq_wait_obj_handle );
+
+    /* the wait object is a verbs completion channel */
+    *p_cq_wait_obj_handle =
+        ibv_create_comp_channel(evd_ptr->header.owner_ia->hca_ptr->ib_hca_handle);
+
+    /*
+     * Report the failure instead of handing back a NULL wait object:
+     * dapls_ib_wait_object_wait() dereferences the handle to get its fd.
+     */
+    if (*p_cq_wait_obj_handle == NULL)
+        return DAT_INSUFFICIENT_RESOURCES;
+
+    return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_wait_object_destroy
+ *
+ * Destroy the completion channel used as a CQ wait object.
+ * Always returns DAT_SUCCESS; the result of
+ * ibv_destroy_comp_channel() is not examined.
+ */
+DAT_RETURN
+dapls_ib_wait_object_destroy ( IN ib_wait_obj_handle_t p_cq_wait_obj_handle )
+{
+    dapl_dbg_log ( DAPL_DBG_TYPE_UTIL,
+                   " cq_object_destroy: wait_obj=%p\n",
+                   p_cq_wait_obj_handle );
+
+    (void) ibv_destroy_comp_channel(p_cq_wait_obj_handle);
+
+    return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_wait_object_wakeup
+ *
+ * Placeholder: this provider has no mechanism to wake a waiter on a
+ * completion channel, so the call only logs and returns DAT_SUCCESS.
+ */
+DAT_RETURN
+dapls_ib_wait_object_wakeup ( IN ib_wait_obj_handle_t p_cq_wait_obj_handle )
+{
+    dapl_dbg_log ( DAPL_DBG_TYPE_UTIL,
+                   " cq_object_wakeup: wait_obj=%p\n",
+                   p_cq_wait_obj_handle );
+
+    /* nothing to signal */
+    return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_wait_object_wait
+ *
+ * Block on the completion channel of a CQ wait object until a CQ event
+ * arrives or the timeout expires. An event that arrives is read from
+ * the channel and acknowledged.
+ *
+ * Input:
+ *	p_cq_wait_obj_handle	completion channel to wait on
+ *	timeout			timeout in usecs, or DAT_TIMEOUT_INFINITE
+ *
+ * Output:
+ *	none
+ *
+ * Returns:
+ *	dapl_convert_errno of:
+ *	    0		the channel became readable
+ *	    ETIMEDOUT	poll() timed out
+ *	    errno	poll() failed
+ */
+DAT_RETURN
+dapls_ib_wait_object_wait (
+    IN ib_wait_obj_handle_t p_cq_wait_obj_handle,
+    IN u_int32_t timeout)
+{
+    /* initialized: logged below even when no event was retrieved */
+    struct dapl_evd *evd_ptr = NULL;
+    struct ibv_cq *ibv_cq = NULL;
+    void *ibv_ctx = NULL;
+    int status = 0;
+    int timeout_ms = -1;	/* poll() blocks indefinitely on -1 */
+    struct pollfd cq_fd = {
+        .fd = p_cq_wait_obj_handle->fd,
+        .events = POLLIN,
+        .revents = 0
+    };
+
+    dapl_dbg_log ( DAPL_DBG_TYPE_CM,
+                   " cq_object_wait: CQ channel %p time %d\n",
+                   p_cq_wait_obj_handle, timeout );
+
+    /* uDAPL timeout values are in usecs, poll() wants msecs;
+     * values below 1000 usecs truncate to 0 (non-blocking poll)
+     */
+    if (timeout != DAT_TIMEOUT_INFINITE)
+        timeout_ms = timeout/1000;
+
+    status = poll(&cq_fd, 1, timeout_ms);
+
+    if (status > 0) {
+        /* returned event: consume and acknowledge it */
+        if (!ibv_get_cq_event(p_cq_wait_obj_handle,
+                              &ibv_cq, (void*)&evd_ptr)) {
+            ibv_ack_cq_events(ibv_cq, 1);
+        }
+        status = 0;
+    } else if (status == 0) {
+        /* timeout */
+        status = ETIMEDOUT;
+    } else {
+        /* poll() failed: report its errno rather than the raw -1 */
+        status = errno;
+    }
+
+    /* log the status being returned, not a possibly stale errno */
+    dapl_dbg_log (DAPL_DBG_TYPE_CM,
+                  " cq_object_wait: RET evd %p ibv_cq %p ibv_ctx %p %s\n",
+                  evd_ptr, ibv_cq,ibv_ctx,strerror(status));
+
+    return(dapl_convert_errno(status,"cq_wait_object_wait"));
+
+}
+#endif
+
+/*
+ * Local variables:
+ * c-indent-level: 4
+ * c-basic-offset: 4
+ * tab-width: 8
+ * End:
+ */
+
More information about the general
mailing list