[ofa-general] [PATCH 3/4] uDAPL v2 - IB UD extension - dapl scm: add support for UD extensions in socket cm provider
Arlin Davis
arlin.r.davis at intel.com
Sun Jul 20 15:19:11 PDT 2008
Add qp_type to the connection information exchange.
Add a new post_send_ud call.
Change the connection manager to support QP types beyond RC.
Change connection events to use the new extended event calls.
Exchange address handle information during the connection phase.
Change modify_qp to handle both RC and UD types.
Signed-off-by: Arlin Davis ardavis at ichips.intel.com
---
dapl/openib_scm/dapl_ib_cm.c | 157 ++++++++++++++++++++++++++--------
dapl/openib_scm/dapl_ib_dto.h | 62 ++++++++++++--
dapl/openib_scm/dapl_ib_extensions.c | 61 ++++++++++++--
dapl/openib_scm/dapl_ib_qp.c | 104 +++++++++++++++-------
dapl/openib_scm/dapl_ib_util.c | 14 ++-
dapl/openib_scm/dapl_ib_util.h | 22 ++++-
6 files changed, 324 insertions(+), 96 deletions(-)
diff --git a/dapl/openib_scm/dapl_ib_cm.c b/dapl/openib_scm/dapl_ib_cm.c
index b87c060..e712f9d 100644
--- a/dapl/openib_scm/dapl_ib_cm.c
+++ b/dapl/openib_scm/dapl_ib_cm.c
@@ -243,6 +243,7 @@ dapli_socket_connect(DAPL_EP *ep_ptr,
/* Send QP info, IA address, and private data */
cm_ptr->dst.qpn = htonl(ep_ptr->qp_handle->qp_num);
+ cm_ptr->dst.qp_type = htons(ep_ptr->qp_handle->qp_type);
cm_ptr->dst.port = htons(ia_ptr->hca_ptr->port_num);
cm_ptr->dst.lid =
htons(dapli_get_lid(ia_ptr->hca_ptr->ib_hca_handle,
@@ -266,7 +267,10 @@ dapli_socket_connect(DAPL_EP *ep_ptr,
iovec[1].iov_len = p_size;
}
- dapl_dbg_log(DAPL_DBG_TYPE_EP," socket connected, write QP and private data\n");
+ dapl_dbg_log(DAPL_DBG_TYPE_EP,
+ " socket connected, write QP (%d), private data (%d)\n",
+ sizeof(ib_qp_cm_t),p_size);
+
len = writev(cm_ptr->socket, iovec, (p_size ? 2:1));
if (len != (p_size + sizeof(ib_qp_cm_t))) {
dapl_dbg_log(DAPL_DBG_TYPE_ERR,
@@ -319,7 +323,7 @@ dapli_socket_connect_rtu(dp_ib_cm_handle_t cm_ptr)
if (len != sizeof(ib_qp_cm_t) || ntohs(cm_ptr->dst.ver) != DSCM_VER) {
dapl_dbg_log(DAPL_DBG_TYPE_ERR,
" connect_rtu read: ERR %s, rcnt=%d, ver=%d\n",
- strerror(errno), len, cm_ptr->dst.ver);
+ strerror(errno), len, ntohs(cm_ptr->dst.ver));
goto bail;
}
/* check for consumer reject */
@@ -335,6 +339,7 @@ dapli_socket_connect_rtu(dp_ib_cm_handle_t cm_ptr)
cm_ptr->dst.port = ntohs(cm_ptr->dst.port);
cm_ptr->dst.lid = ntohs(cm_ptr->dst.lid);
cm_ptr->dst.qpn = ntohl(cm_ptr->dst.qpn);
+ cm_ptr->dst.qp_type = ntohs(cm_ptr->dst.qp_type);
cm_ptr->dst.p_size = ntohl(cm_ptr->dst.p_size);
/* save remote address information */
@@ -343,10 +348,13 @@ dapli_socket_connect_rtu(dp_ib_cm_handle_t cm_ptr)
sizeof(ep_ptr->remote_ia_address));
dapl_dbg_log(DAPL_DBG_TYPE_EP,
- " connect_rtu: DST %s port=0x%x lid=0x%x, qpn=0x%x, psize=%d\n",
- inet_ntoa(((struct sockaddr_in *)&cm_ptr->dst.ia_address)->sin_addr),
+ " connect_rtu: DST %s port=0x%x lid=0x%x,"
+ " qpn=0x%x, qp_type=%d, psize=%d\n",
+ inet_ntoa(((struct sockaddr_in *)
+ &cm_ptr->dst.ia_address)->sin_addr),
cm_ptr->dst.port, cm_ptr->dst.lid,
- cm_ptr->dst.qpn, cm_ptr->dst.p_size);
+ cm_ptr->dst.qpn, cm_ptr->dst.qp_type,
+ cm_ptr->dst.p_size);
/* validate private data size before reading */
if (cm_ptr->dst.p_size > IB_MAX_REP_PDATA_SIZE) {
@@ -357,7 +365,8 @@ dapli_socket_connect_rtu(dp_ib_cm_handle_t cm_ptr)
}
/* read private data into cm_handle if any present */
- dapl_dbg_log(DAPL_DBG_TYPE_EP," socket connected, read private data\n");
+ dapl_dbg_log(DAPL_DBG_TYPE_EP," socket connected, read pdata\n");
+
if (cm_ptr->dst.p_size) {
iovec[0].iov_base = cm_ptr->p_data;
iovec[0].iov_len = cm_ptr->dst.p_size;
@@ -372,11 +381,11 @@ dapli_socket_connect_rtu(dp_ib_cm_handle_t cm_ptr)
/* modify QP to RTR and then to RTS with remote info */
if (dapls_modify_qp_state(ep_ptr->qp_handle,
- IBV_QPS_RTR, &cm_ptr->dst) != DAT_SUCCESS)
+ IBV_QPS_RTR, cm_ptr) != DAT_SUCCESS)
goto bail;
if (dapls_modify_qp_state(ep_ptr->qp_handle,
- IBV_QPS_RTS, &cm_ptr->dst) != DAT_SUCCESS)
+ IBV_QPS_RTS, cm_ptr) != DAT_SUCCESS)
goto bail;
ep_ptr->qp_state = IB_QP_STATE_RTS;
@@ -390,10 +399,34 @@ dapli_socket_connect_rtu(dp_ib_cm_handle_t cm_ptr)
ep_ptr->cm_handle = cm_ptr;
cm_ptr->state = SCM_CONNECTED;
dapl_dbg_log(DAPL_DBG_TYPE_EP," ACTIVE: connected!\n");
+
+#ifdef DAT_EXTENSIONS
+ if (cm_ptr->dst.qp_type == IBV_QPT_UD) {
+ DAT_IB_EXTENSION_EVENT_DATA xevent;
+
+ /* post EVENT, modify_qp created ah */
+ xevent.status = 0;
+ xevent.type = DAT_IB_UD_REMOTE_AH;
+ xevent.remote_ah.ah = cm_ptr->ah;
+ xevent.remote_ah.qpn = cm_ptr->dst.qpn;
+ dapl_os_memcpy( &xevent.remote_ah.ia_addr,
+ &cm_ptr->dst.ia_address,
+ sizeof(cm_ptr->dst.ia_address));
+
+ dapls_evd_post_connection_event_ext(
+ (DAPL_EVD*)ep_ptr->param.connect_evd_handle,
+ DAT_IB_UD_CONNECTION_EVENT_ESTABLISHED,
+ (DAT_EP_HANDLE)ep_ptr,
+ (DAT_COUNT)cm_ptr->dst.p_size,
+ (DAT_PVOID*)cm_ptr->p_data,
+ (DAT_PVOID*)&xevent);
+ } else
+#endif
dapl_evd_connection_callback(cm_ptr,
IB_CME_CONNECTED,
cm_ptr->p_data,
ep_ptr);
+
return;
bail:
/* close socket, free cm structure and post error event */
@@ -515,7 +548,7 @@ dapli_socket_accept(ib_cm_srvc_handle_t cm_ptr)
ntohs(acm_ptr->dst.ver) != DSCM_VER) {
dapl_dbg_log(DAPL_DBG_TYPE_ERR,
" accept read: ERR %s, rcnt=%d, ver=%d\n",
- strerror(errno), len, acm_ptr->dst.ver);
+ strerror(errno), len, ntohs(acm_ptr->dst.ver));
dat_status = DAT_INTERNAL_ERROR;
goto bail;
}
@@ -524,11 +557,13 @@ dapli_socket_accept(ib_cm_srvc_handle_t cm_ptr)
acm_ptr->dst.port = ntohs(acm_ptr->dst.port);
acm_ptr->dst.lid = ntohs(acm_ptr->dst.lid);
acm_ptr->dst.qpn = ntohl(acm_ptr->dst.qpn);
+ acm_ptr->dst.qp_type = ntohs(acm_ptr->dst.qp_type);
acm_ptr->dst.p_size = ntohl(acm_ptr->dst.p_size);
dapl_dbg_log(DAPL_DBG_TYPE_EP,
- " accept: DST %s port=0x%x lid=0x%x, qpn=0x%x, psize=%d\n",
- inet_ntoa(((struct sockaddr_in *)&acm_ptr->dst.ia_address)->sin_addr),
+ " accept: DST %s port=0x%x lid=0x%x, qpn=0x%x, psz=%d\n",
+ inet_ntoa(((struct sockaddr_in *)
+ &acm_ptr->dst.ia_address)->sin_addr),
acm_ptr->dst.port, acm_ptr->dst.lid,
acm_ptr->dst.qpn, acm_ptr->dst.p_size);
@@ -559,7 +594,24 @@ dapli_socket_accept(ib_cm_srvc_handle_t cm_ptr)
}
acm_ptr->state = SCM_ACCEPTING;
+
+#ifdef DAT_EXTENSIONS
+ if (acm_ptr->dst.qp_type == IBV_QPT_UD) {
+ DAT_IB_EXTENSION_EVENT_DATA xevent;
+ /* post EVENT, modify_qp created ah */
+ xevent.status = 0;
+ xevent.type = DAT_IB_UD_CONNECT_REQUEST;
+
+ dapls_evd_post_cr_event_ext(
+ acm_ptr->sp,
+ DAT_IB_UD_CONNECTION_REQUEST_EVENT,
+ acm_ptr,
+ (DAT_COUNT)acm_ptr->dst.p_size,
+ (DAT_PVOID*)acm_ptr->p_data,
+ (DAT_PVOID*)&xevent);
+ } else
+#endif
/* trigger CR event and return SUCCESS */
dapls_cr_callback(acm_ptr,
IB_CME_CONNECTION_REQUEST_PENDING,
@@ -584,6 +636,7 @@ dapli_socket_accept_usr(DAPL_EP *ep_ptr,
{
DAPL_IA *ia_ptr = ep_ptr->header.owner_ia;
dp_ib_cm_handle_t cm_ptr = cr_ptr->ib_cm_handle;
+ ib_qp_cm_t local;
struct iovec iovec[2];
int len;
@@ -596,44 +649,59 @@ dapli_socket_accept_usr(DAPL_EP *ep_ptr,
dapl_dbg_log(DAPL_DBG_TYPE_EP,
" accept_usr: remote port=0x%x lid=0x%x"
- " qpn=0x%x psize=%d\n",
+ " qpn=0x%x qp_type %d, psize=%d\n",
cm_ptr->dst.port, cm_ptr->dst.lid,
- cm_ptr->dst.qpn, cm_ptr->dst.p_size);
+ cm_ptr->dst.qpn, cm_ptr->dst.qp_type,
+ cm_ptr->dst.p_size);
+
+#ifdef DAT_EXTENSIONS
+ if (cm_ptr->dst.qp_type == IBV_QPT_UD &&
+ ep_ptr->qp_handle->qp_type != IBV_QPT_UD) {
+ dapl_dbg_log(DAPL_DBG_TYPE_ERR,
+ " accept_rtu: ERR remote QP is UD,"
+ ", but local QP is not\n");
+ return (DAT_INVALID_HANDLE | DAT_INVALID_HANDLE_EP);
+
+ }
+#endif
/* modify QP to RTR and then to RTS with remote info already read */
if (dapls_modify_qp_state(ep_ptr->qp_handle,
- IBV_QPS_RTR, &cm_ptr->dst) != DAT_SUCCESS)
+ IBV_QPS_RTR, cm_ptr) != DAT_SUCCESS)
goto bail;
if (dapls_modify_qp_state(ep_ptr->qp_handle,
- IBV_QPS_RTS, &cm_ptr->dst) != DAT_SUCCESS)
+ IBV_QPS_RTS, cm_ptr) != DAT_SUCCESS)
goto bail;
ep_ptr->qp_state = IB_QP_STATE_RTS;
- /* save remote address information */
+ /* save remote address information, for qp_query */
dapl_os_memcpy( &ep_ptr->remote_ia_address,
&cm_ptr->dst.ia_address,
sizeof(ep_ptr->remote_ia_address));
- /* send our QP info, IA address, and private data */
- cm_ptr->dst.qpn = htonl(ep_ptr->qp_handle->qp_num);
- cm_ptr->dst.port = htons(ia_ptr->hca_ptr->port_num);
- cm_ptr->dst.lid = htons(dapli_get_lid(ia_ptr->hca_ptr->ib_hca_handle,
+ /* send our QP info, IA address, pdata. Don't overwrite dst data */
+ local.ver = htons(DSCM_VER);
+ local.rej = 0;
+ local.qpn = htonl(ep_ptr->qp_handle->qp_num);
+ local.qp_type = htons(ep_ptr->qp_handle->qp_type);
+ local.port = htons(ia_ptr->hca_ptr->port_num);
+ local.lid = htons(dapli_get_lid(ia_ptr->hca_ptr->ib_hca_handle,
(uint8_t)ia_ptr->hca_ptr->port_num));
- if (cm_ptr->dst.lid == 0xffff)
+ if (local.lid == 0xffff)
goto bail;
/* in network order */
if (ibv_query_gid(ia_ptr->hca_ptr->ib_hca_handle,
(uint8_t)ia_ptr->hca_ptr->port_num,
0,
- &cm_ptr->dst.gid))
+ &local.gid))
goto bail;
- cm_ptr->dst.ia_address = ia_ptr->hca_ptr->hca_address;
- cm_ptr->dst.p_size = htonl(p_size);
- iovec[0].iov_base = &cm_ptr->dst;
+ local.ia_address = ia_ptr->hca_ptr->hca_address;
+ local.p_size = htonl(p_size);
+ iovec[0].iov_base = &local;
iovec[0].iov_len = sizeof(ib_qp_cm_t);
if (p_size) {
iovec[1].iov_base = p_data;
@@ -648,26 +716,22 @@ dapli_socket_accept_usr(DAPL_EP *ep_ptr,
}
dapl_dbg_log(DAPL_DBG_TYPE_CM,
" accept_usr: local port=0x%x lid=0x%x"
- " qpn=0x%x psize=%d\n",
- ntohs(cm_ptr->dst.port), ntohs(cm_ptr->dst.lid),
- ntohl(cm_ptr->dst.qpn), ntohl(cm_ptr->dst.p_size));
+ " qpn=0x%x qp_type=%d psize=%d\n",
+ ntohs(local.port), ntohs(local.lid),
+ ntohl(local.qpn), ntohs(local.qp_type),
+ ntohl(local.p_size));
dapl_dbg_log(DAPL_DBG_TYPE_CM,
" accept_usr SRC GID subnet %016llx id %016llx\n",
(unsigned long long)
- cpu_to_be64(cm_ptr->dst.gid.global.subnet_prefix),
+ cpu_to_be64(local.gid.global.subnet_prefix),
(unsigned long long)
- cpu_to_be64(cm_ptr->dst.gid.global.interface_id));
+ cpu_to_be64(local.gid.global.interface_id));
/* save state and reference to EP, queue for RTU data */
cm_ptr->ep = ep_ptr;
cm_ptr->hca = ia_ptr->hca_ptr;
cm_ptr->state = SCM_ACCEPTED;
- /* restore remote address information for query */
- dapl_os_memcpy( &cm_ptr->dst.ia_address,
- &ep_ptr->remote_ia_address,
- sizeof(cm_ptr->dst.ia_address));
-
dapl_dbg_log( DAPL_DBG_TYPE_EP," PASSIVE: accepted!\n" );
dapli_cm_queue(cm_ptr);
return DAT_SUCCESS;
@@ -701,6 +765,29 @@ dapli_socket_accept_rtu(dp_ib_cm_handle_t cm_ptr)
/* final data exchange if remote QP state is good to go */
dapl_dbg_log( DAPL_DBG_TYPE_EP," PASSIVE: connected!\n" );
+
+#ifdef DAT_EXTENSIONS
+ if (cm_ptr->dst.qp_type == IBV_QPT_UD) {
+ DAT_IB_EXTENSION_EVENT_DATA xevent;
+
+ /* post EVENT, modify_qp created ah */
+ xevent.status = 0;
+ xevent.type = DAT_IB_UD_REMOTE_AH;
+ xevent.remote_ah.ah = cm_ptr->ah;
+ xevent.remote_ah.qpn = cm_ptr->dst.qpn;
+ dapl_os_memcpy( &xevent.remote_ah.ia_addr,
+ &cm_ptr->dst.ia_address,
+ sizeof(cm_ptr->dst.ia_address));
+
+ dapls_evd_post_connection_event_ext(
+ (DAPL_EVD*)cm_ptr->ep->param.connect_evd_handle,
+ DAT_IB_UD_CONNECTION_EVENT_ESTABLISHED,
+ (DAT_EP_HANDLE)cm_ptr->ep,
+ (DAT_COUNT)cm_ptr->dst.p_size,
+ (DAT_PVOID*)cm_ptr->p_data,
+ (DAT_PVOID*)&xevent);
+ } else
+#endif
dapls_cr_callback(cm_ptr, IB_CME_CONNECTED, NULL, cm_ptr->sp);
return;
bail:
diff --git a/dapl/openib_scm/dapl_ib_dto.h b/dapl/openib_scm/dapl_ib_dto.h
index 4b06b72..b9826f5 100644
--- a/dapl/openib_scm/dapl_ib_dto.h
+++ b/dapl/openib_scm/dapl_ib_dto.h
@@ -58,6 +58,9 @@
STATIC _INLINE_ int dapls_cqe_opcode(ib_work_completion_t *cqe_p);
+#define CQE_WR_TYPE_UD(id) \
+ (((DAPL_COOKIE *)(uintptr_t)id)->ep->qp_handle->qp_type == IBV_QPT_UD)
+
/*
* dapls_ib_post_recv
*
@@ -171,7 +174,11 @@ dapls_ib_post_send (
if (NULL == ds_array_p)
return (DAT_INSUFFICIENT_RESOURCES);
-
+
+#ifdef DAT_EXTENSIONS
+ if (ep_ptr->qp_handle->qp_type != IBV_QPT_RC)
+ return(DAT_ERROR(DAT_INVALID_HANDLE, DAT_INVALID_HANDLE_EP));
+#endif
/* setup the work request */
wr.next = 0;
wr.opcode = op_type;
@@ -247,6 +254,11 @@ STATIC _INLINE_ DAT_DTOS dapls_cqe_dtos_opcode(ib_work_completion_t *cqe_p)
switch (cqe_p->opcode) {
case IBV_WC_SEND:
+#ifdef DAT_EXTENSIONS
+ if (CQE_WR_TYPE_UD(cqe_p->wr_id))
+ return (DAT_IB_DTO_SEND_UD);
+ else
+#endif
return (DAT_DTO_SEND);
case IBV_WC_RDMA_READ:
return (DAT_DTO_RDMA_READ);
@@ -269,6 +281,13 @@ STATIC _INLINE_ DAT_DTOS dapls_cqe_dtos_opcode(ib_work_completion_t *cqe_p)
return (DAT_DTO_RDMA_WRITE);
#endif
case IBV_WC_RECV:
+#ifdef DAT_EXTENSIONS
+ if (CQE_WR_TYPE_UD(cqe_p->wr_id))
+ return (DAT_IB_DTO_RECV_UD);
+ else if (cqe_p->wc_flags & IBV_WC_WITH_IMM)
+ return (DAT_IB_DTO_RECV_MSG_IMMED);
+ else
+#endif
return (DAT_DTO_RECEIVE);
default:
return (0xff);
@@ -295,13 +314,14 @@ dapls_ib_post_ext_send (
IN DAT_UINT32 immed_data,
IN DAT_UINT64 compare_add,
IN DAT_UINT64 swap,
- IN DAT_COMPLETION_FLAGS completion_flags)
+ IN DAT_COMPLETION_FLAGS completion_flags,
+ IN DAT_IB_ADDR_HANDLE *remote_ah)
{
dapl_dbg_log(DAPL_DBG_TYPE_EP,
- " post_snd: ep %p op %d ck %p sgs",
+ " post_ext_snd: ep %p op %d ck %p sgs",
"%d l_iov %p r_iov %p f %d\n",
ep_ptr, op_type, cookie, segments, local_iov,
- remote_iov, completion_flags);
+ remote_iov, completion_flags, remote_ah);
ib_data_segment_t ds_array[DEFAULT_DS_ENTRIES];
ib_data_segment_t *ds_array_p, *ds_array_start_p;
@@ -393,6 +413,21 @@ dapls_ib_post_ext_send (
wr.wr.atomic.remote_addr = remote_iov->virtual_address;
wr.wr.atomic.rkey = remote_iov->rmr_context;
break;
+ case OP_SEND_UD:
+ /* post must be on EP with service_type of UD */
+ if (ep_ptr->qp_handle->qp_type != IBV_QPT_UD)
+ return(DAT_ERROR(DAT_INVALID_HANDLE, DAT_INVALID_HANDLE_EP));
+
+ dapl_dbg_log(DAPL_DBG_TYPE_EP,
+ " post_ext: OP_SEND_UD ah=%p"
+ " qp_num=0x%x\n",
+ remote_ah, remote_ah->qpn);
+
+ wr.opcode = OP_SEND;
+ wr.wr.ud.ah = remote_ah->ah;
+ wr.wr.ud.remote_qpn = remote_ah->qpn;
+ wr.wr.ud.remote_qkey = SCM_UD_QKEY;
+ break;
default:
break;
}
@@ -431,12 +466,16 @@ dapls_ib_optional_prv_dat(
return DAT_SUCCESS;
}
+
/* map Work Completions to DAPL WR operations */
STATIC _INLINE_ int dapls_cqe_opcode(ib_work_completion_t *cqe_p)
{
switch (cqe_p->opcode) {
case IBV_WC_SEND:
- return (OP_SEND);
+ if (CQE_WR_TYPE_UD(cqe_p->wr_id))
+ return(OP_SEND_UD);
+ else
+ return (OP_SEND);
case IBV_WC_RDMA_WRITE:
if (cqe_p->wc_flags & IBV_WC_WITH_IMM)
return (OP_RDMA_WRITE_IMM);
@@ -451,8 +490,10 @@ STATIC _INLINE_ int dapls_cqe_opcode(ib_work_completion_t *cqe_p)
case IBV_WC_BIND_MW:
return (OP_BIND_MW);
case IBV_WC_RECV:
- if (cqe_p->wc_flags & IBV_WC_WITH_IMM)
- return (OP_RECEIVE_IMM);
+ if (CQE_WR_TYPE_UD(cqe_p->wr_id))
+ return (OP_RECV_UD);
+ else if (cqe_p->wc_flags & IBV_WC_WITH_IMM)
+ return (OP_RECEIVE_MSG_IMM);
else
return (OP_RECEIVE);
case IBV_WC_RECV_RDMA_WITH_IMM:
@@ -481,10 +522,13 @@ STATIC _INLINE_ char * dapls_dto_op_str(int op)
"OP_COMP_AND_SWAP",
"OP_FETCH_AND_ADD",
"OP_RECEIVE",
- "OP_RECEIVE_IMM",
+ "OP_RECEIVE_MSG_IMM",
+ "OP_RECEIVE_RDMA_IMM",
"OP_BIND_MW"
+ "OP_SEND_UD"
+ "OP_RECV_UD"
};
- return ((op < 0 || op > 9) ? "Invalid CQE OP?" : optable[op]);
+ return ((op < 0 || op > 12) ? "Invalid CQE OP?" : optable[op]);
}
static _INLINE_ char *
diff --git a/dapl/openib_scm/dapl_ib_extensions.c b/dapl/openib_scm/dapl_ib_extensions.c
index 1402057..b88e853 100755
--- a/dapl/openib_scm/dapl_ib_extensions.c
+++ b/dapl/openib_scm/dapl_ib_extensions.c
@@ -54,7 +54,8 @@ dapli_post_ext( IN DAT_EP_HANDLE ep_handle,
IN DAT_DTO_COOKIE user_cookie,
IN const DAT_RMR_TRIPLET *remote_iov,
IN int op_type,
- IN DAT_COMPLETION_FLAGS flags );
+ IN DAT_COMPLETION_FLAGS flags,
+ IN DAT_IB_ADDR_HANDLE *ah);
/*
@@ -81,6 +82,7 @@ dapl_extensions(IN DAT_HANDLE dat_handle,
IN va_list args)
{
DAT_EP_HANDLE ep;
+ DAT_IB_ADDR_HANDLE *ah;
DAT_LMR_TRIPLET *lmr_p;
DAT_DTO_COOKIE cookie;
const DAT_RMR_TRIPLET *rmr_p;
@@ -113,7 +115,7 @@ dapl_extensions(IN DAT_HANDLE dat_handle,
status = dapli_post_ext(ep, 0, 0, dat_uint32, segments, lmr_p,
cookie, rmr_p, OP_RDMA_WRITE_IMM,
- comp_flags );
+ comp_flags, ah);
break;
case DAT_IB_CMP_AND_SWAP_OP:
@@ -130,7 +132,7 @@ dapl_extensions(IN DAT_HANDLE dat_handle,
status = dapli_post_ext(ep, dat_uint64a, dat_uint64b,
0, segments, lmr_p, cookie, rmr_p,
- OP_COMP_AND_SWAP, comp_flags );
+ OP_COMP_AND_SWAP, comp_flags, ah);
break;
case DAT_IB_FETCH_AND_ADD_OP:
@@ -146,8 +148,23 @@ dapl_extensions(IN DAT_HANDLE dat_handle,
status = dapli_post_ext(ep, dat_uint64a, 0, 0, segments,
lmr_p, cookie, rmr_p,
- OP_FETCH_AND_ADD, comp_flags );
+ OP_FETCH_AND_ADD, comp_flags, ah);
+ break;
+ case DAT_IB_UD_SEND_OP:
+ dapl_dbg_log(DAPL_DBG_TYPE_RTN,
+ " UD post_send extension call\n");
+
+ ep = dat_handle; /* ep_handle */
+ segments = va_arg( args, DAT_COUNT); /* segments */
+ lmr_p = va_arg( args, DAT_LMR_TRIPLET*);
+ ah = va_arg( args, DAT_IB_ADDR_HANDLE*);
+ cookie = va_arg( args, DAT_DTO_COOKIE);
+ comp_flags = va_arg( args, DAT_COMPLETION_FLAGS);
+
+ status = dapli_post_ext(ep, 0, 0, 0, segments,
+ lmr_p, cookie, NULL,
+ OP_SEND_UD, comp_flags, ah);
break;
default:
@@ -169,7 +186,8 @@ dapli_post_ext( IN DAT_EP_HANDLE ep_handle,
IN DAT_DTO_COOKIE user_cookie,
IN const DAT_RMR_TRIPLET *remote_iov,
IN int op_type,
- IN DAT_COMPLETION_FLAGS flags )
+ IN DAT_COMPLETION_FLAGS flags,
+ IN DAT_IB_ADDR_HANDLE *ah)
{
DAPL_EP *ep_ptr;
ib_qp_handle_t qp_ptr;
@@ -178,9 +196,9 @@ dapli_post_ext( IN DAT_EP_HANDLE ep_handle,
dapl_dbg_log(DAPL_DBG_TYPE_API,
" post_ext_op: ep %p cmp_val %d "
- "swap_val %d cookie 0x%x, r_iov %p, flags 0x%x\n",
+ "swap_val %d cookie 0x%x, r_iov %p, flags 0x%x, ah %p\n",
ep_handle, (unsigned)cmp_add, (unsigned)swap,
- (unsigned)user_cookie.as_64, remote_iov, flags);
+ (unsigned)user_cookie.as_64, remote_iov, flags, ah);
if (DAPL_BAD_HANDLE(ep_handle, DAPL_MAGIC_EP))
return(DAT_ERROR(DAT_INVALID_HANDLE, DAT_INVALID_HANDLE_EP));
@@ -217,7 +235,8 @@ dapli_post_ext( IN DAT_EP_HANDLE ep_handle,
immed_data, /* immed data */
cmp_add, /* compare or add */
swap, /* swap */
- flags);
+ flags,
+ ah);
if (dat_status != DAT_SUCCESS) {
dapl_os_atomic_dec(&ep_ptr->req_count);
@@ -288,6 +307,15 @@ dapls_cqe_to_event_extension(IN DAPL_EP *ep_ptr,
ext_data->type = DAT_IB_RDMA_WRITE_IMMED_DATA;
ext_data->val.immed.data = DAPL_GET_CQE_IMMED_DATA(cqe_ptr);
break;
+ case OP_RECEIVE_MSG_IMM:
+ dapl_dbg_log (DAPL_DBG_TYPE_EVD,
+ " cqe_to_event_ext: OP_RECEIVE_MSG_IMMED\n");
+
+ /* immed recvd, type and inbound recv message transfer size */
+ dto->transfered_length = DAPL_GET_CQE_BYTESNUM(cqe_ptr);
+ ext_data->type = DAT_IB_RECV_IMMED_DATA;
+ ext_data->val.immed.data = DAPL_GET_CQE_IMMED_DATA(cqe_ptr);
+ break;
case OP_COMP_AND_SWAP:
dapl_dbg_log (DAPL_DBG_TYPE_EVD,
" cqe_to_event_ext: COMP_AND_SWAP_RESP\n");
@@ -304,6 +332,23 @@ dapls_cqe_to_event_extension(IN DAPL_EP *ep_ptr,
ext_data->type = DAT_IB_FETCH_AND_ADD;
dto->transfered_length = DAPL_GET_CQE_BYTESNUM(cqe_ptr);
break;
+ case OP_SEND_UD:
+ dapl_dbg_log (DAPL_DBG_TYPE_EVD,
+ " cqe_to_event_ext: UD_SEND\n");
+
+ /* type and outbound send transfer size */
+ ext_data->type = DAT_IB_UD_SEND;
+ dto->transfered_length = cookie->val.dto.size;
+ break;
+ case OP_RECV_UD:
+ dapl_dbg_log (DAPL_DBG_TYPE_EVD,
+ " cqe_to_event_ext: UD_RECV\n");
+
+ /* type and inbound recv message transfer size */
+ ext_data->type = DAT_IB_UD_RECV;
+ dto->transfered_length = DAPL_GET_CQE_BYTESNUM(cqe_ptr);
+ break;
+
default:
/* not extended operation */
ext_data->status = DAT_IB_OP_ERR;
diff --git a/dapl/openib_scm/dapl_ib_qp.c b/dapl/openib_scm/dapl_ib_qp.c
index 8577131..4fae307 100644
--- a/dapl/openib_scm/dapl_ib_qp.c
+++ b/dapl/openib_scm/dapl_ib_qp.c
@@ -114,6 +114,16 @@ dapls_ib_qp_alloc (
qp_create.cap.max_send_sge = attr->max_request_iov;
qp_create.cap.max_inline_data = ia_ptr->hca_ptr->ib_trans.max_inline_send;
qp_create.qp_type = IBV_QPT_RC;
+
+#ifdef DAT_EXTENSIONS
+ if (attr->service_type == DAT_IB_SERVICE_TYPE_UD) {
+ qp_create.qp_type = IBV_QPT_UD;
+ if (attr->max_message_size >
+ (128 << ia_ptr->hca_ptr->ib_trans.mtu)) {
+ return (DAT_INVALID_PARAMETER | DAT_INVALID_ARG6);
+ }
+ }
+#endif
qp_create.qp_context = (void*)ep_ptr;
/* ibv assumes rcv_cq is never NULL, set to req_cq */
@@ -299,12 +309,13 @@ dapls_ib_reinit_ep (
DAT_RETURN
dapls_modify_qp_state ( IN ib_qp_handle_t qp_handle,
IN ib_qp_state_t qp_state,
- IN ib_qp_cm_t *qp_cm )
+ IN struct ib_cm_handle *cm_ptr )
{
struct ibv_qp_attr qp_attr;
enum ibv_qp_attr_mask mask = IBV_QP_STATE;
DAPL_EP *ep_ptr = (DAPL_EP*)qp_handle->qp_context;
DAPL_IA *ia_ptr = ep_ptr->header.owner_ia;
+ ib_qp_cm_t *qp_cm = &cm_ptr->dst;
dapl_os_memzero((void*)&qp_attr, sizeof(qp_attr));
qp_attr.qp_state = qp_state;
@@ -313,6 +324,12 @@ dapls_modify_qp_state ( IN ib_qp_handle_t qp_handle,
/* additional attributes with RTR and RTS */
case IBV_QPS_RTR:
{
+ dapl_dbg_log(DAPL_DBG_TYPE_EP,
+ " QPS_RTR: type %d qpn %x lid %x"
+ " port %x\n",
+ qp_handle->qp_type,
+ qp_cm->qpn, qp_cm->lid, qp_cm->port);
+
mask |= IBV_QP_AV |
IBV_QP_PATH_MTU |
IBV_QP_DEST_QPN |
@@ -329,46 +346,59 @@ dapls_modify_qp_state ( IN ib_qp_handle_t qp_handle,
ep_ptr->param.ep_attr.max_rdma_read_out;
qp_attr.min_rnr_timer =
ia_ptr->hca_ptr->ib_trans.rnr_timer;
+
+ /* address handle */
qp_attr.ah_attr.dlid = qp_cm->lid;
- /* global routing */
if (ia_ptr->hca_ptr->ib_trans.global) {
qp_attr.ah_attr.is_global = 1;
qp_attr.ah_attr.grh.dgid = qp_cm->gid;
qp_attr.ah_attr.grh.hop_limit =
- ia_ptr->hca_ptr->ib_trans.hop_limit;
+ ia_ptr->hca_ptr->ib_trans.hop_limit;
qp_attr.ah_attr.grh.traffic_class =
- ia_ptr->hca_ptr->ib_trans.tclass;
+ ia_ptr->hca_ptr->ib_trans.tclass;
}
qp_attr.ah_attr.sl = 0;
qp_attr.ah_attr.src_path_bits = 0;
- qp_attr.ah_attr.port_num = qp_cm->port;
-
- dapl_dbg_log (DAPL_DBG_TYPE_EP,
- " modify_qp_rtr: qpn %x lid %x "
- "port %x rd_atomic %d\n",
- qp_cm->qpn, qp_cm->lid, qp_cm->port,
- qp_attr.max_dest_rd_atomic );
-
+ qp_attr.ah_attr.port_num = ia_ptr->hca_ptr->port_num;
+#ifdef DAT_EXTENSIONS
+ /* UD: create AH for remote side */
+ if (qp_handle->qp_type == IBV_QPT_UD) {
+ ib_pd_handle_t pz;
+ pz = ((DAPL_PZ *)
+ ep_ptr->param.pz_handle)->pd_handle;
+ mask = IBV_QP_STATE;
+ cm_ptr->ah = ibv_create_ah(pz,
+ &qp_attr.ah_attr);
+ if (!cm_ptr->ah)
+ return(dapl_convert_errno(errno,
+ "ibv_ah"));
+ }
+#endif
break;
}
case IBV_QPS_RTS:
{
- mask |= IBV_QP_TIMEOUT |
- IBV_QP_RETRY_CNT |
- IBV_QP_RNR_RETRY |
- IBV_QP_SQ_PSN |
- IBV_QP_MAX_QP_RD_ATOMIC;
-
+ mask |= IBV_QP_SQ_PSN;
+ if (qp_handle->qp_type == IBV_QPT_RC) {
+ mask |= IBV_QP_TIMEOUT |
+ IBV_QP_RETRY_CNT |
+ IBV_QP_RNR_RETRY |
+ IBV_QP_MAX_QP_RD_ATOMIC;
+
+ qp_attr.timeout =
+ ia_ptr->hca_ptr->ib_trans.ack_timer;
+ qp_attr.retry_cnt =
+ ia_ptr->hca_ptr->ib_trans.ack_retry;
+ qp_attr.rnr_retry =
+ ia_ptr->hca_ptr->ib_trans.rnr_retry;
+ qp_attr.max_rd_atomic =
+ ep_ptr->param.ep_attr.max_rdma_read_out;
+ }
qp_attr.qp_state = IBV_QPS_RTS;
- qp_attr.timeout = ia_ptr->hca_ptr->ib_trans.ack_timer;
- qp_attr.retry_cnt = ia_ptr->hca_ptr->ib_trans.ack_retry;
- qp_attr.rnr_retry = ia_ptr->hca_ptr->ib_trans.rnr_retry;
qp_attr.sq_psn = 1;
- qp_attr.max_rd_atomic =
- ep_ptr->param.ep_attr.max_rdma_read_out;
dapl_dbg_log(DAPL_DBG_TYPE_EP,
- " modify_qp_rts: psn %x rd_atomic %d ack %d "
+ " QPS_RTS: psn %x rd_atomic %d ack %d "
" retry %d rnr_retry %d\n",
qp_attr.sq_psn, qp_attr.max_rd_atomic,
qp_attr.timeout, qp_attr.retry_cnt,
@@ -377,23 +407,29 @@ dapls_modify_qp_state ( IN ib_qp_handle_t qp_handle,
}
case IBV_QPS_INIT:
{
- mask |= IBV_QP_PKEY_INDEX |
- IBV_QP_PORT |
- IBV_QP_ACCESS_FLAGS;
-
- qp_attr.pkey_index = 0;
- qp_attr.port_num = ia_ptr->hca_ptr->port_num;
- qp_attr.qp_access_flags =
+ mask |= IBV_QP_PKEY_INDEX | IBV_QP_PORT;
+ if (qp_handle->qp_type == IBV_QPT_RC) {
+ mask |= IBV_QP_ACCESS_FLAGS;
+ qp_attr.qp_access_flags =
IBV_ACCESS_LOCAL_WRITE |
IBV_ACCESS_REMOTE_WRITE |
IBV_ACCESS_REMOTE_READ |
IBV_ACCESS_REMOTE_ATOMIC |
IBV_ACCESS_MW_BIND;
-
+ }
+#ifdef DAT_EXTENSIONS
+ if (qp_handle->qp_type == IBV_QPT_UD) {
+ mask |= IBV_QP_QKEY;
+ qp_attr.qkey = SCM_UD_QKEY;
+ }
+#endif
+ qp_attr.pkey_index = 0;
+ qp_attr.port_num = ia_ptr->hca_ptr->port_num;
+
dapl_dbg_log (DAPL_DBG_TYPE_EP,
- " modify_qp_init: pi %x port %x acc %x\n",
+ " QPS_INIT: pi %x port %x acc %x qkey 0x%x\n",
qp_attr.pkey_index, qp_attr.port_num,
- qp_attr.qp_access_flags );
+ qp_attr.qp_access_flags, qp_attr.qkey);
break;
}
default:
diff --git a/dapl/openib_scm/dapl_ib_util.c b/dapl/openib_scm/dapl_ib_util.c
index 362710d..43f85ac 100644
--- a/dapl/openib_scm/dapl_ib_util.c
+++ b/dapl/openib_scm/dapl_ib_util.c
@@ -436,7 +436,7 @@ DAT_RETURN dapls_ib_query_hca (
ia_attr->max_lmr_virtual_address = dev_attr.max_mr_size;
ia_attr->max_rmr_target_address = dev_attr.max_mr_size;
ia_attr->max_pzs = dev_attr.max_pd;
- ia_attr->max_mtu_size = port_attr.max_msg_sz;
+ ia_attr->max_message_size = port_attr.max_msg_sz;
ia_attr->max_rdma_size = port_attr.max_msg_sz;
ia_attr->max_iov_segments_per_rdma_read = dev_attr.max_sge;
ia_attr->max_iov_segments_per_rdma_write = dev_attr.max_sge;
@@ -463,14 +463,14 @@ DAT_RETURN dapls_ib_query_hca (
ia_attr->max_evds, ia_attr->max_evd_qlen );
dapl_dbg_log (DAPL_DBG_TYPE_UTIL,
" query_hca: msg %llu rdma %llu iov %d lmr %d rmr %d ack_time %d\n",
- ia_attr->max_mtu_size, ia_attr->max_rdma_size,
+ ia_attr->max_message_size, ia_attr->max_rdma_size,
ia_attr->max_iov_segments_per_dto, ia_attr->max_lmrs,
ia_attr->max_rmrs,hca_ptr->ib_trans.ack_timer );
}
if (ep_attr != NULL) {
(void) dapl_os_memzero(ep_attr, sizeof(*ep_attr));
- ep_attr->max_mtu_size = port_attr.max_msg_sz;
+ ep_attr->max_message_size = port_attr.max_msg_sz;
ep_attr->max_rdma_size = port_attr.max_msg_sz;
ep_attr->max_recv_dtos = dev_attr.max_qp_wr;
ep_attr->max_request_dtos = dev_attr.max_qp_wr;
@@ -479,8 +479,9 @@ DAT_RETURN dapls_ib_query_hca (
ep_attr->max_rdma_read_in = dev_attr.max_qp_rd_atom;
ep_attr->max_rdma_read_out= dev_attr.max_qp_rd_atom;
dapl_dbg_log (DAPL_DBG_TYPE_UTIL,
- " query_hca: MAX msg %llu dto %d iov %d rdma i%d,o%d\n",
- ep_attr->max_mtu_size,
+ " query_hca: MAX msg %llu mtu %d dto %d iov %d"
+ " rdma i%d,o%d\n",
+ ep_attr->max_message_size,
ep_attr->max_recv_dtos, ep_attr->max_recv_iov,
ep_attr->max_rdma_read_in, ep_attr->max_rdma_read_out);
}
@@ -574,6 +575,9 @@ DAT_NAMED_ATTR ib_attrs[] = {
{
DAT_IB_ATTR_IMMED_DATA, "TRUE"
},
+ {
+ DAT_IB_ATTR_UD, "TRUE"
+ },
#endif
};
diff --git a/dapl/openib_scm/dapl_ib_util.h b/dapl/openib_scm/dapl_ib_util.h
index 39eb245..bd3ea83 100644
--- a/dapl/openib_scm/dapl_ib_util.h
+++ b/dapl/openib_scm/dapl_ib_util.h
@@ -52,6 +52,10 @@
#include <infiniband/verbs.h>
#include <byteswap.h>
+#ifdef DAT_EXTENSIONS
+#include <dat2/dat_ib_extensions.h>
+#endif
+
#ifndef __cplusplus
#define false 0
#define true 1
@@ -72,7 +76,7 @@ typedef ib_hca_handle_t dapl_ibal_ca_t;
/* CM mappings, user CM not complete use SOCKETS */
/* destination info to exchange, define wire protocol version */
-#define DSCM_VER 2
+#define DSCM_VER 3
typedef struct _ib_qp_cm
{
uint16_t ver;
@@ -83,6 +87,7 @@ typedef struct _ib_qp_cm
uint32_t p_size;
DAT_SOCK_ADDR6 ia_address;
union ibv_gid gid;
+ uint16_t qp_type;
} ib_qp_cm_t;
/*
@@ -117,10 +122,11 @@ struct ib_cm_handle
SCM_STATE state;
int socket;
struct dapl_hca *hca;
- DAT_HANDLE sp;
+ struct dapl_sp *sp;
struct dapl_ep *ep;
ib_qp_cm_t dst;
unsigned char p_data[256];
+ struct ibv_ah *ah;
};
typedef struct ib_cm_handle *dp_ib_cm_handle_t;
@@ -180,6 +186,9 @@ typedef struct ibv_comp_channel *ib_wait_obj_handle_t;
/* inline send rdma threshold */
#define INLINE_SEND_DEFAULT 128
+/* qkey for UD QP's */
+#define SCM_UD_QKEY 0x78654321
+
/* RC timer - retry count defaults */
#define SCM_ACK_TIMER 15 /* 5 bits, 4.096us*2^ack_timer. 15 == 134ms */
#define SCM_ACK_RETRY 7 /* 3 bits, 7 * 134ms = 940ms */
@@ -207,8 +216,11 @@ typedef struct ibv_comp_channel *ib_wait_obj_handle_t;
#define OP_COMP_AND_SWAP IBV_WR_ATOMIC_CMP_AND_SWP
#define OP_FETCH_AND_ADD IBV_WR_ATOMIC_FETCH_AND_ADD
#define OP_RECEIVE 7 /* internal op */
-#define OP_RECEIVE_IMM 8 /* internel op */
-#define OP_BIND_MW 9 /* internal op */
+#define OP_RECEIVE_IMM 8 /* rdma write with immed, internal op */
+#define OP_RECEIVE_MSG_IMM 9 /* recv msg with immed, internal op */
+#define OP_BIND_MW 10 /* internal op */
+#define OP_SEND_UD 11 /* internal op */
+#define OP_RECV_UD 12 /* internal op */
#define OP_INVALID 0xff
/* Definitions to map QP state */
@@ -321,7 +333,7 @@ void dapli_cq_thread_destroy(struct dapl_hca *hca_ptr);
DAT_RETURN
dapls_modify_qp_state ( IN ib_qp_handle_t qp_handle,
IN ib_qp_state_t qp_state,
- IN ib_qp_cm_t *qp_cm );
+ IN struct ib_cm_handle *cm_ptr );
/* inline functions */
STATIC _INLINE_ IB_HCA_NAME dapl_ib_convert_name (IN char *name)
--
1.5.2.5
More information about the general mailing list