[ofa-general] [PATCH 3/4] uDAPL v2 - IB UD extension - dapl scm: add support for UD extensions in socket cm provider

Arlin Davis arlin.r.davis at intel.com
Sun Jul 20 15:19:11 PDT 2008


Add qp_type to the connection information exchange.
Add a new post_send_ud call.
Change the connection manager to support QP types beyond RC.
Change connection events to use the new extended event calls.
Exchange address handle information during the connection phase.
Change modify_qp to handle both RC and UD types.

Signed-off-by: Arlin Davis ardavis at ichips.intel.com
---
 dapl/openib_scm/dapl_ib_cm.c         |  157 ++++++++++++++++++++++++++--------
 dapl/openib_scm/dapl_ib_dto.h        |   62 ++++++++++++--
 dapl/openib_scm/dapl_ib_extensions.c |   61 ++++++++++++--
 dapl/openib_scm/dapl_ib_qp.c         |  104 +++++++++++++++-------
 dapl/openib_scm/dapl_ib_util.c       |   14 ++-
 dapl/openib_scm/dapl_ib_util.h       |   22 ++++-
 6 files changed, 324 insertions(+), 96 deletions(-)

diff --git a/dapl/openib_scm/dapl_ib_cm.c b/dapl/openib_scm/dapl_ib_cm.c
index b87c060..e712f9d 100644
--- a/dapl/openib_scm/dapl_ib_cm.c
+++ b/dapl/openib_scm/dapl_ib_cm.c
@@ -243,6 +243,7 @@ dapli_socket_connect(DAPL_EP		*ep_ptr,
 
 	/* Send QP info, IA address, and private data */
 	cm_ptr->dst.qpn = htonl(ep_ptr->qp_handle->qp_num);
+	cm_ptr->dst.qp_type = htons(ep_ptr->qp_handle->qp_type);
 	cm_ptr->dst.port = htons(ia_ptr->hca_ptr->port_num);
 	cm_ptr->dst.lid = 
 		htons(dapli_get_lid(ia_ptr->hca_ptr->ib_hca_handle, 
@@ -266,7 +267,10 @@ dapli_socket_connect(DAPL_EP		*ep_ptr,
 		iovec[1].iov_len  = p_size;
 	}
 
-	dapl_dbg_log(DAPL_DBG_TYPE_EP," socket connected, write QP and private data\n"); 
+	dapl_dbg_log(DAPL_DBG_TYPE_EP,
+		     " socket connected, write QP (%d), private data (%d)\n",
+		     sizeof(ib_qp_cm_t),p_size); 
+	
 	len = writev(cm_ptr->socket, iovec, (p_size ? 2:1));
     	if (len != (p_size + sizeof(ib_qp_cm_t))) {
 		dapl_dbg_log(DAPL_DBG_TYPE_ERR, 
@@ -319,7 +323,7 @@ dapli_socket_connect_rtu(dp_ib_cm_handle_t cm_ptr)
 	if (len != sizeof(ib_qp_cm_t) || ntohs(cm_ptr->dst.ver) != DSCM_VER) {
 		dapl_dbg_log(DAPL_DBG_TYPE_ERR, 
 			     " connect_rtu read: ERR %s, rcnt=%d, ver=%d\n",
-			     strerror(errno), len, cm_ptr->dst.ver); 
+			     strerror(errno), len, ntohs(cm_ptr->dst.ver)); 
 		goto bail;
 	}
 	/* check for consumer reject */
@@ -335,6 +339,7 @@ dapli_socket_connect_rtu(dp_ib_cm_handle_t cm_ptr)
 	cm_ptr->dst.port = ntohs(cm_ptr->dst.port);
 	cm_ptr->dst.lid = ntohs(cm_ptr->dst.lid);
 	cm_ptr->dst.qpn = ntohl(cm_ptr->dst.qpn);
+	cm_ptr->dst.qp_type = ntohs(cm_ptr->dst.qp_type);
 	cm_ptr->dst.p_size = ntohl(cm_ptr->dst.p_size);
 
 	/* save remote address information */
@@ -343,10 +348,13 @@ dapli_socket_connect_rtu(dp_ib_cm_handle_t cm_ptr)
 			sizeof(ep_ptr->remote_ia_address));
 
 	dapl_dbg_log(DAPL_DBG_TYPE_EP, 
-		     " connect_rtu: DST %s port=0x%x lid=0x%x, qpn=0x%x, psize=%d\n",
-		     inet_ntoa(((struct sockaddr_in *)&cm_ptr->dst.ia_address)->sin_addr),
+		     " connect_rtu: DST %s port=0x%x lid=0x%x,"
+		     " qpn=0x%x, qp_type=%d, psize=%d\n",
+		     inet_ntoa(((struct sockaddr_in *)
+				&cm_ptr->dst.ia_address)->sin_addr),
 		     cm_ptr->dst.port, cm_ptr->dst.lid, 
-		     cm_ptr->dst.qpn, cm_ptr->dst.p_size); 
+		     cm_ptr->dst.qpn, cm_ptr->dst.qp_type, 
+		     cm_ptr->dst.p_size); 
 
 	/* validate private data size before reading */
 	if (cm_ptr->dst.p_size > IB_MAX_REP_PDATA_SIZE) {
@@ -357,7 +365,8 @@ dapli_socket_connect_rtu(dp_ib_cm_handle_t cm_ptr)
 	}
 
 	/* read private data into cm_handle if any present */
-	dapl_dbg_log(DAPL_DBG_TYPE_EP," socket connected, read private data\n"); 
+	dapl_dbg_log(DAPL_DBG_TYPE_EP," socket connected, read pdata\n"); 
+
 	if (cm_ptr->dst.p_size) {
 		iovec[0].iov_base = cm_ptr->p_data;
 		iovec[0].iov_len  = cm_ptr->dst.p_size;
@@ -372,11 +381,11 @@ dapli_socket_connect_rtu(dp_ib_cm_handle_t cm_ptr)
 
 	/* modify QP to RTR and then to RTS with remote info */
 	if (dapls_modify_qp_state(ep_ptr->qp_handle, 
-				  IBV_QPS_RTR, &cm_ptr->dst) != DAT_SUCCESS)
+				  IBV_QPS_RTR, cm_ptr) != DAT_SUCCESS)
 		goto bail;
 
 	if (dapls_modify_qp_state(ep_ptr->qp_handle, 
-				   IBV_QPS_RTS, &cm_ptr->dst) != DAT_SUCCESS)
+				   IBV_QPS_RTS, cm_ptr) != DAT_SUCCESS)
 		goto bail;
 		 
 	ep_ptr->qp_state = IB_QP_STATE_RTS;
@@ -390,10 +399,34 @@ dapli_socket_connect_rtu(dp_ib_cm_handle_t cm_ptr)
 	ep_ptr->cm_handle = cm_ptr;
 	cm_ptr->state = SCM_CONNECTED;
 	dapl_dbg_log(DAPL_DBG_TYPE_EP," ACTIVE: connected!\n"); 
+
+#ifdef DAT_EXTENSIONS
+	if (cm_ptr->dst.qp_type == IBV_QPT_UD) {
+		DAT_IB_EXTENSION_EVENT_DATA xevent;
+
+		/* post EVENT, modify_qp created ah */
+		xevent.status = 0;
+		xevent.type = DAT_IB_UD_REMOTE_AH;
+		xevent.remote_ah.ah = cm_ptr->ah;
+		xevent.remote_ah.qpn = cm_ptr->dst.qpn;
+		dapl_os_memcpy( &xevent.remote_ah.ia_addr,
+				&cm_ptr->dst.ia_address, 
+				sizeof(cm_ptr->dst.ia_address));
+	
+		dapls_evd_post_connection_event_ext(
+			(DAPL_EVD*)ep_ptr->param.connect_evd_handle,
+			DAT_IB_UD_CONNECTION_EVENT_ESTABLISHED,
+			(DAT_EP_HANDLE)ep_ptr,
+			(DAT_COUNT)cm_ptr->dst.p_size,
+			(DAT_PVOID*)cm_ptr->p_data,
+			(DAT_PVOID*)&xevent);
+    	} else 
+#endif
 	dapl_evd_connection_callback(cm_ptr, 
 				     IB_CME_CONNECTED, 
 				     cm_ptr->p_data, 
 				     ep_ptr);	
+
 	return;
 bail:
 	/* close socket, free cm structure and post error event */
@@ -515,7 +548,7 @@ dapli_socket_accept(ib_cm_srvc_handle_t cm_ptr)
 	    ntohs(acm_ptr->dst.ver) != DSCM_VER) {
 		dapl_dbg_log(DAPL_DBG_TYPE_ERR, 
 			     " accept read: ERR %s, rcnt=%d, ver=%d\n",
-			     strerror(errno), len, acm_ptr->dst.ver); 
+			     strerror(errno), len, ntohs(acm_ptr->dst.ver)); 
 		dat_status = DAT_INTERNAL_ERROR;
 		goto bail;
 	}
@@ -524,11 +557,13 @@ dapli_socket_accept(ib_cm_srvc_handle_t cm_ptr)
 	acm_ptr->dst.port = ntohs(acm_ptr->dst.port);
 	acm_ptr->dst.lid = ntohs(acm_ptr->dst.lid);
 	acm_ptr->dst.qpn = ntohl(acm_ptr->dst.qpn);
+	acm_ptr->dst.qp_type = ntohs(acm_ptr->dst.qp_type);
 	acm_ptr->dst.p_size = ntohl(acm_ptr->dst.p_size);
 
 	dapl_dbg_log(DAPL_DBG_TYPE_EP, 
-		     " accept: DST %s port=0x%x lid=0x%x, qpn=0x%x, psize=%d\n",
-		     inet_ntoa(((struct sockaddr_in *)&acm_ptr->dst.ia_address)->sin_addr),
+		     " accept: DST %s port=0x%x lid=0x%x, qpn=0x%x, psz=%d\n",
+		     inet_ntoa(((struct sockaddr_in *)
+			&acm_ptr->dst.ia_address)->sin_addr),
 		     acm_ptr->dst.port, acm_ptr->dst.lid, 
 		     acm_ptr->dst.qpn, acm_ptr->dst.p_size); 
 
@@ -559,7 +594,24 @@ dapli_socket_accept(ib_cm_srvc_handle_t cm_ptr)
 	}
 	
 	acm_ptr->state = SCM_ACCEPTING;
+	
+#ifdef DAT_EXTENSIONS
+	if (acm_ptr->dst.qp_type == IBV_QPT_UD) {
+		DAT_IB_EXTENSION_EVENT_DATA xevent;
 
+		/* post EVENT, modify_qp created ah */
+		xevent.status = 0;
+		xevent.type = DAT_IB_UD_CONNECT_REQUEST;
+		
+		dapls_evd_post_cr_event_ext(
+			acm_ptr->sp,
+			DAT_IB_UD_CONNECTION_REQUEST_EVENT,
+			acm_ptr,
+			(DAT_COUNT)acm_ptr->dst.p_size,
+			(DAT_PVOID*)acm_ptr->p_data,
+			(DAT_PVOID*)&xevent);
+    	} else 
+#endif
 	/* trigger CR event and return SUCCESS */
 	dapls_cr_callback(acm_ptr,
 			  IB_CME_CONNECTION_REQUEST_PENDING,
@@ -584,6 +636,7 @@ dapli_socket_accept_usr(DAPL_EP		*ep_ptr,
 {
 	DAPL_IA		*ia_ptr = ep_ptr->header.owner_ia;
 	dp_ib_cm_handle_t cm_ptr = cr_ptr->ib_cm_handle;
+	ib_qp_cm_t	local;
 	struct iovec    iovec[2];
 	int		len;
 
@@ -596,44 +649,59 @@ dapli_socket_accept_usr(DAPL_EP		*ep_ptr,
 	
 	dapl_dbg_log(DAPL_DBG_TYPE_EP, 
 		     " accept_usr: remote port=0x%x lid=0x%x"
-		     " qpn=0x%x psize=%d\n",
+		     " qpn=0x%x qp_type %d, psize=%d\n",
 		     cm_ptr->dst.port, cm_ptr->dst.lid,
-		     cm_ptr->dst.qpn, cm_ptr->dst.p_size); 
+		     cm_ptr->dst.qpn, cm_ptr->dst.qp_type,
+		     cm_ptr->dst.p_size); 
+
+#ifdef DAT_EXTENSIONS
+	if (cm_ptr->dst.qp_type == IBV_QPT_UD && 
+	    ep_ptr->qp_handle->qp_type != IBV_QPT_UD) {
+		    dapl_dbg_log(DAPL_DBG_TYPE_ERR,
+				 " accept_rtu: ERR remote QP is UD,"
+				 ", but local QP is not\n"); 
+		    return (DAT_INVALID_HANDLE | DAT_INVALID_HANDLE_EP);
+
+	}
+#endif
 
 	/* modify QP to RTR and then to RTS with remote info already read */
 	if (dapls_modify_qp_state(ep_ptr->qp_handle, 
-				  IBV_QPS_RTR, &cm_ptr->dst) != DAT_SUCCESS)
+				  IBV_QPS_RTR, cm_ptr) != DAT_SUCCESS)
 		goto bail;
 
 	if (dapls_modify_qp_state(ep_ptr->qp_handle, 
-				  IBV_QPS_RTS, &cm_ptr->dst) != DAT_SUCCESS)
+				  IBV_QPS_RTS, cm_ptr) != DAT_SUCCESS)
 		goto bail;
 
 	ep_ptr->qp_state = IB_QP_STATE_RTS;
 	
-	/* save remote address information */
+	/* save remote address information, for qp_query */
 	dapl_os_memcpy( &ep_ptr->remote_ia_address, 
 			&cm_ptr->dst.ia_address, 
 			sizeof(ep_ptr->remote_ia_address));
 
-	/* send our QP info, IA address, and private data */
-	cm_ptr->dst.qpn = htonl(ep_ptr->qp_handle->qp_num);
-	cm_ptr->dst.port = htons(ia_ptr->hca_ptr->port_num);
-	cm_ptr->dst.lid = htons(dapli_get_lid(ia_ptr->hca_ptr->ib_hca_handle, 
+	/* send our QP info, IA address, pdata. Don't overwrite dst data */
+	local.ver = htons(DSCM_VER);
+	local.rej = 0;
+	local.qpn = htonl(ep_ptr->qp_handle->qp_num);
+	local.qp_type = htons(ep_ptr->qp_handle->qp_type);
+	local.port = htons(ia_ptr->hca_ptr->port_num);
+	local.lid = htons(dapli_get_lid(ia_ptr->hca_ptr->ib_hca_handle, 
 				        (uint8_t)ia_ptr->hca_ptr->port_num));
-	if (cm_ptr->dst.lid == 0xffff)
+	if (local.lid == 0xffff)
 		goto bail;
 
         /* in network order */
 	if (ibv_query_gid(ia_ptr->hca_ptr->ib_hca_handle,
 			  (uint8_t)ia_ptr->hca_ptr->port_num,
 			  0,
-			  &cm_ptr->dst.gid))
+			  &local.gid))
 		goto bail;
 
-	cm_ptr->dst.ia_address = ia_ptr->hca_ptr->hca_address;
-	cm_ptr->dst.p_size = htonl(p_size);
-	iovec[0].iov_base = &cm_ptr->dst;
+	local.ia_address = ia_ptr->hca_ptr->hca_address;
+	local.p_size = htonl(p_size);
+	iovec[0].iov_base = &local;
 	iovec[0].iov_len  = sizeof(ib_qp_cm_t);
 	if (p_size) {
 		iovec[1].iov_base = p_data;
@@ -648,26 +716,22 @@ dapli_socket_accept_usr(DAPL_EP		*ep_ptr,
 	}
 	dapl_dbg_log(DAPL_DBG_TYPE_CM, 
 		     " accept_usr: local port=0x%x lid=0x%x"
-		     " qpn=0x%x psize=%d\n",
-		     ntohs(cm_ptr->dst.port), ntohs(cm_ptr->dst.lid), 
-		     ntohl(cm_ptr->dst.qpn), ntohl(cm_ptr->dst.p_size)); 
+		     " qpn=0x%x qp_type=%d psize=%d\n",
+		     ntohs(local.port), ntohs(local.lid), 
+		     ntohl(local.qpn), ntohs(local.qp_type),
+		     ntohl(local.p_size)); 
         dapl_dbg_log(DAPL_DBG_TYPE_CM,
                      " accept_usr SRC GID subnet %016llx id %016llx\n",
                      (unsigned long long) 
-			cpu_to_be64(cm_ptr->dst.gid.global.subnet_prefix),
+			cpu_to_be64(local.gid.global.subnet_prefix),
                      (unsigned long long) 
-			cpu_to_be64(cm_ptr->dst.gid.global.interface_id));
+			cpu_to_be64(local.gid.global.interface_id));
 
 	/* save state and reference to EP, queue for RTU data */
 	cm_ptr->ep = ep_ptr;
 	cm_ptr->hca = ia_ptr->hca_ptr;
 	cm_ptr->state = SCM_ACCEPTED;
 
-	/* restore remote address information for query */
-	dapl_os_memcpy( &cm_ptr->dst.ia_address, 
-			&ep_ptr->remote_ia_address,
-			sizeof(cm_ptr->dst.ia_address));
-
 	dapl_dbg_log( DAPL_DBG_TYPE_EP," PASSIVE: accepted!\n" ); 
 	dapli_cm_queue(cm_ptr);
 	return DAT_SUCCESS;
@@ -701,6 +765,29 @@ dapli_socket_accept_rtu(dp_ib_cm_handle_t cm_ptr)
 
 	/* final data exchange if remote QP state is good to go */
 	dapl_dbg_log( DAPL_DBG_TYPE_EP," PASSIVE: connected!\n" ); 
+
+#ifdef DAT_EXTENSIONS
+	if (cm_ptr->dst.qp_type == IBV_QPT_UD) {
+		DAT_IB_EXTENSION_EVENT_DATA xevent;
+
+		/* post EVENT, modify_qp created ah */
+		xevent.status = 0;
+		xevent.type = DAT_IB_UD_REMOTE_AH;
+		xevent.remote_ah.ah = cm_ptr->ah;
+		xevent.remote_ah.qpn = cm_ptr->dst.qpn;
+		dapl_os_memcpy( &xevent.remote_ah.ia_addr,
+				&cm_ptr->dst.ia_address, 
+				sizeof(cm_ptr->dst.ia_address));
+	
+		dapls_evd_post_connection_event_ext(
+			(DAPL_EVD*)cm_ptr->ep->param.connect_evd_handle,
+			DAT_IB_UD_CONNECTION_EVENT_ESTABLISHED,
+			(DAT_EP_HANDLE)cm_ptr->ep,
+			(DAT_COUNT)cm_ptr->dst.p_size,
+			(DAT_PVOID*)cm_ptr->p_data,
+			(DAT_PVOID*)&xevent);
+    	} else 
+#endif
 	dapls_cr_callback(cm_ptr, IB_CME_CONNECTED, NULL, cm_ptr->sp);
 	return;
 bail:
diff --git a/dapl/openib_scm/dapl_ib_dto.h b/dapl/openib_scm/dapl_ib_dto.h
index 4b06b72..b9826f5 100644
--- a/dapl/openib_scm/dapl_ib_dto.h
+++ b/dapl/openib_scm/dapl_ib_dto.h
@@ -58,6 +58,9 @@
 
 STATIC _INLINE_ int dapls_cqe_opcode(ib_work_completion_t *cqe_p);
 
+#define CQE_WR_TYPE_UD(id) \
+	(((DAPL_COOKIE *)(uintptr_t)id)->ep->qp_handle->qp_type == IBV_QPT_UD)
+
 /*
  * dapls_ib_post_recv
  *
@@ -171,7 +174,11 @@ dapls_ib_post_send (
 
 	if (NULL == ds_array_p)
 		return (DAT_INSUFFICIENT_RESOURCES);
-	
+
+#ifdef DAT_EXTENSIONS	
+	if (ep_ptr->qp_handle->qp_type != IBV_QPT_RC)
+		return(DAT_ERROR(DAT_INVALID_HANDLE, DAT_INVALID_HANDLE_EP));
+#endif
 	/* setup the work request */
 	wr.next = 0;
 	wr.opcode = op_type;
@@ -247,6 +254,11 @@ STATIC _INLINE_ DAT_DTOS dapls_cqe_dtos_opcode(ib_work_completion_t *cqe_p)
 	switch (cqe_p->opcode) {
 
 	case IBV_WC_SEND:
+#ifdef DAT_EXTENSIONS
+		if (CQE_WR_TYPE_UD(cqe_p->wr_id))
+			return (DAT_IB_DTO_SEND_UD);
+		else
+#endif			
 		return (DAT_DTO_SEND);
 	case IBV_WC_RDMA_READ:
 		return (DAT_DTO_RDMA_READ);
@@ -269,6 +281,13 @@ STATIC _INLINE_ DAT_DTOS dapls_cqe_dtos_opcode(ib_work_completion_t *cqe_p)
 		return (DAT_DTO_RDMA_WRITE);
 #endif
 	case IBV_WC_RECV:
+#ifdef DAT_EXTENSIONS
+		if (CQE_WR_TYPE_UD(cqe_p->wr_id)) 
+			return (DAT_IB_DTO_RECV_UD);
+		else if (cqe_p->wc_flags & IBV_WC_WITH_IMM)
+			return (DAT_IB_DTO_RECV_MSG_IMMED);
+		else
+#endif	
 		return (DAT_DTO_RECEIVE);
 	default:
 		return (0xff);
@@ -295,13 +314,14 @@ dapls_ib_post_ext_send (
 	IN  DAT_UINT32			immed_data,
 	IN  DAT_UINT64			compare_add,
 	IN  DAT_UINT64			swap,
-	IN  DAT_COMPLETION_FLAGS	completion_flags)
+	IN  DAT_COMPLETION_FLAGS	completion_flags,
+	IN  DAT_IB_ADDR_HANDLE		*remote_ah)
 {
 	dapl_dbg_log(DAPL_DBG_TYPE_EP,
-		     " post_snd: ep %p op %d ck %p sgs",
+		     " post_ext_snd: ep %p op %d ck %p sgs",
 		     "%d l_iov %p r_iov %p f %d\n",
 		     ep_ptr, op_type, cookie, segments, local_iov, 
-		     remote_iov, completion_flags);
+		     remote_iov, completion_flags, remote_ah);
 
 	ib_data_segment_t ds_array[DEFAULT_DS_ENTRIES];
 	ib_data_segment_t *ds_array_p, *ds_array_start_p;
@@ -393,6 +413,21 @@ dapls_ib_post_ext_send (
 		wr.wr.atomic.remote_addr = remote_iov->virtual_address;
 		wr.wr.atomic.rkey = remote_iov->rmr_context;
 		break;
+	case OP_SEND_UD:
+		/* post must be on EP with service_type of UD */
+		if (ep_ptr->qp_handle->qp_type != IBV_QPT_UD)
+			return(DAT_ERROR(DAT_INVALID_HANDLE, DAT_INVALID_HANDLE_EP));
+
+		dapl_dbg_log(DAPL_DBG_TYPE_EP, 
+			     " post_ext: OP_SEND_UD ah=%p"
+			     " qp_num=0x%x\n",
+			     remote_ah, remote_ah->qpn);
+		
+		wr.opcode = OP_SEND;
+		wr.wr.ud.ah = remote_ah->ah;
+		wr.wr.ud.remote_qpn = remote_ah->qpn;
+		wr.wr.ud.remote_qkey = SCM_UD_QKEY;
+		break;
 	default:
 		break;
 	}
@@ -431,12 +466,16 @@ dapls_ib_optional_prv_dat(
     return DAT_SUCCESS;
 }
 
+
 /* map Work Completions to DAPL WR operations */
 STATIC _INLINE_ int dapls_cqe_opcode(ib_work_completion_t *cqe_p)
 {
 	switch (cqe_p->opcode) {
 	case IBV_WC_SEND:
-		return (OP_SEND);
+		if (CQE_WR_TYPE_UD(cqe_p->wr_id))
+			return(OP_SEND_UD);
+		else
+			return (OP_SEND);
 	case IBV_WC_RDMA_WRITE:
 		if (cqe_p->wc_flags & IBV_WC_WITH_IMM)
 			return (OP_RDMA_WRITE_IMM);
@@ -451,8 +490,10 @@ STATIC _INLINE_ int dapls_cqe_opcode(ib_work_completion_t *cqe_p)
 	case IBV_WC_BIND_MW:
 		return (OP_BIND_MW);
 	case IBV_WC_RECV:
-		if (cqe_p->wc_flags & IBV_WC_WITH_IMM)
-			return (OP_RECEIVE_IMM);
+		if (CQE_WR_TYPE_UD(cqe_p->wr_id)) 
+			return (OP_RECV_UD);
+		else if (cqe_p->wc_flags & IBV_WC_WITH_IMM)
+			return (OP_RECEIVE_MSG_IMM);
 		else
 			return (OP_RECEIVE);
 	case IBV_WC_RECV_RDMA_WITH_IMM:
@@ -481,10 +522,13 @@ STATIC _INLINE_ char * dapls_dto_op_str(int op)
         "OP_COMP_AND_SWAP",
         "OP_FETCH_AND_ADD",
         "OP_RECEIVE",
-        "OP_RECEIVE_IMM",
+        "OP_RECEIVE_MSG_IMM",
+	"OP_RECEIVE_RDMA_IMM",
         "OP_BIND_MW"
+	"OP_SEND_UD"
+	"OP_RECV_UD"
     };
-    return ((op < 0 || op > 9) ? "Invalid CQE OP?" : optable[op]);
+    return ((op < 0 || op > 12) ? "Invalid CQE OP?" : optable[op]);
 }
 
 static _INLINE_ char *
diff --git a/dapl/openib_scm/dapl_ib_extensions.c b/dapl/openib_scm/dapl_ib_extensions.c
index 1402057..b88e853 100755
--- a/dapl/openib_scm/dapl_ib_extensions.c
+++ b/dapl/openib_scm/dapl_ib_extensions.c
@@ -54,7 +54,8 @@ dapli_post_ext( IN       DAT_EP_HANDLE		ep_handle,
 		IN       DAT_DTO_COOKIE		user_cookie,
 		IN const DAT_RMR_TRIPLET	*remote_iov,
 		IN	 int			op_type, 
-		IN       DAT_COMPLETION_FLAGS	flags );
+		IN       DAT_COMPLETION_FLAGS	flags,
+		IN	 DAT_IB_ADDR_HANDLE	*ah);
 
 
 /*
@@ -81,6 +82,7 @@ dapl_extensions(IN DAT_HANDLE		dat_handle,
 		IN va_list		args)
 {
 	DAT_EP_HANDLE		ep;
+	DAT_IB_ADDR_HANDLE	*ah;
 	DAT_LMR_TRIPLET		*lmr_p;
 	DAT_DTO_COOKIE		cookie;
 	const DAT_RMR_TRIPLET	*rmr_p;
@@ -113,7 +115,7 @@ dapl_extensions(IN DAT_HANDLE		dat_handle,
 		
 		status = dapli_post_ext(ep, 0, 0, dat_uint32, segments, lmr_p,
     					cookie, rmr_p, OP_RDMA_WRITE_IMM,
-    	    				comp_flags );
+    	    				comp_flags, ah);
     		break;
 
 	case DAT_IB_CMP_AND_SWAP_OP:
@@ -130,7 +132,7 @@ dapl_extensions(IN DAT_HANDLE		dat_handle,
 
 		status = dapli_post_ext(ep, dat_uint64a, dat_uint64b,
 					0, segments, lmr_p, cookie, rmr_p, 
-					OP_COMP_AND_SWAP, comp_flags );
+					OP_COMP_AND_SWAP, comp_flags, ah);
    		break;
 
 	case DAT_IB_FETCH_AND_ADD_OP:
@@ -146,8 +148,23 @@ dapl_extensions(IN DAT_HANDLE		dat_handle,
 		
 		status = dapli_post_ext(ep, dat_uint64a, 0, 0, segments, 
 					lmr_p, cookie, rmr_p, 
-					OP_FETCH_AND_ADD, comp_flags );
+					OP_FETCH_AND_ADD, comp_flags, ah);
+    		break;
 
+	case DAT_IB_UD_SEND_OP:
+    		dapl_dbg_log(DAPL_DBG_TYPE_RTN, 
+			     " UD post_send extension call\n");
+		
+		ep          = dat_handle;		/* ep_handle */
+		segments    = va_arg( args, DAT_COUNT);  /* segments */
+		lmr_p       = va_arg( args, DAT_LMR_TRIPLET*);  
+		ah          = va_arg( args, DAT_IB_ADDR_HANDLE*);  
+		cookie      = va_arg( args, DAT_DTO_COOKIE);
+		comp_flags  = va_arg( args, DAT_COMPLETION_FLAGS);  
+				
+		status = dapli_post_ext(ep, 0, 0, 0, segments, 
+					lmr_p, cookie, NULL, 
+					OP_SEND_UD, comp_flags, ah);
     		break;
 
 	default:
@@ -169,7 +186,8 @@ dapli_post_ext( IN       DAT_EP_HANDLE		ep_handle,
 		IN       DAT_DTO_COOKIE		user_cookie,
 		IN const DAT_RMR_TRIPLET	*remote_iov,
 		IN	 int			op_type, 
-		IN       DAT_COMPLETION_FLAGS	flags )
+		IN       DAT_COMPLETION_FLAGS	flags,
+		IN	 DAT_IB_ADDR_HANDLE	*ah)
 {
 	DAPL_EP 	*ep_ptr;
 	ib_qp_handle_t	qp_ptr;
@@ -178,9 +196,9 @@ dapli_post_ext( IN       DAT_EP_HANDLE		ep_handle,
 
 	dapl_dbg_log(DAPL_DBG_TYPE_API,
 		     " post_ext_op: ep %p cmp_val %d "
-		     "swap_val %d cookie 0x%x, r_iov %p, flags 0x%x\n",
+		     "swap_val %d cookie 0x%x, r_iov %p, flags 0x%x, ah %p\n",
 		     ep_handle, (unsigned)cmp_add, (unsigned)swap, 
-		     (unsigned)user_cookie.as_64, remote_iov, flags);
+		     (unsigned)user_cookie.as_64, remote_iov, flags, ah);
 
 	if (DAPL_BAD_HANDLE(ep_handle, DAPL_MAGIC_EP))
 		return(DAT_ERROR(DAT_INVALID_HANDLE, DAT_INVALID_HANDLE_EP));
@@ -217,7 +235,8 @@ dapli_post_ext( IN       DAT_EP_HANDLE		ep_handle,
 					    immed_data,	/* immed data */
 					    cmp_add,	/* compare or add */
 					    swap,	/* swap */
-					    flags);
+					    flags,
+					    ah);
 
 	if (dat_status != DAT_SUCCESS) {
 		dapl_os_atomic_dec(&ep_ptr->req_count);
@@ -288,6 +307,15 @@ dapls_cqe_to_event_extension(IN DAPL_EP			*ep_ptr,
 		ext_data->type = DAT_IB_RDMA_WRITE_IMMED_DATA;
 		ext_data->val.immed.data = DAPL_GET_CQE_IMMED_DATA(cqe_ptr);
 		break;
+	case OP_RECEIVE_MSG_IMM:
+		dapl_dbg_log (DAPL_DBG_TYPE_EVD,
+                                " cqe_to_event_ext: OP_RECEIVE_MSG_IMMED\n");
+		
+		/* immed recvd, type and inbound recv message transfer size */
+		dto->transfered_length = DAPL_GET_CQE_BYTESNUM(cqe_ptr);
+		ext_data->type = DAT_IB_RECV_IMMED_DATA;
+		ext_data->val.immed.data = DAPL_GET_CQE_IMMED_DATA(cqe_ptr);
+		break;
 	case OP_COMP_AND_SWAP:
                 dapl_dbg_log (DAPL_DBG_TYPE_EVD,
                                 " cqe_to_event_ext: COMP_AND_SWAP_RESP\n");
@@ -304,6 +332,23 @@ dapls_cqe_to_event_extension(IN DAPL_EP			*ep_ptr,
 		ext_data->type = DAT_IB_FETCH_AND_ADD;
 		dto->transfered_length = DAPL_GET_CQE_BYTESNUM(cqe_ptr);
 		break;
+	case OP_SEND_UD:
+                dapl_dbg_log (DAPL_DBG_TYPE_EVD,
+                                " cqe_to_event_ext: UD_SEND\n");
+
+		/* type and outbound send transfer size */
+		ext_data->type = DAT_IB_UD_SEND;
+		dto->transfered_length = cookie->val.dto.size;
+		break;
+	case OP_RECV_UD:
+                dapl_dbg_log (DAPL_DBG_TYPE_EVD,
+                                " cqe_to_event_ext: UD_RECV\n");
+
+		/* type and inbound recv message transfer size */
+		ext_data->type = DAT_IB_UD_RECV;
+		dto->transfered_length = DAPL_GET_CQE_BYTESNUM(cqe_ptr);
+		break;
+
 	default:
 		/* not extended operation */
 		ext_data->status = DAT_IB_OP_ERR;
diff --git a/dapl/openib_scm/dapl_ib_qp.c b/dapl/openib_scm/dapl_ib_qp.c
index 8577131..4fae307 100644
--- a/dapl/openib_scm/dapl_ib_qp.c
+++ b/dapl/openib_scm/dapl_ib_qp.c
@@ -114,6 +114,16 @@ dapls_ib_qp_alloc (
 	qp_create.cap.max_send_sge = attr->max_request_iov;
 	qp_create.cap.max_inline_data = ia_ptr->hca_ptr->ib_trans.max_inline_send; 
 	qp_create.qp_type = IBV_QPT_RC;
+
+#ifdef DAT_EXTENSIONS
+	if (attr->service_type == DAT_IB_SERVICE_TYPE_UD) {
+		qp_create.qp_type = IBV_QPT_UD;
+		if (attr->max_message_size > 
+		    (128 << ia_ptr->hca_ptr->ib_trans.mtu)) {
+			return (DAT_INVALID_PARAMETER | DAT_INVALID_ARG6);
+		}
+	}
+#endif
 	qp_create.qp_context = (void*)ep_ptr;
 
 	/* ibv assumes rcv_cq is never NULL, set to req_cq */
@@ -299,12 +309,13 @@ dapls_ib_reinit_ep (
 DAT_RETURN
 dapls_modify_qp_state ( IN ib_qp_handle_t	qp_handle,
 			IN ib_qp_state_t	qp_state,
-			IN ib_qp_cm_t		*qp_cm )
+			IN struct ib_cm_handle	*cm_ptr )
 {
 	struct ibv_qp_attr 	qp_attr;
 	enum ibv_qp_attr_mask	mask = IBV_QP_STATE;
 	DAPL_EP			*ep_ptr = (DAPL_EP*)qp_handle->qp_context;
 	DAPL_IA			*ia_ptr = ep_ptr->header.owner_ia;
+	ib_qp_cm_t		*qp_cm = &cm_ptr->dst;
 			
 	dapl_os_memzero((void*)&qp_attr, sizeof(qp_attr));
 	qp_attr.qp_state = qp_state;
@@ -313,6 +324,12 @@ dapls_modify_qp_state ( IN ib_qp_handle_t	qp_handle,
 		/* additional attributes with RTR and RTS */
 		case IBV_QPS_RTR:
 		{
+			dapl_dbg_log(DAPL_DBG_TYPE_EP,
+				     " QPS_RTR: type %d qpn %x lid %x"
+				     " port %x\n",
+			             qp_handle->qp_type,
+				     qp_cm->qpn, qp_cm->lid, qp_cm->port);
+
 			mask |= IBV_QP_AV                 |
 				IBV_QP_PATH_MTU           |
 				IBV_QP_DEST_QPN           |
@@ -329,46 +346,59 @@ dapls_modify_qp_state ( IN ib_qp_handle_t	qp_handle,
 				ep_ptr->param.ep_attr.max_rdma_read_out;
 			qp_attr.min_rnr_timer =
 				ia_ptr->hca_ptr->ib_trans.rnr_timer;
+			
+			/* address handle */
 			qp_attr.ah_attr.dlid = qp_cm->lid;
-			/* global routing */
 			if (ia_ptr->hca_ptr->ib_trans.global) {
 				qp_attr.ah_attr.is_global = 1;
 				qp_attr.ah_attr.grh.dgid = qp_cm->gid;
 				qp_attr.ah_attr.grh.hop_limit = 
-						ia_ptr->hca_ptr->ib_trans.hop_limit;
+					ia_ptr->hca_ptr->ib_trans.hop_limit;
 				qp_attr.ah_attr.grh.traffic_class = 
-						ia_ptr->hca_ptr->ib_trans.tclass;
+					ia_ptr->hca_ptr->ib_trans.tclass;
 			}
 			qp_attr.ah_attr.sl = 0;
 			qp_attr.ah_attr.src_path_bits = 0;
-			qp_attr.ah_attr.port_num = qp_cm->port;
-			
-			dapl_dbg_log (DAPL_DBG_TYPE_EP,
-			      " modify_qp_rtr: qpn %x lid %x "
-			      "port %x rd_atomic %d\n",
-			      qp_cm->qpn, qp_cm->lid, qp_cm->port,
-			      qp_attr.max_dest_rd_atomic );
-
+			qp_attr.ah_attr.port_num = ia_ptr->hca_ptr->port_num;
+#ifdef DAT_EXTENSIONS
+			/* UD: create AH for remote side */
+			if (qp_handle->qp_type == IBV_QPT_UD) {
+				ib_pd_handle_t	pz;
+				pz = ((DAPL_PZ *)
+					ep_ptr->param.pz_handle)->pd_handle;
+				mask = IBV_QP_STATE;
+				cm_ptr->ah = ibv_create_ah(pz,
+							   &qp_attr.ah_attr);
+				if (!cm_ptr->ah)
+					return(dapl_convert_errno(errno,
+								  "ibv_ah"));
+			}
+#endif			
 			break;
 		}		
 		case IBV_QPS_RTS: 
 		{
-			mask |= IBV_QP_TIMEOUT            |
-				IBV_QP_RETRY_CNT          |
-				IBV_QP_RNR_RETRY          |
-				IBV_QP_SQ_PSN             |
-				IBV_QP_MAX_QP_RD_ATOMIC;
-
+			mask |= IBV_QP_SQ_PSN;
+			if (qp_handle->qp_type == IBV_QPT_RC) {
+				mask |= IBV_QP_TIMEOUT            |
+					IBV_QP_RETRY_CNT          |
+					IBV_QP_RNR_RETRY          |
+					IBV_QP_MAX_QP_RD_ATOMIC;
+
+				qp_attr.timeout	= 
+				    ia_ptr->hca_ptr->ib_trans.ack_timer;
+				qp_attr.retry_cnt = 
+				    ia_ptr->hca_ptr->ib_trans.ack_retry;
+				qp_attr.rnr_retry = 
+				    ia_ptr->hca_ptr->ib_trans.rnr_retry;
+				qp_attr.max_rd_atomic = 
+				    ep_ptr->param.ep_attr.max_rdma_read_out;
+			}
 			qp_attr.qp_state	= IBV_QPS_RTS;
-			qp_attr.timeout		= ia_ptr->hca_ptr->ib_trans.ack_timer;
-			qp_attr.retry_cnt	= ia_ptr->hca_ptr->ib_trans.ack_retry;
-			qp_attr.rnr_retry	= ia_ptr->hca_ptr->ib_trans.rnr_retry;
 			qp_attr.sq_psn		= 1;
-			qp_attr.max_rd_atomic	= 
-				ep_ptr->param.ep_attr.max_rdma_read_out;
 
 			dapl_dbg_log(DAPL_DBG_TYPE_EP,
-				" modify_qp_rts: psn %x rd_atomic %d ack %d "
+				" QPS_RTS: psn %x rd_atomic %d ack %d "
 				" retry %d rnr_retry %d\n",
 				qp_attr.sq_psn, qp_attr.max_rd_atomic, 
 				qp_attr.timeout, qp_attr.retry_cnt, 
@@ -377,23 +407,29 @@ dapls_modify_qp_state ( IN ib_qp_handle_t	qp_handle,
 		}
 		case IBV_QPS_INIT: 
 		{
-			mask |= IBV_QP_PKEY_INDEX	|
-				IBV_QP_PORT		|
-				IBV_QP_ACCESS_FLAGS;
-
-			qp_attr.pkey_index  = 0;
-			qp_attr.port_num = ia_ptr->hca_ptr->port_num;
-			qp_attr.qp_access_flags = 
+			mask |= IBV_QP_PKEY_INDEX | IBV_QP_PORT;
+			if (qp_handle->qp_type == IBV_QPT_RC) {
+				mask |= IBV_QP_ACCESS_FLAGS;
+				qp_attr.qp_access_flags = 
 					IBV_ACCESS_LOCAL_WRITE |
 					IBV_ACCESS_REMOTE_WRITE |
 					IBV_ACCESS_REMOTE_READ |
 					IBV_ACCESS_REMOTE_ATOMIC |
 					IBV_ACCESS_MW_BIND;
-			
+			}
+#ifdef DAT_EXTENSIONS
+			if (qp_handle->qp_type == IBV_QPT_UD) {
+				mask |= IBV_QP_QKEY;
+				qp_attr.qkey = SCM_UD_QKEY;
+			}
+#endif
+			qp_attr.pkey_index  = 0;
+			qp_attr.port_num = ia_ptr->hca_ptr->port_num;
+									
 			dapl_dbg_log (DAPL_DBG_TYPE_EP,
-				" modify_qp_init: pi %x port %x acc %x\n",
+				" QPS_INIT: pi %x port %x acc %x qkey 0x%x\n",
 				qp_attr.pkey_index, qp_attr.port_num,
-				qp_attr.qp_access_flags );
+				qp_attr.qp_access_flags, qp_attr.qkey);
 			break;
 		}
 		default:
diff --git a/dapl/openib_scm/dapl_ib_util.c b/dapl/openib_scm/dapl_ib_util.c
index 362710d..43f85ac 100644
--- a/dapl/openib_scm/dapl_ib_util.c
+++ b/dapl/openib_scm/dapl_ib_util.c
@@ -436,7 +436,7 @@ DAT_RETURN dapls_ib_query_hca (
 		ia_attr->max_lmr_virtual_address  = dev_attr.max_mr_size;
 		ia_attr->max_rmr_target_address   = dev_attr.max_mr_size;
 		ia_attr->max_pzs                  = dev_attr.max_pd;
-		ia_attr->max_mtu_size             = port_attr.max_msg_sz;
+		ia_attr->max_message_size         = port_attr.max_msg_sz;
 		ia_attr->max_rdma_size            = port_attr.max_msg_sz;
 		ia_attr->max_iov_segments_per_rdma_read = dev_attr.max_sge;
 		ia_attr->max_iov_segments_per_rdma_write = dev_attr.max_sge;
@@ -463,14 +463,14 @@ DAT_RETURN dapls_ib_query_hca (
 			ia_attr->max_evds, ia_attr->max_evd_qlen );
 		dapl_dbg_log (DAPL_DBG_TYPE_UTIL, 
 			" query_hca: msg %llu rdma %llu iov %d lmr %d rmr %d ack_time %d\n", 
-			ia_attr->max_mtu_size, ia_attr->max_rdma_size,
+			ia_attr->max_message_size, ia_attr->max_rdma_size,
 			ia_attr->max_iov_segments_per_dto, ia_attr->max_lmrs, 
 			ia_attr->max_rmrs,hca_ptr->ib_trans.ack_timer );
 	}
 	
 	if (ep_attr != NULL) {
 		(void) dapl_os_memzero(ep_attr, sizeof(*ep_attr));
-		ep_attr->max_mtu_size     = port_attr.max_msg_sz;
+		ep_attr->max_message_size = port_attr.max_msg_sz;
 		ep_attr->max_rdma_size    = port_attr.max_msg_sz;
 		ep_attr->max_recv_dtos    = dev_attr.max_qp_wr;
 		ep_attr->max_request_dtos = dev_attr.max_qp_wr;
@@ -479,8 +479,9 @@ DAT_RETURN dapls_ib_query_hca (
 		ep_attr->max_rdma_read_in = dev_attr.max_qp_rd_atom;
 		ep_attr->max_rdma_read_out= dev_attr.max_qp_rd_atom;
 		dapl_dbg_log (DAPL_DBG_TYPE_UTIL, 
-			" query_hca: MAX msg %llu dto %d iov %d rdma i%d,o%d\n", 
-			ep_attr->max_mtu_size,
+			" query_hca: MAX msg %llu mtu %d dto %d iov %d"
+			" rdma i%d,o%d\n", 
+			ep_attr->max_message_size,
 			ep_attr->max_recv_dtos, ep_attr->max_recv_iov,
 			ep_attr->max_rdma_read_in, ep_attr->max_rdma_read_out);
 	}
@@ -574,6 +575,9 @@ DAT_NAMED_ATTR  ib_attrs[] = {
     {
         DAT_IB_ATTR_IMMED_DATA, "TRUE"
     },
+    {
+        DAT_IB_ATTR_UD, "TRUE"
+    },
 #endif
 };
 
diff --git a/dapl/openib_scm/dapl_ib_util.h b/dapl/openib_scm/dapl_ib_util.h
index 39eb245..bd3ea83 100644
--- a/dapl/openib_scm/dapl_ib_util.h
+++ b/dapl/openib_scm/dapl_ib_util.h
@@ -52,6 +52,10 @@
 #include <infiniband/verbs.h>
 #include <byteswap.h>
 
+#ifdef DAT_EXTENSIONS
+#include <dat2/dat_ib_extensions.h>
+#endif
+
 #ifndef __cplusplus
 #define false 0
 #define true  1
@@ -72,7 +76,7 @@ typedef ib_hca_handle_t		dapl_ibal_ca_t;
 /* CM mappings, user CM not complete use SOCKETS */
 
 /* destination info to exchange, define wire protocol version */
-#define DSCM_VER 2
+#define DSCM_VER 3
 typedef struct _ib_qp_cm
 { 
 	uint16_t		ver;
@@ -83,6 +87,7 @@ typedef struct _ib_qp_cm
 	uint32_t		p_size;
 	DAT_SOCK_ADDR6		ia_address;
         union ibv_gid		gid;
+	uint16_t		qp_type; 
 } ib_qp_cm_t;
 
 /* 
@@ -117,10 +122,11 @@ struct ib_cm_handle
 	SCM_STATE		state;
 	int			socket;
 	struct dapl_hca		*hca;
-	DAT_HANDLE		sp;	
+	struct dapl_sp		*sp;	
 	struct dapl_ep 		*ep;	
 	ib_qp_cm_t		dst;
 	unsigned char		p_data[256];
+	struct ibv_ah		*ah;
 };
 
 typedef struct ib_cm_handle	*dp_ib_cm_handle_t;
@@ -180,6 +186,9 @@ typedef struct ibv_comp_channel *ib_wait_obj_handle_t;
 /* inline send rdma threshold */
 #define	INLINE_SEND_DEFAULT	128
 
+/* qkey for UD QP's */
+#define SCM_UD_QKEY	0x78654321
+
 /* RC timer - retry count defaults */
 #define SCM_ACK_TIMER 15	/* 5 bits, 4.096us*2^ack_timer. 15 == 134ms */
 #define SCM_ACK_RETRY 7		/* 3 bits, 7 * 134ms = 940ms */
@@ -207,8 +216,11 @@ typedef struct ibv_comp_channel *ib_wait_obj_handle_t;
 #define OP_COMP_AND_SWAP        IBV_WR_ATOMIC_CMP_AND_SWP
 #define OP_FETCH_AND_ADD        IBV_WR_ATOMIC_FETCH_AND_ADD
 #define OP_RECEIVE              7   /* internal op */
-#define OP_RECEIVE_IMM		8   /* internel op */
-#define OP_BIND_MW              9   /* internal op */
+#define OP_RECEIVE_IMM		8   /* rdma write with immed, internel op */
+#define OP_RECEIVE_MSG_IMM	9   /* recv msg with immed, internel op */
+#define OP_BIND_MW              10   /* internal op */
+#define OP_SEND_UD              11  /* internal op */
+#define OP_RECV_UD              12  /* internal op */
 #define OP_INVALID		0xff
 
 /* Definitions to map QP state */
@@ -321,7 +333,7 @@ void dapli_cq_thread_destroy(struct dapl_hca *hca_ptr);
 DAT_RETURN
 dapls_modify_qp_state ( IN ib_qp_handle_t	qp_handle,
 			IN ib_qp_state_t	qp_state,
-			IN ib_qp_cm_t		*qp_cm );
+			IN struct ib_cm_handle	*cm_ptr );
 
 /* inline functions */
 STATIC _INLINE_ IB_HCA_NAME dapl_ib_convert_name (IN char *name)
-- 
1.5.2.5





More information about the general mailing list