[ofa-general] [PATCH 1/4] DAPL v2: ucm: fix issues with UD type QP's
Arlin Davis
arlin.r.davis at intel.com
Wed Sep 9 15:14:33 PDT 2009
private data size not in host order when processing
connection events.
ud extentions event should include original ia_addr
and qpn used during connection and not the IB qpn.
ucm QP service resource cleanup in wrong order.
cleanup extra cr/lf device.c
Signed-off-by: Arlin Davis <arlin.r.davis at intel.com>
---
dapl/openib_common/qp.c | 5 +
dapl/openib_ucm/cm.c | 71 +++++++++++++---
dapl/openib_ucm/device.c | 206 ++++++++++++++++++++++++----------------------
3 files changed, 172 insertions(+), 110 deletions(-)
diff --git a/dapl/openib_common/qp.c b/dapl/openib_common/qp.c
index 581fc83..09a61b1 100644
--- a/dapl/openib_common/qp.c
+++ b/dapl/openib_common/qp.c
@@ -557,6 +557,7 @@ dapls_create_ah(IN DAPL_HCA *hca,
/* address handle. RC and UD */
qp_attr.ah_attr.dlid = ntohs(lid);
if (gid != NULL) {
+ dapl_log(DAPL_DBG_TYPE_CM, "dapl_create_ah: with GID\n");
qp_attr.ah_attr.is_global = 1;
qp_attr.ah_attr.grh.dgid.global.subnet_prefix =
ntohll(gid->global.subnet_prefix);
@@ -569,6 +570,10 @@ dapls_create_ah(IN DAPL_HCA *hca,
qp_attr.ah_attr.src_path_bits = 0;
qp_attr.ah_attr.port_num = hca->port_num;
+ dapl_log(DAPL_DBG_TYPE_CM,
+ " dapls_create_ah: port %x lid %x pd %p ctx %p handle 0x%x\n",
+ hca->port_num,qp_attr.ah_attr.dlid, pd, pd->context, pd->handle);
+
/* UD: create AH for remote side */
ah = ibv_create_ah(pd, &qp_attr.ah_attr);
if (!ah) {
diff --git a/dapl/openib_ucm/cm.c b/dapl/openib_ucm/cm.c
index a2db64e..72de5d5 100644
--- a/dapl/openib_ucm/cm.c
+++ b/dapl/openib_ucm/cm.c
@@ -381,7 +381,7 @@ dp_ib_cm_handle_t ucm_cm_find(ib_hca_transport_t *tp, ib_cm_msg_t *msg)
continue;
dapl_dbg_log(DAPL_DBG_TYPE_CM,
- " MATCH? cm %p st %s sport %x sqpn %x lid %x\n",
+ " MATCH? cm %p st %s sport %d sqpn %x lid %x\n",
cm, dapl_cm_state_str(cm->state),
ntohs(cm->msg.sport), ntohl(cm->msg.sqpn),
ntohs(cm->msg.saddr.ib.lid));
@@ -755,7 +755,7 @@ DAT_RETURN
dapli_cm_connect(DAPL_EP *ep, dp_ib_cm_handle_t cm)
{
dapl_log(DAPL_DBG_TYPE_EP,
- " connect: lid %x qpn %x lport %d p_sz=%d -> "
+ " connect: lid %x i_qpn %x lport %d p_sz=%d -> "
" lid %x c_qpn %x rport %d\n",
htons(cm->msg.saddr.ib.lid), htonl(cm->msg.saddr.ib.qpn),
htons(cm->msg.sport), htons(cm->msg.p_size),
@@ -934,6 +934,30 @@ ud_bail:
dapl_os_memcpy(&xevent.remote_ah.ia_addr,
&cm->msg.daddr,
sizeof(union dcm_addr));
+ /* remote ia_addr reference includes ucm qpn, not IB qpn */
+ ((union dcm_addr*)
+ &xevent.remote_ah.ia_addr)->ib.qpn = cm->msg.dqpn;
+
+ dapl_dbg_log(DAPL_DBG_TYPE_EP,
+ " ACTIVE: UD xevent ah %p qpn 0x%x lid 0x%x\n",
+ xevent.remote_ah.ah, xevent.remote_ah.qpn, lid);
+ dapl_dbg_log(DAPL_DBG_TYPE_EP,
+ " ACTIVE: UD xevent ia_addr qp_type %d, port %d"
+ " lid 0x%x qpn 0x%x gid 0x"F64x" 0x"F64x" \n",
+ ((union dcm_addr*)
+ &xevent.remote_ah.ia_addr)->ib.qp_type,
+ ((union dcm_addr*)
+ &xevent.remote_ah.ia_addr)->ib.port_num,
+ ntohs(((union dcm_addr*)
+ &xevent.remote_ah.ia_addr)->ib.lid),
+ ntohl(((union dcm_addr*)
+ &xevent.remote_ah.ia_addr)->ib.qpn),
+ ntohll(((union dcm_addr*)
+ &xevent.remote_ah.ia_addr)->
+ ib.gid.global.subnet_prefix),
+ ntohll(((union dcm_addr*)
+ &xevent.remote_ah.ia_addr)->
+ ib.gid.global.interface_id));
if (event == IB_CME_CONNECTED)
event = DAT_IB_UD_CONNECTION_EVENT_ESTABLISHED;
@@ -944,7 +968,7 @@ ud_bail:
(DAPL_EVD *)cm->ep->param.connect_evd_handle,
event,
(DAT_EP_HANDLE)ep,
- (DAT_COUNT)cm->msg.p_size,
+ (DAT_COUNT)ntohs(cm->msg.p_size),
(DAT_PVOID *)cm->msg.p_data,
(DAT_PVOID *)&xevent);
@@ -1026,7 +1050,7 @@ static void ucm_accept(ib_cm_srvc_handle_t cm, ib_cm_msg_t *msg)
dapls_evd_post_cr_event_ext(acm->sp,
DAT_IB_UD_CONNECTION_REQUEST_EVENT,
acm,
- (DAT_COUNT)acm->msg.p_size,
+ (DAT_COUNT)ntohs(acm->msg.p_size),
(DAT_PVOID *)acm->msg.p_data,
(DAT_PVOID *)&xevent);
} else
@@ -1070,7 +1094,7 @@ static void ucm_accept_rtu(dp_ib_cm_handle_t cm, ib_cm_msg_t *msg)
dapl_dbg_log(DAPL_DBG_TYPE_CM, " PASSIVE: connected!\n");
#ifdef DAT_EXTENSIONS
- if (cm->msg.daddr.ib.qp_type == IBV_QPT_UD) {
+ if (cm->msg.saddr.ib.qp_type == IBV_QPT_UD) {
DAT_IB_EXTENSION_EVENT_DATA xevent;
uint16_t lid = ntohs(cm->msg.daddr.ib.lid);
@@ -1081,13 +1105,37 @@ static void ucm_accept_rtu(dp_ib_cm_handle_t cm, ib_cm_msg_t *msg)
xevent.remote_ah.qpn = ntohl(cm->msg.daddr.ib.qpn);
dapl_os_memcpy(&xevent.remote_ah.ia_addr,
&cm->msg.daddr,
- sizeof(cm->msg.daddr));
+ sizeof(union dcm_addr));
+ /* remote ia_addr reference includes ucm qpn, not IB qpn */
+ ((union dcm_addr*)
+ &xevent.remote_ah.ia_addr)->ib.qpn = cm->msg.dqpn;
+
+ dapl_dbg_log(DAPL_DBG_TYPE_EP,
+ " PASSIVE: UD xevent ah %p qpn 0x%x lid 0x%x\n",
+ xevent.remote_ah.ah, xevent.remote_ah.qpn, lid);
+ dapl_dbg_log(DAPL_DBG_TYPE_EP,
+ " PASSIVE: UD xevent ia_addr qp_type %d, port %d"
+ " lid 0x%x qpn 0x%x gid 0x"F64x" 0x"F64x" \n",
+ ((union dcm_addr*)
+ &xevent.remote_ah.ia_addr)->ib.qp_type,
+ ((union dcm_addr*)
+ &xevent.remote_ah.ia_addr)->ib.port_num,
+ ntohs(((union dcm_addr*)
+ &xevent.remote_ah.ia_addr)->ib.lid),
+ ntohl(((union dcm_addr*)
+ &xevent.remote_ah.ia_addr)->ib.qpn),
+ ntohll(((union dcm_addr*)
+ &xevent.remote_ah.ia_addr)->
+ ib.gid.global.subnet_prefix),
+ ntohll(((union dcm_addr*)
+ &xevent.remote_ah.ia_addr)->
+ ib.gid.global.interface_id));
dapls_evd_post_connection_event_ext(
(DAPL_EVD *)cm->ep->param.connect_evd_handle,
DAT_IB_UD_CONNECTION_EVENT_ESTABLISHED,
(DAT_EP_HANDLE)cm->ep,
- (DAT_COUNT)cm->msg.p_size,
+ (DAT_COUNT)ntohs(cm->msg.p_size),
(DAT_PVOID *)cm->msg.p_data,
(DAT_PVOID *)&xevent);
@@ -1130,9 +1178,9 @@ dapli_accept_usr(DAPL_EP *ep, DAPL_CR *cr, DAT_COUNT p_size, DAT_PVOID p_data)
dapl_dbg_log(DAPL_DBG_TYPE_CM,
" ACCEPT_USR: remote port_num=%d lid=%x"
" iqp=%x qp_type %d, psize=%d\n",
- cm->msg.daddr.ib.port_num, cm->msg.daddr.ib.lid,
- cm->msg.daddr.ib.qpn, cm->msg.daddr.ib.qp_type,
- cm->msg.p_size);
+ cm->msg.daddr.ib.port_num, ntohs(cm->msg.daddr.ib.lid),
+ ntohl(cm->msg.daddr.ib.qpn), cm->msg.daddr.ib.qp_type,
+ ntohs(cm->msg.p_size));
dapl_dbg_log(DAPL_DBG_TYPE_CM,
" ACCEPT_USR: remote GID subnet %016llx id %016llx\n",
@@ -1186,7 +1234,7 @@ dapli_accept_usr(DAPL_EP *ep, DAPL_CR *cr, DAT_COUNT p_size, DAT_PVOID p_data)
/* setup local QP info and type from EP, copy pdata, for reply */
cm->msg.op = htons(DCM_REP);
cm->msg.saddr.ib.qpn = htonl(ep->qp_handle->qp_num);
- cm->msg.saddr.ib.qp_type = htons(ep->qp_handle->qp_type);
+ cm->msg.saddr.ib.qp_type = ep->qp_handle->qp_type;
cm->msg.saddr.ib.port_num = cm->hca->port_num;
cm->msg.saddr.ib.lid = cm->hca->ib_trans.addr.ib.lid;
cm->msg.saddr.ib.gid = cm->hca->ib_trans.addr.ib.gid;
@@ -1254,6 +1302,7 @@ dapls_ib_connect(IN DAT_EP_HANDLE ep_handle,
/* remote uCM information, comes from consumer provider r_addr */
cm->msg.dport = htons((uint16_t)r_psp);
cm->msg.dqpn = cm->msg.daddr.ib.qpn;
+ cm->msg.daddr.ib.qpn = 0; /* don't have a remote qpn until reply */
if (p_size) {
cm->msg.p_size = htons(p_size);
diff --git a/dapl/openib_ucm/device.c b/dapl/openib_ucm/device.c
index 329b050..243044a 100644
--- a/dapl/openib_ucm/device.c
+++ b/dapl/openib_ucm/device.c
@@ -281,8 +281,9 @@ found:
}
dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- " open_hca: devname %s, port %d, hostname_IP %s\n",
+ " open_hca: devname %s, ctx %p port %d, hostname_IP %s\n",
ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
+ hca_ptr->ib_hca_handle,
hca_ptr->ib_trans.addr.ib.port_num,
inet_ntoa(((struct sockaddr_in *)
&hca_ptr->hca_address)->sin_addr));
@@ -371,72 +372,79 @@ static void ucm_service_destroy(IN DAPL_HCA *hca)
ib_hca_transport_t *tp = &hca->ib_trans;
int msg_size = sizeof(ib_cm_msg_t);
- if (tp->pd)
- ibv_dealloc_pd(tp->pd);
-
- if (tp->rch)
- ibv_destroy_comp_channel(tp->rch);
-
- if (tp->scq)
- ibv_destroy_cq(tp->scq);
-
- if (tp->rcq)
- ibv_destroy_cq(tp->rcq);
-
- if (tp->qp)
- ibv_destroy_qp(tp->qp);
-
if (tp->mr_sbuf)
ibv_dereg_mr(tp->mr_sbuf);
if (tp->mr_sbuf)
- ibv_dereg_mr(tp->mr_sbuf);
-
- if (tp->ah)
- dapl_os_free(tp->ah, (sizeof(*tp->ah) * 0xffff));
-
- if (tp->sid)
- dapl_os_free(tp->sid, (sizeof(*tp->sid) * 0xffff));
-
- if (tp->rbuf)
- dapl_os_free(tp->rbuf, (msg_size * tp->qpe));
-
- if (tp->sbuf)
- dapl_os_free(tp->sbuf, (msg_size * tp->qpe));
+ ibv_dereg_mr(tp->mr_sbuf);
+
+ if (tp->qp)
+ ibv_destroy_qp(tp->qp);
+
+ if (tp->scq)
+ ibv_destroy_cq(tp->scq);
+
+ if (tp->rcq)
+ ibv_destroy_cq(tp->rcq);
+
+ if (tp->rch)
+ ibv_destroy_comp_channel(tp->rch);
+
+ dapl_log(DAPL_DBG_TYPE_UTIL,
+ " destroy_service: pd %p ctx %p handle 0x%x\n",
+ tp->pd, tp->pd->context, tp->pd->handle);
+ if (tp->pd)
+ ibv_dealloc_pd(tp->pd);
+
+ if (tp->ah)
+ dapl_os_free(tp->ah, (sizeof(*tp->ah) * 0xffff));
+
+ if (tp->sid)
+ dapl_os_free(tp->sid, (sizeof(*tp->sid) * 0xffff));
+
+ if (tp->rbuf)
+ dapl_os_free(tp->rbuf, (msg_size * tp->qpe));
+
+ if (tp->sbuf)
+ dapl_os_free(tp->sbuf, (msg_size * tp->qpe));
}
static int ucm_service_create(IN DAPL_HCA *hca)
{
- struct ibv_qp_init_attr qp_create;
- ib_hca_transport_t *tp = &hca->ib_trans;
- struct ibv_recv_wr recv_wr, *recv_err;
- struct ibv_sge sge;
- int i, mlen = sizeof(ib_cm_msg_t);
- int hlen = sizeof(struct ibv_grh); /* hdr included with UD recv */
+ struct ibv_qp_init_attr qp_create;
+ ib_hca_transport_t *tp = &hca->ib_trans;
+ struct ibv_recv_wr recv_wr, *recv_err;
+ struct ibv_sge sge;
+ int i, mlen = sizeof(ib_cm_msg_t);
+ int hlen = sizeof(struct ibv_grh); /* hdr included with UD recv */
dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " ucm_create: \n");
/* get queue sizes */
tp->qpe = dapl_os_get_env_val("DAPL_UCM_QPE", UCM_DEFAULT_QPE);
- tp->cqe = dapl_os_get_env_val("DAPL_UCM_CQE", UCM_DEFAULT_CQE);
- tp->pd = ibv_alloc_pd(hca->ib_hca_handle);
- if (!tp->pd)
- goto bail;
-
- tp->rch = ibv_create_comp_channel(hca->ib_hca_handle);
- if (!tp->rch)
- goto bail;
-
- tp->scq = ibv_create_cq(hca->ib_hca_handle, tp->cqe, hca, NULL, 0);
- if (!tp->scq)
- goto bail;
+ tp->cqe = dapl_os_get_env_val("DAPL_UCM_CQE", UCM_DEFAULT_CQE);
+ tp->pd = ibv_alloc_pd(hca->ib_hca_handle);
+ if (!tp->pd)
+ goto bail;
+
+ dapl_log(DAPL_DBG_TYPE_UTIL,
+ " create_service: pd %p ctx %p handle 0x%x\n",
+ tp->pd, tp->pd->context, tp->pd->handle);
+
+ tp->rch = ibv_create_comp_channel(hca->ib_hca_handle);
+ if (!tp->rch)
+ goto bail;
+
+ tp->scq = ibv_create_cq(hca->ib_hca_handle, tp->cqe, hca, NULL, 0);
+ if (!tp->scq)
+ goto bail;
- tp->rcq = ibv_create_cq(hca->ib_hca_handle, tp->cqe, hca, tp->rch, 0);
- if (!tp->rcq)
- goto bail;
-
- if(ibv_req_notify_cq(tp->rcq, 0))
- goto bail;
+ tp->rcq = ibv_create_cq(hca->ib_hca_handle, tp->cqe, hca, tp->rch, 0);
+ if (!tp->rcq)
+ goto bail;
+
+ if(ibv_req_notify_cq(tp->rcq, 0))
+ goto bail;
dapl_os_memzero((void *)&qp_create, sizeof(qp_create));
qp_create.qp_type = IBV_QPT_UD;
@@ -446,59 +454,59 @@ static int ucm_service_create(IN DAPL_HCA *hca)
qp_create.cap.max_send_sge = qp_create.cap.max_recv_sge = 1;
qp_create.cap.max_inline_data = tp->max_inline_send;
qp_create.qp_context = (void *)hca;
-
- tp->qp = ibv_create_qp(tp->pd, &qp_create);
- if (!tp->qp)
- goto bail;
-
+
+ tp->qp = ibv_create_qp(tp->pd, &qp_create);
+ if (!tp->qp)
+ goto bail;
+
tp->ah = (ib_ah_handle_t*) dapl_os_alloc(sizeof(ib_ah_handle_t) * 0xffff);
tp->sid = (uint8_t*) dapl_os_alloc(sizeof(uint8_t) * 0xffff);
tp->rbuf = (void*) dapl_os_alloc((mlen + hlen) * tp->qpe);
tp->sbuf = (void*) dapl_os_alloc(mlen * tp->qpe);
-
- if (!tp->ah || !tp->rbuf || !tp->sbuf || !tp->sid)
- goto bail;
-
+
+ if (!tp->ah || !tp->rbuf || !tp->sbuf || !tp->sid)
+ goto bail;
+
(void)dapl_os_memzero(tp->ah, (sizeof(ib_ah_handle_t) * 0xffff));
(void)dapl_os_memzero(tp->sid, (sizeof(uint8_t) * 0xffff));
tp->sid[0] = 1; /* resv slot 0, 0 == no ports available */
- (void)dapl_os_memzero(tp->rbuf, ((mlen + hlen) * tp->qpe));
- (void)dapl_os_memzero(tp->sbuf, (mlen * tp->qpe));
-
- tp->mr_sbuf = ibv_reg_mr(tp->pd, tp->sbuf,
- (mlen * tp->qpe),
- IBV_ACCESS_LOCAL_WRITE);
- if (!tp->mr_sbuf)
- goto bail;
-
- tp->mr_rbuf = ibv_reg_mr(tp->pd, tp->rbuf,
- ((mlen + hlen) * tp->qpe),
- IBV_ACCESS_LOCAL_WRITE);
- if (!tp->mr_rbuf)
- goto bail;
-
- /* modify UD QP: init, rtr, rts */
- if ((dapls_modify_qp_ud(hca, tp->qp)) != DAT_SUCCESS)
- goto bail;
-
- /* post receive buffers, setup head, tail pointers */
- recv_wr.next = NULL;
- recv_wr.sg_list = &sge;
- recv_wr.num_sge = 1;
- sge.length = mlen + hlen;
- sge.lkey = tp->mr_rbuf->lkey;
-
- for (i = 0; i < tp->qpe; i++) {
- recv_wr.wr_id =
- (uintptr_t)((char *)&tp->rbuf[i] +
- sizeof(struct ibv_grh));
- sge.addr = (uintptr_t) &tp->rbuf[i];
- if (ibv_post_recv(tp->qp, &recv_wr, &recv_err))
- goto bail;
- }
-
- /* save qp_num as part of ia_address, network order */
- tp->addr.ib.qpn = htonl(tp->qp->qp_num);
+ (void)dapl_os_memzero(tp->rbuf, ((mlen + hlen) * tp->qpe));
+ (void)dapl_os_memzero(tp->sbuf, (mlen * tp->qpe));
+
+ tp->mr_sbuf = ibv_reg_mr(tp->pd, tp->sbuf,
+ (mlen * tp->qpe),
+ IBV_ACCESS_LOCAL_WRITE);
+ if (!tp->mr_sbuf)
+ goto bail;
+
+ tp->mr_rbuf = ibv_reg_mr(tp->pd, tp->rbuf,
+ ((mlen + hlen) * tp->qpe),
+ IBV_ACCESS_LOCAL_WRITE);
+ if (!tp->mr_rbuf)
+ goto bail;
+
+ /* modify UD QP: init, rtr, rts */
+ if ((dapls_modify_qp_ud(hca, tp->qp)) != DAT_SUCCESS)
+ goto bail;
+
+ /* post receive buffers, setup head, tail pointers */
+ recv_wr.next = NULL;
+ recv_wr.sg_list = &sge;
+ recv_wr.num_sge = 1;
+ sge.length = mlen + hlen;
+ sge.lkey = tp->mr_rbuf->lkey;
+
+ for (i = 0; i < tp->qpe; i++) {
+ recv_wr.wr_id =
+ (uintptr_t)((char *)&tp->rbuf[i] +
+ sizeof(struct ibv_grh));
+ sge.addr = (uintptr_t) &tp->rbuf[i];
+ if (ibv_post_recv(tp->qp, &recv_wr, &recv_err))
+ goto bail;
+ }
+
+ /* save qp_num as part of ia_address, network order */
+ tp->addr.ib.qpn = htonl(tp->qp->qp_num);
return 0;
bail:
dapl_log(DAPL_DBG_TYPE_ERR,
--
1.5.2.5
More information about the general
mailing list