[ofw] [PATCH 2/5] uDAPL v2: ucm: increase timers during subsequent retries, add create_ah error checking
Davis, Arlin R
arlin.r.davis at intel.com
Wed Oct 28 16:19:08 PDT 2009
- increase timers during subsequent retries (the timeout is now shifted left by the retry count, so it doubles on each retry),
- check/process create_ah errors during connect phase,
- clean up some debug messaging.
Signed-off-by: Arlin Davis <arlin.r.davis at intel.com>
---
dapl/openib_ucm/cm.c | 81 ++++++++++++++++++++++++++-----------------------
1 files changed, 43 insertions(+), 38 deletions(-)
diff --git a/dapl/openib_ucm/cm.c b/dapl/openib_ucm/cm.c
index 96ee382..07b8458 100644
--- a/dapl/openib_ucm/cm.c
+++ b/dapl/openib_ucm/cm.c
@@ -163,17 +163,16 @@ static void ucm_check_timers(dp_ib_cm_handle_t cm, int *timer)
*timer = cm->hca->ib_trans.cm_timer;
/* wait longer each retry */
if ((time - cm->timer)/1000 >
- (cm->hca->ib_trans.rep_time * cm->retries)) {
+ (cm->hca->ib_trans.rep_time << cm->retries)) {
dapl_log(DAPL_DBG_TYPE_WARN,
" CM_REQ retry %d [lid, port, qpn]:"
- " %x %x %x -> %x %x %x \n",
- cm->retries,
- ntohs(cm->msg.saddr.ib.lid),
- ntohs(cm->msg.sport),
- ntohl(cm->msg.saddr.ib.qpn),
- ntohs(cm->msg.daddr.ib.lid),
- ntohs(cm->msg.dport),
- ntohl(cm->msg.dqpn));
+ " %x %x %x -> %x %x %x Time(ms) %llu > %llu\n",
+ cm->retries, ntohs(cm->msg.saddr.ib.lid),
+ ntohs(cm->msg.sport), ntohl(cm->msg.saddr.ib.qpn),
+ ntohs(cm->msg.daddr.ib.lid), ntohs(cm->msg.dport),
+ ntohl(cm->msg.dqpn), (time - cm->timer)/1000,
+ cm->hca->ib_trans.rep_time << cm->retries);
+ cm->retries++;
dapl_os_unlock(&cm->lock);
dapli_cm_connect(cm->ep, cm);
return;
@@ -182,10 +181,10 @@ static void ucm_check_timers(dp_ib_cm_handle_t cm, int *timer)
case DCM_RTU_PENDING:
*timer = cm->hca->ib_trans.cm_timer;
if ((time - cm->timer)/1000 >
- (cm->hca->ib_trans.rtu_time * cm->retries)) {
+ (cm->hca->ib_trans.rtu_time << cm->retries)) {
dapl_log(DAPL_DBG_TYPE_WARN,
" CM_REPLY retry %d [lid, port, qpn]:"
- " %x %x %x -> %x %x %x r_pid %x,%d\n",
+ " %x %x %x -> %x %x %x r_pid %x,%d Time(ms) %llu > %llu\n",
cm->retries,
ntohs(cm->msg.saddr.ib.lid),
ntohs(cm->msg.sport),
@@ -194,7 +193,9 @@ static void ucm_check_timers(dp_ib_cm_handle_t cm, int *timer)
ntohs(cm->msg.dport),
ntohl(cm->msg.daddr.ib.qpn),
ntohl(*(DAT_UINT32*)cm->msg.resv),
- ntohl(*(DAT_UINT32*)cm->msg.resv));
+ ntohl(*(DAT_UINT32*)cm->msg.resv),
+ (time - cm->timer)/1000, cm->hca->ib_trans.rtu_time << cm->retries);
+ cm->retries++;
dapl_os_unlock(&cm->lock);
ucm_reply(cm);
return;
@@ -204,10 +205,10 @@ static void ucm_check_timers(dp_ib_cm_handle_t cm, int *timer)
*timer = cm->hca->ib_trans.cm_timer;
/* wait longer each retry */
if ((time - cm->timer)/1000 >
- (cm->hca->ib_trans.rep_time)) {
+ (cm->hca->ib_trans.rtu_time << cm->retries)) {
dapl_log(DAPL_DBG_TYPE_WARN,
" CM_DREQ retry %d [lid, port, qpn]:"
- " %x %x %x -> %x %x %x r_pid %x,%d\n",
+ " %x %x %x -> %x %x %x r_pid %x,%d Time(ms) %llu > %llu\n",
cm->retries,
ntohs(cm->msg.saddr.ib.lid),
ntohs(cm->msg.sport),
@@ -216,7 +217,9 @@ static void ucm_check_timers(dp_ib_cm_handle_t cm, int *timer)
ntohs(cm->msg.dport),
ntohl(cm->msg.dqpn),
ntohl(*(DAT_UINT32*)cm->msg.resv),
- ntohl(*(DAT_UINT32*)cm->msg.resv));
+ ntohl(*(DAT_UINT32*)cm->msg.resv),
+ (time - cm->timer)/1000, cm->hca->ib_trans.rtu_time << cm->retries);
+ cm->retries++;
dapl_os_unlock(&cm->lock);
dapli_cm_disconnect(cm);
return;
@@ -448,8 +451,8 @@ retry_listenq:
} else {
/* duplicate; bail and throw away */
dapl_os_unlock(lock);
- dapl_log(DAPL_DBG_TYPE_CM,
- " duplicate: op %s st %s [lid, port, qpn]:"
+ dapl_log(DAPL_DBG_TYPE_WARN,
+ " DUPLICATE: op %s st %s [lid, port, qpn]:"
" 0x%x %d 0x%x <- 0x%x %d 0x%x\n",
dapl_cm_op_str(ntohs(msg->op)),
dapl_cm_state_str(cm->state),
@@ -476,7 +479,18 @@ retry_listenq:
/* not match on listenq for valid request, send reject */
if (ntohs(msg->op) == DCM_REQ && !found)
ucm_reject(tp, msg);
-
+#if DAPL_DBG
+ if (!found) {
+ dapl_log(DAPL_DBG_TYPE_WARN,
+ " ucm_recv: NO MATCH op %s 0x%x %d i0x%x c0x%x"
+ " < 0x%x %d 0x%x\n",
+ dapl_cm_op_str(ntohs(msg->op)),
+ ntohs(msg->daddr.ib.lid), ntohs(msg->dport),
+ ntohl(msg->daddr.ib.qpn), ntohl(msg->sqpn),
+ ntohs(msg->saddr.ib.lid), ntohs(msg->sport),
+ ntohl(msg->saddr.ib.qpn));
+ }
+#endif
return found;
}
@@ -524,21 +538,10 @@ retry:
continue;
}
if (!(cm = ucm_cm_find(tp, msg))) {
- dapl_log(DAPL_DBG_TYPE_WARN,
- " ucm_recv: NO MATCH op %s 0x%x %d i0x%x c0x%x"
- " < 0x%x %d 0x%x\n",
- dapl_cm_op_str(ntohs(msg->op)),
- ntohs(msg->daddr.ib.lid), ntohs(msg->dport),
- ntohl(msg->daddr.ib.qpn),
- ntohl(msg->sqpn),
- ntohs(msg->saddr.ib.lid), ntohs(msg->sport),
- ntohl(msg->saddr.ib.qpn));
-
ucm_post_rmsg(tp, msg);
continue;
}
- dapl_dbg_log(DAPL_DBG_TYPE_CM, " ucm_recv: match %p\n",cm);
-
+
/* match, process it */
ucm_process_recv(tp, msg, cm);
ucm_post_rmsg(tp, msg);
@@ -804,14 +807,13 @@ DAT_RETURN dapli_cm_disconnect(dp_ib_cm_handle_t cm)
/* send DREQ, event after DREP or DREQ timeout */
cm->state = DCM_DISC_PENDING;
cm->msg.op = htons(DCM_DREQ);
- cm->retries = 1;
finalize = 0; /* wait for DREP, wakeup timer thread */
dapls_thread_signal(&cm->hca->ib_trans.signal);
break;
case DCM_DISC_PENDING:
/* DREQ timeout, resend until retries exhausted */
cm->msg.op = htons(DCM_DREQ);
- if (cm->retries++ >= cm->hca->ib_trans.retries)
+ if (cm->retries >= cm->hca->ib_trans.retries)
finalize = 1;
break;
case DCM_DISC_RECV:
@@ -854,7 +856,7 @@ dapli_cm_connect(DAPL_EP *ep, dp_ib_cm_handle_t cm)
return DAT_INVALID_STATE;
}
- if (cm->retries++ == cm->hca->ib_trans.retries) {
+ if (cm->retries == cm->hca->ib_trans.retries) {
dapl_log(DAPL_DBG_TYPE_WARN,
" CM_REQ: RETRIES EXHAUSTED:"
" 0x%x %d 0x%x -> 0x%x %d 0x%x\n",
@@ -895,7 +897,7 @@ dapli_cm_connect(DAPL_EP *ep, dp_ib_cm_handle_t cm)
goto bail;
/* first time through, put on work queue */
- if (cm->retries == 1)
+ if (!cm->retries)
ucm_queue_conn(cm);
return DAT_SUCCESS;
@@ -1126,7 +1128,6 @@ static void ucm_accept(ib_cm_srvc_handle_t cm, ib_cm_msg_t *msg)
/* dest CM info from CR msg, source CM info from listen */
acm->sp = cm->sp;
acm->hca = cm->hca;
- acm->retries = 1;
acm->msg.dport = msg->sport;
acm->msg.dqpn = msg->sqpn;
acm->msg.sport = cm->msg.sport;
@@ -1220,9 +1221,13 @@ static void ucm_accept_rtu(dp_ib_cm_handle_t cm, ib_cm_msg_t *msg)
cm->ep->qp_handle,
htons(lid),
NULL);
- if (xevent.remote_ah.ah == NULL)
+ if (xevent.remote_ah.ah == NULL) {
+ dapl_log(DAPL_DBG_TYPE_ERR,
+ " accept_rtu: ERR create_ah"
+ " for qpn 0x%x lid 0x%x\n",
+ xevent.remote_ah.qpn, lid);
goto bail;
-
+ }
cm->ah = xevent.remote_ah.ah; /* keep ref to destroy */
dapl_os_memcpy(&xevent.remote_ah.ia_addr,
&cm->msg.daddr,
@@ -1283,7 +1288,7 @@ static int ucm_reply(dp_ib_cm_handle_t cm)
return -1;
}
- if (++cm->retries == cm->hca->ib_trans.retries) {
+ if (cm->retries == cm->hca->ib_trans.retries) {
dapl_log(DAPL_DBG_TYPE_WARN,
" CM_REP: RETRIES EXHAUSTED"
" 0x%x %d 0x%x -> 0x%x %d 0x%x\n",
--
1.5.2.5
More information about the ofw mailing list