[ofw] [PATCH 2/3] DAPL v2.0: scm: socket connect request count is reset improperly on retry
Davis, Arlin R
arlin.r.davis at intel.com
Wed Oct 26 14:10:48 PDT 2011
Include the current retry count in the new connect request call
and set it accordingly after creating the new cm object, so that
the count is not reset on each retry.
Signed-off-by: Arlin Davis <arlin.r.davis at intel.com>
---
dapl/openib_scm/cm.c | 23 ++++++++++++-----------
1 files changed, 12 insertions(+), 11 deletions(-)
diff --git a/dapl/openib_scm/cm.c b/dapl/openib_scm/cm.c
index 305f85b..968d9b9 100644
--- a/dapl/openib_scm/cm.c
+++ b/dapl/openib_scm/cm.c
@@ -64,7 +64,7 @@
static DAT_RETURN
dapli_socket_connect(DAPL_EP * ep_ptr,
DAT_IA_ADDRESS_PTR r_addr,
- DAT_CONN_QUAL r_qual, DAT_COUNT p_size, DAT_PVOID p_data);
+ DAT_CONN_QUAL r_qual, DAT_COUNT p_size, DAT_PVOID p_data, int retries);
#ifdef DAPL_DBG
/* Check for EP linking to IA and proper connect state */
@@ -505,8 +505,8 @@ static void dapli_socket_connected(dp_ib_cm_handle_t cm_ptr, int err)
struct dapl_ep *ep_ptr = cm_ptr->ep;
if (err) {
- dapl_log(DAPL_DBG_TYPE_ERR,
- " CONN_PENDING: %s ERR %s -> %s %d - %s\n",
+ dapl_log(DAPL_DBG_TYPE_WARN,
+ " CONN_REQUEST: %s ERR %s -> %s %d - %s %d\n",
err == -1 ? "POLL" : "SOCKOPT",
err == -1 ? strerror(dapl_socket_errno()) : strerror(err),
inet_ntoa(((struct sockaddr_in *)
@@ -514,7 +514,7 @@ static void dapli_socket_connected(dp_ib_cm_handle_t cm_ptr, int err)
ntohs(((struct sockaddr_in *)
&cm_ptr->addr)->sin_port),
(err == ETIMEDOUT || err == ECONNREFUSED) ?
- "RETRYING...":"ABORTING");
+ "RETRYING...":"ABORTING", cm_ptr->retry);
/* retry a timeout */
if ((err == ETIMEDOUT) || (err == ECONNREFUSED && --cm_ptr->retry)) {
@@ -522,12 +522,11 @@ static void dapli_socket_connected(dp_ib_cm_handle_t cm_ptr, int err)
cm_ptr->socket = DAPL_INVALID_SOCKET;
dapli_socket_connect(cm_ptr->ep, (DAT_IA_ADDRESS_PTR)&cm_ptr->addr,
ntohs(((struct sockaddr_in *)&cm_ptr->addr)->sin_port) - 1000,
- ntohs(cm_ptr->msg.p_size), &cm_ptr->msg.p_data);
+ ntohs(cm_ptr->msg.p_size), &cm_ptr->msg.p_data, cm_ptr->retry);
dapl_ep_unlink_cm(cm_ptr->ep, cm_ptr);
dapli_cm_free(cm_ptr);
return;
}
-
goto bail;
}
@@ -579,7 +578,7 @@ static void dapli_socket_connected(dp_ib_cm_handle_t cm_ptr, int err)
bail:
/* mark CM object for cleanup */
dapli_cm_free(cm_ptr);
- dapl_evd_connection_callback(NULL, IB_CME_LOCAL_FAILURE, NULL, 0, ep_ptr);
+ dapl_evd_connection_callback(NULL, IB_CME_TIMEOUT, NULL, 0, ep_ptr);
}
/*
@@ -589,7 +588,7 @@ bail:
static DAT_RETURN
dapli_socket_connect(DAPL_EP * ep_ptr,
DAT_IA_ADDRESS_PTR r_addr,
- DAT_CONN_QUAL r_qual, DAT_COUNT p_size, DAT_PVOID p_data)
+ DAT_CONN_QUAL r_qual, DAT_COUNT p_size, DAT_PVOID p_data, int retries)
{
dp_ib_cm_handle_t cm_ptr;
int ret;
@@ -604,6 +603,8 @@ dapli_socket_connect(DAPL_EP * ep_ptr,
if (cm_ptr == NULL)
return dat_ret;
+ cm_ptr->retry = retries;
+
/* create, connect, sockopt, and exchange QP information */
if ((cm_ptr->socket =
socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) == DAPL_INVALID_SOCKET) {
@@ -724,12 +725,12 @@ static void dapli_socket_connect_rtu(dp_ib_cm_handle_t cm_ptr)
ntohs(*(uint16_t*)&cm_ptr->msg.resv[2]));
/* Retry; corner case where server tcp stack resets under load */
- if (err == ECONNRESET) {
+ if (err == ECONNRESET && --cm_ptr->retry) {
closesocket(cm_ptr->socket);
cm_ptr->socket = DAPL_INVALID_SOCKET;
dapli_socket_connect(cm_ptr->ep, (DAT_IA_ADDRESS_PTR)&cm_ptr->addr,
ntohs(((struct sockaddr_in *)&cm_ptr->addr)->sin_port) - 1000,
- ntohs(cm_ptr->msg.p_size), &cm_ptr->msg.p_data);
+ ntohs(cm_ptr->msg.p_size), &cm_ptr->msg.p_data, cm_ptr->retry);
dapl_ep_unlink_cm(cm_ptr->ep, cm_ptr);
dapli_cm_free(cm_ptr);
return;
@@ -1455,7 +1456,7 @@ dapls_ib_connect(IN DAT_EP_HANDLE ep_handle,
return (dapli_socket_connect(ep_ptr, remote_ia_address,
remote_conn_qual,
- private_data_size, private_data));
+ private_data_size, private_data, SCM_CR_RETRY));
}
/*
--
1.7.3
More information about the ofw
mailing list