[ofw] [PATCH 2/3] DAPL v2.0: scm: socket connect request count is reset improperly on retry

Davis, Arlin R arlin.r.davis at intel.com
Wed Oct 26 14:10:48 PDT 2011


Include the current retry count with the new connect request call
and set it accordingly after creating the new cm object.

Signed-off-by: Arlin Davis <arlin.r.davis at intel.com>
---
 dapl/openib_scm/cm.c |   23 ++++++++++++-----------
 1 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/dapl/openib_scm/cm.c b/dapl/openib_scm/cm.c
index 305f85b..968d9b9 100644
--- a/dapl/openib_scm/cm.c
+++ b/dapl/openib_scm/cm.c
@@ -64,7 +64,7 @@
 static DAT_RETURN
 dapli_socket_connect(DAPL_EP * ep_ptr,
 		     DAT_IA_ADDRESS_PTR r_addr,
-		     DAT_CONN_QUAL r_qual, DAT_COUNT p_size, DAT_PVOID p_data);
+		     DAT_CONN_QUAL r_qual, DAT_COUNT p_size, DAT_PVOID p_data, int retries);
 
 #ifdef DAPL_DBG
 /* Check for EP linking to IA and proper connect state */
@@ -505,8 +505,8 @@ static void dapli_socket_connected(dp_ib_cm_handle_t cm_ptr, int err)
 	struct dapl_ep *ep_ptr = cm_ptr->ep;
 
 	if (err) {
-		dapl_log(DAPL_DBG_TYPE_ERR,
-			 " CONN_PENDING: %s ERR %s -> %s %d - %s\n",
+		dapl_log(DAPL_DBG_TYPE_WARN,
+			 " CONN_REQUEST: %s ERR %s -> %s %d - %s %d\n",
 			 err == -1 ? "POLL" : "SOCKOPT",
 			 err == -1 ? strerror(dapl_socket_errno()) : strerror(err), 
 			 inet_ntoa(((struct sockaddr_in *)
@@ -514,7 +514,7 @@ static void dapli_socket_connected(dp_ib_cm_handle_t cm_ptr, int err)
 			 ntohs(((struct sockaddr_in *)
 				&cm_ptr->addr)->sin_port),
 			 (err == ETIMEDOUT || err == ECONNREFUSED) ? 
-			 "RETRYING...":"ABORTING");
+			 "RETRYING...":"ABORTING", cm_ptr->retry);
 
 		/* retry a timeout */
 		if ((err == ETIMEDOUT) || (err == ECONNREFUSED && --cm_ptr->retry)) {
@@ -522,12 +522,11 @@ static void dapli_socket_connected(dp_ib_cm_handle_t cm_ptr, int err)
 			cm_ptr->socket = DAPL_INVALID_SOCKET;
 			dapli_socket_connect(cm_ptr->ep, (DAT_IA_ADDRESS_PTR)&cm_ptr->addr, 
 					     ntohs(((struct sockaddr_in *)&cm_ptr->addr)->sin_port) - 1000,
-					     ntohs(cm_ptr->msg.p_size), &cm_ptr->msg.p_data);
+					     ntohs(cm_ptr->msg.p_size), &cm_ptr->msg.p_data, cm_ptr->retry);
 			dapl_ep_unlink_cm(cm_ptr->ep, cm_ptr);
 			dapli_cm_free(cm_ptr);
 			return;
 		}
-
 		goto bail;
 	}
 
@@ -579,7 +578,7 @@ static void dapli_socket_connected(dp_ib_cm_handle_t cm_ptr, int err)
 bail:
 	/* mark CM object for cleanup */
 	dapli_cm_free(cm_ptr);
-	dapl_evd_connection_callback(NULL, IB_CME_LOCAL_FAILURE, NULL, 0, ep_ptr);
+	dapl_evd_connection_callback(NULL, IB_CME_TIMEOUT, NULL, 0, ep_ptr);
 }
 
 /*
@@ -589,7 +588,7 @@ bail:
 static DAT_RETURN
 dapli_socket_connect(DAPL_EP * ep_ptr,
 		     DAT_IA_ADDRESS_PTR r_addr,
-		     DAT_CONN_QUAL r_qual, DAT_COUNT p_size, DAT_PVOID p_data)
+		     DAT_CONN_QUAL r_qual, DAT_COUNT p_size, DAT_PVOID p_data, int retries)
 {
 	dp_ib_cm_handle_t cm_ptr;
 	int ret;
@@ -604,6 +603,8 @@ dapli_socket_connect(DAPL_EP * ep_ptr,
 	if (cm_ptr == NULL)
 		return dat_ret;
 
+	cm_ptr->retry = retries;
+
 	/* create, connect, sockopt, and exchange QP information */
 	if ((cm_ptr->socket =
 	     socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) == DAPL_INVALID_SOCKET) {
@@ -724,12 +725,12 @@ static void dapli_socket_connect_rtu(dp_ib_cm_handle_t cm_ptr)
 			 ntohs(*(uint16_t*)&cm_ptr->msg.resv[2]));
 
 		/* Retry; corner case where server tcp stack resets under load */
-		if (err == ECONNRESET) {
+		if (err == ECONNRESET && --cm_ptr->retry) {
 			closesocket(cm_ptr->socket);
 			cm_ptr->socket = DAPL_INVALID_SOCKET;
 			dapli_socket_connect(cm_ptr->ep, (DAT_IA_ADDRESS_PTR)&cm_ptr->addr, 
 					     ntohs(((struct sockaddr_in *)&cm_ptr->addr)->sin_port) - 1000,
-					     ntohs(cm_ptr->msg.p_size), &cm_ptr->msg.p_data);
+					     ntohs(cm_ptr->msg.p_size), &cm_ptr->msg.p_data, cm_ptr->retry);
 			dapl_ep_unlink_cm(cm_ptr->ep, cm_ptr);
 			dapli_cm_free(cm_ptr);
 			return;
@@ -1455,7 +1456,7 @@ dapls_ib_connect(IN DAT_EP_HANDLE ep_handle,
 
 	return (dapli_socket_connect(ep_ptr, remote_ia_address,
 				     remote_conn_qual,
-				     private_data_size, private_data));
+				     private_data_size, private_data, SCM_CR_RETRY));
 }
 
 /*
-- 
1.7.3





More information about the ofw mailing list