[openib-general] [PATCH 3/3] uDAPL cma: add support for address and route retries, call disconnect when receiving dreq

Arlin Davis arlin.r.davis at intel.com
Mon Nov 6 14:44:33 PST 2006


Fix some timeout and long disconnect delay issues discovered during scale-out testing. Add support
for retrying rdma_cm address and route resolution, with configuration options. Call rdma_disconnect
when a disconnect request is received, to guarantee a disconnect reply and event on the remote side.
Previously, rdma_disconnect was not being called from dat_ep_disconnect() because the state had
already changed to DISCONNECTED in the event callback.
 
Here are the new options (environment variables) with their default settings:
 
DAPL_CM_ARP_TIMEOUT_MS   4000
DAPL_CM_ARP_RETRY_COUNT  15
DAPL_CM_ROUTE_TIMEOUT_MS  4000
DAPL_CM_ROUTE_RETRY_COUNT 15
 
 
Signed-off-by: Arlin Davis ardavis at ichips.intel.com


Index: dapl/openib_cma/dapl_ib_cm.c
===================================================================
--- dapl/openib_cma/dapl_ib_cm.c	(revision 10032)
+++ dapl/openib_cma/dapl_ib_cm.c	(working copy)
@@ -58,6 +58,9 @@
 #include "dapl_ib_util.h"
 #include <sys/poll.h>
 #include <signal.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
 #include <rdma/rdma_cma_ib.h>
 
 extern struct rdma_event_channel *g_cm_events;
@@ -99,8 +102,8 @@ static void dapli_addr_resolve(struct da
 			&ipaddr->src_addr)->sin_addr.s_addr),
 		ntohl(((struct sockaddr_in *)
 			&ipaddr->dst_addr)->sin_addr.s_addr));
-
-	ret =  rdma_resolve_route(conn->cm_id, 2000);
+	
+	ret =  rdma_resolve_route(conn->cm_id, conn->route_timeout);
 	if (ret) {
 		dapl_dbg_log(DAPL_DBG_TYPE_ERR, 
 			     " rdma_connect failed: %s\n",strerror(errno));
@@ -120,6 +123,7 @@ static void dapli_route_resolve(struct d
 	struct rdma_addr *ipaddr = &conn->cm_id->route.addr;
 	struct ib_addr   *ibaddr = &conn->cm_id->route.addr.addr.ibaddr;
 #endif
+
 	dapl_dbg_log(DAPL_DBG_TYPE_CM, 
 		" route_resolve: cm_id %p SRC %x DST %x PORT %d\n", 
 		conn->cm_id, 
@@ -331,21 +335,17 @@ static void dapli_cm_active_cb(struct da
 	case RDMA_CM_EVENT_UNREACHABLE:
 	case RDMA_CM_EVENT_CONNECT_ERROR:
 	{
-		ib_cm_events_t cm_event;
-                dapl_dbg_log(
+		dapl_dbg_log(
                         DAPL_DBG_TYPE_WARN,
                         " dapli_cm_active_handler: CONN_ERR "
                         " event=0x%x status=%d %s\n",
                         event->event, event->status,
                         (event->status == -ETIMEDOUT)?"TIMEOUT":"" );
 
-		/* no device type specified so assume IB for now */
-		if (event->status == -ETIMEDOUT) /* IB timeout */
-			cm_event = IB_CME_TIMEOUT;
-		else 
-			cm_event = IB_CME_DESTINATION_UNREACHABLE;
-
-		dapl_evd_connection_callback(conn, cm_event, NULL, conn->ep);
+		/* per DAT SPEC provider always returns UNREACHABLE */
+		dapl_evd_connection_callback(conn, 
+					     IB_CME_DESTINATION_UNREACHABLE, 
+					     NULL, conn->ep);
 		break;
 	}
 	case RDMA_CM_EVENT_REJECTED:
@@ -381,6 +381,7 @@ static void dapli_cm_active_cb(struct da
 		break;
 
 	case RDMA_CM_EVENT_DISCONNECTED:
+		rdma_disconnect(conn->cm_id); /* force the DREP */
 		/* validate EP handle */
 		if (!DAPL_BAD_HANDLE(conn->ep, DAPL_MAGIC_EP)) 
 			dapl_evd_connection_callback(conn, 
@@ -494,6 +495,7 @@ static void dapli_cm_passive_cb(struct d
 		
 		break;
 	case RDMA_CM_EVENT_DISCONNECTED:
+		rdma_disconnect(conn->cm_id); /* force the DREP */
 		/* validate SP handle context */
 		if (!DAPL_BAD_HANDLE(conn->sp, DAPL_MAGIC_PSP) || 
 		    !DAPL_BAD_HANDLE(conn->sp, DAPL_MAGIC_RSP))
@@ -543,7 +545,8 @@ DAT_RETURN dapls_ib_connect(IN DAT_EP_HA
 			    IN void *p_data)
 {
 	struct dapl_ep *ep_ptr = ep_handle;
-		
+	struct dapl_cm_id *conn;
+			
 	/* Sanity check */
 	if (NULL == ep_ptr) 
 		return DAT_SUCCESS;
@@ -552,36 +555,38 @@ DAT_RETURN dapls_ib_connect(IN DAT_EP_HA
 		     r_qual,p_data,p_size);
 			
 	/* rdma conn and cm_id pre-bound; reference via qp_handle */
-	ep_ptr->cm_handle = ep_ptr->qp_handle;
+	conn = ep_ptr->cm_handle = ep_ptr->qp_handle;
 
 	/* Setup QP/CM parameters and private data in cm_id */
-	(void)dapl_os_memzero(&ep_ptr->cm_handle->params,
-			      sizeof(ep_ptr->cm_handle->params));
-	ep_ptr->cm_handle->params.responder_resources = IB_TARGET_MAX;
-	ep_ptr->cm_handle->params.initiator_depth = IB_INITIATOR_DEPTH;
-	ep_ptr->cm_handle->params.flow_control = 1;
-	ep_ptr->cm_handle->params.rnr_retry_count = IB_RNR_RETRY_COUNT;
-	ep_ptr->cm_handle->params.retry_count = IB_RC_RETRY_COUNT;
+	(void)dapl_os_memzero(&conn->params, sizeof(conn->params));
+	conn->params.responder_resources = IB_TARGET_MAX;
+	conn->params.initiator_depth = IB_INITIATOR_DEPTH;
+	conn->params.flow_control = 1;
+	conn->params.rnr_retry_count = IB_RNR_RETRY_COUNT;
+	conn->params.retry_count = IB_RC_RETRY_COUNT;
 	if (p_size) {
-		dapl_os_memcpy(ep_ptr->cm_handle->p_data, p_data, p_size);
-		ep_ptr->cm_handle->params.private_data = 
-					ep_ptr->cm_handle->p_data;
-		ep_ptr->cm_handle->params.private_data_len = p_size;
+		dapl_os_memcpy(conn->p_data, p_data, p_size);
+		conn->params.private_data = conn->p_data;
+		conn->params.private_data_len = p_size;
 	}
 
+	/* copy in remote address, need a copy for retry attempts */
+	dapl_os_memcpy(&conn->r_addr, r_addr, sizeof(*r_addr));
+
 	/* Resolve remote address, src already bound during QP create */
-	((struct sockaddr_in*)r_addr)->sin_port = htons(MAKE_PORT(r_qual));
-	if (rdma_resolve_addr(ep_ptr->cm_handle->cm_id, 
-			      NULL, (struct sockaddr *)r_addr, 2000))
+	((struct sockaddr_in*)&conn->r_addr)->sin_port = htons(MAKE_PORT(r_qual));
+	((struct sockaddr_in*)&conn->r_addr)->sin_family = AF_INET;
+
+	if (rdma_resolve_addr(conn->cm_id, NULL, 
+			      (struct sockaddr *)&conn->r_addr, 
+			      conn->arp_timeout))
 		return dapl_convert_errno(errno,"ib_connect");
 
 	dapl_dbg_log(DAPL_DBG_TYPE_CM, 
-		" connect: resolve_addr: cm_id %p SRC %x DST %x port %d\n", 
-		ep_ptr->cm_handle->cm_id, 
-		ntohl(((struct sockaddr_in *)
-		  &ep_ptr->cm_handle->hca->hca_address)->sin_addr.s_addr),
-		ntohl(((struct sockaddr_in *)r_addr)->sin_addr.s_addr),
-		MAKE_PORT(r_qual) );
+		" connect: resolve_addr: cm_id %p -> %s port %d\n", 
+		conn->cm_id, 
+		inet_ntoa(((struct sockaddr_in *)&conn->r_addr)->sin_addr),
+		((struct sockaddr_in*)&conn->r_addr)->sin_port );
 
 	return DAT_SUCCESS;
 }
@@ -1163,15 +1168,60 @@ void dapli_cma_event_cb(void)
 		case RDMA_CM_EVENT_ADDR_RESOLVED:
 			dapli_addr_resolve(conn);
 			break;
+
 		case RDMA_CM_EVENT_ROUTE_RESOLVED:
 			dapli_route_resolve(conn);
 			break;
+
 		case RDMA_CM_EVENT_ADDR_ERROR:
+			dapl_dbg_log(DAPL_DBG_TYPE_WARN,
+				     " CM ADDR ERROR: -> %s retry (%d)..\n", 
+				     inet_ntoa(((struct sockaddr_in *)
+					&conn->r_addr)->sin_addr),
+					conn->arp_retries);
+			
+			/* retry address resolution */
+			if ((--conn->arp_retries) && 
+				(event->status == -ETIMEDOUT)) {
+				int ret;
+				ret = rdma_resolve_addr(
+					conn->cm_id, NULL, 
+					(struct sockaddr *)&conn->r_addr, 
+					conn->arp_timeout);
+				if (!ret) 
+					break;
+				else { 
+					dapl_dbg_log(
+						DAPL_DBG_TYPE_WARN,
+						" ERROR: rdma_resolve_addr = "
+						"%d %s\n", 
+						ret,strerror(errno));
+				}
+			} 
+			/* retries exhausted or resolve_addr failed */
+			dapl_evd_connection_callback(
+				conn, IB_CME_DESTINATION_UNREACHABLE, 
+				NULL, conn->ep);
+			break;
+
+
 		case RDMA_CM_EVENT_ROUTE_ERROR:
-			dapl_evd_connection_callback(conn, 
-						     IB_CME_DESTINATION_UNREACHABLE, 
-						     NULL, conn->ep);
+			dapl_dbg_log(DAPL_DBG_TYPE_WARN, 
+				     " CM ROUTE ERROR: -> %s retry (%d)..\n", 
+				     inet_ntoa(((struct sockaddr_in *)
+					&conn->r_addr)->sin_addr),
+				     conn->route_retries );
+
+			/* retry route resolution */
+			if ((--conn->route_retries) && 
+				(event->status == -ETIMEDOUT))
+				dapli_addr_resolve(conn);
+			else 
+				dapl_evd_connection_callback( conn, 
+					IB_CME_DESTINATION_UNREACHABLE, 
+					NULL, conn->ep);
 			break;
+		
 		case RDMA_CM_EVENT_DEVICE_REMOVAL:
 			dapl_evd_connection_callback(conn, 
 						     IB_CME_LOCAL_FAILURE, 
Index: dapl/openib_cma/dapl_ib_qp.c
===================================================================
--- dapl/openib_cma/dapl_ib_qp.c	(revision 10032)
+++ dapl/openib_cma/dapl_ib_qp.c	(working copy)
@@ -160,6 +168,17 @@ DAT_RETURN dapls_ib_qp_alloc(IN DAPL_IA 
 	conn->cm_id = cm_id;
 	conn->ep = ep_ptr;
 	conn->hca = ia_ptr->hca_ptr;
+
+	/* setup timers for address and route resolution */
+	conn->arp_timeout = dapl_os_get_env_val("DAPL_CM_ARP_TIMEOUT_MS", 
+						IB_ARP_TIMEOUT);
+	conn->arp_retries = dapl_os_get_env_val("DAPL_CM_ARP_RETRY_COUNT", 
+						IB_ARP_RETRY_COUNT);
+	conn->route_timeout = dapl_os_get_env_val("DAPL_CM_ROUTE_TIMEOUT_MS", 
+						    IB_ROUTE_TIMEOUT);
+	conn->route_retries = dapl_os_get_env_val("DAPL_CM_ROUTE_RETRY_COUNT", 
+						    IB_ROUTE_RETRY_COUNT);
+
 	ep_ptr->qp_handle = conn;
 	ep_ptr->qp_state = IB_QP_STATE_INIT;
 	
Index: dapl/openib_cma/dapl_ib_util.h
===================================================================
--- dapl/openib_cma/dapl_ib_util.h	(revision 10032)
+++ dapl/openib_cma/dapl_ib_util.h	(working copy)
@@ -67,8 +67,12 @@ typedef ib_hca_handle_t		dapl_ibal_ca_t;
 
 #define IB_RC_RETRY_COUNT      7
 #define IB_RNR_RETRY_COUNT     7
-#define IB_CM_RESPONSE_TIMEOUT  20	/* 4 sec */
-#define IB_CM_RETRIES           15
+#define IB_CM_RESPONSE_TIMEOUT  23	/* 16 sec */
+#define IB_CM_RETRIES           15	/* 240 sec total default */
+#define IB_ARP_TIMEOUT		4000	/* 4 sec */
+#define IB_ARP_RETRY_COUNT	15	/* 60 sec total */
+#define IB_ROUTE_TIMEOUT	4000	/* 4 sec */
+#define IB_ROUTE_RETRY_COUNT	15	/* 60 sec total */
 #define IB_REQ_MRA_TIMEOUT	27	/* a little over 9 minutes */
 #define IB_MAX_AT_RETRY		3
 #define IB_TARGET_MAX		4	/* max_qp_ous_rd_atom */
@@ -177,12 +181,17 @@ struct ib_llist_entry
 struct dapl_cm_id {
 	DAPL_OS_LOCK			lock;
 	int				destroy;
+	int				arp_retries;
+	int				arp_timeout;
+	int				route_retries;
+	int				route_timeout;
 	int				in_callback;
 	struct rdma_cm_id		*cm_id;
 	struct dapl_hca			*hca;
 	struct dapl_sp			*sp;
 	struct dapl_ep			*ep;
 	struct rdma_conn_param		params;
+	DAT_SOCK_ADDR6			r_addr;
 	int				p_len;
 	unsigned char			p_data[IB_MAX_DREP_PDATA_SIZE];
 };





More information about the general mailing list