[openib-general] [PATCH 3/3] uDAPL cma: add support for address and route retries, call disconnect when receiving dreq

Arlin Davis arlin.r.davis at intel.com
Wed Nov 1 16:37:39 PST 2006


Fix some timeout and long disconnect-delay issues discovered during scale-out testing. Add support
for retrying rdma_cm address and route resolution, controlled by configuration options, and call
rdma_disconnect when a disconnect request is received, to force an immediate disconnect reply to the
remote side.

 

Here are the new options (environment variables) with their default settings:

 

DAPL_CM_ARP_TIMEOUT_MS   4000

DAPL_CM_ARP_RETRY_COUNT  15

DAPL_CM_ROUTE_TIMEOUT_MS  4000

DAPL_CM_ROUTE_RETRY_COUNT 15

 

 

 Signed-off-by: Arlin Davis ardavis at ichips.intel.com

 

Index: dapl/openib_cma/dapl_ib_cm.c
===================================================================
--- dapl/openib_cma/dapl_ib_cm.c    (revision 9916)
+++ dapl/openib_cma/dapl_ib_cm.c    (working copy)
@@ -58,6 +58,9 @@
 #include "dapl_ib_util.h"
 #include <sys/poll.h>
 #include <signal.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
 #include <rdma/rdma_cma_ib.h>
 
 extern struct rdma_event_channel *g_cm_events;
@@ -99,8 +102,8 @@
                  &ipaddr->src_addr)->sin_addr.s_addr),
            ntohl(((struct sockaddr_in *)
                  &ipaddr->dst_addr)->sin_addr.s_addr));
-
-     ret =  rdma_resolve_route(conn->cm_id, 2000);
+     
+     ret =  rdma_resolve_route(conn->cm_id, conn->route_timeout);
      if (ret) {
            dapl_dbg_log(DAPL_DBG_TYPE_ERR, 
                       " rdma_connect failed: %s\n",strerror(errno));
@@ -120,6 +123,7 @@
      struct rdma_addr *ipaddr = &conn->cm_id->route.addr;
      struct ib_addr   *ibaddr = &conn->cm_id->route.addr.addr.ibaddr;
 #endif
+
      dapl_dbg_log(DAPL_DBG_TYPE_CM, 
            " route_resolve: cm_id %p SRC %x DST %x PORT %d\n", 
            conn->cm_id, 
@@ -381,6 +385,7 @@
            break;
 
      case RDMA_CM_EVENT_DISCONNECTED:
+           rdma_disconnect(conn->cm_id); /* force the DREP */
            /* validate EP handle */
            if (!DAPL_BAD_HANDLE(conn->ep, DAPL_MAGIC_EP)) 
                  dapl_evd_connection_callback(conn, 
@@ -494,6 +499,7 @@
            
            break;
      case RDMA_CM_EVENT_DISCONNECTED:
+           rdma_disconnect(conn->cm_id); /* force the DREP */
            /* validate SP handle context */
            if (!DAPL_BAD_HANDLE(conn->sp, DAPL_MAGIC_PSP) || 
                !DAPL_BAD_HANDLE(conn->sp, DAPL_MAGIC_RSP))
@@ -543,7 +549,8 @@
                      IN void *p_data)
 {
      struct dapl_ep *ep_ptr = ep_handle;
-           
+     struct dapl_cm_id *conn;
+                 
      /* Sanity check */
      if (NULL == ep_ptr) 
            return DAT_SUCCESS;
@@ -552,36 +559,38 @@
                 r_qual,p_data,p_size);
                  
      /* rdma conn and cm_id pre-bound; reference via qp_handle */
-     ep_ptr->cm_handle = ep_ptr->qp_handle;
+     conn = ep_ptr->cm_handle = ep_ptr->qp_handle;
 
      /* Setup QP/CM parameters and private data in cm_id */
-     (void)dapl_os_memzero(&ep_ptr->cm_handle->params,
-                       sizeof(ep_ptr->cm_handle->params));
-     ep_ptr->cm_handle->params.responder_resources = IB_TARGET_MAX;
-     ep_ptr->cm_handle->params.initiator_depth = IB_INITIATOR_DEPTH;
-     ep_ptr->cm_handle->params.flow_control = 1;
-     ep_ptr->cm_handle->params.rnr_retry_count = IB_RNR_RETRY_COUNT;
-     ep_ptr->cm_handle->params.retry_count = IB_RC_RETRY_COUNT;
+     (void)dapl_os_memzero(&conn->params, sizeof(conn->params));
+     conn->params.responder_resources = IB_TARGET_MAX;
+     conn->params.initiator_depth = IB_INITIATOR_DEPTH;
+     conn->params.flow_control = 1;
+     conn->params.rnr_retry_count = IB_RNR_RETRY_COUNT;
+     conn->params.retry_count = IB_RC_RETRY_COUNT;
      if (p_size) {
-           dapl_os_memcpy(ep_ptr->cm_handle->p_data, p_data, p_size);
-           ep_ptr->cm_handle->params.private_data = 
-                             ep_ptr->cm_handle->p_data;
-           ep_ptr->cm_handle->params.private_data_len = p_size;
+           dapl_os_memcpy(conn->p_data, p_data, p_size);
+           conn->params.private_data = conn->p_data;
+           conn->params.private_data_len = p_size;
      }
 
+     /* copy in remote address, need a copy for retry attempts */
+     dapl_os_memcpy(&conn->r_addr, r_addr, sizeof(*r_addr));
+
      /* Resolve remote address, src already bound during QP create */
-     ((struct sockaddr_in*)r_addr)->sin_port = htons(MAKE_PORT(r_qual));
-     if (rdma_resolve_addr(ep_ptr->cm_handle->cm_id, 
-                       NULL, (struct sockaddr *)r_addr, 2000))
+     ((struct sockaddr_in*)&conn->r_addr)->sin_port = htons(MAKE_PORT(r_qual));
+     ((struct sockaddr_in*)&conn->r_addr)->sin_family = AF_INET;
+
+     if (rdma_resolve_addr(conn->cm_id, NULL, 
+                       (struct sockaddr *)&conn->r_addr, 
+                       conn->arp_timeout))
            return dapl_convert_errno(errno,"ib_connect");
 
      dapl_dbg_log(DAPL_DBG_TYPE_CM, 
-           " connect: resolve_addr: cm_id %p SRC %x DST %x port %d\n", 
-           ep_ptr->cm_handle->cm_id, 
-           ntohl(((struct sockaddr_in *)
-             &ep_ptr->cm_handle->hca->hca_address)->sin_addr.s_addr),
-           ntohl(((struct sockaddr_in *)r_addr)->sin_addr.s_addr),
-           MAKE_PORT(r_qual) );
+           " connect: resolve_addr: cm_id %p -> %s port %d\n", 
+           conn->cm_id, 
+           inet_ntoa(((struct sockaddr_in *)&conn->r_addr)->sin_addr),
+           ((struct sockaddr_in*)&conn->r_addr)->sin_port );
 
      return DAT_SUCCESS;
 }
@@ -1163,15 +1172,58 @@
            case RDMA_CM_EVENT_ADDR_RESOLVED:
                  dapli_addr_resolve(conn);
                  break;
+
            case RDMA_CM_EVENT_ROUTE_RESOLVED:
                  dapli_route_resolve(conn);
                  break;
+
            case RDMA_CM_EVENT_ADDR_ERROR:
+                 dapl_dbg_log(DAPL_DBG_TYPE_WARN,
+                            " CM ADDR ERROR: -> %s retry (%d)..\n", 
+                            inet_ntoa(((struct sockaddr_in *)
+                             &conn->r_addr)->sin_addr),
+                             conn->arp_retries);
+                 
+                 /* retry address resolution */
+                 if (--conn->arp_retries) {
+                       int ret;
+                       ret = rdma_resolve_addr(
+                             conn->cm_id, NULL, 
+                             (struct sockaddr *)&conn->r_addr, 
+                             conn->arp_timeout);
+                       if (!ret) 
+                             break;
+                       else { 
+                             dapl_dbg_log(
+                                   DAPL_DBG_TYPE_WARN,
+                                   " ERROR: rdma_resolve_addr = "
+                                   "%d %s\n", 
+                                   ret,strerror(errno));
+                       }
+                 } 
+                 /* retries exhausted or resolve_addr failed */
+                 dapl_evd_connection_callback(
+                       conn, IB_CME_DESTINATION_UNREACHABLE, 
+                       NULL, conn->ep);
+                 break;
+
+
            case RDMA_CM_EVENT_ROUTE_ERROR:
-                 dapl_evd_connection_callback(conn, 
-                                        IB_CME_DESTINATION_UNREACHABLE, 
-                                        NULL, conn->ep);
+                 dapl_dbg_log(DAPL_DBG_TYPE_WARN, 
+                            " CM ROUTE ERROR: -> %s retry (%d)..\n", 
+                            inet_ntoa(((struct sockaddr_in *)
+                             &conn->r_addr)->sin_addr),
+                            conn->route_retries );
+
+                 /* retry route resolution */
+                 if (--conn->route_retries) 
+                       dapli_addr_resolve(conn);
+                 else 
+                       dapl_evd_connection_callback( conn, 
+                             IB_CME_DESTINATION_UNREACHABLE, 
+                             NULL, conn->ep);
                  break;
+           
            case RDMA_CM_EVENT_DEVICE_REMOVAL:
                  dapl_evd_connection_callback(conn, 
                                         IB_CME_LOCAL_FAILURE, 
Index: dapl/openib_cma/dapl_ib_qp.c
===================================================================
--- dapl/openib_cma/dapl_ib_qp.c    (revision 10032)
+++ dapl/openib_cma/dapl_ib_qp.c    (working copy)
@@ -160,6 +168,17 @@
      conn->cm_id = cm_id;
      conn->ep = ep_ptr;
      conn->hca = ia_ptr->hca_ptr;
+
+     /* setup timers for address and route resolution */
+     conn->arp_timeout = dapl_os_get_env_val("DAPL_CM_ARP_TIMEOUT_MS", 
+                                   IB_ARP_TIMEOUT);
+     conn->arp_retries = dapl_os_get_env_val("DAPL_CM_ARP_RETRY_COUNT", 
+                                   IB_ARP_RETRY_COUNT);
+     conn->route_timeout = dapl_os_get_env_val("DAPL_CM_ROUTE_TIMEOUT_MS", 
+                                       IB_ROUTE_TIMEOUT);
+     conn->route_retries = dapl_os_get_env_val("DAPL_CM_ROUTE_RETRY_COUNT", 
+                                       IB_ROUTE_RETRY_COUNT);
+
      ep_ptr->qp_handle = conn;
      ep_ptr->qp_state = IB_QP_STATE_INIT;
      

Index: dapl/openib_cma/dapl_ib_util.h
===================================================================
--- dapl/openib_cma/dapl_ib_util.h  (revision 9916)
+++ dapl/openib_cma/dapl_ib_util.h  (working copy)
@@ -67,8 +67,12 @@
 
 #define IB_RC_RETRY_COUNT      7
 #define IB_RNR_RETRY_COUNT     7
-#define IB_CM_RESPONSE_TIMEOUT  20 /* 4 sec */
-#define IB_CM_RETRIES           15
+#define IB_CM_RESPONSE_TIMEOUT  23 /* 16 sec */
+#define IB_CM_RETRIES           15 /* 240 sec total default */
+#define IB_ARP_TIMEOUT       4000  /* 4 sec */
+#define IB_ARP_RETRY_COUNT   15    /* 60 sec total */
+#define IB_ROUTE_TIMEOUT     4000  /* 4 sec */
+#define IB_ROUTE_RETRY_COUNT 15    /* 60 sec total */
 #define IB_REQ_MRA_TIMEOUT   27    /* a little over 9 minutes */
 #define IB_MAX_AT_RETRY            3
 #define IB_TARGET_MAX        4     /* max_qp_ous_rd_atom */
@@ -177,12 +181,17 @@
 struct dapl_cm_id {
      DAPL_OS_LOCK                  lock;
      int                     destroy;
+     int                     arp_retries;
+     int                     arp_timeout;
+     int                     route_retries;
+     int                     route_timeout;
      int                     in_callback;
      struct rdma_cm_id       *cm_id;
      struct dapl_hca               *hca;
      struct dapl_sp                *sp;
      struct dapl_ep                *ep;
      struct rdma_conn_param        params;
+     DAT_SOCK_ADDR6                r_addr;
      int                     p_len;
      unsigned char                 p_data[IB_MAX_DREP_PDATA_SIZE];
 };
 

 

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openfabrics.org/pipermail/general/attachments/20061101/f969d934/attachment.html>


More information about the general mailing list