<html xmlns:v="urn:schemas-microsoft-com:vml" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns="http://www.w3.org/TR/REC-html40">

<head>
<meta http-equiv=Content-Type content="text/html; charset=us-ascii">
<meta name=Generator content="Microsoft Word 11 (filtered medium)">
<style>
<!--
 /* Style Definitions */
 p.MsoNormal, li.MsoNormal, div.MsoNormal
        {margin:0in;
        margin-bottom:.0001pt;
        font-size:12.0pt;
        font-family:"Times New Roman";}
a:link, span.MsoHyperlink
        {color:blue;
        text-decoration:underline;}
a:visited, span.MsoHyperlinkFollowed
        {color:purple;
        text-decoration:underline;}
p.MsoPlainText, li.MsoPlainText, div.MsoPlainText
        {margin:0in;
        margin-bottom:.0001pt;
        font-size:10.0pt;
        font-family:"Courier New";}
pre
        {margin:0in;
        margin-bottom:.0001pt;
        font-size:10.0pt;
        font-family:"Courier New";}
span.EmailStyle17
        {mso-style-type:personal-compose;
        font-family:Arial;
        color:windowtext;}
@page Section1
        {size:8.5in 11.0in;
        margin:1.0in 1.25in 1.0in 1.25in;}
div.Section1
        {page:Section1;}
-->
</style>
<!--[if gte mso 9]><xml>
 <o:shapedefaults v:ext="edit" spidmax="1026" />
</xml><![endif]--><!--[if gte mso 9]><xml>
 <o:shapelayout v:ext="edit">
  <o:idmap v:ext="edit" data="1" />
 </o:shapelayout></xml><![endif]-->
</head>

<body lang=EN-US link=blue vlink=purple>

<div class=Section1>

<p class=MsoNormal><font size=2 face=Arial><span style='font-size:10.0pt;
font-family:Arial'>Fix some timeout and long disconnect delay issues discovered
during scale-out testing. Add support for retrying rdma_cm address and route
resolution, with configurable timeouts and retry counts, and call
rdma_disconnect when a disconnect request is received to force an immediate
disconnect reply to the remote side. <o:p></o:p></span></font></p>

<p class=MsoNormal><font size=2 face=Arial><span style='font-size:10.0pt;
font-family:Arial'><o:p> </o:p></span></font></p>

<p class=MsoNormal style='text-autospace:none'><font size=2 face=Arial><span
style='font-size:10.0pt;font-family:Arial'>Here are the new options
(environment variables) with their default settings:<o:p></o:p></span></font></p>

<p class=MsoNormal style='text-autospace:none'><font size=2 face=Arial><span
style='font-size:10.0pt;font-family:Arial'><o:p> </o:p></span></font></p>

<p class=MsoNormal style='text-autospace:none'><font size=2 face="Courier New"><span
style='font-size:10.0pt;font-family:"Courier New"'>DAPL_CM_ARP_TIMEOUT_MS  
4000<o:p></o:p></span></font></p>

<p class=MsoNormal style='text-autospace:none'><font size=2 face="Courier New"><span
style='font-size:10.0pt;font-family:"Courier New"'>DAPL_CM_ARP_RETRY_COUNT 
15<o:p></o:p></span></font></p>

<p class=MsoNormal style='text-autospace:none'><font size=2 face="Courier New"><span
style='font-size:10.0pt;font-family:"Courier New"'>DAPL_CM_ROUTE_TIMEOUT_MS 
4000<o:p></o:p></span></font></p>

<p class=MsoNormal style='text-autospace:none'><font size=2 face="Courier New"><span
style='font-size:10.0pt;font-family:"Courier New"'>DAPL_CM_ROUTE_RETRY_COUNT 15<o:p></o:p></span></font></p>

<p class=MsoNormal><font size=2 face=Arial><span style='font-size:10.0pt;
font-family:Arial'><o:p> </o:p></span></font></p>

<p class=MsoNormal><font size=2 face=Arial><span style='font-size:10.0pt;
font-family:Arial'><o:p> </o:p></span></font></p>

<p class=MsoNormal><font size=2 face=Arial><span style='font-size:10.0pt;
font-family:Arial'> </span></font>Signed-off-by: Arlin Davis &lt;<a
href="mailto:ardavis@ichips.intel.com">ardavis@ichips.intel.com</a>&gt;<o:p></o:p></p>

<p class=MsoNormal><font size=2 face=Arial><span style='font-size:10.0pt;
font-family:Arial'><o:p> </o:p></span></font></p>

<p class=MsoPlainText><font size=2 face="Courier New"><span style='font-size:
10.0pt'>Index: dapl/openib_cma/dapl_ib_cm.c<br>
===================================================================<br>
--- dapl/openib_cma/dapl_ib_cm.c    (revision 9916)<br>
+++ dapl/openib_cma/dapl_ib_cm.c    (working copy)<br>
@@ -58,6 +58,9 @@<br>
 #include "dapl_ib_util.h"<br>
 #include &lt;sys/poll.h&gt;<br>
 #include &lt;signal.h&gt;<br>
+#include &lt;sys/socket.h&gt;<br>
+#include &lt;netinet/in.h&gt;<br>
+#include &lt;arpa/inet.h&gt;<br>
 #include &lt;rdma/rdma_cma_ib.h&gt;<br>
 <br>
 extern struct rdma_event_channel *g_cm_events;<br>
@@ -99,8 +102,8 @@<br>
                  &ipaddr->src_addr)->sin_addr.s_addr),<br>
            ntohl(((struct
sockaddr_in *)<br>
                  &ipaddr->dst_addr)->sin_addr.s_addr));<br>
-<br>
-     ret =  rdma_resolve_route(conn->cm_id, 2000);<br>
+     <br>
+     ret =  rdma_resolve_route(conn->cm_id,
conn->route_timeout);<br>
      if (ret) {<br>
            dapl_dbg_log(DAPL_DBG_TYPE_ERR,
<br>
                      
" rdma_connect failed: %s\n",strerror(errno));<br>
@@ -120,6 +123,7 @@<br>
      struct rdma_addr *ipaddr =
&conn->cm_id->route.addr;<br>
      struct ib_addr   *ibaddr =
&conn->cm_id->route.addr.addr.ibaddr;<br>
 #endif<br>
+<br>
      dapl_dbg_log(DAPL_DBG_TYPE_CM, <br>
            "
route_resolve: cm_id %p SRC %x DST %x PORT %d\n", <br>
            conn->cm_id,
<br>
@@ -381,6 +385,7 @@<br>
            break;<br>
 <br>
      case RDMA_CM_EVENT_DISCONNECTED:<br>
+           rdma_disconnect(conn->cm_id);
/* force the DREP */<br>
            /* validate
EP handle */<br>
            if
(!DAPL_BAD_HANDLE(conn->ep, DAPL_MAGIC_EP)) <br>
                  dapl_evd_connection_callback(conn,
<br>
@@ -494,6 +499,7 @@<br>
            <br>
            break;<br>
      case RDMA_CM_EVENT_DISCONNECTED:<br>
+           rdma_disconnect(conn->cm_id);
/* force the DREP */<br>
            /* validate
SP handle context */<br>
            if
(!DAPL_BAD_HANDLE(conn->sp, DAPL_MAGIC_PSP) || <br>
               
!DAPL_BAD_HANDLE(conn->sp, DAPL_MAGIC_RSP))<br>
@@ -543,7 +549,8 @@<br>
                     
IN void *p_data)<br>
 {<br>
      struct dapl_ep *ep_ptr = ep_handle;<br>
-           <br>
+     struct dapl_cm_id *conn;<br>
+                 <br>
      /* Sanity check */<br>
      if (NULL == ep_ptr) <br>
            return
DAT_SUCCESS;<br>
@@ -552,36 +559,38 @@<br>
                
r_qual,p_data,p_size);<br>
                  <br>
      /* rdma conn and cm_id pre-bound; reference via
qp_handle */<br>
-     ep_ptr->cm_handle = ep_ptr->qp_handle;<br>
+     conn = ep_ptr->cm_handle = ep_ptr->qp_handle;<br>
 <br>
      /* Setup QP/CM parameters and private data in
cm_id */<br>
-     (void)dapl_os_memzero(&ep_ptr->cm_handle->params,<br>
-                      
sizeof(ep_ptr->cm_handle->params));<br>
-     ep_ptr->cm_handle->params.responder_resources =
IB_TARGET_MAX;<br>
-     ep_ptr->cm_handle->params.initiator_depth =
IB_INITIATOR_DEPTH;<br>
-     ep_ptr->cm_handle->params.flow_control = 1;<br>
-     ep_ptr->cm_handle->params.rnr_retry_count =
IB_RNR_RETRY_COUNT;<br>
-     ep_ptr->cm_handle->params.retry_count =
IB_RC_RETRY_COUNT;<br>
+     (void)dapl_os_memzero(&conn->params,
sizeof(conn->params));<br>
+     conn->params.responder_resources = IB_TARGET_MAX;<br>
+     conn->params.initiator_depth = IB_INITIATOR_DEPTH;<br>
+     conn->params.flow_control = 1;<br>
+     conn->params.rnr_retry_count = IB_RNR_RETRY_COUNT;<br>
+     conn->params.retry_count = IB_RC_RETRY_COUNT;<br>
      if (p_size) {<br>
-           dapl_os_memcpy(ep_ptr->cm_handle->p_data,
p_data, p_size);<br>
-           ep_ptr->cm_handle->params.private_data
= <br>
-                             ep_ptr->cm_handle->p_data;<br>
-           ep_ptr->cm_handle->params.private_data_len
= p_size;<br>
+           dapl_os_memcpy(conn->p_data,
p_data, p_size);<br>
+           conn->params.private_data
= conn->p_data;<br>
+           conn->params.private_data_len
= p_size;<br>
      }<br>
 <br>
+     /* copy in remote address, need a copy for retry
attempts */<br>
+     dapl_os_memcpy(&conn->r_addr, r_addr,
sizeof(*r_addr));<br>
+<br>
      /* Resolve remote address, src already bound
during QP create */<br>
-     ((struct sockaddr_in*)r_addr)->sin_port =
htons(MAKE_PORT(r_qual));<br>
-     if (rdma_resolve_addr(ep_ptr->cm_handle->cm_id,
<br>
-                      
NULL, (struct sockaddr *)r_addr, 2000))<br>
+     ((struct
sockaddr_in*)&conn->r_addr)->sin_port = htons(MAKE_PORT(r_qual));<br>
+     ((struct
sockaddr_in*)&conn->r_addr)->sin_family = AF_INET;<br>
+<br>
+     if (rdma_resolve_addr(conn->cm_id, NULL, <br>
+                      
(struct sockaddr *)&conn->r_addr, <br>
+                      
conn->arp_timeout))<br>
            return
dapl_convert_errno(errno,"ib_connect");<br>
 <br>
      dapl_dbg_log(DAPL_DBG_TYPE_CM, <br>
-           " connect:
resolve_addr: cm_id %p SRC %x DST %x port %d\n", <br>
-           ep_ptr->cm_handle->cm_id,
<br>
-           ntohl(((struct
sockaddr_in *)<br>
-            
&ep_ptr->cm_handle->hca->hca_address)->sin_addr.s_addr),<br>
-           ntohl(((struct
sockaddr_in *)r_addr)->sin_addr.s_addr),<br>
-           MAKE_PORT(r_qual)
);<br>
+           " connect:
resolve_addr: cm_id %p -> %s port %d\n", <br>
+           conn->cm_id, <br>
+           inet_ntoa(((struct
sockaddr_in *)&conn->r_addr)->sin_addr),<br>
+           ((struct
sockaddr_in*)&conn->r_addr)->sin_port );<br>
 <br>
      return DAT_SUCCESS;<br>
 }<br>
@@ -1163,15 +1172,58 @@<br>
            case
RDMA_CM_EVENT_ADDR_RESOLVED:<br>
                  dapli_addr_resolve(conn);<br>
                  break;<br>
+<br>
            case
RDMA_CM_EVENT_ROUTE_RESOLVED:<br>
                  dapli_route_resolve(conn);<br>
                  break;<br>
+<br>
            case
RDMA_CM_EVENT_ADDR_ERROR:<br>
+                 dapl_dbg_log(DAPL_DBG_TYPE_WARN,<br>
+                           
" CM ADDR ERROR: -> %s retry (%d)..\n", <br>
+                           
inet_ntoa(((struct sockaddr_in *)<br>
+                             &conn->r_addr)->sin_addr),<br>
+                             conn->arp_retries);<br>
+                 <br>
+                 /*
retry address resolution */<br>
+                 if
(--conn->arp_retries) {<br>
+                       int
ret;<br>
+                       ret
= rdma_resolve_addr(<br>
+                             conn->cm_id,
NULL, <br>
+                             (struct
sockaddr *)&conn->r_addr, <br>
+                             conn->arp_timeout);<br>
+                       if
(!ret) <br>
+                             break;<br>
+                       else
{ <br>
+                             dapl_dbg_log(<br>
+                                   DAPL_DBG_TYPE_WARN,<br>
+                                   "
ERROR: rdma_resolve_addr = "<br>
+                                   "%d
%s\n", <br>
+                                   ret,strerror(errno));<br>
+                       }<br>
+                 }
<br>
+                 /*
retries exhausted or resolve_addr failed */<br>
+                 dapl_evd_connection_callback(<br>
+                       conn,
IB_CME_DESTINATION_UNREACHABLE, <br>
+                       NULL,
conn->ep);<br>
+                 break;<br>
+<br>
+<br>
            case
RDMA_CM_EVENT_ROUTE_ERROR:<br>
-                 dapl_evd_connection_callback(conn,
<br>
-                                       
IB_CME_DESTINATION_UNREACHABLE, <br>
-                                       
NULL, conn->ep);<br>
+                 dapl_dbg_log(DAPL_DBG_TYPE_WARN,
<br>
+                           
" CM ROUTE ERROR: -> %s retry (%d)..\n", <br>
+                           
inet_ntoa(((struct sockaddr_in *)<br>
+                             &conn->r_addr)->sin_addr),<br>
+                           
conn->route_retries );<br>
+<br>
+                 /*
retry route resolution */<br>
+                 if
(--conn->route_retries) <br>
+                       dapli_addr_resolve(conn);<br>
+                 else
<br>
+                       dapl_evd_connection_callback(
conn, <br>
+                             IB_CME_DESTINATION_UNREACHABLE,
<br>
+                             NULL,
conn->ep);<br>
                  break;<br>
+           <br>
            case
RDMA_CM_EVENT_DEVICE_REMOVAL:<br>
                  dapl_evd_connection_callback(conn,
<br>
                                        
IB_CME_LOCAL_FAILURE, <br>
Index: dapl/openib_cma/dapl_ib_qp.c<br>
===================================================================<br>
--- dapl/openib_cma/dapl_ib_qp.c    (revision 10032)<br>
+++ dapl/openib_cma/dapl_ib_qp.c    (working copy)<br>
@@ -160,6 +168,17 @@<br>
      conn->cm_id = cm_id;<br>
      conn->ep = ep_ptr;<br>
      conn->hca = ia_ptr->hca_ptr;<br>
+<br>
+     /* setup timers for address and route resolution */<br>
+     conn->arp_timeout =
dapl_os_get_env_val("DAPL_CM_ARP_TIMEOUT_MS", <br>
+                                   IB_ARP_TIMEOUT);<br>
+     conn->arp_retries =
dapl_os_get_env_val("DAPL_CM_ARP_RETRY_COUNT", <br>
+                                   IB_ARP_RETRY_COUNT);<br>
+     conn->route_timeout =
dapl_os_get_env_val("DAPL_CM_ROUTE_TIMEOUT_MS", <br>
+                                      
IB_ROUTE_TIMEOUT);<br>
+     conn->route_retries =
dapl_os_get_env_val("DAPL_CM_ROUTE_RETRY_COUNT", <br>
+                                      
IB_ROUTE_RETRY_COUNT);<br>
+<br>
      ep_ptr->qp_handle = conn;<br>
      ep_ptr->qp_state = IB_QP_STATE_INIT;<br>
      <br>
<br>
Index: dapl/openib_cma/dapl_ib_util.h<br>
===================================================================<br>
--- dapl/openib_cma/dapl_ib_util.h  (revision 9916)<br>
+++ dapl/openib_cma/dapl_ib_util.h  (working copy)<br>
@@ -67,8 +67,12 @@<br>
 <br>
 #define IB_RC_RETRY_COUNT      7<br>
 #define IB_RNR_RETRY_COUNT     7<br>
-#define IB_CM_RESPONSE_TIMEOUT  20 /* 4 sec */<br>
-#define
IB_CM_RETRIES           15<br>
+#define IB_CM_RESPONSE_TIMEOUT  23 /* 16 sec */<br>
+#define
IB_CM_RETRIES           15 /*
240 sec total default */<br>
+#define IB_ARP_TIMEOUT       4000  /* 4 sec
*/<br>
+#define IB_ARP_RETRY_COUNT   15    /* 60 sec total */<br>
+#define IB_ROUTE_TIMEOUT     4000  /* 4 sec */<br>
+#define IB_ROUTE_RETRY_COUNT 15    /* 60 sec total */<br>
 #define IB_REQ_MRA_TIMEOUT   27    /* a little
over 9 minutes */<br>
 #define IB_MAX_AT_RETRY            3<br>
 #define IB_TARGET_MAX        4     /*
max_qp_ous_rd_atom */<br>
@@ -177,12 +181,17 @@<br>
 struct dapl_cm_id {<br>
      DAPL_OS_LOCK                  lock;<br>
      int                     destroy;<br>
+     int                     arp_retries;<br>
+     int                     arp_timeout;<br>
+     int                     route_retries;<br>
+     int                     route_timeout;<br>
      int                     in_callback;<br>
      struct rdma_cm_id       *cm_id;<br>
      struct dapl_hca               *hca;<br>
      struct dapl_sp                *sp;<br>
      struct dapl_ep                *ep;<br>
      struct rdma_conn_param        params;<br>
+     DAT_SOCK_ADDR6                r_addr;<br>
      int                     p_len;<br>
      unsigned char                 p_data[IB_MAX_DREP_PDATA_SIZE];<br>
 };<br>
<o:p> </o:p></span></font></p>

<p class=MsoNormal><font size=2 face=Arial><span style='font-size:10.0pt;
font-family:Arial'><o:p> </o:p></span></font></p>

</div>

</body>

</html>