<html xmlns:v="urn:schemas-microsoft-com:vml" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns="http://www.w3.org/TR/REC-html40">
<head>
<meta http-equiv=Content-Type content="text/html; charset=us-ascii">
<meta name=Generator content="Microsoft Word 11 (filtered medium)">
<style>
<!--
/* Style Definitions */
p.MsoNormal, li.MsoNormal, div.MsoNormal
{margin:0in;
margin-bottom:.0001pt;
font-size:12.0pt;
font-family:"Times New Roman";}
a:link, span.MsoHyperlink
{color:blue;
text-decoration:underline;}
a:visited, span.MsoHyperlinkFollowed
{color:purple;
text-decoration:underline;}
p.MsoPlainText, li.MsoPlainText, div.MsoPlainText
{margin:0in;
margin-bottom:.0001pt;
font-size:10.0pt;
font-family:"Courier New";}
pre
{margin:0in;
margin-bottom:.0001pt;
font-size:10.0pt;
font-family:"Courier New";}
span.EmailStyle17
{mso-style-type:personal-compose;
font-family:Arial;
color:windowtext;}
@page Section1
{size:8.5in 11.0in;
margin:1.0in 1.25in 1.0in 1.25in;}
div.Section1
{page:Section1;}
-->
</style>
<!--[if gte mso 9]><xml>
<o:shapedefaults v:ext="edit" spidmax="1026" />
</xml><![endif]--><!--[if gte mso 9]><xml>
<o:shapelayout v:ext="edit">
<o:idmap v:ext="edit" data="1" />
</o:shapelayout></xml><![endif]-->
</head>
<body lang=EN-US link=blue vlink=purple>
<div class=Section1>
<p class=MsoNormal><font size=2 face=Arial><span style='font-size:10.0pt;
font-family:Arial'>Fix some timeout and long disconnect delay issues discovered
during scale-out testing. Add support for retrying rdma_cm address and route
resolution, controlled by new configuration options, and call rdma_disconnect
when a disconnect request is received to force an immediate disconnect reply to
the remote side. <o:p></o:p></span></font></p>
<p class=MsoNormal><font size=2 face=Arial><span style='font-size:10.0pt;
font-family:Arial'><o:p> </o:p></span></font></p>
<p class=MsoNormal style='text-autospace:none'><font size=2 face=Arial><span
style='font-size:10.0pt;font-family:Arial'>Here are the new options
(environment variables) with their default settings:<o:p></o:p></span></font></p>
<p class=MsoNormal style='text-autospace:none'><font size=2 face=Arial><span
style='font-size:10.0pt;font-family:Arial'><o:p> </o:p></span></font></p>
<p class=MsoNormal style='text-autospace:none'><font size=2 face="Courier New"><span
style='font-size:10.0pt;font-family:"Courier New"'>DAPL_CM_ARP_TIMEOUT_MS
4000<o:p></o:p></span></font></p>
<p class=MsoNormal style='text-autospace:none'><font size=2 face="Courier New"><span
style='font-size:10.0pt;font-family:"Courier New"'>DAPL_CM_ARP_RETRY_COUNT
15<o:p></o:p></span></font></p>
<p class=MsoNormal style='text-autospace:none'><font size=2 face="Courier New"><span
style='font-size:10.0pt;font-family:"Courier New"'>DAPL_CM_ROUTE_TIMEOUT_MS
4000<o:p></o:p></span></font></p>
<p class=MsoNormal style='text-autospace:none'><font size=2 face="Courier New"><span
style='font-size:10.0pt;font-family:"Courier New"'>DAPL_CM_ROUTE_RETRY_COUNT 15<o:p></o:p></span></font></p>
<p class=MsoNormal><font size=2 face=Arial><span style='font-size:10.0pt;
font-family:Arial'><o:p> </o:p></span></font></p>
<p class=MsoNormal><font size=2 face=Arial><span style='font-size:10.0pt;
font-family:Arial'><o:p> </o:p></span></font></p>
<p class=MsoNormal><font size=2 face=Arial><span style='font-size:10.0pt;
font-family:Arial'> </span></font>Signed-off-by: Arlin Davis <a
href="mailto:ardavis@ichips.intel.com">ardavis@ichips.intel.com</a><o:p></o:p></p>
<p class=MsoNormal><font size=2 face=Arial><span style='font-size:10.0pt;
font-family:Arial'><o:p> </o:p></span></font></p>
<p class=MsoPlainText><font size=2 face="Courier New"><span style='font-size:
10.0pt'>Index: dapl/openib_cma/dapl_ib_cm.c<br>
===================================================================<br>
--- dapl/openib_cma/dapl_ib_cm.c (revision 9916)<br>
+++ dapl/openib_cma/dapl_ib_cm.c (working copy)<br>
@@ -58,6 +58,9 @@<br>
#include "dapl_ib_util.h"<br>
#include &lt;sys/poll.h&gt;<br>
#include &lt;signal.h&gt;<br>
+#include &lt;sys/socket.h&gt;<br>
+#include &lt;netinet/in.h&gt;<br>
+#include &lt;arpa/inet.h&gt;<br>
#include &lt;rdma/rdma_cma_ib.h&gt;<br>
<br>
extern struct rdma_event_channel *g_cm_events;<br>
@@ -99,8 +102,8 @@<br>
&ipaddr->src_addr)->sin_addr.s_addr),<br>
ntohl(((struct
sockaddr_in *)<br>
&ipaddr->dst_addr)->sin_addr.s_addr));<br>
-<br>
- ret = rdma_resolve_route(conn->cm_id, 2000);<br>
+ <br>
+ ret = rdma_resolve_route(conn->cm_id,
conn->route_timeout);<br>
if (ret) {<br>
dapl_dbg_log(DAPL_DBG_TYPE_ERR,
<br>
" rdma_connect failed: %s\n",strerror(errno));<br>
@@ -120,6 +123,7 @@<br>
struct rdma_addr *ipaddr =
&conn->cm_id->route.addr;<br>
struct ib_addr *ibaddr =
&conn->cm_id->route.addr.addr.ibaddr;<br>
#endif<br>
+<br>
dapl_dbg_log(DAPL_DBG_TYPE_CM, <br>
"
route_resolve: cm_id %p SRC %x DST %x PORT %d\n", <br>
conn->cm_id,
<br>
@@ -381,6 +385,7 @@<br>
break;<br>
<br>
case RDMA_CM_EVENT_DISCONNECTED:<br>
+ rdma_disconnect(conn->cm_id);
/* force the DREP */<br>
/* validate
EP handle */<br>
if
(!DAPL_BAD_HANDLE(conn->ep, DAPL_MAGIC_EP)) <br>
dapl_evd_connection_callback(conn,
<br>
@@ -494,6 +499,7 @@<br>
<br>
break;<br>
case RDMA_CM_EVENT_DISCONNECTED:<br>
+ rdma_disconnect(conn->cm_id);
/* force the DREP */<br>
/* validate
SP handle context */<br>
if
(!DAPL_BAD_HANDLE(conn->sp, DAPL_MAGIC_PSP) || <br>
!DAPL_BAD_HANDLE(conn->sp, DAPL_MAGIC_RSP))<br>
@@ -543,7 +549,8 @@<br>
IN void *p_data)<br>
{<br>
struct dapl_ep *ep_ptr = ep_handle;<br>
- <br>
+ struct dapl_cm_id *conn;<br>
+ <br>
/* Sanity check */<br>
if (NULL == ep_ptr) <br>
return
DAT_SUCCESS;<br>
@@ -552,36 +559,38 @@<br>
r_qual,p_data,p_size);<br>
<br>
/* rdma conn and cm_id pre-bound; reference via
qp_handle */<br>
- ep_ptr->cm_handle = ep_ptr->qp_handle;<br>
+ conn = ep_ptr->cm_handle = ep_ptr->qp_handle;<br>
<br>
/* Setup QP/CM parameters and private data in
cm_id */<br>
- (void)dapl_os_memzero(&ep_ptr->cm_handle->params,<br>
-
sizeof(ep_ptr->cm_handle->params));<br>
- ep_ptr->cm_handle->params.responder_resources =
IB_TARGET_MAX;<br>
- ep_ptr->cm_handle->params.initiator_depth =
IB_INITIATOR_DEPTH;<br>
- ep_ptr->cm_handle->params.flow_control = 1;<br>
- ep_ptr->cm_handle->params.rnr_retry_count =
IB_RNR_RETRY_COUNT;<br>
- ep_ptr->cm_handle->params.retry_count =
IB_RC_RETRY_COUNT;<br>
+ (void)dapl_os_memzero(&conn->params,
sizeof(conn->params));<br>
+ conn->params.responder_resources = IB_TARGET_MAX;<br>
+ conn->params.initiator_depth = IB_INITIATOR_DEPTH;<br>
+ conn->params.flow_control = 1;<br>
+ conn->params.rnr_retry_count = IB_RNR_RETRY_COUNT;<br>
+ conn->params.retry_count = IB_RC_RETRY_COUNT;<br>
if (p_size) {<br>
- dapl_os_memcpy(ep_ptr->cm_handle->p_data,
p_data, p_size);<br>
- ep_ptr->cm_handle->params.private_data
= <br>
- ep_ptr->cm_handle->p_data;<br>
- ep_ptr->cm_handle->params.private_data_len
= p_size;<br>
+ dapl_os_memcpy(conn->p_data,
p_data, p_size);<br>
+ conn->params.private_data
= conn->p_data;<br>
+ conn->params.private_data_len
= p_size;<br>
}<br>
<br>
+ /* copy in remote address, need a copy for retry
attempts */<br>
+ dapl_os_memcpy(&conn->r_addr, r_addr,
sizeof(*r_addr));<br>
+<br>
/* Resolve remote address, src already bound
during QP create */<br>
- ((struct sockaddr_in*)r_addr)->sin_port =
htons(MAKE_PORT(r_qual));<br>
- if (rdma_resolve_addr(ep_ptr->cm_handle->cm_id,
<br>
-
NULL, (struct sockaddr *)r_addr, 2000))<br>
+ ((struct
sockaddr_in*)&conn->r_addr)->sin_port = htons(MAKE_PORT(r_qual));<br>
+ ((struct
sockaddr_in*)&conn->r_addr)->sin_family = AF_INET;<br>
+<br>
+ if (rdma_resolve_addr(conn->cm_id, NULL, <br>
+
(struct sockaddr *)&conn->r_addr, <br>
+
conn->arp_timeout))<br>
return
dapl_convert_errno(errno,"ib_connect");<br>
<br>
dapl_dbg_log(DAPL_DBG_TYPE_CM, <br>
- " connect:
resolve_addr: cm_id %p SRC %x DST %x port %d\n", <br>
- ep_ptr->cm_handle->cm_id,
<br>
- ntohl(((struct
sockaddr_in *)<br>
-
&ep_ptr->cm_handle->hca->hca_address)->sin_addr.s_addr),<br>
- ntohl(((struct
sockaddr_in *)r_addr)->sin_addr.s_addr),<br>
- MAKE_PORT(r_qual)
);<br>
+ " connect:
resolve_addr: cm_id %p -> %s port %d\n", <br>
+ conn->cm_id, <br>
+ inet_ntoa(((struct
sockaddr_in *)&conn->r_addr)->sin_addr),<br>
+ ((struct
sockaddr_in*)&conn->r_addr)->sin_port );<br>
<br>
return DAT_SUCCESS;<br>
}<br>
@@ -1163,15 +1172,58 @@<br>
case
RDMA_CM_EVENT_ADDR_RESOLVED:<br>
dapli_addr_resolve(conn);<br>
break;<br>
+<br>
case
RDMA_CM_EVENT_ROUTE_RESOLVED:<br>
dapli_route_resolve(conn);<br>
break;<br>
+<br>
case
RDMA_CM_EVENT_ADDR_ERROR:<br>
+ dapl_dbg_log(DAPL_DBG_TYPE_WARN,<br>
+
" CM ADDR ERROR: -> %s retry (%d)..\n", <br>
+
inet_ntoa(((struct sockaddr_in *)<br>
+ &conn->r_addr)->sin_addr),<br>
+ conn->arp_retries);<br>
+ <br>
+ /*
retry address resolution */<br>
+ if
(--conn->arp_retries) {<br>
+ int
ret;<br>
+ ret
= rdma_resolve_addr(<br>
+ conn->cm_id,
NULL, <br>
+ (struct
sockaddr *)&conn->r_addr, <br>
+ conn->arp_timeout);<br>
+ if
(!ret) <br>
+ break;<br>
+ else
{ <br>
+ dapl_dbg_log(<br>
+ DAPL_DBG_TYPE_WARN,<br>
+ "
ERROR: rdma_resolve_addr = "<br>
+ "%d
%s\n", <br>
+ ret,strerror(errno));<br>
+ }<br>
+ }
<br>
+ /*
retries exhausted or resolve_addr failed */<br>
+ dapl_evd_connection_callback(<br>
+ conn,
IB_CME_DESTINATION_UNREACHABLE, <br>
+ NULL,
conn->ep);<br>
+ break;<br>
+<br>
+<br>
case
RDMA_CM_EVENT_ROUTE_ERROR:<br>
- dapl_evd_connection_callback(conn,
<br>
-
IB_CME_DESTINATION_UNREACHABLE, <br>
-
NULL, conn->ep);<br>
+ dapl_dbg_log(DAPL_DBG_TYPE_WARN,
<br>
+
" CM ROUTE ERROR: -> %s retry (%d)..\n", <br>
+
inet_ntoa(((struct sockaddr_in *)<br>
+ &conn->r_addr)->sin_addr),<br>
+
conn->route_retries );<br>
+<br>
+ /*
retry route resolution */<br>
+ if
(--conn->route_retries) <br>
+ dapli_addr_resolve(conn);<br>
+ else
<br>
+ dapl_evd_connection_callback(
conn, <br>
+ IB_CME_DESTINATION_UNREACHABLE,
<br>
+ NULL,
conn->ep);<br>
break;<br>
+ <br>
case
RDMA_CM_EVENT_DEVICE_REMOVAL:<br>
dapl_evd_connection_callback(conn,
<br>
IB_CME_LOCAL_FAILURE, <br>
Index: dapl/openib_cma/dapl_ib_qp.c<br>
===================================================================<br>
--- dapl/openib_cma/dapl_ib_qp.c (revision 10032)<br>
+++ dapl/openib_cma/dapl_ib_qp.c (working copy)<br>
@@ -160,6 +168,17 @@<br>
conn->cm_id = cm_id;<br>
conn->ep = ep_ptr;<br>
conn->hca = ia_ptr->hca_ptr;<br>
+<br>
+ /* setup timers for address and route resolution */<br>
+ conn->arp_timeout =
dapl_os_get_env_val("DAPL_CM_ARP_TIMEOUT_MS", <br>
+ IB_ARP_TIMEOUT);<br>
+ conn->arp_retries =
dapl_os_get_env_val("DAPL_CM_ARP_RETRY_COUNT", <br>
+ IB_ARP_RETRY_COUNT);<br>
+ conn->route_timeout =
dapl_os_get_env_val("DAPL_CM_ROUTE_TIMEOUT_MS", <br>
+
IB_ROUTE_TIMEOUT);<br>
+ conn->route_retries =
dapl_os_get_env_val("DAPL_CM_ROUTE_RETRY_COUNT", <br>
+
IB_ROUTE_RETRY_COUNT);<br>
+<br>
ep_ptr->qp_handle = conn;<br>
ep_ptr->qp_state = IB_QP_STATE_INIT;<br>
<br>
<br>
Index: dapl/openib_cma/dapl_ib_util.h<br>
===================================================================<br>
--- dapl/openib_cma/dapl_ib_util.h (revision 9916)<br>
+++ dapl/openib_cma/dapl_ib_util.h (working copy)<br>
@@ -67,8 +67,12 @@<br>
<br>
#define IB_RC_RETRY_COUNT 7<br>
#define IB_RNR_RETRY_COUNT 7<br>
-#define IB_CM_RESPONSE_TIMEOUT 20 /* 4 sec */<br>
-#define
IB_CM_RETRIES 15<br>
+#define IB_CM_RESPONSE_TIMEOUT 23 /* 16 sec */<br>
+#define
IB_CM_RETRIES 15 /*
240 sec total default */<br>
+#define IB_ARP_TIMEOUT 4000 /* 4 sec
*/<br>
+#define IB_ARP_RETRY_COUNT 15 /* 60 sec total */<br>
+#define IB_ROUTE_TIMEOUT 4000 /* 4 sec */<br>
+#define IB_ROUTE_RETRY_COUNT 15 /* 60 sec total */<br>
#define IB_REQ_MRA_TIMEOUT 27 /* a little
over 9 minutes */<br>
#define IB_MAX_AT_RETRY 3<br>
#define IB_TARGET_MAX 4 /*
max_qp_ous_rd_atom */<br>
@@ -177,12 +181,17 @@<br>
struct dapl_cm_id {<br>
DAPL_OS_LOCK lock;<br>
int destroy;<br>
+ int arp_retries;<br>
+ int arp_timeout;<br>
+ int route_retries;<br>
+ int route_timeout;<br>
int in_callback;<br>
struct rdma_cm_id *cm_id;<br>
struct dapl_hca *hca;<br>
struct dapl_sp *sp;<br>
struct dapl_ep *ep;<br>
struct rdma_conn_param params;<br>
+ DAT_SOCK_ADDR6 r_addr;<br>
int p_len;<br>
unsigned char p_data[IB_MAX_DREP_PDATA_SIZE];<br>
};<br>
<o:p> </o:p></span></font></p>
<p class=MsoNormal><font size=2 face=Arial><span style='font-size:10.0pt;
font-family:Arial'><o:p> </o:p></span></font></p>
</div>
</body>
</html>