[openib-general] [PATCH 3/3] uDAPL cma: add support for address and route retries, call disconnect when recving dreq
Arlin Davis
arlin.r.davis at intel.com
Wed Nov 1 16:37:39 PST 2006
Fix some timeout and long disconnect delay issues discovered during scale-out testing. Add support
for retrying rdma_cm address and route resolution, controlled by new configuration options, and call
rdma_disconnect when a disconnect request is received, to force an immediate disconnect reply to the
remote side.
Here are the new options (environment variables) with their default settings:
DAPL_CM_ARP_TIMEOUT_MS 4000
DAPL_CM_ARP_RETRY_COUNT 15
DAPL_CM_ROUTE_TIMEOUT_MS 4000
DAPL_CM_ROUTE_RETRY_COUNT 15
Signed-off-by: Arlin Davis ardavis at ichips.intel.com
Index: dapl/openib_cma/dapl_ib_cm.c
===================================================================
--- dapl/openib_cma/dapl_ib_cm.c (revision 9916)
+++ dapl/openib_cma/dapl_ib_cm.c (working copy)
@@ -58,6 +58,9 @@
#include "dapl_ib_util.h"
#include <sys/poll.h>
#include <signal.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
#include <rdma/rdma_cma_ib.h>
extern struct rdma_event_channel *g_cm_events;
@@ -99,8 +102,8 @@
&ipaddr->src_addr)->sin_addr.s_addr),
ntohl(((struct sockaddr_in *)
&ipaddr->dst_addr)->sin_addr.s_addr));
-
- ret = rdma_resolve_route(conn->cm_id, 2000);
+
+ ret = rdma_resolve_route(conn->cm_id, conn->route_timeout);
if (ret) {
dapl_dbg_log(DAPL_DBG_TYPE_ERR,
" rdma_connect failed: %s\n",strerror(errno));
@@ -120,6 +123,7 @@
struct rdma_addr *ipaddr = &conn->cm_id->route.addr;
struct ib_addr *ibaddr = &conn->cm_id->route.addr.addr.ibaddr;
#endif
+
dapl_dbg_log(DAPL_DBG_TYPE_CM,
" route_resolve: cm_id %p SRC %x DST %x PORT %d\n",
conn->cm_id,
@@ -381,6 +385,7 @@
break;
case RDMA_CM_EVENT_DISCONNECTED:
+ rdma_disconnect(conn->cm_id); /* force the DREP */
/* validate EP handle */
if (!DAPL_BAD_HANDLE(conn->ep, DAPL_MAGIC_EP))
dapl_evd_connection_callback(conn,
@@ -494,6 +499,7 @@
break;
case RDMA_CM_EVENT_DISCONNECTED:
+ rdma_disconnect(conn->cm_id); /* force the DREP */
/* validate SP handle context */
if (!DAPL_BAD_HANDLE(conn->sp, DAPL_MAGIC_PSP) ||
!DAPL_BAD_HANDLE(conn->sp, DAPL_MAGIC_RSP))
@@ -543,7 +549,8 @@
IN void *p_data)
{
struct dapl_ep *ep_ptr = ep_handle;
-
+ struct dapl_cm_id *conn;
+
/* Sanity check */
if (NULL == ep_ptr)
return DAT_SUCCESS;
@@ -552,36 +559,38 @@
r_qual,p_data,p_size);
/* rdma conn and cm_id pre-bound; reference via qp_handle */
- ep_ptr->cm_handle = ep_ptr->qp_handle;
+ conn = ep_ptr->cm_handle = ep_ptr->qp_handle;
/* Setup QP/CM parameters and private data in cm_id */
- (void)dapl_os_memzero(&ep_ptr->cm_handle->params,
- sizeof(ep_ptr->cm_handle->params));
- ep_ptr->cm_handle->params.responder_resources = IB_TARGET_MAX;
- ep_ptr->cm_handle->params.initiator_depth = IB_INITIATOR_DEPTH;
- ep_ptr->cm_handle->params.flow_control = 1;
- ep_ptr->cm_handle->params.rnr_retry_count = IB_RNR_RETRY_COUNT;
- ep_ptr->cm_handle->params.retry_count = IB_RC_RETRY_COUNT;
+ (void)dapl_os_memzero(&conn->params, sizeof(conn->params));
+ conn->params.responder_resources = IB_TARGET_MAX;
+ conn->params.initiator_depth = IB_INITIATOR_DEPTH;
+ conn->params.flow_control = 1;
+ conn->params.rnr_retry_count = IB_RNR_RETRY_COUNT;
+ conn->params.retry_count = IB_RC_RETRY_COUNT;
if (p_size) {
- dapl_os_memcpy(ep_ptr->cm_handle->p_data, p_data, p_size);
- ep_ptr->cm_handle->params.private_data =
- ep_ptr->cm_handle->p_data;
- ep_ptr->cm_handle->params.private_data_len = p_size;
+ dapl_os_memcpy(conn->p_data, p_data, p_size);
+ conn->params.private_data = conn->p_data;
+ conn->params.private_data_len = p_size;
}
+ /* copy in remote address, need a copy for retry attempts */
+ dapl_os_memcpy(&conn->r_addr, r_addr, sizeof(*r_addr));
+
/* Resolve remote address, src already bound during QP create */
- ((struct sockaddr_in*)r_addr)->sin_port = htons(MAKE_PORT(r_qual));
- if (rdma_resolve_addr(ep_ptr->cm_handle->cm_id,
- NULL, (struct sockaddr *)r_addr, 2000))
+ ((struct sockaddr_in*)&conn->r_addr)->sin_port = htons(MAKE_PORT(r_qual));
+ ((struct sockaddr_in*)&conn->r_addr)->sin_family = AF_INET;
+
+ if (rdma_resolve_addr(conn->cm_id, NULL,
+ (struct sockaddr *)&conn->r_addr,
+ conn->arp_timeout))
return dapl_convert_errno(errno,"ib_connect");
dapl_dbg_log(DAPL_DBG_TYPE_CM,
- " connect: resolve_addr: cm_id %p SRC %x DST %x port %d\n",
- ep_ptr->cm_handle->cm_id,
- ntohl(((struct sockaddr_in *)
- &ep_ptr->cm_handle->hca->hca_address)->sin_addr.s_addr),
- ntohl(((struct sockaddr_in *)r_addr)->sin_addr.s_addr),
- MAKE_PORT(r_qual) );
+ " connect: resolve_addr: cm_id %p -> %s port %d\n",
+ conn->cm_id,
+ inet_ntoa(((struct sockaddr_in *)&conn->r_addr)->sin_addr),
+ ((struct sockaddr_in*)&conn->r_addr)->sin_port );
return DAT_SUCCESS;
}
@@ -1163,15 +1172,58 @@
case RDMA_CM_EVENT_ADDR_RESOLVED:
dapli_addr_resolve(conn);
break;
+
case RDMA_CM_EVENT_ROUTE_RESOLVED:
dapli_route_resolve(conn);
break;
+
case RDMA_CM_EVENT_ADDR_ERROR:
+ dapl_dbg_log(DAPL_DBG_TYPE_WARN,
+ " CM ADDR ERROR: -> %s retry (%d)..\n",
+ inet_ntoa(((struct sockaddr_in *)
+ &conn->r_addr)->sin_addr),
+ conn->arp_retries);
+
+ /* retry address resolution */
+ if (--conn->arp_retries) {
+ int ret;
+ ret = rdma_resolve_addr(
+ conn->cm_id, NULL,
+ (struct sockaddr *)&conn->r_addr,
+ conn->arp_timeout);
+ if (!ret)
+ break;
+ else {
+ dapl_dbg_log(
+ DAPL_DBG_TYPE_WARN,
+ " ERROR: rdma_resolve_addr = "
+ "%d %s\n",
+ ret,strerror(errno));
+ }
+ }
+ /* retries exhausted or resolve_addr failed */
+ dapl_evd_connection_callback(
+ conn, IB_CME_DESTINATION_UNREACHABLE,
+ NULL, conn->ep);
+ break;
+
+
case RDMA_CM_EVENT_ROUTE_ERROR:
- dapl_evd_connection_callback(conn,
- IB_CME_DESTINATION_UNREACHABLE,
- NULL, conn->ep);
+ dapl_dbg_log(DAPL_DBG_TYPE_WARN,
+ " CM ROUTE ERROR: -> %s retry (%d)..\n",
+ inet_ntoa(((struct sockaddr_in *)
+ &conn->r_addr)->sin_addr),
+ conn->route_retries );
+
+ /* retry route resolution */
+ if (--conn->route_retries)
+ dapli_addr_resolve(conn);
+ else
+ dapl_evd_connection_callback( conn,
+ IB_CME_DESTINATION_UNREACHABLE,
+ NULL, conn->ep);
break;
+
case RDMA_CM_EVENT_DEVICE_REMOVAL:
dapl_evd_connection_callback(conn,
IB_CME_LOCAL_FAILURE,
Index: dapl/openib_cma/dapl_ib_qp.c
===================================================================
--- dapl/openib_cma/dapl_ib_qp.c (revision 10032)
+++ dapl/openib_cma/dapl_ib_qp.c (working copy)
@@ -160,6 +168,17 @@
conn->cm_id = cm_id;
conn->ep = ep_ptr;
conn->hca = ia_ptr->hca_ptr;
+
+ /* setup timers for address and route resolution */
+ conn->arp_timeout = dapl_os_get_env_val("DAPL_CM_ARP_TIMEOUT_MS",
+ IB_ARP_TIMEOUT);
+ conn->arp_retries = dapl_os_get_env_val("DAPL_CM_ARP_RETRY_COUNT",
+ IB_ARP_RETRY_COUNT);
+ conn->route_timeout = dapl_os_get_env_val("DAPL_CM_ROUTE_TIMEOUT_MS",
+ IB_ROUTE_TIMEOUT);
+ conn->route_retries = dapl_os_get_env_val("DAPL_CM_ROUTE_RETRY_COUNT",
+ IB_ROUTE_RETRY_COUNT);
+
ep_ptr->qp_handle = conn;
ep_ptr->qp_state = IB_QP_STATE_INIT;
Index: dapl/openib_cma/dapl_ib_util.h
===================================================================
--- dapl/openib_cma/dapl_ib_util.h (revision 9916)
+++ dapl/openib_cma/dapl_ib_util.h (working copy)
@@ -67,8 +67,12 @@
#define IB_RC_RETRY_COUNT 7
#define IB_RNR_RETRY_COUNT 7
-#define IB_CM_RESPONSE_TIMEOUT 20 /* 4 sec */
-#define IB_CM_RETRIES 15
+#define IB_CM_RESPONSE_TIMEOUT 23 /* 16 sec */
+#define IB_CM_RETRIES 15 /* 240 sec total default */
+#define IB_ARP_TIMEOUT 4000 /* 4 sec */
+#define IB_ARP_RETRY_COUNT 15 /* 60 sec total */
+#define IB_ROUTE_TIMEOUT 4000 /* 4 sec */
+#define IB_ROUTE_RETRY_COUNT 15 /* 60 sec total */
#define IB_REQ_MRA_TIMEOUT 27 /* a little over 9 minutes */
#define IB_MAX_AT_RETRY 3
#define IB_TARGET_MAX 4 /* max_qp_ous_rd_atom */
@@ -177,12 +181,17 @@
struct dapl_cm_id {
DAPL_OS_LOCK lock;
int destroy;
+ int arp_retries;
+ int arp_timeout;
+ int route_retries;
+ int route_timeout;
int in_callback;
struct rdma_cm_id *cm_id;
struct dapl_hca *hca;
struct dapl_sp *sp;
struct dapl_ep *ep;
struct rdma_conn_param params;
+ DAT_SOCK_ADDR6 r_addr;
int p_len;
unsigned char p_data[IB_MAX_DREP_PDATA_SIZE];
};
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openfabrics.org/pipermail/general/attachments/20061101/f969d934/attachment.html>
More information about the general
mailing list