[openib-general] [PATCH 3/3] uDAPL cma: add support for address and route retries, call disconnect when receiving DREQ
Arlin Davis
arlin.r.davis at intel.com
Mon Nov 6 14:44:33 PST 2006
Fix some timeout and long disconnect-delay issues discovered during scale-out testing. Add support
for retrying rdma_cm address and route resolution, with configurable timeouts and retry counts.
Call rdma_disconnect() when a disconnect request is received, to guarantee a disconnect reply and
event on the remote side. Previously, rdma_disconnect() was not being called from
dat_ep_disconnect() because the event callback had already changed the state to DISCONNECTED.
Here are the new options (environment variables) with their default settings:
DAPL_CM_ARP_TIMEOUT_MS 4000
DAPL_CM_ARP_RETRY_COUNT 15
DAPL_CM_ROUTE_TIMEOUT_MS 4000
DAPL_CM_ROUTE_RETRY_COUNT 15
Signed-off-by: Arlin Davis ardavis at ichips.intel.com
Index: dapl/openib_cma/dapl_ib_cm.c
===================================================================
--- dapl/openib_cma/dapl_ib_cm.c (revision 10032)
+++ dapl/openib_cma/dapl_ib_cm.c (working copy)
@@ -58,6 +58,9 @@
#include "dapl_ib_util.h"
#include <sys/poll.h>
#include <signal.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
#include <rdma/rdma_cma_ib.h>
extern struct rdma_event_channel *g_cm_events;
@@ -99,8 +102,8 @@ static void dapli_addr_resolve(struct da
&ipaddr->src_addr)->sin_addr.s_addr),
ntohl(((struct sockaddr_in *)
&ipaddr->dst_addr)->sin_addr.s_addr));
-
- ret = rdma_resolve_route(conn->cm_id, 2000);
+
+ ret = rdma_resolve_route(conn->cm_id, conn->route_timeout);
if (ret) {
dapl_dbg_log(DAPL_DBG_TYPE_ERR,
" rdma_connect failed: %s\n",strerror(errno));
@@ -120,6 +123,7 @@ static void dapli_route_resolve(struct d
struct rdma_addr *ipaddr = &conn->cm_id->route.addr;
struct ib_addr *ibaddr = &conn->cm_id->route.addr.addr.ibaddr;
#endif
+
dapl_dbg_log(DAPL_DBG_TYPE_CM,
" route_resolve: cm_id %p SRC %x DST %x PORT %d\n",
conn->cm_id,
@@ -331,21 +335,17 @@ static void dapli_cm_active_cb(struct da
case RDMA_CM_EVENT_UNREACHABLE:
case RDMA_CM_EVENT_CONNECT_ERROR:
{
- ib_cm_events_t cm_event;
- dapl_dbg_log(
+ dapl_dbg_log(
DAPL_DBG_TYPE_WARN,
" dapli_cm_active_handler: CONN_ERR "
" event=0x%x status=%d %s\n",
event->event, event->status,
(event->status == -ETIMEDOUT)?"TIMEOUT":"" );
- /* no device type specified so assume IB for now */
- if (event->status == -ETIMEDOUT) /* IB timeout */
- cm_event = IB_CME_TIMEOUT;
- else
- cm_event = IB_CME_DESTINATION_UNREACHABLE;
-
- dapl_evd_connection_callback(conn, cm_event, NULL, conn->ep);
+ /* per DAT SPEC provider always returns UNREACHABLE */
+ dapl_evd_connection_callback(conn,
+ IB_CME_DESTINATION_UNREACHABLE,
+ NULL, conn->ep);
break;
}
case RDMA_CM_EVENT_REJECTED:
@@ -381,6 +381,7 @@ static void dapli_cm_active_cb(struct da
break;
case RDMA_CM_EVENT_DISCONNECTED:
+ rdma_disconnect(conn->cm_id); /* force the DREP */
/* validate EP handle */
if (!DAPL_BAD_HANDLE(conn->ep, DAPL_MAGIC_EP))
dapl_evd_connection_callback(conn,
@@ -494,6 +495,7 @@ static void dapli_cm_passive_cb(struct d
break;
case RDMA_CM_EVENT_DISCONNECTED:
+ rdma_disconnect(conn->cm_id); /* force the DREP */
/* validate SP handle context */
if (!DAPL_BAD_HANDLE(conn->sp, DAPL_MAGIC_PSP) ||
!DAPL_BAD_HANDLE(conn->sp, DAPL_MAGIC_RSP))
@@ -543,7 +545,8 @@ DAT_RETURN dapls_ib_connect(IN DAT_EP_HA
IN void *p_data)
{
struct dapl_ep *ep_ptr = ep_handle;
-
+ struct dapl_cm_id *conn;
+
/* Sanity check */
if (NULL == ep_ptr)
return DAT_SUCCESS;
@@ -552,36 +555,38 @@ DAT_RETURN dapls_ib_connect(IN DAT_EP_HA
r_qual,p_data,p_size);
/* rdma conn and cm_id pre-bound; reference via qp_handle */
- ep_ptr->cm_handle = ep_ptr->qp_handle;
+ conn = ep_ptr->cm_handle = ep_ptr->qp_handle;
/* Setup QP/CM parameters and private data in cm_id */
- (void)dapl_os_memzero(&ep_ptr->cm_handle->params,
- sizeof(ep_ptr->cm_handle->params));
- ep_ptr->cm_handle->params.responder_resources = IB_TARGET_MAX;
- ep_ptr->cm_handle->params.initiator_depth = IB_INITIATOR_DEPTH;
- ep_ptr->cm_handle->params.flow_control = 1;
- ep_ptr->cm_handle->params.rnr_retry_count = IB_RNR_RETRY_COUNT;
- ep_ptr->cm_handle->params.retry_count = IB_RC_RETRY_COUNT;
+ (void)dapl_os_memzero(&conn->params, sizeof(conn->params));
+ conn->params.responder_resources = IB_TARGET_MAX;
+ conn->params.initiator_depth = IB_INITIATOR_DEPTH;
+ conn->params.flow_control = 1;
+ conn->params.rnr_retry_count = IB_RNR_RETRY_COUNT;
+ conn->params.retry_count = IB_RC_RETRY_COUNT;
if (p_size) {
- dapl_os_memcpy(ep_ptr->cm_handle->p_data, p_data, p_size);
- ep_ptr->cm_handle->params.private_data =
- ep_ptr->cm_handle->p_data;
- ep_ptr->cm_handle->params.private_data_len = p_size;
+ dapl_os_memcpy(conn->p_data, p_data, p_size);
+ conn->params.private_data = conn->p_data;
+ conn->params.private_data_len = p_size;
}
+ /* copy in remote address, need a copy for retry attempts */
+ dapl_os_memcpy(&conn->r_addr, r_addr, sizeof(*r_addr));
+
/* Resolve remote address, src already bound during QP create */
- ((struct sockaddr_in*)r_addr)->sin_port = htons(MAKE_PORT(r_qual));
- if (rdma_resolve_addr(ep_ptr->cm_handle->cm_id,
- NULL, (struct sockaddr *)r_addr, 2000))
+ ((struct sockaddr_in*)&conn->r_addr)->sin_port = htons(MAKE_PORT(r_qual));
+ ((struct sockaddr_in*)&conn->r_addr)->sin_family = AF_INET;
+
+ if (rdma_resolve_addr(conn->cm_id, NULL,
+ (struct sockaddr *)&conn->r_addr,
+ conn->arp_timeout))
return dapl_convert_errno(errno,"ib_connect");
dapl_dbg_log(DAPL_DBG_TYPE_CM,
- " connect: resolve_addr: cm_id %p SRC %x DST %x port %d\n",
- ep_ptr->cm_handle->cm_id,
- ntohl(((struct sockaddr_in *)
- &ep_ptr->cm_handle->hca->hca_address)->sin_addr.s_addr),
- ntohl(((struct sockaddr_in *)r_addr)->sin_addr.s_addr),
- MAKE_PORT(r_qual) );
+ " connect: resolve_addr: cm_id %p -> %s port %d\n",
+ conn->cm_id,
+ inet_ntoa(((struct sockaddr_in *)&conn->r_addr)->sin_addr),
+ ((struct sockaddr_in*)&conn->r_addr)->sin_port );
return DAT_SUCCESS;
}
@@ -1163,15 +1168,60 @@ void dapli_cma_event_cb(void)
case RDMA_CM_EVENT_ADDR_RESOLVED:
dapli_addr_resolve(conn);
break;
+
case RDMA_CM_EVENT_ROUTE_RESOLVED:
dapli_route_resolve(conn);
break;
+
case RDMA_CM_EVENT_ADDR_ERROR:
+ dapl_dbg_log(DAPL_DBG_TYPE_WARN,
+ " CM ADDR ERROR: -> %s retry (%d)..\n",
+ inet_ntoa(((struct sockaddr_in *)
+ &conn->r_addr)->sin_addr),
+ conn->arp_retries);
+
+ /* retry address resolution */
+ if ((--conn->arp_retries) &&
+ (event->status == -ETIMEDOUT)) {
+ int ret;
+ ret = rdma_resolve_addr(
+ conn->cm_id, NULL,
+ (struct sockaddr *)&conn->r_addr,
+ conn->arp_timeout);
+ if (!ret)
+ break;
+ else {
+ dapl_dbg_log(
+ DAPL_DBG_TYPE_WARN,
+ " ERROR: rdma_resolve_addr = "
+ "%d %s\n",
+ ret,strerror(errno));
+ }
+ }
+ /* retries exhausted or resolve_addr failed */
+ dapl_evd_connection_callback(
+ conn, IB_CME_DESTINATION_UNREACHABLE,
+ NULL, conn->ep);
+ break;
+
+
case RDMA_CM_EVENT_ROUTE_ERROR:
- dapl_evd_connection_callback(conn,
- IB_CME_DESTINATION_UNREACHABLE,
- NULL, conn->ep);
+ dapl_dbg_log(DAPL_DBG_TYPE_WARN,
+ " CM ROUTE ERROR: -> %s retry (%d)..\n",
+ inet_ntoa(((struct sockaddr_in *)
+ &conn->r_addr)->sin_addr),
+ conn->route_retries );
+
+ /* retry route resolution */
+ if ((--conn->route_retries) &&
+ (event->status == -ETIMEDOUT))
+ dapli_addr_resolve(conn);
+ else
+ dapl_evd_connection_callback( conn,
+ IB_CME_DESTINATION_UNREACHABLE,
+ NULL, conn->ep);
break;
+
case RDMA_CM_EVENT_DEVICE_REMOVAL:
dapl_evd_connection_callback(conn,
IB_CME_LOCAL_FAILURE,
Index: dapl/openib_cma/dapl_ib_qp.c
===================================================================
--- dapl/openib_cma/dapl_ib_qp.c (revision 10032)
+++ dapl/openib_cma/dapl_ib_qp.c (working copy)
@@ -160,6 +168,17 @@ DAT_RETURN dapls_ib_qp_alloc(IN DAPL_IA
conn->cm_id = cm_id;
conn->ep = ep_ptr;
conn->hca = ia_ptr->hca_ptr;
+
+ /* setup timers for address and route resolution */
+ conn->arp_timeout = dapl_os_get_env_val("DAPL_CM_ARP_TIMEOUT_MS",
+ IB_ARP_TIMEOUT);
+ conn->arp_retries = dapl_os_get_env_val("DAPL_CM_ARP_RETRY_COUNT",
+ IB_ARP_RETRY_COUNT);
+ conn->route_timeout = dapl_os_get_env_val("DAPL_CM_ROUTE_TIMEOUT_MS",
+ IB_ROUTE_TIMEOUT);
+ conn->route_retries = dapl_os_get_env_val("DAPL_CM_ROUTE_RETRY_COUNT",
+ IB_ROUTE_RETRY_COUNT);
+
ep_ptr->qp_handle = conn;
ep_ptr->qp_state = IB_QP_STATE_INIT;
Index: dapl/openib_cma/dapl_ib_util.h
===================================================================
--- dapl/openib_cma/dapl_ib_util.h (revision 10032)
+++ dapl/openib_cma/dapl_ib_util.h (working copy)
@@ -67,8 +67,12 @@ typedef ib_hca_handle_t dapl_ibal_ca_t;
#define IB_RC_RETRY_COUNT 7
#define IB_RNR_RETRY_COUNT 7
-#define IB_CM_RESPONSE_TIMEOUT 20 /* 4 sec */
-#define IB_CM_RETRIES 15
+#define IB_CM_RESPONSE_TIMEOUT 23 /* 16 sec */
+#define IB_CM_RETRIES 15 /* 240 sec total default */
+#define IB_ARP_TIMEOUT 4000 /* 4 sec */
+#define IB_ARP_RETRY_COUNT 15 /* 60 sec total */
+#define IB_ROUTE_TIMEOUT 4000 /* 4 sec */
+#define IB_ROUTE_RETRY_COUNT 15 /* 60 sec total */
#define IB_REQ_MRA_TIMEOUT 27 /* a little over 9 minutes */
#define IB_MAX_AT_RETRY 3
#define IB_TARGET_MAX 4 /* max_qp_ous_rd_atom */
@@ -177,12 +181,17 @@ struct ib_llist_entry
struct dapl_cm_id {
DAPL_OS_LOCK lock;
int destroy;
+ int arp_retries;
+ int arp_timeout;
+ int route_retries;
+ int route_timeout;
int in_callback;
struct rdma_cm_id *cm_id;
struct dapl_hca *hca;
struct dapl_sp *sp;
struct dapl_ep *ep;
struct rdma_conn_param params;
+ DAT_SOCK_ADDR6 r_addr;
int p_len;
unsigned char p_data[IB_MAX_DREP_PDATA_SIZE];
};
More information about the general
mailing list