[openib-general] [PATCH] uDAPL cma; dat_ep_free can return without freeing cm_id

Arlin Davis arlin.r.davis at intel.com
Thu Apr 6 14:28:52 PDT 2006


James and Steve,

Here is revised patch that was tested (free and debug build versions) with dtest, dapltest, and
Intel MPI test suites. The rdma_destroy_id will block until we acknowledge the event so there was no
need to add our own wait objects for synchronization. This will never be called from the async event
thread so there is no chance of deadlock conditions. I also made some changes to build with
configure –enable-debug. Some unused variables that were deleted are actually used in the debug
messages. Please review the changes. 

Steve, can you test this version and see if it works for your iWARP device.

Thanks,

-arlin

Signed-off by: Arlin Davis ardavis at ichips.intel.com
 
 
Index: dapl/openib_cma/dapl_ib_cm.c
===================================================================
--- dapl/openib_cma/dapl_ib_cm.c	(revision 6305)
+++ dapl/openib_cma/dapl_ib_cm.c	(working copy)
@@ -62,9 +62,9 @@
 /* local prototypes */
 static struct dapl_cm_id * dapli_req_recv(struct dapl_cm_id *conn, 
 					  struct rdma_cm_event *event);
-static int dapli_cm_active_cb(struct dapl_cm_id *conn, 
+static void dapli_cm_active_cb(struct dapl_cm_id *conn, 
 			      struct rdma_cm_event *event);
-static int dapli_cm_passive_cb(struct dapl_cm_id *conn, 
+static void dapli_cm_passive_cb(struct dapl_cm_id *conn, 
 			       struct rdma_cm_event *event);
 static void dapli_addr_resolve(struct dapl_cm_id *conn);
 static void dapli_route_resolve(struct dapl_cm_id *conn);
@@ -87,7 +87,9 @@ static inline uint64_t cpu_to_be64(uint6
 static void dapli_addr_resolve(struct dapl_cm_id *conn)
 {
 	int ret;
-	
+#ifdef DAPL_DBG
+	struct rdma_addr *ipaddr = &conn->cm_id->route.addr;
+#endif
 	dapl_dbg_log(DAPL_DBG_TYPE_CM, 
 		" addr_resolve: cm_id %p SRC %x DST %x\n", 
 		conn->cm_id, 
@@ -110,8 +112,10 @@ static void dapli_addr_resolve(struct da
 static void dapli_route_resolve(struct dapl_cm_id *conn)
 {
 	int ret;
-	struct rdma_cm_id *cm_id = conn->cm_id;
-
+#ifdef DAPL_DBG
+	struct rdma_addr *ipaddr = &conn->cm_id->route.addr;
+	struct ib_addr   *ibaddr = &conn->cm_id->route.addr.addr.ibaddr;
+#endif
 	dapl_dbg_log(DAPL_DBG_TYPE_CM, 
 		" route_resolve: cm_id %p SRC %x DST %x PORT %d\n", 
 		conn->cm_id, 
@@ -158,37 +162,51 @@ bail:
 				     NULL, conn->ep);
 }
 
+/* 
+ * Called from consumer thread via dat_ep_free().
+ * CANNOT be called from the async event processing thread
+ * dapli_cma_event_cb() since a cm_id reference is held and
+ * a deadlock will occur.
+ */
 void dapli_destroy_conn(struct dapl_cm_id *conn)
 {
-	int in_callback;
+	struct rdma_cm_id *cm_id;
 
 	dapl_dbg_log(DAPL_DBG_TYPE_CM, 
 		     " destroy_conn: conn %p id %d\n",
 		     conn,conn->cm_id);
-
+	
 	dapl_os_lock(&conn->lock);
 	conn->destroy = 1;
-	in_callback = conn->in_callback;
+	
+	if (conn->ep)
+		conn->ep->cm_handle = IB_INVALID_HANDLE;
+
+	cm_id = conn->cm_id;
+	conn->cm_id = NULL;
 	dapl_os_unlock(&conn->lock);
 
-	if (!in_callback) {
-		if (conn->ep)
-			conn->ep->cm_handle = IB_INVALID_HANDLE;
-		if (conn->cm_id) {
-			if (conn->cm_id->qp)
-				rdma_destroy_qp(conn->cm_id);
-			rdma_destroy_id(conn->cm_id);
-		}
-			
-		conn->cm_id = NULL;
-		dapl_os_free(conn, sizeof(*conn));
+	/* 
+	 * rdma_destroy_id will force synchronization with async CM event 
+	 * thread since it blocks until the in-process event reference
+	 * is cleared during our event processing call exit.
+	 */
+	if (cm_id) {
+		if (cm_id->qp)
+			rdma_destroy_qp(cm_id);
+
+		rdma_destroy_id(cm_id);
 	}
+	dapl_os_free(conn, sizeof(*conn));
 }
 
 static struct dapl_cm_id * dapli_req_recv(struct dapl_cm_id *conn,
 					  struct rdma_cm_event *event)
 {
 	struct dapl_cm_id *new_conn;
+#ifdef DAPL_DBG
+	struct rdma_addr *ipaddr = &event->id->route.addr;
+#endif
 	
 	if (conn->sp == NULL) {
 		dapl_dbg_log(DAPL_DBG_TYPE_ERR, 
@@ -239,11 +257,9 @@ static struct dapl_cm_id * dapli_req_rec
 	return new_conn;
 }
 
-static int dapli_cm_active_cb(struct dapl_cm_id *conn,
+static void dapli_cm_active_cb(struct dapl_cm_id *conn,
 			      struct rdma_cm_event *event)
 {
-	int destroy;
-
 	dapl_dbg_log(DAPL_DBG_TYPE_CM, 
 		     " active_cb: conn %p id %d event %d\n",
 		     conn, conn->cm_id, event->event );
@@ -251,9 +267,8 @@ static int dapli_cm_active_cb(struct dap
 	dapl_os_lock(&conn->lock);
 	if (conn->destroy) {
 		dapl_os_unlock(&conn->lock);
-		return 0;
+		return;
 	}
-	conn->in_callback = 1;
 	dapl_os_unlock(&conn->lock);
 
 	switch (event->event) {
@@ -298,17 +313,12 @@ static int dapli_cm_active_cb(struct dap
 		break;
 	}
 
-	dapl_os_lock(&conn->lock);
-	destroy = conn->destroy;
-	conn->in_callback = conn->destroy;
-	dapl_os_unlock(&conn->lock);
-	return(destroy);
+	return;
 }
 
-static int dapli_cm_passive_cb(struct dapl_cm_id *conn,
+static void dapli_cm_passive_cb(struct dapl_cm_id *conn,
 			       struct rdma_cm_event *event)
 {
-	int destroy;
 	struct dapl_cm_id *new_conn;
 
 	dapl_dbg_log(DAPL_DBG_TYPE_CM, 
@@ -318,9 +328,8 @@ static int dapli_cm_passive_cb(struct da
 	dapl_os_lock(&conn->lock);
 	if (conn->destroy) {
 		dapl_os_unlock(&conn->lock);
-		return 0;
+		return;
 	}
-	conn->in_callback = 1;
 	dapl_os_unlock(&conn->lock);
 
 	switch (event->event) {
@@ -372,11 +381,7 @@ static int dapli_cm_passive_cb(struct da
 		break;
 	}
 
-	dapl_os_lock(&conn->lock);
-	destroy = conn->destroy;
-	conn->in_callback = conn->destroy;
-	dapl_os_unlock(&conn->lock);
-	return(destroy);
+	return;
 }
 
 
@@ -1008,16 +1013,12 @@ dapls_ib_get_cm_event(IN DAT_EVENT_NUMBE
 void dapli_cma_event_cb(void)
 {
 	struct rdma_cm_event *event;
-	int ret;
-		
-	dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " cm_event()\n");
 
-	ret = rdma_get_cm_event(&event);
+	dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " cm_event()\n");
 
 	/* process one CM event, fairness */
-	if(!ret) {
+	if(!rdma_get_cm_event(&event)) {
 		struct dapl_cm_id *conn;
-		int ret;
 				
 		/* set proper conn from cm_id context*/
 		if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST)
@@ -1055,26 +1056,9 @@ void dapli_cma_event_cb(void)
 		case RDMA_CM_EVENT_DISCONNECTED:
 			/* passive or active */
 			if (conn->sp) 
-				ret = dapli_cm_passive_cb(conn,event);
+				dapli_cm_passive_cb(conn,event);
 			else 
-				ret = dapli_cm_active_cb(conn,event);
-
-			/* destroy both qp and cm_id */
-			if (ret) {
-				dapl_dbg_log(DAPL_DBG_TYPE_CM, 
-					     " cma_cb: DESTROY conn %p" 
-					     " cm_id %p qp %p\n",
-					     conn, conn->cm_id, 
-					     conn->cm_id->qp);
-	
-				if (conn->cm_id->qp)
-					rdma_destroy_qp(conn->cm_id);
-
-				rdma_ack_cm_event(event);
-				rdma_destroy_id(conn->cm_id);
-				dapl_os_free(conn, sizeof(*conn));
-				return;
-			}
+				dapli_cm_active_cb(conn,event);
 			break;
 		case RDMA_CM_EVENT_CONNECT_RESPONSE:
 		default:
@@ -1084,12 +1068,9 @@ void dapli_cma_event_cb(void)
 			     event->id->context);
 			break;
 		}
+		/* ack event, unblocks destroy_cm_id in consumer threads */
 		rdma_ack_cm_event(event);
-	} else {
-		dapl_dbg_log(DAPL_DBG_TYPE_CM,
-			" cm_event: ERROR: rdma_get_cm_event() %d %d %s\n",
-			ret, errno, strerror(errno));
-	}
+	} 
 }
 
 /*




More information about the general mailing list