[openib-general] [PATCH] kdapl: Make CM event rather than state based

Hal Rosenstock halr at voltaire.com
Wed May 11 05:42:45 PDT 2005


Change kDAPL CM to be event-based rather than state-based
Also, fix connection and CM ID destruction relative to DAPL threading
and CM callbacks
Add REJ handling
Other, more minor bug fixes and some rewriting

Kudos to Sean for making this happen so quickly.

Signed-off-by: Sean Hefty <sean.hefty at intel.com>
Signed-off-by: Hal Rosenstock <halr at voltaire.com>

Index: dapl_openib_cm.h
===================================================================
--- dapl_openib_cm.h	(revision 2302)
+++ dapl_openib_cm.h	(working copy)
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
+ * Copyright (c) 2005 Intel Corporation. All rights reserved.
  * Copyright (c) 2004-2005, Mellanox Technologies, Inc. All rights reserved.
  * Copyright (c) 2003 Topspin Corporation.  All rights reserved. 
  *
@@ -56,39 +57,30 @@
 	IB_CME_BROKEN
 } ib_cm_events_t;
 
-#define IB_CM_LOCAL_FAILURE IB_CME_LOCAL_FAILURE
-#define IB_CM_HANDLE_FMT  "%x"
-
-/* Currently needed for DAPL */
-typedef struct ib_cm_id *ib_cm_handle_t;
-typedef struct ib_cm_id *ib_cm_srvc_handle_t;
-
-#define IB_MAX_REQ_PDATA_SIZE	92
-#define IB_MAX_REP_PDATA_SIZE  196
-#define IB_MAX_REJ_PDATA_SIZE  148
-#define IB_MAX_DREQ_PDATA_SIZE 220
-#define IB_MAX_DREP_PDATA_SIZE 224
-
-#define OPENIB_REQ_MRA_TIMEOUT 27	/* a little over 9 minutes */
-
-#define OPENIB_MAX_AT_RETRY  3
-
-struct dapl_ib_destroy_cm_id {
-	struct work_struct work;
-	struct ib_cm_id *cm_id;
-};
-
-struct dapl_ib_connection {
+struct dapl_cm_id {
 	int retries;
 	struct ib_at_ib_route dapl_rt;
 	struct ib_sa_path_rec dapl_path;
-	struct dapl_ep *ep_ptr;
+	struct dapl_ep *ep;
 	struct sockaddr *remote_ia_address;
 	struct ib_cm_req_param param;
 	u64 service_id;
 	struct ib_at_completion dapl_comp;
+
+	struct ib_cm_id *cm_id;
+	struct dapl_sp *sp;
+	spinlock_t lock;
+	int in_callback;
+	int destroy;
 };
 
+typedef struct dapl_cm_id *ib_cm_handle_t;
+typedef struct ib_cm_id *ib_cm_srvc_handle_t;
+
+#define OPENIB_REQ_MRA_TIMEOUT 27	/* a little over 9 minutes */
+
+#define OPENIB_MAX_AT_RETRY  3
+
 /************************
 * Function declarations *
 ************************/
Index: dapl_openib_cm.c
===================================================================
--- dapl_openib_cm.c	(revision 2302)
+++ dapl_openib_cm.c	(working copy)
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
+ * Copyright (c) 2005 Intel Corporation. All rights reserved.
  * Copyright (c) 2004-2005, Mellanox Technologies, Inc. All rights reserved. 
  * Copyright (c) 2003 Topspin Corporation.  All rights reserved. 
  *
@@ -35,234 +36,344 @@
 #include "dapl.h"
 #include "dapl_adapter_util.h"
 #include "dapl_evd_util.h"
-#include "dapl_sp_util.h"
 #include "dapl_cr_util.h"
 
-static void dapl_rt_comp_handler(u64 req_id, void *context, int rec_num);
-static void dapl_path_comp_handler(u64 req_id, void *context, int rec_num);
-
-static struct workqueue_struct *dapl_cm_wq;
-
 /* Should these be queried ? */
 #define IB_TARGET_MAX      4	/* responder resources (max_qp_ous_rd_atom) */
 #define IB_INITIATOR_DEPTH 4	/* initiator depth (max_qp_init_rd_atom) */
 
-static void do_active_idle(DAPL_EP * ep_ptr)
+static void dapl_destroy_cm_id(struct dapl_cm_id *conn)
 {
-	ib_cm_events_t cm_event;
+	unsigned long flags;
+	int in_callback;
 
-	if (ep_ptr->cm_handle == IB_INVALID_HANDLE)
-		return;
+	spin_lock_irqsave(&conn->lock, flags);
+	conn->destroy = 1;
+	in_callback = conn->in_callback;
+	spin_unlock_irqrestore(&conn->lock, flags);
 
-	if (ep_ptr->param.connect_evd_handle) {
-		cm_event = IB_CME_DISCONNECTED;
-		dapl_evd_connection_callback(ep_ptr->cm_handle,
-					     cm_event, NULL, (void *)ep_ptr);
+	if (!in_callback) {
+		ib_destroy_cm_id(conn->cm_id);
+		if (conn->ep)
+			conn->ep->cm_handle = IB_INVALID_HANDLE;
+		kfree(conn);
 	}
-
 }
 
-static void do_active_rep_recv(DAPL_EP * ep_ptr,
-			      struct ib_cm_id *comm_id,
-			      struct ib_cm_event *rep_recv_param)
+static void do_rep_recv(struct dapl_cm_id *conn,
+			struct ib_cm_event *rep_recv_param)
 {
 	int status;
 
-	if (ep_ptr->cm_handle == IB_INVALID_HANDLE)
-		return;
-
-	if (ep_ptr->qp_handle == IB_INVALID_HANDLE) {
-		dapl_dbg_log(DAPL_DBG_TYPE_ERR, " do_active_rep_recv: invalid qp handle\n");
-		return;
+	if (conn->ep->qp_handle == IB_INVALID_HANDLE) {
+		dapl_dbg_log(DAPL_DBG_TYPE_ERR, " do_rep_recv: invalid qp "
+			     "handle\n");
+		goto reject;
 	}
 
 	/* First, transition QP to RTR */
-	status =
-	    dapl_modify_qp_state_to_rtr(ep_ptr->cm_handle, ep_ptr->qp_handle);
-	if (status)
-		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
-			     " do_active_rep_recv: could not modify QP state to RTR status %d\n",
-			     status);
-	else {
-		/* Now, transition QP to RTS */
-		status =
-		    dapl_modify_qp_state_to_rts(ep_ptr->cm_handle,
-						ep_ptr->qp_handle);
-		if (status)
-			dapl_dbg_log(DAPL_DBG_TYPE_ERR,
-				     " do_active_rep_recv: could not modify QP state to RTS status %d\n",
-				     status);
+	status = dapl_modify_qp_state_to_rtr(conn->cm_id, conn->ep->qp_handle);
+	if (status) {
+		dapl_dbg_log(DAPL_DBG_TYPE_ERR, " do_rep_recv: could not "
+			     "modify QP state to RTR status %d\n", status);
+		goto reject;
 	}
 
-	status = ib_send_cm_rtu(comm_id, NULL, 0);
-	if (status)
-		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
-			     " do_active_rep_recv: ib_send_cm_rtu failed: %d\n",
-			     status);
+	/* Now, transition QP to RTS */
+	status = dapl_modify_qp_state_to_rts(conn->cm_id, conn->ep->qp_handle);
+	if (status) {
+		dapl_dbg_log(DAPL_DBG_TYPE_ERR, " do_rep_recv: could not "
+			     "modify QP state to RTS status %d\n", status);
+		goto reject;
+	}
 
-	if (ep_ptr->param.connect_evd_handle)
-		dapl_evd_connection_callback(ep_ptr->cm_handle,
-					     IB_CME_CONNECTED,
-					     rep_recv_param->private_data,
-					     (void *)ep_ptr);
+	status = ib_send_cm_rtu(conn->cm_id, NULL, 0);
+	if (status) {
+		dapl_dbg_log(DAPL_DBG_TYPE_ERR, " do_rep_recv: ib_send_cm_rtu "
+			     "failed: %d\n", status);
+		goto reject;
+	}
 
-}
+	dapl_evd_connection_callback(conn, IB_CME_CONNECTED,
+				     rep_recv_param->private_data, conn->ep);
+	return;
 
-static void do_active_disc(DAPL_EP * ep_ptr, struct ib_cm_id *comm_id)
-{
-	dapl_evd_connection_callback(comm_id,
-				     IB_CME_DISCONNECTED, NULL, (void *)ep_ptr);
+reject:
+	ib_send_cm_rej(conn->cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0,
+		       NULL, 0);
+	dapl_evd_connection_callback(conn, IB_CME_LOCAL_FAILURE, NULL,
+				     conn->ep);
 }
 
 int dapl_cm_active_cb_handler(struct ib_cm_id *comm_id,
 			      struct ib_cm_event *event)
 {
-	DAPL_EP *ep_ptr;
+	struct dapl_cm_id *conn = comm_id->context;
+	int ret;
+	unsigned long flags;
 
-	ep_ptr = (DAPL_EP *) comm_id->context;
-	/* Is this check needed ? */
-	if (ep_ptr == NULL) {
-		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
-			     " dapl_cm_active_cb_handler: NULL CM ID\n");
+	spin_lock_irqsave(&conn->lock, flags);
+	if (conn->destroy) {
+		spin_unlock_irqrestore(&conn->lock, flags);
 		return 0;
 	}
+	conn->in_callback = 1;
+	spin_unlock_irqrestore(&conn->lock, flags);
 
-	switch (comm_id->state) {
-	case IB_CM_IDLE:
-		do_active_idle(ep_ptr);
+	switch (event->event) {
+	case IB_CM_REQ_ERROR:
+		dapl_evd_connection_callback(conn,
+					     IB_CME_DESTINATION_UNREACHABLE,
+					     NULL, conn->ep);
 		break;
-	case IB_CM_REP_RCVD:
-		do_active_rep_recv(ep_ptr, comm_id, event);
+	case IB_CM_REJ_RECEIVED:
+		dapl_evd_connection_callback(conn, IB_CME_DESTINATION_REJECT,
+					     NULL, conn->ep);
 		break;
-	case IB_CM_ESTABLISHED:
-		/* Nothing to do */
+	case IB_CM_REP_RECEIVED:
+		do_rep_recv(conn, event);
 		break;
-	case IB_CM_DREQ_RCVD:
-		do_active_disc(ep_ptr, comm_id);
+	case IB_CM_DREQ_RECEIVED:
+		ib_send_cm_drep(comm_id, NULL, 0);
 		break;
+	case IB_CM_DREQ_ERROR:
+	case IB_CM_DREP_RECEIVED:
+		/* Wait to exit timewait. */
+		break;
+	case IB_CM_TIMEWAIT_EXIT:
+		dapl_evd_connection_callback(conn, IB_CME_DISCONNECTED,
+					     NULL, conn->ep);
+		break;
 	default:
 		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
-			     " dapl_cm_active_cb_handler: Unexpected CM state %d on ID 0x%p event %d\n",
-			     comm_id->state, comm_id, event->event);
+			     " dapl_cm_active_cb_handler: Unexpected CM "
+			     "event %d on ID 0x%p\n", event->event, comm_id);
 		break;
 	}
 
-	return 0;
+	spin_lock_irqsave(&conn->lock, flags);
+	ret = conn->destroy;
+	conn->in_callback = conn->destroy;
+	spin_unlock_irqrestore(&conn->lock, flags);
+	if (ret) {
+		if (conn->ep)
+			conn->ep->cm_handle = IB_INVALID_HANDLE;
+		kfree(conn);
+	}
+	return ret;
 }
 
-static void do_passive_idle(struct ib_cm_id *comm_id)
+static int do_rtu_recv(struct dapl_cm_id *conn)
 {
-	ib_cm_events_t cm_event;
+	int status;
 
-	cm_event = IB_CME_DISCONNECTED;
+	status = dapl_modify_qp_state_to_rts(conn->cm_id, conn->ep->qp_handle);
+	if (status) {
+		dapl_dbg_log(DAPL_DBG_TYPE_ERR, " do_rtu_recv: could not "
+			     "modify QP state to RTS status %d\n", status);
+		goto reject;
+	}
+	dapl_cr_callback(conn, IB_CME_CONNECTED, NULL, conn->sp);
+	return 0;
 
-	dapl_cr_callback(comm_id, cm_event, NULL, comm_id->context);
-
+reject:
+	ib_send_cm_rej(conn->cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0,
+		       NULL, 0);
+	dapl_cr_callback(conn, IB_CME_LOCAL_FAILURE, NULL, conn->sp);
+	return 1;
 }
 
-static void do_passive_req_recv(struct ib_cm_id *comm_id,
-			       struct ib_cm_event *req_recv_param)
+static struct dapl_cm_id * dapl_get_conn(struct ib_cm_id *comm_id,
+					 struct ib_cm_event *event)
 {
-	dapl_cr_callback(comm_id, IB_CME_CONNECTION_REQUEST_PENDING, 
-                         req_recv_param->private_data,
-			 comm_id->context);
+	struct dapl_cm_id *conn;
+	int status;
 
-}
+	if (event->event != IB_CM_REQ_RECEIVED)
+		return comm_id->context;
 
-static void do_passive_established(struct ib_cm_id *comm_id)
-{
-	DAPL_SP *sp_ptr;
-	DAPL_CR *cr_ptr;
-	DAPL_EP *ep_ptr;
-	int status;
-
-	sp_ptr = comm_id->context;
-	cr_ptr = dapl_sp_search_cr(sp_ptr, comm_id);
-	if (cr_ptr == NULL) {
-		printk(KERN_ERR "do_passive_established: No CR found for CM ID 0x%p\n", comm_id);
-		return;
+	conn = kmalloc(sizeof *conn, GFP_KERNEL);
+	if (!conn) {
+		status = ib_send_cm_rej(comm_id,
+					IB_CM_REJ_CONSUMER_DEFINED,
+					NULL, 0, NULL, 0);
+		return NULL;
 	}
-	ep_ptr = (DAPL_EP *)cr_ptr->param.local_ep_handle;
-	/* Quick check to ensure our EP is still valid */
-	if ((DAPL_BAD_HANDLE(ep_ptr, DAPL_MAGIC_EP)))
-		ep_ptr = NULL;
-	if (ep_ptr) {
-		if (ep_ptr->qp_handle != IB_INVALID_HANDLE) {
-			/* Transition QP to RTS */
-			status =
-			    dapl_modify_qp_state_to_rts(comm_id,
-							ep_ptr->qp_handle);
-			if (status)
-				dapl_dbg_log(DAPL_DBG_TYPE_ERR,
-					     " do_passive_established: could not modify QP state to RTS status %d\n",
-					     status);
-		} else
-			printk(KERN_ERR "do_passive_established: invalid QP handle\n");
-	} else
-		printk(KERN_ERR "do_passive_established: No EP found for CM ID 0x%p\n", comm_id);
 
-	dapl_cr_callback(comm_id,
-                     IB_CME_CONNECTED, 
-                     NULL, (void *)comm_id->context);
-
+	memset(conn, 0, sizeof *conn);
+	conn->cm_id = comm_id;
+	conn->sp = comm_id->context;
+	comm_id->context = conn;
+	spin_lock_init(&conn->lock);
+	return conn;
 }
 
-static void do_passive_disc(struct ib_cm_id *comm_id)
-{
-	dapl_cr_callback(comm_id,
-			 IB_CME_DISCONNECTED,
-			 NULL, (void *)comm_id->context);
-}
-
 int dapl_cm_passive_cb_handler(struct ib_cm_id *comm_id,
 			       struct ib_cm_event *event)
 {
-	switch (comm_id->state) {
-	case IB_CM_IDLE:
-		do_passive_idle(comm_id);
+	struct dapl_cm_id *conn;
+	int ret;
+	unsigned long flags;
+
+	conn = dapl_get_conn(comm_id, event);
+	if (!conn) {
+		printk(KERN_ERR "dapl_cm_passive_cb_handler: dapl_get_conn failed\n");
+		return 1;
+	}
+
+	spin_lock_irqsave(&conn->lock, flags);
+	if (conn->destroy) {
+		spin_unlock_irqrestore(&conn->lock, flags);
+		return 0;
+	}
+	conn->in_callback = 1;
+	spin_unlock_irqrestore(&conn->lock, flags);
+
+	switch (event->event) {
+	case IB_CM_REQ_RECEIVED:
+		dapl_cr_callback(conn, IB_CME_CONNECTION_REQUEST_PENDING, 
+				 event->private_data, conn->sp);
 		break;
-	case IB_CM_REQ_RCVD:
-		do_passive_req_recv(comm_id, event);
+	case IB_CM_REP_ERROR:
+		dapl_cr_callback(conn, IB_CME_DESTINATION_UNREACHABLE,
+				 NULL, conn->sp);
 		break;
-	case IB_CM_ESTABLISHED:
-		do_passive_established(comm_id);
+	case IB_CM_REJ_RECEIVED:
+		dapl_cr_callback(conn, IB_CME_DESTINATION_REJECT, NULL,
+				 conn->sp);
 		break;
-	case IB_CM_DREQ_RCVD:
-		do_passive_disc(comm_id);
+	case IB_CM_RTU_RECEIVED:
+		do_rtu_recv(conn);
 		break;
+	/* TODO: case IB_CM_USER_ESTABLISHED: - when initiated by DAPL */
+	case IB_CM_DREQ_RECEIVED:
+		ib_send_cm_drep(comm_id, NULL, 0);
+		break;
+	case IB_CM_DREQ_ERROR:
+	case IB_CM_DREP_RECEIVED:
+		/* Wait to exit timewait. */
+		break;
+	case IB_CM_TIMEWAIT_EXIT:
+		dapl_cr_callback(conn, IB_CME_DISCONNECTED, NULL, conn->sp);
+		break;
 	default:
-		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
-			     " dapl_cm_passive_cb_handler: Unexpected CM state %d on ID 0x%p event %d\n",
-			     comm_id->state, comm_id, event->event);
+		dapl_dbg_log(DAPL_DBG_TYPE_ERR, " dapl_cm_passive_cb_handler: "
+			     "Unexpected CM event %d on ID 0x%p\n",
+			     event->event, comm_id);
 		break;
 	}
 
-	return 0;
+	spin_lock_irqsave(&conn->lock, flags);
+	ret = conn->destroy;
+	conn->in_callback = conn->destroy;
+	spin_unlock_irqrestore(&conn->lock, flags);
+	if (ret) {
+		if (conn->ep)
+			conn->ep->cm_handle = IB_INVALID_HANDLE;
+		kfree(conn);
+	}
+	return ret;
 }
 
-static void dapl_ib_destroy_cm_id_work(void *data)
+static void dapl_path_comp_handler(u64 req_id, void *context, int rec_num)
 {
-	struct dapl_ib_destroy_cm_id *work = (struct dapl_ib_destroy_cm_id *)data;
-	struct ib_cm_id *cm_id = work->cm_id;
+	struct dapl_cm_id *conn = context;
+	int status;
+	ib_cm_events_t event;
 
-	ib_destroy_cm_id(cm_id); /* invokes disconnect sequence if not already done */
-	kfree(work);
+	if (rec_num <= 0) {
+		printk(KERN_ERR "dapl_path_comp_handler: path resolution "
+		       "failed (%d) retries %d!!!\n", rec_num, conn->retries);
+		if (++conn->retries > OPENIB_MAX_AT_RETRY) {
+			printk(KERN_ERR "dapl_path_comp_handler: ep_ptr 0x%p\n",
+			       conn->ep);
+			event = IB_CME_DESTINATION_UNREACHABLE;
+			goto error;
+		}
+
+		status = ib_at_paths_by_route(&conn->dapl_rt, 0,
+					      &conn->dapl_path, 1,
+					      &conn->dapl_comp);
+		if (status) {
+			printk(KERN_ERR "dapl_path_comp_handler: "
+			       "ib_at_paths_by_route returned %d id %lld\n",
+			       status, conn->dapl_comp.req_id);
+			event = IB_CME_LOCAL_FAILURE;
+			goto error;
+		}
+		return;
+	}
+
+	/* Mellanox performance workaround - best performance is MTU of 1024 */
+	if (conn->dapl_path.mtu > IB_MTU_1024)    
+		conn->dapl_path.mtu = IB_MTU_1024;
+
+	conn->param.service_id = be64_to_cpu(conn->service_id);
+	conn->param.primary_path = &conn->dapl_path;
+	conn->param.alternate_path = NULL;
+
+	status = ib_send_cm_req(conn->cm_id, &conn->param);
+	if (status) {
+		dapl_dbg_log(DAPL_DBG_TYPE_ERR, " ib_send_cm_req failed: %d\n",
+			     status);
+		event = IB_CME_LOCAL_FAILURE;
+		goto error;
+	}
+	return;
+
+error:
+	//*** should we pass conn here?  it will be destroyed...
+	dapl_evd_connection_callback(conn, event, NULL, conn->ep);
+	dapl_destroy_cm_id(conn);
 }
 
-static int dapl_ib_queue_destroy_cm_id(struct ib_cm_id *cm_id)
+static void dapl_rt_comp_handler(u64 req_id, void *context, int rec_num)
 {
-	struct dapl_ib_destroy_cm_id *work;
+	struct dapl_cm_id *conn = context;
+	int status;
+	ib_cm_events_t event;
 
-	work = kmalloc(sizeof *work, GFP_ATOMIC);
-	if (!work) {
-		printk(KERN_ERR "dapl_ib_queue_destroy_cm_id: No memory for dapl_ib_destroy_cm_id\n");
-		return DAT_INSUFFICIENT_RESOURCES;
+	if (rec_num <= 0) {
+		printk(KERN_ERR "dapl_rt_comp_handler: rec_num %d retry %d\n",
+		       rec_num, conn->retries);
+		if (++conn->retries > OPENIB_MAX_AT_RETRY) {
+			event = IB_CME_DESTINATION_UNREACHABLE;
+			goto error;
+		}
+
+		status = ib_at_route_by_ip( ((struct sockaddr_in *)
+				conn->remote_ia_address)->sin_addr.s_addr,
+				0, 0, 0, &conn->dapl_rt, &conn->dapl_comp);
+		if (status < 0) {
+			dapl_dbg_log(DAPL_DBG_TYPE_ERR, "dapl_rt_comp_handler: "
+				    "ib_at_route_by_ip failed with status %d\n",
+				    status);
+			event = IB_CME_DESTINATION_UNREACHABLE;
+			goto error;
+		}
+
+		if (status == 1)
+			dapl_rt_comp_handler(conn->dapl_comp.req_id, conn, 1);
+		return;
 	}
 
-	work->cm_id = cm_id;
-	INIT_WORK(&work->work, dapl_ib_destroy_cm_id_work, work);
-	queue_work(dapl_cm_wq, &work->work);
-	return 0;
+	conn->dapl_comp.fn = &dapl_path_comp_handler;
+	conn->dapl_comp.context = conn;
+	conn->retries = 0;
+	status = ib_at_paths_by_route(&conn->dapl_rt, 0, &conn->dapl_path, 1,
+				      &conn->dapl_comp);
+	if (status) {
+		printk(KERN_ERR "dapl_rt_comp_handler: ib_at_paths_by_route "
+		       "returned %d id %lld\n", status, conn->dapl_comp.req_id);
+		event = IB_CME_LOCAL_FAILURE;
+		goto error;
+	}
+	return;
+
+error:
+	//*** should we pass conn here?  it will be destroyed...
+	dapl_evd_connection_callback(conn, event, NULL, conn->ep);
+	dapl_destroy_cm_id(conn);
 }
 
 /*
@@ -293,7 +404,7 @@
 {
 	DAPL_IA *ia_ptr;
 	DAPL_EP *ep_ptr;
-	struct dapl_ib_connection *conn;
+	struct dapl_cm_id *conn;
 	int status;
 
 	ep_ptr = (DAPL_EP *)ep_handle;
@@ -302,30 +413,30 @@
 		return DAT_INTERNAL_ERROR;
 	}
 
-	/* Is this always called out of task context ? */
-	/* If so, change to GFP_KERNEL */
-	conn = kmalloc(sizeof *conn, GFP_ATOMIC);
+	conn = kmalloc(sizeof *conn, GFP_KERNEL);
 	if (!conn) {
 		printk(KERN_ERR
-		       "dapl_ib_connect: no memory for dapl_ib_connection struct\n");
+		       "dapl_ib_connect: no memory for dapl_cm_id struct\n");
 		return DAT_INSUFFICIENT_RESOURCES;
 	}
+	memset(conn, 0, sizeof *conn);
 
-	conn->ep_ptr = ep_ptr;
-	conn->ep_ptr->cm_handle = ib_create_cm_id(dapl_cm_active_cb_handler,
-						  ep_handle);
-	if (!conn->ep_ptr->cm_handle) {
+	spin_lock_init(&conn->lock);
+	conn->ep = ep_ptr;
+	conn->cm_id = ib_create_cm_id(dapl_cm_active_cb_handler, conn);
+	if (IS_ERR(conn->cm_id)) {
 		dapl_dbg_log(DAPL_DBG_TYPE_ERR, " CM ID creation failed\n");
 		kfree(conn);
 		return DAT_INTERNAL_ERROR;
 	}
+	conn->ep->cm_handle = conn;
 
-	ia_ptr = conn->ep_ptr->header.owner_ia;
+	ia_ptr = conn->ep->header.owner_ia;
 
 	/* Setup QP/CM parameters */
 	memset(&conn->param, 0, sizeof(conn->param));
 	conn->param.qp_type = IB_QPT_RC;
-	conn->param.qp_num = conn->ep_ptr->qp_handle->qp_num;
+	conn->param.qp_num = conn->ep->qp_handle->qp_num;
 	conn->param.private_data = private_data;
 	conn->param.private_data_len = private_data_size;
 	conn->param.responder_resources = IB_TARGET_MAX;
@@ -348,126 +459,18 @@
 			      sin_addr.s_addr, 0, 0, 0, &conn->dapl_rt,
 			      &conn->dapl_comp);
 	if (status < 0) {
-		dapl_dbg_log(DAPL_DBG_TYPE_ERR, " ib_at_route_by_ip failed with status %d\n", status);
-		kfree(conn);
-		/* What happens with CM ID in this case ? */
+		dapl_dbg_log(DAPL_DBG_TYPE_ERR, " ib_at_route_by_ip failed "
+			     "with status %d\n", status);
+		dapl_destroy_cm_id(conn);
 		return DAT_INTERNAL_ERROR;
 	}
 
 	if (status == 1)
-		dapl_rt_comp_handler(conn->dapl_comp.req_id,
-				     conn->dapl_comp.context, 1);
+		dapl_rt_comp_handler(conn->dapl_comp.req_id, conn, 1);
 
 	return DAT_SUCCESS;
 }
 
-static void dapl_rt_comp_handler(u64 req_id, void *context, int rec_num)
-{
-	struct dapl_ib_connection *conn = context;
-	int status;
-
-	if (rec_num <= 0) {
-		printk(KERN_ERR "dapl_rt_comp_handler: rec_num %d retry %d\n", rec_num, conn->retries);
-		if (++conn->retries > OPENIB_MAX_AT_RETRY) {
-			dapl_evd_connection_callback(conn->ep_ptr->cm_handle,
-						     IB_CME_DESTINATION_UNREACHABLE,
-						     NULL,
-						     conn->ep_ptr);
-			kfree(conn);
-			return;
-		} else {
-			status =
-				ib_at_route_by_ip(
-					((struct sockaddr_in *)conn->
-					remote_ia_address)->sin_addr.s_addr,
-					0, 0, 0, &conn->dapl_rt,
-					&conn->dapl_comp);
-			if (status < 0) {
-				dapl_dbg_log(DAPL_DBG_TYPE_ERR, " dapl_rt_comp_handler: ib_at_route_by_ip failed with status %d\n", status);
-				dapl_evd_connection_callback(conn->ep_ptr->cm_handle,
-							     IB_CME_DESTINATION_UNREACHABLE,
-							     NULL,
-							     conn->ep_ptr);
-				kfree(conn);
-				return;
-			}
-			if (status == 1)
-				dapl_rt_comp_handler(conn->dapl_comp.req_id,
-						conn->dapl_comp.context, 1);
-			return;
-		}
-	}
-
-	conn->dapl_comp.fn = &dapl_path_comp_handler;
-	conn->dapl_comp.context = conn;
-	conn->retries = 0;
-	status = ib_at_paths_by_route(&conn->dapl_rt, 0, &conn->dapl_path,
-				      1, &conn->dapl_comp);
-	if (status) {
-		printk(KERN_ERR
-		       "dapl_rt_comp_handler: ib_at_paths_by_route returned %d id %lld\n",
-		       status, conn->dapl_comp.req_id);
-		dapl_evd_connection_callback(conn->ep_ptr->cm_handle,
-					     IB_CME_LOCAL_FAILURE, 
-					     NULL,
-					     conn->ep_ptr);
-		kfree(conn);
-	}
-}
-
-static void dapl_path_comp_handler(u64 req_id, void *context, int rec_num)
-{
-	struct dapl_ib_connection *conn = context;
-	int status;
-
-	if (rec_num <= 0) {
-		printk(KERN_ERR "dapl_path_comp_handler: path resolution failed (%d) retries %d!!!\n", rec_num, conn->retries);
-		if (++conn->retries > OPENIB_MAX_AT_RETRY) {
-printk(KERN_ERR "dapl_path_comp_handler: ep_ptr 0x%p\n", conn->ep_ptr);
-			dapl_evd_connection_callback(conn->ep_ptr->cm_handle,
-						     IB_CME_DESTINATION_UNREACHABLE,
-						     NULL,
-						     conn->ep_ptr);
-			kfree(conn);
-			return;
-		} else {
-			status = ib_at_paths_by_route(&conn->dapl_rt, 0,
-						      &conn->dapl_path,
-						      1, &conn->dapl_comp);
-			if (status) {
-				printk(KERN_ERR
-				       "dapl_path_comp_handler: ib_at_paths_by_route returned %d id %lld\n",
-					status, conn->dapl_comp.req_id);
-				dapl_evd_connection_callback(conn->ep_ptr->cm_handle,
-							     IB_CME_LOCAL_FAILURE,
-							     NULL,
-							     conn->ep_ptr);
-				kfree(conn);
-			}
-			return;
-		}
-	}
-
-	/* Mellanox performance workaround */
-	if (conn->dapl_path.mtu > IB_MTU_1024)    
-		conn->dapl_path.mtu = IB_MTU_1024; /* Best performance is obtained at this MTU */
-
-	conn->param.service_id = be64_to_cpu(conn->service_id);
-	conn->param.primary_path = &conn->dapl_path;
-	conn->param.alternate_path = NULL;
-
-	status = ib_send_cm_req(conn->ep_ptr->cm_handle, &conn->param);
-	if (status) {
-		dapl_dbg_log(DAPL_DBG_TYPE_ERR, " ib_send_cm_req failed: %d\n",
-			     status);
-		dapl_evd_connection_callback(conn->ep_ptr->cm_handle,
-					     IB_CME_LOCAL_FAILURE, NULL,
-					     conn->ep_ptr);
-	}
-
-	kfree(conn);
-}
-
 /*
  * dapl_ib_disconnect
  *
@@ -488,6 +491,25 @@
  */
 DAT_RETURN dapl_ib_disconnect(DAPL_EP * ep_ptr, DAT_CLOSE_FLAGS close_flags)
 {
+	struct dapl_cm_id *conn = ep_ptr->cm_handle;
+	int status;
+
+	dapl_dbg_log(DAPL_DBG_TYPE_CM,
+		     "  >>> dapl_ib_disconnect: EP: %p conn %p close flags %d\n",
+		     ep_ptr, conn, close_flags);
+
+	if (conn == IB_INVALID_HANDLE)
+		return DAT_SUCCESS;
+
+	if (close_flags == DAT_CLOSE_ABRUPT_FLAG)
+		dapl_destroy_cm_id(conn);
+	else {
+		status = ib_send_cm_dreq(conn->cm_id, NULL, 0);
+		if (status)
+			printk(KERN_ERR "dapl_ib_disconnect: CM ID 0x%p "
+			       "status %d\n", ep_ptr->cm_handle, status);
+	}
+
 	return DAT_SUCCESS;
 }
 
@@ -516,27 +538,24 @@
 {
 	int status;
 
-	if  (ep_ptr->cm_handle == IB_INVALID_HANDLE)
-		return;
+	dapl_dbg_log(DAPL_DBG_TYPE_CM,
+		     "  >>> dapl_ib_disconnect_clean: EP: %p active %d\n",
+		     ep_ptr, active);
 
-	dapl_dbg_log(DAPL_DBG_TYPE_CM, 
-		     "  >>> dapl_ib_disconnect_clean: CM handle: %p\n", 
-		     ep_ptr->cm_handle);
-
 	/*
 	 * Clean up outstanding connection state
 	 */
+	dapl_ib_disconnect(ep_ptr, DAT_CLOSE_ABRUPT_FLAG);
+
 	if (ep_ptr->qp_handle != IB_INVALID_HANDLE) {
 		/* Transition QP to ERROR */
 		status = dapl_modify_qp_state_to_error(ep_ptr->qp_handle);
 		if (status)
-			printk(KERN_ERR "dapl_ib_disconnect_clean: status %d on transition to error\n", status);
+			printk(KERN_ERR "dapl_ib_disconnect_clean: status %d "
+					"on transition to error\n", status);
 	} else
-		dapl_dbg_log(DAPL_DBG_TYPE_ERR, "dapl_ib_disconnect_clean: invalid qp handle\n");
-
-	dapl_ib_queue_destroy_cm_id(ep_ptr->cm_handle);
-
-	ep_ptr->cm_handle = IB_INVALID_HANDLE;
+		dapl_dbg_log(DAPL_DBG_TYPE_ERR, "dapl_ib_disconnect_clean: "
+						"invalid qp handle\n");
 }
 
 /*
@@ -561,6 +580,13 @@
 
 	ia_ptr = ep_ptr->header.owner_ia;
 
+	//*** What if QP is connected?  Do we need to destroy the cm_id
+	//*** here as well, send a DREQ, REJ?  What cm_id state are we in?
+	//*** we may not care if we just destroy the cm_id
+	//*** ib_destroy_cm_id(ep_ptr->cm_handle);
+	//*** supporting this call may require setting the cm_handle to
+	//*** something like IB_INVALID_HANDLE wherever it is destroyed
+
 	ib_status = dapl_modify_qp_state_to_reset(ep_ptr->qp_handle);
 	if (ib_status < 0) {
 		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
@@ -604,7 +630,7 @@
 
 	sp_ptr->cm_srvc_handle = ib_create_cm_id(dapl_cm_passive_cb_handler,
 						 sp_ptr);
-	if (sp_ptr->cm_srvc_handle == NULL) {
+	if (IS_ERR(sp_ptr->cm_srvc_handle)) {
 		dapl_dbg_log(DAPL_DBG_TYPE_ERR, " CM ID creation failed\n");
 		return DAT_INTERNAL_ERROR;
 	}
@@ -612,7 +638,7 @@
 	status = ib_cm_listen(sp_ptr->cm_srvc_handle,
 			      be64_to_cpu(ServiceID), 0);
 	if (status) {
-		/* Need to also destroy CM ID ? */
+		ib_destroy_cm_id(sp_ptr->cm_srvc_handle);
 		sp_ptr->cm_srvc_handle = IB_INVALID_HANDLE;
 
 		if (status == -EBUSY)
@@ -643,19 +669,12 @@
  */
 DAT_RETURN dapl_ib_remove_conn_listener(DAPL_IA * ia_ptr, DAPL_SP * sp_ptr)
 {
-	int status;
-
-	if (sp_ptr->cm_srvc_handle == IB_INVALID_HANDLE)
-		return DAT_SUCCESS;
-
-	status = dapl_ib_queue_destroy_cm_id(sp_ptr->cm_srvc_handle);
-	sp_ptr->cm_srvc_handle = IB_INVALID_HANDLE;
-	if (status) {
-		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
-			     " dapl_ib_remove_conn_listener: ib_destroy_cm_id failed: %d\n", status);
-		return DAT_ERROR(DAT_INSUFFICIENT_RESOURCES, 0);
+	//*** This will hang if called from CM thread context...
+	//*** Move back to using WQ...
+	if (sp_ptr->cm_srvc_handle != IB_INVALID_HANDLE) {
+		ib_destroy_cm_id(sp_ptr->cm_srvc_handle);
+		sp_ptr->cm_srvc_handle = IB_INVALID_HANDLE;
 	}
-
 	return DAT_SUCCESS;
 }
 
@@ -677,26 +696,25 @@
  *
  */
 DAT_RETURN
-dapl_ib_reject_connection(struct ib_cm_id * ib_cm_handle, int reject_reason)
+dapl_ib_reject_connection(struct dapl_cm_id *cm_handle, int reject_reason)
 {
 	int status;
 
-	if (ib_cm_handle == IB_INVALID_HANDLE) {
+	if (cm_handle == IB_INVALID_HANDLE) {
 		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
 			     " dapl_ib_reject_connection: invalid CM handle: reject reason %d\n",
 			     reject_reason);
 		return DAT_SUCCESS;
 	}
 
-	status = ib_send_cm_rej(ib_cm_handle, IB_CM_REJ_CONSUMER_DEFINED,
+	status = ib_send_cm_rej(cm_handle->cm_id, IB_CM_REJ_CONSUMER_DEFINED,
 				NULL, 0, NULL, 0);
 	if (status) {
-		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
-			     " dapl_ib_reject_connection: ib_send_cm_rej failed: %d\n",
-			     status);
+		dapl_dbg_log(DAPL_DBG_TYPE_ERR, " dapl_ib_reject_connection: "
+			     "ib_send_cm_rej failed: %d\n", status);
 		return DAT_ERROR(DAT_INSUFFICIENT_RESOURCES, 0);
 	}
-
+	dapl_destroy_cm_id(cm_handle);
 	return DAT_SUCCESS;
 }
 
@@ -731,14 +749,18 @@
 	DAT_RETURN dat_status;
 	int status;
 	struct ib_cm_rep_param passive_params;
+	struct dapl_cm_id *conn;
 
 	cr_ptr = (DAPL_CR *) cr_handle;
 	ep_ptr = (DAPL_EP *) ep_handle;
 	ia_ptr = ep_ptr->header.owner_ia;
+	conn = cr_ptr->ib_cm_handle;
 
 	/* Obtain size of private data structure & contents */
-	if (private_data_size > IB_MAX_REP_PDATA_SIZE)
-		return DAT_ERROR(DAT_LENGTH_ERROR, DAT_NO_SUBTYPE);
+	if (private_data_size > IB_MAX_REP_PDATA_SIZE) {
+		dat_status = DAT_ERROR(DAT_LENGTH_ERROR, DAT_NO_SUBTYPE);
+		goto reject;
+	}
 
 	if (ep_ptr->qp_state == DAPL_QP_STATE_UNATTACHED) {
 		/* 
@@ -749,14 +771,16 @@
 		dat_status = dapl_ib_qp_alloc(ia_ptr, ep_ptr, NULL);
 		if (dat_status != DAT_SUCCESS) {
 			dapl_dbg_log(DAPL_DBG_TYPE_ERR,
-				     " dapl_ib_accept_connection: dapl_ib_qp_alloc failed: %d\n",
+				     " dapl_ib_accept_connection: "
+				     "dapl_ib_qp_alloc failed: %d\n",
 				     dat_status);
-			return dat_status;
+			goto reject;
 		}
 	}
 
 	cr_ptr->param.local_ep_handle = ep_handle;
-	ep_ptr->cm_handle = cr_ptr->ib_cm_handle;
+	ep_ptr->cm_handle = conn;
+	conn->ep = ep_ptr;
 
 	memset(&passive_params, 0, sizeof(passive_params));
 	passive_params.private_data = prd_ptr;
@@ -767,28 +791,35 @@
 	passive_params.rnr_retry_count = DAPL_OPENIB_RNR_RETRY_COUNT;
 
 	/* Transition QP to RTR */
-	status =
-	    dapl_modify_qp_state_to_rtr(ep_ptr->cm_handle, ep_ptr->qp_handle);
-	if (status)
-		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
-			     " dapl_ib_accept_connection: could not modify QP state to RTR status %d\n",
+	status = dapl_modify_qp_state_to_rtr(conn->cm_id, ep_ptr->qp_handle);
+	if (status) {
+		dapl_dbg_log(DAPL_DBG_TYPE_ERR, " dapl_ib_accept_connection: "
+			     "could not modify QP state to RTR status %d\n",
 			     status);
+		dat_status = DAT_ERROR(DAT_INSUFFICIENT_RESOURCES, 0);
+		goto reject;
+	}
 
-	status = ib_send_cm_rep(cr_ptr->ib_cm_handle, &passive_params);
+	status = ib_send_cm_rep(conn->cm_id, &passive_params);
 	if (status) {
-		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
-			     " dapl_ib_accept_connection: ib_send_cm_rep failed: %d\n",
-			     status);
-		return DAT_ERROR(DAT_INSUFFICIENT_RESOURCES, 0);
+		dapl_dbg_log(DAPL_DBG_TYPE_ERR, " dapl_ib_accept_connection: "
+			     "ib_send_cm_rep failed: %d\n", status);
+		dat_status = DAT_ERROR(DAT_INSUFFICIENT_RESOURCES, 0);
+		goto reject;
 	}
+	return DAT_SUCCESS;
 
-	return DAT_SUCCESS;
+reject:
+	ib_send_cm_rej(conn->cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0,
+		       NULL, 0);
+	dapl_destroy_cm_id(conn);
+	return dat_status;
 }
 
 /*
  * ib_cm_get_remote_gid 
  */
-static int ib_cm_get_remote_gid(struct ib_cm_id *cm_handle, u8 *remote_gid)
+static int ib_cm_get_remote_gid(struct dapl_cm_id *cm_handle, u8 *remote_gid)
 {
 	return DAT_INTERNAL_ERROR;	/* for now!!! */
 }
@@ -837,19 +868,9 @@
 
 int dapl_cm_init(void)
 {
-	dapl_cm_wq = create_workqueue("dapl_cm_wq");
-	if (!dapl_cm_wq) {
-		printk(KERN_ERR "dapl_cm_init: failed to allocate work queue for DAPL CM\n");
-		return DAT_INSUFFICIENT_RESOURCES;
-	}
 	return 0;
 }
 
 void dapl_cm_cleanup(void)
 {
-	/* Need to handle in progress connections */
-	if (dapl_cm_wq) {
-		flush_workqueue(dapl_cm_wq);
-		destroy_workqueue(dapl_cm_wq);
-	}
 }






More information about the general mailing list