[ofa-general] [GIT PULL] 2.6.24: please pull rdma-dev.git for-roland branch

Sean Hefty sean.hefty at intel.com
Fri Oct 12 17:05:22 PDT 2007


Please pull from:

	git://git.openfabrics.org/~shefty/rdma-dev.git for-roland

This will pick up two recent rdma_cm bug fixes.

 drivers/infiniband/core/cma.c |  160 +++++++++++++++++++++---------------------
 1 files changed, 83 insertions(+), 77 deletions(-)

Sean Hefty (2):
      rdma/cm: add locking around QP accesses
      rdma/cm: fix deadlock destroying listen requests
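
For reference, the first patch wraps every QP access in a new per-ID
qp_mutex and re-checks id.qp under it. Reduced to a minimal sketch
(cma_modify_qp_sketch is a stand-in name; all other identifiers are
taken from the hunks below):

	/* Illustrative only: mirrors the pattern cma_modify_qp_rtr() gains. */
	static int cma_modify_qp_sketch(struct rdma_id_private *id_priv)
	{
		struct ib_qp_attr qp_attr;
		int qp_attr_mask, ret;

		mutex_lock(&id_priv->qp_mutex);
		if (!id_priv->id.qp) {
			ret = 0;	/* QP already destroyed; nothing to do */
			goto out;
		}

		qp_attr.qp_state = IB_QPS_RTR;
		ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
		if (!ret)
			ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
	out:
		mutex_unlock(&id_priv->qp_mutex);
		return ret;
	}

rdma_destroy_qp() clears id.qp and destroys the QP under the same
mutex, so a destroy can no longer race with a modify in progress.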


diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 93644f8..ee946cc 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -114,13 +114,16 @@ struct rdma_id_private {
 
 	struct rdma_bind_list	*bind_list;
 	struct hlist_node	node;
-	struct list_head	list;
-	struct list_head	listen_list;
+	struct list_head	list; /* listen_any_list or cma_device.list */
+	struct list_head	listen_list; /* per device listens */
 	struct cma_device	*cma_dev;
 	struct list_head	mc_list;
 
+	int			internal_id;
 	enum cma_state		state;
 	spinlock_t		lock;
+	struct mutex		qp_mutex;
+
 	struct completion	comp;
 	atomic_t		refcount;
 	wait_queue_head_t	wait_remove;
@@ -389,6 +392,7 @@ struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler,
 	id_priv->id.event_handler = event_handler;
 	id_priv->id.ps = ps;
 	spin_lock_init(&id_priv->lock);
+	mutex_init(&id_priv->qp_mutex);
 	init_completion(&id_priv->comp);
 	atomic_set(&id_priv->refcount, 1);
 	init_waitqueue_head(&id_priv->wait_remove);
@@ -474,61 +478,86 @@ EXPORT_SYMBOL(rdma_create_qp);
 
 void rdma_destroy_qp(struct rdma_cm_id *id)
 {
-	ib_destroy_qp(id->qp);
+	struct rdma_id_private *id_priv;
+
+	id_priv = container_of(id, struct rdma_id_private, id);
+	mutex_lock(&id_priv->qp_mutex);
+	ib_destroy_qp(id_priv->id.qp);
+	id_priv->id.qp = NULL;
+	mutex_unlock(&id_priv->qp_mutex);
 }
 EXPORT_SYMBOL(rdma_destroy_qp);
 
-static int cma_modify_qp_rtr(struct rdma_cm_id *id)
+static int cma_modify_qp_rtr(struct rdma_id_private *id_priv)
 {
 	struct ib_qp_attr qp_attr;
 	int qp_attr_mask, ret;
 
-	if (!id->qp)
-		return 0;
+	mutex_lock(&id_priv->qp_mutex);
+	if (!id_priv->id.qp) {
+		ret = 0;
+		goto out;
+	}
 
 	/* Need to update QP attributes from default values. */
 	qp_attr.qp_state = IB_QPS_INIT;
-	ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask);
+	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
 	if (ret)
-		return ret;
+		goto out;
 
-	ret = ib_modify_qp(id->qp, &qp_attr, qp_attr_mask);
+	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
 	if (ret)
-		return ret;
+		goto out;
 
 	qp_attr.qp_state = IB_QPS_RTR;
-	ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask);
+	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
 	if (ret)
-		return ret;
+		goto out;
 
-	return ib_modify_qp(id->qp, &qp_attr, qp_attr_mask);
+	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
+out:
+	mutex_unlock(&id_priv->qp_mutex);
+	return ret;
 }
 
-static int cma_modify_qp_rts(struct rdma_cm_id *id)
+static int cma_modify_qp_rts(struct rdma_id_private *id_priv)
 {
 	struct ib_qp_attr qp_attr;
 	int qp_attr_mask, ret;
 
-	if (!id->qp)
-		return 0;
+	mutex_lock(&id_priv->qp_mutex);
+	if (!id_priv->id.qp) {
+		ret = 0;
+		goto out;
+	}
 
 	qp_attr.qp_state = IB_QPS_RTS;
-	ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask);
+	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
 	if (ret)
-		return ret;
+		goto out;
 
-	return ib_modify_qp(id->qp, &qp_attr, qp_attr_mask);
+	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
+out:
+	mutex_unlock(&id_priv->qp_mutex);
+	return ret;
 }
 
-static int cma_modify_qp_err(struct rdma_cm_id *id)
+static int cma_modify_qp_err(struct rdma_id_private *id_priv)
 {
 	struct ib_qp_attr qp_attr;
+	int ret;
 
-	if (!id->qp)
-		return 0;
+	mutex_lock(&id_priv->qp_mutex);
+	if (!id_priv->id.qp) {
+		ret = 0;
+		goto out;
+	}
 
 	qp_attr.qp_state = IB_QPS_ERR;
-	return ib_modify_qp(id->qp, &qp_attr, IB_QP_STATE);
+	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, IB_QP_STATE);
+out:
+	mutex_unlock(&id_priv->qp_mutex);
+	return ret;
 }
 
 static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv,
@@ -717,50 +746,27 @@ static void cma_cancel_route(struct rdma_id_private *id_priv)
 	}
 }
 
-static inline int cma_internal_listen(struct rdma_id_private *id_priv)
-{
-	return (id_priv->state == CMA_LISTEN) && id_priv->cma_dev &&
-	       cma_any_addr(&id_priv->id.route.addr.src_addr);
-}
-
-static void cma_destroy_listen(struct rdma_id_private *id_priv)
-{
-	cma_exch(id_priv, CMA_DESTROYING);
-
-	if (id_priv->cma_dev) {
-		switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
-		case RDMA_TRANSPORT_IB:
-			if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
-				ib_destroy_cm_id(id_priv->cm_id.ib);
-			break;
-		case RDMA_TRANSPORT_IWARP:
-			if (id_priv->cm_id.iw && !IS_ERR(id_priv->cm_id.iw))
-				iw_destroy_cm_id(id_priv->cm_id.iw);
-			break;
-		default:
-			break;
-		}
-		cma_detach_from_dev(id_priv);
-	}
-	list_del(&id_priv->listen_list);
-
-	cma_deref_id(id_priv);
-	wait_for_completion(&id_priv->comp);
-
-	kfree(id_priv);
-}
-
 static void cma_cancel_listens(struct rdma_id_private *id_priv)
 {
 	struct rdma_id_private *dev_id_priv;
 
+	/*
+	 * Remove from listen_any_list to prevent added devices from spawning
+	 * additional listen requests.
+	 */
 	mutex_lock(&lock);
 	list_del(&id_priv->list);
 
 	while (!list_empty(&id_priv->listen_list)) {
 		dev_id_priv = list_entry(id_priv->listen_list.next,
 					 struct rdma_id_private, listen_list);
-		cma_destroy_listen(dev_id_priv);
+		/* sync with device removal to avoid duplicate destruction */
+		list_del_init(&dev_id_priv->list);
+		list_del(&dev_id_priv->listen_list);
+		mutex_unlock(&lock);
+
+		rdma_destroy_id(&dev_id_priv->id);
+		mutex_lock(&lock);
 	}
 	mutex_unlock(&lock);
 }
@@ -848,6 +854,9 @@ void rdma_destroy_id(struct rdma_cm_id *id)
 	cma_deref_id(id_priv);
 	wait_for_completion(&id_priv->comp);
 
+	if (id_priv->internal_id)
+		cma_deref_id(id_priv->id.context);
+
 	kfree(id_priv->id.route.path_rec);
 	kfree(id_priv);
 }
@@ -857,11 +866,11 @@ static int cma_rep_recv(struct rdma_id_private *id_priv)
 {
 	int ret;
 
-	ret = cma_modify_qp_rtr(&id_priv->id);
+	ret = cma_modify_qp_rtr(id_priv);
 	if (ret)
 		goto reject;
 
-	ret = cma_modify_qp_rts(&id_priv->id);
+	ret = cma_modify_qp_rts(id_priv);
 	if (ret)
 		goto reject;
 
@@ -871,7 +880,7 @@ static int cma_rep_recv(struct rdma_id_private *id_priv)
 
 	return 0;
 reject:
-	cma_modify_qp_err(&id_priv->id);
+	cma_modify_qp_err(id_priv);
 	ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED,
 		       NULL, 0, NULL, 0);
 	return ret;
@@ -947,7 +956,7 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
 		/* ignore event */
 		goto out;
 	case IB_CM_REJ_RECEIVED:
-		cma_modify_qp_err(&id_priv->id);
+		cma_modify_qp_err(id_priv);
 		event.status = ib_event->param.rej_rcvd.reason;
 		event.event = RDMA_CM_EVENT_REJECTED;
 		event.param.conn.private_data = ib_event->private_data;
@@ -1404,14 +1413,13 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
 
 	cma_attach_to_dev(dev_id_priv, cma_dev);
 	list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list);
+	atomic_inc(&id_priv->refcount);
+	dev_id_priv->internal_id = 1;
 
 	ret = rdma_listen(id, id_priv->backlog);
 	if (ret)
-		goto err;
-
-	return;
-err:
-	cma_destroy_listen(dev_id_priv);
+		printk(KERN_WARNING "RDMA CMA: cma_listen_on_dev, error %d, "
+		       "listening on device %s", ret, cma_dev->device->name);
 }
 
 static void cma_listen_on_all(struct rdma_id_private *id_priv)
@@ -2264,7 +2272,7 @@ static int cma_connect_iw(struct rdma_id_private *id_priv,
 	sin = (struct sockaddr_in*) &id_priv->id.route.addr.dst_addr;
 	cm_id->remote_addr = *sin;
 
-	ret = cma_modify_qp_rtr(&id_priv->id);
+	ret = cma_modify_qp_rtr(id_priv);
 	if (ret)
 		goto out;
 
@@ -2331,7 +2339,7 @@ static int cma_accept_ib(struct rdma_id_private *id_priv,
 	int qp_attr_mask, ret;
 
 	if (id_priv->id.qp) {
-		ret = cma_modify_qp_rtr(&id_priv->id);
+		ret = cma_modify_qp_rtr(id_priv);
 		if (ret)
 			goto out;
 
@@ -2370,7 +2378,7 @@ static int cma_accept_iw(struct rdma_id_private *id_priv,
 	struct iw_cm_conn_param iw_param;
 	int ret;
 
-	ret = cma_modify_qp_rtr(&id_priv->id);
+	ret = cma_modify_qp_rtr(id_priv);
 	if (ret)
 		return ret;
 
@@ -2442,7 +2450,7 @@ int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
 
 	return 0;
 reject:
-	cma_modify_qp_err(id);
+	cma_modify_qp_err(id_priv);
 	rdma_reject(id, NULL, 0);
 	return ret;
 }
@@ -2512,7 +2520,7 @@ int rdma_disconnect(struct rdma_cm_id *id)
 
 	switch (rdma_node_get_transport(id->device->node_type)) {
 	case RDMA_TRANSPORT_IB:
-		ret = cma_modify_qp_err(id);
+		ret = cma_modify_qp_err(id_priv);
 		if (ret)
 			goto out;
 		/* Initiate or respond to a disconnect. */
@@ -2543,9 +2551,11 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
 	    cma_disable_remove(id_priv, CMA_ADDR_RESOLVED))
 		return 0;
 
+	mutex_lock(&id_priv->qp_mutex);
 	if (!status && id_priv->id.qp)
 		status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid,
 					 multicast->rec.mlid);
+	mutex_unlock(&id_priv->qp_mutex);
 
 	memset(&event, 0, sizeof event);
 	event.status = status;
@@ -2757,16 +2767,12 @@ static void cma_process_remove(struct cma_device *cma_dev)
 		id_priv = list_entry(cma_dev->id_list.next,
 				     struct rdma_id_private, list);
 
-		if (cma_internal_listen(id_priv)) {
-			cma_destroy_listen(id_priv);
-			continue;
-		}
-
+		list_del(&id_priv->listen_list);
 		list_del_init(&id_priv->list);
 		atomic_inc(&id_priv->refcount);
 		mutex_unlock(&lock);
 
-		ret = cma_remove_id_dev(id_priv);
+		ret = id_priv->internal_id ? 1 : cma_remove_id_dev(id_priv);
 		cma_deref_id(id_priv);
 		if (ret)
 			rdma_destroy_id(&id_priv->id);
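
A note on the listen-destruction fix: it uses the usual kernel idiom
for destroying objects whose destructor takes the lock that guards the
list - unlink under the lock, drop the lock around the destructive
call, retake it, and re-test the list. A generic sketch of the idiom,
with a hypothetical struct child and destroy_child() standing in for
the per-device listen IDs and rdma_destroy_id():

	struct child {
		struct list_head node;
	};

	static void drain_children(struct list_head *head, struct mutex *lock)
	{
		struct child *c;

		mutex_lock(lock);
		while (!list_empty(head)) {
			c = list_entry(head->next, struct child, node);
			list_del_init(&c->node);	/* unlink while locked */
			mutex_unlock(lock);	/* destructor may take 'lock' */

			destroy_child(c);	/* hypothetical destructor */
			mutex_lock(lock);
		}
		mutex_unlock(lock);
	}

list_empty() is re-tested on each pass because other paths (here,
device removal in cma_process_remove()) can unlink entries while the
lock is dropped; list_del_init() leaves the node pointing at itself,
so a racing path that still sees the entry does not corrupt the list.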



