[ofa-general] [PATCH] rdma/cm: add locking around QP accesses

Sean Hefty sean.hefty at intel.com
Mon Sep 24 14:07:28 PDT 2007


If a user allocates a QP on an rdma_cm_id, the rdma_cm will automatically
transition the QP through its states (RTR, RTS, error, etc.)  While the
QP state transitions are occurring, the QP itself must remain valid.
Provide locking around the QP pointer to prevent its destruction while
accessing the pointer.

This fixes an issue reported by Olaf Kirch from Oracle that resulted in
a system crash:

"An incoming connection arrives and we decide to tear down the nascent
 connection.  The remote ends decides to do the same.  We start to shut
 down the connection, and call rdma_destroy_qp on our cm_id. ... Now
 apparently a 'connect reject' message comes in from the other host,
 and cma_ib_handler() is called with an event of IB_CM_REJ_RECEIVED.
 It calls cma_modify_qp_err, which for some odd reason tries to modify
 the exact same QP we just destroyed."

Signed-off-by: Sean Hefty <sean.hefty at intel.com>
---
Rick, can you please test this patch and let me know if it fixes your problem?

 drivers/infiniband/core/cma.c |   90 +++++++++++++++++++++++++++--------------
 1 files changed, 60 insertions(+), 30 deletions(-)

diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 9ffb998..c6a6dba 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -120,6 +120,8 @@ struct rdma_id_private {
 
 	enum cma_state		state;
 	spinlock_t		lock;
+	struct mutex		qp_mutex;
+
 	struct completion	comp;
 	atomic_t		refcount;
 	wait_queue_head_t	wait_remove;
@@ -387,6 +389,7 @@ struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler,
 	id_priv->id.event_handler = event_handler;
 	id_priv->id.ps = ps;
 	spin_lock_init(&id_priv->lock);
+	mutex_init(&id_priv->qp_mutex);
 	init_completion(&id_priv->comp);
 	atomic_set(&id_priv->refcount, 1);
 	init_waitqueue_head(&id_priv->wait_remove);
@@ -472,61 +475,86 @@ EXPORT_SYMBOL(rdma_create_qp);
 
 void rdma_destroy_qp(struct rdma_cm_id *id)
 {
-	ib_destroy_qp(id->qp);
+	struct rdma_id_private *id_priv;
+
+	id_priv = container_of(id, struct rdma_id_private, id);
+	mutex_lock(&id_priv->qp_mutex);
+	ib_destroy_qp(id_priv->id.qp);
+	id_priv->id.qp = NULL;
+	mutex_unlock(&id_priv->qp_mutex);
 }
 EXPORT_SYMBOL(rdma_destroy_qp);
 
-static int cma_modify_qp_rtr(struct rdma_cm_id *id)
+static int cma_modify_qp_rtr(struct rdma_id_private *id_priv)
 {
 	struct ib_qp_attr qp_attr;
 	int qp_attr_mask, ret;
 
-	if (!id->qp)
-		return 0;
+	mutex_lock(&id_priv->qp_mutex);
+	if (!id_priv->id.qp) {
+		ret = 0;
+		goto out;
+	}
 
 	/* Need to update QP attributes from default values. */
 	qp_attr.qp_state = IB_QPS_INIT;
-	ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask);
+	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
 	if (ret)
-		return ret;
+		goto out;
 
-	ret = ib_modify_qp(id->qp, &qp_attr, qp_attr_mask);
+	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
 	if (ret)
-		return ret;
+		goto out;
 
 	qp_attr.qp_state = IB_QPS_RTR;
-	ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask);
+	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
 	if (ret)
-		return ret;
+		goto out;
 
-	return ib_modify_qp(id->qp, &qp_attr, qp_attr_mask);
+	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
+out:
+	mutex_unlock(&id_priv->qp_mutex);
+	return ret;
 }
 
-static int cma_modify_qp_rts(struct rdma_cm_id *id)
+static int cma_modify_qp_rts(struct rdma_id_private *id_priv)
 {
 	struct ib_qp_attr qp_attr;
 	int qp_attr_mask, ret;
 
-	if (!id->qp)
-		return 0;
+	mutex_lock(&id_priv->qp_mutex);
+	if (!id_priv->id.qp) {
+		ret = 0;
+		goto out;
+	}
 
 	qp_attr.qp_state = IB_QPS_RTS;
-	ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask);
+	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
 	if (ret)
-		return ret;
+		goto out;
 
-	return ib_modify_qp(id->qp, &qp_attr, qp_attr_mask);
+	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
+out:
+	mutex_unlock(&id_priv->qp_mutex);
+	return ret;
 }
 
-static int cma_modify_qp_err(struct rdma_cm_id *id)
+static int cma_modify_qp_err(struct rdma_id_private *id_priv)
 {
 	struct ib_qp_attr qp_attr;
+	int ret;
 
-	if (!id->qp)
-		return 0;
+	mutex_lock(&id_priv->qp_mutex);
+	if (!id_priv->id.qp) {
+		ret = 0;
+		goto out;
+	}
 
 	qp_attr.qp_state = IB_QPS_ERR;
-	return ib_modify_qp(id->qp, &qp_attr, IB_QP_STATE);
+	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, IB_QP_STATE);
+out:
+	mutex_unlock(&id_priv->qp_mutex);
+	return ret;
 }
 
 static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv,
@@ -855,11 +883,11 @@ static int cma_rep_recv(struct rdma_id_private *id_priv)
 {
 	int ret;
 
-	ret = cma_modify_qp_rtr(&id_priv->id);
+	ret = cma_modify_qp_rtr(id_priv);
 	if (ret)
 		goto reject;
 
-	ret = cma_modify_qp_rts(&id_priv->id);
+	ret = cma_modify_qp_rts(id_priv);
 	if (ret)
 		goto reject;
 
@@ -869,7 +897,7 @@ static int cma_rep_recv(struct rdma_id_private *id_priv)
 
 	return 0;
 reject:
-	cma_modify_qp_err(&id_priv->id);
+	cma_modify_qp_err(id_priv);
 	ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED,
 		       NULL, 0, NULL, 0);
 	return ret;
@@ -945,7 +973,7 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
 		/* ignore event */
 		goto out;
 	case IB_CM_REJ_RECEIVED:
-		cma_modify_qp_err(&id_priv->id);
+		cma_modify_qp_err(id_priv);
 		event.status = ib_event->param.rej_rcvd.reason;
 		event.event = RDMA_CM_EVENT_REJECTED;
 		event.param.conn.private_data = ib_event->private_data;
@@ -2236,7 +2264,7 @@ static int cma_connect_iw(struct rdma_id_private *id_priv,
 	sin = (struct sockaddr_in*) &id_priv->id.route.addr.dst_addr;
 	cm_id->remote_addr = *sin;
 
-	ret = cma_modify_qp_rtr(&id_priv->id);
+	ret = cma_modify_qp_rtr(id_priv);
 	if (ret)
 		goto out;
 
@@ -2303,7 +2331,7 @@ static int cma_accept_ib(struct rdma_id_private *id_priv,
 	int qp_attr_mask, ret;
 
 	if (id_priv->id.qp) {
-		ret = cma_modify_qp_rtr(&id_priv->id);
+		ret = cma_modify_qp_rtr(id_priv);
 		if (ret)
 			goto out;
 
@@ -2342,7 +2370,7 @@ static int cma_accept_iw(struct rdma_id_private *id_priv,
 	struct iw_cm_conn_param iw_param;
 	int ret;
 
-	ret = cma_modify_qp_rtr(&id_priv->id);
+	ret = cma_modify_qp_rtr(id_priv);
 	if (ret)
 		return ret;
 
@@ -2414,7 +2442,7 @@ int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
 
 	return 0;
 reject:
-	cma_modify_qp_err(id);
+	cma_modify_qp_err(id_priv);
 	rdma_reject(id, NULL, 0);
 	return ret;
 }
@@ -2484,7 +2512,7 @@ int rdma_disconnect(struct rdma_cm_id *id)
 
 	switch (rdma_node_get_transport(id->device->node_type)) {
 	case RDMA_TRANSPORT_IB:
-		ret = cma_modify_qp_err(id);
+		ret = cma_modify_qp_err(id_priv);
 		if (ret)
 			goto out;
 		/* Initiate or respond to a disconnect. */
@@ -2515,9 +2543,11 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
 	    cma_disable_remove(id_priv, CMA_ADDR_RESOLVED))
 		return 0;
 
+	mutex_lock(&id_priv->qp_mutex);
 	if (!status && id_priv->id.qp)
 		status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid,
 					 multicast->rec.mlid);
+	mutex_unlock(&id_priv->qp_mutex);
 
 	memset(&event, 0, sizeof event);
 	event.status = status;




More information about the general mailing list