[openib-general] RE: [PATCH] cm refcount race fix

Sean Hefty sean.hefty at intel.com
Tue May 9 10:19:08 PDT 2006


Here's a patch that should fix the refcount race in both the IB CM and the
RDMA CM by using completions rather than wait queue objects.

Michael, can you test that this version works for you?

Signed-off-by: Sean Hefty <sean.hefty at intel.com>
---
Index: cm.c
===================================================================
--- cm.c	(revision 6884)
+++ cm.c	(working copy)
@@ -34,6 +34,8 @@
  *
  * $Id$
  */
+
+#include <linux/completion.h>
 #include <linux/dma-mapping.h>
 #include <linux/err.h>
 #include <linux/idr.h>
@@ -122,7 +124,7 @@ struct cm_id_private {
 	struct rb_node service_node;
 	struct rb_node sidr_id_node;
 	spinlock_t lock;	/* Do not acquire inside cm.lock */
-	wait_queue_head_t wait;
+	struct completion comp;
 	atomic_t refcount;
 
 	struct ib_mad_send_buf *msg;
@@ -160,7 +162,7 @@ static void cm_work_handler(void *data);
 static inline void cm_deref_id(struct cm_id_private *cm_id_priv)
 {
 	if (atomic_dec_and_test(&cm_id_priv->refcount))
-		wake_up(&cm_id_priv->wait);
+		complete(&cm_id_priv->comp);
 }
 
 static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
@@ -611,7 +613,7 @@ struct ib_cm_id *ib_create_cm_id(struct 
 		goto error;
 
 	spin_lock_init(&cm_id_priv->lock);
-	init_waitqueue_head(&cm_id_priv->wait);
+	init_completion(&cm_id_priv->comp);
 	INIT_LIST_HEAD(&cm_id_priv->work_list);
 	atomic_set(&cm_id_priv->work_count, -1);
 	atomic_set(&cm_id_priv->refcount, 1);
@@ -776,8 +778,8 @@ retest:
 	}
 
 	cm_free_id(cm_id->local_id);
-	atomic_dec(&cm_id_priv->refcount);
-	wait_event(cm_id_priv->wait, !atomic_read(&cm_id_priv->refcount));
+	cm_deref_id(cm_id_priv);
+	wait_for_completion(&cm_id_priv->comp);
 	while ((work = cm_dequeue_work(cm_id_priv)) != NULL)
 		cm_free_work(work);
 	kfree(cm_id_priv->compare_data);
Index: cma.c
===================================================================
--- cma.c	(revision 6948)
+++ cma.c	(working copy)
@@ -29,6 +29,7 @@
  *
  */
 
+#include <linux/completion.h>
 #include <linux/in.h>
 #include <linux/in6.h>
 #include <linux/mutex.h>
@@ -70,7 +71,7 @@ struct cma_device {
 	struct list_head	list;
 	struct ib_device	*device;
 	__be64			node_guid;
-	wait_queue_head_t	wait;
+	struct completion	comp;
 	atomic_t		refcount;
 	struct list_head	id_list;
 };
@@ -111,7 +112,7 @@ struct rdma_id_private {
 
 	enum cma_state		state;
 	spinlock_t		lock;
-	wait_queue_head_t	wait;
+	struct completion	comp;
 	atomic_t		refcount;
 	wait_queue_head_t	wait_remove;
 	atomic_t		dev_remove;
@@ -244,11 +245,16 @@ static void cma_attach_to_dev(struct rdm
 	list_add_tail(&id_priv->list, &cma_dev->id_list);
 }
 
+static inline void cma_deref_dev(struct cma_device *cma_dev)
+{
+	if (atomic_dec_and_test(&cma_dev->refcount))
+		complete(&cma_dev->comp);
+}
+
 static void cma_detach_from_dev(struct rdma_id_private *id_priv)
 {
 	list_del(&id_priv->list);
-	if (atomic_dec_and_test(&id_priv->cma_dev->refcount))
-		wake_up(&id_priv->cma_dev->wait);
+	cma_deref_dev(id_priv->cma_dev);
 	id_priv->cma_dev = NULL;
 }
 
@@ -288,7 +294,7 @@ static int cma_acquire_dev(struct rdma_i
 static void cma_deref_id(struct rdma_id_private *id_priv)
 {
 	if (atomic_dec_and_test(&id_priv->refcount))
-		wake_up(&id_priv->wait);
+		complete(&id_priv->comp);
 }
 
 static void cma_release_remove(struct rdma_id_private *id_priv)
@@ -311,7 +317,7 @@ struct rdma_cm_id* rdma_create_id(rdma_c
 	id_priv->id.event_handler = event_handler;
 	id_priv->id.ps = ps;
 	spin_lock_init(&id_priv->lock);
-	init_waitqueue_head(&id_priv->wait);
+	init_completion(&id_priv->comp);
 	atomic_set(&id_priv->refcount, 1);
 	init_waitqueue_head(&id_priv->wait_remove);
 	atomic_set(&id_priv->dev_remove, 0);
@@ -618,8 +624,8 @@ static void cma_destroy_listen(struct rd
 	}
 	list_del(&id_priv->listen_list);
 
-	atomic_dec(&id_priv->refcount);
-	wait_event(id_priv->wait, !atomic_read(&id_priv->refcount));
+	cma_deref_id(id_priv);
+	wait_for_completion(&id_priv->comp);
 
 	kfree(id_priv);
 }
@@ -699,8 +705,8 @@ void rdma_destroy_id(struct rdma_cm_id *
 	}
 
 	cma_release_port(id_priv);
-	atomic_dec(&id_priv->refcount);
-	wait_event(id_priv->wait, !atomic_read(&id_priv->refcount));
+	cma_deref_id(id_priv);
+	wait_for_completion(&id_priv->comp);
 
 	kfree(id_priv->id.route.path_rec);
 	kfree(id_priv);
@@ -1778,7 +1784,7 @@ static void cma_add_one(struct ib_device
 	if (!cma_dev->node_guid)
 		goto err;
 
-	init_waitqueue_head(&cma_dev->wait);
+	init_completion(&cma_dev->comp);
 	atomic_set(&cma_dev->refcount, 1);
 	INIT_LIST_HEAD(&cma_dev->id_list);
 	ib_set_client_data(device, &cma_client, cma_dev);
@@ -1845,8 +1851,8 @@ static void cma_process_remove(struct cm
 	}
 	mutex_unlock(&lock);
 
-	atomic_dec(&cma_dev->refcount);
-	wait_event(cma_dev->wait, !atomic_read(&cma_dev->refcount));
+	cma_deref_dev(cma_dev);
+	wait_for_completion(&cma_dev->comp);
 }
 
 static void cma_remove_one(struct ib_device *device)




More information about the general mailing list