[openib-general] Fwd: RE: [PATCH] cm refcount race fix

Michael S. Tsirkin mst at mellanox.co.il
Thu May 11 10:07:20 PDT 2006


Here's the patch that I tested.

----- Forwarded message from Sean Hefty <sean.hefty at intel.com> -----

From: "Sean Hefty" <sean.hefty at intel.com>
Subject: RE: [PATCH] cm refcount race fix
Date: Tue, 9 May 2006 12:14:02 -0700
In-Reply-To: <20060509172703.GA22825 at mellanox.co.il>
X-Spam: exempt

Here's a patch for all of the files that you listed.

I did do some basic testing and didn't see any issues.

Signed-off-by: Sean Hefty <sean.hefty at intel.com>
---
Index: mad_rmpp.c
===================================================================
--- mad_rmpp.c	(revision 6884)
+++ mad_rmpp.c	(working copy)
@@ -49,7 +49,7 @@ struct mad_rmpp_recv {
 	struct list_head list;
 	struct work_struct timeout_work;
 	struct work_struct cleanup_work;
-	wait_queue_head_t wait;
+	struct completion comp;
 	enum rmpp_state state;
 	spinlock_t lock;
 	atomic_t refcount;
@@ -69,10 +69,16 @@ struct mad_rmpp_recv {
 	u8 method;
 };
 
+static inline void deref_rmpp_recv(struct mad_rmpp_recv *rmpp_recv)
+{
+	if (atomic_dec_and_test(&rmpp_recv->refcount))
+		complete(&rmpp_recv->comp);
+}
+
 static void destroy_rmpp_recv(struct mad_rmpp_recv *rmpp_recv)
 {
-	atomic_dec(&rmpp_recv->refcount);
-	wait_event(rmpp_recv->wait, !atomic_read(&rmpp_recv->refcount));
+	deref_rmpp_recv(rmpp_recv);
+	wait_for_completion(&rmpp_recv->comp);
 	ib_destroy_ah(rmpp_recv->ah);
 	kfree(rmpp_recv);
 }
@@ -253,7 +259,7 @@ create_rmpp_recv(struct ib_mad_agent_pri
 		goto error;
 
 	rmpp_recv->agent = agent;
-	init_waitqueue_head(&rmpp_recv->wait);
+	init_completion(&rmpp_recv->comp);
 	INIT_WORK(&rmpp_recv->timeout_work, recv_timeout_handler, rmpp_recv);
 	INIT_WORK(&rmpp_recv->cleanup_work, recv_cleanup_handler, rmpp_recv);
 	spin_lock_init(&rmpp_recv->lock);
@@ -279,12 +285,6 @@ error:	kfree(rmpp_recv);
 	return NULL;
 }
 
-static inline void deref_rmpp_recv(struct mad_rmpp_recv *rmpp_recv)
-{
-	if (atomic_dec_and_test(&rmpp_recv->refcount))
-		wake_up(&rmpp_recv->wait);
-}
-
 static struct mad_rmpp_recv *
 find_rmpp_recv(struct ib_mad_agent_private *agent,
 	       struct ib_mad_recv_wc *mad_recv_wc)
Index: cm.c
===================================================================
--- cm.c	(revision 6884)
+++ cm.c	(working copy)
@@ -34,6 +34,8 @@
  *
  * $Id$
  */
+
+#include <linux/completion.h>
 #include <linux/dma-mapping.h>
 #include <linux/err.h>
 #include <linux/idr.h>
@@ -122,7 +124,7 @@ struct cm_id_private {
 	struct rb_node service_node;
 	struct rb_node sidr_id_node;
 	spinlock_t lock;	/* Do not acquire inside cm.lock */
-	wait_queue_head_t wait;
+	struct completion comp;
 	atomic_t refcount;
 
 	struct ib_mad_send_buf *msg;
@@ -160,7 +162,7 @@ static void cm_work_handler(void *data);
 static inline void cm_deref_id(struct cm_id_private *cm_id_priv)
 {
 	if (atomic_dec_and_test(&cm_id_priv->refcount))
-		wake_up(&cm_id_priv->wait);
+		complete(&cm_id_priv->comp);
 }
 
 static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
@@ -611,7 +613,7 @@ struct ib_cm_id *ib_create_cm_id(struct 
 		goto error;
 
 	spin_lock_init(&cm_id_priv->lock);
-	init_waitqueue_head(&cm_id_priv->wait);
+	init_completion(&cm_id_priv->comp);
 	INIT_LIST_HEAD(&cm_id_priv->work_list);
 	atomic_set(&cm_id_priv->work_count, -1);
 	atomic_set(&cm_id_priv->refcount, 1);
@@ -776,8 +778,8 @@ retest:
 	}
 
 	cm_free_id(cm_id->local_id);
-	atomic_dec(&cm_id_priv->refcount);
-	wait_event(cm_id_priv->wait, !atomic_read(&cm_id_priv->refcount));
+	cm_deref_id(cm_id_priv);
+	wait_for_completion(&cm_id_priv->comp);
 	while ((work = cm_dequeue_work(cm_id_priv)) != NULL)
 		cm_free_work(work);
 	kfree(cm_id_priv->compare_data);
Index: multicast.c
===================================================================
--- multicast.c	(revision 6884)
+++ multicast.c	(working copy)
@@ -30,6 +30,7 @@
  * SOFTWARE.
  */
 
+#include <linux/completion.h>
 #include <linux/dma-mapping.h>
 #include <linux/err.h>
 #include <linux/interrupt.h>
@@ -69,7 +70,7 @@ struct mcast_port {
 	spinlock_t		lock;
 	struct rb_root		table;
 	atomic_t		refcount;
-	wait_queue_head_t	wait;
+	struct completion	comp;
 	u8			port_num;
 };
 
@@ -110,7 +111,7 @@ struct mcast_member {
 	struct list_head	list;
 	enum mcast_state	state;
 	atomic_t		refcount;
-	wait_queue_head_t	wait;
+	struct completion	comp;
 };
 
 static void join_handler(int status, struct ib_sa_mcmember_rec *rec,
@@ -168,7 +169,7 @@ static struct mcast_group *mcast_insert(
 static void deref_port(struct mcast_port *port)
 {
 	if (atomic_dec_and_test(&port->refcount))
-		wake_up(&port->wait);
+		complete(&port->comp);
 }
 
 static void release_group(struct mcast_group *group)
@@ -189,7 +190,7 @@ static void release_group(struct mcast_g
 static void deref_member(struct mcast_member *member)
 {
 	if (atomic_dec_and_test(&member->refcount))
-		wake_up(&member->wait);
+		complete(&member->comp);
 }
 
 static void queue_join(struct mcast_member *member)
@@ -512,7 +513,7 @@ struct ib_multicast *ib_join_multicast(s
 	member->multicast.comp_mask = comp_mask;
 	member->multicast.callback = callback;
 	member->multicast.context = context;
-	init_waitqueue_head(&member->wait);
+	init_completion(&member->comp);
 	atomic_set(&member->refcount, 1);
 	member->state = MCAST_JOINING;
 
@@ -569,8 +570,8 @@ void ib_free_multicast(struct ib_multica
 		release_group(group);
 	}
 
-	atomic_dec(&member->refcount);
-	wait_event(member->wait, !atomic_read(&member->refcount));
+	deref_member(member);
+	wait_for_completion(&member->comp);
 	kfree(member);
 }
 EXPORT_SYMBOL(ib_free_multicast);
@@ -602,7 +603,7 @@ static void mcast_add_one(struct ib_devi
 		port->port_num = dev->start_port + i;
 		spin_lock_init(&port->lock);
 		port->table = RB_ROOT;
-		init_waitqueue_head(&port->wait);
+		init_completion(&port->comp);
 		atomic_set(&port->refcount, 1);
 	}
 
@@ -644,8 +645,8 @@ static void mcast_remove_one(struct ib_d
 	for (i = 0; i < dev->end_port - dev->start_port; i++) {
 		port = &dev->port[i];
 		leave_groups(port);
-		atomic_dec(&port->refcount);
-		wait_event(port->wait, !atomic_read(&port->refcount));
+		deref_port(port);
+		wait_for_completion(&port->comp);
 	}
 
 	kfree(dev);
Index: cma.c
===================================================================
--- cma.c	(revision 6948)
+++ cma.c	(working copy)
@@ -29,6 +29,7 @@
  *
  */
 
+#include <linux/completion.h>
 #include <linux/in.h>
 #include <linux/in6.h>
 #include <linux/mutex.h>
@@ -70,7 +71,7 @@ struct cma_device {
 	struct list_head	list;
 	struct ib_device	*device;
 	__be64			node_guid;
-	wait_queue_head_t	wait;
+	struct completion	comp;
 	atomic_t		refcount;
 	struct list_head	id_list;
 };
@@ -111,7 +112,7 @@ struct rdma_id_private {
 
 	enum cma_state		state;
 	spinlock_t		lock;
-	wait_queue_head_t	wait;
+	struct completion	comp;
 	atomic_t		refcount;
 	wait_queue_head_t	wait_remove;
 	atomic_t		dev_remove;
@@ -244,11 +245,16 @@ static void cma_attach_to_dev(struct rdm
 	list_add_tail(&id_priv->list, &cma_dev->id_list);
 }
 
+static inline void cma_deref_dev(struct cma_device *cma_dev)
+{
+	if (atomic_dec_and_test(&cma_dev->refcount))
+		complete(&cma_dev->comp);
+}
+
 static void cma_detach_from_dev(struct rdma_id_private *id_priv)
 {
 	list_del(&id_priv->list);
-	if (atomic_dec_and_test(&id_priv->cma_dev->refcount))
-		wake_up(&id_priv->cma_dev->wait);
+	cma_deref_dev(id_priv->cma_dev);
 	id_priv->cma_dev = NULL;
 }
 
@@ -288,7 +294,7 @@ static int cma_acquire_dev(struct rdma_i
 static void cma_deref_id(struct rdma_id_private *id_priv)
 {
 	if (atomic_dec_and_test(&id_priv->refcount))
-		wake_up(&id_priv->wait);
+		complete(&id_priv->comp);
 }
 
 static void cma_release_remove(struct rdma_id_private *id_priv)
@@ -311,7 +317,7 @@ struct rdma_cm_id* rdma_create_id(rdma_c
 	id_priv->id.event_handler = event_handler;
 	id_priv->id.ps = ps;
 	spin_lock_init(&id_priv->lock);
-	init_waitqueue_head(&id_priv->wait);
+	init_completion(&id_priv->comp);
 	atomic_set(&id_priv->refcount, 1);
 	init_waitqueue_head(&id_priv->wait_remove);
 	atomic_set(&id_priv->dev_remove, 0);
@@ -618,8 +624,8 @@ static void cma_destroy_listen(struct rd
 	}
 	list_del(&id_priv->listen_list);
 
-	atomic_dec(&id_priv->refcount);
-	wait_event(id_priv->wait, !atomic_read(&id_priv->refcount));
+	cma_deref_id(id_priv);
+	wait_for_completion(&id_priv->comp);
 
 	kfree(id_priv);
 }
@@ -699,8 +705,8 @@ void rdma_destroy_id(struct rdma_cm_id *
 	}
 
 	cma_release_port(id_priv);
-	atomic_dec(&id_priv->refcount);
-	wait_event(id_priv->wait, !atomic_read(&id_priv->refcount));
+	cma_deref_id(id_priv);
+	wait_for_completion(&id_priv->comp);
 
 	kfree(id_priv->id.route.path_rec);
 	kfree(id_priv);
@@ -1778,7 +1784,7 @@ static void cma_add_one(struct ib_device
 	if (!cma_dev->node_guid)
 		goto err;
 
-	init_waitqueue_head(&cma_dev->wait);
+	init_completion(&cma_dev->comp);
 	atomic_set(&cma_dev->refcount, 1);
 	INIT_LIST_HEAD(&cma_dev->id_list);
 	ib_set_client_data(device, &cma_client, cma_dev);
@@ -1845,8 +1851,8 @@ static void cma_process_remove(struct cm
 	}
 	mutex_unlock(&lock);
 
-	atomic_dec(&cma_dev->refcount);
-	wait_event(cma_dev->wait, !atomic_read(&cma_dev->refcount));
+	cma_deref_dev(cma_dev);
+	wait_for_completion(&cma_dev->comp);
 }
 
 static void cma_remove_one(struct ib_device *device)
Index: mad.c
===================================================================
--- mad.c	(revision 6886)
+++ mad.c	(working copy)
@@ -353,7 +353,7 @@ struct ib_mad_agent *ib_register_mad_age
 	INIT_WORK(&mad_agent_priv->local_work, local_completions,
 		   mad_agent_priv);
 	atomic_set(&mad_agent_priv->refcount, 1);
-	init_waitqueue_head(&mad_agent_priv->wait);
+	init_completion(&mad_agent_priv->comp);
 
 	return &mad_agent_priv->agent;
 
@@ -468,7 +468,7 @@ struct ib_mad_agent *ib_register_mad_sno
 	mad_snoop_priv->agent.qp = port_priv->qp_info[qpn].qp;
 	mad_snoop_priv->agent.port_num = port_num;
 	mad_snoop_priv->mad_snoop_flags = mad_snoop_flags;
-	init_waitqueue_head(&mad_snoop_priv->wait);
+	init_completion(&mad_snoop_priv->comp);
 	mad_snoop_priv->snoop_index = register_snoop_agent(
 						&port_priv->qp_info[qpn],
 						mad_snoop_priv);
@@ -487,6 +487,18 @@ error1:
 }
 EXPORT_SYMBOL(ib_register_mad_snoop);
 
+static inline void deref_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
+{
+	if (atomic_dec_and_test(&mad_agent_priv->refcount))
+		complete(&mad_agent_priv->comp);
+}
+
+static inline void deref_snoop_agent(struct ib_mad_snoop_private *mad_snoop_priv)
+{
+	if (atomic_dec_and_test(&mad_snoop_priv->refcount))
+		complete(&mad_snoop_priv->comp);
+}
+
 static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
 {
 	struct ib_mad_port_private *port_priv;
@@ -510,9 +522,8 @@ static void unregister_mad_agent(struct 
 	flush_workqueue(port_priv->wq);
 	ib_cancel_rmpp_recvs(mad_agent_priv);
 
-	atomic_dec(&mad_agent_priv->refcount);
-	wait_event(mad_agent_priv->wait,
-		   !atomic_read(&mad_agent_priv->refcount));
+	deref_mad_agent(mad_agent_priv);
+	wait_for_completion(&mad_agent_priv->comp);
 
 	kfree(mad_agent_priv->reg_req);
 	ib_dereg_mr(mad_agent_priv->agent.mr);
@@ -530,9 +541,8 @@ static void unregister_mad_snoop(struct 
 	atomic_dec(&qp_info->snoop_count);
 	spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
 
-	atomic_dec(&mad_snoop_priv->refcount);
-	wait_event(mad_snoop_priv->wait,
-		   !atomic_read(&mad_snoop_priv->refcount));
+	deref_snoop_agent(mad_snoop_priv);
+	wait_for_completion(&mad_snoop_priv->comp);
 
 	kfree(mad_snoop_priv);
 }
@@ -601,8 +611,7 @@ static void snoop_send(struct ib_mad_qp_
 		spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
 		mad_snoop_priv->agent.snoop_handler(&mad_snoop_priv->agent,
 						    send_buf, mad_send_wc);
-		if (atomic_dec_and_test(&mad_snoop_priv->refcount))
-			wake_up(&mad_snoop_priv->wait);
+		deref_snoop_agent(mad_snoop_priv);
 		spin_lock_irqsave(&qp_info->snoop_lock, flags);
 	}
 	spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
@@ -627,8 +636,7 @@ static void snoop_recv(struct ib_mad_qp_
 		spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
 		mad_snoop_priv->agent.recv_handler(&mad_snoop_priv->agent,
 						   mad_recv_wc);
-		if (atomic_dec_and_test(&mad_snoop_priv->refcount))
-			wake_up(&mad_snoop_priv->wait);
+		deref_snoop_agent(mad_snoop_priv);
 		spin_lock_irqsave(&qp_info->snoop_lock, flags);
 	}
 	spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
@@ -969,8 +977,7 @@ void ib_free_send_mad(struct ib_mad_send
 
 	free_send_rmpp_list(mad_send_wr);
 	kfree(send_buf->mad);
-	if (atomic_dec_and_test(&mad_agent_priv->refcount))
-		wake_up(&mad_agent_priv->wait);
+	deref_mad_agent(mad_agent_priv);
 }
 EXPORT_SYMBOL(ib_free_send_mad);
 
@@ -1789,8 +1796,7 @@ static void ib_mad_complete_recv(struct 
 		mad_recv_wc = ib_process_rmpp_recv_wc(mad_agent_priv,
 						      mad_recv_wc);
 		if (!mad_recv_wc) {
-			if (atomic_dec_and_test(&mad_agent_priv->refcount))
-				wake_up(&mad_agent_priv->wait);
+			deref_mad_agent(mad_agent_priv);
 			return;
 		}
 	}
@@ -1802,8 +1808,7 @@ static void ib_mad_complete_recv(struct 
 		if (!mad_send_wr) {
 			spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
 			ib_free_recv_mad(mad_recv_wc);
-			if (atomic_dec_and_test(&mad_agent_priv->refcount))
-				wake_up(&mad_agent_priv->wait);
+			deref_mad_agent(mad_agent_priv);
 			return;
 		}
 		ib_mark_mad_done(mad_send_wr);
@@ -1822,8 +1827,7 @@ static void ib_mad_complete_recv(struct 
 	} else {
 		mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
 						   mad_recv_wc);
-		if (atomic_dec_and_test(&mad_agent_priv->refcount))
-			wake_up(&mad_agent_priv->wait);
+		deref_mad_agent(mad_agent_priv);
 	}
 }
 
@@ -2053,8 +2057,7 @@ void ib_mad_complete_send_wr(struct ib_m
 						   mad_send_wc);
 
 	/* Release reference on agent taken when sending */
-	if (atomic_dec_and_test(&mad_agent_priv->refcount))
-		wake_up(&mad_agent_priv->wait);
+	deref_mad_agent(mad_agent_priv);
 	return;
 done:
 	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
Index: mad_priv.h
===================================================================
--- mad_priv.h	(revision 6884)
+++ mad_priv.h	(working copy)
@@ -37,6 +37,7 @@
 #ifndef __IB_MAD_PRIV_H__
 #define __IB_MAD_PRIV_H__
 
+#include <linux/completion.h>
 #include <linux/pci.h>
 #include <linux/kthread.h>
 #include <linux/workqueue.h>
@@ -108,7 +109,7 @@ struct ib_mad_agent_private {
 	struct list_head rmpp_list;
 
 	atomic_t refcount;
-	wait_queue_head_t wait;
+	struct completion comp;
 };
 
 struct ib_mad_snoop_private {
@@ -117,7 +118,7 @@ struct ib_mad_snoop_private {
 	int snoop_index;
 	int mad_snoop_flags;
 	atomic_t refcount;
-	wait_queue_head_t wait;
+	struct completion comp;
 };
 
 struct ib_mad_send_wr_private {
Index: ucm.c
===================================================================
--- ucm.c	(revision 6884)
+++ ucm.c	(working copy)
@@ -32,6 +32,8 @@
  *
  * $Id$
  */
+
+#include <linux/completion.h>
 #include <linux/init.h>
 #include <linux/fs.h>
 #include <linux/module.h>
@@ -73,7 +75,7 @@ struct ib_ucm_file {
 
 struct ib_ucm_context {
 	int                 id;
-	wait_queue_head_t   wait;
+	struct completion   comp;
 	atomic_t            ref;
 	int		    events_reported;
 
@@ -139,7 +141,7 @@ static struct ib_ucm_context *ib_ucm_ctx
 static void ib_ucm_ctx_put(struct ib_ucm_context *ctx)
 {
 	if (atomic_dec_and_test(&ctx->ref))
-		wake_up(&ctx->wait);
+		complete(&ctx->comp);
 }
 
 static inline int ib_ucm_new_cm_id(int event)
@@ -179,7 +181,7 @@ static struct ib_ucm_context *ib_ucm_ctx
 		return NULL;
 
 	atomic_set(&ctx->ref, 1);
-	init_waitqueue_head(&ctx->wait);
+	init_completion(&ctx->comp);
 	ctx->file = file;
 	INIT_LIST_HEAD(&ctx->events);
 
@@ -559,8 +561,8 @@ static ssize_t ib_ucm_destroy_id(struct 
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
 
-	atomic_dec(&ctx->ref);
-	wait_event(ctx->wait, !atomic_read(&ctx->ref));
+	ib_ucm_ctx_put(ctx);
+	wait_for_completion(&ctx->comp);
 
 	/* No new events will be generated after destroying the cm_id. */
 	ib_destroy_cm_id(ctx->cm_id);
Index: ucma.c
===================================================================
--- ucma.c	(revision 6949)
+++ ucma.c	(working copy)
@@ -30,6 +30,7 @@
  * SOFTWARE.
  */
 
+#include <linux/completion.h>
 #include <linux/mutex.h>
 #include <linux/poll.h>
 #include <linux/idr.h>
@@ -61,7 +62,7 @@ struct ucma_file {
 
 struct ucma_context {
 	int			id;
-	wait_queue_head_t	wait;
+	struct completion	comp;
 	atomic_t		ref;
 	int			events_reported;
 	int			backlog;
@@ -105,7 +106,7 @@ static struct ucma_context* ucma_get_ctx
 static void ucma_put_ctx(struct ucma_context *ctx)
 {
 	if (atomic_dec_and_test(&ctx->ref))
-		wake_up(&ctx->wait);
+		complete(&ctx->comp);
 }
 
 static void ucma_cleanup_events(struct ucma_context *ctx)
@@ -140,7 +141,7 @@ static struct ucma_context* ucma_alloc_c
 		return NULL;
 
 	atomic_set(&ctx->ref, 1);
-	init_waitqueue_head(&ctx->wait);
+	init_completion(&ctx->comp);
 	ctx->file = file;
 	INIT_LIST_HEAD(&ctx->events);
 
@@ -341,8 +342,8 @@ static ssize_t ucma_destroy_id(struct uc
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
 
-	atomic_dec(&ctx->ref);
-	wait_event(ctx->wait, !atomic_read(&ctx->ref));
+	ucma_put_ctx(ctx);
+	wait_for_completion(&ctx->comp);
 
 	/* No new events will be generated after destroying the id. */
 	rdma_destroy_id(ctx->cm_id);

----- End forwarded message -----

-- 
MST



More information about the general mailing list