[openib-general] Re: RE: Re: [PATCH] ipoib_flush_paths

Michael S. Tsirkin mst at mellanox.co.il
Thu Apr 6 06:17:55 PDT 2006


Quoting r. Roland Dreier <rdreier at cisco.com>:
> I'm pretty sure that any scheme that tries to use module reference
> counting will either be horribly complex, still have subtle races, or
> (mostly likely) both.

Actually, it turned out to be the simplest solution - and quite
elegant since there's no room for mistakes: if query is going to be running
this means module is still loaded so we can take a reference to it
without races.

As a bonus, and assertion inside __module_get increases the chance to catch
races where user forgets to cancel the query - much nicer than crashing
randomly.

Please take a look, if OK I'll build a similiar patch for ib_addr.

Warning: compile-tested only.

---

Make sure callback module isn't unloaded while callback is running.

Signed-off-by: Michael S. Tsirkin <mst at mellanox.co.il>

Index: linux-2.6.16/drivers/infiniband/include/rdma/ib_sa.h
===================================================================
--- linux-2.6.16/drivers/infiniband/include/rdma/ib_sa.h	(revision 6281)
+++ linux-2.6.16/drivers/infiniband/include/rdma/ib_sa.h	(working copy)
@@ -262,6 +262,7 @@
 					struct ib_sa_path_rec *resp,
 					void *context),
 		       void *context,
+		       struct module *owner,
 		       struct ib_sa_query **query);
 
 int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num,
@@ -273,6 +274,7 @@
 					      struct ib_sa_mcmember_rec *resp,
 					      void *context),
 			     void *context,
+			     struct module *owner,
 			     struct ib_sa_query **query);
 
 int ib_sa_service_rec_query(struct ib_device *device, u8 port_num,
@@ -284,6 +286,7 @@
 					  struct ib_sa_service_rec *resp,
 					  void *context),
 			 void *context,
+			 struct module *owner,
 			 struct ib_sa_query **sa_query);
 
 /**
@@ -319,13 +322,14 @@
 					struct ib_sa_mcmember_rec *resp,
 					void *context),
 		       void *context,
+		       struct module *owner,
 		       struct ib_sa_query **query)
 {
 	return ib_sa_mcmember_rec_query(device, port_num,
 					IB_MGMT_METHOD_SET,
 					rec, comp_mask,
 					timeout_ms, gfp_mask, callback,
-					context, query);
+					context, owner, query);
 }
 
 /**
@@ -361,13 +365,14 @@
 					   struct ib_sa_mcmember_rec *resp,
 					   void *context),
 			  void *context,
+			  struct module *owner,
 			  struct ib_sa_query **query)
 {
 	return ib_sa_mcmember_rec_query(device, port_num,
 					IB_SA_METHOD_DELETE,
 					rec, comp_mask,
 					timeout_ms, gfp_mask, callback,
-					context, query);
+					context, owner, query);
 }
 
 /**
Index: linux-2.6.16/drivers/infiniband/core/at.c
===================================================================
--- linux-2.6.16/drivers/infiniband/core/at.c	(revision 6281)
+++ linux-2.6.16/drivers/infiniband/core/at.c	(working copy)
@@ -220,6 +220,7 @@
 					GFP_KERNEL,
 					ats_op_complete,
 					ib_dev,
+					THIS_MODULE,
 					&ib_dev->sa_query);
 
 	if (ib_dev->sa_id < 0) {
@@ -1122,6 +1123,7 @@
 				    GFP_KERNEL,
 				    ats_ips_req_complete,
 				    req,
+				    THIS_MODULE,
 				    &req->pend.sa_query);
 
 	if (req->pend.sa_id < 0) {
@@ -1167,6 +1169,7 @@
 				    GFP_KERNEL,
 				    ats_route_req_complete,
 				    req,
+				    THIS_MODULE,
 				    &req->pend.sa_query);
 
 	if (req->pend.sa_id < 0) {
@@ -1230,6 +1233,7 @@
 					     GFP_KERNEL,
 					     path_req_complete,
 					     req,
+					     THIS_MODULE,
 					    &req->pend.sa_query);
 
 	if (req->pend.sa_id < 0) {
Index: linux-2.6.16/drivers/infiniband/core/sa_query.c
===================================================================
--- linux-2.6.16/drivers/infiniband/core/sa_query.c	(revision 6281)
+++ linux-2.6.16/drivers/infiniband/core/sa_query.c	(working copy)
@@ -73,6 +73,7 @@
 struct ib_sa_query {
 	void (*callback)(struct ib_sa_query *, int, struct ib_sa_mad *);
 	void (*release)(struct ib_sa_query *);
+	struct module          *owner;
 	struct ib_sa_port      *port;
 	struct ib_mad_send_buf *mad_buf;
 	struct ib_sa_sm_ah     *sm_ah;
@@ -559,6 +560,7 @@
  * @callback:function called when query completes, times out or is
  * canceled
  * @context:opaque user context passed to callback
+ * @owner:callback owner module
  * @sa_query:query context, used to cancel query
  *
  * Send a Path Record Get query to the SA to look up a path.  The
@@ -580,6 +582,7 @@
 					struct ib_sa_path_rec *resp,
 					void *context),
 		       void *context,
+		       struct module *owner,
 		       struct ib_sa_query **sa_query)
 {
 	struct ib_sa_path_query *query;
@@ -614,6 +617,7 @@
 	init_mad(mad, agent);
 
 	query->sa_query.callback = callback ? ib_sa_path_rec_callback : NULL;
+	query->sa_query.owner    = owner;
 	query->sa_query.release  = ib_sa_path_rec_release;
 	query->sa_query.port     = port;
 	mad->mad_hdr.method	 = IB_MGMT_METHOD_GET;
@@ -696,6 +700,7 @@
 					     struct ib_sa_service_rec *resp,
 					     void *context),
 			    void *context,
+			    struct module *owner,
 			    struct ib_sa_query **sa_query)
 {
 	struct ib_sa_service_query *query;
@@ -735,6 +740,7 @@
 	init_mad(mad, agent);
 
 	query->sa_query.callback = callback ? ib_sa_service_rec_callback : NULL;
+	query->sa_query.owner    = owner;
 	query->sa_query.release  = ib_sa_service_rec_release;
 	query->sa_query.port     = port;
 	mad->mad_hdr.method	 = method;
@@ -793,6 +799,7 @@
 					      struct ib_sa_mcmember_rec *resp,
 					      void *context),
 			     void *context,
+			     struct module *owner,
 			     struct ib_sa_query **sa_query)
 {
 	struct ib_sa_mcmember_query *query;
@@ -827,6 +834,7 @@
 	init_mad(mad, agent);
 
 	query->sa_query.callback = callback ? ib_sa_mcmember_rec_callback : NULL;
+	query->sa_query.owner    = owner;
 	query->sa_query.release  = ib_sa_mcmember_rec_release;
 	query->sa_query.port     = port;
 	mad->mad_hdr.method	 = method;
@@ -866,13 +874,19 @@
 			/* No callback -- already got recv */
 			break;
 		case IB_WC_RESP_TIMEOUT_ERR:
+			__module_get(query->owner);
 			query->callback(query, -ETIMEDOUT, NULL);
+			module_put(query->owner);
 			break;
 		case IB_WC_WR_FLUSH_ERR:
+			__module_get(query->owner);
 			query->callback(query, -EINTR, NULL);
+			module_put(query->owner);
 			break;
 		default:
+			__module_get(query->owner);
 			query->callback(query, -EIO, NULL);
+			module_put(query->owner);
 			break;
 		}
 
@@ -895,6 +909,7 @@
 	query = mad_buf->context[0];
 
 	if (query->callback) {
+		__module_get(query->owner);
 		if (mad_recv_wc->wc->status == IB_WC_SUCCESS)
 			query->callback(query,
 					mad_recv_wc->recv_buf.mad->mad_hdr.status ?
@@ -902,6 +917,7 @@
 					(struct ib_sa_mad *) mad_recv_wc->recv_buf.mad);
 		else
 			query->callback(query, -EIO, NULL);
+		module_put(query->owner);
 	}
 
 	ib_free_recv_mad(mad_recv_wc);
Index: linux-2.6.16/drivers/infiniband/core/cma.c
===================================================================
--- linux-2.6.16/drivers/infiniband/core/cma.c	(revision 6281)
+++ linux-2.6.16/drivers/infiniband/core/cma.c	(working copy)
@@ -1065,7 +1065,8 @@
 				IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID |
 				IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH,
 				timeout_ms, GFP_KERNEL,
-				cma_query_handler, work, &id_priv->query);
+				cma_query_handler, work, THIS_MODULE,
+				&id_priv->query);
 	
 	return (id_priv->query_id < 0) ? id_priv->query_id : 0;
 }
Index: linux-2.6.16/drivers/infiniband/ulp/ipoib/ipoib_main.c
===================================================================
--- linux-2.6.16/drivers/infiniband/ulp/ipoib/ipoib_main.c	(revision 6281)
+++ linux-2.6.16/drivers/infiniband/ulp/ipoib/ipoib_main.c	(working copy)
@@ -472,7 +472,7 @@
 				   IB_SA_PATH_REC_PKEY,
 				   1000, GFP_ATOMIC,
 				   path_rec_completion,
-				   path, &path->query);
+				   path, THIS_MODULE, &path->query);
 	if (path->query_id < 0) {
 		ipoib_warn(priv, "ib_sa_path_rec_get failed\n");
 		path->query = NULL;
Index: linux-2.6.16/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
===================================================================
--- linux-2.6.16/drivers/infiniband/ulp/ipoib/ipoib_multicast.c	(revision 6281)
+++ linux-2.6.16/drivers/infiniband/ulp/ipoib/ipoib_multicast.c	(working copy)
@@ -367,7 +367,7 @@
 				     IB_SA_MCMEMBER_REC_JOIN_STATE,
 				     1000, GFP_ATOMIC,
 				     ipoib_mcast_sendonly_join_complete,
-				     mcast, &mcast->query);
+				     mcast, THIS_MODULE, &mcast->query);
 	if (ret < 0) {
 		ipoib_warn(priv, "ib_sa_mcmember_rec_set failed (ret = %d)\n",
 			   ret);
@@ -487,7 +487,7 @@
 	ret = ib_sa_mcmember_rec_set(priv->ca, priv->port, &rec, comp_mask,
 				     mcast->backoff * 1000, GFP_ATOMIC,
 				     ipoib_mcast_join_complete,
-				     mcast, &mcast->query);
+				     mcast, THIS_MODULE, &mcast->query);
 
 	if (ret < 0) {
 		ipoib_warn(priv, "ib_sa_mcmember_rec_set failed, status %d\n", ret);
@@ -686,7 +686,7 @@
 					IB_SA_MCMEMBER_REC_PKEY		|
 					IB_SA_MCMEMBER_REC_JOIN_STATE,
 					0, GFP_ATOMIC, NULL,
-					mcast, &mcast->query);
+					mcast, THIS_MODULE, &mcast->query);
 	if (ret < 0)
 		ipoib_warn(priv, "ib_sa_mcmember_rec_delete failed "
 			   "for leave (result = %d)\n", ret);
Index: linux-2.6.16/drivers/infiniband/ulp/srp/ib_srp.c
===================================================================
--- linux-2.6.16/drivers/infiniband/ulp/srp/ib_srp.c	(revision 6281)
+++ linux-2.6.16/drivers/infiniband/ulp/srp/ib_srp.c	(working copy)
@@ -260,7 +260,8 @@
 						   SRP_PATH_REC_TIMEOUT_MS,
 						   GFP_KERNEL,
 						   srp_path_rec_completion,
-						   target, &target->path_query);
+						   target, THIS_MODULE,
+						   &target->path_query);
 	if (target->path_query_id < 0)
 		return target->path_query_id;
 


-- 
MST



More information about the general mailing list