[openib-general] [PATCH] Missing check for atomic_dec in ib_post_send_mad

Krishna Kumar krkumar at us.ibm.com
Tue Nov 2 18:33:10 PST 2004


Hi Sean,

I guess you meant "even if solicited is NOT set". What you described is
right, the race will mean that the remove_mad_reg_req() will free things
like method/class, while the find_mad_agent looks through the version
and class to find the mad_agent. This patch will fix it correctly.

I have also cleaned up a hack in ib_mad_recv_done_handler() where a
test for '!mad_agent' was being done to determine whether to free 'recv'
or not :-).

Couple of issues with the new code (same as old code, though) :

1. printk(KERN_ERR PFX "No client 0x%x for received MAD "
                      "on port %d\n",
		      hi_tid, port_priv->port_num);
   and printk(KERN_NOTICE PFX "No matching mad agent found for "
                       "received MAD on port %d\n", port_priv->port_num);
   both get printed when mad_agent is not found in solicited case.

2. spin_unlock is performed after all the printk's, which is a bit icky.

Compile-tested patch (not tested) follows at the end of the mail. Let me
know if I should fix above problems too.

Thanks,

- KK

On Tue, 2 Nov 2004, Sean Hefty wrote:

> On Tue, 2 Nov 2004 09:59:14 -0800 (PST)
> Krishna Kumar <krkumar at us.ibm.com> wrote:
>
> > Hi Sean,
> >
> > I think that is the best approach. And using this method, we can also
> > avoid holding the lock if solicited is set. I will send a patch in a
> > few minutes if this approach looks good.
>
> Sounds good.
>
> I think that you'll need to hold the lock even if solicited is set to
> handle the case where a response is received after the sender
> unregistered.
>
> - Sean


diff -ruNp 7/mad.c 8/mad.c
--- 7/mad.c	2004-11-02 16:13:05.000000000 -0800
+++ 8/mad.c	2004-11-02 18:30:19.000000000 -0800
@@ -747,13 +747,16 @@ find_mad_agent(struct ib_mad_port_privat
 	       struct ib_mad *mad,
 	       int solicited)
 {
-	struct ib_mad_agent_private *entry, *mad_agent = NULL;
-	struct ib_mad_mgmt_class_table *version;
-	struct ib_mad_mgmt_method_table *class;
-	u32 hi_tid;
+	struct ib_mad_agent_private *mad_agent = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&port_priv->reg_lock, flags);

 	/* Whether MAD was solicited determines type of routing to MAD client */
 	if (solicited) {
+		u32 hi_tid;
+		struct ib_mad_agent_private *entry;
+
 		/* Routing is based on high 32 bits of transaction ID of MAD  */
 		hi_tid = be64_to_cpu(mad->mad_hdr.tid) >> 32;
 		list_for_each_entry(entry, &port_priv->agent_list, agent_list) {
@@ -762,12 +765,14 @@ find_mad_agent(struct ib_mad_port_privat
 				break;
 			}
 		}
-		if (!mad_agent) {
+		if (!mad_agent)
 			printk(KERN_ERR PFX "No client 0x%x for received MAD "
-			       "on port %d\n", hi_tid, port_priv->port_num);
-			goto out;
-		}
+			       "on port %d\n",
+			       hi_tid, port_priv->port_num);
 	} else {
+		struct ib_mad_mgmt_class_table *version;
+		struct ib_mad_mgmt_method_table *class;
+
 		/* Routing is based on version, class, and method */
 		if (mad->mad_hdr.class_version >= MAX_MGMT_VERSION) {
 			printk(KERN_ERR PFX "MAD received with unsupported "
@@ -784,23 +789,30 @@ find_mad_agent(struct ib_mad_port_privat
 		}
 		class = version->method_table[convert_mgmt_class(
 						mad->mad_hdr.mgmt_class)];
-		if (!class) {
+		if (class)
+			mad_agent = class->agent[mad->mad_hdr.method &
+					 ~IB_MGMT_METHOD_RESP];
+		else
 			printk(KERN_ERR PFX "MAD received on port %d for class "
 			       "%d with no client\n",
 			       port_priv->port_num, mad->mad_hdr.mgmt_class);
-			goto out;
-		}
-		mad_agent = class->agent[mad->mad_hdr.method &
-					 ~IB_MGMT_METHOD_RESP];
 	}

 out:
-	if (mad_agent && !mad_agent->agent.recv_handler) {
-		printk(KERN_ERR PFX "No receive handler for client "
-		       "%p on port %d\n",
-		       &mad_agent->agent, port_priv->port_num);
-		mad_agent = NULL;
-	}
+	if (mad_agent) {
+		if (mad_agent->agent.recv_handler)
+			atomic_inc(&mad_agent->refcount);
+		else {
+			mad_agent = NULL;
+			printk(KERN_ERR PFX "No receive handler for client "
+			       "%p on port %d\n",
+			       &mad_agent->agent, port_priv->port_num);
+		}
+	} else
+		printk(KERN_NOTICE PFX "No matching mad agent found for "
+		       "received MAD on port %d\n", port_priv->port_num);
+
+	spin_unlock_irqrestore(&port_priv->reg_lock, flags);

 	return mad_agent;
 }
@@ -929,9 +941,8 @@ static void ib_mad_recv_done_handler(str
 	struct ib_mad_private_header *mad_priv_hdr;
 	struct ib_mad_private *recv;
 	struct ib_mad_list_head *mad_list;
-	struct ib_mad_agent_private *mad_agent = NULL;
+	struct ib_mad_agent_private *mad_agent;
 	int solicited;
-	unsigned long flags;

 	mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id;
 	qp_info = mad_list->mad_queue->qp_info;
@@ -965,23 +976,17 @@ static void ib_mad_recv_done_handler(str
 						 recv->header.recv_buf.mad))
 			goto out;

-	spin_lock_irqsave(&port_priv->reg_lock, flags);
 	/* Determine corresponding MAD agent for incoming receive MAD */
 	solicited = solicited_mad(recv->header.recv_buf.mad);
 	mad_agent = find_mad_agent(port_priv, recv->header.recv_buf.mad,
 				   solicited);
-	if (!mad_agent) {
-		spin_unlock_irqrestore(&port_priv->reg_lock, flags);
-		printk(KERN_NOTICE PFX "No matching mad agent found for "
-		       "received MAD on port %d\n", port_priv->port_num);
-	} else {
-		atomic_inc(&mad_agent->refcount);
-		spin_unlock_irqrestore(&port_priv->reg_lock, flags);
+	if (mad_agent) {
 		ib_mad_complete_recv(mad_agent, recv, solicited);
+		recv = NULL;	/* recv is freed up via ib_mad_complete_recv */
 	}

 out:
-	if (!mad_agent) {
+	if (recv) {
 		/* Should this case be optimized ? */
 		kmem_cache_free(ib_mad_cache, recv);
 	}




More information about the general mailing list