[openib-general] [PATCH] Missing check for atomic_dec in ib_post_send_mad
Krishna Kumar
krkumar at us.ibm.com
Tue Nov 2 18:33:10 PST 2004
Hi Sean,
I guess you meant "even if solicited is NOT set". What you described is
right, the race will mean that the remove_mad_reg_req() will free things
like method/class, while the find_mad_agent looks through the version
and class to find the mad_agent. This patch will fix it correctly.
I have also cleaned up a hack in ib_mad_recv_done_handler() where a
test for '!mad_agent' was being done to determine whether to free 'recv'
or not :-).
Couple of issues with the new code (same as old code, though) :
1. printk(KERN_ERR PFX "No client 0x%x for received MAD "
"on port %d\n",
hi_tid, port_priv->port_num);
and printk(KERN_NOTICE PFX "No matching mad agent found for "
"received MAD on port %d\n", port_priv->port_num);
both get printed when mad_agent is not found in solicited case.
2. spin_unlock is performed after all the printk's, which is a bit icky.
Compile-tested patch (not tested) follows at the end of the mail. Let me
know if I should fix above problems too.
Thanks,
- KK
On Tue, 2 Nov 2004, Sean Hefty wrote:
> On Tue, 2 Nov 2004 09:59:14 -0800 (PST)
> Krishna Kumar <krkumar at us.ibm.com> wrote:
>
> > Hi Sean,
> >
> > I think that is the best approach. And using this method, we can also
> > avoid holding the lock if solicited is set. I will send a patch in a
> > few minutes if this approach looks good.
>
> Sounds good.
>
> I think that you'll need to hold the lock even if solicited is set to
> handle the case where a response is received after the sender
> unregistered.
>
> - Sean
diff -ruNp 7/mad.c 8/mad.c
--- 7/mad.c 2004-11-02 16:13:05.000000000 -0800
+++ 8/mad.c 2004-11-02 18:30:19.000000000 -0800
@@ -747,13 +747,16 @@ find_mad_agent(struct ib_mad_port_privat
struct ib_mad *mad,
int solicited)
{
- struct ib_mad_agent_private *entry, *mad_agent = NULL;
- struct ib_mad_mgmt_class_table *version;
- struct ib_mad_mgmt_method_table *class;
- u32 hi_tid;
+ struct ib_mad_agent_private *mad_agent = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&port_priv->reg_lock, flags);
/* Whether MAD was solicited determines type of routing to MAD client */
if (solicited) {
+ u32 hi_tid;
+ struct ib_mad_agent_private *entry;
+
/* Routing is based on high 32 bits of transaction ID of MAD */
hi_tid = be64_to_cpu(mad->mad_hdr.tid) >> 32;
list_for_each_entry(entry, &port_priv->agent_list, agent_list) {
@@ -762,12 +765,14 @@ find_mad_agent(struct ib_mad_port_privat
break;
}
}
- if (!mad_agent) {
+ if (!mad_agent)
printk(KERN_ERR PFX "No client 0x%x for received MAD "
- "on port %d\n", hi_tid, port_priv->port_num);
- goto out;
- }
+ "on port %d\n",
+ hi_tid, port_priv->port_num);
} else {
+ struct ib_mad_mgmt_class_table *version;
+ struct ib_mad_mgmt_method_table *class;
+
/* Routing is based on version, class, and method */
if (mad->mad_hdr.class_version >= MAX_MGMT_VERSION) {
printk(KERN_ERR PFX "MAD received with unsupported "
@@ -784,23 +789,30 @@ find_mad_agent(struct ib_mad_port_privat
}
class = version->method_table[convert_mgmt_class(
mad->mad_hdr.mgmt_class)];
- if (!class) {
+ if (class)
+ mad_agent = class->agent[mad->mad_hdr.method &
+ ~IB_MGMT_METHOD_RESP];
+ else
printk(KERN_ERR PFX "MAD received on port %d for class "
"%d with no client\n",
port_priv->port_num, mad->mad_hdr.mgmt_class);
- goto out;
- }
- mad_agent = class->agent[mad->mad_hdr.method &
- ~IB_MGMT_METHOD_RESP];
}
out:
- if (mad_agent && !mad_agent->agent.recv_handler) {
- printk(KERN_ERR PFX "No receive handler for client "
- "%p on port %d\n",
- &mad_agent->agent, port_priv->port_num);
- mad_agent = NULL;
- }
+ if (mad_agent) {
+ if (mad_agent->agent.recv_handler)
+ atomic_inc(&mad_agent->refcount);
+ else {
+ mad_agent = NULL;
+ printk(KERN_ERR PFX "No receive handler for client "
+ "%p on port %d\n",
+ &mad_agent->agent, port_priv->port_num);
+ }
+ } else
+ printk(KERN_NOTICE PFX "No matching mad agent found for "
+ "received MAD on port %d\n", port_priv->port_num);
+
+ spin_unlock_irqrestore(&port_priv->reg_lock, flags);
return mad_agent;
}
@@ -929,9 +941,8 @@ static void ib_mad_recv_done_handler(str
struct ib_mad_private_header *mad_priv_hdr;
struct ib_mad_private *recv;
struct ib_mad_list_head *mad_list;
- struct ib_mad_agent_private *mad_agent = NULL;
+ struct ib_mad_agent_private *mad_agent;
int solicited;
- unsigned long flags;
mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id;
qp_info = mad_list->mad_queue->qp_info;
@@ -965,23 +976,17 @@ static void ib_mad_recv_done_handler(str
recv->header.recv_buf.mad))
goto out;
- spin_lock_irqsave(&port_priv->reg_lock, flags);
/* Determine corresponding MAD agent for incoming receive MAD */
solicited = solicited_mad(recv->header.recv_buf.mad);
mad_agent = find_mad_agent(port_priv, recv->header.recv_buf.mad,
solicited);
- if (!mad_agent) {
- spin_unlock_irqrestore(&port_priv->reg_lock, flags);
- printk(KERN_NOTICE PFX "No matching mad agent found for "
- "received MAD on port %d\n", port_priv->port_num);
- } else {
- atomic_inc(&mad_agent->refcount);
- spin_unlock_irqrestore(&port_priv->reg_lock, flags);
+ if (mad_agent) {
ib_mad_complete_recv(mad_agent, recv, solicited);
+ recv = NULL; /* recv is freed up via ib_mad_complete_recv */
}
out:
- if (!mad_agent) {
+ if (recv) {
/* Should this case be optimized ? */
kmem_cache_free(ib_mad_cache, recv);
}
More information about the general
mailing list