[openib-general] [PATCH] [MAD] fix race completing request MAD with timeout/cancel
Sean Hefty
mshefty at ichips.intel.com
Thu Apr 21 10:31:06 PDT 2005
This patch should fix a race in processing a sent MAD after it has timed
out or been canceled. The race occurs when a response MAD is matched with
the sent request: the request could time out or be canceled after the
response MAD has been matched with it, but before the request's completion
can be processed.
Signed-off-by: Sean Hefty <sean.hefty at intel.com>
Index: core/mad.c
===================================================================
--- core/mad.c (revision 2203)
+++ core/mad.c (working copy)
@@ -342,6 +342,7 @@
spin_lock_init(&mad_agent_priv->lock);
INIT_LIST_HEAD(&mad_agent_priv->send_list);
INIT_LIST_HEAD(&mad_agent_priv->wait_list);
+ INIT_LIST_HEAD(&mad_agent_priv->done_list);
INIT_LIST_HEAD(&mad_agent_priv->rmpp_list);
INIT_WORK(&mad_agent_priv->timed_work, timeout_sends, mad_agent_priv);
INIT_LIST_HEAD(&mad_agent_priv->local_list);
@@ -1591,6 +1592,16 @@
return NULL;
}
+static void ib_mark_req_done(struct ib_mad_send_wr_private *mad_send_wr)
+{
+ mad_send_wr->timeout = 0;
+ if (mad_send_wr->refcount == 1) {
+ list_del(&mad_send_wr->agent_list);
+ list_add_tail(&mad_send_wr->agent_list,
+ &mad_send_wr->mad_agent_priv->done_list);
+ }
+}
+
static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
struct ib_mad_recv_wc *mad_recv_wc)
{
@@ -1619,8 +1630,7 @@
wake_up(&mad_agent_priv->wait);
return;
}
- /* Timeout = 0 means that we won't wait for a response */
- mad_send_wr->timeout = 0;
+ ib_mark_req_done(mad_send_wr);
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
/* Defined behavior is to complete response before request */
Index: core/mad_priv.h
===================================================================
--- core/mad_priv.h (revision 2202)
+++ core/mad_priv.h (working copy)
@@ -92,6 +92,7 @@
spinlock_t lock;
struct list_head send_list;
struct list_head wait_list;
+ struct list_head done_list;
struct work_struct timed_work;
unsigned long timeout;
struct list_head local_list;
More information about the general
mailing list