[openib-general] [PATCH] Fix MAD completion handling
Roland Dreier
roland at topspin.com
Tue Sep 28 14:50:57 PDT 2004
While looking over Sean's changes, I noticed what look like a few bugs
in the mad thread usage. It didn't seem like there was any way for
the MAD thread to stop, and I think there are a few race conditions
that could lead to lost wakeups. This patch tries to fix both of
these problems.
I didn't test this because I didn't feel like messing with the
Makefile to get it to build in my environment. (It would be good to
switch to a standard kbuild Makefile so things like cross-compiling
and separate object directories work)
Thanks,
Roland
Index: infiniband/access/ib_mad_priv.h
===================================================================
--- infiniband/access/ib_mad_priv.h (revision 899)
+++ infiniband/access/ib_mad_priv.h (working copy)
@@ -131,11 +131,6 @@
struct ib_mad_mgmt_method_table *method_table[MAX_MGMT_CLASS];
};
-struct ib_mad_thread_private {
- wait_queue_head_t wait;
- atomic_t completion_event;
-};
-
struct ib_mad_port_private {
struct list_head port_list;
struct ib_device *device;
@@ -159,7 +154,7 @@
u32 recv_wr_index[IB_MAD_QPS_CORE];
struct task_struct *mad_thread;
- struct ib_mad_thread_private mad_thread_private;
+ int thread_wake;
};
#endif /* __IB_MAD_PRIV_H__ */
Index: infiniband/access/ib_mad.c
===================================================================
--- infiniband/access/ib_mad.c (revision 899)
+++ infiniband/access/ib_mad.c (working copy)
@@ -892,6 +892,8 @@
struct ib_wc wc;
int err_status = 0;
+ ib_req_notify_cq(port_priv->cq, IB_CQ_NEXT_COMP);
+
while (ib_poll_cq(port_priv->cq, 1, &wc) == 1) {
printk(KERN_DEBUG "Completion opcode 0x%x WRID 0x%Lx\n", wc.opcode, wc.wr_id);
if (wc.status != IB_WC_SUCCESS) {
@@ -928,11 +930,8 @@
}
}
- if (err_status) {
+ if (err_status)
ib_mad_port_restart(port_priv);
- } else {
- ib_req_notify_cq(port_priv->cq, IB_CQ_NEXT_COMP);
- }
}
/*
@@ -941,23 +940,22 @@
static int ib_mad_thread(void *param)
{
struct ib_mad_port_private *port_priv = param;
- struct ib_mad_thread_private *mad_thread_priv = &port_priv->mad_thread_private;
- int ret;
- while (1) {
- while (!signal_pending(current)) {
- ret = wait_event_interruptible(mad_thread_priv->wait,
- atomic_read(&mad_thread_priv->completion_event) > 0);
- atomic_set(&mad_thread_priv->completion_event, 0);
- if (ret) {
- printk(KERN_ERR "ib_mad thread exiting\n");
- return 0;
- }
+ __set_current_state(TASK_RUNNING);
- ib_mad_completion_handler(port_priv);
+ do {
+ port_priv->thread_wake = 0;
+ wmb();
- }
- }
+ ib_mad_completion_handler(port_priv);
+
+ set_current_state(TASK_INTERRUPTIBLE);
+ if (!port_priv->thread_wake)
+ schedule();
+ __set_current_state(TASK_RUNNING);
+ } while (!kthread_should_stop());
+
+ return 0;
}
/*
@@ -965,11 +963,8 @@
*/
static int ib_mad_thread_init(struct ib_mad_port_private *port_priv)
{
- struct ib_mad_thread_private *mad_thread_priv = &port_priv->mad_thread_private;
+ port_priv->thread_wake = 0;
- atomic_set(&mad_thread_priv->completion_event, 0);
- init_waitqueue_head(&mad_thread_priv->wait);
-
port_priv->mad_thread = kthread_create(ib_mad_thread,
port_priv,
"ib_mad(%6s-%-2d)",
@@ -978,27 +973,18 @@
if (IS_ERR(port_priv->mad_thread)) {
printk(KERN_ERR "Couldn't start ib_mad thread for %s port %d\n",
port_priv->device->name, port_priv->port_num);
- return 1;
+ return PTR_ERR(port_priv->mad_thread);
}
- wake_up_process(port_priv->mad_thread);
return 0;
}
-/*
- * Stop the IB MAD thread
- */
-static void ib_mad_thread_stop(struct ib_mad_port_private *port_priv)
-{
- kthread_stop(port_priv->mad_thread); /* !!! */
-}
-
static void ib_mad_thread_completion_handler(struct ib_cq *cq)
{
struct ib_mad_port_private *port_priv = cq->cq_context;
- struct ib_mad_thread_private *mad_thread_priv = &port_priv->mad_thread_private;
- atomic_inc(&mad_thread_priv->completion_event);
- wake_up_interruptible(&mad_thread_priv->wait);
+ port_priv->thread_wake = 1;
+ wmb();
+ wake_up_process(port_priv->mad_thread);
}
static int ib_mad_post_receive_mad(struct ib_mad_port_private *port_priv,
@@ -1527,7 +1513,7 @@
spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
ib_mad_port_stop(port_priv);
- ib_mad_thread_stop(port_priv);
+ kthread_stop(port_priv->mad_thread);
ib_destroy_qp(port_priv->qp[1]);
ib_destroy_qp(port_priv->qp[0]);
ib_dereg_mr(port_priv->mr);
More information about the general
mailing list