<html><body>
<p>Hello Roland,<br>
<br>
This patch addresses a possible race when more than two "last WQE reached" events<br>
have been received. The first reap reaps every entry on the drain list; if<br>
another CQE then arrives after the first drain WR's CQE, the driver will<br>
crash.<br>
<br>
<br>
Signed-off-by: Shirley Ma &lt;xma@us.ibm.com&gt;<br>
---------------------<br>
<br>
 drivers/infiniband/ulp/ipoib/ipoib.h    |    1 -<br>
 drivers/infiniband/ulp/ipoib/ipoib_cm.c |   38 +++++++++++++++---------------<br>
 2 files changed, 19 insertions(+), 20 deletions(-)<br>
<br>
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h<br>
index ca126fc..fc6c811 100644<br>
--- a/drivers/infiniband/ulp/ipoib/ipoib.h<br>
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h<br>
@@ -226,7 +226,6 @@ struct ipoib_cm_dev_priv {<br>
        struct list_head        passive_ids;   /* state: LIVE */<br>
        struct list_head        rx_error_list; /* state: ERROR */<br>
        struct list_head        rx_flush_list; /* state: FLUSH, drain not started */<br>
-       struct list_head        rx_drain_list; /* state: FLUSH, drain started */<br>
        struct list_head        rx_reap_list;  /* state: FLUSH, drain done */<br>
        struct work_struct      start_task;<br>
        struct work_struct      reap_task;<br>
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c<br>
index 0886ee7..ae67379 100644<br>
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c<br>
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c<br>
@@ -210,10 +210,7 @@ static void ipoib_cm_start_rx_drain(struct ipoib_dev_priv *priv)<br>
        struct ib_send_wr *bad_wr;<br>
        struct ipoib_cm_rx *p;<br>
 <br>
-       /* We only reserved 1 extra slot in CQ for drain WRs, so<br>
-        * make sure we have at most 1 outstanding WR. */<br>
-       if (list_empty(&priv->cm.rx_flush_list) ||<br>
-           !list_empty(&priv->cm.rx_drain_list))<br>
+       if (list_empty(&priv->cm.rx_flush_list))<br>
                return;<br>
 <br>
        /*<br>
@@ -221,10 +218,11 @@ static void ipoib_cm_start_rx_drain(struct ipoib_dev_priv *priv)<br>
         * error" WC will be immediately generated for each WR we post.<br>
         */<br>
        p = list_entry(priv->cm.rx_flush_list.next, typeof(*p), list);<br>
+       /* We only reserved 1 extra slot in CQ for drain WRs, so<br>
+        * make sure we have at most 1 outstanding WR. */<br>
        if (ib_post_send(p->qp, &ipoib_cm_rx_drain_wr, &bad_wr))<br>
                ipoib_warn(priv, "failed to post drain wr\n");<br>
 <br>
-       list_splice_init(&priv->cm.rx_flush_list, &priv->cm.rx_drain_list);<br>
 }<br>
 <br>
 static void ipoib_cm_rx_event_handler(struct ib_event *event, void *ctx)<br>
@@ -237,9 +235,11 @@ static void ipoib_cm_rx_event_handler(struct ib_event *event, void *ctx)<br>
                return;<br>
 <br>
        spin_lock_irqsave(&priv->lock, flags);<br>
-       list_move(&p->list, &priv->cm.rx_flush_list);<br>
-       p->state = IPOIB_CM_RX_FLUSH;<br>
-       ipoib_cm_start_rx_drain(priv);<br>
+       if (p->state == IPOIB_CM_RX_LIVE) {<br>
+               list_move(&p->list, &priv->cm.rx_flush_list);<br>
+               p->state = IPOIB_CM_RX_FLUSH;<br>
+               ipoib_cm_start_rx_drain(priv);<br>
+       }<br>
        spin_unlock_irqrestore(&priv->lock, flags);<br>
 }<br>
 <br>
@@ -529,21 +529,25 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)<br>
        ipoib_dbg_data(priv, "cm recv completion: id %d, status: %d\n",<br>
                       wr_id, wc->status);<br>
 <br>
+       p = wc->qp->qp_context;<br>
+<br>
        if (unlikely(wr_id >= ipoib_recvq_size)) {<br>
                if (wr_id == (IPOIB_CM_RX_DRAIN_WRID & ~(IPOIB_OP_CM | IPOIB_OP_RECV))) {<br>
                        spin_lock_irqsave(&priv->lock, flags);<br>
-                       list_splice_init(&priv->cm.rx_drain_list, &priv->cm.rx_reap_list);<br>
-                       ipoib_cm_start_rx_drain(priv);<br>
-                       queue_work(ipoib_workqueue, &priv->cm.rx_reap_task);<br>
-                       spin_unlock_irqrestore(&priv->lock, flags);<br>
+                       if (p->state == IPOIB_CM_RX_FLUSH) {<br>
+                               list_move(&p->list, &priv->cm.rx_reap_list);<br>
+                               p->state == IPOIB_CM_RX_ERROR;<br>
+                               ipoib_cm_start_rx_drain(priv);<br>
+                               spin_unlock_irqrestore(&priv->lock, flags);<br>
+                               queue_work(ipoib_workqueue, &priv->cm.rx_reap_task);<br>
+                       } else<br>
+                               spin_unlock_irqrestore(&priv->lock, flags);<br>
                } else<br>
                        ipoib_warn(priv, "cm recv completion event with wrid %d (> %d)\n",<br>
                                   wr_id, ipoib_recvq_size);<br>
                return;<br>
        }<br>
 <br>
-       p = wc->qp->qp_context;<br>
-<br>
        has_srq = ipoib_cm_has_srq(dev);<br>
        rx_ring = has_srq ? priv->cm.srq_ring : p->rx_ring;<br>
 <br>
@@ -853,8 +857,7 @@ void ipoib_cm_dev_stop(struct net_device *dev)<br>
        begin = jiffies;<br>
 <br>
        while (!list_empty(&priv->cm.rx_error_list) ||<br>
-              !list_empty(&priv->cm.rx_flush_list) ||<br>
-              !list_empty(&priv->cm.rx_drain_list)) {<br>
+              !list_empty(&priv->cm.rx_flush_list)) {<br>
                if (time_after(jiffies, begin + 5 * HZ)) {<br>
                        ipoib_warn(priv, "RX drain timing out\n");<br>
 <br>
@@ -865,8 +868,6 @@ void ipoib_cm_dev_stop(struct net_device *dev)<br>
                                         &priv->cm.rx_reap_list);<br>
                        list_splice_init(&priv->cm.rx_error_list,<br>
                                         &priv->cm.rx_reap_list);<br>
-                       list_splice_init(&priv->cm.rx_drain_list,<br>
-                                        &priv->cm.rx_reap_list);<br>
                        break;<br>
                }<br>
                spin_unlock_irq(&priv->lock);<br>
@@ -1458,7 +1459,6 @@ int ipoib_cm_dev_init(struct net_device *dev)<br>
        INIT_LIST_HEAD(&priv->cm.start_list);<br>
        INIT_LIST_HEAD(&priv->cm.rx_error_list);<br>
        INIT_LIST_HEAD(&priv->cm.rx_flush_list);<br>
-       INIT_LIST_HEAD(&priv->cm.rx_drain_list);<br>
        INIT_LIST_HEAD(&priv->cm.rx_reap_list);<br>
        INIT_WORK(&priv->cm.start_task, ipoib_cm_tx_start);<br>
        INIT_WORK(&priv->cm.reap_task, ipoib_cm_tx_reap);<br>
<br>
<br>
<i>(See attached file: last_wqe_race1.patch)</i><br>
<br>
Thanks<br>
Shirley</body></html>