[ofa-general] [RFC][PATCH] last wqe event handler patch

Shirley Ma xma at us.ibm.com
Tue Jun 24 17:34:43 PDT 2008





Hello Roland,

This patch addresses a possible race if more than two last wqe reached events
have been received. The first reap will reap all list in the drain list, then
if there is any other cqe arrives after the first drain WR cqe, then it will
crash.


Signed-off-by: Shirley Ma <xma at us.ibm.com>
---------------------

 drivers/infiniband/ulp/ipoib/ipoib.h    |    1 -
 drivers/infiniband/ulp/ipoib/ipoib_cm.c |   38 +++++++++++++++---------------
 2 files changed, 19 insertions(+), 20 deletions(-)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index ca126fc..fc6c811 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -226,7 +226,6 @@ struct ipoib_cm_dev_priv {
      struct list_head  passive_ids;   /* state: LIVE */
      struct list_head  rx_error_list; /* state: ERROR */
      struct list_head  rx_flush_list; /* state: FLUSH, drain not started */
-     struct list_head  rx_drain_list; /* state: FLUSH, drain started */
      struct list_head  rx_reap_list;  /* state: FLUSH, drain done */
      struct work_struct      start_task;
      struct work_struct      reap_task;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 0886ee7..ae67379 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -210,10 +210,7 @@ static void ipoib_cm_start_rx_drain(struct ipoib_dev_priv *priv)
      struct ib_send_wr *bad_wr;
      struct ipoib_cm_rx *p;

-     /* We only reserved 1 extra slot in CQ for drain WRs, so
-      * make sure we have at most 1 outstanding WR. */
-     if (list_empty(&priv->cm.rx_flush_list) ||
-         !list_empty(&priv->cm.rx_drain_list))
+     if (list_empty(&priv->cm.rx_flush_list))
            return;

      /*
@@ -221,10 +218,11 @@ static void ipoib_cm_start_rx_drain(struct ipoib_dev_priv *priv)
       * error" WC will be immediately generated for each WR we post.
       */
      p = list_entry(priv->cm.rx_flush_list.next, typeof(*p), list);
+     /* We only reserved 1 extra slot in CQ for drain WRs, so
+      * make sure we have at most 1 outstanding WR. */
      if (ib_post_send(p->qp, &ipoib_cm_rx_drain_wr, &bad_wr))
            ipoib_warn(priv, "failed to post drain wr\n");

-     list_splice_init(&priv->cm.rx_flush_list, &priv->cm.rx_drain_list);
 }

 static void ipoib_cm_rx_event_handler(struct ib_event *event, void *ctx)
@@ -237,9 +235,11 @@ static void ipoib_cm_rx_event_handler(struct ib_event *event, void *ctx)
            return;

      spin_lock_irqsave(&priv->lock, flags);
-     list_move(&p->list, &priv->cm.rx_flush_list);
-     p->state = IPOIB_CM_RX_FLUSH;
-     ipoib_cm_start_rx_drain(priv);
+     if (p->state == IPOIB_CM_RX_LIVE) {
+           list_move(&p->list, &priv->cm.rx_flush_list);
+           p->state = IPOIB_CM_RX_FLUSH;
+           ipoib_cm_start_rx_drain(priv);
+     }
      spin_unlock_irqrestore(&priv->lock, flags);
 }

@@ -529,21 +529,25 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
      ipoib_dbg_data(priv, "cm recv completion: id %d, status: %d\n",
                   wr_id, wc->status);

+     p = wc->qp->qp_context;
+
      if (unlikely(wr_id >= ipoib_recvq_size)) {
            if (wr_id == (IPOIB_CM_RX_DRAIN_WRID & ~(IPOIB_OP_CM | IPOIB_OP_RECV))) {
                  spin_lock_irqsave(&priv->lock, flags);
-                 list_splice_init(&priv->cm.rx_drain_list, &priv->cm.rx_reap_list);
-                 ipoib_cm_start_rx_drain(priv);
-                 queue_work(ipoib_workqueue, &priv->cm.rx_reap_task);
-                 spin_unlock_irqrestore(&priv->lock, flags);
+                 if (p->state == IPOIB_CM_RX_FLUSH) {
+                       list_move(&p->list, &priv->cm.rx_reap_list);
+                       p->state == IPOIB_CM_RX_ERROR;
+                       ipoib_cm_start_rx_drain(priv);
+                       spin_unlock_irqrestore(&priv->lock, flags);
+                       queue_work(ipoib_workqueue, &priv->cm.rx_reap_task);
+                 } else
+                       spin_unlock_irqrestore(&priv->lock, flags);
            } else
                  ipoib_warn(priv, "cm recv completion event with wrid %d (> %d)\n",
                           wr_id, ipoib_recvq_size);
            return;
      }

-     p = wc->qp->qp_context;
-
      has_srq = ipoib_cm_has_srq(dev);
      rx_ring = has_srq ? priv->cm.srq_ring : p->rx_ring;

@@ -853,8 +857,7 @@ void ipoib_cm_dev_stop(struct net_device *dev)
      begin = jiffies;

      while (!list_empty(&priv->cm.rx_error_list) ||
-            !list_empty(&priv->cm.rx_flush_list) ||
-            !list_empty(&priv->cm.rx_drain_list)) {
+            !list_empty(&priv->cm.rx_flush_list)) {
            if (time_after(jiffies, begin + 5 * HZ)) {
                  ipoib_warn(priv, "RX drain timing out\n");

@@ -865,8 +868,6 @@ void ipoib_cm_dev_stop(struct net_device *dev)
                               &priv->cm.rx_reap_list);
                  list_splice_init(&priv->cm.rx_error_list,
                               &priv->cm.rx_reap_list);
-                 list_splice_init(&priv->cm.rx_drain_list,
-                              &priv->cm.rx_reap_list);
                  break;
            }
            spin_unlock_irq(&priv->lock);
@@ -1458,7 +1459,6 @@ int ipoib_cm_dev_init(struct net_device *dev)
      INIT_LIST_HEAD(&priv->cm.start_list);
      INIT_LIST_HEAD(&priv->cm.rx_error_list);
      INIT_LIST_HEAD(&priv->cm.rx_flush_list);
-     INIT_LIST_HEAD(&priv->cm.rx_drain_list);
      INIT_LIST_HEAD(&priv->cm.rx_reap_list);
      INIT_WORK(&priv->cm.start_task, ipoib_cm_tx_start);
      INIT_WORK(&priv->cm.reap_task, ipoib_cm_tx_reap);


(See attached file: last_wqe_race1.patch)

Thanks
Shirley
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openfabrics.org/pipermail/general/attachments/20080624/958af269/attachment.html>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: last_wqe_race1.patch
Type: application/octet-stream
Size: 5055 bytes
Desc: not available
URL: <http://lists.openfabrics.org/pipermail/general/attachments/20080624/958af269/attachment.obj>


More information about the general mailing list