Hello Roland,

This patch addresses a possible race when more than two last WQE reached events
have been received: the first reap pass reaps everything on the drain list, so if
any other CQE arrives after the first drain WR CQE, the driver will crash.
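
To illustrate the idea behind the fix, here is a small stand-alone sketch. It is
not the driver code itself; locking and list handling are omitted, and the names
rx_conn, last_wqe_event and drain_completion are made up for illustration. Each
connection carries its own state, so a duplicate last WQE reached event or a late
drain completion is simply ignored instead of touching an already-reaped connection:

#include <stdio.h>

/* Sketch only: models the LIVE -> FLUSH -> ERROR transitions used by the patch. */
enum rx_state { RX_LIVE, RX_FLUSH, RX_ERROR };

struct rx_conn {
	enum rx_state state;
};

/* "Last WQE reached" event: only a LIVE connection starts draining. */
static void last_wqe_event(struct rx_conn *p)
{
	if (p->state == RX_LIVE) {
		p->state = RX_FLUSH;
		printf("post drain WR\n");
	}
	/* a duplicate event for the same connection is ignored */
}

/* Drain WR completion: only a FLUSH connection is queued for reaping. */
static void drain_completion(struct rx_conn *p)
{
	if (p->state == RX_FLUSH) {
		p->state = RX_ERROR;
		printf("move to reap list\n");
	}
	/* a late completion for an already-reaped connection is ignored */
}

int main(void)
{
	struct rx_conn c = { RX_LIVE };

	last_wqe_event(&c);	/* posts the drain WR */
	last_wqe_event(&c);	/* duplicate event: no second post */
	drain_completion(&c);	/* reaped exactly once */
	drain_completion(&c);	/* late completion: ignored, no crash */
	return 0;
}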

Signed-off-by: Shirley Ma <xma@us.ibm.com>
---

drivers/infiniband/ulp/ipoib/ipoib.h | 1 -
drivers/infiniband/ulp/ipoib/ipoib_cm.c | 38 +++++++++++++++---------------
2 files changed, 19 insertions(+), 20 deletions(-)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index ca126fc..fc6c811 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -226,7 +226,6 @@ struct ipoib_cm_dev_priv {
struct list_head passive_ids; /* state: LIVE */
struct list_head rx_error_list; /* state: ERROR */
struct list_head rx_flush_list; /* state: FLUSH, drain not started */
- struct list_head rx_drain_list; /* state: FLUSH, drain started */
struct list_head rx_reap_list; /* state: FLUSH, drain done */
struct work_struct start_task;
struct work_struct reap_task;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 0886ee7..ae67379 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -210,10 +210,7 @@ static void ipoib_cm_start_rx_drain(struct ipoib_dev_priv *priv)
struct ib_send_wr *bad_wr;
struct ipoib_cm_rx *p;

- /* We only reserved 1 extra slot in CQ for drain WRs, so
- * make sure we have at most 1 outstanding WR. */
- if (list_empty(&priv->cm.rx_flush_list) ||
- !list_empty(&priv->cm.rx_drain_list))
+ if (list_empty(&priv->cm.rx_flush_list))
return;

/*
@@ -221,10 +218,11 @@ static void ipoib_cm_start_rx_drain(struct ipoib_dev_priv *priv)
* error" WC will be immediately generated for each WR we post.
*/
p = list_entry(priv->cm.rx_flush_list.next, typeof(*p), list);
+ /* We only reserved 1 extra slot in CQ for drain WRs, so
+ * make sure we have at most 1 outstanding WR. */
if (ib_post_send(p->qp, &ipoib_cm_rx_drain_wr, &bad_wr))
ipoib_warn(priv, "failed to post drain wr\n");

- list_splice_init(&priv->cm.rx_flush_list, &priv->cm.rx_drain_list);
}

static void ipoib_cm_rx_event_handler(struct ib_event *event, void *ctx)
@@ -237,9 +235,11 @@ static void ipoib_cm_rx_event_handler(struct ib_event *event, void *ctx)
return;

spin_lock_irqsave(&priv->lock, flags);
- list_move(&p->list, &priv->cm.rx_flush_list);
- p->state = IPOIB_CM_RX_FLUSH;
- ipoib_cm_start_rx_drain(priv);
+ if (p->state == IPOIB_CM_RX_LIVE) {
+ list_move(&p->list, &priv->cm.rx_flush_list);
+ p->state = IPOIB_CM_RX_FLUSH;
+ ipoib_cm_start_rx_drain(priv);
+ }
spin_unlock_irqrestore(&priv->lock, flags);
}

@@ -529,21 +529,25 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
ipoib_dbg_data(priv, "cm recv completion: id %d, status: %d\n",
wr_id, wc->status);

+ p = wc->qp->qp_context;
+
if (unlikely(wr_id >= ipoib_recvq_size)) {
if (wr_id == (IPOIB_CM_RX_DRAIN_WRID & ~(IPOIB_OP_CM | IPOIB_OP_RECV))) {
spin_lock_irqsave(&priv->lock, flags);
- list_splice_init(&priv->cm.rx_drain_list, &priv->cm.rx_reap_list);
- ipoib_cm_start_rx_drain(priv);
- queue_work(ipoib_workqueue, &priv->cm.rx_reap_task);
- spin_unlock_irqrestore(&priv->lock, flags);
+ if (p->state == IPOIB_CM_RX_FLUSH) {
+ list_move(&p->list, &priv->cm.rx_reap_list);
+ p->state = IPOIB_CM_RX_ERROR;
+ ipoib_cm_start_rx_drain(priv);
+ spin_unlock_irqrestore(&priv->lock, flags);
+ queue_work(ipoib_workqueue, &priv->cm.rx_reap_task);
+ } else
+ spin_unlock_irqrestore(&priv->lock, flags);
} else
ipoib_warn(priv, "cm recv completion event with wrid %d (> %d)\n",
wr_id, ipoib_recvq_size);
return;
}

- p = wc->qp->qp_context;
-
has_srq = ipoib_cm_has_srq(dev);
rx_ring = has_srq ? priv->cm.srq_ring : p->rx_ring;

@@ -853,8 +857,7 @@ void ipoib_cm_dev_stop(struct net_device *dev)
begin = jiffies;

while (!list_empty(&priv->cm.rx_error_list) ||
- !list_empty(&priv->cm.rx_flush_list) ||
- !list_empty(&priv->cm.rx_drain_list)) {
+ !list_empty(&priv->cm.rx_flush_list)) {
if (time_after(jiffies, begin + 5 * HZ)) {
ipoib_warn(priv, "RX drain timing out\n");

@@ -865,8 +868,6 @@ void ipoib_cm_dev_stop(struct net_device *dev)
&priv->cm.rx_reap_list);
list_splice_init(&priv->cm.rx_error_list,
&priv->cm.rx_reap_list);
- list_splice_init(&priv->cm.rx_drain_list,
- &priv->cm.rx_reap_list);
break;
}
spin_unlock_irq(&priv->lock);
@@ -1458,7 +1459,6 @@ int ipoib_cm_dev_init(struct net_device *dev)
INIT_LIST_HEAD(&priv->cm.start_list);
INIT_LIST_HEAD(&priv->cm.rx_error_list);
INIT_LIST_HEAD(&priv->cm.rx_flush_list);
- INIT_LIST_HEAD(&priv->cm.rx_drain_list);
INIT_LIST_HEAD(&priv->cm.rx_reap_list);
INIT_WORK(&priv->cm.start_task, ipoib_cm_tx_start);
INIT_WORK(&priv->cm.reap_task, ipoib_cm_tx_reap);

(See attached file: last_wqe_race1.patch)

Thanks
Shirley