[ofa-general] [PATCH] ipoib: racing uses of ipoib_neigh in IPoIB-CM

akepner at sgi.com akepner at sgi.com
Wed Jun 10 12:07:28 PDT 2009


There is a race between the neighbour cleanup code, and some uses 
of the closely related ipoib_neigh structure when using IPoIB-CM. 
To prevent the race, take a reference to the neighbour before the 
connection is established, and release the reference when the 
connection is destroyed. Also, defer the cleanup and release 
of the ipoib_neigh structure until the connection is destroyed.

Signed-off-by: Arthur Kepner <akepner at sgi.com>
---
 ipoib_cm.c   |   40 ++++++++++++++++++++++++++++++++--------
 ipoib_main.c |    7 +++++--
 2 files changed, 37 insertions(+), 10 deletions(-)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 47d588b..ed22a37 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -810,9 +810,6 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
 			list_del(&neigh->list);
 			if (neigh->ah)
 				ipoib_put_ah(neigh->ah);
-			ipoib_neigh_free(dev, neigh);
-
-			tx->neigh = NULL;
 		}
 
 		if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
@@ -1230,9 +1227,6 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
 			list_del(&neigh->list);
 			if (neigh->ah)
 				ipoib_put_ah(neigh->ah);
-			ipoib_neigh_free(dev, neigh);
-
-			tx->neigh = NULL;
 		}
 
 		if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
@@ -1278,7 +1272,6 @@ void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx)
 		queue_work(ipoib_workqueue, &priv->cm.reap_task);
 		ipoib_dbg(priv, "Reap connection for gid %pI6\n",
 			  tx->neigh->dgid.raw);
-		tx->neigh = NULL;
 	}
 }
 
@@ -1302,6 +1295,10 @@ static void ipoib_cm_tx_start(struct work_struct *work)
 		p = list_entry(priv->cm.start_list.next, typeof(*p), list);
 		list_del_init(&p->list);
 		neigh = p->neigh;
+		ipoib_dbg(priv, "%s: holding ref to %pI6 (refcnt: %d)\n",
+			  __func__, neigh->dgid.raw,
+			  atomic_read(&neigh->neighbour->refcnt));
+		neigh_hold(neigh->neighbour);
 		qpn = IPOIB_QPN(neigh->neighbour->ha);
 		memcpy(&pathrec, &p->path->pathrec, sizeof pathrec);
 
@@ -1316,11 +1313,22 @@ static void ipoib_cm_tx_start(struct work_struct *work)
 		if (ret) {
 			neigh = p->neigh;
 			if (neigh) {
+				struct sk_buff *skb;
 				neigh->cm = NULL;
 				list_del(&neigh->list);
 				if (neigh->ah)
 					ipoib_put_ah(neigh->ah);
-				ipoib_neigh_free(dev, neigh);
+				*to_ipoib_neigh(neigh->neighbour) = NULL;
+				ipoib_dbg(priv, "%s: releasing ref to %pI6 "
+					  "(refcnt: %d)\n", __func__,
+					  neigh->dgid.raw,
+					  atomic_read(&neigh->neighbour->refcnt));
+				neigh_release(neigh->neighbour);
+				while ((skb = __skb_dequeue(&neigh->queue))) {
+					++dev->stats.tx_dropped;
+					dev_kfree_skb_any(skb);
+				}
+				kfree(neigh);
 			}
 			list_del(&p->list);
 			kfree(p);
@@ -1343,7 +1351,23 @@ static void ipoib_cm_tx_reap(struct work_struct *work)
 	spin_lock_irqsave(&priv->lock, flags);
 
 	while (!list_empty(&priv->cm.reap_list)) {
+		struct ipoib_neigh *neigh;
+		struct sk_buff *skb;
 		p = list_entry(priv->cm.reap_list.next, typeof(*p), list);
+		neigh = p->neigh;
+		if (neigh) {
+			*to_ipoib_neigh(neigh->neighbour) = NULL;
+			ipoib_dbg(priv, "%s: releasing ref to %pI6 "
+				  "(refcnt: %d)\n", __func__, neigh->dgid.raw,
+				  atomic_read(&neigh->neighbour->refcnt));
+			neigh_release(neigh->neighbour);
+			while ((skb = __skb_dequeue(&neigh->queue))) {
+				++dev->stats.tx_dropped;
+				dev_kfree_skb_any(skb);
+			}
+			kfree(neigh);
+		}
+		p->neigh = NULL;
 		list_del(&p->list);
 		spin_unlock_irqrestore(&priv->lock, flags);
 		netif_tx_unlock_bh(dev);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index ab2c192..8841160 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -889,13 +889,16 @@ struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neighbour,
 void ipoib_neigh_free(struct net_device *dev, struct ipoib_neigh *neigh)
 {
 	struct sk_buff *skb;
+	if (ipoib_cm_get(neigh)) {
+		ipoib_cm_destroy_tx(ipoib_cm_get(neigh));
+		return;
+	}
+
 	*to_ipoib_neigh(neigh->neighbour) = NULL;
 	while ((skb = __skb_dequeue(&neigh->queue))) {
 		++dev->stats.tx_dropped;
 		dev_kfree_skb_any(skb);
 	}
-	if (ipoib_cm_get(neigh))
-		ipoib_cm_destroy_tx(ipoib_cm_get(neigh));
 	kfree(neigh);
 }
 



More information about the general mailing list