[ofa-general] Re: [PATCH RFC] IB/ipoib: fix to_ipoib_neigh access race

Michael S. Tsirkin mst at dev.mellanox.co.il
Wed May 23 12:50:11 PDT 2007


> Quoting Roland Dreier <rdreier at cisco.com>:
> Subject: Re: [PATCH RFC] IB/ipoib: fix to_ipoib_neigh access race
> 
>  > hard_start_xmit dereferences to_ipoib_neigh when only tx_lock is taken.  This
>  > would only be safe if all calls that modify *to_ipoib_neigh take tx_lock too.
>  > Currently this is not always true for ipoib_neigh_free and path_rec_completion,
>  > which results in memory corruption.  Fix this race, making sure
>  > path_rec_completion and ipoib_neigh_free are always called under
>  > tx_lock.
>  > 
>  > Signed-off-by: Michael S. Tsirkin <mst at dev.mellanox.co.il>
>  > 
>  > ---
>  > 
>  > I'm looking at
>  > https://bugs.openfabrics.org/show_bug.cgi?id=604
>  > and I think this could explain the crashes.
>  > In any case, Roland, is there a race or am I imagining things?
>  > 
>  > NB: The patch is untested (I'm not at the lab now).
> 
> Yes, it does seem that there is a problem here.  However, I the first
> part of this needs to be handled another way -- for example:
> 
>  > -		path_free(dev, path);
>  >  		spin_lock_irq(&priv->tx_lock);
>  >  		spin_lock(&priv->lock);
>  > +		path_free(dev, path);
> 
> path_free already takes priv->lock internally, and also calls
> ipoib_put_ah(), which may end up in ipoib_free_ah(), which also might
> take priv->lock.

Interesting point: note how unicast_arp_send is called under tx_lock,
and calls path_free from there.

It seems to be safe simply because we never have an AH
or any neighbours there, but it does look a bit ugly,
and there's a bit of code duplication that function.

> It's not immediately obvious what the right fix is...

Maybe 1. avoid doing path_free in unicast_arp_send: just
do __path_add unconditionally like we do for regular packets.
and 2. make path_free take both tx_lock and priv->lock?

Something along the following lines (NB: untested):

Signed-off-by: Michael S. Tsirkin <mst at dev.mellanox.co.il>

---

Index: linux-2.6/drivers/infiniband/ulp/ipoib/ipoib_main.c
===================================================================
--- linux-2.6.orig/drivers/infiniband/ulp/ipoib/ipoib_main.c	2007-05-22 01:46:54.000000000 +0300
+++ linux-2.6/drivers/infiniband/ulp/ipoib/ipoib_main.c	2007-05-23 22:45:18.000000000 +0300
@@ -262,7 +262,8 @@ static void path_free(struct net_device 
 	while ((skb = __skb_dequeue(&path->queue)))
 		dev_kfree_skb_irq(skb);
 
-	spin_lock_irqsave(&priv->lock, flags);
+	spin_lock_irqsave(&priv->tx_lock, flags);
+	spin_lock(&priv->lock);
 
 	list_for_each_entry_safe(neigh, tn, &path->neigh_list, list) {
 		/*
@@ -277,7 +278,8 @@ static void path_free(struct net_device 
 		ipoib_neigh_free(dev, neigh);
 	}
 
-	spin_unlock_irqrestore(&priv->lock, flags);
+	spin_unlock(&priv->lock);
+	spin_unlock_irqrestore(&priv->tx_lock, flags);
 
 	if (path->ah)
 		ipoib_put_ah(path->ah);
@@ -401,7 +403,8 @@ static void path_rec_completion(int stat
 			ah = ipoib_create_ah(dev, priv->pd, &av);
 	}
 
-	spin_lock_irqsave(&priv->lock, flags);
+	spin_lock_irqsave(&priv->tx_lock, flags);
+	spin_lock(&priv->lock);
 
 	path->ah = ah;
 
@@ -442,7 +445,8 @@ static void path_rec_completion(int stat
 	path->query = NULL;
 	complete(&path->done);
 
-	spin_unlock_irqrestore(&priv->lock, flags);
+	spin_unlock(&priv->lock);
+	spin_unlock_irqrestore(&priv->tx_lock, flags);
 
 	while ((skb = __skb_dequeue(&skqueue))) {
 		skb->dev = dev;
@@ -614,32 +618,16 @@ static void unicast_arp_send(struct sk_b
 	path = __path_find(dev, phdr->hwaddr + 4);
 	if (!path) {
 		path = path_rec_create(dev, phdr->hwaddr + 4);
-		if (path) {
-			/* put pseudoheader back on for next time */
-			skb_push(skb, sizeof *phdr);
-			__skb_queue_tail(&path->queue, skb);
-
-			if (path_rec_start(dev, path)) {
-				spin_unlock(&priv->lock);
-				path_free(dev, path);
-				return;
-			} else
-				__path_add(dev, path);
-		} else {
-			++priv->stats.tx_dropped;
-			dev_kfree_skb_any(skb);
-		}
-
-		spin_unlock(&priv->lock);
-		return;
+		if (path)
+			__path_add(dev, path);
 	}
 
-	if (path->ah) {
+	if (path && path->ah) {
 		ipoib_dbg(priv, "Send unicast ARP to %04x\n",
 			  be16_to_cpu(path->pathrec.dlid));
 
 		ipoib_send(dev, skb, path->ah, IPOIB_QPN(phdr->hwaddr));
-	} else if ((path->query || !path_rec_start(dev, path)) &&
+	} else if (path && (path->query || !path_rec_start(dev, path)) &&
 		   skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
 		/* put pseudoheader back on for next time */
 		skb_push(skb, sizeof *phdr);
@@ -822,7 +810,8 @@ static void ipoib_neigh_cleanup(struct n
 		  IPOIB_QPN(n->ha),
 		  IPOIB_GID_RAW_ARG(n->ha + 4));
 
-	spin_lock_irqsave(&priv->lock, flags);
+	spin_lock_irqsave(&priv->tx_lock, flags);
+	spin_lock(&priv->lock);
 
 	neigh = *to_ipoib_neigh(n);
 	if (neigh) {
@@ -832,7 +821,8 @@ static void ipoib_neigh_cleanup(struct n
 		ipoib_neigh_free(n->dev, neigh);
 	}
 
-	spin_unlock_irqrestore(&priv->lock, flags);
+	spin_unlock(&priv->lock);
+	spin_unlock_irqrestore(&priv->tx_lock, flags);
 
 	if (ah)
 		ipoib_put_ah(ah);
Index: linux-2.6/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
===================================================================
--- linux-2.6.orig/drivers/infiniband/ulp/ipoib/ipoib_multicast.c	2007-05-22 01:46:54.000000000 +0300
+++ linux-2.6/drivers/infiniband/ulp/ipoib/ipoib_multicast.c	2007-05-23 21:38:28.000000000 +0300
@@ -100,7 +100,8 @@ static void ipoib_mcast_free(struct ipoi
 			"deleting multicast group " IPOIB_GID_FMT "\n",
 			IPOIB_GID_ARG(mcast->mcmember.mgid));
 
-	spin_lock_irqsave(&priv->lock, flags);
+	spin_lock_irqsave(&priv->tx_lock, flags);
+	spin_lock(&priv->lock);
 
 	list_for_each_entry_safe(neigh, tmp, &mcast->neigh_list, list) {
 		/*
@@ -114,7 +115,8 @@ static void ipoib_mcast_free(struct ipoi
 		ipoib_neigh_free(dev, neigh);
 	}
 
-	spin_unlock_irqrestore(&priv->lock, flags);
+	spin_unlock(&priv->lock);
+	spin_unlock_irqrestore(&priv->tx_lock, flags);
 
 	if (mcast->ah)
 		ipoib_put_ah(mcast->ah);

-- 
MST



More information about the general mailing list