[ewg] Re: Continue of "defer skb_orphan() until irqs enabled"

Eli Cohen eli at dev.mellanox.co.il
Thu Sep 25 04:44:14 PDT 2008


On Wed, Sep 24, 2008 at 12:16:23PM -0700, akepner at sgi.com wrote:
> On Wed, Sep 24, 2008 at 10:22:11AM -0700, Roland Dreier wrote:
> >  > -	spin_unlock_irqrestore(&priv->tx_lock, flags);
> >  > +	local_irq_restore(flags);
> >  > +	if (orphan)
> >  > +		skb_orphan(skb);
> >  > +
> >  > +	spin_unlock(&priv->tx_lock);
I think this one will work, after applying the changes I proposed
here:
http://lists.openfabrics.org/pipermail/general/2008-September/054079.html

But I would also consider the patch below, since it allows the
destructor to run with tx_lock released.
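
Just to illustrate the locking pattern (a minimal userspace sketch of
my own with made-up names, not kernel code): decide under tx_lock
whether the skb may be orphaned, and only run the destructor-like
callback once the lock has been dropped, which is what the
ipoib_start_xmit() hunk further down does.

/*
 * Userspace analogy of "decide under the lock, run the callback
 * after unlocking".  Names are hypothetical; this is not part of
 * the patch.
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t tx_lock = PTHREAD_MUTEX_INITIALIZER;

static void fake_destructor(void)
{
	/* stands in for skb->destructor invoked via skb_orphan() */
	printf("destructor runs with tx_lock released\n");
}

static void xmit(int send_posted)
{
	int orphan;

	pthread_mutex_lock(&tx_lock);
	/* ... post the send under the lock ... */
	orphan = send_posted;		/* only record the decision here */
	pthread_mutex_unlock(&tx_lock);

	if (orphan)			/* callback runs lock-free */
		fake_destructor();
}

int main(void)
{
	xmit(1);
	return 0;
}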

> > 
> > What did we conclude about the possible skb_orphan()-after-skb_free()
> > scenario here?
> > 
> 
> Eli?
> 
> I thought the conclusion was that the "ipoib: defer skb_orphan()..." 
> patch had indeed created a race condition between the skb_orphan() 
> call and the tx completion handler freeing the skb.
> 

The only problem we had after using the patch proposed in the link
above was with the SKB that fills up the tx queue and causes arming
of the send CQ. So the patch below just avoids calling skb_orphan()
for that special case. What would happen is that at least half the
queue is drained by the timer, and there is a good chance that that
SKB will be freed pretty soon. In any case, it would be just that one
SKB that remains outstanding for some time.

Note that the change in this patch is the addition of this condition:
        return !(sent && priv->tx_outstanding &&
===>>>>                   priv->tx_outstanding != ipoib_sendq_size);
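
For clarity, here is a small standalone sketch (my own illustration,
not part of the patch; the queue size value is just assumed) of the
orphan decision the caller derives from that return value with
orphan = !ipoib_send(...):

/*
 * Standalone sketch (not kernel code) of the orphan decision implied
 * by the new return value: skb_orphan() runs only when the send was
 * posted, at least one send is still outstanding, and this send did
 * not fill the queue.
 */
#include <stdio.h>

static int ipoib_send_retval(int sent, int tx_outstanding, int sendq_size)
{
	/* mirrors the added return statement in ipoib_send() */
	return !(sent && tx_outstanding && tx_outstanding != sendq_size);
}

int main(void)
{
	int sendq_size = 64;	/* assumed value of ipoib_sendq_size */

	/* normal case: posted, queue not full -> caller orphans the skb */
	printf("orphan=%d\n", !ipoib_send_retval(1, 10, sendq_size));

	/* this send filled the queue (the special case above) -> no orphan */
	printf("orphan=%d\n", !ipoib_send_retval(1, sendq_size, sendq_size));

	/* post failed and the skb was already freed -> no orphan */
	printf("orphan=%d\n", !ipoib_send_retval(0, 9, sendq_size));

	return 0;
}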

diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 05eb41b..107f122 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -441,7 +441,7 @@ int ipoib_open(struct net_device *dev);
 int ipoib_add_pkey_attr(struct net_device *dev);
 int ipoib_add_umcast_attr(struct net_device *dev);
 
-void ipoib_send(struct net_device *dev, struct sk_buff *skb,
+int ipoib_send(struct net_device *dev, struct sk_buff *skb,
 		struct ipoib_ah *address, u32 qpn);
 void ipoib_reap_ah(struct work_struct *work);
 
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 66cafa2..9271e6c 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -525,13 +525,14 @@ static inline int post_send(struct ipoib_dev_priv *priv,
 	return ib_post_send(priv->qp, &priv->tx_wr, &bad_wr);
 }
 
-void ipoib_send(struct net_device *dev, struct sk_buff *skb,
+int ipoib_send(struct net_device *dev, struct sk_buff *skb,
 		struct ipoib_ah *address, u32 qpn)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct ipoib_tx_buf *tx_req;
 	int hlen;
 	void *phead;
+	int sent;
 
 	if (skb_is_gso(skb)) {
 		hlen = skb_transport_offset(skb) + tcp_hdrlen(skb);
@@ -541,7 +542,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
 			++dev->stats.tx_dropped;
 			++dev->stats.tx_errors;
 			dev_kfree_skb_any(skb);
-			return;
+			return 1;
 		}
 	} else {
 		if (unlikely(skb->len > priv->mcast_mtu + IPOIB_ENCAP_LEN)) {
@@ -550,7 +551,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
 			++dev->stats.tx_dropped;
 			++dev->stats.tx_errors;
 			ipoib_cm_skb_too_long(dev, skb, priv->mcast_mtu);
-			return;
+			return 1;
 		}
 		phead = NULL;
 		hlen  = 0;
@@ -571,7 +572,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
 	if (unlikely(ipoib_dma_map_tx(priv->ca, tx_req))) {
 		++dev->stats.tx_errors;
 		dev_kfree_skb_any(skb);
-		return;
+		return 1;
 	}
 
 	if (skb->ip_summed == CHECKSUM_PARTIAL)
@@ -593,6 +594,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
 		--priv->tx_outstanding;
 		ipoib_dma_unmap_tx(priv->ca, tx_req);
 		dev_kfree_skb_any(skb);
+		sent = 0;
 		if (netif_queue_stopped(dev))
 			netif_wake_queue(dev);
 	} else {
@@ -600,13 +602,15 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
 
 		address->last_send = priv->tx_head;
 		++priv->tx_head;
-		skb_orphan(skb);
-
+		sent = 1;
 	}
 
 	if (unlikely(priv->tx_outstanding > MAX_SEND_CQE))
 		while (poll_tx(priv))
 			; /* nothing */
+
+	return !(sent && priv->tx_outstanding &&
+			priv->tx_outstanding != ipoib_sendq_size);
 }
 
 static void __ipoib_reap_ah(struct net_device *dev)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 1b1df5c..3645c6b 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -604,7 +604,7 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev)
 				goto err_drop;
 			}
 		} else
-			ipoib_send(dev, skb, path->ah, IPOIB_QPN(skb->dst->neighbour->ha));
+			(void) ipoib_send(dev, skb, path->ah, IPOIB_QPN(skb->dst->neighbour->ha));
 	} else {
 		neigh->ah  = NULL;
 
@@ -685,7 +685,7 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
 		ipoib_dbg(priv, "Send unicast ARP to %04x\n",
 			  be16_to_cpu(path->pathrec.dlid));
 
-		ipoib_send(dev, skb, path->ah, IPOIB_QPN(phdr->hwaddr));
+		(void) ipoib_send(dev, skb, path->ah, IPOIB_QPN(phdr->hwaddr));
 	} else if ((path->query || !path_rec_start(dev, path)) &&
 		   skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
 		/* put pseudoheader back on for next time */
@@ -704,6 +704,7 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct ipoib_neigh *neigh;
 	unsigned long flags;
+	int orphan = 0;
 
 	if (unlikely(!spin_trylock_irqsave(&priv->tx_lock, flags)))
 		return NETDEV_TX_LOCKED;
@@ -743,7 +744,9 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
 				goto out;
 			}
 		} else if (neigh->ah) {
-			ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(skb->dst->neighbour->ha));
+			int ret;
+			ret = ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(skb->dst->neighbour->ha));
+			orphan = !ret;
 			goto out;
 		}
 
@@ -788,6 +791,8 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 out:
 	spin_unlock_irqrestore(&priv->tx_lock, flags);
+	if (orphan)
+		skb_orphan(skb);
 
 	return NETDEV_TX_OK;
 }
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index aae2862..9637122 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -734,7 +734,7 @@ out:
 			}
 		}
 
-		ipoib_send(dev, skb, mcast->ah, IB_MULTICAST_QPN);
+		(void) ipoib_send(dev, skb, mcast->ah, IB_MULTICAST_QPN);
 	}
 
 unlock:
-- 
1.6.0.2