[ofa-general] [PATCH] ipoib: defer skb_orphan() until irqs enabled

akepner at sgi.com
Tue Sep 9 07:54:35 PDT 2008


If a socket's sk_write_space() method expects to run with interrupts 
enabled, syslog can get very noisy with messages like:

Badness in local_bh_enable at kernel/softirq.c:140

 Call Trace:
  [<a000000100014ce0>] show_stack+0x40/0xa0
  [<a000000100014d70>] dump_stack+0x30/0x60
  [<a0000001000a1730>] local_bh_enable+0x90/0x140
  [<a000000100557d30>] _spin_unlock_bh+0x30/0x60
  [<a0000002097787f0>] svc_sock_enqueue+0x750/0x780 [sunrpc]
  [<a00000020977a420>] svc_write_space+0xc0/0x1c0 [sunrpc]
  [<a000000100451ad0>] sock_wfree+0xd0/0x140
  [<a0000002087959e0>] ipoib_send+0x1120/0x14a0 [ib_ipoib]
  [<a00000020878dfa0>] ipoib_start_xmit+0x380/0x1140 [ib_ipoib]
  [<a000000100461ab0>] dev_hard_start_xmit+0x4b0/0x680
  [<a00000010048d2f0>] __qdisc_run+0x2d0/0x680


skb_orphan() invokes the skb's destructor (sock_wfree() in the trace above),
which in turn calls the socket's sk_write_space() method, and ipoib_send()
runs with priv->tx_lock held and interrupts disabled. A simple fix is to
defer calling skb_orphan() until the lock has been dropped and interrupts
have been re-enabled: ipoib_send() now returns 0 if the skb was successfully
posted (and so should be orphaned by the caller) and nonzero otherwise, and
ipoib_start_xmit() orphans the skb only after releasing priv->tx_lock.

Signed-off-by: Arthur Kepner <akepner at sgi.com>
---
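A condensed sketch of the resulting calling convention, simplified from the
diff below with error paths and unrelated code omitted (all identifiers are
taken from the patch itself):

	/* ipoib_start_xmit(), simplified */
	spin_lock_irqsave(&priv->tx_lock, flags);
	...
	/* ipoib_send() returns 0 on success and no longer orphans the skb */
	orphan = !ipoib_send(dev, skb, neigh->ah,
			     IPOIB_QPN(skb->dst->neighbour->ha));
	...
	spin_unlock_irqrestore(&priv->tx_lock, flags);
	if (orphan)
		/* safe now: sk_write_space() runs with interrupts enabled */
		skb_orphan(skb);
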
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index b0ffc9a..8c9dcf1 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -440,7 +440,7 @@ int ipoib_open(struct net_device *dev);
 int ipoib_add_pkey_attr(struct net_device *dev);
 int ipoib_add_umcast_attr(struct net_device *dev);
 
-void ipoib_send(struct net_device *dev, struct sk_buff *skb,
+int ipoib_send(struct net_device *dev, struct sk_buff *skb,
 		struct ipoib_ah *address, u32 qpn);
 void ipoib_reap_ah(struct work_struct *work);
 
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 66cafa2..711a3ac 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -525,13 +525,14 @@ static inline int post_send(struct ipoib_dev_priv *priv,
 	return ib_post_send(priv->qp, &priv->tx_wr, &bad_wr);
 }
 
-void ipoib_send(struct net_device *dev, struct sk_buff *skb,
+int ipoib_send(struct net_device *dev, struct sk_buff *skb,
 		struct ipoib_ah *address, u32 qpn)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct ipoib_tx_buf *tx_req;
 	int hlen;
 	void *phead;
+	int ret = 1; /* assume the worst */
 
 	if (skb_is_gso(skb)) {
 		hlen = skb_transport_offset(skb) + tcp_hdrlen(skb);
@@ -541,7 +542,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
 			++dev->stats.tx_dropped;
 			++dev->stats.tx_errors;
 			dev_kfree_skb_any(skb);
-			return;
+			return 1;
 		}
 	} else {
 		if (unlikely(skb->len > priv->mcast_mtu + IPOIB_ENCAP_LEN)) {
@@ -550,7 +551,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
 			++dev->stats.tx_dropped;
 			++dev->stats.tx_errors;
 			ipoib_cm_skb_too_long(dev, skb, priv->mcast_mtu);
-			return;
+			return 1;
 		}
 		phead = NULL;
 		hlen  = 0;
@@ -571,7 +572,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
 	if (unlikely(ipoib_dma_map_tx(priv->ca, tx_req))) {
 		++dev->stats.tx_errors;
 		dev_kfree_skb_any(skb);
-		return;
+		return 1;
 	}
 
 	if (skb->ip_summed == CHECKSUM_PARTIAL)
@@ -593,6 +594,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
 		--priv->tx_outstanding;
 		ipoib_dma_unmap_tx(priv->ca, tx_req);
 		dev_kfree_skb_any(skb);
+		ret = 1;
 		if (netif_queue_stopped(dev))
 			netif_wake_queue(dev);
 	} else {
@@ -600,13 +602,14 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
 
 		address->last_send = priv->tx_head;
 		++priv->tx_head;
-		skb_orphan(skb);
-
+		ret = 0;
 	}
 
 	if (unlikely(priv->tx_outstanding > MAX_SEND_CQE))
 		while (poll_tx(priv))
 			; /* nothing */
+
+	return ret;
 }
 
 static void __ipoib_reap_ah(struct net_device *dev)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 7e9e218..b67c793 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -604,7 +604,7 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev)
 				goto err_drop;
 			}
 		} else
-			ipoib_send(dev, skb, path->ah, IPOIB_QPN(skb->dst->neighbour->ha));
+			(void) ipoib_send(dev, skb, path->ah, IPOIB_QPN(skb->dst->neighbour->ha));
 	} else {
 		neigh->ah  = NULL;
 
@@ -685,7 +685,7 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
 		ipoib_dbg(priv, "Send unicast ARP to %04x\n",
 			  be16_to_cpu(path->pathrec.dlid));
 
-		ipoib_send(dev, skb, path->ah, IPOIB_QPN(phdr->hwaddr));
+		(void) ipoib_send(dev, skb, path->ah, IPOIB_QPN(phdr->hwaddr));
 	} else if ((path->query || !path_rec_start(dev, path)) &&
 		   skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
 		/* put pseudoheader back on for next time */
@@ -704,6 +704,7 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct ipoib_neigh *neigh;
 	unsigned long flags;
+	int orphan = 0;
 
 	if (unlikely(!spin_trylock_irqsave(&priv->tx_lock, flags)))
 		return NETDEV_TX_LOCKED;
@@ -743,7 +744,9 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
 				goto out;
 			}
 		} else if (neigh->ah) {
-			ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(skb->dst->neighbour->ha));
+			int ret;
+			ret = ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(skb->dst->neighbour->ha));
+			orphan = !ret;
 			goto out;
 		}
 
@@ -788,6 +791,8 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 out:
 	spin_unlock_irqrestore(&priv->tx_lock, flags);
+	if (orphan)
+		skb_orphan(skb);
 
 	return NETDEV_TX_OK;
 }
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index ac33c8f..d491801 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -723,7 +723,7 @@ out:
 			}
 		}
 
-		ipoib_send(dev, skb, mcast->ah, IB_MULTICAST_QPN);
+		(void) ipoib_send(dev, skb, mcast->ah, IB_MULTICAST_QPN);
 	}
 
 unlock:


