[openib-general] [PATCH] Move IPoIB to use LockLess TX

Roland Dreier roland at topspin.com
Wed Dec 1 09:32:50 PST 2004


This changes IPoIB's locking to use the new NETIF_F_LLTX
scheme.  It adds about 2-3% to throughput in my netpipe tests.

 - R.

Index: infiniband/ulp/ipoib/ipoib_main.c
===================================================================
--- infiniband/ulp/ipoib/ipoib_main.c	(revision 1304)
+++ infiniband/ulp/ipoib/ipoib_main.c	(working copy)
@@ -204,7 +204,7 @@
 	kfree(path);
 }
 
-static int path_rec_start(struct sk_buff *skb, struct net_device *dev)
+static void path_rec_start(struct sk_buff *skb, struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct ipoib_path *path = kmalloc(sizeof *path, GFP_ATOMIC);
@@ -244,23 +244,23 @@
 
 	path->neighbour = skb->dst->neighbour;
 	*to_ipoib_path(skb->dst->neighbour) = path;
-	return 0;
+	return;
 
 err:
 	kfree(path);
 	++priv->stats.tx_dropped;
 	dev_kfree_skb_any(skb);
-
-	return 0;
 }
 
-static int path_lookup(struct sk_buff *skb, struct net_device *dev)
+static void path_lookup(struct sk_buff *skb, struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(skb->dev);
 
 	/* Look up path record for unicasts */
-	if (skb->dst->neighbour->ha[4] != 0xff)
-		return path_rec_start(skb, dev);
+	if (skb->dst->neighbour->ha[4] != 0xff) {
+		path_rec_start(skb, dev);
+		return;
+	}
 
 	/* Add in the P_Key */
 	skb->dst->neighbour->ha[8] = (priv->pkey >> 8) & 0xff;
@@ -268,7 +268,6 @@
 	ipoib_mcast_send(dev,
 			 (union ib_gid *) (skb->dst->neighbour->ha + 4),
 			 skb);
-	return 0;
 }
 
 static void unicast_arp_completion(int status,
@@ -336,8 +335,8 @@
  * still go through (since we'll get the new path from the SM for
  * these queries) so we'll never update the neighbour.
  */
-static int unicast_arp_start(struct sk_buff *skb, struct net_device *dev,
-			     struct ipoib_pseudoheader *phdr)
+static void unicast_arp_start(struct sk_buff *skb, struct net_device *dev,
+			      struct ipoib_pseudoheader *phdr)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct sk_buff *tmp_skb;
@@ -352,7 +351,7 @@
 		dev_kfree_skb_any(tmp_skb);
 		if (!skb) {
 			++priv->stats.tx_dropped;
-			return 0;
+			return;
 		}
 	}
 
@@ -381,25 +380,32 @@
 		++priv->stats.tx_dropped;
 		dev_kfree_skb_any(skb);
 	}
-
-	return 0;
 }
 
 static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct ipoib_path *path;
+	unsigned long flags;
 
+	local_irq_save(flags);
+	if (!spin_trylock(&priv->tx_lock)) { 
+		local_irq_restore(flags);
+		return NETDEV_TX_LOCKED; 
+	} 
+
 	if (skb->dst && skb->dst->neighbour) {
-		if (unlikely(!*to_ipoib_path(skb->dst->neighbour)))
-			return path_lookup(skb, dev);
+		if (unlikely(!*to_ipoib_path(skb->dst->neighbour))) {
+			path_lookup(skb, dev);
+			goto out;
+		}
 
 		path = *to_ipoib_path(skb->dst->neighbour);
 
 		if (likely(path->ah)) {
 			ipoib_send(dev, skb, path->ah,
 				   be32_to_cpup((__be32 *) skb->dst->neighbour->ha));
-			return 0;
+			goto out;
 		}
 
 		if (skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE)
@@ -417,8 +423,7 @@
 			phdr->hwaddr[9] = priv->pkey & 0xff;
 
 			ipoib_mcast_send(dev, (union ib_gid *) (phdr->hwaddr + 4), skb);
-		}
-		else {
+		} else {
 			/* unicast GID -- ARP reply?? */
 
 			/*
@@ -429,7 +434,7 @@
 			if (skb->destructor == unicast_arp_finish) {
 				ipoib_send(dev, skb, *(struct ipoib_ah **) skb->cb,
 					   be32_to_cpup((u32 *) phdr->hwaddr));
-				return 0;
+				goto out;
 			}
 
 			if (be16_to_cpup((u16 *) skb->data) != ETH_P_ARP) {
@@ -441,22 +446,25 @@
 					   IPOIB_GID_ARG(*(union ib_gid *) (phdr->hwaddr + 4)));
 				dev_kfree_skb_any(skb);
 				++priv->stats.tx_dropped;
-				return 0;
+				goto out;
 			}
 
 			/* put the pseudoheader back on */			  
 			skb_push(skb, sizeof *phdr);
-			return unicast_arp_start(skb, dev, phdr);
+			unicast_arp_start(skb, dev, phdr);
 		}
 	}
 
-	return 0;
+	goto out;
 
 err:
 	++priv->stats.tx_dropped;
 	dev_kfree_skb_any(skb);
 
-	return 0;
+out:
+	spin_unlock_irqrestore(&priv->tx_lock, flags);
+
+	return NETDEV_TX_OK;
 }
 
 struct net_device_stats *ipoib_get_stats(struct net_device *dev)
@@ -641,7 +649,7 @@
 	dev->addr_len 		 = INFINIBAND_ALEN;
 	dev->type 		 = ARPHRD_INFINIBAND;
 	dev->tx_queue_len 	 = IPOIB_TX_RING_SIZE * 2;
-	dev->features            = NETIF_F_VLAN_CHALLENGED;
+	dev->features            = NETIF_F_VLAN_CHALLENGED | NETIF_F_LLTX;
 
 	/* MTU will be reset when mcast join happens */
 	dev->mtu 		 = IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN;
@@ -656,6 +664,7 @@
 	priv->dev = dev;
 
 	spin_lock_init(&priv->lock);
+	spin_lock_init(&priv->tx_lock);
 
 	init_MUTEX(&priv->mcast_mutex);
 	init_MUTEX(&priv->vlan_mutex);
Index: infiniband/ulp/ipoib/ipoib.h
===================================================================
--- infiniband/ulp/ipoib/ipoib.h	(revision 1304)
+++ infiniband/ulp/ipoib/ipoib.h	(working copy)
@@ -130,6 +130,7 @@
 
 	struct ipoib_buf *rx_ring;
 
+	spinlock_t tx_lock;
 	struct ipoib_buf *tx_ring;
 	unsigned tx_head;
 	unsigned tx_tail;
Index: infiniband/ulp/ipoib/ipoib_ib.c
===================================================================
--- infiniband/ulp/ipoib/ipoib_ib.c	(revision 1304)
+++ infiniband/ulp/ipoib/ipoib_ib.c	(working copy)
@@ -70,14 +70,15 @@
 
 	unsigned long flags;
 
-	spin_lock_irqsave(&priv->lock, flags);
 	if (ah->last_send <= priv->tx_tail) {
 		ipoib_dbg(priv, "Freeing ah %p\n", ah->ah);
 		ib_destroy_ah(ah->ah);
 		kfree(ah);
-	} else
+	} else {
+		spin_lock_irqsave(&priv->lock, flags);
 		list_add_tail(&ah->list, &priv->dead_ahs);
-	spin_unlock_irqrestore(&priv->lock, flags);
+		spin_unlock_irqrestore(&priv->lock, flags);
+	}
 }
 
 static inline int ipoib_ib_receive(struct ipoib_dev_priv *priv,
@@ -235,11 +236,11 @@
 
 		dev_kfree_skb_any(tx_req->skb);
 
-		spin_lock_irqsave(&priv->lock, flags);
+		spin_lock_irqsave(&priv->tx_lock, flags);
 		++priv->tx_tail;
 		if (priv->tx_head - priv->tx_tail <= IPOIB_TX_RING_SIZE / 2)
 			netif_wake_queue(dev);
-		spin_unlock_irqrestore(&priv->lock, flags);
+		spin_unlock_irqrestore(&priv->tx_lock, flags);
 
 		if (wc->status != IB_WC_SUCCESS &&
 		    wc->status != IB_WC_WR_FLUSH_ERR)
@@ -338,19 +339,15 @@
 		++priv->stats.tx_errors;
 		dev_kfree_skb_any(skb);
 	} else {
-		unsigned long flags;
-
 		dev->trans_start = jiffies;
 
 		address->last_send = priv->tx_head;
 		++priv->tx_head;
 
-		spin_lock_irqsave(&priv->lock, flags);
 		if (priv->tx_head - priv->tx_tail == IPOIB_TX_RING_SIZE) {
 			ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n");
 			netif_stop_queue(dev);
 		}
-		spin_unlock_irqrestore(&priv->lock, flags);
 	}
 }
 



More information about the general mailing list