[openib-general] Re: [PATCH] ipoib tune rx ring size

Michael S. Tsirkin mst at mellanox.co.il
Tue Jun 21 01:27:58 PDT 2005


Quoting Grant Grundler <iod00d at hp.com>:
> Subject: Re: [PATCH] ipoib tune rx ring size
>
> However, binding the tasks to the non-interrupt CPU ("-T 0,0") results
> in essentially no change in perf.
> 

Interesting. In theory, this could be caused by tx_lock being taken
on the completion path, in effect serializing TX with the interrupt handler.
Grant, could you try the patch below and see what happens?

It's probably interesting to try this with both 128 and 64 rx ring sizes.

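For context, here is a rough sketch of the pre-patch locking (the completion
side is reconstructed from the lines removed below; the xmit side is
paraphrased and only approximate):

	/* TX completion handler (interrupt context) -- pre-patch it takes
	 * the same tx_lock that the LLTX xmit path holds: */
	spin_lock_irqsave(&priv->tx_lock, flags);
	++priv->tx_tail;
	if (netif_queue_stopped(dev) &&
	    priv->tx_head - priv->tx_tail <= IPOIB_TX_RING_SIZE / 2)
		netif_wake_queue(dev);
	spin_unlock_irqrestore(&priv->tx_lock, flags);

	/* Meanwhile the xmit path (LLTX, so the driver does its own locking)
	 * holds tx_lock around posting the send, roughly: */
	spin_lock_irqsave(&priv->tx_lock, flags);
	/* ... post the send work request, ++priv->tx_head, maybe stop queue ... */
	spin_unlock_irqrestore(&priv->tx_lock, flags);

So every completion contends with concurrent transmits for tx_lock. The patch
moves the tx_tail/queue-stopped bookkeeping under a separate queue_lock, so
the common completion case (queue not stopped) no longer touches tx_lock.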

Index: ulp/ipoib/ipoib_main.c
===================================================================
--- ulp/ipoib/ipoib_main.c	(revision 2656)
+++ ulp/ipoib/ipoib_main.c	(working copy)
@@ -837,6 +837,7 @@ static void ipoib_setup(struct net_devic
 
 	spin_lock_init(&priv->lock);
 	spin_lock_init(&priv->tx_lock);
+	spin_lock_init(&priv->queue_lock);
 
 	init_MUTEX(&priv->mcast_mutex);
 	init_MUTEX(&priv->vlan_mutex);
Index: ulp/ipoib/ipoib.h
===================================================================
--- ulp/ipoib/ipoib.h	(revision 2656)
+++ ulp/ipoib/ipoib.h	(working copy)
@@ -107,8 +107,10 @@ struct ipoib_buf {
 /*
  * Device private locking: tx_lock protects members used in TX fast
  * path (and we use LLTX so upper layers don't do extra locking).
- * lock protects everything else.  lock nests inside of tx_lock (ie
- * tx_lock must be acquired first if needed).
+ * queue_lock protects the tx_tail value and netif queue stopped status.
+ * lock protects everything else.
+ * queue_lock and lock nest inside of tx_lock
+ * (ie tx_lock must be acquired first if needed).
  */
 struct ipoib_dev_priv {
 	spinlock_t lock;
@@ -158,6 +160,8 @@ struct ipoib_dev_priv {
 	struct ib_sge     tx_sge;
 	struct ib_send_wr tx_wr;
 
+	spinlock_t        queue_lock;
+
 	struct ib_wc ibwc[IPOIB_NUM_WC];
 
 	struct list_head dead_ahs;
Index: ulp/ipoib/ipoib_ib.c
===================================================================
--- ulp/ipoib/ipoib_ib.c	(revision 2656)
+++ ulp/ipoib/ipoib_ib.c	(working copy)
@@ -229,6 +229,7 @@ static void ipoib_ib_handle_wc(struct ne
 	} else {
 		struct ipoib_buf *tx_req;
 		unsigned long flags;
+		int stopped;
 
 		if (wr_id >= IPOIB_TX_RING_SIZE) {
 			ipoib_warn(priv, "completion event with wrid %d (> %d)\n",
@@ -250,12 +251,16 @@ static void ipoib_ib_handle_wc(struct ne
 
 		dev_kfree_skb_any(tx_req->skb);
 
-		spin_lock_irqsave(&priv->tx_lock, flags);
+		spin_lock_irqsave(&priv->queue_lock, flags);
 		++priv->tx_tail;
-		if (netif_queue_stopped(dev) &&
-		    priv->tx_head - priv->tx_tail <= IPOIB_TX_RING_SIZE / 2)
-			netif_wake_queue(dev);
-		spin_unlock_irqrestore(&priv->tx_lock, flags);
+		stopped = netif_queue_stopped(dev);
+		spin_unlock_irqrestore(&priv->queue_lock, flags);
+		if (stopped) {
+			spin_lock_irqsave(&priv->tx_lock, flags);
+			if (priv->tx_head - priv->tx_tail <= IPOIB_TX_RING_SIZE / 2)
+				netif_wake_queue(dev);
+			spin_unlock_irqrestore(&priv->tx_lock, flags);
+		}
 
 		if (wc->status != IB_WC_SUCCESS &&
 		    wc->status != IB_WC_WR_FLUSH_ERR)
@@ -336,14 +341,20 @@ void ipoib_send(struct net_device *dev, 
 				 DMA_TO_DEVICE);
 		dev_kfree_skb_any(skb);
 	} else {
+		unsigned long flags;
 		dev->trans_start = jiffies;
 
 		address->last_send = priv->tx_head;
 		++priv->tx_head;
 
 		if (priv->tx_head - priv->tx_tail == IPOIB_TX_RING_SIZE) {
-			ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n");
-			netif_stop_queue(dev);
+			spin_lock_irqsave(&priv->queue_lock, flags);
+
+			if (priv->tx_head - priv->tx_tail == IPOIB_TX_RING_SIZE) {
+				ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n");
+				netif_stop_queue(dev);
+			}
+			spin_unlock_irqrestore(&priv->queue_lock, flags);
 		}
 	}
 }

-- 
MST


