[openib-general] Re: [PATCH] repost: IPoIB queue size tune patch

Roland Dreier rdreier at cisco.com
Wed Apr 5 15:31:37 PDT 2006


Thanks, here's the version I committed to svn and queued for 2.6.17.

I made the module parameters "send_queue_size" and "recv_queue_size"
because I think that "sendq_size" might be too obscure for people to
understand.  I made the queue size variables __read_mostly to avoid
false sharing of cache lines.  I changed one "/ 2" into ">> 1",
because now that the queue size is not a compile-time constant, the
compiler is forced to generate a divide instruction for the "/ 2".
And I did a few other minor cleanups...
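
For illustration only (not part of the patch): a minimal userspace sketch of
the init-time clamping added to ipoib_init_module() below.  The helper
round_up_pow2() and the sample values are stand-ins; the actual kernel code
uses roundup_pow_of_two() together with min()/max().

/*
 * Standalone sketch (userspace, not kernel code): shows how the
 * init-time clamping below maps user-supplied queue sizes.
 */
#include <stdio.h>

#define IPOIB_MAX_QUEUE_SIZE 8192
#define IPOIB_MIN_QUEUE_SIZE 2

/* stand-in for the kernel's roundup_pow_of_two() */
static int round_up_pow2(int v)
{
	int p = 1;

	while (p < v)
		p <<= 1;
	return p;
}

static int clamp_queue_size(int requested)
{
	int size = round_up_pow2(requested);

	if (size > IPOIB_MAX_QUEUE_SIZE)
		size = IPOIB_MAX_QUEUE_SIZE;
	if (size < IPOIB_MIN_QUEUE_SIZE)
		size = IPOIB_MIN_QUEUE_SIZE;
	return size;
}

int main(void)
{
	/* hypothetical parameter values, just to show the mapping */
	printf("%d -> %d\n", 1000,   clamp_queue_size(1000));   /* 1024 */
	printf("%d -> %d\n", 8192,   clamp_queue_size(8192));   /* 8192 */
	printf("%d -> %d\n", 100000, clamp_queue_size(100000)); /* capped at 8192 */
	return 0;
}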

diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 374109d..12a1e05 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -65,6 +65,8 @@ enum {
 
 	IPOIB_RX_RING_SIZE 	  = 128,
 	IPOIB_TX_RING_SIZE 	  = 64,
+	IPOIB_MAX_QUEUE_SIZE	  = 8192,
+	IPOIB_MIN_QUEUE_SIZE	  = 2,
 
 	IPOIB_NUM_WC 		  = 4,
 
@@ -332,6 +334,8 @@ static inline void ipoib_unregister_debu
 #define ipoib_warn(priv, format, arg...)		\
 	ipoib_printk(KERN_WARNING, priv, format , ## arg)
 
+extern int ipoib_sendq_size;
+extern int ipoib_recvq_size;
 
 #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
 extern int ipoib_debug_level;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index ed65202..a54da42 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -161,7 +161,7 @@ static int ipoib_ib_post_receives(struct
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	int i;
 
-	for (i = 0; i < IPOIB_RX_RING_SIZE; ++i) {
+	for (i = 0; i < ipoib_recvq_size; ++i) {
 		if (ipoib_alloc_rx_skb(dev, i)) {
 			ipoib_warn(priv, "failed to allocate receive buffer %d\n", i);
 			return -ENOMEM;
@@ -187,7 +187,7 @@ static void ipoib_ib_handle_wc(struct ne
 	if (wr_id & IPOIB_OP_RECV) {
 		wr_id &= ~IPOIB_OP_RECV;
 
-		if (wr_id < IPOIB_RX_RING_SIZE) {
+		if (wr_id < ipoib_recvq_size) {
 			struct sk_buff *skb  = priv->rx_ring[wr_id].skb;
 			dma_addr_t      addr = priv->rx_ring[wr_id].mapping;
 
@@ -252,9 +252,9 @@ static void ipoib_ib_handle_wc(struct ne
 		struct ipoib_tx_buf *tx_req;
 		unsigned long flags;
 
-		if (wr_id >= IPOIB_TX_RING_SIZE) {
+		if (wr_id >= ipoib_sendq_size) {
 			ipoib_warn(priv, "completion event with wrid %d (> %d)\n",
-				   wr_id, IPOIB_TX_RING_SIZE);
+				   wr_id, ipoib_sendq_size);
 			return;
 		}
 
@@ -275,7 +275,7 @@ static void ipoib_ib_handle_wc(struct ne
 		spin_lock_irqsave(&priv->tx_lock, flags);
 		++priv->tx_tail;
 		if (netif_queue_stopped(dev) &&
-		    priv->tx_head - priv->tx_tail <= IPOIB_TX_RING_SIZE / 2)
+		    priv->tx_head - priv->tx_tail <= ipoib_sendq_size >> 1)
 			netif_wake_queue(dev);
 		spin_unlock_irqrestore(&priv->tx_lock, flags);
 
@@ -344,13 +344,13 @@ void ipoib_send(struct net_device *dev, 
 	 * means we have to make sure everything is properly recorded and
 	 * our state is consistent before we call post_send().
 	 */
-	tx_req = &priv->tx_ring[priv->tx_head & (IPOIB_TX_RING_SIZE - 1)];
+	tx_req = &priv->tx_ring[priv->tx_head & (ipoib_sendq_size - 1)];
 	tx_req->skb = skb;
 	addr = dma_map_single(priv->ca->dma_device, skb->data, skb->len,
 			      DMA_TO_DEVICE);
 	pci_unmap_addr_set(tx_req, mapping, addr);
 
-	if (unlikely(post_send(priv, priv->tx_head & (IPOIB_TX_RING_SIZE - 1),
+	if (unlikely(post_send(priv, priv->tx_head & (ipoib_sendq_size - 1),
 			       address->ah, qpn, addr, skb->len))) {
 		ipoib_warn(priv, "post_send failed\n");
 		++priv->stats.tx_errors;
@@ -363,7 +363,7 @@ void ipoib_send(struct net_device *dev, 
 		address->last_send = priv->tx_head;
 		++priv->tx_head;
 
-		if (priv->tx_head - priv->tx_tail == IPOIB_TX_RING_SIZE) {
+		if (priv->tx_head - priv->tx_tail == ipoib_sendq_size) {
 			ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n");
 			netif_stop_queue(dev);
 		}
@@ -488,7 +488,7 @@ static int recvs_pending(struct net_devi
 	int pending = 0;
 	int i;
 
-	for (i = 0; i < IPOIB_RX_RING_SIZE; ++i)
+	for (i = 0; i < ipoib_recvq_size; ++i)
 		if (priv->rx_ring[i].skb)
 			++pending;
 
@@ -527,7 +527,7 @@ int ipoib_ib_dev_stop(struct net_device 
 			 */
 			while ((int) priv->tx_tail - (int) priv->tx_head < 0) {
 				tx_req = &priv->tx_ring[priv->tx_tail &
-							(IPOIB_TX_RING_SIZE - 1)];
+							(ipoib_sendq_size - 1)];
 				dma_unmap_single(priv->ca->dma_device,
 						 pci_unmap_addr(tx_req, mapping),
 						 tx_req->skb->len,
@@ -536,7 +536,7 @@ int ipoib_ib_dev_stop(struct net_device 
 				++priv->tx_tail;
 			}
 
-			for (i = 0; i < IPOIB_RX_RING_SIZE; ++i)
+			for (i = 0; i < ipoib_recvq_size; ++i)
 				if (priv->rx_ring[i].skb) {
 					dma_unmap_single(priv->ca->dma_device,
 							 pci_unmap_addr(&priv->rx_ring[i],
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 9cb9e43..5bf7e26 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -41,6 +41,7 @@
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
+#include <linux/kernel.h>
 
 #include <linux/if_arp.h>	/* For ARPHRD_xxx */
 
@@ -53,6 +54,14 @@ MODULE_AUTHOR("Roland Dreier");
 MODULE_DESCRIPTION("IP-over-InfiniBand net driver");
 MODULE_LICENSE("Dual BSD/GPL");
 
+int ipoib_sendq_size __read_mostly = IPOIB_TX_RING_SIZE;
+int ipoib_recvq_size __read_mostly = IPOIB_RX_RING_SIZE;
+
+module_param_named(send_queue_size, ipoib_sendq_size, int, 0444);
+MODULE_PARM_DESC(send_queue_size, "Number of descriptors in send queue");
+module_param_named(recv_queue_size, ipoib_recvq_size, int, 0444);
+MODULE_PARM_DESC(recv_queue_size, "Number of descriptors in receive queue");
+
 #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
 int ipoib_debug_level;
 
@@ -795,20 +804,19 @@ int ipoib_dev_init(struct net_device *de
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 
 	/* Allocate RX/TX "rings" to hold queued skbs */
-
-	priv->rx_ring =	kzalloc(IPOIB_RX_RING_SIZE * sizeof (struct ipoib_rx_buf),
+	priv->rx_ring =	kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring,
 				GFP_KERNEL);
 	if (!priv->rx_ring) {
 		printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n",
-		       ca->name, IPOIB_RX_RING_SIZE);
+		       ca->name, ipoib_recvq_size);
 		goto out;
 	}
 
-	priv->tx_ring = kzalloc(IPOIB_TX_RING_SIZE * sizeof (struct ipoib_tx_buf),
+	priv->tx_ring = kzalloc(ipoib_sendq_size * sizeof *priv->tx_ring,
 				GFP_KERNEL);
 	if (!priv->tx_ring) {
 		printk(KERN_WARNING "%s: failed to allocate TX ring (%d entries)\n",
-		       ca->name, IPOIB_TX_RING_SIZE);
+		       ca->name, ipoib_sendq_size);
 		goto out_rx_ring_cleanup;
 	}
 
@@ -876,7 +884,7 @@ static void ipoib_setup(struct net_devic
 	dev->hard_header_len 	 = IPOIB_ENCAP_LEN + INFINIBAND_ALEN;
 	dev->addr_len 		 = INFINIBAND_ALEN;
 	dev->type 		 = ARPHRD_INFINIBAND;
-	dev->tx_queue_len 	 = IPOIB_TX_RING_SIZE * 2;
+	dev->tx_queue_len 	 = ipoib_sendq_size * 2;
 	dev->features            = NETIF_F_VLAN_CHALLENGED | NETIF_F_LLTX;
 
 	/* MTU will be reset when mcast join happens */
@@ -1128,6 +1136,14 @@ static int __init ipoib_init_module(void
 {
 	int ret;
 
+	ipoib_recvq_size = roundup_pow_of_two(ipoib_recvq_size);
+	ipoib_recvq_size = min(ipoib_recvq_size, IPOIB_MAX_QUEUE_SIZE);
+	ipoib_recvq_size = max(ipoib_recvq_size, IPOIB_MIN_QUEUE_SIZE);
+
+	ipoib_sendq_size = roundup_pow_of_two(ipoib_sendq_size);
+	ipoib_sendq_size = min(ipoib_sendq_size, IPOIB_MAX_QUEUE_SIZE);
+	ipoib_sendq_size = max(ipoib_sendq_size, IPOIB_MIN_QUEUE_SIZE);
+
 	ret = ipoib_register_debugfs();
 	if (ret)
 		return ret;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index 5f03880..1d49d16 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -159,8 +159,8 @@ int ipoib_transport_dev_init(struct net_
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct ib_qp_init_attr init_attr = {
 		.cap = {
-			.max_send_wr  = IPOIB_TX_RING_SIZE,
-			.max_recv_wr  = IPOIB_RX_RING_SIZE,
+			.max_send_wr  = ipoib_sendq_size,
+			.max_recv_wr  = ipoib_recvq_size,
 			.max_send_sge = 1,
 			.max_recv_sge = 1
 		},
@@ -175,7 +175,7 @@ int ipoib_transport_dev_init(struct net_
 	}
 
 	priv->cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev,
-				IPOIB_TX_RING_SIZE + IPOIB_RX_RING_SIZE + 1);
+				ipoib_sendq_size + ipoib_recvq_size + 1);
 	if (IS_ERR(priv->cq)) {
 		printk(KERN_WARNING "%s: failed to create CQ\n", ca->name);
 		goto out_free_pd;
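
Also for illustration (not part of the patch): the queue sizes are rounded up
to a power of two because the TX path indexes its ring with
"tx_head & (ipoib_sendq_size - 1)", which only wraps correctly for
power-of-two sizes.  This minimal userspace sketch, using a hypothetical ring
size of 64, shows the wrap.

/* Standalone sketch: power-of-two ring indexing with a mask. */
#include <stdio.h>

int main(void)
{
	unsigned int sendq_size = 64;	/* must be a power of two */
	unsigned int tx_head;

	for (tx_head = 62; tx_head < 67; ++tx_head)
		printf("tx_head %u -> ring slot %u\n",
		       tx_head, tx_head & (sendq_size - 1));
	return 0;
}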


