[ofa-general] [PATCH] IB/ipoib: IPOIB CM rx use higher order fragments

Eli Cohen eli at mellanox.co.il
Tue Oct 23 09:07:46 PDT 2007


IPOIB CM rx use higher order fragments

In order to reduce the overhead of iterating over the fragments of an
SKB in the receive flow, we use fragments of higher order and thus
reduce the number of iterations. This patch seems to improve receive
throughput of small UDP messages.

Signed-off-by: Eli Cohen <eli at mellanox.co.il>
---

I used the following command line to see the improvement:
netperf -H 12.4.3.175 -t UDP_STREAM -- -m 128

 drivers/infiniband/ulp/ipoib/ipoib.h    |    5 ++++-
 drivers/infiniband/ulp/ipoib/ipoib_cm.c |   18 +++++++++---------
 2 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 0a00ea0..6cf14ff 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -57,6 +57,8 @@
 
 enum {
 	IPOIB_PACKET_SIZE         = 2048,
+	IPOIB_FRAG_ORDER          = 2,
+	IPOIB_FRAG_SIZE           = PAGE_SIZE << IPOIB_FRAG_ORDER,
 	IPOIB_BUF_SIZE 		  = IPOIB_PACKET_SIZE + IB_GRH_BYTES,
 
 	IPOIB_ENCAP_LEN 	  = 4,
@@ -64,7 +66,8 @@ enum {
 	IPOIB_CM_MTU              = 0x10000 - 0x10, /* padding to align header to 16 */
 	IPOIB_CM_BUF_SIZE         = IPOIB_CM_MTU  + IPOIB_ENCAP_LEN,
 	IPOIB_CM_HEAD_SIZE 	  = IPOIB_CM_BUF_SIZE % PAGE_SIZE,
-	IPOIB_CM_RX_SG            = ALIGN(IPOIB_CM_BUF_SIZE, PAGE_SIZE) / PAGE_SIZE,
+	IPOIB_CM_RX_SG            = 1 + ALIGN(IPOIB_CM_BUF_SIZE - IPOIB_CM_HEAD_SIZE,
+							IPOIB_FRAG_SIZE) / IPOIB_FRAG_SIZE,
 	IPOIB_RX_RING_SIZE 	  = 128,
 	IPOIB_TX_RING_SIZE 	  = 64,
 	IPOIB_MAX_QUEUE_SIZE	  = 8192,
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 8761077..5fee3c6 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -78,7 +78,7 @@ static void ipoib_cm_dma_unmap_rx(struct ipoib_dev_priv *priv, int frags,
 	ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_CM_HEAD_SIZE, DMA_FROM_DEVICE);
 
 	for (i = 0; i < frags; ++i)
-		ib_dma_unmap_single(priv->ca, mapping[i + 1], PAGE_SIZE, DMA_FROM_DEVICE);
+		ib_dma_unmap_single(priv->ca, mapping[i + 1], IPOIB_FRAG_SIZE, DMA_FROM_DEVICE);
 }
 
 static int ipoib_cm_post_receive(struct net_device *dev, int id)
@@ -129,14 +129,14 @@ static struct sk_buff *ipoib_cm_alloc_rx_skb(struct net_device *dev, int id, int
 	}
 
 	for (i = 0; i < frags; i++) {
-		struct page *page = alloc_page(GFP_ATOMIC);
+		struct page *page = alloc_pages(GFP_ATOMIC | __GFP_COMP, IPOIB_FRAG_ORDER);
 
 		if (!page)
 			goto partial_error;
-		skb_fill_page_desc(skb, i, page, 0, PAGE_SIZE);
+		skb_fill_page_desc(skb, i, page, 0, IPOIB_FRAG_SIZE);
 
 		mapping[i + 1] = ib_dma_map_page(priv->ca, skb_shinfo(skb)->frags[i].page,
-						 0, PAGE_SIZE, DMA_FROM_DEVICE);
+						 0, IPOIB_FRAG_SIZE, DMA_FROM_DEVICE);
 		if (unlikely(ib_dma_mapping_error(priv->ca, mapping[i + 1])))
 			goto partial_error;
 	}
@@ -384,10 +384,10 @@ static void skb_put_frags(struct sk_buff *skb, unsigned int hdr_space,
 
 		if (length == 0) {
 			/* don't need this page */
-			skb_fill_page_desc(toskb, i, frag->page, 0, PAGE_SIZE);
+			skb_fill_page_desc(toskb, i, frag->page, 0, IPOIB_FRAG_SIZE);
 			--skb_shinfo(skb)->nr_frags;
 		} else {
-			size = min(length, (unsigned) PAGE_SIZE);
+			size = min(length, (unsigned) IPOIB_FRAG_SIZE);
 
 			frag->size = size;
 			skb->data_len += size;
@@ -447,8 +447,8 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
 		}
 	}
 
-	frags = PAGE_ALIGN(wc->byte_len - min(wc->byte_len,
-					      (unsigned)IPOIB_CM_HEAD_SIZE)) / PAGE_SIZE;
+	frags = ALIGN(wc->byte_len - min(wc->byte_len,
+		 (unsigned)IPOIB_CM_HEAD_SIZE), IPOIB_FRAG_SIZE) / IPOIB_FRAG_SIZE;
 
 	newskb = ipoib_cm_alloc_rx_skb(dev, wr_id, frags, mapping);
 	if (unlikely(!newskb)) {
@@ -1302,7 +1302,7 @@ int ipoib_cm_dev_init(struct net_device *dev)
 
 	priv->cm.rx_sge[0].length = IPOIB_CM_HEAD_SIZE;
 	for (i = 1; i < IPOIB_CM_RX_SG; ++i)
-		priv->cm.rx_sge[i].length = PAGE_SIZE;
+		priv->cm.rx_sge[i].length = IPOIB_FRAG_SIZE;
 	priv->cm.rx_wr.next = NULL;
 	priv->cm.rx_wr.sg_list = priv->cm.rx_sge;
 	priv->cm.rx_wr.num_sge = IPOIB_CM_RX_SG;
-- 
1.5.3.4





More information about the general mailing list