[ofa-general] socket buffer accounting with UDP/ipoib

Eli Cohen eli at mellanox.co.il
Wed Jul 18 05:25:52 PDT 2007


I ran some experiments with iperf in CM mode over TCP sockets. I
can see that there is no adverse effect on bandwidth (Excel file attached).
We did see a slight improvement in packet loss in UDP mode with an
application supplied by a customer.



Copy small received packets to newly allocated SKBs just
big enough to contain the packet. This relieves the accounting
done on the socket, so that a smaller size is used.

Signed-off-by: Eli Cohen <eli at mellanox.co.il>

---

Index: connectx_kernel/drivers/infiniband/ulp/ipoib/ipoib.h
===================================================================
--- connectx_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib.h	2007-07-17 15:41:29.000000000 +0300
+++ connectx_kernel/drivers/infiniband/ulp/ipoib/ipoib.h	2007-07-18 09:34:49.000000000 +0300
@@ -651,4 +651,7 @@
 
 #define IPOIB_QPN(ha) (be32_to_cpup((__be32 *) ha) & 0xffffff)
 
+#define SKB_LEN_THOLD 256
+#define CM_SKB_LEN_THOLD min(SKB_LEN_THOLD, IPOIB_CM_HEAD_SIZE)
+
 #endif /* _IPOIB_H */
Index: connectx_kernel/drivers/infiniband/ulp/ipoib/ipoib_cm.c
===================================================================
--- connectx_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_cm.c	2007-07-17 15:41:29.000000000 +0300
+++ connectx_kernel/drivers/infiniband/ulp/ipoib/ipoib_cm.c	2007-07-18 10:46:54.000000000 +0300
@@ -452,26 +452,40 @@
 
 	frags = PAGE_ALIGN(wc->byte_len - min(wc->byte_len,
 					      (unsigned)IPOIB_CM_HEAD_SIZE)) / PAGE_SIZE;
-
-	newskb = ipoib_cm_alloc_rx_skb(dev, wr_id, frags, mapping);
-	if (unlikely(!newskb)) {
-		/*
-		 * If we can't allocate a new RX buffer, dump
-		 * this packet and reuse the old buffer.
-		 */
-		ipoib_dbg(priv, "failed to allocate receive buffer %d\n", wr_id);
-		++priv->stats.rx_dropped;
-		goto repost;
+	if (wc->byte_len < CM_SKB_LEN_THOLD) {
+		newskb = dev_alloc_skb(wc->byte_len);
+		if (!newskb)
+			ipoib_warn(priv, "failed to allocate skb\n");
+
+		ib_dma_sync_single_for_cpu(priv->ca, priv->cm.srq_ring[wr_id].mapping[0],
+					   IPOIB_CM_HEAD_SIZE, DMA_FROM_DEVICE);
+		skb_copy_from_linear_data_offset(skb, IB_GRH_BYTES, new_skb->data,
+						 wc->byte_len - IB_GRH_BYTES);
+		ib_dma_sync_single_for_device(priv->ca, priv->cm.srq_ring[wr_id].mapping[0],
+					      IPOIB_CM_HEAD_SIZE, DMA_FROM_DEVICE);
+
+		skb_put(newskb, wc->byte_len);
+		skb = newskb;
+	}
+	else {
+		newskb = ipoib_cm_alloc_rx_skb(dev, wr_id, frags, mapping);
+		if (unlikely(!newskb)) {
+			/*
+			 * If we can't allocate a new RX buffer, dump
+			 * this packet and reuse the old buffer.
+			 */
+			ipoib_dbg(priv, "failed to allocate receive buffer %d\n", wr_id);
+			++priv->stats.rx_dropped;
+			goto repost;
+		}
+		ipoib_cm_dma_unmap_rx(priv, frags, priv->cm.srq_ring[wr_id].mapping);
+		memcpy(priv->cm.srq_ring[wr_id].mapping, mapping, (frags + 1) * sizeof *mapping);
+		skb_put_frags(skb, IPOIB_CM_HEAD_SIZE, wc->byte_len, newskb);
 	}
 
-	ipoib_cm_dma_unmap_rx(priv, frags, priv->cm.srq_ring[wr_id].mapping);
-	memcpy(priv->cm.srq_ring[wr_id].mapping, mapping, (frags + 1) * sizeof *mapping);
-
 	ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n",
 		       wc->byte_len, wc->slid);
 
-	skb_put_frags(skb, IPOIB_CM_HEAD_SIZE, wc->byte_len, newskb);
-
 	skb->protocol = ((struct ipoib_header *) skb->data)->proto;
 	skb_reset_mac_header(skb);
 	skb_pull(skb, IPOIB_ENCAP_LEN);




More information about the general mailing list