[ofa-general] socket buffer accounting with UDP/ipoib
Eli Cohen
eli at mellanox.co.il
Wed Jul 18 05:25:52 PDT 2007
I ran some experiments with iperf in CM mode over TCP sockets, and I
can see that there is no adverse effect on bandwidth (Excel file attached).
We did see a slight improvement in packet loss in UDP mode with an
application supplied by a customer.
Copy small received packets to newly allocated SKBs just
big enough to contain the packet. This relieves the accounting
done on the socket, so that a smaller size is used.
Signed-off-by: Eli Cohen <eli at mellanox.co.il>
---
Index: connectx_kernel/drivers/infiniband/ulp/ipoib/ipoib.h
===================================================================
--- connectx_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib.h 2007-07-17 15:41:29.000000000 +0300
+++ connectx_kernel/drivers/infiniband/ulp/ipoib/ipoib.h 2007-07-18 09:34:49.000000000 +0300
@@ -651,4 +651,7 @@
#define IPOIB_QPN(ha) (be32_to_cpup((__be32 *) ha) & 0xffffff)
+#define SKB_LEN_THOLD 256
+#define CM_SKB_LEN_THOLD min(SKB_LEN_THOLD, IPOIB_CM_HEAD_SIZE)
+
#endif /* _IPOIB_H */
Index: connectx_kernel/drivers/infiniband/ulp/ipoib/ipoib_cm.c
===================================================================
--- connectx_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_cm.c 2007-07-17 15:41:29.000000000 +0300
+++ connectx_kernel/drivers/infiniband/ulp/ipoib/ipoib_cm.c 2007-07-18 10:46:54.000000000 +0300
@@ -452,26 +452,40 @@
frags = PAGE_ALIGN(wc->byte_len - min(wc->byte_len,
(unsigned)IPOIB_CM_HEAD_SIZE)) / PAGE_SIZE;
-
- newskb = ipoib_cm_alloc_rx_skb(dev, wr_id, frags, mapping);
- if (unlikely(!newskb)) {
- /*
- * If we can't allocate a new RX buffer, dump
- * this packet and reuse the old buffer.
- */
- ipoib_dbg(priv, "failed to allocate receive buffer %d\n", wr_id);
- ++priv->stats.rx_dropped;
- goto repost;
+ if (wc->byte_len < CM_SKB_LEN_THOLD) {
+ newskb = dev_alloc_skb(wc->byte_len);
+ if (!newskb)
+ ipoib_warn(priv, "failed to allocate skb\n");
+
+ ib_dma_sync_single_for_cpu(priv->ca, priv->cm.srq_ring[wr_id].mapping[0],
+ IPOIB_CM_HEAD_SIZE, DMA_FROM_DEVICE);
+ skb_copy_from_linear_data_offset(skb, IB_GRH_BYTES, new_skb->data,
+ wc->byte_len - IB_GRH_BYTES);
+ ib_dma_sync_single_for_device(priv->ca, priv->cm.srq_ring[wr_id].mapping[0],
+ IPOIB_CM_HEAD_SIZE, DMA_FROM_DEVICE);
+
+ skb_put(newskb, wc->byte_len);
+ skb = newskb;
+ }
+ else {
+ newskb = ipoib_cm_alloc_rx_skb(dev, wr_id, frags, mapping);
+ if (unlikely(!newskb)) {
+ /*
+ * If we can't allocate a new RX buffer, dump
+ * this packet and reuse the old buffer.
+ */
+ ipoib_dbg(priv, "failed to allocate receive buffer %d\n", wr_id);
+ ++priv->stats.rx_dropped;
+ goto repost;
+ }
+ ipoib_cm_dma_unmap_rx(priv, frags, priv->cm.srq_ring[wr_id].mapping);
+ memcpy(priv->cm.srq_ring[wr_id].mapping, mapping, (frags + 1) * sizeof *mapping);
+ skb_put_frags(skb, IPOIB_CM_HEAD_SIZE, wc->byte_len, newskb);
}
- ipoib_cm_dma_unmap_rx(priv, frags, priv->cm.srq_ring[wr_id].mapping);
- memcpy(priv->cm.srq_ring[wr_id].mapping, mapping, (frags + 1) * sizeof *mapping);
-
ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n",
wc->byte_len, wc->slid);
- skb_put_frags(skb, IPOIB_CM_HEAD_SIZE, wc->byte_len, newskb);
-
skb->protocol = ((struct ipoib_header *) skb->data)->proto;
skb_reset_mac_header(skb);
skb_pull(skb, IPOIB_ENCAP_LEN);
More information about the general
mailing list