[ofa-general] socket buffer accounting with UDP/ipoib
Roland Dreier
rdreier at cisco.com
Tue Jul 17 10:41:49 PDT 2007
I did a quick hack to enable copybreak for UD packets up to 256 bytes
(see below).  This is still missing copybreak for CM / RC mode, but I
just wanted to see how it affected performance.  The answer is that on
my system (fast quad-core Xeon, 1-port Mellanox PCIe HCA) it didn't
make any measurable difference in small-message latency or throughput,
at least none that I could see with netpipe (NPtcp).
I'm not sure whether to pursue this or not.
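
For the CM / RC side, I'd imagine something along these lines in
ipoib_cm_handle_rx_wc() -- just a sketch, completely untested, and
"mapping" is a stand-in for wherever the CM receive ring actually
keeps the buffer's DMA address.  The main difference from the UD path
is that there's no GRH to strip:

	if (wc->byte_len < IPOIB_COPYBREAK) {
		struct sk_buff *new_skb;

		/*
		 * Reserve 12 bytes so the IP header following the
		 * 4-byte IPoIB encap header lands on a 16-byte
		 * boundary, as in the UD path below.
		 */
		new_skb = dev_alloc_skb(wc->byte_len + 12);
		if (new_skb) {
			skb_reserve(new_skb, 12);
			skb_put(new_skb, wc->byte_len);

			ib_dma_sync_single_for_cpu(priv->ca, mapping,
						   wc->byte_len, DMA_FROM_DEVICE);
			skb_copy_from_linear_data(skb, new_skb->data,
						  wc->byte_len);
			ib_dma_sync_single_for_device(priv->ca, mapping,
						      wc->byte_len, DMA_FROM_DEVICE);

			/* hand new_skb up and leave the original buffer posted */
			skb = new_skb;
		}
		/* on allocation failure, fall through to the usual
		 * unmap-and-repost path */
	}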
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 285c143..bf60bbb 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -59,6 +59,8 @@ enum {
 	IPOIB_PACKET_SIZE = 2048,
 	IPOIB_BUF_SIZE    = IPOIB_PACKET_SIZE + IB_GRH_BYTES,
 
+	IPOIB_COPYBREAK   = 256,
+
 	IPOIB_ENCAP_LEN   = 4,
 
 	IPOIB_CM_MTU      = 0x10000 - 0x10, /* padding to align header to 16 */
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 1094488..8d6d0d0 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -203,22 +203,48 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
 	if (wc->slid == priv->local_lid && wc->src_qp == priv->qp->qp_num)
 		goto repost;
 
-	/*
-	 * If we can't allocate a new RX buffer, dump
-	 * this packet and reuse the old buffer.
-	 */
-	if (unlikely(ipoib_alloc_rx_skb(dev, wr_id))) {
-		++priv->stats.rx_dropped;
-		goto repost;
-	}
-
 	ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n",
 		       wc->byte_len, wc->slid);
 
-	ib_dma_unmap_single(priv->ca, addr, IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
+	if (wc->byte_len < IPOIB_COPYBREAK + IB_GRH_BYTES) {
+		struct sk_buff *new_skb;
+
+		/*
+		 * Add 12 bytes to 4-byte IPoIB header to get IP
+		 * header at a multiple of 16.
+		 */
+		new_skb = dev_alloc_skb(wc->byte_len - IB_GRH_BYTES + 12);
+		if (unlikely(!new_skb)) {
+			++priv->stats.rx_dropped;
+			goto repost;
+		}
+
+		skb_reserve(new_skb, 12);
+		skb_put(new_skb, wc->byte_len - IB_GRH_BYTES);
 
-	skb_put(skb, wc->byte_len);
-	skb_pull(skb, IB_GRH_BYTES);
+		ib_dma_sync_single_for_cpu(priv->ca, addr, IPOIB_BUF_SIZE,
+					   DMA_FROM_DEVICE);
+		skb_copy_from_linear_data_offset(skb, IB_GRH_BYTES, new_skb->data,
+						 wc->byte_len - IB_GRH_BYTES);
+		ib_dma_sync_single_for_device(priv->ca, addr, IPOIB_BUF_SIZE,
+					      DMA_FROM_DEVICE);
+
+		skb = new_skb;
+	} else {
+		/*
+		 * If we can't allocate a new RX buffer, dump
+		 * this packet and reuse the old buffer.
+		 */
+		if (unlikely(ipoib_alloc_rx_skb(dev, wr_id))) {
+			++priv->stats.rx_dropped;
+			goto repost;
+		}
+
+		ib_dma_unmap_single(priv->ca, addr, IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
+
+		skb_put(skb, wc->byte_len);
+		skb_pull(skb, IB_GRH_BYTES);
+	}
 
 	skb->protocol = ((struct ipoib_header *) skb->data)->proto;
 	skb_reset_mac_header(skb);
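
If this does turn out to be worth something, the 256-byte cutoff
probably shouldn't stay hard-coded -- e1000 exposes its copybreak as a
module parameter, and we could do the same.  Roughly (again untested;
"ipoib_copybreak" is just a name I made up):

static unsigned int ipoib_copybreak = IPOIB_COPYBREAK;
module_param_named(copybreak, ipoib_copybreak, uint, 0644);
MODULE_PARM_DESC(copybreak,
		 "Copy received packets smaller than this many bytes into a new skb");

with the receive path then testing

	if (wc->byte_len < ipoib_copybreak + IB_GRH_BYTES) {

instead of the constant.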