[ofa-general] [PATCH 4/10] IB/ipoib: Add LSO support to ipoib

Eli Cohen eli at dev.mellanox.co.il
Mon Mar 17 08:23:55 PDT 2008


>From 7d2a91c6ac83e9af3141b5e4d73a757bf9159b44 Mon Sep 17 00:00:00 2001
From: Eli Cohen <eli at mellanox.co.il>
Date: Thu, 28 Feb 2008 14:13:45 +0200
Subject: [PATCH] IB/ipoib: Add LSO support to ipoib

Signed-off-by: Eli Cohen <eli at mellnaox.co.il>
---
 drivers/infiniband/ulp/ipoib/ipoib.h       |    1 +
 drivers/infiniband/ulp/ipoib/ipoib_cm.c    |    7 +-
 drivers/infiniband/ulp/ipoib/ipoib_ib.c    |  117 +++++++++++++++++++++-------
 drivers/infiniband/ulp/ipoib/ipoib_main.c  |   10 ++-
 drivers/infiniband/ulp/ipoib/ipoib_verbs.c |    3 +
 5 files changed, 103 insertions(+), 35 deletions(-)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 08930ca..19a41ff 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -319,6 +319,7 @@ struct ipoib_dev_priv {
 	struct dentry *mcg_dentry;
 	struct dentry *path_dentry;
 #endif
+	int	hca_caps;
 };
 
 struct ipoib_ah {
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index edf63dc..90ff2c9 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -1384,7 +1384,7 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr,
 		ipoib_warn(priv, "enabling connected mode "
 			   "will cause multicast packet drops\n");
 
-		dev->features &= ~(NETIF_F_IP_CSUM | NETIF_F_SG);
+		dev->features &= ~(NETIF_F_IP_CSUM | NETIF_F_SG | NETIF_F_TSO);
 		priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM;
 
 		ipoib_flush_paths(dev);
@@ -1396,8 +1396,11 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr,
 		dev->mtu = min(priv->mcast_mtu, dev->mtu);
 		ipoib_flush_paths(dev);
 
-		if (test_bit(IPOIB_FLAG_CSUM, &priv->flags))
+		if (test_bit(IPOIB_FLAG_CSUM, &priv->flags)) {
 			dev->features |= NETIF_F_IP_CSUM | NETIF_F_SG;
+			if (priv->hca_caps & IB_DEVICE_TCP_TSO)
+				dev->features |= NETIF_F_TSO;
+		}
 
 		return count;
 	}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index d13f4fb..6605109 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -39,6 +39,8 @@
 #include <linux/dma-mapping.h>
 
 #include <rdma/ib_cache.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
 
 #include "ipoib.h"
 
@@ -249,29 +251,37 @@ static int ipoib_dma_map_tx(struct ib_device *ca,
 	struct sk_buff *skb = tx_req->skb;
 	u64 *mapping = tx_req->mapping;
 	int i;
+	int off;
 
-	mapping[0] = ib_dma_map_single(ca, skb->data, skb_headlen(skb),
-				       DMA_TO_DEVICE);
-	if (unlikely(ib_dma_mapping_error(ca, mapping[0])))
-		return -EIO;
+	if (skb_headlen(skb)) {
+		mapping[0] = ib_dma_map_single(ca, skb->data, skb_headlen(skb),
+					       DMA_TO_DEVICE);
+		if (unlikely(ib_dma_mapping_error(ca, mapping[0])))
+			return -EIO;
+
+		off = 1;
+	} else
+		off = 0;
 
 	for (i = 0; i < skb_shinfo(skb)->nr_frags; ++i) {
 		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
-		mapping[i + 1] = ib_dma_map_page(ca, frag->page,
+		mapping[i + off] = ib_dma_map_page(ca, frag->page,
 						 frag->page_offset, frag->size,
 						 DMA_TO_DEVICE);
-		if (unlikely(ib_dma_mapping_error(ca, mapping[i + 1])))
+		if (unlikely(ib_dma_mapping_error(ca, mapping[i + off])))
 			goto partial_error;
 	}
 	return 0;
 
 partial_error:
-	ib_dma_unmap_single(ca, mapping[0], skb_headlen(skb), DMA_TO_DEVICE);
-
 	for (; i > 0; --i) {
 		skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
-		ib_dma_unmap_page(ca, mapping[i], frag->size, DMA_TO_DEVICE);
+		ib_dma_unmap_page(ca, mapping[i - !off], frag->size, DMA_TO_DEVICE);
 	}
+
+	if (off)
+		ib_dma_unmap_single(ca, mapping[0], skb_headlen(skb), DMA_TO_DEVICE);
+
 	return -EIO;
 }
 
@@ -281,12 +291,17 @@ static void ipoib_dma_unmap_tx(struct ib_device *ca,
 	struct sk_buff *skb = tx_req->skb;
 	u64 *mapping = tx_req->mapping;
 	int i;
+	int off;
 
-	ib_dma_unmap_single(ca, mapping[0], skb_headlen(skb), DMA_TO_DEVICE);
+	if (skb_headlen(skb)) {
+		ib_dma_unmap_single(ca, mapping[0], skb_headlen(skb), DMA_TO_DEVICE);
+		off = 1;
+	} else
+		off = 0;
 
 	for (i = 0; i < skb_shinfo(skb)->nr_frags; ++i) {
 		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
-		ib_dma_unmap_page(ca, mapping[i + 1], frag->size,
+		ib_dma_unmap_page(ca, mapping[i + off], frag->size,
 				  DMA_TO_DEVICE);
 	}
 }
@@ -392,24 +407,40 @@ void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr)
 static inline int post_send(struct ipoib_dev_priv *priv,
 			    unsigned int wr_id,
 			    struct ib_ah *address, u32 qpn,
-			    u64 *mapping, int headlen,
-			    skb_frag_t *frags,
-			    int nr_frags)
+			    struct ipoib_tx_buf *tx_req,
+			    void *head, int hlen)
 {
 	struct ib_send_wr *bad_wr;
-	int i;
+	int i, off;
+	struct sk_buff *skb = tx_req->skb;
+	skb_frag_t *frags = skb_shinfo(skb)->frags;
+	int nr_frags = skb_shinfo(skb)->nr_frags;
+	u64 *mapping = tx_req->mapping;
+
+	if (skb_headlen(skb)) {
+		priv->tx_sge[0].addr         = mapping[0];
+		priv->tx_sge[0].length       = skb_headlen(skb);
+		off = 1;
+	} else
+		off = 0;
 
-	priv->tx_sge[0].addr	     = mapping[0];
-	priv->tx_sge[0].length       = headlen;
 	for (i = 0; i < nr_frags; ++i) {
-		priv->tx_sge[i + 1].addr = mapping[i + 1];
-		priv->tx_sge[i + 1].length = frags[i].size;
+		priv->tx_sge[i + off].addr = mapping[i + off];
+		priv->tx_sge[i + off].length = frags[i].size;
 	}
-	priv->tx_wr.num_sge	     = nr_frags + 1;
+	priv->tx_wr.num_sge	     = nr_frags + off;
 	priv->tx_wr.wr_id 	     = wr_id;
 	priv->tx_wr.wr.ud.remote_qpn = qpn;
 	priv->tx_wr.wr.ud.ah 	     = address;
 
+	if (head) {
+		priv->tx_wr.wr.ud.mss = skb_shinfo(skb)->gso_size;
+		priv->tx_wr.wr.ud.header = head;
+		priv->tx_wr.wr.ud.hlen = hlen;
+		priv->tx_wr.opcode      = IB_WR_LSO;
+	} else
+		priv->tx_wr.opcode      = IB_WR_SEND;
+
 	return ib_post_send(priv->qp, &priv->tx_wr, &bad_wr);
 }
 
@@ -418,14 +449,35 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct ipoib_tx_buf *tx_req;
-
-	if (unlikely(skb->len > priv->mcast_mtu + IPOIB_ENCAP_LEN)) {
-		ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n",
-			   skb->len, priv->mcast_mtu + IPOIB_ENCAP_LEN);
-		++dev->stats.tx_dropped;
-		++dev->stats.tx_errors;
-		ipoib_cm_skb_too_long(dev, skb, priv->mcast_mtu);
-		return;
+	int hlen;
+	void *phead;
+
+	if (!skb_is_gso(skb)) {
+		if (unlikely(skb->len > priv->mcast_mtu + IPOIB_ENCAP_LEN)) {
+			ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n",
+				   skb->len, priv->mcast_mtu + IPOIB_ENCAP_LEN);
+			++dev->stats.tx_dropped;
+			++dev->stats.tx_errors;
+			ipoib_cm_skb_too_long(dev, skb, priv->mcast_mtu);
+			return;
+		}
+		phead = 0;
+		hlen = 0;
+	} else {
+		/*
+		 * LSO header is limited to max 60 bytes
+		 */
+		if (unlikely((ip_hdr(skb)->ihl + tcp_hdr(skb)->doff) > 15)) {
+			ipoib_warn(priv, "ip(%d) and tcp(%d) headers too long, dropping skb\n",
+				   ip_hdr(skb)->ihl << 2, tcp_hdr(skb)->doff << 2);
+			goto drop;
+		}
+		hlen = ((ip_hdr(skb)->ihl + tcp_hdr(skb)->doff) << 2) + IPOIB_ENCAP_LEN;
+		phead = skb->data;
+		if (unlikely(!skb_pull(skb, hlen))) {
+			ipoib_warn(priv, "linear data too small\n");
+			goto drop;
+		}
 	}
 
 	ipoib_dbg_data(priv, "sending packet, length=%d address=%p qpn=0x%06x\n",
@@ -453,8 +505,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
 
 	if (unlikely(post_send(priv, priv->tx_head & (ipoib_sendq_size - 1),
 			       address->ah, qpn,
-			       tx_req->mapping, skb_headlen(skb),
-			       skb_shinfo(skb)->frags, skb_shinfo(skb)->nr_frags))) {
+			       tx_req, phead, hlen))) {
 		ipoib_warn(priv, "post_send failed\n");
 		++dev->stats.tx_errors;
 		ipoib_dma_unmap_tx(priv->ca, tx_req);
@@ -470,6 +521,12 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
 			netif_stop_queue(dev);
 		}
 	}
+	return;
+
+drop:
+	++dev->stats.tx_errors;
+	dev_kfree_skb_any(skb);
+	return;
 }
 
 static void __ipoib_reap_ah(struct net_device *dev)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 40db0d9..bba286c 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1134,14 +1134,15 @@ static struct net_device *ipoib_add_port(const char *format,
 		       hca->name, result);
 		goto device_init_failed;
 	}
+	priv->hca_caps = device_attr->device_cap_flags;
 
-	if (device_attr->device_cap_flags & IB_DEVICE_UD_IP_CSUM) {
+	kfree(device_attr);
+
+	if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) {
 		set_bit(IPOIB_FLAG_CSUM, &priv->flags);
 		priv->dev->features |= NETIF_F_SG | NETIF_F_IP_CSUM;
 	}
 
-	kfree(device_attr);
-
 	/*
 	 * Set the full membership bit, so that we join the right
 	 * broadcast group, etc.
@@ -1176,6 +1177,9 @@ static struct net_device *ipoib_add_port(const char *format,
 		goto event_failed;
 	}
 
+	if (priv->dev->features & NETIF_F_SG && priv->hca_caps & IB_DEVICE_TCP_TSO)
+		priv->dev->features |= NETIF_F_TSO;
+
 	result = register_netdev(priv->dev);
 	if (result) {
 		printk(KERN_WARNING "%s: couldn't register ipoib port %d; error %d\n",
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index a3aeb91..1d59f27 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -192,6 +192,9 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
 	init_attr.send_cq = priv->cq;
 	init_attr.recv_cq = priv->cq;
 
+	if (priv->hca_caps & IB_DEVICE_TCP_TSO)
+		init_attr.create_flags = QP_CREATE_LSO;
+
 	if (dev->features & NETIF_F_SG)
 		init_attr.cap.max_send_sge = MAX_SKB_FRAGS + 1;
 
-- 
1.5.4.4






More information about the general mailing list