[ofa-general] [PATCH - 11] ipoib - add LSO support

Eli Cohen eli at mellanox.co.il
Wed Aug 15 11:22:39 PDT 2007


Add LSO support to ipoib

Using LSO improves performance by allowing the software
to not fragment the payload into MTU-sized packets, and it also
results in a lower rate of interrupts since each such work
request has just one CQE.

Signed-off-by: Eli Cohen <eli at mellanox.co.il>

---

Index: linux-2.6.23-rc1/drivers/infiniband/ulp/ipoib/ipoib_main.c
===================================================================
--- linux-2.6.23-rc1.orig/drivers/infiniband/ulp/ipoib/ipoib_main.c	2007-08-15 20:50:33.000000000 +0300
+++ linux-2.6.23-rc1/drivers/infiniband/ulp/ipoib/ipoib_main.c	2007-08-15 20:50:38.000000000 +0300
@@ -704,7 +704,13 @@ static int ipoib_start_xmit(struct sk_bu
 				goto out;
 			}
 
-			ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(skb->dst->neighbour->ha));
+			if (skb_is_gso(skb))
+				ipoib_send_gso(dev, skb, neigh->ah,
+					   IPOIB_QPN(skb->dst->neighbour->ha));
+			else
+				ipoib_send(dev, skb, neigh->ah,
+					   IPOIB_QPN(skb->dst->neighbour->ha));
+
 			goto out;
 		}
 
@@ -1152,6 +1158,10 @@ static struct net_device *ipoib_add_port
 		goto event_failed;
 	}
 
+	if (priv->dev->features & NETIF_F_SG)
+		if (priv->ca->flags & IB_DEVICE_TCP_GSO)
+			priv->dev->features |= NETIF_F_TSO;
+
 	result = register_netdev(priv->dev);
 	if (result) {
 		printk(KERN_WARNING "%s: couldn't register ipoib port %d; error %d\n",
Index: linux-2.6.23-rc1/drivers/infiniband/ulp/ipoib/ipoib.h
===================================================================
--- linux-2.6.23-rc1.orig/drivers/infiniband/ulp/ipoib/ipoib.h	2007-08-15 20:50:33.000000000 +0300
+++ linux-2.6.23-rc1/drivers/infiniband/ulp/ipoib/ipoib.h	2007-08-15 20:50:38.000000000 +0300
@@ -373,6 +373,10 @@ int ipoib_add_pkey_attr(struct net_devic
 
 void ipoib_send(struct net_device *dev, struct sk_buff *skb,
 		struct ipoib_ah *address, u32 qpn);
+
+void ipoib_send_gso(struct net_device *dev, struct sk_buff *skb,
+		struct ipoib_ah *address, u32 qpn);
+
 void ipoib_reap_ah(struct work_struct *work);
 
 void ipoib_flush_paths(struct net_device *dev);
Index: linux-2.6.23-rc1/drivers/infiniband/ulp/ipoib/ipoib_ib.c
===================================================================
--- linux-2.6.23-rc1.orig/drivers/infiniband/ulp/ipoib/ipoib_ib.c	2007-08-15 20:50:33.000000000 +0300
+++ linux-2.6.23-rc1/drivers/infiniband/ulp/ipoib/ipoib_ib.c	2007-08-15 20:50:38.000000000 +0300
@@ -38,6 +38,7 @@
 #include <linux/delay.h>
 #include <linux/dma-mapping.h>
 #include <linux/ip.h>
+#include <linux/tcp.h>
 
 #include <rdma/ib_cache.h>
 
@@ -249,15 +250,24 @@ repost:
 }
 
 static int dma_unmap_list(struct ib_device *ca, struct ipoib_mapping_st *map,
-			   u16 n)
+			   u16 n, int gso)
 {
 	int i;
 	int len;
+	int first;
 
-	ib_dma_unmap_single(ca, map[0].addr, map[0].size, DMA_TO_DEVICE);
-	len = map[0].size;
+	if (!gso) {
+		ib_dma_unmap_single(ca, map[0].addr, map[0].size,
+				    DMA_TO_DEVICE);
+		len = map[0].size;
+		first = 1;
+	} else {
+		len = 0;
+		first = 0;
+	}
+
+	for (i = first; i < n; ++i) {
 
-	for (i = 1; i < n; ++i) {
 		ib_dma_unmap_page(ca, map[i].addr, map[i].size,
 				  DMA_TO_DEVICE);
 		len += map[i].size;
@@ -276,6 +286,7 @@ static void ipoib_ib_handle_tx_wc(struct
 	ipoib_dbg_data(priv, "send completion: id %d, status: %d\n",
 		       wr_id, wc->status);
 
+
 	if (unlikely(wr_id >= ipoib_sendq_size)) {
 		ipoib_warn(priv, "send completion event with wrid %d (> %d)\n",
 			   wr_id, ipoib_sendq_size);
@@ -283,8 +294,16 @@ static void ipoib_ib_handle_tx_wc(struct
 	}
 
 	tx_req = &priv->tx_ring[wr_id];
-	priv->stats.tx_bytes += dma_unmap_list(priv->ca, tx_req->mapping,
-					skb_shinfo(tx_req->skb)->nr_frags + 1);
+	if (skb_is_gso(tx_req->skb))
+		priv->stats.tx_bytes +=
+			dma_unmap_list(priv->ca, tx_req->mapping,
+				       skb_shinfo(tx_req->skb)->nr_frags, 1);
+	else
+		priv->stats.tx_bytes +=
+			dma_unmap_list(priv->ca, tx_req->mapping,
+				       skb_shinfo(tx_req->skb)->nr_frags + 1,
+				       0);
+
 	++priv->stats.tx_packets;
 
 	dev_kfree_skb_any(tx_req->skb);
@@ -367,7 +386,8 @@ void ipoib_ib_completion(struct ib_cq *c
 static inline int post_send(struct ipoib_dev_priv *priv,
 			    unsigned int wr_id,
 			    struct ib_ah *address, u32 qpn,
-			    struct ipoib_mapping_st *mapping, int ngather)
+			    struct ipoib_mapping_st *mapping, int ngather,
+			    void *lso_header, int h_len)
 {
 	struct ib_send_wr *bad_wr;
 	int i;
@@ -382,9 +402,88 @@ static inline int post_send(struct ipoib
 	priv->tx_wr.wr.ud.remote_qpn  = qpn;
 	priv->tx_wr.wr.ud.ah 	      = address;
 
+	if (lso_header) {
+		priv->tx_wr.wr.ud.mss = priv->dev->mtu;
+		priv->tx_wr.wr.ud.header = lso_header;
+		priv->tx_wr.wr.ud.hlen = h_len;
+		priv->tx_wr.opcode 	= IB_WR_LSO;
+	} else
+		priv->tx_wr.opcode 	= IB_WR_SEND;
+
 	return ib_post_send(priv->qp, &priv->tx_wr, &bad_wr);
 }
 
+
+void ipoib_send_gso(struct net_device *dev, struct sk_buff *skb,
+		    struct ipoib_ah *address, u32 qpn)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_tx_buf *tx_req;
+	struct skb_frag_struct *frag;
+	u64 addr;
+	unsigned short i;
+
+	ipoib_dbg_data(priv, "sending gso packet, length=%d address=%p"
+		       " qpn=0x%06x\n", skb->len, address, qpn);
+
+	if (unlikely((skb_headlen(skb) - IPOIB_ENCAP_LEN) !=
+	    ((ip_hdr(skb)->ihl + tcp_hdr(skb)->doff) << 2))) {
+		ipoib_warn(priv, "headlen (%d) does not match ip (%d)and "
+			   "tcp headers(%d), dropping skb\n",
+			   skb_headlen(skb) - IPOIB_ENCAP_LEN,
+			   ip_hdr(skb)->ihl << 2, tcp_hdr(skb)->doff << 2);
+		++priv->stats.tx_errors;
+		dev_kfree_skb_any(skb);
+		return;
+	}
+
+	/*
+	 * We put the skb into the tx_ring _before_ we call post_send()
+	 * because it's entirely possible that the completion handler will
+	 * run before we execute anything after the post_send().  That
+	 * means we have to make sure everything is properly recorded and
+	 * our state is consistent before we call post_send().
+	 */
+	tx_req = &priv->tx_ring[priv->tx_head & (ipoib_sendq_size - 1)];
+	tx_req->skb = skb;
+
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; ++i) {
+		frag = &skb_shinfo(skb)->frags[i];
+		addr = ib_dma_map_page(priv->ca, frag->page, frag->page_offset,
+				       frag->size, DMA_TO_DEVICE);
+		if (unlikely(ib_dma_mapping_error(priv->ca, addr)))
+			goto map_err;
+
+		tx_req->mapping[i].addr = addr;
+		tx_req->mapping[i].size = frag->size;
+	}
+
+	if (unlikely(post_send(priv, priv->tx_head & (ipoib_sendq_size - 1),
+			       address->ah, qpn, tx_req->mapping,
+			       skb_shinfo(skb)->nr_frags, skb->data,
+			       skb_headlen(skb)))) {
+		ipoib_warn(priv, "post_send failed\n");
+		goto map_err;
+	} else {
+		dev->trans_start = jiffies;
+
+		address->last_send = priv->tx_head;
+		++priv->tx_head;
+
+		if (priv->tx_head - priv->tx_tail == ipoib_sendq_size) {
+			ipoib_dbg(priv, "TX ring full, stopping kernel"
+				  " net queue\n");
+			netif_stop_queue(dev);
+			set_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags);
+		}
+	}
+	return;
+
+map_err:
+	dma_unmap_list(priv->ca, tx_req->mapping, i, 1);
+	dev_kfree_skb_any(skb);
+}
+
 void ipoib_send(struct net_device *dev, struct sk_buff *skb,
 		struct ipoib_ah *address, u32 qpn)
 {
@@ -449,7 +548,7 @@ void ipoib_send(struct net_device *dev, 
 
 	if (unlikely(post_send(priv, priv->tx_head & (ipoib_sendq_size - 1),
 			       address->ah, qpn, tx_req->mapping,
-			       skb_shinfo(skb)->nr_frags + 1))) {
+			       skb_shinfo(skb)->nr_frags + 1, NULL, 0))) {
 		ipoib_warn(priv, "post_send failed\n");
 		goto map_err;
 	} else {
@@ -467,7 +566,7 @@ void ipoib_send(struct net_device *dev, 
 	return;
 
 map_err:
-	dma_unmap_list(priv->ca, tx_req->mapping, i + 1);
+	dma_unmap_list(priv->ca, tx_req->mapping, i + 1, 0);
 	dev_kfree_skb_any(skb);
 }
 
@@ -664,7 +763,8 @@ int ipoib_ib_dev_stop(struct net_device 
 							(ipoib_sendq_size - 1)];
 				skb = tx_req->skb;
 				dma_unmap_list(priv->ca, tx_req->mapping,
-					       skb_shinfo(skb)->nr_frags + 1);
+					       skb_shinfo(skb)->nr_frags + 1,
+					       skb_is_gso(skb));
 				dev_kfree_skb_any(skb);
 				++priv->tx_tail;
 			}
Index: linux-2.6.23-rc1/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
===================================================================
--- linux-2.6.23-rc1.orig/drivers/infiniband/ulp/ipoib/ipoib_verbs.c	2007-08-15 20:50:33.000000000 +0300
+++ linux-2.6.23-rc1/drivers/infiniband/ulp/ipoib/ipoib_verbs.c	2007-08-15 20:50:38.000000000 +0300
@@ -206,7 +206,6 @@ int ipoib_transport_dev_init(struct net_
 	for (i = 0; i < MAX_SKB_FRAGS + 1; ++i)
 		priv->tx_sge[i].lkey 	= priv->mr->lkey;
 
-	priv->tx_wr.opcode 	= IB_WR_SEND;
 	priv->tx_wr.sg_list 	= priv->tx_sge;
 	priv->tx_wr.send_flags 	= IB_SEND_SIGNALED;
 




More information about the general mailing list