[ofa-general] [PATCH] IPOIB: use LRO

Eli Cohen eli at mellanox.co.il
Tue Dec 4 07:46:40 PST 2007


IPOIB use LRO

Modify IPoIB to use LRO (large receive offload). Checksum offload is
still required to ensure reliability of the packets.

Signed-off-by: Eli Cohen <eli at mellanox.co.il>
---

TODO:
- Add checksum offload support to the core and hw devices.
- Add ethtool support to provide an interface for statistics.


 drivers/infiniband/ulp/ipoib/Kconfig      |    1 +
 drivers/infiniband/ulp/ipoib/ipoib.h      |    8 +++++
 drivers/infiniband/ulp/ipoib/ipoib_ib.c   |    9 +++++-
 drivers/infiniband/ulp/ipoib/ipoib_main.c |   47 +++++++++++++++++++++++++++++
 4 files changed, 64 insertions(+), 1 deletions(-)

diff --git a/drivers/infiniband/ulp/ipoib/Kconfig b/drivers/infiniband/ulp/ipoib/Kconfig
index 1f76bad..691525c 100644
--- a/drivers/infiniband/ulp/ipoib/Kconfig
+++ b/drivers/infiniband/ulp/ipoib/Kconfig
@@ -1,6 +1,7 @@
 config INFINIBAND_IPOIB
 	tristate "IP-over-InfiniBand"
 	depends on NETDEVICES && INET && (IPV6 || IPV6=n)
+	select INET_LRO
 	---help---
 	  Support for the IP-over-InfiniBand protocol (IPoIB). This
 	  transports IP packets over InfiniBand so you can use your IB
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index eb7edab..4621e93 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -52,6 +52,7 @@
 #include <rdma/ib_verbs.h>
 #include <rdma/ib_pack.h>
 #include <rdma/ib_sa.h>
+#include <linux/inet_lro.h>
 
 /* constants */
 
@@ -93,6 +94,9 @@ enum {
 	IPOIB_MCAST_FLAG_SENDONLY = 1,
 	IPOIB_MCAST_FLAG_BUSY 	  = 2,	/* joining or already joined */
 	IPOIB_MCAST_FLAG_ATTACHED = 3,
+
+	IPOIB_MAX_LRO_DESCRIPTORS = 8,
+	IPOIB_LRO_MAX_AGGR 	  = 64,
 };
 
 #define	IPOIB_OP_RECV   (1ul << 31)
@@ -313,6 +317,9 @@ struct ipoib_dev_priv {
 	struct dentry *mcg_dentry;
 	struct dentry *path_dentry;
 #endif
+
+	struct net_lro_mgr lro_mgr;
+	struct net_lro_desc lro_desc[IPOIB_MAX_LRO_DESCRIPTORS];
 };
 
 struct ipoib_ah {
@@ -622,6 +629,7 @@ extern struct ib_sa_client ipoib_sa_client;
 
 #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
 extern int ipoib_debug_level;
+extern int ipoib_use_lro;
 
 #define ipoib_dbg(priv, format, arg...)			\
 	do {					        \
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 5063dd5..07f30ad 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -231,7 +231,11 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
 	skb->dev = dev;
 	/* XXX get correct PACKET_ type here */
 	skb->pkt_type = PACKET_HOST;
-	netif_receive_skb(skb);
+
+	if (ipoib_use_lro)
+		lro_receive_skb(&priv->lro_mgr, skb, 0);
+	else
+		netif_receive_skb(skb);
 
 repost:
 	if (unlikely(ipoib_ib_post_receive(dev, wr_id)))
@@ -327,6 +331,9 @@ poll_more:
 			goto poll_more;
 	}
 
+	if (ipoib_use_lro)
+		lro_flush_all(&priv->lro_mgr);
+
 	return done;
 }
 
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index c9f6077..8623075 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -61,6 +61,11 @@ MODULE_PARM_DESC(send_queue_size, "Number of descriptors in send queue");
 module_param_named(recv_queue_size, ipoib_recvq_size, int, 0444);
 MODULE_PARM_DESC(recv_queue_size, "Number of descriptors in receive queue");
 
+
+int ipoib_use_lro __read_mostly = 0;
+module_param_named(ipoib_use_lro, ipoib_use_lro, int, 0644);
+MODULE_PARM_DESC(ipoib_use_lro, "Enable LRO if not equal 0");
+
 #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
 int ipoib_debug_level;
 
@@ -946,6 +951,46 @@ static const struct header_ops ipoib_header_ops = {
 	.create	= ipoib_hard_header,
 };
 
+static int get_skb_hdr(struct sk_buff *skb, void **iphdr,
+		       void **tcph, u64 *hdr_flags, void *priv)
+{
+	unsigned int ip_len;
+	struct iphdr *iph;
+
+	/* LRO hook: -1 means skip LRO. FIXME - verify CQE checksum ??? */
+
+	/* Only TCP is aggregated. NOTE(review): assumes an IPv4 skb. */
+	skb_reset_network_header(skb);
+	iph = ip_hdr(skb);
+	if (iph->protocol != IPPROTO_TCP)
+		return -1;
+
+	ip_len = ip_hdrlen(skb);
+	skb_set_transport_header(skb, ip_len);
+	*tcph = tcp_hdr(skb);
+
+	/* tot_len is big-endian on the wire; convert before comparing */
+	if (ntohs(iph->tot_len) < ip_len + tcp_hdrlen(skb))
+		return -1;
+
+	*hdr_flags = LRO_IPV4 | LRO_TCP;
+	*iphdr = iph;
+
+	return 0;
+}
+
+static void ipoib_lro_setup(struct ipoib_dev_priv *priv)
+{
+	priv->lro_mgr.max_aggr = IPOIB_LRO_MAX_AGGR; /* fill in priv->lro_mgr */
+	priv->lro_mgr.max_desc = IPOIB_MAX_LRO_DESCRIPTORS; /* lro_desc[] len */
+	priv->lro_mgr.lro_arr = priv->lro_desc; /* array embedded in priv */
+	priv->lro_mgr.get_skb_header = get_skb_hdr;
+	priv->lro_mgr.features = LRO_F_NAPI;
+	priv->lro_mgr.dev = priv->dev; /* so priv->dev must already be set */
+	priv->lro_mgr.ip_summed = CHECKSUM_UNNECESSARY; /* FIXME: unverified */
+	priv->lro_mgr.ip_summed_aggr = CHECKSUM_UNNECESSARY;
+}
+
 static void ipoib_setup(struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -985,6 +1030,8 @@ static void ipoib_setup(struct net_device *dev)
 
 	priv->dev = dev;
 
+	ipoib_lro_setup(priv);
+
 	spin_lock_init(&priv->lock);
 	spin_lock_init(&priv->tx_lock);
 
-- 
1.5.3.6






More information about the general mailing list