[ofa-general] [PATCH v2 for-2.6.27] IPOIB: add LRO support.

Vladimir Sokolovsky vlad at dev.mellanox.co.il
Tue Jun 24 09:25:15 PDT 2008


 From 89aa9ca5a7866a542807af2c66fbbaa96c1e0672 Mon Sep 17 00:00:00 2001
From: Vladimir Sokolovsky <vlad at mellanox.co.il>
Date: Tue, 24 Jun 2008 19:20:41 +0300
Subject: [PATCH] IPOIB:  add LRO support.

Add the "ipoib_use_lro" module parameter to enable LRO.
Add the "ipoib_lro_max_aggr" module parameter to set
the maximum number of packets to be aggregated.
LRO statistics are accessible through ethtool.

Signed-off-by: Vladimir Sokolovsky <vlad at mellanox.co.il>
Signed-off-by: Eli Cohen <eli at mellanox.co.il>
---

Changes from v1:
Added a per-device use_lro attribute, settable through sysfs.

  drivers/infiniband/ulp/ipoib/Kconfig         |    1 +
  drivers/infiniband/ulp/ipoib/ipoib.h         |   15 ++++
  drivers/infiniband/ulp/ipoib/ipoib_ethtool.c |   50 ++++++++++++++
  drivers/infiniband/ulp/ipoib/ipoib_ib.c      |    8 ++-
  drivers/infiniband/ulp/ipoib/ipoib_main.c    |   94 ++++++++++++++++++++++++++
  5 files changed, 167 insertions(+), 1 deletions(-)

diff --git a/drivers/infiniband/ulp/ipoib/Kconfig b/drivers/infiniband/ulp/ipoib/Kconfig
index 1f76bad..691525c 100644
--- a/drivers/infiniband/ulp/ipoib/Kconfig
+++ b/drivers/infiniband/ulp/ipoib/Kconfig
@@ -1,6 +1,7 @@
  config INFINIBAND_IPOIB
  	tristate "IP-over-InfiniBand"
  	depends on NETDEVICES && INET && (IPV6 || IPV6=n)
+	select INET_LRO
  	---help---
  	  Support for the IP-over-InfiniBand protocol (IPoIB). This
  	  transports IP packets over InfiniBand so you can use your IB
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 8754b36..3aae34d 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -50,6 +50,7 @@
  #include <rdma/ib_verbs.h>
  #include <rdma/ib_pack.h>
  #include <rdma/ib_sa.h>
+#include <linux/inet_lro.h>

  /* constants */

@@ -94,6 +95,9 @@ enum {
  	IPOIB_MCAST_FLAG_BUSY	  = 2,	/* joining or already joined */
  	IPOIB_MCAST_FLAG_ATTACHED = 3,

+	IPOIB_MAX_LRO_DESCRIPTORS = 8,
+	IPOIB_LRO_MAX_AGGR 	  = 64,
+
  	MAX_SEND_CQE		  = 16,
  	IPOIB_CM_COPYBREAK	  = 256,
  };
@@ -248,6 +252,12 @@ struct ipoib_ethtool_st {
  	u16     max_coalesced_frames;
  };

+struct ipoib_lro {
+	int	use_lro;	/* nonzero: RX uses lro_receive_skb() */
+	struct	net_lro_mgr lro_mgr;	/* pool is lro_desc[] */
+	struct	net_lro_desc lro_desc[IPOIB_MAX_LRO_DESCRIPTORS];
+};
+
  /*
   * Device private locking: tx_lock protects members used in TX fast
   * path (and we use LLTX so upper layers don't do extra locking).
@@ -334,6 +344,8 @@ struct ipoib_dev_priv {
  	int	hca_caps;
  	struct ipoib_ethtool_st ethtool;
  	struct timer_list poll_timer;
+
+	struct ipoib_lro lro;
  };

  struct ipoib_ah {
@@ -417,6 +429,7 @@ static inline void ipoib_put_ah(struct ipoib_ah *ah)
  int ipoib_open(struct net_device *dev);
  int ipoib_add_pkey_attr(struct net_device *dev);
  int ipoib_add_umcast_attr(struct net_device *dev);
+int ipoib_add_use_lro_attr(struct net_device *dev);

  void ipoib_send(struct net_device *dev, struct sk_buff *skb,
  		struct ipoib_ah *address, u32 qpn);
@@ -679,6 +692,8 @@ extern struct ib_sa_client ipoib_sa_client;

  #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
  extern int ipoib_debug_level;
+extern int ipoib_use_lro;
+extern int ipoib_lro_max_aggr;

  #define ipoib_dbg(priv, format, arg...)			\
  	do {						\
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
index 10279b7..79709f0 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
@@ -86,11 +86,61 @@ static int ipoib_set_coalesce(struct net_device *dev,
  	return 0;
  }

+#define IPOIB_STATS_LEN  ARRAY_SIZE(ipoib_gstrings_stats)
+/* ethtool stat names, in the order ipoib_get_ethtool_stats() fills data[] */
+static const char ipoib_gstrings_stats[][ETH_GSTRING_LEN] = {
+	"LRO aggregated", "LRO flushed",
+	"LRO avg aggr", "LRO no_desc"
+};
+
+/* Copy the ETH_SS_STATS names into @data; other sets are ignored. */
+static void
+ipoib_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
+{
+	switch (stringset) {
+	case ETH_SS_STATS:
+		memcpy(data, ipoib_gstrings_stats,
+		       sizeof(ipoib_gstrings_stats));
+		break;
+	}
+}
+
+/* ethtool get_sset_count hook: number of statistics strings, or
+ * -EOPNOTSUPP for any string set other than ETH_SS_STATS. */
+static int ipoib_get_sset_count(struct net_device *dev, int sset)
+{
+	if (sset != ETH_SS_STATS)
+		return -EOPNOTSUPP;
+
+	return IPOIB_STATS_LEN;
+}
+
+/*
+ * ethtool get_ethtool_stats hook: write the LRO counters into @data in
+ * the same order as the names in ipoib_gstrings_stats.
+ */
+static void ipoib_get_ethtool_stats(struct net_device *dev,
+				struct ethtool_stats *stats, uint64_t *data)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct net_lro_mgr *mgr = &priv->lro.lro_mgr;
+
+	data[0] = mgr->stats.aggregated;
+	data[1] = mgr->stats.flushed;
+	/* average packets per flush; 0 until something has been flushed */
+	data[2] = mgr->stats.flushed ?
+		  mgr->stats.aggregated / mgr->stats.flushed : 0;
+	data[3] = mgr->stats.no_desc;
+}
+
  static const struct ethtool_ops ipoib_ethtool_ops = {
  	.get_drvinfo		= ipoib_get_drvinfo,
  	.get_tso		= ethtool_op_get_tso,
  	.get_coalesce		= ipoib_get_coalesce,
  	.set_coalesce		= ipoib_set_coalesce,
+	.get_strings		= ipoib_get_strings,	/* stat names */
+	.get_sset_count 	= ipoib_get_sset_count,	/* stat count */
+	.get_ethtool_stats	= ipoib_get_ethtool_stats, /* LRO counters */
  };

  void ipoib_set_ethtool_ops(struct net_device *dev)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index eca8518..009b862 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -288,7 +288,10 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
  	if (test_bit(IPOIB_FLAG_CSUM, &priv->flags) && likely(wc->csum_ok))
  		skb->ip_summed = CHECKSUM_UNNECESSARY;

-	netif_receive_skb(skb);
+	if (priv->lro.use_lro)
+		lro_receive_skb(&priv->lro.lro_mgr, skb, 0);
+	else
+		netif_receive_skb(skb);

  repost:
  	if (unlikely(ipoib_ib_post_receive(dev, wr_id)))
@@ -448,6 +451,9 @@ poll_more:
  			goto poll_more;
  	}

+	if (priv->lro.use_lro)
+		lro_flush_all(&priv->lro.lro_mgr);
+
  	return done;
  }

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index bfe1dbf..f8cce51 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -60,6 +60,17 @@ MODULE_PARM_DESC(send_queue_size, "Number of descriptors in send queue");
  module_param_named(recv_queue_size, ipoib_recvq_size, int, 0444);
  MODULE_PARM_DESC(recv_queue_size, "Number of descriptors in receive queue");

+/* Module-wide LRO defaults; ipoib_lro_setup() copies them into each device. */
+int ipoib_use_lro __read_mostly;
+module_param_named(ipoib_use_lro, ipoib_use_lro, int, 0644);
+MODULE_PARM_DESC(ipoib_use_lro,  " Large Receive Offload, 1: enable, "
+		"0: disable, Default = 0");
+
+int ipoib_lro_max_aggr __read_mostly = IPOIB_LRO_MAX_AGGR;
+module_param_named(ipoib_lro_max_aggr, ipoib_lro_max_aggr, int, 0644);
+MODULE_PARM_DESC(ipoib_lro_max_aggr, " LRO: Max packets to be aggregated. "
+		"Default = 64");
+
  #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
  int ipoib_debug_level;

@@ -936,6 +947,85 @@ static const struct header_ops ipoib_header_ops = {
  	.create	= ipoib_hard_header,
  };

+/*
+ * get_skb_header callback for inet_lro. Returns 0 with *iphdr, *tcph and
+ * *hdr_flags set, or -1 for anything but checksummed IPv4/TCP packets.
+ */
+static int get_skb_hdr(struct sk_buff *skb, void **iphdr,
+		       void **tcph, u64 *hdr_flags, void *priv)
+{
+	unsigned int ip_len;
+	struct iphdr *iph;
+
+	if (unlikely(skb->protocol != htons(ETH_P_IP)) ||
+	    unlikely(skb->ip_summed != CHECKSUM_UNNECESSARY))
+		return -1;
+
+	skb_reset_network_header(skb);
+	iph = ip_hdr(skb);
+	if (iph->protocol != IPPROTO_TCP)
+		return -1;
+
+	ip_len = ip_hdrlen(skb);
+	skb_set_transport_header(skb, ip_len);
+	*tcph = tcp_hdr(skb);
+
+	/* tot_len is in network byte order; convert before the length check */
+	if (ntohs(iph->tot_len) < ip_len + tcp_hdrlen(skb))
+		return -1;
+
+	*hdr_flags = LRO_IPV4 | LRO_TCP;
+	*iphdr = iph;
+	return 0;
+}
+
+/* sysfs show for use_lro: report this device's LRO state as "yes"/"no". */
+static ssize_t show_use_lro(struct device *d, struct device_attribute *attr,
+			 char *buf)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(to_net_dev(d));
+
+	if (!priv->lro.use_lro)
+		return sprintf(buf, "no\n");
+	return sprintf(buf, "yes\n");
+}
+
+/* sysfs store for use_lro: accepts "yes" or "no", with or without a
+ * trailing newline; any other input fails with -EINVAL. */
+static ssize_t set_use_lro(struct device *d, struct device_attribute *attr,
+			const char *buf, size_t count)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(to_net_dev(d));
+
+	if (sysfs_streq(buf, "yes"))
+		priv->lro.use_lro = 1;
+	else if (sysfs_streq(buf, "no"))
+		priv->lro.use_lro = 0;
+	else
+		return -EINVAL;
+	return count;
+}
+
+static DEVICE_ATTR(use_lro, S_IWUSR | S_IRUGO, show_use_lro, set_use_lro);
+/* Create the use_lro sysfs file for @dev; returns device_create_file()'s result. */
+int ipoib_add_use_lro_attr(struct net_device *dev)
+{
+	return device_create_file(&dev->dev, &dev_attr_use_lro);
+}
+
+static void ipoib_lro_setup(struct ipoib_dev_priv *priv)
+{
+	priv->lro.use_lro = ipoib_use_lro;	/* module-parameter default */
+	priv->lro.lro_mgr.max_aggr = ipoib_lro_max_aggr;
+	priv->lro.lro_mgr.max_desc = IPOIB_MAX_LRO_DESCRIPTORS;
+	priv->lro.lro_mgr.lro_arr = priv->lro.lro_desc;	/* embedded array */
+	priv->lro.lro_mgr.get_skb_header = get_skb_hdr;
+	priv->lro.lro_mgr.features = LRO_F_NAPI;
+	priv->lro.lro_mgr.dev = priv->dev;
+	priv->lro.lro_mgr.ip_summed = CHECKSUM_UNNECESSARY;
+	priv->lro.lro_mgr.ip_summed_aggr = CHECKSUM_UNNECESSARY;
+}
+
  static void ipoib_setup(struct net_device *dev)
  {
  	struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -975,6 +1065,8 @@ static void ipoib_setup(struct net_device *dev)

  	priv->dev = dev;

+	ipoib_lro_setup(priv);
+
  	spin_lock_init(&priv->lock);
  	spin_lock_init(&priv->tx_lock);

@@ -1204,6 +1296,8 @@ static struct net_device *ipoib_add_port(const char *format,
  		goto sysfs_failed;
  	if (ipoib_add_umcast_attr(priv->dev))
  		goto sysfs_failed;
+	if (ipoib_add_use_lro_attr(priv->dev))
+		goto sysfs_failed;
  	if (device_create_file(&priv->dev->dev, &dev_attr_create_child))
  		goto sysfs_failed;
  	if (device_create_file(&priv->dev->dev, &dev_attr_delete_child))
-- 
1.5.5.1




More information about the general mailing list