[ewg] [for OFED 1.3 PATCH 1/2] IB/ipoib: enable IGMP for userspace multicast IB apps

Or Gerlitz ogerlitz at voltaire.com
Tue Sep 25 07:49:30 PDT 2007


Hi Michael,

This is the patch for OFED 1.3; it should go into the kernel_patch/fixes directory
and be named zzz_ipoib_allow_umcast.patch

If this goes into 2.6.24 and we merge 2.6.24, then the patch can be pulled out.

Or.


The kernel IB stack allows (through the RDMA CM) user space multicast applications
to interoperate with IP based apps, optionally running on a different IP subnet.

To support this inter-op for the case where the receiving party resides on
the IB side, there is a need to handle IGMP (reports/queries); otherwise the local
IP router would not forward multicast traffic towards the IB network.

This patch does a lookup on the database used for multicast reference counting and
enhances IPoIB to ignore a multicast group which is already handled by user space, all
of this under a per-device policy flag. That is, when the policy flag allows it, IPoIB
will not join and attach its QP to a multicast group which has an entry in the database.

For each IPoIB device, the /sys/class/net/$dev/umcast attribute controls the
policy flag where the default value follows the umcast_allowed module param
(whose default value is zero). The flag can be read and set/unset through sysfs.

Signed-off-by: Or Gerlitz <ogerlitz at voltaire.com>

Index: ofa_kernel-1.3-work/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
===================================================================
--- ofa_kernel-1.3-work.orig/drivers/infiniband/ulp/ipoib/ipoib_multicast.c	2007-09-25 19:49:33.000000000 +0200
+++ ofa_kernel-1.3-work/drivers/infiniband/ulp/ipoib/ipoib_multicast.c	2007-09-25 19:49:38.000000000 +0200
@@ -783,6 +783,7 @@ void ipoib_mcast_restart_task(struct wor
 	struct ipoib_mcast *mcast, *tmcast;
 	LIST_HEAD(remove_list);
 	unsigned long flags;
+	struct ib_sa_mcmember_rec rec;

 	ipoib_dbg_mcast(priv, "restarting multicast task\n");

@@ -816,6 +817,15 @@ void ipoib_mcast_restart_task(struct wor
 		if (!mcast || test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
 			struct ipoib_mcast *nmcast;

+			/* ignore group which is directly joined by user space */
+			if (test_bit(IPOIB_FLAG_ADMIN_UMCAST_ALLOWED, &priv->flags) &&
+			    !ib_sa_get_mcmember_rec(priv->ca, priv->port, &mgid, &rec))
+			{
+				ipoib_dbg_mcast(priv, "ignoring multicast entry for mgid "
+						IPOIB_GID_FMT "\n", IPOIB_GID_ARG(mgid));
+				continue;
+			}
+
 			/* Not found or send-only group, let's add a new entry */
 			ipoib_dbg_mcast(priv, "adding multicast entry for mgid "
 					IPOIB_GID_FMT "\n", IPOIB_GID_ARG(mgid));
Index: ofa_kernel-1.3-work/drivers/infiniband/ulp/ipoib/ipoib.h
===================================================================
--- ofa_kernel-1.3-work.orig/drivers/infiniband/ulp/ipoib/ipoib.h	2007-09-25 19:49:37.000000000 +0200
+++ ofa_kernel-1.3-work/drivers/infiniband/ulp/ipoib/ipoib.h	2007-09-25 19:49:38.000000000 +0200
@@ -88,6 +88,7 @@ enum {
 	IPOIB_FLAG_ADMIN_CM 	  = 10,
 	IPOIB_FLAG_HW_CSUM	  = 11,
 	IPOIB_FLAG_RX_CSUM	  = 12,
+ 	IPOIB_FLAG_ADMIN_UMCAST_ALLOWED	= 13,

 	IPOIB_MAX_BACKOFF_SECONDS = 16,

@@ -470,6 +471,7 @@ static inline void ipoib_put_ah(struct i

 int ipoib_open(struct net_device *dev);
 int ipoib_add_pkey_attr(struct net_device *dev);
+int ipoib_add_umcast_attr(struct net_device *dev);

 void ipoib_send(struct net_device *dev, struct sk_buff *skb,
 		struct ipoib_ah *address, u32 qpn);
Index: ofa_kernel-1.3-work/drivers/infiniband/ulp/ipoib/ipoib_main.c
===================================================================
--- ofa_kernel-1.3-work.orig/drivers/infiniband/ulp/ipoib/ipoib_main.c	2007-09-25 19:49:37.000000000 +0200
+++ ofa_kernel-1.3-work/drivers/infiniband/ulp/ipoib/ipoib_main.c	2007-09-25 19:51:18.000000000 +0200
@@ -64,6 +64,10 @@ MODULE_PARM_DESC(recv_queue_size, "Numbe
 module_param_named(hw_csum, ipoib_hw_csum, int, 0444);
 MODULE_PARM_DESC(hw_csum, "Rely on hardware end-to-end checksum (ICRC) if > 0");

+int ipoib_umcast_allowed = 0;
+module_param_named(umcast_allowed, ipoib_umcast_allowed, int, 0444);
+MODULE_PARM_DESC(umcast_allowed, "allow ignoring mulicast group which is already handled by user space");
+
 #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
 int ipoib_debug_level;

@@ -950,6 +954,9 @@ int ipoib_dev_init(struct net_device *de

 	/* priv->tx_head & tx_tail are already 0 */

+	if (ipoib_umcast_allowed)
+		set_bit(IPOIB_FLAG_ADMIN_UMCAST_ALLOWED, &priv->flags);
+
 	if (ipoib_ib_dev_init(dev, ca, port))
 		goto out_tx_ring_cleanup;

@@ -1124,6 +1131,44 @@ int ipoib_add_pkey_attr(struct net_devic
 	return device_create_file(&dev->dev, &dev_attr_pkey);
 }

+static ssize_t show_umcast(struct device *dev,
+			   struct device_attribute *attr, char *buf)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(to_net_dev(dev));
+
+	if (test_bit(IPOIB_FLAG_ADMIN_UMCAST_ALLOWED, &priv->flags))
+		return sprintf(buf, "1\n");
+	else
+		return sprintf(buf, "0\n");
+}
+
+static ssize_t set_umcast(struct device *dev,
+			  struct device_attribute *attr,
+			  const char *buf, size_t count)
+{
+ 	struct ipoib_dev_priv *priv = netdev_priv(to_net_dev(dev));
+
+ 	if (!strcmp(buf, "1\n")) {
+ 		set_bit(IPOIB_FLAG_ADMIN_UMCAST_ALLOWED, &priv->flags);
+ 		ipoib_warn(priv, "ignoring multicast groups joined directly "
+ 				"by user space\n");
+ 		return count;
+ 	}
+
+ 	if (!strcmp(buf, "0\n")) {
+ 		clear_bit(IPOIB_FLAG_ADMIN_UMCAST_ALLOWED, &priv->flags);
+ 		return count;
+ 	}
+
+ 	return -EINVAL;
+}
+static DEVICE_ATTR(umcast, S_IWUSR | S_IRUGO, show_umcast, set_umcast);
+
+int ipoib_add_umcast_attr(struct net_device *dev)
+{
+	return device_create_file(&dev->dev, &dev_attr_umcast);
+}
+
 static void set_tx_csum(struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -1227,6 +1272,8 @@ static struct net_device *ipoib_add_port
 		goto sysfs_failed;
 	if (ipoib_add_pkey_attr(priv->dev))
 		goto sysfs_failed;
+	if (ipoib_add_umcast_attr(priv->dev))
+		goto sysfs_failed;
 	if (device_create_file(&priv->dev->dev, &dev_attr_create_child))
 		goto sysfs_failed;
 	if (device_create_file(&priv->dev->dev, &dev_attr_delete_child))
Index: ofa_kernel-1.3-work/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
===================================================================
--- ofa_kernel-1.3-work.orig/drivers/infiniband/ulp/ipoib/ipoib_vlan.c	2007-09-10 16:02:22.000000000 +0300
+++ ofa_kernel-1.3-work/drivers/infiniband/ulp/ipoib/ipoib_vlan.c	2007-09-25 19:49:38.000000000 +0200
@@ -119,6 +119,8 @@ int ipoib_vlan_add(struct net_device *pd
 		goto sysfs_failed;
 	if (ipoib_add_pkey_attr(priv->dev))
 		goto sysfs_failed;
+	if (ipoib_add_umcast_attr(priv->dev))
+		goto sysfs_failed;

 	if (device_create_file(&priv->dev->dev, &dev_attr_parent))
 		goto sysfs_failed;




More information about the ewg mailing list