[ewg] [PATCH] ipoib: fix rtnl deadlock

Yossi Etigin yosefe at Voltaire.COM
Mon Aug 11 10:24:53 PDT 2008


This fixes bug #1114 in bugzilla, which is a deadlock between ipoib_stop 
and mcast_join_task.

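ipoib_stop is called with rtnl_lock held, and flushes ipoib_workqueue.
The flush operation might wait for mcast_join_task to finish, which
in turn might be waiting for rtnl_lock.

To break the cycle, mcast_join_task now uses rtnl_trylock() and retries
only while IPOIB_FLAG_ADMIN_UP is set; once the flag is cleared it skips
the MTU update instead of blocking on the lock.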

Signed-off-by: Yossi Etigin <yosefe at voltaire.com>

--

Index: b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
===================================================================
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c	2008-08-04 18:09:33.000000000 +0300
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c	2008-08-04 18:39:08.000000000 +0300
@@ -504,6 +504,7 @@
	struct ipoib_dev_priv *priv =
		container_of(work, struct ipoib_dev_priv, mcast_join_task.work);
	struct net_device *dev = priv->dev;
+	int ret;

	if (!test_bit(IPOIB_MCAST_RUN, &priv->flags))
		return;
@@ -577,9 +578,16 @@
	priv->mcast_mtu = IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu));

	if (!ipoib_cm_admin_enabled(dev)) {
-		rtnl_lock();
-		dev_set_mtu(dev, min(priv->mcast_mtu, priv->admin_mtu));
-		rtnl_unlock();
+		/* Avoid deadlock with ipoib_stop */
+		while (!(ret = rtnl_trylock()) &&
+		       test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
+			yield();
+
+		if (ret) {
+			dev_set_mtu(dev, min(priv->mcast_mtu, priv->admin_mtu));
+			rtnl_unlock();
+		} else
+			ipoib_dbg_mcast(priv, "ignoring mtu setup because device is down\n");
	}

	ipoib_dbg_mcast(priv, "successfully joined all multicast groups\n");

--
--Yossi




More information about the ewg mailing list