[ofa-general] [PATCH v2] ipoib: fix a deadlock between ipoib start/stop and child interface create/delete

Yossi Etigin yosefe at Voltaire.COM
Wed Sep 24 08:23:56 PDT 2008


Fix a deadlock between child interface creation/deletion and ipoib start/stop.
The former takes first vlan_mutex, and might take rtnl_lock via register_netdev
or unregister_netdev. The latter is executed with rtnl_lock held, and tries
to take vlan_mutex.
 We take the vlan_mutex and bring child interface up/down on a scheduled task
instead of during stop/start, since ipoib_workqueue will not be flushed with 
rtnl_lock held. 

Signed-off-by: Yossi Etigin <yosefe at voltaire.com>

---

Fix bug #1198.

One alternative approach might be to fine-grain the locking (for example use 
one mutex to sync child creation/deletion, and another one to sync accesses to
child_intfs list).

Changes from v1:
 - declare an atomic flag in ipoib_dev_priv and use it instead of
   work_struct::data


 drivers/infiniband/ulp/ipoib/ipoib.h      |    3 ++
 drivers/infiniband/ulp/ipoib/ipoib_main.c |   35 +++++-------------------------
 drivers/infiniband/ulp/ipoib/ipoib_vlan.c |   22 ++++++++++++++++++
 3 files changed, 31 insertions(+), 29 deletions(-)

Index: b/drivers/infiniband/ulp/ipoib/ipoib.h
===================================================================
--- a/drivers/infiniband/ulp/ipoib/ipoib.h	2008-09-22 21:24:59.000000000 +0300
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h	2008-09-22 22:11:54.000000000 +0300
@@ -299,6 +299,8 @@ struct ipoib_dev_priv {
 	struct work_struct flush_heavy;
 	struct work_struct restart_task;
 	struct delayed_work ah_reap_task;
+	struct work_struct vlan_task;
+	atomic_t vlan_task_flag;
 
 	struct ib_device *ca;
 	u8		  port;
@@ -503,6 +505,7 @@ void ipoib_event(struct ib_event_handler
 
 int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey);
 int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey);
+void ipoib_vlan_task(struct work_struct *work);
 
 void ipoib_pkey_poll(struct work_struct *work);
 int ipoib_pkey_dev_delay_open(struct net_device *dev);
Index: b/drivers/infiniband/ulp/ipoib/ipoib_main.c
===================================================================
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c	2008-09-22 21:24:59.000000000 +0300
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c	2008-09-22 22:13:26.000000000 +0300
@@ -124,20 +124,8 @@ int ipoib_open(struct net_device *dev)
 	}
 
 	if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
-		struct ipoib_dev_priv *cpriv;
-
-		/* Bring up any child interfaces too */
-		mutex_lock(&priv->vlan_mutex);
-		list_for_each_entry(cpriv, &priv->child_intfs, list) {
-			int flags;
-
-			flags = cpriv->dev->flags;
-			if (flags & IFF_UP)
-				continue;
-
-			dev_change_flags(cpriv->dev, flags | IFF_UP);
-		}
-		mutex_unlock(&priv->vlan_mutex);
+		vlan_task_flag(&priv->vlan_task_flag, 1);
+		queue_work(ipoib_workqueue, &priv->vlan_task);
 	}
 
 	netif_start_queue(dev);
@@ -160,20 +148,8 @@ static int ipoib_stop(struct net_device 
 	ipoib_ib_dev_stop(dev, 0);
 
 	if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
-		struct ipoib_dev_priv *cpriv;
-
-		/* Bring down any child interfaces too */
-		mutex_lock(&priv->vlan_mutex);
-		list_for_each_entry(cpriv, &priv->child_intfs, list) {
-			int flags;
-
-			flags = cpriv->dev->flags;
-			if (!(flags & IFF_UP))
-				continue;
-
-			dev_change_flags(cpriv->dev, flags & ~IFF_UP);
-		}
-		mutex_unlock(&priv->vlan_mutex);
+		atomic_set(&priv->vlan_task_flag, 0);
+		queue_work(ipoib_workqueue, &priv->vlan_task);
 	}
 
 	return 0;
@@ -1075,12 +1051,13 @@ static void ipoib_setup(struct net_devic
 
 	INIT_DELAYED_WORK(&priv->pkey_poll_task, ipoib_pkey_poll);
 	INIT_DELAYED_WORK(&priv->mcast_task,   ipoib_mcast_join_task);
-	INIT_WORK(&priv->carrier_on_task, ipoib_mcast_carrier_on_task);
+	INIT_WORK(&priv->broadcast_join_task, ipoib_mcast_broadcast_join_task);
 	INIT_WORK(&priv->flush_light,   ipoib_ib_dev_flush_light);
 	INIT_WORK(&priv->flush_normal,   ipoib_ib_dev_flush_normal);
 	INIT_WORK(&priv->flush_heavy,   ipoib_ib_dev_flush_heavy);
 	INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task);
 	INIT_DELAYED_WORK(&priv->ah_reap_task, ipoib_reap_ah);
+	INIT_WORK(&priv->vlan_task, ipoib_vlan_task);
 }
 
 struct ipoib_dev_priv *ipoib_intf_alloc(const char *name)
Index: b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
===================================================================
--- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c	2008-09-22 21:05:14.000000000 +0300
+++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c	2008-09-22 22:13:43.000000000 +0300
@@ -174,3 +174,25 @@ int ipoib_vlan_delete(struct net_device 
 
 	return ret;
 }
+
+void ipoib_vlan_task(struct work_struct *work)
+{
+	struct ipoib_dev_priv *priv =
+		container_of(work, struct ipoib_dev_priv, vlan_task);
+	struct ipoib_dev_priv *cpriv;
+	int flags, new_flags, iffup_value;
+
+	iffup_value = atomic_read(&priv->vlan_task_flag) ? IFF_UP : 0;
+
+	mutex_lock(&priv->vlan_mutex);
+	list_for_each_entry(cpriv, &priv->child_intfs, list) {
+		flags = cpriv->dev->flags;
+		new_flags = (flags & ~IFF_UP) | iffup_value;
+		if (flags != new_flags) {
+			rtnl_lock();
+			dev_change_flags(cpriv->dev, new_flags);
+			rtnl_unlock();
+		}
+	}
+	mutex_unlock(&priv->vlan_mutex);
+}

-- 
--Yossi



More information about the general mailing list