[ewg] ***SPAM*** Fwd: [ofa-general] ***SPAM*** [PATCH] ipoib: fix deadlock between join completion handler and ipoib_stop
Olga Shern (Voltaire)
olga.shern at gmail.com
Sun Sep 21 02:37:03 PDT 2008
Hi Vlad,
Please add this patch to OFED 1.4
Thanks
Olga
---------- Forwarded message ----------
From: Yossi Etigin <yossi.openib at gmail.com>
Date: Mon, Sep 15, 2008 at 11:45 PM
Subject: [ofa-general] ***SPAM*** [PATCH] ipoib: fix deadlock between
join completion handler and ipoib_stop
To: Roland Dreier <rdreier at cisco.com>
Cc: general list <general at lists.openfabrics.org>
Taking rtnl_lock in ipoib_mcast_join_complete() causes a deadlock with
ipoib_stop(). We avoid it by scheduling the piece of code that takes
the lock on ipoib_workqueue instead of executing it directly.
The deadlock happens because ipoib_stop() calls ipoib_ib_dev_down()
which calls ipoib_mcast_dev_flush(), which calls ipoib_mcast_free(),
which calls ipoib_mcast_leave(). The latter calls
ib_sa_free_multicast(), and this waits until the multicast completion
handler finishes. This handler is ipoib_mcast_join_complete(), which
waits for the rtnl_lock(),
which was already taken by ipoib_stop().
Signed-off-by: Yossi Etigin <yosefe at voltaire.com>
--
Index: b/drivers/infiniband/ulp/ipoib/ipoib.h
===================================================================
--- a/drivers/infiniband/ulp/ipoib/ipoib.h 2008-08-27
21:03:44.000000000 +0300
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h 2008-09-15
23:08:30.000000000 +0300
@@ -293,6 +293,7 @@ struct ipoib_dev_priv {
struct delayed_work pkey_poll_task;
struct delayed_work mcast_task;
+ struct work_struct broadcast_join_task;
struct work_struct flush_light;
struct work_struct flush_normal;
struct work_struct flush_heavy;
@@ -464,6 +465,7 @@ int ipoib_dev_init(struct net_device *de
void ipoib_dev_cleanup(struct net_device *dev);
void ipoib_mcast_join_task(struct work_struct *work);
+void ipoib_mcast_broadcast_join_task(struct work_struct *work);
void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb);
void ipoib_mcast_restart_task(struct work_struct *work);
Index: b/drivers/infiniband/ulp/ipoib/ipoib_main.c
===================================================================
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c 2008-09-08
20:14:08.000000000 +0300
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c 2008-09-15
23:07:45.000000000 +0300
@@ -1075,6 +1075,7 @@ static void ipoib_setup(struct net_devic
INIT_DELAYED_WORK(&priv->pkey_poll_task, ipoib_pkey_poll);
INIT_DELAYED_WORK(&priv->mcast_task, ipoib_mcast_join_task);
+ INIT_WORK(&priv->broadcast_join_task, ipoib_mcast_broadcast_join_task);
INIT_WORK(&priv->flush_light, ipoib_ib_dev_flush_light);
INIT_WORK(&priv->flush_normal, ipoib_ib_dev_flush_normal);
INIT_WORK(&priv->flush_heavy, ipoib_ib_dev_flush_heavy);
Index: b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
===================================================================
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c 2008-09-15
23:02:42.000000000 +0300
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c 2008-09-15
23:37:41.000000000 +0300
@@ -389,6 +389,21 @@ static int ipoib_mcast_sendonly_join(str
return ret;
}
+void ipoib_mcast_broadcast_join_task(struct work_struct *work)
+{
+ struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
+ broadcast_join_task);
+
+ /*
+ * Take rtnl_lock to avoid racing with ipoib_stop()
+ * and turning the carrier back on while a device
+ * is being removed.
+ */
+ rtnl_lock();
+ netif_carrier_on(priv->dev);
+ rtnl_unlock();
+}
+
static int ipoib_mcast_join_complete(int status,
struct ib_sa_multicast *multicast)
{
@@ -415,16 +430,9 @@ static int ipoib_mcast_join_complete(int
&priv->mcast_task, 0);
mutex_unlock(&mcast_mutex);
- if (mcast == priv->broadcast) {
- /*
- * Take RTNL lock here to avoid racing with
- * ipoib_stop() and turning the carrier back
- * on while a device is being removed.
- */
- rtnl_lock();
- netif_carrier_on(dev);
- rtnl_unlock();
- }
+ /* Would deadlock with ipoib_stop if rtnl_lock was taken */
+ if (mcast == priv->broadcast)
+ queue_work(ipoib_workqueue, &priv->broadcast_join_task);
return 0;
}
_______________________________________________
general mailing list
general at lists.openfabrics.org
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general
To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
More information about the ewg
mailing list