[ofa-general] [PATCH] ipoib: garbage-collect stale multicast entries

Yossi Etigin yossi.openib at gmail.com
Mon Aug 11 09:18:16 PDT 2008


 A multicast sender joins the MGID as a full member, but never
leaves it (as long as the interface is up). This causes MGIDs to
leak in the SM.
 This patch adds a garbage-collection task that is scheduled once
in a while (every 1 minute) and leaves stale send-only multicast
groups (those not used for more than 2 minutes).


Signed-off-by: Yossi Etigin <yosefe at voltaire.com>

--

 drivers/infiniband/ulp/ipoib/ipoib.h           |    6 ++-
 drivers/infiniband/ulp/ipoib/ipoib_main.c      |    3 +
 drivers/infiniband/ulp/ipoib/ipoib_multicast.c |   47 +++++++++++++++++++++----
 3 files changed, 47 insertions(+), 9 deletions(-)

Index: b/drivers/infiniband/ulp/ipoib/ipoib.h
===================================================================
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -92,6 +92,7 @@ enum {
 	IPOIB_FLAG_ADMIN_CM	  = 9,
 	IPOIB_FLAG_UMCAST	  = 10,
 	IPOIB_FLAG_CSUM		  = 11,
+	IPOIB_MCAST_RUN_GC	  = 12,
 
 	IPOIB_MAX_BACKOFF_SECONDS = 16,
 
@@ -135,6 +136,7 @@ struct ipoib_mcast {
 	struct list_head  list;
 
 	unsigned long created;
+	unsigned long used;
 	unsigned long backoff;
 
 	unsigned long flags;
@@ -292,7 +294,8 @@ struct ipoib_dev_priv {
 	struct rb_root multicast_tree;
 
 	struct delayed_work pkey_poll_task;
-	struct delayed_work mcast_task;
+	struct delayed_work mcast_join_task;
+	struct delayed_work mcast_leave_task;
 	struct work_struct flush_light;
 	struct work_struct flush_normal;
 	struct work_struct flush_heavy;
@@ -464,6 +467,7 @@ int ipoib_dev_init(struct net_device *de
 void ipoib_dev_cleanup(struct net_device *dev);
 
 void ipoib_mcast_join_task(struct work_struct *work);
+void ipoib_mcast_leave_task(struct work_struct *work);
 void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb);
 
 void ipoib_mcast_restart_task(struct work_struct *work);
Index: b/drivers/infiniband/ulp/ipoib/ipoib_main.c
===================================================================
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1080,7 +1080,8 @@ static void ipoib_setup(struct net_devic
 	INIT_LIST_HEAD(&priv->multicast_list);
 
 	INIT_DELAYED_WORK(&priv->pkey_poll_task, ipoib_pkey_poll);
-	INIT_DELAYED_WORK(&priv->mcast_task,   ipoib_mcast_join_task);
+	INIT_DELAYED_WORK(&priv->mcast_join_task,   ipoib_mcast_join_task);
+	INIT_DELAYED_WORK(&priv->mcast_leave_task, ipoib_mcast_leave_task);
 	INIT_WORK(&priv->flush_light,   ipoib_ib_dev_flush_light);
 	INIT_WORK(&priv->flush_normal,   ipoib_ib_dev_flush_normal);
 	INIT_WORK(&priv->flush_heavy,   ipoib_ib_dev_flush_heavy);
Index: b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
===================================================================
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -118,6 +118,7 @@ static struct ipoib_mcast *ipoib_mcast_a
 
 	mcast->dev = dev;
 	mcast->created = jiffies;
+	mcast->used = jiffies;
 	mcast->backoff = 1;
 
 	INIT_LIST_HEAD(&mcast->list);
@@ -389,7 +390,7 @@ static int ipoib_mcast_join_complete(int
 		mutex_lock(&mcast_mutex);
 		if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
 			queue_delayed_work(ipoib_workqueue,
-					   &priv->mcast_task, 0);
+					   &priv->mcast_join_task, 0);
 		mutex_unlock(&mcast_mutex);
 
 		if (mcast == priv->broadcast)
@@ -422,7 +423,7 @@ static int ipoib_mcast_join_complete(int
 	mutex_lock(&mcast_mutex);
 	spin_lock_irq(&priv->lock);
 	if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
-		queue_delayed_work(ipoib_workqueue, &priv->mcast_task,
+		queue_delayed_work(ipoib_workqueue, &priv->mcast_join_task,
 				   mcast->backoff * HZ);
 	spin_unlock_irq(&priv->lock);
 	mutex_unlock(&mcast_mutex);
@@ -492,7 +493,7 @@ static void ipoib_mcast_join(struct net_
 		mutex_lock(&mcast_mutex);
 		if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
 			queue_delayed_work(ipoib_workqueue,
-					   &priv->mcast_task,
+					   &priv->mcast_join_task,
 					   mcast->backoff * HZ);
 		mutex_unlock(&mcast_mutex);
 	}
@@ -501,7 +502,7 @@ static void ipoib_mcast_join(struct net_
 void ipoib_mcast_join_task(struct work_struct *work)
 {
 	struct ipoib_dev_priv *priv =
-		container_of(work, struct ipoib_dev_priv, mcast_task.work);
+		container_of(work, struct ipoib_dev_priv, mcast_join_task.work);
 	struct net_device *dev = priv->dev;
 
 	if (!test_bit(IPOIB_MCAST_RUN, &priv->flags))
@@ -530,7 +531,7 @@ void ipoib_mcast_join_task(struct work_s
 			mutex_lock(&mcast_mutex);
 			if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
 				queue_delayed_work(ipoib_workqueue,
-						   &priv->mcast_task, HZ);
+						   &priv->mcast_join_task, HZ);
 			mutex_unlock(&mcast_mutex);
 			return;
 		}
@@ -594,7 +595,9 @@ int ipoib_mcast_start_thread(struct net_
 
 	mutex_lock(&mcast_mutex);
 	if (!test_and_set_bit(IPOIB_MCAST_RUN, &priv->flags))
-		queue_delayed_work(ipoib_workqueue, &priv->mcast_task, 0);
+		queue_delayed_work(ipoib_workqueue, &priv->mcast_join_task, 0);
+	if (!test_and_set_bit(IPOIB_MCAST_RUN_GC, &priv->flags))
+		queue_delayed_work(ipoib_workqueue, &priv->mcast_leave_task, 0);
 	mutex_unlock(&mcast_mutex);
 
 	return 0;
@@ -608,7 +611,9 @@ int ipoib_mcast_stop_thread(struct net_d
 
 	mutex_lock(&mcast_mutex);
 	clear_bit(IPOIB_MCAST_RUN, &priv->flags);
-	cancel_delayed_work(&priv->mcast_task);
+	clear_bit(IPOIB_MCAST_RUN_GC, &priv->flags);
+	cancel_delayed_work(&priv->mcast_join_task);
+	cancel_delayed_work(&priv->mcast_leave_task);
 	mutex_unlock(&mcast_mutex);
 
 	if (flush)
@@ -715,6 +720,7 @@ out:
 			}
 		}
 
+		mcast->used = jiffies;
 		ipoib_send(dev, skb, mcast->ah, IB_MULTICAST_QPN);
 	}
 
@@ -859,6 +865,33 @@ void ipoib_mcast_restart_task(struct wor
 		ipoib_mcast_start_thread(dev);
 }
 
+void ipoib_mcast_leave_task(struct work_struct *work)
+{
+	struct ipoib_dev_priv *priv =
+		container_of(work, struct ipoib_dev_priv, mcast_leave_task.work);
+	struct net_device *dev = priv->dev;
+	struct ipoib_mcast *mcast, *tmcast;
+	LIST_HEAD(remove_list);
+
+	if (!test_bit(IPOIB_MCAST_RUN_GC, &priv->flags))
+		return;
+
+	list_for_each_entry_safe(mcast, tmcast, &priv->multicast_list, list) {
+		if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) &&
+		    time_before(mcast->used, jiffies - 120 * HZ)) {
+			rb_erase(&mcast->rb_node, &priv->multicast_tree);
+			list_move_tail(&mcast->list, &remove_list);
+		}
+	}
+
+	list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
+		ipoib_mcast_leave(dev, mcast);
+		ipoib_mcast_free(mcast);
+	}
+
+	queue_delayed_work(ipoib_workqueue, &priv->mcast_leave_task, 60 * HZ);
+}
+
 #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
 
 struct ipoib_mcast_iter *ipoib_mcast_iter_init(struct net_device *dev)



More information about the general mailing list