[ofa-general] [PATCH] IB/IPoIB: Decrease the time that invalid paths stay useless

Moni Shoua monis at Voltaire.COM
Thu Dec 4 08:10:50 PST 2008


If a remote LID change occurs (causing only an IPOIB_FLUSH_LIGHT event on the
node) or a path record completion returns with an error, it might take a long
time until the next path lookup, because the lookup depends on when the kernel
sends an ARP probe packet that triggers it.
This patch adds a task that restarts the lookup of stale paths, as sketched
below. The task is scheduled to run on two occasions:
1. An IPOIB_FLUSH_LIGHT event happens.
2. A path record completion returns with a bad status.
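
A minimal, self-contained sketch of the delayed-work retry pattern the patch
relies on; the names below (example_priv, example_refresh, example_mark_stale)
are illustrative only and are not part of this patch, whose real handler is
ipoib_refresh_paths():

/* Illustrative only: hypothetical names, not part of the patch. */
#include <linux/kernel.h>
#include <linux/workqueue.h>
#include <linux/jiffies.h>

struct example_priv {
	struct delayed_work refresh_task;
	/* ... path list, lock, etc. ... */
};

/* Runs roughly one second after being queued and retries stale lookups. */
static void example_refresh(struct work_struct *work)
{
	struct example_priv *priv =
		container_of(work, struct example_priv, refresh_task.work);

	/* walk the path list and restart the lookup of entries marked stale */
}

static void example_init(struct example_priv *priv)
{
	INIT_DELAYED_WORK(&priv->refresh_task, example_refresh);
}

/* Called whenever paths are marked stale. */
static void example_mark_stale(struct example_priv *priv)
{
	schedule_delayed_work(&priv->refresh_task,
			      round_jiffies_relative(HZ));
}

In the patch itself the work is queued on ipoib_workqueue rather than the
shared kernel workqueue, and round_jiffies_relative(HZ) delays the retry by
about one second while rounding to a whole second boundary so the wakeup can
be batched with other timers.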

Signed-off-by: Moni Shoua <monis at voltaire.com>
---

 drivers/infiniband/ulp/ipoib/ipoib.h      |    6 +++-
 drivers/infiniband/ulp/ipoib/ipoib_ib.c   |    2 -
 drivers/infiniband/ulp/ipoib/ipoib_main.c |   37 +++++++++++++++++++++++++-----
 3 files changed, 37 insertions(+), 8 deletions(-)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index e0c7dfa..98564c3 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -298,6 +298,7 @@ struct ipoib_dev_priv {
 	struct work_struct flush_heavy;
 	struct work_struct restart_task;
 	struct delayed_work ah_reap_task;
+	struct delayed_work path_refresh_task;
 
 	struct ib_device *ca;
 	u8		  port;
@@ -378,7 +379,7 @@ struct ipoib_path {
 
 	struct rb_node	      rb_node;
 	struct list_head      list;
-	int  		      valid;
+	u8  		      stale;
 };
 
 struct ipoib_neigh {
@@ -442,8 +443,9 @@ int ipoib_add_umcast_attr(struct net_device *dev);
 void ipoib_send(struct net_device *dev, struct sk_buff *skb,
 		struct ipoib_ah *address, u32 qpn);
 void ipoib_reap_ah(struct work_struct *work);
+void ipoib_refresh_paths(struct work_struct *work);
 
-void ipoib_mark_paths_invalid(struct net_device *dev);
+void ipoib_mark_paths_stale(struct net_device *dev);
 void ipoib_flush_paths(struct net_device *dev);
 struct ipoib_dev_priv *ipoib_intf_alloc(const char *format);
 
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 28eb6f0..ff52314 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -962,7 +962,7 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
 	}
 
 	if (level == IPOIB_FLUSH_LIGHT) {
-		ipoib_mark_paths_invalid(dev);
+		ipoib_mark_paths_stale(dev);
 		ipoib_mcast_dev_flush(dev);
 	}
 
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 85257f6..c9b5890 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -352,7 +352,7 @@ void ipoib_path_iter_read(struct ipoib_path_iter *iter,
 
 #endif /* CONFIG_INFINIBAND_IPOIB_DEBUG */
 
-void ipoib_mark_paths_invalid(struct net_device *dev)
+void ipoib_mark_paths_stale(struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct ipoib_path *path, *tp;
@@ -360,12 +360,15 @@ void ipoib_mark_paths_invalid(struct net_device *dev)
 	spin_lock_irq(&priv->lock);
 
 	list_for_each_entry_safe(path, tp, &priv->path_list, list) {
-		ipoib_dbg(priv, "mark path LID 0x%04x GID " IPOIB_GID_FMT " invalid\n",
+		ipoib_dbg(priv, "mark path LID 0x%04x GID " IPOIB_GID_FMT " stale\n",
 			be16_to_cpu(path->pathrec.dlid),
 			IPOIB_GID_ARG(path->pathrec.dgid));
-		path->valid =  0;
+		path->stale =  1;
 	}
 
+	if (!list_empty(&priv->path_list))
+		queue_delayed_work(ipoib_workqueue, &priv->path_refresh_task,
+					round_jiffies_relative(HZ));
 	spin_unlock_irq(&priv->lock);
 }
 
@@ -427,6 +430,10 @@ static void path_rec_completion(int status,
 
 		if (!ib_init_ah_from_path(priv->ca, priv->port, pathrec, &av))
 			ah = ipoib_create_ah(dev, priv->pd, &av);
+	} else {
+		path->stale = 1;
+		queue_delayed_work(ipoib_workqueue, &priv->path_refresh_task,
+					round_jiffies_relative(HZ));
 	}
 
 	spin_lock_irqsave(&priv->lock, flags);
@@ -477,7 +484,6 @@ static void path_rec_completion(int status,
 			while ((skb = __skb_dequeue(&neigh->queue)))
 				__skb_queue_tail(&skqueue, skb);
 		}
-		path->valid = 1;
 	}
 
 	path->query = NULL;
@@ -551,9 +557,29 @@ static int path_rec_start(struct net_device *dev,
 		return path->query_id;
 	}
 
+	path->stale =  0;
 	return 0;
 }
 
+void ipoib_refresh_paths(struct work_struct *work)
+{
+	struct ipoib_dev_priv *priv =
+		container_of(work, struct ipoib_dev_priv, path_refresh_task.work);
+	struct net_device *dev = priv->dev;
+	struct ipoib_path *path, *tp;
+
+	spin_lock_irq(&priv->lock);
+	list_for_each_entry_safe(path, tp, &priv->path_list, list) {
+		ipoib_dbg(priv, "restart path LID 0x%04x GID " IPOIB_GID_FMT "\n",
+			be16_to_cpu(path->pathrec.dlid),
+			IPOIB_GID_ARG(path->pathrec.dgid));
+		if (path->stale)
+			path_rec_start(dev, path);
+	}
+
+	spin_unlock_irq(&priv->lock);
+}
+
 static void neigh_add_path(struct sk_buff *skb, struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -656,7 +682,7 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
 	spin_lock_irqsave(&priv->lock, flags);
 
 	path = __path_find(dev, phdr->hwaddr + 4);
-	if (!path || !path->valid) {
+	if (!path) {
 		if (!path)
 			path = path_rec_create(dev, phdr->hwaddr + 4);
 		if (path) {
@@ -1071,6 +1097,7 @@ static void ipoib_setup(struct net_device *dev)
 	INIT_WORK(&priv->flush_heavy,   ipoib_ib_dev_flush_heavy);
 	INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task);
 	INIT_DELAYED_WORK(&priv->ah_reap_task, ipoib_reap_ah);
+	INIT_DELAYED_WORK(&priv->path_refresh_task, ipoib_refresh_paths);
 }
 
 struct ipoib_dev_priv *ipoib_intf_alloc(const char *name)


