[ofa-general] Re: [PATCH v2] IPoIB: refresh paths that might be invalid

Yossi Etigin yosefe at Voltaire.COM
Wed Jan 14 10:51:40 PST 2009


How about marking neighbours invalid (instead of paths)
then you could mark all neighbours invalid on flush events,
and in ipoib_start_xmit() add neigh->valid to the gid memcmp
condition, so that if neigh->valid is 0 ipoib will free it,
and call ipoib_path_loookup()?

Moni Shoua wrote:
> If a standby SM takes over and if only some of the nodes change their LID as a
> result, the other nodes get an IPOIB_FLUSH_LIGHT event that doesn't cause
> flushing of paths but only marks them as probably invalid. Path refresh
> will happen only after an ARP probe which may take some time to happen (tens of seconds).
> 
> This patch adds a task that is responsible to restart the lookup of possibly
> invalid paths in 2 occasions:
>  - handling of IPOIB_FLUSH_LIGHT event 
>  - when path completion returns with bad status.
> 
> This way paths are being refreshed much faster.
> 
> Signed-off-by: Moni Shoua <monis at voltaire.com>
> ---
>  drivers/infiniband/ulp/ipoib/ipoib.h      |    6 +++-
>  drivers/infiniband/ulp/ipoib/ipoib_ib.c   |    2 -
>  drivers/infiniband/ulp/ipoib/ipoib_main.c |   38 ++++++++++++++++++++++++++----
>  3 files changed, 38 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
> index 753a983..89f574c 100644
> --- a/drivers/infiniband/ulp/ipoib/ipoib.h
> +++ b/drivers/infiniband/ulp/ipoib/ipoib.h
> @@ -298,6 +298,7 @@ struct ipoib_dev_priv {
>  	struct work_struct flush_heavy;
>  	struct work_struct restart_task;
>  	struct delayed_work ah_reap_task;
> +	struct delayed_work path_refresh_task;
>  
>  	struct ib_device *ca;
>  	u8		  port;
> @@ -378,7 +379,7 @@ struct ipoib_path {
>  
>  	struct rb_node	      rb_node;
>  	struct list_head      list;
> -	int  		      valid;
> +	u8  		      stale;
>  };
>  
>  struct ipoib_neigh {
> @@ -442,8 +443,9 @@ int ipoib_add_umcast_attr(struct net_device *dev);
>  void ipoib_send(struct net_device *dev, struct sk_buff *skb,
>  		struct ipoib_ah *address, u32 qpn);
>  void ipoib_reap_ah(struct work_struct *work);
> +void ipoib_refresh_paths(struct work_struct *work);
>  
> -void ipoib_mark_paths_invalid(struct net_device *dev);
> +void ipoib_mark_paths_stale(struct net_device *dev);
>  void ipoib_flush_paths(struct net_device *dev);
>  struct ipoib_dev_priv *ipoib_intf_alloc(const char *format);
>  
> diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
> index a192581..1a50076 100644
> --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
> +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
> @@ -962,7 +962,7 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
>  	}
>  
>  	if (level == IPOIB_FLUSH_LIGHT) {
> -		ipoib_mark_paths_invalid(dev);
> +		ipoib_mark_paths_stale(dev);
>  		ipoib_mcast_dev_flush(dev);
>  	}
>  
> diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
> index dce0443..b2ec845 100644
> --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
> +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
> @@ -352,7 +352,7 @@ void ipoib_path_iter_read(struct ipoib_path_iter *iter,
>  
>  #endif /* CONFIG_INFINIBAND_IPOIB_DEBUG */
>  
> -void ipoib_mark_paths_invalid(struct net_device *dev)
> +void ipoib_mark_paths_stale(struct net_device *dev)
>  {
>  	struct ipoib_dev_priv *priv = netdev_priv(dev);
>  	struct ipoib_path *path, *tp;
> @@ -360,11 +360,15 @@ void ipoib_mark_paths_invalid(struct net_device *dev)
>  	spin_lock_irq(&priv->lock);
>  
>  	list_for_each_entry_safe(path, tp, &priv->path_list, list) {
> -		ipoib_dbg(priv, "mark path LID 0x%04x GID %pI6 invalid\n",
> +		ipoib_dbg(priv, "mark path LID 0x%04x GID %pI6 stale\n",
>  			be16_to_cpu(path->pathrec.dlid),
>  			path->pathrec.dgid.raw);
> -		path->valid =  0;
> +		path->stale =  1;
>  	}
> +	if (!list_empty(&priv->path_list))
> +		queue_delayed_work(ipoib_workqueue, &priv->path_refresh_task,
> +					round_jiffies_relative(HZ));
> +
>  
>  	spin_unlock_irq(&priv->lock);
>  }
> @@ -427,6 +431,10 @@ static void path_rec_completion(int status,
>  
>  		if (!ib_init_ah_from_path(priv->ca, priv->port, pathrec, &av))
>  			ah = ipoib_create_ah(dev, priv->pd, &av);
> +	} else {
> +		path->stale = 1;
> +		queue_delayed_work(ipoib_workqueue, &priv->path_refresh_task,
> +					round_jiffies_relative(HZ));
>  	}
>  
>  	spin_lock_irqsave(&priv->lock, flags);
> @@ -477,7 +485,6 @@ static void path_rec_completion(int status,
>  			while ((skb = __skb_dequeue(&neigh->queue)))
>  				__skb_queue_tail(&skqueue, skb);
>  		}
> -		path->valid = 1;
>  	}
>  
>  	path->query = NULL;
> @@ -551,9 +558,29 @@ static int path_rec_start(struct net_device *dev,
>  		return path->query_id;
>  	}
>  
> +	path->stale =  0;
>  	return 0;
>  }
>  
> +void ipoib_refresh_paths(struct work_struct *work)
> +{
> +	struct ipoib_dev_priv *priv =
> +		container_of(work, struct ipoib_dev_priv, path_refresh_task.work);
> +	struct net_device *dev = priv->dev;
> +	struct ipoib_path *path, *tp;
> +
> +	spin_lock_irq(&priv->lock);
> +	list_for_each_entry_safe(path, tp, &priv->path_list, list) {
> +		ipoib_dbg(priv, "restart path LID 0x%04x GID %pI6\n",
> +			be16_to_cpu(path->pathrec.dlid),
> +			path->pathrec.dgid.raw);
> +		if (path->stale)
> +			path_rec_start(dev, path);
> +	}
> +
> +	spin_unlock_irq(&priv->lock);
> +}
> +
>  static void neigh_add_path(struct sk_buff *skb, struct net_device *dev)
>  {
>  	struct ipoib_dev_priv *priv = netdev_priv(dev);
> @@ -656,7 +683,7 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
>  	spin_lock_irqsave(&priv->lock, flags);
>  
>  	path = __path_find(dev, phdr->hwaddr + 4);
> -	if (!path || !path->valid) {
> +	if (!path) {
>  		if (!path)
>  			path = path_rec_create(dev, phdr->hwaddr + 4);
>  		if (path) {
> @@ -1070,6 +1097,7 @@ static void ipoib_setup(struct net_device *dev)
>  	INIT_WORK(&priv->flush_heavy,   ipoib_ib_dev_flush_heavy);
>  	INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task);
>  	INIT_DELAYED_WORK(&priv->ah_reap_task, ipoib_reap_ah);
> +	INIT_DELAYED_WORK(&priv->path_refresh_task, ipoib_refresh_paths);
>  }
>  
>  struct ipoib_dev_priv *ipoib_intf_alloc(const char *name)
> _______________________________________________
> general mailing list
> general at lists.openfabrics.org
> http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general
> 
> To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
> 

-- 
--Yossi



More information about the general mailing list