[ofa-general] Re: [PATCH v2] IPoIB: refresh paths that might be invalid
Yossi Etigin
yosefe at Voltaire.COM
Wed Jan 14 10:51:40 PST 2009
How about marking neighbours invalid (instead of paths)
then you could mark all neighbours invalid on flush events,
and in ipoib_start_xmit() add neigh->valid to the gid memcmp
condition, so that if neigh->valid is 0 ipoib will free it,
and call ipoib_path_loookup()?
Moni Shoua wrote:
> If a standby SM takes over and if only some of the nodes change their LID as a
> result, the other nodes get an IPOIB_FLUSH_LIGHT event that doesn't cause
> flushing of paths but only marks them as probably invalid. Path refresh
> will happen only after an ARP probe which may take some time to happen (tens of seconds).
>
> This patch adds a task that is responsible to restart the lookup of possibly
> invalid paths in 2 occasions:
> - handling of IPOIB_FLUSH_LIGHT event
> - when path completion returns with bad status.
>
> This way paths are being refreshed much faster.
>
> Signed-off-by: Moni Shoua <monis at voltaire.com>
> ---
> drivers/infiniband/ulp/ipoib/ipoib.h | 6 +++-
> drivers/infiniband/ulp/ipoib/ipoib_ib.c | 2 -
> drivers/infiniband/ulp/ipoib/ipoib_main.c | 38 ++++++++++++++++++++++++++----
> 3 files changed, 38 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
> index 753a983..89f574c 100644
> --- a/drivers/infiniband/ulp/ipoib/ipoib.h
> +++ b/drivers/infiniband/ulp/ipoib/ipoib.h
> @@ -298,6 +298,7 @@ struct ipoib_dev_priv {
> struct work_struct flush_heavy;
> struct work_struct restart_task;
> struct delayed_work ah_reap_task;
> + struct delayed_work path_refresh_task;
>
> struct ib_device *ca;
> u8 port;
> @@ -378,7 +379,7 @@ struct ipoib_path {
>
> struct rb_node rb_node;
> struct list_head list;
> - int valid;
> + u8 stale;
> };
>
> struct ipoib_neigh {
> @@ -442,8 +443,9 @@ int ipoib_add_umcast_attr(struct net_device *dev);
> void ipoib_send(struct net_device *dev, struct sk_buff *skb,
> struct ipoib_ah *address, u32 qpn);
> void ipoib_reap_ah(struct work_struct *work);
> +void ipoib_refresh_paths(struct work_struct *work);
>
> -void ipoib_mark_paths_invalid(struct net_device *dev);
> +void ipoib_mark_paths_stale(struct net_device *dev);
> void ipoib_flush_paths(struct net_device *dev);
> struct ipoib_dev_priv *ipoib_intf_alloc(const char *format);
>
> diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
> index a192581..1a50076 100644
> --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
> +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
> @@ -962,7 +962,7 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
> }
>
> if (level == IPOIB_FLUSH_LIGHT) {
> - ipoib_mark_paths_invalid(dev);
> + ipoib_mark_paths_stale(dev);
> ipoib_mcast_dev_flush(dev);
> }
>
> diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
> index dce0443..b2ec845 100644
> --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
> +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
> @@ -352,7 +352,7 @@ void ipoib_path_iter_read(struct ipoib_path_iter *iter,
>
> #endif /* CONFIG_INFINIBAND_IPOIB_DEBUG */
>
> -void ipoib_mark_paths_invalid(struct net_device *dev)
> +void ipoib_mark_paths_stale(struct net_device *dev)
> {
> struct ipoib_dev_priv *priv = netdev_priv(dev);
> struct ipoib_path *path, *tp;
> @@ -360,11 +360,15 @@ void ipoib_mark_paths_invalid(struct net_device *dev)
> spin_lock_irq(&priv->lock);
>
> list_for_each_entry_safe(path, tp, &priv->path_list, list) {
> - ipoib_dbg(priv, "mark path LID 0x%04x GID %pI6 invalid\n",
> + ipoib_dbg(priv, "mark path LID 0x%04x GID %pI6 stale\n",
> be16_to_cpu(path->pathrec.dlid),
> path->pathrec.dgid.raw);
> - path->valid = 0;
> + path->stale = 1;
> }
> + if (!list_empty(&priv->path_list))
> + queue_delayed_work(ipoib_workqueue, &priv->path_refresh_task,
> + round_jiffies_relative(HZ));
> +
>
> spin_unlock_irq(&priv->lock);
> }
> @@ -427,6 +431,10 @@ static void path_rec_completion(int status,
>
> if (!ib_init_ah_from_path(priv->ca, priv->port, pathrec, &av))
> ah = ipoib_create_ah(dev, priv->pd, &av);
> + } else {
> + path->stale = 1;
> + queue_delayed_work(ipoib_workqueue, &priv->path_refresh_task,
> + round_jiffies_relative(HZ));
> }
>
> spin_lock_irqsave(&priv->lock, flags);
> @@ -477,7 +485,6 @@ static void path_rec_completion(int status,
> while ((skb = __skb_dequeue(&neigh->queue)))
> __skb_queue_tail(&skqueue, skb);
> }
> - path->valid = 1;
> }
>
> path->query = NULL;
> @@ -551,9 +558,29 @@ static int path_rec_start(struct net_device *dev,
> return path->query_id;
> }
>
> + path->stale = 0;
> return 0;
> }
>
> +void ipoib_refresh_paths(struct work_struct *work)
> +{
> + struct ipoib_dev_priv *priv =
> + container_of(work, struct ipoib_dev_priv, path_refresh_task.work);
> + struct net_device *dev = priv->dev;
> + struct ipoib_path *path, *tp;
> +
> + spin_lock_irq(&priv->lock);
> + list_for_each_entry_safe(path, tp, &priv->path_list, list) {
> + ipoib_dbg(priv, "restart path LID 0x%04x GID %pI6\n",
> + be16_to_cpu(path->pathrec.dlid),
> + path->pathrec.dgid.raw);
> + if (path->stale)
> + path_rec_start(dev, path);
> + }
> +
> + spin_unlock_irq(&priv->lock);
> +}
> +
> static void neigh_add_path(struct sk_buff *skb, struct net_device *dev)
> {
> struct ipoib_dev_priv *priv = netdev_priv(dev);
> @@ -656,7 +683,7 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
> spin_lock_irqsave(&priv->lock, flags);
>
> path = __path_find(dev, phdr->hwaddr + 4);
> - if (!path || !path->valid) {
> + if (!path) {
> if (!path)
> path = path_rec_create(dev, phdr->hwaddr + 4);
> if (path) {
> @@ -1070,6 +1097,7 @@ static void ipoib_setup(struct net_device *dev)
> INIT_WORK(&priv->flush_heavy, ipoib_ib_dev_flush_heavy);
> INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task);
> INIT_DELAYED_WORK(&priv->ah_reap_task, ipoib_reap_ah);
> + INIT_DELAYED_WORK(&priv->path_refresh_task, ipoib_refresh_paths);
> }
>
> struct ipoib_dev_priv *ipoib_intf_alloc(const char *name)
> _______________________________________________
> general mailing list
> general at lists.openfabrics.org
> http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general
>
> To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
>
--
--Yossi
More information about the general
mailing list