[ofa-general] Re: [PATCH 3/6 v2] fix pkey change handling and remove the cahce
Michael S. Tsirkin
mst at dev.mellanox.co.il
Mon May 7 08:30:25 PDT 2007
All in all, this patch tries to do many things at once. I wonder whether you
can split the patch in 2: fix the pkey change case separately, and remove pkey
polling separately.
> Quoting Yosef Etigin <yosefe at voltaire.com>:
> Subject: [PATCH 3/6 v2] fix pkey change handling and remove the cahce
>
> ipoib: handle pkey change events
>
> This issue was found during partitioning & SM fail over testing.
>
> * added flush flag to ipoib_ib_dev_stop(), ipoib_ib_dev_down() alike
> * fixed a bug in device extraction from the work struct
> * removed some warnings in case they are caused due to missing PKEY as
> this seems like a valid flow now.
This seems to remove a useful tool for debugging invalid pkeys.
Why is this a valid flow now?
> * Assume that the cache is coherent - do not retry on cache queries
> * Restart child interfaces first before parent
Why? Is this related to pkey change somehow?
> * Remove the pkey polling thread and pkey delayed initiallization
> * If an interface is brought up but pkey is not found, mark it with
> IPOIB_PKEY_NEEDED and when a pkey event arrives, try to restart it.
>
> SM reconfiguration or failover possibly causes a shuffling of the values in the port
> pkey table. The current implementation only queries for the index of the pkey once,
> when it creates the device QP and after that moves it into working state, and hence
> does not address this scenario. Fix this by using the PKEY_CHANGE event as a trigger
> to reconfigure the device QP.
>
>
> Signed-off-by: Moni Levy <monil at voltaire.com>
> Signed-off-by: Yosef Etigin <yosefe at voltaire.com>
> ---
> drivers/infiniband/ulp/ipoib/ipoib.h | 10 -
> drivers/infiniband/ulp/ipoib/ipoib_ib.c | 144 ++++++++-----------------
> drivers/infiniband/ulp/ipoib/ipoib_main.c | 11 -
> drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 11 +
> drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 21 +--
> 5 files changed, 76 insertions(+), 121 deletions(-)
>
> Index: b/drivers/infiniband/ulp/ipoib/ipoib.h
> ===================================================================
> --- a/drivers/infiniband/ulp/ipoib/ipoib.h 2007-05-06 09:26:08.000000000 +0300
> +++ b/drivers/infiniband/ulp/ipoib/ipoib.h 2007-05-06 09:26:18.000000000 +0300
> @@ -80,7 +80,7 @@ enum {
> IPOIB_FLAG_INITIALIZED = 1,
> IPOIB_FLAG_ADMIN_UP = 2,
> IPOIB_PKEY_ASSIGNED = 3,
> - IPOIB_PKEY_STOP = 4,
> + IPOIB_PKEY_NEEDED = 4,
> IPOIB_FLAG_SUBINTERFACE = 5,
> IPOIB_MCAST_RUN = 6,
> IPOIB_STOP_REAPER = 7,
> @@ -202,9 +202,9 @@ struct ipoib_dev_priv {
> struct list_head multicast_list;
> struct rb_root multicast_tree;
>
> - struct delayed_work pkey_task;
> struct delayed_work mcast_task;
> struct work_struct flush_task;
> + struct work_struct pkey_task;
> struct work_struct restart_task;
> struct delayed_work ah_reap_task;
>
> @@ -333,12 +333,13 @@ struct ipoib_dev_priv *ipoib_intf_alloc(
>
> int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
> void ipoib_ib_dev_flush(struct work_struct *work);
> +void ipoib_pkey_event(struct work_struct *work);
> void ipoib_ib_dev_cleanup(struct net_device *dev);
>
> int ipoib_ib_dev_open(struct net_device *dev);
> int ipoib_ib_dev_up(struct net_device *dev);
> int ipoib_ib_dev_down(struct net_device *dev, int flush);
> -int ipoib_ib_dev_stop(struct net_device *dev);
> +int ipoib_ib_dev_stop(struct net_device *dev, int flush);
>
> int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
> void ipoib_dev_cleanup(struct net_device *dev);
> @@ -384,9 +385,6 @@ void ipoib_event(struct ib_event_handler
> int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey);
> int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey);
>
> -void ipoib_pkey_poll(struct work_struct *work);
> -int ipoib_pkey_dev_delay_open(struct net_device *dev);
> -
> #ifdef CONFIG_INFINIBAND_IPOIB_CM
>
> #define IPOIB_FLAGS_RC 0x80
> Index: b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
> ===================================================================
> --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c 2007-05-06 09:26:17.000000000 +0300
> +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c 2007-05-06 09:26:26.000000000 +0300
> @@ -422,14 +422,14 @@ int ipoib_ib_dev_open(struct net_device
> ret = ipoib_ib_post_receives(dev);
> if (ret) {
> ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret);
> - ipoib_ib_dev_stop(dev);
> + ipoib_ib_dev_stop(dev, 1);
> return -1;
> }
>
> ret = ipoib_cm_dev_open(dev);
> if (ret) {
> ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret);
> - ipoib_ib_dev_stop(dev);
> + ipoib_ib_dev_stop(dev, 1);
> return -1;
> }
>
> @@ -441,28 +441,10 @@ int ipoib_ib_dev_open(struct net_device
> return 0;
> }
>
> -static void ipoib_pkey_dev_check_presence(struct net_device *dev)
> -{
> - struct ipoib_dev_priv *priv = netdev_priv(dev);
> - u16 pkey_index = 0;
> -
> - if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &pkey_index))
> - clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
> - else
> - set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
> -}
> -
> int ipoib_ib_dev_up(struct net_device *dev)
> {
> struct ipoib_dev_priv *priv = netdev_priv(dev);
>
> - ipoib_pkey_dev_check_presence(dev);
> -
> - if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
> - ipoib_dbg(priv, "PKEY is not assigned.\n");
> - return 0;
> - }
> -
> set_bit(IPOIB_FLAG_OPER_UP, &priv->flags);
>
> return ipoib_mcast_start_thread(dev);
> @@ -477,16 +459,6 @@ int ipoib_ib_dev_down(struct net_device
> clear_bit(IPOIB_FLAG_OPER_UP, &priv->flags);
> netif_carrier_off(dev);
>
> - /* Shutdown the P_Key thread if still active */
> - if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
> - mutex_lock(&pkey_mutex);
> - set_bit(IPOIB_PKEY_STOP, &priv->flags);
> - cancel_delayed_work(&priv->pkey_task);
> - mutex_unlock(&pkey_mutex);
> - if (flush)
> - flush_workqueue(ipoib_workqueue);
> - }
> -
> ipoib_mcast_stop_thread(dev, flush);
> ipoib_mcast_dev_flush(dev);
>
> @@ -508,7 +480,7 @@ static int recvs_pending(struct net_devi
> return pending;
> }
>
> -int ipoib_ib_dev_stop(struct net_device *dev)
> +int ipoib_ib_dev_stop(struct net_device *dev, int flush)
> {
> struct ipoib_dev_priv *priv = netdev_priv(dev);
> struct ib_qp_attr qp_attr;
> @@ -581,7 +553,8 @@ timeout:
> /* Wait for all AHs to be reaped */
> set_bit(IPOIB_STOP_REAPER, &priv->flags);
> cancel_delayed_work(&priv->ah_reap_task);
> - flush_workqueue(ipoib_workqueue);
> + if (flush)
> + flush_workqueue(ipoib_workqueue);
>
> begin = jiffies;
>
> @@ -622,14 +595,33 @@ int ipoib_ib_dev_init(struct net_device
> return 0;
> }
>
> -void ipoib_ib_dev_flush(struct work_struct *work)
> +static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, int restart_qp)
> {
> - struct ipoib_dev_priv *cpriv, *priv =
> - container_of(work, struct ipoib_dev_priv, flush_task);
> + struct ipoib_dev_priv *cpriv;
> struct net_device *dev = priv->dev;
>
> - if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags) ) {
> - ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_INITIALIZED not set.\n");
> + mutex_lock(&priv->vlan_mutex);
> +
> + /* Flush any child interfaces */
> + list_for_each_entry(cpriv, &priv->child_intfs, list)
> + __ipoib_ib_dev_flush(cpriv, restart_qp);
> +
> + mutex_unlock(&priv->vlan_mutex);
> +
> + /*
> + * If the device is not initiallized since it needs a pkey -
> + * try to reopen it
> + */
> + if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) {
> +
> + if (restart_qp
> + && test_bit(IPOIB_PKEY_NEEDED, &priv->flags)
> + && test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) {
> + /* this iface needs pkey, try to bring it up */
> + ipoib_open(priv->dev);
> + }
> + else
> + ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_INITIALIZED not set.\n");
> return;
> }
Clean up the above please.
> @@ -642,6 +634,12 @@ void ipoib_ib_dev_flush(struct work_stru
>
> ipoib_ib_dev_down(dev, 0);
>
> + if (restart_qp) {
> + if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags) )
> + ipoib_ib_dev_stop(dev, 0);
> + ipoib_ib_dev_open(dev);
> + }
> +
> /*
> * The device could have been brought down between the start and when
> * we get here, don't bring it back up if it's not configured up
I find these if (restart_qp) branches somewhat confusing.
Why is this flag tested in 2 places?
> @@ -650,14 +648,25 @@ void ipoib_ib_dev_flush(struct work_stru
> ipoib_ib_dev_up(dev);
> ipoib_mcast_restart_task(&priv->restart_task);
> }
> +}
>
> - mutex_lock(&priv->vlan_mutex);
> +void ipoib_ib_dev_flush(struct work_struct *work)
> +{
> + struct ipoib_dev_priv *priv =
> + container_of(work, struct ipoib_dev_priv, flush_task);
> + /* we only restart the QP in case of pkey change event */
Kill the comment please.
> + ipoib_dbg(priv, "Flushing %s\n", priv->dev->name);
> + __ipoib_ib_dev_flush(priv, 0);
> +}
>
> - /* Flush any child interfaces too */
> - list_for_each_entry(cpriv, &priv->child_intfs, list)
> - ipoib_ib_dev_flush(&cpriv->flush_task);
> +void ipoib_pkey_event(struct work_struct *work)
> +{
> + struct ipoib_dev_priv *priv =
> + container_of(work, struct ipoib_dev_priv, pkey_task);
>
> - mutex_unlock(&priv->vlan_mutex);
> + /* restart the QP in case of pkey change event */
> + ipoib_dbg(priv, "Flushing %s and restarting it's QP\n", priv->dev->name);
Kill the comment please.
> + __ipoib_ib_dev_flush(priv, 1);
> }
>
> void ipoib_ib_dev_cleanup(struct net_device *dev)
> @@ -672,54 +681,3 @@ void ipoib_ib_dev_cleanup(struct net_dev
> ipoib_transport_dev_cleanup(dev);
> }
>
> -/*
> - * Delayed P_Key Assigment Interim Support
> - *
> - * The following is initial implementation of delayed P_Key assigment
> - * mechanism. It is using the same approach implemented for the multicast
> - * group join. The single goal of this implementation is to quickly address
> - * Bug #2507. This implementation will probably be removed when the P_Key
> - * change async notification is available.
> - */
> -
> -void ipoib_pkey_poll(struct work_struct *work)
> -{
> - struct ipoib_dev_priv *priv =
> - container_of(work, struct ipoib_dev_priv, pkey_task.work);
> - struct net_device *dev = priv->dev;
> -
> - ipoib_pkey_dev_check_presence(dev);
> -
> - if (test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags))
> - ipoib_open(dev);
> - else {
> - mutex_lock(&pkey_mutex);
> - if (!test_bit(IPOIB_PKEY_STOP, &priv->flags))
> - queue_delayed_work(ipoib_workqueue,
> - &priv->pkey_task,
> - HZ);
> - mutex_unlock(&pkey_mutex);
> - }
> -}
> -
> -int ipoib_pkey_dev_delay_open(struct net_device *dev)
> -{
> - struct ipoib_dev_priv *priv = netdev_priv(dev);
> -
> - /* Look for the interface pkey value in the IB Port P_Key table and */
> - /* set the interface pkey assigment flag */
> - ipoib_pkey_dev_check_presence(dev);
> -
> - /* P_Key value not assigned yet - start polling */
> - if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
> - mutex_lock(&pkey_mutex);
> - clear_bit(IPOIB_PKEY_STOP, &priv->flags);
> - queue_delayed_work(ipoib_workqueue,
> - &priv->pkey_task,
> - HZ);
> - mutex_unlock(&pkey_mutex);
> - return 1;
> - }
> -
> - return 0;
> -}
>
> Index: b/drivers/infiniband/ulp/ipoib/ipoib_main.c
> ===================================================================
> --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c 2007-05-06 09:26:08.000000000 +0300
> +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c 2007-05-06 09:26:18.000000000 +0300
> @@ -100,14 +100,11 @@ int ipoib_open(struct net_device *dev)
>
> set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
>
> - if (ipoib_pkey_dev_delay_open(dev))
> - return 0;
> -
> if (ipoib_ib_dev_open(dev))
> - return -EINVAL;
> + return test_bit(IPOIB_PKEY_NEEDED, &priv->flags) ? 0 : -EINVAL;
>
> if (ipoib_ib_dev_up(dev)) {
> - ipoib_ib_dev_stop(dev);
> + ipoib_ib_dev_stop(dev, 1);
> return -EINVAL;
> }
>
> @@ -152,7 +149,7 @@ static int ipoib_stop(struct net_device
> flush_workqueue(ipoib_workqueue);
>
> ipoib_ib_dev_down(dev, 1);
> - ipoib_ib_dev_stop(dev);
> + ipoib_ib_dev_stop(dev, 1);
>
> if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
> struct ipoib_dev_priv *cpriv;
> @@ -990,7 +987,7 @@ static void ipoib_setup(struct net_devic
> INIT_LIST_HEAD(&priv->dead_ahs);
> INIT_LIST_HEAD(&priv->multicast_list);
>
> - INIT_DELAYED_WORK(&priv->pkey_task, ipoib_pkey_poll);
> + INIT_WORK(&priv->pkey_task, ipoib_pkey_event);
> INIT_DELAYED_WORK(&priv->mcast_task, ipoib_mcast_join_task);
> INIT_WORK(&priv->flush_task, ipoib_ib_dev_flush);
> INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task);
> Index: b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
> ===================================================================
> --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c 2007-05-06 09:26:08.000000000 +0300
> +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c 2007-05-06 09:26:18.000000000 +0300
> @@ -232,9 +232,10 @@ static int ipoib_mcast_join_finish(struc
> ret = ipoib_mcast_attach(dev, be16_to_cpu(mcast->mcmember.mlid),
> &mcast->mcmember.mgid);
> if (ret < 0) {
> - ipoib_warn(priv, "couldn't attach QP to multicast group "
> - IPOIB_GID_FMT "\n",
> - IPOIB_GID_ARG(mcast->mcmember.mgid));
> + if (ret != -ENXIO) /* No pkey found */
> + ipoib_warn(priv, "couldn't attach QP to multicast group "
> + IPOIB_GID_FMT "\n",
> + IPOIB_GID_ARG(mcast->mcmember.mgid));
>
> clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags);
> return ret;
> @@ -312,7 +313,7 @@ ipoib_mcast_sendonly_join_complete(int s
> status = ipoib_mcast_join_finish(mcast, &multicast->rec);
>
> if (status) {
> - if (mcast->logcount++ < 20)
> + if (mcast->logcount++ < 20 && status != -ENXIO)
> ipoib_dbg_mcast(netdev_priv(dev), "multicast join failed for "
> IPOIB_GID_FMT ", status %d\n",
> IPOIB_GID_ARG(mcast->mcmember.mgid), status);
> @@ -420,7 +421,7 @@ static int ipoib_mcast_join_complete(int
> ", status %d\n",
> IPOIB_GID_ARG(mcast->mcmember.mgid),
> status);
> - } else {
> + } else if (status != -ENXIO) {
> ipoib_warn(priv, "multicast join failed for "
> IPOIB_GID_FMT ", status %d\n",
> IPOIB_GID_ARG(mcast->mcmember.mgid),
>
> Index: b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
> ===================================================================
> --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c 2007-05-06 09:26:08.000000000 +0300
> +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c 2007-05-06 09:26:18.000000000 +0300
> @@ -33,8 +33,6 @@
> * $Id: ipoib_verbs.c 1349 2004-12-16 21:09:43Z roland $
> */
>
> -#include <rdma/ib_cache.h>
> -
> #include "ipoib.h"
>
> int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid)
> @@ -49,12 +47,12 @@ int ipoib_mcast_attach(struct net_device
> if (!qp_attr)
> goto out;
>
> - if (ib_find_cached_pkey(priv->ca, priv->port, priv->pkey, &pkey_index)) {
> - clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
> + if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &pkey_index)) {
> + clear_bit(IPOIB_PKEY_NEEDED, &priv->flags);
> ret = -ENXIO;
> goto out;
> }
> - set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
> + set_bit(IPOIB_PKEY_NEEDED, &priv->flags);
>
> /* set correct QKey for QP */
> qp_attr->qkey = priv->qkey;
> @@ -103,12 +101,12 @@ int ipoib_init_qp(struct net_device *dev
> * The port has to be assigned to the respective IB partition in
> * advance.
> */
> - ret = ib_find_cached_pkey(priv->ca, priv->port, priv->pkey, &pkey_index);
> + ret = ib_find_pkey(priv->ca, priv->port, priv->pkey, &pkey_index);
> if (ret) {
> - clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
> + set_bit(IPOIB_PKEY_NEEDED, &priv->flags);
> return ret;
> }
> - set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
> + clear_bit(IPOIB_PKEY_NEEDED, &priv->flags);
>
> qp_attr.qp_state = IB_QPS_INIT;
> qp_attr.qkey = 0;
> @@ -238,7 +236,7 @@ void ipoib_transport_dev_cleanup(struct
> ipoib_warn(priv, "ib_qp_destroy failed\n");
>
> priv->qp = NULL;
> - clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
> + clear_bit(IPOIB_PKEY_NEEDED, &priv->flags);
> }
>
> if (ib_destroy_cq(priv->cq))
> @@ -260,7 +258,6 @@ void ipoib_event(struct ib_event_handler
> container_of(handler, struct ipoib_dev_priv, event_handler);
>
> if ((record->event == IB_EVENT_PORT_ERR ||
> - record->event == IB_EVENT_PKEY_CHANGE ||
> record->event == IB_EVENT_PORT_ACTIVE ||
> record->event == IB_EVENT_LID_CHANGE ||
> record->event == IB_EVENT_SM_CHANGE ||
> @@ -268,5 +265,9 @@ void ipoib_event(struct ib_event_handler
> record->element.port_num == priv->port) {
> ipoib_dbg(priv, "Port state change event\n");
> queue_work(ipoib_workqueue, &priv->flush_task);
> + } else if (record->event == IB_EVENT_PKEY_CHANGE &&
> + record->element.port_num == priv->port) {
> + ipoib_dbg(priv, "pkey change event on port:%d\n", priv->port);
> + queue_work(ipoib_workqueue, &priv->pkey_task);
> }
> }
--
MST
More information about the general
mailing list