[openib-general] [PATCH] IB/ipoib: Fix ipoib handling for pkey reordering
Moni Levy
monil at voltaire.com
Mon Feb 19 22:15:04 PST 2007
On 2/19/07, Moni Levy <monil at voltaire.com> wrote:
> This issue was found during partitioning & SM fail over testing. The fix was tested for 24
> hours with pkey reshuffling every few seconds. The patch applies to Roland's master
> branch.
I found an issue with that patch, I'll post an updated one soon.
-- Moni
>
> SM reconfiguration or failover possibly causes a shuffling of the values in the port pkey
> table. The current implementation only queries for the index of the pkey once, when it
> creates the device QP and after that moves it into working state, and hence does not
> address this scenario. Fix this by using the PKEY_CHANGE event as a trigger to
> reconfigure the device QP.
>
> Signed-off-by: Moni Levy <monil at voltaire.com>
> ---
> ipoib.h | 2 ++
> ipoib_ib.c | 22 ++++++++++++++++++++--
> ipoib_main.c | 1 +
> ipoib_verbs.c | 4 +++-
> 4 files changed, 26 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
> index 07deee8..ed854e8 100644
> --- a/drivers/infiniband/ulp/ipoib/ipoib.h
> +++ b/drivers/infiniband/ulp/ipoib/ipoib.h
> @@ -139,6 +139,7 @@ struct ipoib_dev_priv {
> struct delayed_work pkey_task;
> struct delayed_work mcast_task;
> struct work_struct flush_task;
> + struct work_struct flush_restart_qp_task;
> struct work_struct restart_task;
> struct delayed_work ah_reap_task;
>
> @@ -261,6 +262,7 @@ struct ipoib_dev_priv *ipoib_intf_alloc(
>
> int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
> void ipoib_ib_dev_flush(struct work_struct *work);
> +void ipoib_ib_dev_flush_restart_qp(struct work_struct *work);
> void ipoib_ib_dev_cleanup(struct net_device *dev);
>
> int ipoib_ib_dev_open(struct net_device *dev);
> diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
> index 59d9594..5e2ada9 100644
> --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
> +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
> @@ -611,7 +611,7 @@ int ipoib_ib_dev_init(struct net_device
> return 0;
> }
>
> -void ipoib_ib_dev_flush(struct work_struct *work)
> +static void __ipoib_ib_dev_flush(struct work_struct *work, int restart_qp)
> {
> struct ipoib_dev_priv *cpriv, *priv =
> container_of(work, struct ipoib_dev_priv, flush_task);
> @@ -630,6 +630,12 @@ void ipoib_ib_dev_flush(struct work_stru
> ipoib_dbg(priv, "flushing\n");
>
> ipoib_ib_dev_down(dev, 0);
> +
> + if (restart_qp) {
> + ipoib_dbg(priv, "restarting the device QP\n");
> + ipoib_ib_dev_stop(dev);
> + ipoib_ib_dev_open(dev);
> + }
>
> /*
> * The device could have been brought down between the start and when
> @@ -644,11 +650,23 @@ void ipoib_ib_dev_flush(struct work_stru
>
> /* Flush any child interfaces too */
> list_for_each_entry(cpriv, &priv->child_intfs, list)
> - ipoib_ib_dev_flush(&cpriv->flush_task);
> + __ipoib_ib_dev_flush(&cpriv->flush_task, restart_qp);
>
> mutex_unlock(&priv->vlan_mutex);
> }
>
> +void ipoib_ib_dev_flush(struct work_struct *work)
> +{
> + /* We only restart the QP in case of PKEY change event */
> + __ipoib_ib_dev_flush(work, 0);
> +}
> +
> +void ipoib_ib_dev_flush_restart_qp(struct work_struct *work)
> +{
> + /* We only restart the QP in case of PKEY change event */
> + __ipoib_ib_dev_flush(work, 1);
> +}
> +
> void ipoib_ib_dev_cleanup(struct net_device *dev)
> {
> struct ipoib_dev_priv *priv = netdev_priv(dev);
> diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
> index 705eb1d..da46b79 100644
> --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
> +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
> @@ -942,6 +942,7 @@ static void ipoib_setup(struct net_devic
> INIT_DELAYED_WORK(&priv->pkey_task, ipoib_pkey_poll);
> INIT_DELAYED_WORK(&priv->mcast_task, ipoib_mcast_join_task);
> INIT_WORK(&priv->flush_task, ipoib_ib_dev_flush);
> + INIT_WORK(&priv->flush_restart_qp_task, ipoib_ib_dev_flush_restart_qp);
> INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task);
> INIT_DELAYED_WORK(&priv->ah_reap_task, ipoib_reap_ah);
> }
> diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
> index 7b717c6..c249915 100644
> --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
> +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
> @@ -252,12 +252,14 @@ void ipoib_event(struct ib_event_handler
> container_of(handler, struct ipoib_dev_priv, event_handler);
>
> if (record->event == IB_EVENT_PORT_ERR ||
> - record->event == IB_EVENT_PKEY_CHANGE ||
> record->event == IB_EVENT_PORT_ACTIVE ||
> record->event == IB_EVENT_LID_CHANGE ||
> record->event == IB_EVENT_SM_CHANGE ||
> record->event == IB_EVENT_CLIENT_REREGISTER) {
> ipoib_dbg(priv, "Port state change event\n");
> queue_work(ipoib_workqueue, &priv->flush_task);
> + } else if (record->event == IB_EVENT_PKEY_CHANGE) {
> + ipoib_dbg(priv, "PKEY change event\n");
> + queue_work(ipoib_workqueue, &priv->flush_restart_qp_task);
> }
> }
>
> _______________________________________________
> openib-general mailing list
> openib-general at openib.org
> http://openib.org/mailman/listinfo/openib-general
>
> To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
>
>
More information about the general
mailing list