[ofa-general] Re: [PATCHv3] IB/ipoib: Fix ipoib handling for pkey reordering
Moni Levy
monil at voltaire.com
Wed Mar 7 00:00:25 PST 2007
On 3/6/07, Moni Levy <monil at voltaire.com> wrote:
> Roland
> On 3/4/07, Moni Levy <myopenib at gmail.com> wrote:
> > On 3/1/07, Michael S. Tsirkin <mst at mellanox.co.il> wrote:
> > > > SM reconfiguration or failover possibly causes a shuffling of the values in the port pkey table. The current implementation only queries for the index of the pkey once, when it creates the device QP and after that moves it into working state, and hence
> > > > does not address this scenario. Fix this by using the PKEY_CHANGE event as a trigger to reconfigure the device QP.
> > >
> > > Please limit line length to 80 chars or so.
> >
> > Do you want me to change anything else? We really need that for OFED 1.2.
> > Here is an updated patch.
>
> Did you have a chance to look at the last version of the patch ? I
> think that it's now acceptable for Michael as I got no additional
> remarks, again, we really need a resolution to that issue.
Bug 420 was opened to track that issue.
-- Moni
>
> Thanks,
> Moni
>
> >
> > This issue was found during partitioning & SM fail over testing. The fix was
> > tested over the weekend with pkey reshuffling, removal and addition every few
> > seconds concurrent with OFED restart.
> >
> > Changes from v1:
> > * added flush flag to ipoib_ib_dev_stop(), ipoib_ib_dev_down() alike
> > * fixed a bug in device extraction from the work struct
> > * removed some warnings in case they are caused due to missing PKEY as
> > this seems like a valid flow now.
> >
> > Changes from v2:
> > * less/fixed debug prints
> > * flush_restart_qp stuff renamed to just restart_qp
> > * the patch now depends on Roland's "IPoIB: Only handle async
> > events for one port"
> >
> > SM reconfiguration or failover possibly causes a shuffling of the values in
> > the port pkey table. The current implementation only queries for the index of
> > the pkey once, when it creates the device QP and after that moves it into
> > working state, and hence does not address this scenario. Fix this by using the
> > PKEY_CHANGE event as a trigger to reconfigure the device QP.
> >
> > Signed-off-by: Moni Levy <monil at voltaire.com>
> > ---
> > drivers/infiniband/ulp/ipoib/ipoib.h | 4 +
> > drivers/infiniband/ulp/ipoib/ipoib_ib.c | 51 ++++++++++++++++++++-----
> > drivers/infiniband/ulp/ipoib/ipoib_main.c | 5 +-
> > drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 11 ++---
> > drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 7 ++-
> > 5 files changed, 59 insertions(+), 19 deletions(-)
> >
> > Index: b/drivers/infiniband/ulp/ipoib/ipoib.h
> > ===================================================================
> > --- a/drivers/infiniband/ulp/ipoib/ipoib.h 2007-03-01 14:11:43.698307017 +0200
> > +++ b/drivers/infiniband/ulp/ipoib/ipoib.h 2007-03-01 14:43:04.624119588 +0200
> > @@ -205,6 +205,7 @@ struct ipoib_dev_priv {
> > struct delayed_work pkey_task;
> > struct delayed_work mcast_task;
> > struct work_struct flush_task;
> > + struct work_struct restart_qp_task;
> > struct work_struct restart_task;
> > struct delayed_work ah_reap_task;
> >
> > @@ -334,12 +335,13 @@ struct ipoib_dev_priv *ipoib_intf_alloc(
> >
> > int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
> > void ipoib_ib_dev_flush(struct work_struct *work);
> > +void ipoib_ib_dev_restart_qp(struct work_struct *work);
> > void ipoib_ib_dev_cleanup(struct net_device *dev);
> >
> > int ipoib_ib_dev_open(struct net_device *dev);
> > int ipoib_ib_dev_up(struct net_device *dev);
> > int ipoib_ib_dev_down(struct net_device *dev, int flush);
> > -int ipoib_ib_dev_stop(struct net_device *dev);
> > +int ipoib_ib_dev_stop(struct net_device *dev, int flush);
> >
> > int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
> > void ipoib_dev_cleanup(struct net_device *dev);
> > Index: b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
> > ===================================================================
> > --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c 2007-03-01 14:11:43.713304355 +0200
> > +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c 2007-03-01 16:14:17.003881103 +0200
> > @@ -415,21 +415,22 @@ int ipoib_ib_dev_open(struct net_device
> >
> > ret = ipoib_init_qp(dev);
> > if (ret) {
> > - ipoib_warn(priv, "ipoib_init_qp returned %d\n", ret);
> > + if (ret != -ENOENT)
> > + ipoib_warn(priv, "ipoib_init_qp returned %d\n", ret);
> > return -1;
> > }
> >
> > ret = ipoib_ib_post_receives(dev);
> > if (ret) {
> > ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret);
> > - ipoib_ib_dev_stop(dev);
> > + ipoib_ib_dev_stop(dev, 1);
> > return -1;
> > }
> >
> > ret = ipoib_cm_dev_open(dev);
> > if (ret) {
> > ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret);
> > - ipoib_ib_dev_stop(dev);
> > + ipoib_ib_dev_stop(dev, 1);
> > return -1;
> > }
> >
> > @@ -508,7 +509,7 @@ static int recvs_pending(struct net_devi
> > return pending;
> > }
> >
> > -int ipoib_ib_dev_stop(struct net_device *dev)
> > +int ipoib_ib_dev_stop(struct net_device *dev, int flush)
> > {
> > struct ipoib_dev_priv *priv = netdev_priv(dev);
> > struct ib_qp_attr qp_attr;
> > @@ -581,7 +582,8 @@ timeout:
> > /* Wait for all AHs to be reaped */
> > set_bit(IPOIB_STOP_REAPER, &priv->flags);
> > cancel_delayed_work(&priv->ah_reap_task);
> > - flush_workqueue(ipoib_workqueue);
> > + if (flush)
> > + flush_workqueue(ipoib_workqueue);
> >
> > begin = jiffies;
> >
> > @@ -622,13 +624,17 @@ int ipoib_ib_dev_init(struct net_device
> > return 0;
> > }
> >
> > -void ipoib_ib_dev_flush(struct work_struct *work)
> > +static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, int restart_qp)
> > {
> > - struct ipoib_dev_priv *cpriv, *priv =
> > - container_of(work, struct ipoib_dev_priv, flush_task);
> > + struct ipoib_dev_priv *cpriv;
> > struct net_device *dev = priv->dev;
> >
> > - if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags) ) {
> > + /*
> > + * ipoib_ib_dev_stop() below may not find the PKey and leave the
> > + * IPOIB_FLAG_INITIALIZED flag off so flush in that case with restart_qp
> > + * flag on is Ok.
> > + */
> > + if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags) && !restart_qp) {
> > ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_INITIALIZED not set.\n");
> > return;
> > }
> > @@ -642,6 +648,13 @@ void ipoib_ib_dev_flush(struct work_stru
> >
> > ipoib_ib_dev_down(dev, 0);
> >
> > + if (restart_qp) {
> > + ipoib_dbg(priv, "restarting the device QP\n");
> > + if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags) )
> > + ipoib_ib_dev_stop(dev, 0);
> > + ipoib_ib_dev_open(dev);
> > + }
> > +
> > /*
> > * The device could have been brought down between the start and when
> > * we get here, don't bring it back up if it's not configured up
> > @@ -655,11 +668,29 @@ void ipoib_ib_dev_flush(struct work_stru
> >
> > /* Flush any child interfaces too */
> > list_for_each_entry(cpriv, &priv->child_intfs, list)
> > - ipoib_ib_dev_flush(&cpriv->flush_task);
> > + __ipoib_ib_dev_flush(cpriv, restart_qp);
> >
> > mutex_unlock(&priv->vlan_mutex);
> > }
> >
> > +void ipoib_ib_dev_flush(struct work_struct *work)
> > +{
> > + struct ipoib_dev_priv *priv =
> > + container_of(work, struct ipoib_dev_priv, flush_task);
> > + /* We only restart the QP in case of pkey change event */
> > + ipoib_dbg(priv, "Flushing %s\n", priv->dev->name);
> > + __ipoib_ib_dev_flush(priv, 0);
> > +}
> > +
> > +void ipoib_ib_dev_restart_qp(struct work_struct *work)
> > +{
> > + struct ipoib_dev_priv *priv =
> > + container_of(work, struct ipoib_dev_priv, restart_qp_task);
> > + /* We only restart the QP in case of pkey change event */
> > + ipoib_dbg(priv, "Flushing %s and restarting it's QP\n", priv->dev->name);
> > + __ipoib_ib_dev_flush(priv, 1);
> > +}
> > +
> > void ipoib_ib_dev_cleanup(struct net_device *dev)
> > {
> > struct ipoib_dev_priv *priv = netdev_priv(dev);
> > Index: b/drivers/infiniband/ulp/ipoib/ipoib_main.c
> > ===================================================================
> > --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c 2007-03-01 14:11:43.729301517 +0200
> > +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c 2007-03-01 14:43:04.666112093 +0200
> > @@ -107,7 +107,7 @@ int ipoib_open(struct net_device *dev)
> > return -EINVAL;
> >
> > if (ipoib_ib_dev_up(dev)) {
> > - ipoib_ib_dev_stop(dev);
> > + ipoib_ib_dev_stop(dev, 1);
> > return -EINVAL;
> > }
> >
> > @@ -152,7 +152,7 @@ static int ipoib_stop(struct net_device
> > flush_workqueue(ipoib_workqueue);
> >
> > ipoib_ib_dev_down(dev, 1);
> > - ipoib_ib_dev_stop(dev);
> > + ipoib_ib_dev_stop(dev, 1);
> >
> > if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
> > struct ipoib_dev_priv *cpriv;
> > @@ -993,6 +993,7 @@ static void ipoib_setup(struct net_devic
> > INIT_DELAYED_WORK(&priv->pkey_task, ipoib_pkey_poll);
> > INIT_DELAYED_WORK(&priv->mcast_task, ipoib_mcast_join_task);
> > INIT_WORK(&priv->flush_task, ipoib_ib_dev_flush);
> > + INIT_WORK(&priv->restart_qp_task, ipoib_ib_dev_restart_qp);
> > INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task);
> > INIT_DELAYED_WORK(&priv->ah_reap_task, ipoib_reap_ah);
> > }
> > Index: b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
> > ===================================================================
> > --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c 2007-03-01 14:11:43.743299033 +0200
> > +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c 2007-03-01 16:21:43.128181147 +0200
> > @@ -232,9 +232,10 @@ static int ipoib_mcast_join_finish(struc
> > ret = ipoib_mcast_attach(dev, be16_to_cpu(mcast->mcmember.mlid),
> > &mcast->mcmember.mgid);
> > if (ret < 0) {
> > - ipoib_warn(priv, "couldn't attach QP to multicast group "
> > - IPOIB_GID_FMT "\n",
> > - IPOIB_GID_ARG(mcast->mcmember.mgid));
> > + if (ret != -ENXIO) /* No pkey found */
> > + ipoib_warn(priv, "couldn't attach QP to multicast group "
> > + IPOIB_GID_FMT "\n",
> > + IPOIB_GID_ARG(mcast->mcmember.mgid));
> >
> > clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags);
> > return ret;
> > @@ -312,7 +313,7 @@ ipoib_mcast_sendonly_join_complete(int s
> > status = ipoib_mcast_join_finish(mcast, &multicast->rec);
> >
> > if (status) {
> > - if (mcast->logcount++ < 20)
> > + if (mcast->logcount++ < 20 && status != -ENXIO)
> > ipoib_dbg_mcast(netdev_priv(dev), "multicast join failed for "
> > IPOIB_GID_FMT ", status %d\n",
> > IPOIB_GID_ARG(mcast->mcmember.mgid), status);
> > @@ -416,7 +417,7 @@ static int ipoib_mcast_join_complete(int
> > ", status %d\n",
> > IPOIB_GID_ARG(mcast->mcmember.mgid),
> > status);
> > - } else {
> > + } else if (status != -ENXIO) {
> > ipoib_warn(priv, "multicast join failed for "
> > IPOIB_GID_FMT ", status %d\n",
> > IPOIB_GID_ARG(mcast->mcmember.mgid),
> > Index: b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
> > ===================================================================
> > --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c 2007-03-01 14:39:46.712444790 +0200
> > +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c 2007-03-01 16:12:55.069541201 +0200
> > @@ -52,8 +52,10 @@ int ipoib_mcast_attach(struct net_device
> > if (ib_find_cached_pkey(priv->ca, priv->port, priv->pkey, &pkey_index)) {
> > clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
> > ret = -ENXIO;
> > + ipoib_dbg(priv, "pkey %X not found\n", priv->pkey);
> > goto out;
> > }
> > + ipoib_dbg(priv, "pkey %X found at index %d\n", priv->pkey, pkey_index);
> > set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
> >
> > /* set correct QKey for QP */
> > @@ -260,7 +262,6 @@ void ipoib_event(struct ib_event_handler
> > container_of(handler, struct ipoib_dev_priv, event_handler);
> >
> > if ((record->event == IB_EVENT_PORT_ERR ||
> > - record->event == IB_EVENT_PKEY_CHANGE ||
> > record->event == IB_EVENT_PORT_ACTIVE ||
> > record->event == IB_EVENT_LID_CHANGE ||
> > record->event == IB_EVENT_SM_CHANGE ||
> > @@ -268,5 +269,9 @@ void ipoib_event(struct ib_event_handler
> > record->element.port_num == priv->port) {
> > ipoib_dbg(priv, "Port state change event\n");
> > queue_work(ipoib_workqueue, &priv->flush_task);
> > + } else if (record->event == IB_EVENT_PKEY_CHANGE &&
> > + record->element.port_num == priv->port) {
> > + ipoib_dbg(priv, "pkey change event on port:%d\n", priv->port);
> > + queue_work(ipoib_workqueue, &priv->restart_qp_task);
> > }
> > }
> >
> >
>
More information about the general
mailing list