[ofa-general] Re: [PATCHv3 1/2] ipoib: handle pkey change events
Yosef Etigin
yosefe at voltaire.com
Wed May 9 04:01:26 PDT 2007
Michael S. Tsirkin wrote:
>>@@ -642,6 +651,12 @@ void ipoib_ib_dev_flush(struct work_stru
>>
>> ipoib_ib_dev_down(dev, 0);
>>
>>+ if (restart_qp) {
>>+ if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
>>+ ipoib_ib_dev_stop(dev, 0);
>>+ ipoib_ib_dev_open(dev);
>>+ }
>>+
>> /*
>> * The device could have been brought down between the start and when
>> * we get here, don't bring it back up if it's not configured up
>
>
> This is something that still puzzles me
>
> 1. We have tested IPOIB_FLAG_INITIALIZED above already, didn't we?
> Did you observe it flipping in testing? If yes there's some race ...
>
> 2. Let's assume that device is not initialized:
> how come you are calling ipoib_ib_dev_open on it here?
>
Option 2 is true. this test is a leftover from a previous version of the patch
and should be removed.
--
This issue was found during partitioning & SM fail over testing.
* Added flush flag to ipoib_ib_dev_stop(), ipoib_ib_dev_down() alike
* Rename the polling thread work to 'pkey_poll_task' to avoid ambiguity
* Upon PKEY_CHANGE event, schedule a work that restarts the QP
* Restart child interfaces before parent. They might be up even if the
parent is down
* Use uncached pkey query upon qp initiallization
SM reconfiguration or failover possibly causes a shuffling of the values
in the port pkey table. The current implementation only queries for the
index of the pkey once, when it creates the device QP and after that moves
it into working state, and hence does not address this scenario. Fix this
by using the PKEY_CHANGE event as a trigger to reconfigure the device QP.
Signed-off-by: Yosef Etigin <yosefe at voltaire.com>
---
drivers/infiniband/ulp/ipoib/ipoib.h | 6 +-
drivers/infiniband/ulp/ipoib/ipoib_ib.c | 59 ++++++++++++++++++++---------
drivers/infiniband/ulp/ipoib/ipoib_main.c | 7 +--
drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 11 ++---
4 files changed, 56 insertions(+), 27 deletions(-)
Index: infiniband/drivers/infiniband/ulp/ipoib/ipoib.h
===================================================================
--- infiniband.orig/drivers/infiniband/ulp/ipoib/ipoib.h 2007-05-08 15:46:53.000000000 +0300
+++ infiniband/drivers/infiniband/ulp/ipoib/ipoib.h 2007-05-08 16:45:44.000000000 +0300
@@ -202,11 +202,12 @@ struct ipoib_dev_priv {
struct list_head multicast_list;
struct rb_root multicast_tree;
- struct delayed_work pkey_task;
+ struct delayed_work pkey_poll_task;
struct delayed_work mcast_task;
struct work_struct flush_task;
struct work_struct restart_task;
struct delayed_work ah_reap_task;
+ struct work_struct pkey_event_task;
struct ib_device *ca;
u8 port;
@@ -333,12 +334,13 @@ struct ipoib_dev_priv *ipoib_intf_alloc(
int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
void ipoib_ib_dev_flush(struct work_struct *work);
+void ipoib_pkey_event(struct work_struct *work);
void ipoib_ib_dev_cleanup(struct net_device *dev);
int ipoib_ib_dev_open(struct net_device *dev);
int ipoib_ib_dev_up(struct net_device *dev);
int ipoib_ib_dev_down(struct net_device *dev, int flush);
-int ipoib_ib_dev_stop(struct net_device *dev);
+int ipoib_ib_dev_stop(struct net_device *dev, int flush);
int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
void ipoib_dev_cleanup(struct net_device *dev);
Index: infiniband/drivers/infiniband/ulp/ipoib/ipoib_ib.c
===================================================================
--- infiniband.orig/drivers/infiniband/ulp/ipoib/ipoib_ib.c 2007-05-08 15:46:53.000000000 +0300
+++ infiniband/drivers/infiniband/ulp/ipoib/ipoib_ib.c 2007-05-09 13:56:00.754030478 +0300
@@ -422,14 +422,14 @@ int ipoib_ib_dev_open(struct net_device
ret = ipoib_ib_post_receives(dev);
if (ret) {
ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret);
- ipoib_ib_dev_stop(dev);
+ ipoib_ib_dev_stop(dev, 1);
return -1;
}
ret = ipoib_cm_dev_open(dev);
if (ret) {
ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret);
- ipoib_ib_dev_stop(dev);
+ ipoib_ib_dev_stop(dev, 1);
return -1;
}
@@ -481,7 +481,7 @@ int ipoib_ib_dev_down(struct net_device
if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
mutex_lock(&pkey_mutex);
set_bit(IPOIB_PKEY_STOP, &priv->flags);
- cancel_delayed_work(&priv->pkey_task);
+ cancel_delayed_work(&priv->pkey_poll_task);
mutex_unlock(&pkey_mutex);
if (flush)
flush_workqueue(ipoib_workqueue);
@@ -508,7 +508,7 @@ static int recvs_pending(struct net_devi
return pending;
}
-int ipoib_ib_dev_stop(struct net_device *dev)
+int ipoib_ib_dev_stop(struct net_device *dev, int flush)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ib_qp_attr qp_attr;
@@ -581,7 +581,8 @@ timeout:
/* Wait for all AHs to be reaped */
set_bit(IPOIB_STOP_REAPER, &priv->flags);
cancel_delayed_work(&priv->ah_reap_task);
- flush_workqueue(ipoib_workqueue);
+ if (flush)
+ flush_workqueue(ipoib_workqueue);
begin = jiffies;
@@ -622,13 +623,21 @@ int ipoib_ib_dev_init(struct net_device
return 0;
}
-void ipoib_ib_dev_flush(struct work_struct *work)
+static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, int restart_qp)
{
- struct ipoib_dev_priv *cpriv, *priv =
- container_of(work, struct ipoib_dev_priv, flush_task);
+ struct ipoib_dev_priv *cpriv;
struct net_device *dev = priv->dev;
- if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags) ) {
+ mutex_lock(&priv->vlan_mutex);
+
+ /* Flush any child interfaces too -
+ * they might be up even if the parent is down */
+ list_for_each_entry(cpriv, &priv->child_intfs, list)
+ __ipoib_ib_dev_flush(cpriv, restart_qp);
+
+ mutex_unlock(&priv->vlan_mutex);
+
+ if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) {
ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_INITIALIZED not set.\n");
return;
}
@@ -642,6 +651,11 @@ void ipoib_ib_dev_flush(struct work_stru
ipoib_ib_dev_down(dev, 0);
+ if (restart_qp) {
+ ipoib_ib_dev_stop(dev, 0);
+ ipoib_ib_dev_open(dev);
+ }
+
/*
* The device could have been brought down between the start and when
* we get here, don't bring it back up if it's not configured up
@@ -650,14 +664,24 @@ void ipoib_ib_dev_flush(struct work_stru
ipoib_ib_dev_up(dev);
ipoib_mcast_restart_task(&priv->restart_task);
}
+}
- mutex_lock(&priv->vlan_mutex);
+void ipoib_ib_dev_flush(struct work_struct *work)
+{
+ struct ipoib_dev_priv *priv =
+ container_of(work, struct ipoib_dev_priv, flush_task);
- /* Flush any child interfaces too */
- list_for_each_entry(cpriv, &priv->child_intfs, list)
- ipoib_ib_dev_flush(&cpriv->flush_task);
+ ipoib_dbg(priv, "Flushing %s\n", priv->dev->name);
+ __ipoib_ib_dev_flush(priv, 0);
+}
- mutex_unlock(&priv->vlan_mutex);
+void ipoib_pkey_event(struct work_struct *work)
+{
+ struct ipoib_dev_priv *priv =
+ container_of(work, struct ipoib_dev_priv, pkey_event_task);
+
+ ipoib_dbg(priv, "Flushing %s and restarting it's QP\n", priv->dev->name);
+ __ipoib_ib_dev_flush(priv, 1);
}
void ipoib_ib_dev_cleanup(struct net_device *dev)
@@ -685,7 +709,7 @@ void ipoib_ib_dev_cleanup(struct net_dev
void ipoib_pkey_poll(struct work_struct *work)
{
struct ipoib_dev_priv *priv =
- container_of(work, struct ipoib_dev_priv, pkey_task.work);
+ container_of(work, struct ipoib_dev_priv, pkey_poll_task.work);
struct net_device *dev = priv->dev;
ipoib_pkey_dev_check_presence(dev);
@@ -696,7 +720,7 @@ void ipoib_pkey_poll(struct work_struct
mutex_lock(&pkey_mutex);
if (!test_bit(IPOIB_PKEY_STOP, &priv->flags))
queue_delayed_work(ipoib_workqueue,
- &priv->pkey_task,
+ &priv->pkey_poll_task,
HZ);
mutex_unlock(&pkey_mutex);
}
@@ -715,7 +739,7 @@ int ipoib_pkey_dev_delay_open(struct net
mutex_lock(&pkey_mutex);
clear_bit(IPOIB_PKEY_STOP, &priv->flags);
queue_delayed_work(ipoib_workqueue,
- &priv->pkey_task,
+ &priv->pkey_poll_task,
HZ);
mutex_unlock(&pkey_mutex);
return 1;
Index: infiniband/drivers/infiniband/ulp/ipoib/ipoib_main.c
===================================================================
--- infiniband.orig/drivers/infiniband/ulp/ipoib/ipoib_main.c 2007-05-08 15:46:53.000000000 +0300
+++ infiniband/drivers/infiniband/ulp/ipoib/ipoib_main.c 2007-05-08 16:45:44.000000000 +0300
@@ -107,7 +107,7 @@ int ipoib_open(struct net_device *dev)
return -EINVAL;
if (ipoib_ib_dev_up(dev)) {
- ipoib_ib_dev_stop(dev);
+ ipoib_ib_dev_stop(dev, 1);
return -EINVAL;
}
@@ -152,7 +152,7 @@ static int ipoib_stop(struct net_device
flush_workqueue(ipoib_workqueue);
ipoib_ib_dev_down(dev, 1);
- ipoib_ib_dev_stop(dev);
+ ipoib_ib_dev_stop(dev, 1);
if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
struct ipoib_dev_priv *cpriv;
@@ -990,7 +990,8 @@ static void ipoib_setup(struct net_devic
INIT_LIST_HEAD(&priv->dead_ahs);
INIT_LIST_HEAD(&priv->multicast_list);
- INIT_DELAYED_WORK(&priv->pkey_task, ipoib_pkey_poll);
+ INIT_DELAYED_WORK(&priv->pkey_poll_task, ipoib_pkey_poll);
+ INIT_WORK(&priv->pkey_event_task, ipoib_pkey_event);
INIT_DELAYED_WORK(&priv->mcast_task, ipoib_mcast_join_task);
INIT_WORK(&priv->flush_task, ipoib_ib_dev_flush);
INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task);
Index: infiniband/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
===================================================================
--- infiniband.orig/drivers/infiniband/ulp/ipoib/ipoib_verbs.c 2007-05-08 15:46:53.000000000 +0300
+++ infiniband/drivers/infiniband/ulp/ipoib/ipoib_verbs.c 2007-05-08 16:45:44.000000000 +0300
@@ -33,8 +33,6 @@
* $Id: ipoib_verbs.c 1349 2004-12-16 21:09:43Z roland $
*/
-#include <rdma/ib_cache.h>
-
#include "ipoib.h"
int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid)
@@ -49,7 +47,7 @@ int ipoib_mcast_attach(struct net_device
if (!qp_attr)
goto out;
- if (ib_find_cached_pkey(priv->ca, priv->port, priv->pkey, &pkey_index)) {
+ if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &pkey_index)) {
clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
ret = -ENXIO;
goto out;
@@ -103,7 +101,7 @@ int ipoib_init_qp(struct net_device *dev
* The port has to be assigned to the respective IB partition in
* advance.
*/
- ret = ib_find_cached_pkey(priv->ca, priv->port, priv->pkey, &pkey_index);
+ ret = ib_find_pkey(priv->ca, priv->port, priv->pkey, &pkey_index);
if (ret) {
clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
return ret;
@@ -260,7 +258,6 @@ void ipoib_event(struct ib_event_handler
container_of(handler, struct ipoib_dev_priv, event_handler);
if ((record->event == IB_EVENT_PORT_ERR ||
- record->event == IB_EVENT_PKEY_CHANGE ||
record->event == IB_EVENT_PORT_ACTIVE ||
record->event == IB_EVENT_LID_CHANGE ||
record->event == IB_EVENT_SM_CHANGE ||
@@ -268,5 +265,9 @@ void ipoib_event(struct ib_event_handler
record->element.port_num == priv->port) {
ipoib_dbg(priv, "Port state change event\n");
queue_work(ipoib_workqueue, &priv->flush_task);
+ } else if (record->event == IB_EVENT_PKEY_CHANGE &&
+ record->element.port_num == priv->port) {
+ ipoib_dbg(priv, "pkey change event on port:%d\n", priv->port);
+ queue_work(ipoib_workqueue, &priv->pkey_event_task);
}
}
More information about the general
mailing list