[ofa-general] Re: [PATCHv3 2/2] ipoib: handle pkey change events

Yosef Etigin yosefe at voltaire.com
Wed May 9 04:53:33 PDT 2007


Michael S. Tsirkin wrote:
> OK. looks pretty good to me. One coding style violation I found:
> 
> 
fixed

--
This issue was found during partitioning & SM fail over testing.

 * Added flush flag to ipoib_ib_dev_stop(), ipoib_ib_dev_down() alike
 * Rename the polling thread work to 'pkey_poll_task' to avoid ambiguity
 * Upon PKEY_CHANGE event, schedule a work that restarts the QP
 * Restart child interfaces before parent. They might be up even if the
   parent is down
 * Use uncached pkey query upon qp initiallization

SM reconfiguration or failover possibly causes a shuffling of the values
in the port pkey table. The current implementation only queries for the
index of the pkey once, when it creates the device QP and after that moves
it into working state, and hence does not address this scenario. Fix this
by using the PKEY_CHANGE event as a trigger to reconfigure the device QP.

Signed-off-by: Yosef Etigin <yosefe at voltaire.com>
---
 drivers/infiniband/ulp/ipoib/ipoib.h       |    6 +-
 drivers/infiniband/ulp/ipoib/ipoib_ib.c    |   59 ++++++++++++++++++++---------
 drivers/infiniband/ulp/ipoib/ipoib_main.c  |    7 +--
 drivers/infiniband/ulp/ipoib/ipoib_verbs.c |   11 ++---
 4 files changed, 56 insertions(+), 27 deletions(-)

Index: infiniband/drivers/infiniband/ulp/ipoib/ipoib.h
===================================================================
--- infiniband.orig/drivers/infiniband/ulp/ipoib/ipoib.h	2007-05-08 15:46:53.000000000 +0300
+++ infiniband/drivers/infiniband/ulp/ipoib/ipoib.h	2007-05-08 16:45:44.000000000 +0300
@@ -202,11 +202,12 @@ struct ipoib_dev_priv {
 	struct list_head multicast_list;
 	struct rb_root multicast_tree;
 
-	struct delayed_work pkey_task;
+	struct delayed_work pkey_poll_task;
 	struct delayed_work mcast_task;
 	struct work_struct flush_task;
 	struct work_struct restart_task;
 	struct delayed_work ah_reap_task;
+	struct work_struct pkey_event_task;
 
 	struct ib_device *ca;
 	u8            	  port;
@@ -333,12 +334,13 @@ struct ipoib_dev_priv *ipoib_intf_alloc(
 
 int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
 void ipoib_ib_dev_flush(struct work_struct *work);
+void ipoib_pkey_event(struct work_struct *work);
 void ipoib_ib_dev_cleanup(struct net_device *dev);
 
 int ipoib_ib_dev_open(struct net_device *dev);
 int ipoib_ib_dev_up(struct net_device *dev);
 int ipoib_ib_dev_down(struct net_device *dev, int flush);
-int ipoib_ib_dev_stop(struct net_device *dev);
+int ipoib_ib_dev_stop(struct net_device *dev, int flush);
 
 int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
 void ipoib_dev_cleanup(struct net_device *dev);
Index: infiniband/drivers/infiniband/ulp/ipoib/ipoib_ib.c
===================================================================
--- infiniband.orig/drivers/infiniband/ulp/ipoib/ipoib_ib.c	2007-05-08 15:46:53.000000000 +0300
+++ infiniband/drivers/infiniband/ulp/ipoib/ipoib_ib.c	2007-05-09 13:56:00.754030478 +0300
@@ -422,14 +422,14 @@ int ipoib_ib_dev_open(struct net_device 
 	ret = ipoib_ib_post_receives(dev);
 	if (ret) {
 		ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret);
-		ipoib_ib_dev_stop(dev);
+		ipoib_ib_dev_stop(dev, 1);
 		return -1;
 	}
 
 	ret = ipoib_cm_dev_open(dev);
 	if (ret) {
 		ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret);
-		ipoib_ib_dev_stop(dev);
+		ipoib_ib_dev_stop(dev, 1);
 		return -1;
 	}
 
@@ -481,7 +481,7 @@ int ipoib_ib_dev_down(struct net_device 
 	if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
 		mutex_lock(&pkey_mutex);
 		set_bit(IPOIB_PKEY_STOP, &priv->flags);
-		cancel_delayed_work(&priv->pkey_task);
+		cancel_delayed_work(&priv->pkey_poll_task);
 		mutex_unlock(&pkey_mutex);
 		if (flush)
 			flush_workqueue(ipoib_workqueue);
@@ -508,7 +508,7 @@ static int recvs_pending(struct net_devi
 	return pending;
 }
 
-int ipoib_ib_dev_stop(struct net_device *dev)
+int ipoib_ib_dev_stop(struct net_device *dev, int flush)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct ib_qp_attr qp_attr;
@@ -581,7 +581,8 @@ timeout:
 	/* Wait for all AHs to be reaped */
 	set_bit(IPOIB_STOP_REAPER, &priv->flags);
 	cancel_delayed_work(&priv->ah_reap_task);
-	flush_workqueue(ipoib_workqueue);
+	if (flush)
+		flush_workqueue(ipoib_workqueue);
 
 	begin = jiffies;
 
@@ -622,13 +623,21 @@ int ipoib_ib_dev_init(struct net_device 
 	return 0;
 }
 
-void ipoib_ib_dev_flush(struct work_struct *work)
+static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, int restart_qp)
 {
-	struct ipoib_dev_priv *cpriv, *priv =
-		container_of(work, struct ipoib_dev_priv, flush_task);
+	struct ipoib_dev_priv *cpriv;
 	struct net_device *dev = priv->dev;
 
-	if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags) ) {
+	mutex_lock(&priv->vlan_mutex);
+
+	/* Flush any child interfaces too -
+ 	 * they might be up even if the parent is down */
+ 	list_for_each_entry(cpriv, &priv->child_intfs, list)
+		__ipoib_ib_dev_flush(cpriv, restart_qp);
+
+	mutex_unlock(&priv->vlan_mutex);
+
+	if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) {
 		ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_INITIALIZED not set.\n");
 		return;
 	}
@@ -642,6 +651,11 @@ void ipoib_ib_dev_flush(struct work_stru
 
 	ipoib_ib_dev_down(dev, 0);
 
+	if (restart_qp) {
+		ipoib_ib_dev_stop(dev, 0);
+		ipoib_ib_dev_open(dev);
+	}
+
 	/*
 	 * The device could have been brought down between the start and when
 	 * we get here, don't bring it back up if it's not configured up
@@ -650,14 +664,24 @@ void ipoib_ib_dev_flush(struct work_stru
 		ipoib_ib_dev_up(dev);
 		ipoib_mcast_restart_task(&priv->restart_task);
 	}
+}
 
-	mutex_lock(&priv->vlan_mutex);
+void ipoib_ib_dev_flush(struct work_struct *work)
+{
+	struct ipoib_dev_priv *priv =
+		container_of(work, struct ipoib_dev_priv, flush_task);
 
-	/* Flush any child interfaces too */
-	list_for_each_entry(cpriv, &priv->child_intfs, list)
-		ipoib_ib_dev_flush(&cpriv->flush_task);
+	ipoib_dbg(priv, "Flushing %s\n", priv->dev->name);
+	__ipoib_ib_dev_flush(priv, 0);
+}
 
-	mutex_unlock(&priv->vlan_mutex);
+void ipoib_pkey_event(struct work_struct *work)
+{
+	struct ipoib_dev_priv *priv =
+		container_of(work, struct ipoib_dev_priv, pkey_event_task);
+
+	ipoib_dbg(priv, "Flushing %s and restarting it's QP\n", priv->dev->name);
+	__ipoib_ib_dev_flush(priv, 1);
 }
 
 void ipoib_ib_dev_cleanup(struct net_device *dev)
@@ -685,7 +709,7 @@ void ipoib_ib_dev_cleanup(struct net_dev
 void ipoib_pkey_poll(struct work_struct *work)
 {
 	struct ipoib_dev_priv *priv =
-		container_of(work, struct ipoib_dev_priv, pkey_task.work);
+		container_of(work, struct ipoib_dev_priv, pkey_poll_task.work);
 	struct net_device *dev = priv->dev;
 
 	ipoib_pkey_dev_check_presence(dev);
@@ -696,7 +720,7 @@ void ipoib_pkey_poll(struct work_struct 
 		mutex_lock(&pkey_mutex);
 		if (!test_bit(IPOIB_PKEY_STOP, &priv->flags))
 			queue_delayed_work(ipoib_workqueue,
-					   &priv->pkey_task,
+					   &priv->pkey_poll_task,
 					   HZ);
 		mutex_unlock(&pkey_mutex);
 	}
@@ -715,7 +739,7 @@ int ipoib_pkey_dev_delay_open(struct net
 		mutex_lock(&pkey_mutex);
 		clear_bit(IPOIB_PKEY_STOP, &priv->flags);
 		queue_delayed_work(ipoib_workqueue,
-				   &priv->pkey_task,
+				   &priv->pkey_poll_task,
 				   HZ);
 		mutex_unlock(&pkey_mutex);
 		return 1;
Index: infiniband/drivers/infiniband/ulp/ipoib/ipoib_main.c
===================================================================
--- infiniband.orig/drivers/infiniband/ulp/ipoib/ipoib_main.c	2007-05-08 15:46:53.000000000 +0300
+++ infiniband/drivers/infiniband/ulp/ipoib/ipoib_main.c	2007-05-08 16:45:44.000000000 +0300
@@ -107,7 +107,7 @@ int ipoib_open(struct net_device *dev)
 		return -EINVAL;
 
 	if (ipoib_ib_dev_up(dev)) {
-		ipoib_ib_dev_stop(dev);
+		ipoib_ib_dev_stop(dev, 1);
 		return -EINVAL;
 	}
 
@@ -152,7 +152,7 @@ static int ipoib_stop(struct net_device 
 	flush_workqueue(ipoib_workqueue);
 
 	ipoib_ib_dev_down(dev, 1);
-	ipoib_ib_dev_stop(dev);
+	ipoib_ib_dev_stop(dev, 1);
 
 	if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
 		struct ipoib_dev_priv *cpriv;
@@ -990,7 +990,8 @@ static void ipoib_setup(struct net_devic
 	INIT_LIST_HEAD(&priv->dead_ahs);
 	INIT_LIST_HEAD(&priv->multicast_list);
 
-	INIT_DELAYED_WORK(&priv->pkey_task,    ipoib_pkey_poll);
+	INIT_DELAYED_WORK(&priv->pkey_poll_task, ipoib_pkey_poll);
+	INIT_WORK(&priv->pkey_event_task, ipoib_pkey_event);
 	INIT_DELAYED_WORK(&priv->mcast_task,   ipoib_mcast_join_task);
 	INIT_WORK(&priv->flush_task,   ipoib_ib_dev_flush);
 	INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task);
Index: infiniband/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
===================================================================
--- infiniband.orig/drivers/infiniband/ulp/ipoib/ipoib_verbs.c	2007-05-08 15:46:53.000000000 +0300
+++ infiniband/drivers/infiniband/ulp/ipoib/ipoib_verbs.c	2007-05-09 14:51:32.684627634 +0300
@@ -33,8 +33,6 @@
  * $Id: ipoib_verbs.c 1349 2004-12-16 21:09:43Z roland $
  */
 
-#include <rdma/ib_cache.h>
-
 #include "ipoib.h"
 
 int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid)
@@ -49,7 +47,7 @@ int ipoib_mcast_attach(struct net_device
 	if (!qp_attr)
 		goto out;
 
-	if (ib_find_cached_pkey(priv->ca, priv->port, priv->pkey, &pkey_index)) {
+	if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &pkey_index)) {
 		clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
 		ret = -ENXIO;
 		goto out;
@@ -103,7 +101,7 @@ int ipoib_init_qp(struct net_device *dev
 	 * The port has to be assigned to the respective IB partition in
 	 * advance.
 	 */
-	ret = ib_find_cached_pkey(priv->ca, priv->port, priv->pkey, &pkey_index);
+	ret = ib_find_pkey(priv->ca, priv->port, priv->pkey, &pkey_index);
 	if (ret) {
 		clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
 		return ret;
@@ -260,7 +258,6 @@ void ipoib_event(struct ib_event_handler
 		container_of(handler, struct ipoib_dev_priv, event_handler);
 
 	if ((record->event == IB_EVENT_PORT_ERR    ||
-	     record->event == IB_EVENT_PKEY_CHANGE ||
 	     record->event == IB_EVENT_PORT_ACTIVE ||
 	     record->event == IB_EVENT_LID_CHANGE  ||
 	     record->event == IB_EVENT_SM_CHANGE   ||
@@ -268,5 +265,9 @@ void ipoib_event(struct ib_event_handler
 	    record->element.port_num == priv->port) {
 		ipoib_dbg(priv, "Port state change event\n");
 		queue_work(ipoib_workqueue, &priv->flush_task);
+	} else if (record->event == IB_EVENT_PKEY_CHANGE &&
+		   record->element.port_num == priv->port) {
+		ipoib_dbg(priv, "pkey change event on port:%d\n", priv->port);
+		queue_work(ipoib_workqueue, &priv->pkey_event_task);
 	}
 }




More information about the general mailing list