[ofa-general] Re: [PATCHv4 2/2] ipoib: handle pkey change events

Yosef Etigin yosefe at voltaire.com
Thu May 10 05:52:34 PDT 2007


> 
> Return some error code -ENXIO.
> 
All other branches in this function return -1 (see next hunk)

Anyway, let it be -ENXIO.


--
This issue was found during partitioning & SM fail over testing.

 * Added flush flag to ipoib_ib_dev_stop(), ipoib_ib_dev_down() alike
 * Rename the polling thread work to 'pkey_poll_task' to avoid ambiguity
 * Obtain pkey index prior to entering init_qp, and save in in dev_priv
 * Upon PKEY_CHANGE event, schedule a work that restarts the qp.
 * Precondition the restart on whether the pkey index is really changed.
   Use the cached pkey_index to test this.  
 * Restart child interfaces before parent. They might be up even if the
   parent is down.
 * When interface is restarted, queue delayed initiallization, to handle
   the case that a pkey is deleted and later restored. 
 * Use uncached pkey query upon qp initiallization

SM reconfiguration or failover possibly causes a shuffling of the values
in the port pkey table. The current implementation only queries for the
index of the pkey once, when it creates the device QP and after that moves
it into working state, and hence does not address this scenario. Fix this
by using the PKEY_CHANGE event as a trigger to reconfigure the device QP.

Signed-off-by: Yosef Etigin <yosefe at voltaire.com>
---
 drivers/infiniband/ulp/ipoib/ipoib.h       |    7 +-
 drivers/infiniband/ulp/ipoib/ipoib_ib.c    |   88 +++++++++++++++++++++++------
 drivers/infiniband/ulp/ipoib/ipoib_main.c  |    7 +-
 drivers/infiniband/ulp/ipoib/ipoib_verbs.c |   26 ++------
 4 files changed, 89 insertions(+), 39 deletions(-)

Index: b/drivers/infiniband/ulp/ipoib/ipoib.h
===================================================================
--- a/drivers/infiniband/ulp/ipoib/ipoib.h	2007-05-08 15:46:53.000000000 +0300
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h	2007-05-10 08:34:58.335171047 +0300
@@ -202,15 +202,17 @@ struct ipoib_dev_priv {
 	struct list_head multicast_list;
 	struct rb_root multicast_tree;
 
-	struct delayed_work pkey_task;
+	struct delayed_work pkey_poll_task;
 	struct delayed_work mcast_task;
 	struct work_struct flush_task;
 	struct work_struct restart_task;
 	struct delayed_work ah_reap_task;
+	struct work_struct pkey_event_task;
 
 	struct ib_device *ca;
 	u8            	  port;
 	u16           	  pkey;
+	u16               pkey_index;
 	struct ib_pd  	 *pd;
 	struct ib_mr  	 *mr;
 	struct ib_cq  	 *cq;
@@ -333,12 +335,13 @@ struct ipoib_dev_priv *ipoib_intf_alloc(
 
 int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
 void ipoib_ib_dev_flush(struct work_struct *work);
+void ipoib_pkey_event(struct work_struct *work);
 void ipoib_ib_dev_cleanup(struct net_device *dev);
 
 int ipoib_ib_dev_open(struct net_device *dev);
 int ipoib_ib_dev_up(struct net_device *dev);
 int ipoib_ib_dev_down(struct net_device *dev, int flush);
-int ipoib_ib_dev_stop(struct net_device *dev);
+int ipoib_ib_dev_stop(struct net_device *dev, int flush);
 
 int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
 void ipoib_dev_cleanup(struct net_device *dev);
Index: b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
===================================================================
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c	2007-05-08 15:46:53.000000000 +0300
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c	2007-05-10 15:50:29.315183358 +0300
@@ -413,6 +413,18 @@ int ipoib_ib_dev_open(struct net_device 
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	int ret;
 
+	/*
+	 * Search through the port P_Key table for the requested pkey value.
+	 * The port has to be assigned to the respective IB partition in
+	 * advance.
+	 */
+	if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &priv->pkey_index)) {
+		ipoib_warn(priv, "pkey 0x%04x nof found\n", priv->pkey);
+		clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
+		return -ENXIO;
+	}
+	set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
+
 	ret = ipoib_init_qp(dev);
 	if (ret) {
 		ipoib_warn(priv, "ipoib_init_qp returned %d\n", ret);
@@ -422,14 +434,14 @@ int ipoib_ib_dev_open(struct net_device 
 	ret = ipoib_ib_post_receives(dev);
 	if (ret) {
 		ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret);
-		ipoib_ib_dev_stop(dev);
+		ipoib_ib_dev_stop(dev, 1);
 		return -1;
 	}
 
 	ret = ipoib_cm_dev_open(dev);
 	if (ret) {
 		ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret);
-		ipoib_ib_dev_stop(dev);
+		ipoib_ib_dev_stop(dev, 1);
 		return -1;
 	}
 
@@ -481,7 +493,7 @@ int ipoib_ib_dev_down(struct net_device 
 	if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
 		mutex_lock(&pkey_mutex);
 		set_bit(IPOIB_PKEY_STOP, &priv->flags);
-		cancel_delayed_work(&priv->pkey_task);
+		cancel_delayed_work(&priv->pkey_poll_task);
 		mutex_unlock(&pkey_mutex);
 		if (flush)
 			flush_workqueue(ipoib_workqueue);
@@ -508,7 +520,7 @@ static int recvs_pending(struct net_devi
 	return pending;
 }
 
-int ipoib_ib_dev_stop(struct net_device *dev)
+int ipoib_ib_dev_stop(struct net_device *dev, int flush)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct ib_qp_attr qp_attr;
@@ -581,7 +593,8 @@ timeout:
 	/* Wait for all AHs to be reaped */
 	set_bit(IPOIB_STOP_REAPER, &priv->flags);
 	cancel_delayed_work(&priv->ah_reap_task);
-	flush_workqueue(ipoib_workqueue);
+	if (flush)
+		flush_workqueue(ipoib_workqueue);
 
 	begin = jiffies;
 
@@ -622,13 +635,22 @@ int ipoib_ib_dev_init(struct net_device 
 	return 0;
 }
 
-void ipoib_ib_dev_flush(struct work_struct *work)
+static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, int pkey_event)
 {
-	struct ipoib_dev_priv *cpriv, *priv =
-		container_of(work, struct ipoib_dev_priv, flush_task);
+	struct ipoib_dev_priv *cpriv;
 	struct net_device *dev = priv->dev;
+	u16 new_index;
+
+	mutex_lock(&priv->vlan_mutex);
+
+	/* Flush any child interfaces too -
+ 	 * they might be up even if the parent is down */
+ 	list_for_each_entry(cpriv, &priv->child_intfs, list)
+		__ipoib_ib_dev_flush(cpriv, pkey_event);
+
+	mutex_unlock(&priv->vlan_mutex);
 
-	if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags) ) {
+	if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) {
 		ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_INITIALIZED not set.\n");
 		return;
 	}
@@ -638,10 +660,32 @@ void ipoib_ib_dev_flush(struct work_stru
 		return;
 	}
 
+	if (pkey_event) {
+		if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &new_index)) {
+			clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
+			ipoib_ib_dev_down(dev, 0);
+			ipoib_pkey_dev_delay_open(dev);
+			return;
+		}
+		set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
+
+		/* restart qp only of pkey index is cahnged */
+		if (new_index == priv->pkey_index) {
+			ipoib_dbg(priv, "Not flushing - pkey index not changed.\n");
+			return;
+		}
+		priv->pkey_index = new_index;
+	}
+
 	ipoib_dbg(priv, "flushing\n");
 
 	ipoib_ib_dev_down(dev, 0);
 
+	if (pkey_event) {
+		ipoib_ib_dev_stop(dev, 0);
+		ipoib_ib_dev_open(dev);
+	}
+
 	/*
 	 * The device could have been brought down between the start and when
 	 * we get here, don't bring it back up if it's not configured up
@@ -650,14 +694,24 @@ void ipoib_ib_dev_flush(struct work_stru
 		ipoib_ib_dev_up(dev);
 		ipoib_mcast_restart_task(&priv->restart_task);
 	}
+}
 
-	mutex_lock(&priv->vlan_mutex);
+void ipoib_ib_dev_flush(struct work_struct *work)
+{
+	struct ipoib_dev_priv *priv =
+		container_of(work, struct ipoib_dev_priv, flush_task);
 
-	/* Flush any child interfaces too */
-	list_for_each_entry(cpriv, &priv->child_intfs, list)
-		ipoib_ib_dev_flush(&cpriv->flush_task);
+	ipoib_dbg(priv, "Flushing %s\n", priv->dev->name);
+	__ipoib_ib_dev_flush(priv, 0);
+}
 
-	mutex_unlock(&priv->vlan_mutex);
+void ipoib_pkey_event(struct work_struct *work)
+{
+	struct ipoib_dev_priv *priv =
+		container_of(work, struct ipoib_dev_priv, pkey_event_task);
+
+	ipoib_dbg(priv, "Flushing %s and restarting it's QP\n", priv->dev->name);
+	__ipoib_ib_dev_flush(priv, 1);
 }
 
 void ipoib_ib_dev_cleanup(struct net_device *dev)
@@ -685,7 +739,7 @@ void ipoib_ib_dev_cleanup(struct net_dev
 void ipoib_pkey_poll(struct work_struct *work)
 {
 	struct ipoib_dev_priv *priv =
-		container_of(work, struct ipoib_dev_priv, pkey_task.work);
+		container_of(work, struct ipoib_dev_priv, pkey_poll_task.work);
 	struct net_device *dev = priv->dev;
 
 	ipoib_pkey_dev_check_presence(dev);
@@ -696,7 +750,7 @@ void ipoib_pkey_poll(struct work_struct 
 		mutex_lock(&pkey_mutex);
 		if (!test_bit(IPOIB_PKEY_STOP, &priv->flags))
 			queue_delayed_work(ipoib_workqueue,
-					   &priv->pkey_task,
+					   &priv->pkey_poll_task,
 					   HZ);
 		mutex_unlock(&pkey_mutex);
 	}
@@ -715,7 +769,7 @@ int ipoib_pkey_dev_delay_open(struct net
 		mutex_lock(&pkey_mutex);
 		clear_bit(IPOIB_PKEY_STOP, &priv->flags);
 		queue_delayed_work(ipoib_workqueue,
-				   &priv->pkey_task,
+				   &priv->pkey_poll_task,
 				   HZ);
 		mutex_unlock(&pkey_mutex);
 		return 1;
Index: b/drivers/infiniband/ulp/ipoib/ipoib_main.c
===================================================================
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c	2007-05-08 15:46:53.000000000 +0300
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c	2007-05-09 17:21:03.000000000 +0300
@@ -107,7 +107,7 @@ int ipoib_open(struct net_device *dev)
 		return -EINVAL;
 
 	if (ipoib_ib_dev_up(dev)) {
-		ipoib_ib_dev_stop(dev);
+		ipoib_ib_dev_stop(dev, 1);
 		return -EINVAL;
 	}
 
@@ -152,7 +152,7 @@ static int ipoib_stop(struct net_device 
 	flush_workqueue(ipoib_workqueue);
 
 	ipoib_ib_dev_down(dev, 1);
-	ipoib_ib_dev_stop(dev);
+	ipoib_ib_dev_stop(dev, 1);
 
 	if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
 		struct ipoib_dev_priv *cpriv;
@@ -990,7 +990,8 @@ static void ipoib_setup(struct net_devic
 	INIT_LIST_HEAD(&priv->dead_ahs);
 	INIT_LIST_HEAD(&priv->multicast_list);
 
-	INIT_DELAYED_WORK(&priv->pkey_task,    ipoib_pkey_poll);
+	INIT_DELAYED_WORK(&priv->pkey_poll_task, ipoib_pkey_poll);
+	INIT_WORK(&priv->pkey_event_task, ipoib_pkey_event);
 	INIT_DELAYED_WORK(&priv->mcast_task,   ipoib_mcast_join_task);
 	INIT_WORK(&priv->flush_task,   ipoib_ib_dev_flush);
 	INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task);
Index: b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
===================================================================
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c	2007-05-08 15:46:53.000000000 +0300
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c	2007-05-10 09:13:28.997127223 +0300
@@ -33,8 +33,6 @@
  * $Id: ipoib_verbs.c 1349 2004-12-16 21:09:43Z roland $
  */
 
-#include <rdma/ib_cache.h>
-
 #include "ipoib.h"
 
 int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid)
@@ -49,7 +47,7 @@ int ipoib_mcast_attach(struct net_device
 	if (!qp_attr)
 		goto out;
 
-	if (ib_find_cached_pkey(priv->ca, priv->port, priv->pkey, &pkey_index)) {
+	if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &pkey_index)) {
 		clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
 		ret = -ENXIO;
 		goto out;
@@ -94,26 +92,17 @@ int ipoib_init_qp(struct net_device *dev
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	int ret;
-	u16 pkey_index;
 	struct ib_qp_attr qp_attr;
 	int attr_mask;
 
-	/*
-	 * Search through the port P_Key table for the requested pkey value.
-	 * The port has to be assigned to the respective IB partition in
-	 * advance.
-	 */
-	ret = ib_find_cached_pkey(priv->ca, priv->port, priv->pkey, &pkey_index);
-	if (ret) {
-		clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
-		return ret;
-	}
-	set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
+	/* Make sure we have a valid pkey_index in priv->pkey_index */
+	if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags))
+		return -1;
 
 	qp_attr.qp_state = IB_QPS_INIT;
 	qp_attr.qkey = 0;
 	qp_attr.port_num = priv->port;
-	qp_attr.pkey_index = pkey_index;
+	qp_attr.pkey_index = priv->pkey_index;
 	attr_mask =
 	    IB_QP_QKEY |
 	    IB_QP_PORT |
@@ -260,7 +249,6 @@ void ipoib_event(struct ib_event_handler
 		container_of(handler, struct ipoib_dev_priv, event_handler);
 
 	if ((record->event == IB_EVENT_PORT_ERR    ||
-	     record->event == IB_EVENT_PKEY_CHANGE ||
 	     record->event == IB_EVENT_PORT_ACTIVE ||
 	     record->event == IB_EVENT_LID_CHANGE  ||
 	     record->event == IB_EVENT_SM_CHANGE   ||
@@ -268,5 +256,9 @@ void ipoib_event(struct ib_event_handler
 	    record->element.port_num == priv->port) {
 		ipoib_dbg(priv, "Port state change event\n");
 		queue_work(ipoib_workqueue, &priv->flush_task);
+	} else if (record->event == IB_EVENT_PKEY_CHANGE &&
+		   record->element.port_num == priv->port) {
+		ipoib_dbg(priv, "pkey change event on port:%d\n", priv->port);
+		queue_work(ipoib_workqueue, &priv->pkey_event_task);
 	}
 }




More information about the general mailing list