[ofa-general] Re: [PATCHv3 2/2] ipoib: handle pkey change events

Michael S. Tsirkin mst at dev.mellanox.co.il
Thu May 10 05:01:44 PDT 2007


OK, this is a whole different approach to the problem.
Seems to make sense to me.

> Quoting Yosef Etigin <yosefe at voltaire.com>:
> Subject: [PATCHv3 2/2] ipoib: handle pkey change events
> 
> Added - handling the case when a pkey of an interface is deleted and then restored
>
> --
> 
> This issue was found during partitioning & SM fail over testing.
> 
>  * Added flush flag to ipoib_ib_dev_stop(), ipoib_ib_dev_down() alike
>  * Rename the polling thread work to 'pkey_poll_task' to avoid ambiguity
>  * Obtain pkey index prior to entering init_qp, and save in in dev_priv
>  * Upon PKEY_CHANGE event, schedule a work that restarts the qp.
>  * Precondition the restart on whether the pkey index is really changed.
>    Use the cached pkey_index to test this.  
>  * Restart child interfaces before parent. They might be up even if the
>    parent is down.
>  * When interface is restarted, queue delayed initiallization, to handle
>    the case that a pkey is deleted and later restored. 
>  * Use uncached pkey query upon qp initiallization
> 
> SM reconfiguration or failover possibly causes a shuffling of the values
> in the port pkey table. The current implementation only queries for the
> index of the pkey once, when it creates the device QP and after that moves
> it into working state, and hence does not address this scenario. Fix this
> by using the PKEY_CHANGE event as a trigger to reconfigure the device QP.
> 
> Signed-off-by: Yosef Etigin <yosefe at voltaire.com>
>
> ---
>  drivers/infiniband/ulp/ipoib/ipoib.h       |    7 +-
>  drivers/infiniband/ulp/ipoib/ipoib_ib.c    |   96 +++++++++++++++++++++++------
>  drivers/infiniband/ulp/ipoib/ipoib_main.c  |    7 +-
>  drivers/infiniband/ulp/ipoib/ipoib_verbs.c |   26 ++-----
>  4 files changed, 97 insertions(+), 39 deletions(-)
> 
> Index: b/drivers/infiniband/ulp/ipoib/ipoib.h
> ===================================================================
> --- a/drivers/infiniband/ulp/ipoib/ipoib.h	2007-05-08 15:46:53.000000000 +0300
> +++ b/drivers/infiniband/ulp/ipoib/ipoib.h	2007-05-10 08:34:58.335171047 +0300
> @@ -202,15 +202,17 @@ struct ipoib_dev_priv {
>  	struct list_head multicast_list;
>  	struct rb_root multicast_tree;
>  
> -	struct delayed_work pkey_task;
> +	struct delayed_work pkey_poll_task;
>  	struct delayed_work mcast_task;
>  	struct work_struct flush_task;
>  	struct work_struct restart_task;
>  	struct delayed_work ah_reap_task;
> +	struct work_struct pkey_event_task;
>  
>  	struct ib_device *ca;
>  	u8            	  port;
>  	u16           	  pkey;
> +	u16               pkey_index;
>  	struct ib_pd  	 *pd;
>  	struct ib_mr  	 *mr;
>  	struct ib_cq  	 *cq;
> @@ -333,12 +335,13 @@ struct ipoib_dev_priv *ipoib_intf_alloc(
>  
>  int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
>  void ipoib_ib_dev_flush(struct work_struct *work);
> +void ipoib_pkey_event(struct work_struct *work);
>  void ipoib_ib_dev_cleanup(struct net_device *dev);
>  
>  int ipoib_ib_dev_open(struct net_device *dev);
>  int ipoib_ib_dev_up(struct net_device *dev);
>  int ipoib_ib_dev_down(struct net_device *dev, int flush);
> -int ipoib_ib_dev_stop(struct net_device *dev);
> +int ipoib_ib_dev_stop(struct net_device *dev, int flush);
>  
>  int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
>  void ipoib_dev_cleanup(struct net_device *dev);
> Index: b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
> ===================================================================
> --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c	2007-05-08 15:46:53.000000000 +0300
> +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c	2007-05-10 13:01:10.737347938 +0300
> @@ -408,11 +408,40 @@ void ipoib_reap_ah(struct work_struct *w
>  		queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task, HZ);
>  }
>  
> +static int ipoib_find_pkey_index(struct ipoib_dev_priv *priv, int *is_changed)
> +{
> +	u16 new_index;
> +
> +	if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &new_index)) {
> +		clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
> +		return -ENXIO;
> +	}
> +
> +	if (is_changed)
> +		*is_changed = !test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags) ||
> +				priv->pkey_index != new_index;
> +
> +	priv->pkey_index = new_index;
> +	set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
> +	return 0;
> +}

I suggest open-coding this - the name ipoib_find_pkey_index
does not tell me that it actually sets flags, etc.

> @@ -260,7 +249,6 @@ void ipoib_event(struct ib_event_handler
>  		container_of(handler, struct ipoib_dev_priv, event_handler);
>  
>  	if ((record->event == IB_EVENT_PORT_ERR    ||
> -	     record->event == IB_EVENT_PKEY_CHANGE ||
>  	     record->event == IB_EVENT_PORT_ACTIVE ||
>  	     record->event == IB_EVENT_LID_CHANGE  ||
>  	     record->event == IB_EVENT_SM_CHANGE   ||
> @@ -268,5 +256,9 @@ void ipoib_event(struct ib_event_handler
>  	    record->element.port_num == priv->port) {
>  		ipoib_dbg(priv, "Port state change event\n");
>  		queue_work(ipoib_workqueue, &priv->flush_task);
> +	} else if (record->event == IB_EVENT_PKEY_CHANGE &&
> +		   record->element.port_num == priv->port) {
> +		ipoib_dbg(priv, "pkey change event on port:%d\n", priv->port);
> +		queue_work(ipoib_workqueue, &priv->pkey_event_task);
>  	}
>  }

BTW, should we maybe do:
if (record->element.port_num != priv->port)
	return;

and then we won't have to do this test for each event type?

-- 
MST



More information about the general mailing list