[openib-general] [PATCH 1/7 v2] for 2.6.20 ib/ib_sa: add tracking of multicast join / leave requests

Michael S. Tsirkin mst at mellanox.co.il
Mon Nov 6 02:35:44 PST 2006


Quoting Sean Hefty <sean.hefty at intel.com>:
> diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
> b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
> index 3faa182..d90f804 100644
> --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
> +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
> @@ -60,14 +60,11 @@ static DEFINE_MUTEX(mcast_mutex);
>  /* Used for all multicast joins (broadcast, IPv4 mcast and IPv6 mcast) */
>  struct ipoib_mcast {
>  	struct ib_sa_mcmember_rec mcmember;
> +	struct ib_sa_multicast	 *mc;
>  	struct ipoib_ah          *ah;
>  
>  	struct rb_node    rb_node;
>  	struct list_head  list;
> -	struct completion done;
> -
> -	int                 query_id;
> -	struct ib_sa_query *query;
>  
>  	unsigned long created;
>  	unsigned long backoff;
> @@ -299,18 +296,22 @@ static int ipoib_mcast_join_finish(struc
>  	return 0;
>  }
>  
> -static void
> +static int
>  ipoib_mcast_sendonly_join_complete(int status,
> -				   struct ib_sa_mcmember_rec *mcmember,
> -				   void *mcast_ptr)
> +				   struct ib_sa_multicast *multicast)
>  {
> -	struct ipoib_mcast *mcast = mcast_ptr;
> +	struct ipoib_mcast *mcast = multicast->context;
>  	struct net_device *dev = mcast->dev;
>  	struct ipoib_dev_priv *priv = netdev_priv(dev);
>  
> +	/* We trap for port events ourselves. */
> +	if (status == -ENETRESET)
> +		return 0;
> +
>  	if (!status)
> -		ipoib_mcast_join_finish(mcast, mcmember);
> -	else {
> +		status = ipoib_mcast_join_finish(mcast, &multicast->rec);
> +	
> +	if (status) {
>  		if (mcast->logcount++ < 20)
>  			ipoib_dbg_mcast(netdev_priv(dev), "multicast join failed for "
>  					IPOIB_GID_FMT ", status %d\n",
> @@ -325,11 +326,10 @@ ipoib_mcast_sendonly_join_complete(int s
>  		spin_unlock_irq(&priv->tx_lock);
>  
>  		/* Clear the busy flag so we try again */
> -		clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
> -		mcast->query = NULL;
> +		status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY,
> +					    &mcast->flags);
>  	}
> -
> -	complete(&mcast->done);
> +	return status;
>  }
>  
>  static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
> @@ -359,35 +359,33 @@ #endif
>  	rec.port_gid = priv->local_gid;
>  	rec.pkey     = cpu_to_be16(priv->pkey);
>  
> -	init_completion(&mcast->done);
> -
> -	ret = ib_sa_mcmember_rec_set(&ipoib_sa_client, priv->ca, priv->port, &rec,
> -				     IB_SA_MCMEMBER_REC_MGID		|
> -				     IB_SA_MCMEMBER_REC_PORT_GID	|
> -				     IB_SA_MCMEMBER_REC_PKEY		|
> -				     IB_SA_MCMEMBER_REC_JOIN_STATE,
> -				     1000, GFP_ATOMIC,
> -				     ipoib_mcast_sendonly_join_complete,
> -				     mcast, &mcast->query);
> -	if (ret < 0) {
> -		ipoib_warn(priv, "ib_sa_mcmember_rec_set failed (ret = %d)\n",
> +	mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca,
> +					 priv->port, &rec,
> +					 IB_SA_MCMEMBER_REC_MGID	|
> +					 IB_SA_MCMEMBER_REC_PORT_GID	|
> +					 IB_SA_MCMEMBER_REC_PKEY	|
> +					 IB_SA_MCMEMBER_REC_JOIN_STATE,
> +					 GFP_ATOMIC,
> +					 ipoib_mcast_sendonly_join_complete,
> +					 mcast);
> +	if (IS_ERR(mcast->mc)) {
> +		ret = PTR_ERR(mcast->mc);
> +		clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
> +		ipoib_warn(priv, "ib_sa_join_multicast failed (ret = %d)\n",
>  			   ret);
>  	} else {
>  		ipoib_dbg_mcast(priv, "no multicast record for " IPOIB_GID_FMT
>  				", starting join\n",
>  				IPOIB_GID_ARG(mcast->mcmember.mgid));
> -
> -		mcast->query_id = ret;
>  	}
>  
>  	return ret;
>  }
>  
> -static void ipoib_mcast_join_complete(int status,
> -				      struct ib_sa_mcmember_rec *mcmember,
> -				      void *mcast_ptr)
> +static int ipoib_mcast_join_complete(int status,
> +				     struct ib_sa_multicast *multicast)
>  {
> -	struct ipoib_mcast *mcast = mcast_ptr;
> +	struct ipoib_mcast *mcast = multicast->context;
>  	struct net_device *dev = mcast->dev;
>  	struct ipoib_dev_priv *priv = netdev_priv(dev);
>  
> @@ -395,23 +393,24 @@ static void ipoib_mcast_join_complete(in
>  			" (status %d)\n",
>  			IPOIB_GID_ARG(mcast->mcmember.mgid), status);
>  
> -	if (!status && !ipoib_mcast_join_finish(mcast, mcmember)) {
> +	/* We trap for port events ourselves. */
> +	if (status == -ENETRESET)
> +		return 0;
> +
> +	if (!status)
> +		status = ipoib_mcast_join_finish(mcast, &multicast->rec);
> +
> +	if (!status) {
>  		mcast->backoff = 1;
>  		mutex_lock(&mcast_mutex);
>  		if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
>  			queue_work(ipoib_workqueue, &priv->mcast_task);
>  		mutex_unlock(&mcast_mutex);
> -		complete(&mcast->done);
> -		return;
> -	}
> -
> -	if (status == -EINTR) {
> -		complete(&mcast->done);
> -		return;
> +		return 0;
>  	}
>  
> -	if (status && mcast->logcount++ < 20) {
> -		if (status == -ETIMEDOUT || status == -EINTR) {
> +	if (mcast->logcount++ < 20) {
> +		if (status == -ETIMEDOUT) {
>  			ipoib_dbg_mcast(priv, "multicast join failed for " IPOIB_GID_FMT
>  					", status %d\n",
>  					IPOIB_GID_ARG(mcast->mcmember.mgid),
> @@ -428,23 +427,18 @@ static void ipoib_mcast_join_complete(in
>  	if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
>  		mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
>  
> -	mutex_lock(&mcast_mutex);
> +	/* Clear the busy flag so we try again */
> +	status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
>  
> +	mutex_lock(&mcast_mutex);
>  	spin_lock_irq(&priv->lock);
> -	mcast->query = NULL;
> -
> -	if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) {
> -		if (status == -ETIMEDOUT)
> -			queue_work(ipoib_workqueue, &priv->mcast_task);
> -		else
> -			queue_delayed_work(ipoib_workqueue, &priv->mcast_task,
> -					   mcast->backoff * HZ);
> -	} else
> -		complete(&mcast->done);
> +	if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
> +		queue_delayed_work(ipoib_workqueue, &priv->mcast_task,
> +				   mcast->backoff * HZ);
>  	spin_unlock_irq(&priv->lock);
>  	mutex_unlock(&mcast_mutex);
>  
> -	return;
> +	return status;
>  }
>  
>  static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
> @@ -493,15 +487,14 @@ static void ipoib_mcast_join(struct net_
>  		rec.hop_limit	  = priv->broadcast->mcmember.hop_limit;
>  	}
>  
> -	init_completion(&mcast->done);
> -
> -	ret = ib_sa_mcmember_rec_set(&ipoib_sa_client, priv->ca, priv->port,
> -				     &rec, comp_mask, mcast->backoff * 1000,
> -				     GFP_ATOMIC, ipoib_mcast_join_complete,
> -				     mcast, &mcast->query);
> -
> -	if (ret < 0) {
> -		ipoib_warn(priv, "ib_sa_mcmember_rec_set failed, status %d\n", ret);
> +	set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
> +	mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port,
> +					 &rec, comp_mask, GFP_KERNEL,
> +					 ipoib_mcast_join_complete, mcast);
> +	if (IS_ERR(mcast->mc)) {
> +		clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
> +		ret = PTR_ERR(mcast->mc);
> +		ipoib_warn(priv, "ib_sa_join_multicast failed, status %d\n", ret);
>  
>  		mcast->backoff *= 2;
>  		if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
> @@ -513,8 +506,7 @@ static void ipoib_mcast_join(struct net_
>  					   &priv->mcast_task,
>  					   mcast->backoff * HZ);
>  		mutex_unlock(&mcast_mutex);
> -	} else
> -		mcast->query_id = ret;
> +	}
>  }
>  
>  void ipoib_mcast_join_task(void *dev_ptr)
> @@ -538,7 +530,7 @@ void ipoib_mcast_join_task(void *dev_ptr
>  			priv->local_rate = attr.active_speed *
>  				ib_width_enum_to_int(attr.active_width);
>  		} else
> -		ipoib_warn(priv, "ib_query_port failed\n");
> +			ipoib_warn(priv, "ib_query_port failed\n");
>  	}
>  
>  	if (!priv->broadcast) {
> @@ -565,7 +557,8 @@ void ipoib_mcast_join_task(void *dev_ptr
>  	}
>  
>  	if (!test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) {
> -		ipoib_mcast_join(dev, priv->broadcast, 0);
> +		if (!test_bit(IPOIB_MCAST_FLAG_BUSY, &priv->broadcast->flags))
> +			ipoib_mcast_join(dev, priv->broadcast, 0);
>  		return;
>  	}
>  
> @@ -620,26 +613,9 @@ int ipoib_mcast_start_thread(struct net_
>  	return 0;
>  }
>  
> -static void wait_for_mcast_join(struct ipoib_dev_priv *priv,
> -				struct ipoib_mcast *mcast)
> -{
> -	spin_lock_irq(&priv->lock);
> -	if (mcast && mcast->query) {
> -		ib_sa_cancel_query(mcast->query_id, mcast->query);
> -		mcast->query = NULL;
> -		spin_unlock_irq(&priv->lock);
> -		ipoib_dbg_mcast(priv, "waiting for MGID " IPOIB_GID_FMT "\n",
> -				IPOIB_GID_ARG(mcast->mcmember.mgid));
> -		wait_for_completion(&mcast->done);
> -	}
> -	else
> -		spin_unlock_irq(&priv->lock);
> -}
> -
>  int ipoib_mcast_stop_thread(struct net_device *dev, int flush)
>  {
>  	struct ipoib_dev_priv *priv = netdev_priv(dev);
> -	struct ipoib_mcast *mcast;
>  
>  	ipoib_dbg_mcast(priv, "stopping multicast thread\n");
>  
> @@ -655,52 +631,27 @@ int ipoib_mcast_stop_thread(struct net_d
>  	if (flush)
>  		flush_workqueue(ipoib_workqueue);
>  
> -	wait_for_mcast_join(priv, priv->broadcast);
> -
> -	list_for_each_entry(mcast, &priv->multicast_list, list)
> -		wait_for_mcast_join(priv, mcast);
> -
>  	return 0;
>  }
>  
>  static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
>  {
>  	struct ipoib_dev_priv *priv = netdev_priv(dev);
> -	struct ib_sa_mcmember_rec rec = {
> -		.join_state = 1
> -	};
>  	int ret = 0;
>  
> -	if (!test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags))
> -		return 0;
> -
> -	ipoib_dbg_mcast(priv, "leaving MGID " IPOIB_GID_FMT "\n",
> -			IPOIB_GID_ARG(mcast->mcmember.mgid));
> -
> -	rec.mgid     = mcast->mcmember.mgid;
> -	rec.port_gid = priv->local_gid;
> -	rec.pkey     = cpu_to_be16(priv->pkey);
> +	if (test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
> +		ipoib_dbg_mcast(priv, "leaving MGID " IPOIB_GID_FMT "\n",
> +				IPOIB_GID_ARG(mcast->mcmember.mgid));
>  
> -	/* Remove ourselves from the multicast group */
> -	ret = ipoib_mcast_detach(dev, be16_to_cpu(mcast->mcmember.mlid),
> -				 &mcast->mcmember.mgid);
> -	if (ret)
> -		ipoib_warn(priv, "ipoib_mcast_detach failed (result = %d)\n", ret);
> +		/* Remove ourselves from the multicast group */
> +		ret = ipoib_mcast_detach(dev, be16_to_cpu(mcast->mcmember.mlid),
> +					 &mcast->mcmember.mgid);
> +		if (ret)
> +			ipoib_warn(priv, "ipoib_mcast_detach failed (result = %d)\n", ret);
> +	}
>  
> -	/*
> -	 * Just make one shot at leaving and don't wait for a reply;
> -	 * if we fail, too bad.
> -	 */
> -	ret = ib_sa_mcmember_rec_delete(&ipoib_sa_client, priv->ca, priv->port, &rec,
> -					IB_SA_MCMEMBER_REC_MGID		|
> -					IB_SA_MCMEMBER_REC_PORT_GID	|
> -					IB_SA_MCMEMBER_REC_PKEY		|
> -					IB_SA_MCMEMBER_REC_JOIN_STATE,
> -					0, GFP_ATOMIC, NULL,
> -					mcast, &mcast->query);
> -	if (ret < 0)
> -		ipoib_warn(priv, "ib_sa_mcmember_rec_delete failed "
> -			   "for leave (result = %d)\n", ret);
> +	if (test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
> +		ib_sa_free_multicast(mcast->mc);
>  
>  	return 0;
>  }
> @@ -753,7 +704,7 @@ void ipoib_mcast_send(struct net_device 
>  			dev_kfree_skb_any(skb);
>  		}
>  
> -		if (mcast->query)
> +		if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
>  			ipoib_dbg_mcast(priv, "no address vector, "
>  					"but multicast join already started\n");
>  		else if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
> @@ -910,7 +861,6 @@ void ipoib_mcast_restart_task(void *dev_
>  
>  	/* We have to cancel outside of the spinlock */
>  	list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
> -		wait_for_mcast_join(priv, mcast);
>  		ipoib_mcast_leave(mcast->dev, mcast);
>  		ipoib_mcast_free(mcast);
>  	}
> diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h
> index 97715b0..3b957e5 100644
> --- a/include/rdma/ib_sa.h
> +++ b/include/rdma/ib_sa.h
> @@ -285,18 +285,6 @@ int ib_sa_path_rec_get(struct ib_sa_clie
>  		       void *context,
>  		       struct ib_sa_query **query);
>  
> -int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
> -			     struct ib_device *device, u8 port_num,
> -			     u8 method,
> -			     struct ib_sa_mcmember_rec *rec,
> -			     ib_sa_comp_mask comp_mask,
> -			     int timeout_ms, gfp_t gfp_mask,
> -			     void (*callback)(int status,
> -					      struct ib_sa_mcmember_rec *resp,
> -					      void *context),
> -			     void *context,
> -			     struct ib_sa_query **query);
> -
>  int ib_sa_service_rec_query(struct ib_sa_client *client,
>  			 struct ib_device *device, u8 port_num,
>  			 u8 method,
> @@ -309,93 +297,87 @@ int ib_sa_service_rec_query(struct ib_sa
>  			 void *context,
>  			 struct ib_sa_query **sa_query);
>  
> +struct ib_sa_multicast {
> +	struct ib_sa_mcmember_rec rec;
> +	ib_sa_comp_mask		comp_mask;
> +	int			(*callback)(int status,
> +					    struct ib_sa_multicast *multicast);
> +	void			*context;
> +};
> +
>  /**
> - * ib_sa_mcmember_rec_set - Start an MCMember set query
> - * @client:SA client
> - * @device:device to send query on
> - * @port_num: port number to send query on
> - * @rec:MCMember Record to send in query
> - * @comp_mask:component mask to send in query
> - * @timeout_ms:time to wait for response
> - * @gfp_mask:GFP mask to use for internal allocations
> - * @callback:function called when query completes, times out or is
> - * canceled
> - * @context:opaque user context passed to callback
> - * @sa_query:query context, used to cancel query
> + * ib_sa_join_multicast - Initiates a join request to the specified multicast
> + *   group.
> + * @client: SA client
> + * @device: Device associated with the multicast group.
> + * @port_num: Port on the specified device to associate with the multicast
> + *   group.
> + * @rec: SA multicast member record specifying group attributes.
> + * @comp_mask: Component mask indicating which group attributes of %rec are
> + *   valid.
> + * @gfp_mask: GFP mask for memory allocations.
> + * @callback: User callback invoked once the join operation completes.
> + * @context: User specified context stored with the ib_sa_multicast structure.
>   *
> - * Send an MCMember Set query to the SA (eg to join a multicast
> - * group).  The callback function will be called when the query
> - * completes (or fails); status is 0 for a successful response, -EINTR
> - * if the query is canceled, -ETIMEDOUT is the query timed out, or
> - * -EIO if an error occurred sending the query.  The resp parameter of
> - * the callback is only valid if status is 0.
> + * This call initiates a multicast join request with the SA for the specified
> + * multicast group.  If the join operation is started successfully, it returns
> + * an ib_sa_multicast structure that is used to track the multicast operation.
> + * Users must free this structure by calling ib_free_multicast, even if the
> + * join operation later fails.  (The callback status is non-zero.)
>   *
> - * If the return value of ib_sa_mcmember_rec_set() is negative, it is
> - * an error code.  Otherwise it is a query ID that can be used to
> - * cancel the query.
> + * If the join operation fails; status will be non-zero, with the following
> + * failures possible:
> + * -ETIMEDOUT: The request timed out.
> + * -EIO: An error occurred sending the query.
> + * -EINVAL: The MCMemberRecord values differed from the existing group's.
> + * -ENETRESET: Indicates that an fatal error has occurred on the multicast
> + *   group, and the user must rejoin the group to continue using it.
>   */
> -static inline int
> -ib_sa_mcmember_rec_set(struct ib_sa_client *client,
> -		       struct ib_device *device, u8 port_num,
> -		       struct ib_sa_mcmember_rec *rec,
> -		       ib_sa_comp_mask comp_mask,
> -		       int timeout_ms, gfp_t gfp_mask,
> -		       void (*callback)(int status,
> -					struct ib_sa_mcmember_rec *resp,
> -					void *context),
> -		       void *context,
> -		       struct ib_sa_query **query)
> -{
> -	return ib_sa_mcmember_rec_query(client, device, port_num,
> -					IB_MGMT_METHOD_SET,
> -					rec, comp_mask,
> -					timeout_ms, gfp_mask, callback,
> -					context, query);
> -}
> +struct ib_sa_multicast *ib_sa_join_multicast(struct ib_sa_client *client,
> +					     struct ib_device *device, u8 port_num,
> +					     struct ib_sa_mcmember_rec *rec,
> +					     ib_sa_comp_mask comp_mask, gfp_t gfp_mask,
> +					     int (*callback)(int status,
> +							     struct ib_sa_multicast
> +								    *multicast),
> +					     void *context);
>  
>  /**
> - * ib_sa_mcmember_rec_delete - Start an MCMember delete query
> - * @client:SA client
> - * @device:device to send query on
> - * @port_num: port number to send query on
> - * @rec:MCMember Record to send in query
> - * @comp_mask:component mask to send in query
> - * @timeout_ms:time to wait for response
> - * @gfp_mask:GFP mask to use for internal allocations
> - * @callback:function called when query completes, times out or is
> - * canceled
> - * @context:opaque user context passed to callback
> - * @sa_query:query context, used to cancel query
> + * ib_free_multicast - Frees the multicast tracking structure, and releases
> + *    any reference on the multicast group.
> + * @multicast: Multicast tracking structure allocated by ib_join_multicast.
>   *
> - * Send an MCMember Delete query to the SA (eg to leave a multicast
> - * group).  The callback function will be called when the query
> - * completes (or fails); status is 0 for a successful response, -EINTR
> - * if the query is canceled, -ETIMEDOUT is the query timed out, or
> - * -EIO if an error occurred sending the query.  The resp parameter of
> - * the callback is only valid if status is 0.
> + * This call blocks until the multicast identifier is destroyed.  It may
> + * not be called from within the multicast callback; however, returning a non-
> + * zero value from the callback will result in destroying the multicast
> + * tracking structure.
> + */
> +void ib_sa_free_multicast(struct ib_sa_multicast *multicast);
> +
> +/**
> + * ib_get_mcmember_rec - Looks up a multicast member record by its MGID and
> + *   returns it if found.
> + * @device: Device associated with the multicast group.
> + * @port_num: Port on the specified device to associate with the multicast
> + *   group.
> + * @mgid: optional MGID of multicast group.
> + * @rec: Location to copy SA multicast member record.
>   *
> - * If the return value of ib_sa_mcmember_rec_delete() is negative, it
> - * is an error code.  Otherwise it is a query ID that can be used to
> - * cancel the query.
> + * If an MGID is specified, returns an existing multicast member record if
> + * one is found for the local port.  If no MGID is specified, or the specified
> + * MGID is 0, returns a multicast member record filled in with default values
> + * that may be used to create a new multicast group.
>   */
> -static inline int
> -ib_sa_mcmember_rec_delete(struct ib_sa_client *client,
> -			  struct ib_device *device, u8 port_num,
> -			  struct ib_sa_mcmember_rec *rec,
> -			  ib_sa_comp_mask comp_mask,
> -			  int timeout_ms, gfp_t gfp_mask,
> -			  void (*callback)(int status,
> -					   struct ib_sa_mcmember_rec *resp,
> -					   void *context),
> -			  void *context,
> -			  struct ib_sa_query **query)
> -{
> -	return ib_sa_mcmember_rec_query(client, device, port_num,
> -					IB_SA_METHOD_DELETE,
> -					rec, comp_mask,
> -					timeout_ms, gfp_mask, callback,
> -					context, query);
> -}
> +int ib_sa_get_mcmember_rec(struct ib_device *device, u8 port_num,
> +			   union ib_gid *mgid, struct ib_sa_mcmember_rec *rec);
> +
> +/**
> + * ib_init_ah_from_mcmember - Initialize address handle attributes based on
> + * an SA multicast member record.
> + */
> +int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
> +			     struct ib_sa_mcmember_rec *rec,
> +			     struct ib_ah_attr *ah_attr);
>  
>  /**
>   * ib_init_ah_from_path - Initialize address handle attributes based on an SA
> 

OK, I went over this again. Some questions:

So instead of ib_sa_mcmember_rec_set which returned the query by pointer we now
have ib_sa_join_multicast which returns the pointer.  This part looks OK I guess, but
I still do not understand why does the patch tinker with logic (e.g.
setting/clearing IPOIB_MCAST_FLAG_BUSY) in the IPoIB code.

*All* the new API was supposed to do is add reference counting on top of
join/leave queries. So why the need to rework the logic?  Is it possible that
what is missing is the analog of ib_sa_cancel_query - a non-blocking call which
would guarantee that join callback is invoked soon?  If yes, I think we should
just add that to the new API.

Sean, it seems like you are trying to push some unrelated re-factoring in the IPoIB
code, which might be fine, but should be done separately from the update to
the new API - could be before, or after the multicast change.

-- 
MST




More information about the general mailing list