[ofa-general] IPoIB kernel Oops -- race condition

Jack Morgenstein jackm at dev.mellanox.co.il
Mon Jun 29 12:29:30 PDT 2009


On Monday 29 June 2009 18:06, Moni Shoua wrote:
> Jack Morgenstein wrote:
> > On Sunday 28 June 2009 19:09, Moni Shoua wrote:
> >> maybe synchronizing the race with a completion var (like IPoIB does in struct ipoib_path) will help. I think this will work. I can send a patch if you want unless you see this idea doesn't work for this case.
> >>
> >> MoniS
> 
> What do you think of this (see below)?
> I used a completion struct to synchronize between leave and join in a similar way that ipoib_path is using the field 'done'.
> There is one difference though from the original code (I don't know if that's acceptable), here we wait for the join request to finish before we destroy the mcast struct and not canceling it.
> Please note that I didn't compile this code. I just want to hear opinions first before I make a real patch out of this.
> 
I have to think about this some.  I'm a bit nervous about 2 things:
1. Somehow, the complete never arrives, and the "leave" gets stuck forever waiting.
2. What if someone does another ifconfig ib0 up while we are waiting for the complete?
   (I think it is disregarded, but I'm not sure -- with this change and all).

Nice idea, though.
-Jack
> 
> diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
> index 753a983..a551258 100644
> --- a/drivers/infiniband/ulp/ipoib/ipoib.h
> +++ b/drivers/infiniband/ulp/ipoib/ipoib.h
> @@ -145,6 +145,7 @@ struct ipoib_mcast {
>  	struct sk_buff_head pkt_queue;
>  
>  	struct net_device *dev;
> +	struct completion     join_req;
>  };
>  
>  struct ipoib_rx_buf {
> diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
> index a0e9753..afe72dd 100644
> --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
> +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
> @@ -117,6 +117,7 @@ static struct ipoib_mcast *ipoib_mcast_alloc(struct net_device *dev,
>  	mcast->dev = dev;
>  	mcast->created = jiffies;
>  	mcast->backoff = 1;
> +	complete(&mcast->join_req);	
>  
>  	INIT_LIST_HEAD(&mcast->list);
>  	INIT_LIST_HEAD(&mcast->neigh_list);
> @@ -286,6 +287,7 @@ ipoib_mcast_sendonly_join_complete(int status,
>  	if (status == -ENETRESET)
>  		return 0;
>  
> +	complete(&mcast->join_req);
>  	if (!status)
>  		status = ipoib_mcast_join_finish(mcast, &multicast->rec);
>  
> @@ -336,6 +338,7 @@ static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
>  	rec.port_gid = priv->local_gid;
>  	rec.pkey     = cpu_to_be16(priv->pkey);
>  
> +	init_completion(&mcast->join_req);
>  	mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca,
>  					 priv->port, &rec,
>  					 IB_SA_MCMEMBER_REC_MGID	|
> @@ -384,6 +387,7 @@ static int ipoib_mcast_join_complete(int status,
>  			mcast->mcmember.mgid.raw, status);
>  
>  	/* We trap for port events ourselves. */
> +	complete(&mcast->join_req);
>  	if (status == -ENETRESET)
>  		return 0;
>  
> @@ -482,10 +486,12 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
>  	}
>  
>  	set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
> +	init_completion(&mcast->join_req);
>  	mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port,
>  					 &rec, comp_mask, GFP_KERNEL,
>  					 ipoib_mcast_join_complete, mcast);
>  	if (IS_ERR(mcast->mc)) {
> +		complete(&mcast->join_req);
>  		clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
>  		ret = PTR_ERR(mcast->mc);
>  		ipoib_warn(priv, "ib_sa_join_multicast failed, status %d\n", ret);
> @@ -630,8 +636,7 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
>  	struct ipoib_dev_priv *priv = netdev_priv(dev);
>  	int ret = 0;
>  
> -	if (test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
> -		ib_sa_free_multicast(mcast->mc);
> +	wait_for_completion(mcast->join_req);
>  
>  	if (test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
>  		ipoib_dbg_mcast(priv, "leaving MGID %pI6\n",
> 



More information about the general mailing list