[openib-general] Re: user_mad.c: deadlock?

Michael S. Tsirkin mst at mellanox.co.il
Mon Nov 7 10:06:34 PST 2005


Quoting Roland Dreier <rolandd at cisco.com>:
> Subject: Re: user_mad.c: deadlock?
> 
>     Michael> It seems, therefore, that we can have a deadlock inside
>     Michael> user_mad, where ib_umad_close calls
>     Michael> ib_unregister_mad_agent which blocks until send_handler
>     Michael> runs which is blocked by the port mutex.
> 
> It certainly looks that way, and it also looks like
> ib_umad_unreg_agent() has had the same potential deadlock for a
> while.  In any case, I don't see any reason to hold the port mutex
> while unregistering agents in ib_umad_close() (the file is already
> gone, so it can't race against userspace registering or unregistering
> MAD agents via ioctl).  So something like this should be good enough.
> 
> Does anyone see anything wrong with this?
> 
>  - R.
> 
> Index: infiniband/core/user_mad.c
> ===================================================================
> --- infiniband/core/user_mad.c	(revision 3971)
> +++ infiniband/core/user_mad.c	(working copy)
> @@ -505,8 +505,6 @@ found:
>  		goto out;
>  	}
>  
> -	file->agent[agent_id] = agent;
> -
>  	file->mr[agent_id] = ib_get_dma_mr(agent->qp->pd, IB_ACCESS_LOCAL_WRITE);
>  	if (IS_ERR(file->mr[agent_id])) {
>  		ret = -ENOMEM;
> @@ -519,14 +517,15 @@ found:
>  		goto err_mr;
>  	}
>  
> +	file->agent[agent_id] = agent;
>  	ret = 0;
> +
>  	goto out;
>  
>  err_mr:
>  	ib_dereg_mr(file->mr[agent_id]);
>  
>  err:
> -	file->agent[agent_id] = NULL;
>  	ib_unregister_mad_agent(agent);
>  
>  out:
> @@ -536,27 +535,33 @@ out:
>  
>  static int ib_umad_unreg_agent(struct ib_umad_file *file, unsigned long arg)
>  {
> +	struct ib_mad_agent *agent = NULL;
> +	struct ib_mr *mr = NULL;
>  	u32 id;
>  	int ret = 0;
>  
> -	down_write(&file->port->mutex);
> +	if (get_user(id, (u32 __user *) arg))
> +		return -EFAULT;
>  
> -	if (get_user(id, (u32 __user *) arg)) {
> -		ret = -EFAULT;
> -		goto out;
> -	}
> +	down_write(&file->port->mutex);
>  
>  	if (id < 0 || id >= IB_UMAD_MAX_AGENTS || !file->agent[id]) {
>  		ret = -EINVAL;
>  		goto out;
>  	}
>  
> -	ib_dereg_mr(file->mr[id]);
> -	ib_unregister_mad_agent(file->agent[id]);
> +	agent = file->agent[id];
> +	mr    = file->mr[id];
>  	file->agent[id] = NULL;
>  
>  out:
>  	up_write(&file->port->mutex);
> +
> +	if (agent) {
> +		ib_unregister_mad_agent(agent);
> +		ib_dereg_mr(mr);
> +	}
> +
>  	return ret;
>  }
>  
> @@ -623,16 +628,16 @@ static int ib_umad_close(struct inode *i
>  	struct ib_umad_packet *packet, *tmp;
>  	int i;
>  
> -	down_write(&file->port->mutex);
>  	for (i = 0; i < IB_UMAD_MAX_AGENTS; ++i)
>  		if (file->agent[i]) {
> -			ib_dereg_mr(file->mr[i]);
>  			ib_unregister_mad_agent(file->agent[i]);
> +			ib_dereg_mr(file->mr[i]);
>  		}
>  
>  	list_for_each_entry_safe(packet, tmp, &file->recv_list, list)
>  		kfree(packet);
>  
> +	down_write(&file->port->mutex);
>  	list_del(&file->port_list);
>  	up_write(&file->port->mutex);
>  
> @@ -801,7 +806,7 @@ static int ib_umad_init_port(struct ib_d
>  		goto err_class;
>  	port->sm_dev->owner = THIS_MODULE;
>  	port->sm_dev->ops   = &umad_sm_fops;
> -	kobject_set_name(&port->dev->kobj, "issm%d", port->dev_num);
> +	kobject_set_name(&port->sm_dev->kobj, "issm%d", port->dev_num);
>  	if (cdev_add(port->sm_dev, base_dev + port->dev_num + IB_UMAD_MAX_PORTS, 1))
>  		goto err_sm_cdev;
>  
> @@ -913,7 +918,7 @@ static void ib_umad_add_one(struct ib_de
>  
>  err:
>  	while (--i >= s)
> -		ib_umad_kill_port(&umad_dev->port[i]);
> +		ib_umad_kill_port(&umad_dev->port[i - s]);
>  
>  	kref_put(&umad_dev->ref, ib_umad_release_dev);
>  }
> 

Looks fine, except that it includes two of my patches which you said you
had already applied.

-- 
MST



More information about the general mailing list