[openib-general] Re: user_mad.c: deadlock?
Michael S. Tsirkin
mst at mellanox.co.il
Mon Nov 7 10:06:34 PST 2005
Quoting r. Roland Dreier <rolandd at cisco.com>:
> Subject: Re: user_mad.c: deadlock?
>
> Michael> It seems, therefore, that we can have a deadlock inside
> Michael> user_mad, where ib_umad_close calls
> Michael> ib_unregister_mad_agent which blocks until send_handler
> Michael> runs which is blocked by the port mutex.
>
> It certainly looks that way, and it also looks like
> ib_umad_unreg_agent() has had the same potential deadlock for a
> while. In any case, I don't see any reason to hold the port mutex
> while unregistering agents in ib_umad_close() (the file is already
> gone, so it can't race against userspace registering or unregistering
> MAD agents via ioctl). So something like this should be good enough.
>
> Does anyone see anything wrong with this?
>
> - R.
>
> Index: infiniband/core/user_mad.c
> ===================================================================
> --- infiniband/core/user_mad.c (revision 3971)
> +++ infiniband/core/user_mad.c (working copy)
> @@ -505,8 +505,6 @@ found:
> goto out;
> }
>
> - file->agent[agent_id] = agent;
> -
> file->mr[agent_id] = ib_get_dma_mr(agent->qp->pd, IB_ACCESS_LOCAL_WRITE);
> if (IS_ERR(file->mr[agent_id])) {
> ret = -ENOMEM;
> @@ -519,14 +517,15 @@ found:
> goto err_mr;
> }
>
> + file->agent[agent_id] = agent;
> ret = 0;
> +
> goto out;
>
> err_mr:
> ib_dereg_mr(file->mr[agent_id]);
>
> err:
> - file->agent[agent_id] = NULL;
> ib_unregister_mad_agent(agent);
>
> out:
> @@ -536,27 +535,33 @@ out:
>
> static int ib_umad_unreg_agent(struct ib_umad_file *file, unsigned long arg)
> {
> + struct ib_mad_agent *agent = NULL;
> + struct ib_mr *mr = NULL;
> u32 id;
> int ret = 0;
>
> - down_write(&file->port->mutex);
> + if (get_user(id, (u32 __user *) arg))
> + return -EFAULT;
>
> - if (get_user(id, (u32 __user *) arg)) {
> - ret = -EFAULT;
> - goto out;
> - }
> + down_write(&file->port->mutex);
>
> if (id < 0 || id >= IB_UMAD_MAX_AGENTS || !file->agent[id]) {
> ret = -EINVAL;
> goto out;
> }
>
> - ib_dereg_mr(file->mr[id]);
> - ib_unregister_mad_agent(file->agent[id]);
> + agent = file->agent[id];
> + mr = file->mr[id];
> file->agent[id] = NULL;
>
> out:
> up_write(&file->port->mutex);
> +
> + if (agent) {
> + ib_unregister_mad_agent(agent);
> + ib_dereg_mr(mr);
> + }
> +
> return ret;
> }
>
> @@ -623,16 +628,16 @@ static int ib_umad_close(struct inode *i
> struct ib_umad_packet *packet, *tmp;
> int i;
>
> - down_write(&file->port->mutex);
> for (i = 0; i < IB_UMAD_MAX_AGENTS; ++i)
> if (file->agent[i]) {
> - ib_dereg_mr(file->mr[i]);
> ib_unregister_mad_agent(file->agent[i]);
> + ib_dereg_mr(file->mr[i]);
> }
>
> list_for_each_entry_safe(packet, tmp, &file->recv_list, list)
> kfree(packet);
>
> + down_write(&file->port->mutex);
> list_del(&file->port_list);
> up_write(&file->port->mutex);
>
> @@ -801,7 +806,7 @@ static int ib_umad_init_port(struct ib_d
> goto err_class;
> port->sm_dev->owner = THIS_MODULE;
> port->sm_dev->ops = &umad_sm_fops;
> - kobject_set_name(&port->dev->kobj, "issm%d", port->dev_num);
> + kobject_set_name(&port->sm_dev->kobj, "issm%d", port->dev_num);
> if (cdev_add(port->sm_dev, base_dev + port->dev_num + IB_UMAD_MAX_PORTS, 1))
> goto err_sm_cdev;
>
> @@ -913,7 +918,7 @@ static void ib_umad_add_one(struct ib_de
>
> err:
> while (--i >= s)
> - ib_umad_kill_port(&umad_dev->port[i]);
> + ib_umad_kill_port(&umad_dev->port[i - s]);
>
> kref_put(&umad_dev->ref, ib_umad_release_dev);
> }
>
Looks fine, except that it also includes two of my patches, which you said
you had already applied.
--
MST
More information about the general mailing list