[openib-general] Re: user_mad.c: deadlock?

Roland Dreier rolandd at cisco.com
Mon Nov 7 09:32:33 PST 2005


    Michael> It seems, therefore, that we can have a deadlock inside
    Michael> user_mad, where ib_umad_close calls
    Michael> ib_unregister_mad_agent which blocks until send_handler
    Michael> runs which is blocked by the port mutex.

It certainly looks that way, and it also looks like
ib_umad_unreg_agent() has had the same potential deadlock for a
while.  In any case, I don't see any reason to hold the port mutex
while unregistering agents in ib_umad_close() (the file is already
gone, so it can't race against userspace registering or unregistering
MAD agents via ioctl).  So something like this should be good enough.

Does anyone see anything wrong with this?

 - R.

Index: infiniband/core/user_mad.c
===================================================================
--- infiniband/core/user_mad.c	(revision 3971)
+++ infiniband/core/user_mad.c	(working copy)
@@ -505,8 +505,6 @@ found:
 		goto out;
 	}
 
-	file->agent[agent_id] = agent;
-
 	file->mr[agent_id] = ib_get_dma_mr(agent->qp->pd, IB_ACCESS_LOCAL_WRITE);
 	if (IS_ERR(file->mr[agent_id])) {
 		ret = -ENOMEM;
@@ -519,14 +517,15 @@ found:
 		goto err_mr;
 	}
 
+	file->agent[agent_id] = agent;
 	ret = 0;
+
 	goto out;
 
 err_mr:
 	ib_dereg_mr(file->mr[agent_id]);
 
 err:
-	file->agent[agent_id] = NULL;
 	ib_unregister_mad_agent(agent);
 
 out:
@@ -536,27 +535,33 @@ out:
 
 static int ib_umad_unreg_agent(struct ib_umad_file *file, unsigned long arg)
 {
+	struct ib_mad_agent *agent = NULL;
+	struct ib_mr *mr = NULL;
 	u32 id;
 	int ret = 0;
 
-	down_write(&file->port->mutex);
+	if (get_user(id, (u32 __user *) arg))
+		return -EFAULT;
 
-	if (get_user(id, (u32 __user *) arg)) {
-		ret = -EFAULT;
-		goto out;
-	}
+	down_write(&file->port->mutex);
 
 	if (id < 0 || id >= IB_UMAD_MAX_AGENTS || !file->agent[id]) {
 		ret = -EINVAL;
 		goto out;
 	}
 
-	ib_dereg_mr(file->mr[id]);
-	ib_unregister_mad_agent(file->agent[id]);
+	agent = file->agent[id];
+	mr    = file->mr[id];
 	file->agent[id] = NULL;
 
 out:
 	up_write(&file->port->mutex);
+
+	if (agent) {
+		ib_unregister_mad_agent(agent);
+		ib_dereg_mr(mr);
+	}
+
 	return ret;
 }
 
@@ -623,16 +628,16 @@ static int ib_umad_close(struct inode *i
 	struct ib_umad_packet *packet, *tmp;
 	int i;
 
-	down_write(&file->port->mutex);
 	for (i = 0; i < IB_UMAD_MAX_AGENTS; ++i)
 		if (file->agent[i]) {
-			ib_dereg_mr(file->mr[i]);
 			ib_unregister_mad_agent(file->agent[i]);
+			ib_dereg_mr(file->mr[i]);
 		}
 
 	list_for_each_entry_safe(packet, tmp, &file->recv_list, list)
 		kfree(packet);
 
+	down_write(&file->port->mutex);
 	list_del(&file->port_list);
 	up_write(&file->port->mutex);
 
@@ -801,7 +806,7 @@ static int ib_umad_init_port(struct ib_d
 		goto err_class;
 	port->sm_dev->owner = THIS_MODULE;
 	port->sm_dev->ops   = &umad_sm_fops;
-	kobject_set_name(&port->dev->kobj, "issm%d", port->dev_num);
+	kobject_set_name(&port->sm_dev->kobj, "issm%d", port->dev_num);
 	if (cdev_add(port->sm_dev, base_dev + port->dev_num + IB_UMAD_MAX_PORTS, 1))
 		goto err_sm_cdev;
 
@@ -913,7 +918,7 @@ static void ib_umad_add_one(struct ib_de
 
 err:
 	while (--i >= s)
-		ib_umad_kill_port(&umad_dev->port[i]);
+		ib_umad_kill_port(&umad_dev->port[i - s]);
 
 	kref_put(&umad_dev->ref, ib_umad_release_dev);
 }



More information about the general mailing list