[openib-general] Re: user_mad.c: deadlock?

Roland Dreier rolandd at cisco.com
Wed Nov 9 15:07:04 PST 2005


    Michael> I think I see a solution: replace up_write with
    Michael> downgrade_write.  This way ib_umad_close has a read lock
    Michael> most of the time, and write lock only while it is
    Michael> changing the list.

Yes, excellent idea.  I wasn't familiar with that API, but that's
almost exactly what we need.  It's still a little ugly, but I think
this works:

--- infiniband/core/user_mad.c	(revision 4008)
+++ infiniband/core/user_mad.c	(working copy)
@@ -110,12 +110,13 @@ struct ib_umad_device {
 };
 
 struct ib_umad_file {
-	struct ib_umad_port *port;
-	struct list_head     recv_list;
-	struct list_head     port_list;
-	spinlock_t           recv_lock;
-	wait_queue_head_t    recv_wait;
-	struct ib_mad_agent *agent[IB_UMAD_MAX_AGENTS];
+	struct ib_umad_port    *port;
+	struct list_head	recv_list;
+	struct list_head	port_list;
+	spinlock_t		recv_lock;
+	wait_queue_head_t	recv_wait;
+	struct ib_mad_agent    *agent[IB_UMAD_MAX_AGENTS];
+	int			agents_dead;
 };
 
 struct ib_umad_packet {
@@ -144,6 +145,12 @@ static void ib_umad_release_dev(struct k
 	kfree(dev);
 }
 
+/* caller must hold port->mutex at least for reading */
+static struct ib_mad_agent *__get_agent(struct ib_umad_file *file, int id)
+{
+	return file->agents_dead ? NULL : file->agent[id];
+}
+
 static int queue_packet(struct ib_umad_file *file,
 			struct ib_mad_agent *agent,
 			struct ib_umad_packet *packet)
@@ -151,10 +158,11 @@ static int queue_packet(struct ib_umad_f
 	int ret = 1;
 
 	down_read(&file->port->mutex);
+
 	for (packet->mad.hdr.id = 0;
 	     packet->mad.hdr.id < IB_UMAD_MAX_AGENTS;
 	     packet->mad.hdr.id++)
-		if (agent == file->agent[packet->mad.hdr.id]) {
+		if (agent == __get_agent(file, packet->mad.hdr.id)) {
 			spin_lock_irq(&file->recv_lock);
 			list_add_tail(&packet->list, &file->recv_list);
 			spin_unlock_irq(&file->recv_lock);
@@ -326,7 +334,7 @@ static ssize_t ib_umad_write(struct file
 
 	down_read(&file->port->mutex);
 
-	agent = file->agent[packet->mad.hdr.id];
+	agent = __get_agent(file, packet->mad.hdr.id);
 	if (!agent) {
 		ret = -EINVAL;
 		goto err_up;
@@ -480,7 +488,7 @@ static int ib_umad_reg_agent(struct ib_u
 	}
 
 	for (agent_id = 0; agent_id < IB_UMAD_MAX_AGENTS; ++agent_id)
-		if (!file->agent[agent_id])
+		if (!__get_agent(file, agent_id))
 			goto found;
 
 	ret = -ENOMEM;
@@ -530,7 +538,7 @@ static int ib_umad_unreg_agent(struct ib
 
 	down_write(&file->port->mutex);
 
-	if (id < 0 || id >= IB_UMAD_MAX_AGENTS || !file->agent[id]) {
+	if (id < 0 || id >= IB_UMAD_MAX_AGENTS || !__get_agent(file, id)) {
 		ret = -EINVAL;
 		goto out;
 	}
@@ -608,21 +616,29 @@ static int ib_umad_close(struct inode *i
 	struct ib_umad_file *file = filp->private_data;
 	struct ib_umad_device *dev = file->port->umad_dev;
 	struct ib_umad_packet *packet, *tmp;
+	int already_dead;
 	int i;
 
-	for (i = 0; i < IB_UMAD_MAX_AGENTS; ++i)
-		if (file->agent[i])
-			ib_unregister_mad_agent(file->agent[i]);
+	down_write(&file->port->mutex);
+
+	already_dead = file->agents_dead;
+	file->agents_dead = 1;
 
 	list_for_each_entry_safe(packet, tmp, &file->recv_list, list)
 		kfree(packet);
 
-	down_write(&file->port->mutex);
 	list_del(&file->port_list);
-	up_write(&file->port->mutex);
 
-	kfree(file);
+	downgrade_write(&file->port->mutex);
+
+	if (!already_dead)
+		for (i = 0; i < IB_UMAD_MAX_AGENTS; ++i)
+			if (file->agent[i])
+				ib_unregister_mad_agent(file->agent[i]);
+
+	up_read(&file->port->mutex);
 
+	kfree(file);
 	kref_put(&dev->ref, ib_umad_release_dev);
 
 	return 0;
@@ -829,7 +845,6 @@ err_cdev:
 static void ib_umad_kill_port(struct ib_umad_port *port)
 {
 	struct ib_umad_file *file;
-	struct ib_mad_agent *agent;
 	int id;
 
 	class_set_devdata(port->class_dev,    NULL);
@@ -849,16 +864,26 @@ static void ib_umad_kill_port(struct ib_
 	port->ib_dev = NULL;
 	up_write(&port->mutex);
 
-	list_for_each_entry(file, &port->file_list, port_list)
-		for (id = 0; id < IB_UMAD_MAX_AGENTS; ++id) {
-			down_write(&port->mutex);
-			agent = file->agent[id];
-			file->agent[id] = NULL;
-			up_write(&port->mutex);
+	down_write(&port->mutex);
 
-			if (agent)
-				ib_unregister_mad_agent(agent);
-		}
+	while (!list_empty(&port->file_list)) {
+		file = list_entry(port->file_list.next, struct ib_umad_file,
+				  port_list);
+
+		file->agents_dead = 1;
+		list_del_init(&file->port_list);
+
+		downgrade_write(&port->mutex);
+
+		for (id = 0; id < IB_UMAD_MAX_AGENTS; ++id)
+			if (file->agent[id])
+				ib_unregister_mad_agent(file->agent[id]);
+
+		up_read(&port->mutex);
+		down_write(&port->mutex);
+	}
+
+	up_write(&port->mutex);
 
 	clear_bit(port->dev_num, dev_map);
 }



More information about the general mailing list