[ofa-general] Re: [PATCH 2.6.24] rdma/cm: fix deadlock destroying listen requests
Kanoj Sarcar
kanoj at netxen.com
Tue Oct 9 11:40:00 PDT 2007
Sean,
I will take a look at your code changes and comment, and hopefully be
able to run a quick test on your patch within this week.
Just so I understand, did you discover problems (maybe preexisting race
conditions) with my previously posted patch? If yes, please point it
out, so it's easier to review yours; if not, I will assume your patch
implements a better locking scheme and review it as such.
Thanks.
Kanoj
Sean Hefty wrote:
>Deadlock condition reported by Kanoj Sarcar <kanoj at netxen.com>
>The deadlock occurs when a connection request arrives at the same
>time that a wildcard listen is being destroyed.
>
>A wildcard listen maintains per device listen requests for each
>RDMA device in the system. The per device listens are automatically
>added and removed when RDMA devices are inserted or removed from
>the system.
>
>When a wildcard listen is destroyed, rdma_destroy_id() acquires
>the rdma_cm's device mutex ('lock') to protect against hot-plug
>events adding or removing per device listens. It then tries to
>destroy the per device listens by calling ib_destroy_cm_id() or
>iw_destroy_cm_id(). It does this while holding the device mutex.
>
>However, if the underlying iw/ib CM reports a connection request
>while this is occurring, the rdma_cm callback function will try
>to acquire the same device mutex. Since we're in a callback,
>the ib_destroy_cm_id() or iw_destroy_cm_id() calls will block until
>their callback thread returns, but the callback is blocked waiting for
>the device mutex.
>
>Fix this by re-working how per device listens are destroyed. Use
>rdma_destroy_id(), which avoids the deadlock, in place of
>cma_destroy_listen(). Additional synchronization is added
>to handle device hot-plug events and ensure that the id is not destroyed
>twice.
>
>Signed-off-by: Sean Hefty <sean.hefty at intel.com>
>---
>Fix from discussion started at:
>http://lists.openfabrics.org/pipermail/general/2007-October/041456.html
>
>Kanoj, please verify that this fix looks correct and works for you, and I
>will queue for 2.6.24.
>
> drivers/infiniband/core/cma.c | 70 +++++++++++++----------------------------
> 1 files changed, 23 insertions(+), 47 deletions(-)
>
>diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
>index 9ffb998..21ea92c 100644
>--- a/drivers/infiniband/core/cma.c
>+++ b/drivers/infiniband/core/cma.c
>@@ -113,11 +113,12 @@ struct rdma_id_private {
>
> struct rdma_bind_list *bind_list;
> struct hlist_node node;
>- struct list_head list;
>- struct list_head listen_list;
>+ struct list_head list; /* listen_any_list or cma_device.list */
>+ struct list_head listen_list; /* per device listens */
> struct cma_device *cma_dev;
> struct list_head mc_list;
>
>+ int internal_id;
> enum cma_state state;
> spinlock_t lock;
> struct completion comp;
>@@ -715,50 +716,27 @@ static void cma_cancel_route(struct rdma_id_private *id_priv)
> }
> }
>
>-static inline int cma_internal_listen(struct rdma_id_private *id_priv)
>-{
>- return (id_priv->state == CMA_LISTEN) && id_priv->cma_dev &&
>- cma_any_addr(&id_priv->id.route.addr.src_addr);
>-}
>-
>-static void cma_destroy_listen(struct rdma_id_private *id_priv)
>-{
>- cma_exch(id_priv, CMA_DESTROYING);
>-
>- if (id_priv->cma_dev) {
>- switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
>- case RDMA_TRANSPORT_IB:
>- if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
>- ib_destroy_cm_id(id_priv->cm_id.ib);
>- break;
>- case RDMA_TRANSPORT_IWARP:
>- if (id_priv->cm_id.iw && !IS_ERR(id_priv->cm_id.iw))
>- iw_destroy_cm_id(id_priv->cm_id.iw);
>- break;
>- default:
>- break;
>- }
>- cma_detach_from_dev(id_priv);
>- }
>- list_del(&id_priv->listen_list);
>-
>- cma_deref_id(id_priv);
>- wait_for_completion(&id_priv->comp);
>-
>- kfree(id_priv);
>-}
>-
> static void cma_cancel_listens(struct rdma_id_private *id_priv)
> {
> struct rdma_id_private *dev_id_priv;
>
>+ /*
>+ * Remove from listen_any_list to prevent added devices from spawning
>+ * additional listen requests.
>+ */
> mutex_lock(&lock);
> list_del(&id_priv->list);
>
> while (!list_empty(&id_priv->listen_list)) {
> dev_id_priv = list_entry(id_priv->listen_list.next,
> struct rdma_id_private, listen_list);
>- cma_destroy_listen(dev_id_priv);
>+ /* sync with device removal to avoid duplicate destruction */
>+ list_del_init(&dev_id_priv->list);
>+ list_del(&dev_id_priv->listen_list);
>+ mutex_unlock(&lock);
>+
>+ rdma_destroy_id(&dev_id_priv->id);
>+ mutex_lock(&lock);
> }
> mutex_unlock(&lock);
> }
>@@ -846,6 +824,9 @@ void rdma_destroy_id(struct rdma_cm_id *id)
> cma_deref_id(id_priv);
> wait_for_completion(&id_priv->comp);
>
>+ if (id_priv->internal_id)
>+ cma_deref_id(id_priv->id.context);
>+
> kfree(id_priv->id.route.path_rec);
> kfree(id_priv);
> }
>@@ -1401,14 +1382,13 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
>
> cma_attach_to_dev(dev_id_priv, cma_dev);
> list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list);
>+ atomic_inc(&id_priv->refcount);
>+ dev_id_priv->internal_id = 1;
>
> ret = rdma_listen(id, id_priv->backlog);
> if (ret)
>- goto err;
>-
>- return;
>-err:
>- cma_destroy_listen(dev_id_priv);
>+ printk(KERN_WARNING "RDMA CMA: cma_listen_on_dev, error %d, "
>+ "listening on device %s", ret, cma_dev->device->name);
> }
>
> static void cma_listen_on_all(struct rdma_id_private *id_priv)
>@@ -2729,16 +2709,12 @@ static void cma_process_remove(struct cma_device *cma_dev)
> id_priv = list_entry(cma_dev->id_list.next,
> struct rdma_id_private, list);
>
>- if (cma_internal_listen(id_priv)) {
>- cma_destroy_listen(id_priv);
>- continue;
>- }
>-
>+ list_del(&id_priv->listen_list);
> list_del_init(&id_priv->list);
> atomic_inc(&id_priv->refcount);
> mutex_unlock(&lock);
>
>- ret = cma_remove_id_dev(id_priv);
>+ ret = id_priv->internal_id ? 1 : cma_remove_id_dev(id_priv);
> cma_deref_id(id_priv);
> if (ret)
> rdma_destroy_id(&id_priv->id);
>
>
>
>
More information about the general
mailing list