[openib-general] [PATCH] fix 2 race conditions in ib_destroy_cm_id
Michael S. Tsirkin
mst at mellanox.co.il
Sun May 7 08:54:47 PDT 2006
Fix two issues in CM.
1. crash if cm id is destroyed from handler because of non-0 return code,
and at the same time from user thread by direct call to ib_destroy_cm_id.
2. use after free if ib_destroy_cm_id tests the refcount after cm_deref_id has
decremented the reference count but before it has called wake_up.
I'm sure the first one has caused crashes for me, and I suspect
the second one caused a system hang.
Signed-off-by: Michael S. Tsirkin <mst at mellanox.co.il>
Index: linux-2.6.16/drivers/infiniband/core/cm.c
===================================================================
--- linux-2.6.16.orig/drivers/infiniband/core/cm.c 2006-05-07 20:41:38.000000000 +0300
+++ linux-2.6.16/drivers/infiniband/core/cm.c 2006-05-07 20:43:28.000000000 +0300
@@ -159,8 +159,12 @@ static void cm_work_handler(void *data);
static inline void cm_deref_id(struct cm_id_private *cm_id_priv)
{
+ unsigned long flags;
+
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
if (atomic_dec_and_test(&cm_id_priv->refcount))
wake_up(&cm_id_priv->wait);
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
}
static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
@@ -320,13 +324,22 @@ static int cm_alloc_id(struct cm_id_priv
return ret;
}
-static void cm_free_id(__be32 local_id)
+static int cm_free_id(__be32 local_id)
{
+ struct cm_id_private *cm_id_priv;
unsigned long flags;
+ int rc;
spin_lock_irqsave(&cm.lock, flags);
- idr_remove(&cm.local_id_table, (__force int) local_id);
+ cm_id_priv = idr_find(&cm.local_id_table, (__force int) local_id);
+ if (unlikely(!cm_id_priv))
+ rc = -EINVAL;
+ else {
+ rc = 0;
+ idr_remove(&cm.local_id_table, (__force int) local_id);
+ }
spin_unlock_irqrestore(&cm.lock, flags);
+ return rc;
}
static struct cm_id_private * cm_get_id(__be32 local_id, __be32 remote_id)
@@ -710,11 +723,12 @@ static void cm_reset_to_idle(struct cm_i
}
}
-void ib_destroy_cm_id(struct ib_cm_id *cm_id)
+static void __ib_destroy_cm_id(struct ib_cm_id *cm_id, int flush)
{
struct cm_id_private *cm_id_priv;
struct cm_work *work;
unsigned long flags;
+ int rc;
cm_id_priv = container_of(cm_id, struct cm_id_private, id);
retest:
@@ -775,15 +789,32 @@ retest:
break;
}
- cm_free_id(cm_id->local_id);
- atomic_dec(&cm_id_priv->refcount);
- wait_event(cm_id_priv->wait, !atomic_read(&cm_id_priv->refcount));
+ rc = cm_free_id(cm_id->local_id);
+ cm_deref_id(cm_id_priv);
+ if (unlikely(rc) && flush) {
+ /* A handler has removed the id from the idr.
+ Make sure it is not still running. */
+ flush_workqueue(&cm.wq);
+ return;
+ }
+
+ wait_event(cm_id_priv->wait, ({
+ spin_lock_irq(&cm_id_priv->lock);
+ rc = !atomic_read(&cm_id_priv->refcount)
+ spin_unlock_irq(&cm_id_priv->lock);
+ rc;
+ }));
while ((work = cm_dequeue_work(cm_id_priv)) != NULL)
cm_free_work(work);
kfree(cm_id_priv->compare_data);
kfree(cm_id_priv->private_data);
kfree(cm_id_priv);
}
+
+void ib_destroy_cm_id(struct ib_cm_id *cm_id)
+{
+ __ib_destroy_cm_id(cm_id, 1);
+}
EXPORT_SYMBOL(ib_destroy_cm_id);
int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask,
@@ -1171,8 +1202,8 @@ static void cm_process_work(struct cm_id
cm_free_work(work);
}
cm_deref_id(cm_id_priv);
- if (ret)
- ib_destroy_cm_id(&cm_id_priv->id);
+ if (ret) /* don't flush: we are running from cm.wq */
+ __ib_destroy_cm_id(&cm_id_priv->id, 0);
}
static void cm_format_mra(struct cm_mra_msg *mra_msg,
--
MST
More information about the general
mailing list