[openib-general] [PATCH] roland-uverbs: possible race condition

Michael S. Tsirkin mst at mellanox.co.il
Sun Jan 30 06:34:45 PST 2005



Hello!
Looking at roland-uverbs, I see that an EQ is armed only if
an EQE was detected.  Consider this:
1. In MSI-X, since the interrupt is specific to this EQ,
	we know that the EQ won't be empty when we poll it.
	In any case (even if you believe we may get an extra
	MSI-X interrupt in rare cases), an extra set ci + arm
	does no harm.

  Thus in my opinion the check is not needed (but it does no actual harm either).

2. In the regular (non-MSI-X) interrupt handler I think I see a race
	condition.  We must always arm an EQ - even if it is found empty.
	This is because the interrupt may overtake the EQ entry
	write to memory.  In that case the EQ will look empty when we
	poll it, but if we do not arm it anyway, we will never get
	another interrupt.

	In light of this, I also changed IRQ_RETVAL to IRQ_HANDLED
	(a sketch of the resulting handler flow follows this list).
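
To make the intended flow concrete, here is a rough sketch of the shape
of the Tavor handlers after the change (lifted from the patch below and
trimmed for readability - not a drop-in replacement for the hunks):

	/* Regular interrupt: consume whatever EQEs are already visible,
	 * but re-arm the EQ unconditionally - the EQE write may not have
	 * reached memory yet when we poll. */
	if (ecr & dev->eq_table.eq[i].eqn_mask) {
		if (mthca_eq_int(dev, &dev->eq_table.eq[i]))
			tavor_set_eq_ci(dev, &dev->eq_table.eq[i],
					dev->eq_table.eq[i].cons_index);
		tavor_eq_req_not(dev, dev->eq_table.eq[i].eqn);
	}

	/* MSI-X: the vector is per-EQ, so just poll, set ci and re-arm;
	 * an extra set ci + arm on an empty EQ does no harm. */
	mthca_eq_int(dev, eq);
	tavor_set_eq_ci(dev, eq, eq->cons_index);
	tavor_eq_req_not(dev, eq->eqn);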

This is true for both Tavor and Arbel.
For Arbel, I also noticed that since all EQs are armed in one shot,
the EQ arm mask can be pre-computed.
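
A minimal sketch of that idea, assuming only the eq_mask field the patch
adds to struct mthca_eq_table:

	/* mthca_create_eq(): record this EQ in the precomputed arm mask. */
	dev->eq_table.eq_mask |= eq->eqn_mask;

	/* mthca_arbel_interrupt(): arm every EQ with a single doorbell
	 * write, instead of accumulating a mask of only the EQs that
	 * had events. */
	arbel_eq_req_not(dev, dev->eq_table.eq_mask);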

MST

The following applies to the roland-uverbs branch, or to the trunk
after the backport patch.

Signed-off-by: Michael S. Tsirkin <mst at mellanox.co.il>

Fix a race condition in regular interrupt handling.  Kill
a useless check in the MSI-X handlers.

Index: hw/mthca/mthca_dev.h
===================================================================
--- hw/mthca/mthca_dev.h	(revision 1701)
+++ hw/mthca/mthca_dev.h	(working copy)
@@ -171,6 +171,7 @@
 	struct mthca_alloc alloc;
 	void __iomem      *clr_int;
 	u32                clr_mask;
+	u32                eq_mask;
 	struct mthca_eq    eq[MTHCA_NUM_EQ];
 	u64                icm_virt;
 	struct page       *icm_page;
Index: hw/mthca/mthca_eq.c
===================================================================
--- hw/mthca/mthca_eq.c	(revision 1701)
+++ hw/mthca/mthca_eq.c	(working copy)
@@ -400,10 +400,12 @@
 		       MTHCA_ECR_CLR_BASE - MTHCA_ECR_BASE + 4);
 
 		for (i = 0; i < MTHCA_NUM_EQ; ++i)
-			if (ecr & dev->eq_table.eq[i].eqn_mask &&
-			    mthca_eq_int(dev, &dev->eq_table.eq[i])) {
-				tavor_set_eq_ci(dev, &dev->eq_table.eq[i],
-						dev->eq_table.eq[i].cons_index);
+			if (ecr & dev->eq_table.eq[i].eqn_mask) {
+				if (mthca_eq_int(dev, &dev->eq_table.eq[i]))
+					tavor_set_eq_ci(dev,
+						       	&dev->eq_table.eq[i],
+							dev->eq_table.eq[i].
+								cons_index);
 				tavor_eq_req_not(dev, dev->eq_table.eq[i].eqn);
 			}
 	}
@@ -417,10 +419,11 @@
 	struct mthca_eq  *eq  = eq_ptr;
 	struct mthca_dev *dev = eq->dev;
 
-	if (mthca_eq_int(dev, eq)) {
-		tavor_set_eq_ci(dev, eq, eq->cons_index);
-		tavor_eq_req_not(dev, eq->eqn);
-	}
+	mthca_eq_int(dev, eq);
+	/* EQ can not be empty, and even if it is,
+	 * an extra set ci + arm do no harm */
+	tavor_set_eq_ci(dev, eq, eq->cons_index);
+	tavor_eq_req_not(dev, eq->eqn);
 
 	/* MSI-X vectors always belong to us */
 	return IRQ_HANDLED;
@@ -429,7 +432,6 @@
 static irqreturn_t mthca_arbel_interrupt(int irq, void *dev_ptr, struct pt_regs *regs)
 {
 	struct mthca_dev *dev = dev_ptr;
-	u32 arm = 0;
 	int i;
 
 	if (dev->eq_table.clr_mask)
@@ -439,12 +441,11 @@
 		if (mthca_eq_int(dev, &dev->eq_table.eq[i])) {
 			arbel_set_eq_ci(dev, &dev->eq_table.eq[i],
 					dev->eq_table.eq[i].cons_index);
-			arm |= dev->eq_table.eq[i].eqn_mask;
 		}
 
-	arbel_eq_req_not(dev, arm);
+	arbel_eq_req_not(dev, dev->eq_table.eq_mask);
 
-	return IRQ_RETVAL(arm);
+	return IRQ_HANDLED;
 }
 
 static irqreturn_t mthca_arbel_msi_x_interrupt(int irq, void *eq_ptr,
@@ -453,10 +454,11 @@
 	struct mthca_eq  *eq  = eq_ptr;
 	struct mthca_dev *dev = eq->dev;
 
-	if (mthca_eq_int(dev, eq)) {
-		arbel_set_eq_ci(dev, eq, eq->cons_index);
-		arbel_eq_req_not(dev, eq->eqn_mask);
-	}
+	mthca_eq_int(dev, eq);
+	/* EQ can not be empty, and even if it is,
+	 * an extra set ci + arm do no harm */
+	arbel_set_eq_ci(dev, eq, eq->cons_index);
+	arbel_eq_req_not(dev, eq->eqn_mask);
 
 	/* MSI-X vectors always belong to us */
 	return IRQ_HANDLED;
@@ -568,6 +570,8 @@
 	eq->eqn_mask   = swab32(1 << eq->eqn);
 	eq->cons_index = 0;
 
+	dev->eq_table.eq_mask |= eq->eqn_mask;
+
 	mthca_dbg(dev, "Allocated EQ %d with %d entries\n",
 		  eq->eqn, nent);
 
@@ -605,6 +609,8 @@
 		PAGE_SIZE;
 	int i;
 
+	dev->eq_table.eq_mask &= ~eq->eqn_mask;
+
 	mailbox = kmalloc(sizeof (struct mthca_eq_context) + MTHCA_CMD_MAILBOX_EXTRA,
 			  GFP_KERNEL);
 	if (!mailbox)
-------------- next part --------------
Signed-off-by: Michael S. Tsirkin <mst at mellanox.co.il>

Index: hw/mthca/mthca_dev.h
===================================================================
--- hw/mthca/mthca_dev.h	(revision 1701)
+++ hw/mthca/mthca_dev.h	(working copy)
@@ -171,6 +171,7 @@ struct mthca_eq_table {
 	struct mthca_alloc alloc;
 	void __iomem      *clr_int;
 	u32                clr_mask;
+	u32                eq_mask;
 	struct mthca_eq    eq[MTHCA_NUM_EQ];
 	u64                icm_virt;
 	struct page       *icm_page;
Index: hw/mthca/mthca_eq.c
===================================================================
--- hw/mthca/mthca_eq.c	(revision 1701)
+++ hw/mthca/mthca_eq.c	(working copy)
@@ -400,10 +400,12 @@ static irqreturn_t mthca_tavor_interrupt
 		       MTHCA_ECR_CLR_BASE - MTHCA_ECR_BASE + 4);
 
 		for (i = 0; i < MTHCA_NUM_EQ; ++i)
-			if (ecr & dev->eq_table.eq[i].eqn_mask &&
-			    mthca_eq_int(dev, &dev->eq_table.eq[i])) {
-				tavor_set_eq_ci(dev, &dev->eq_table.eq[i],
-						dev->eq_table.eq[i].cons_index);
+			if (ecr & dev->eq_table.eq[i].eqn_mask) {
+				if (mthca_eq_int(dev, &dev->eq_table.eq[i]))
+					tavor_set_eq_ci(dev,
+						       	&dev->eq_table.eq[i],
+							dev->eq_table.eq[i].
+								cons_index);
 				tavor_eq_req_not(dev, dev->eq_table.eq[i].eqn);
 			}
 	}
@@ -417,10 +419,11 @@ static irqreturn_t mthca_tavor_msi_x_int
 	struct mthca_eq  *eq  = eq_ptr;
 	struct mthca_dev *dev = eq->dev;
 
-	if (mthca_eq_int(dev, eq)) {
-		tavor_set_eq_ci(dev, eq, eq->cons_index);
-		tavor_eq_req_not(dev, eq->eqn);
-	}
+	mthca_eq_int(dev, eq);
+	/* EQ can not be empty, and even if it is,
+	 * an extra set ci + arm do no harm */
+	tavor_set_eq_ci(dev, eq, eq->cons_index);
+	tavor_eq_req_not(dev, eq->eqn);
 
 	/* MSI-X vectors always belong to us */
 	return IRQ_HANDLED;
@@ -429,20 +432,19 @@ static irqreturn_t mthca_tavor_msi_x_int
 static irqreturn_t mthca_arbel_interrupt(int irq, void *dev_ptr, struct pt_regs *regs)
 {
 	struct mthca_dev *dev = dev_ptr;
-	u32 arm = 0;
 	int i;
+	int arm = 0;
 
 	if (dev->eq_table.clr_mask)
 		writel(dev->eq_table.clr_mask, dev->eq_table.clr_int);
 
 	for (i = 0; i < MTHCA_NUM_EQ; ++i)
-		if (mthca_eq_int(dev, &dev->eq_table.eq[i])) {
+		if (arm |= mthca_eq_int(dev, &dev->eq_table.eq[i])) {
 			arbel_set_eq_ci(dev, &dev->eq_table.eq[i],
 					dev->eq_table.eq[i].cons_index);
-			arm |= dev->eq_table.eq[i].eqn_mask;
 		}
 
-	arbel_eq_req_not(dev, arm);
+	arbel_eq_req_not(dev, dev->eq_table.eq_mask);
 
 	return IRQ_RETVAL(arm);
 }
@@ -453,10 +455,11 @@ static irqreturn_t mthca_arbel_msi_x_int
 	struct mthca_eq  *eq  = eq_ptr;
 	struct mthca_dev *dev = eq->dev;
 
-	if (mthca_eq_int(dev, eq)) {
-		arbel_set_eq_ci(dev, eq, eq->cons_index);
-		arbel_eq_req_not(dev, eq->eqn_mask);
-	}
+	mthca_eq_int(dev, eq);
+	/* EQ can not be empty, and even if it is,
+	 * an extra set ci + arm do no harm */
+	arbel_set_eq_ci(dev, eq, eq->cons_index);
+	arbel_eq_req_not(dev, eq->eqn_mask);
 
 	/* MSI-X vectors always belong to us */
 	return IRQ_HANDLED;
@@ -568,6 +571,8 @@ static int __devinit mthca_create_eq(str
 	eq->eqn_mask   = swab32(1 << eq->eqn);
 	eq->cons_index = 0;
 
+	dev->eq_table.eq_mask |= eq->eqn_mask;
+
 	mthca_dbg(dev, "Allocated EQ %d with %d entries\n",
 		  eq->eqn, nent);
 
@@ -605,6 +610,8 @@ static void mthca_free_eq(struct mthca_d
 		PAGE_SIZE;
 	int i;
 
+	dev->eq_table.eq_mask &= ~eq->eqn_mask;
+
 	mailbox = kmalloc(sizeof (struct mthca_eq_context) + MTHCA_CMD_MAILBOX_EXTRA,
 			  GFP_KERNEL);
 	if (!mailbox)

