[ofa-general] [PATCH for-2.6.21] mthca: QP reset race fixup

Michael S. Tsirkin mst at dev.mellanox.co.il
Tue Mar 20 06:39:32 PDT 2007


This fixes openfabrics bugzilla 394:
- Use common EQ for command interface and async events
- Clean CQ after moving QP to reset

This also fixes a potential crash in ipoib cm:
- sync with completion event ISR after QP is reset
  to prevent ULP from getting and using QP pointer and/or WRID
  after they are freed.

Signed-off-by: Michael S. Tsirkin <mst at dev.mellanox.co.il>

---

Roland, here's a patch that OFED has - it has been through testing here
and seems to work fine.

Can you queue this for 2.6.21 please?

We can rip the lines that sync with MTHCA_EQ_COMP out if you think
the issue needs to be dealt with in some other way - and I agree
this is only good for ULPs that do all their polling
inside the ISR, but at least this covers all in-kernel code.


diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c
index efd79ef..e3c774b 100644
--- a/drivers/infiniband/hw/mthca/mthca_cq.c
+++ b/drivers/infiniband/hw/mthca/mthca_cq.c
@@ -279,15 +279,13 @@ static inline int is_recv_cqe(struct mthca_cqe *cqe)
 		return !(cqe->is_send & 0x80);
 }
 
-void mthca_cq_clean(struct mthca_dev *dev, struct mthca_cq *cq, u32 qpn,
-		    struct mthca_srq *srq)
+void __mthca_cq_clean(struct mthca_dev *dev, struct mthca_cq *cq, u32 qpn,
+		      struct mthca_srq *srq)
 {
 	struct mthca_cqe *cqe;
 	u32 prod_index;
 	int nfreed = 0;
 
-	spin_lock_irq(&cq->lock);
-
 	/*
 	 * First we need to find the current producer index, so we
 	 * know where to start cleaning from.  It doesn't matter if HW
@@ -325,7 +323,13 @@ void mthca_cq_clean(struct mthca_dev *dev, struct mthca_cq *cq, u32 qpn,
 		cq->cons_index += nfreed;
 		update_cons_index(dev, cq, nfreed);
 	}
+}
 
+void mthca_cq_clean(struct mthca_dev *dev, struct mthca_cq *cq, u32 qpn,
+		    struct mthca_srq *srq)
+{
+	spin_lock_irq(&cq->lock);
+	__mthca_cq_clean(dev, cq, qpn, srq);
 	spin_unlock_irq(&cq->lock);
 }
 
diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h
index b7e42ef..78a092d 100644
--- a/drivers/infiniband/hw/mthca/mthca_dev.h
+++ b/drivers/infiniband/hw/mthca/mthca_dev.h
@@ -93,9 +93,8 @@ enum {
 };
 
 enum {
-	MTHCA_EQ_CMD,
-	MTHCA_EQ_ASYNC,
 	MTHCA_EQ_COMP,
+	MTHCA_EQ_ASYNC,
 	MTHCA_NUM_EQ
 };
 
@@ -505,6 +504,8 @@ void mthca_free_cq(struct mthca_dev *dev,
 void mthca_cq_completion(struct mthca_dev *dev, u32 cqn);
 void mthca_cq_event(struct mthca_dev *dev, u32 cqn,
 		    enum ib_event_type event_type);
+void __mthca_cq_clean(struct mthca_dev *dev, struct mthca_cq *cq, u32 qpn,
+		      struct mthca_srq *srq);
 void mthca_cq_clean(struct mthca_dev *dev, struct mthca_cq *cq, u32 qpn,
 		    struct mthca_srq *srq);
 void mthca_cq_resize_copy_cqes(struct mthca_cq *cq);
diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c
index 8ec9fa1..f7a41b8 100644
--- a/drivers/infiniband/hw/mthca/mthca_eq.c
+++ b/drivers/infiniband/hw/mthca/mthca_eq.c
@@ -110,11 +110,11 @@ enum {
 				(1ULL << MTHCA_EVENT_TYPE_WQ_ACCESS_ERROR)    | \
 				(1ULL << MTHCA_EVENT_TYPE_LOCAL_CATAS_ERROR)  | \
 				(1ULL << MTHCA_EVENT_TYPE_PORT_CHANGE)        | \
-				(1ULL << MTHCA_EVENT_TYPE_ECC_DETECT))
+				(1ULL << MTHCA_EVENT_TYPE_ECC_DETECT))        | \
+				(1ULL << MTHCA_EVENT_TYPE_CMD)
 #define MTHCA_SRQ_EVENT_MASK   ((1ULL << MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR)    | \
 				(1ULL << MTHCA_EVENT_TYPE_SRQ_QP_LAST_WQE)    | \
 				(1ULL << MTHCA_EVENT_TYPE_SRQ_LIMIT))
-#define MTHCA_CMD_EVENT_MASK    (1ULL << MTHCA_EVENT_TYPE_CMD)
 
 #define MTHCA_EQ_DB_INC_CI     (1 << 24)
 #define MTHCA_EQ_DB_REQ_NOT    (2 << 24)
@@ -863,23 +863,17 @@ int mthca_init_eq_table(struct mthca_dev *dev)
 	if (err)
 		goto err_out_unmap;
 
-	err = mthca_create_eq(dev, MTHCA_NUM_ASYNC_EQE + MTHCA_NUM_SPARE_EQE,
+	err = mthca_create_eq(dev, MTHCA_NUM_CMD_EQE + MTHCA_NUM_ASYNC_EQE +
+			      MTHCA_NUM_SPARE_EQE,
 			      (dev->mthca_flags & MTHCA_FLAG_MSI_X) ? 129 : intr,
 			      &dev->eq_table.eq[MTHCA_EQ_ASYNC]);
 	if (err)
 		goto err_out_comp;
 
-	err = mthca_create_eq(dev, MTHCA_NUM_CMD_EQE + MTHCA_NUM_SPARE_EQE,
-			      (dev->mthca_flags & MTHCA_FLAG_MSI_X) ? 130 : intr,
-			      &dev->eq_table.eq[MTHCA_EQ_CMD]);
-	if (err)
-		goto err_out_async;
-
 	if (dev->mthca_flags & MTHCA_FLAG_MSI_X) {
 		static const char *eq_name[] = {
 			[MTHCA_EQ_COMP]  = DRV_NAME " (comp)",
 			[MTHCA_EQ_ASYNC] = DRV_NAME " (async)",
-			[MTHCA_EQ_CMD]   = DRV_NAME " (cmd)"
 		};
 
 		for (i = 0; i < MTHCA_NUM_EQ; ++i) {
@@ -889,7 +883,7 @@ int mthca_init_eq_table(struct mthca_dev *dev)
 					  mthca_tavor_msi_x_interrupt,
 					  0, eq_name[i], dev->eq_table.eq + i);
 			if (err)
-				goto err_out_cmd;
+				goto err_out_async;
 			dev->eq_table.eq[i].have_irq = 1;
 		}
 	} else {
@@ -899,7 +893,7 @@ int mthca_init_eq_table(struct mthca_dev *dev)
 				  mthca_tavor_interrupt,
 				  IRQF_SHARED, DRV_NAME, dev);
 		if (err)
-			goto err_out_cmd;
+			goto err_out_async;
 		dev->eq_table.have_irq = 1;
 	}
 
@@ -912,15 +906,6 @@ int mthca_init_eq_table(struct mthca_dev *dev)
 		mthca_warn(dev, "MAP_EQ for async EQ %d returned status 0x%02x\n",
 			   dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, status);
 
-	err = mthca_MAP_EQ(dev, MTHCA_CMD_EVENT_MASK,
-			   0, dev->eq_table.eq[MTHCA_EQ_CMD].eqn, &status);
-	if (err)
-		mthca_warn(dev, "MAP_EQ for cmd EQ %d failed (%d)\n",
-			   dev->eq_table.eq[MTHCA_EQ_CMD].eqn, err);
-	if (status)
-		mthca_warn(dev, "MAP_EQ for cmd EQ %d returned status 0x%02x\n",
-			   dev->eq_table.eq[MTHCA_EQ_CMD].eqn, status);
-
 	for (i = 0; i < MTHCA_NUM_EQ; ++i)
 		if (mthca_is_memfree(dev))
 			arbel_eq_req_not(dev, dev->eq_table.eq[i].eqn_mask);
@@ -929,11 +914,8 @@ int mthca_init_eq_table(struct mthca_dev *dev)
 
 	return 0;
 
-err_out_cmd:
-	mthca_free_irqs(dev);
-	mthca_free_eq(dev, &dev->eq_table.eq[MTHCA_EQ_CMD]);
-
 err_out_async:
+	mthca_free_irqs(dev);
 	mthca_free_eq(dev, &dev->eq_table.eq[MTHCA_EQ_ASYNC]);
 
 err_out_comp:
@@ -956,8 +938,6 @@ void mthca_cleanup_eq_table(struct mthca_dev *dev)
 
 	mthca_MAP_EQ(dev, async_mask(dev),
 		     1, dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, &status);
-	mthca_MAP_EQ(dev, MTHCA_CMD_EVENT_MASK,
-		     1, dev->eq_table.eq[MTHCA_EQ_CMD].eqn, &status);
 
 	for (i = 0; i < MTHCA_NUM_EQ; ++i)
 		mthca_free_eq(dev, &dev->eq_table.eq[i]);
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index 0d9b7d0..5bfef62 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -835,7 +835,7 @@ static int mthca_setup_hca(struct mthca_dev *dev)
 	if (err || status) {
 		mthca_err(dev, "NOP command failed to generate interrupt (IRQ %d), aborting.\n",
 			  dev->mthca_flags & MTHCA_FLAG_MSI_X ?
-			  dev->eq_table.eq[MTHCA_EQ_CMD].msi_x_vector :
+			  dev->eq_table.eq[MTHCA_EQ_ASYNC].msi_x_vector :
 			  dev->pdev->irq);
 		if (dev->mthca_flags & (MTHCA_FLAG_MSI | MTHCA_FLAG_MSI_X))
 			mthca_err(dev, "Try again with MSI/MSI-X disabled.\n");
@@ -976,12 +976,11 @@ static void mthca_release_regions(struct pci_dev *pdev,
 
 static int mthca_enable_msi_x(struct mthca_dev *mdev)
 {
-	struct msix_entry entries[3];
+	struct msix_entry entries[2];
 	int err;
 
 	entries[0].entry = 0;
 	entries[1].entry = 1;
-	entries[2].entry = 2;
 
 	err = pci_enable_msix(mdev->pdev, entries, ARRAY_SIZE(entries));
 	if (err) {
@@ -993,7 +992,6 @@ static int mthca_enable_msi_x(struct mthca_dev *mdev)
 
 	mdev->eq_table.eq[MTHCA_EQ_COMP ].msi_x_vector = entries[0].vector;
 	mdev->eq_table.eq[MTHCA_EQ_ASYNC].msi_x_vector = entries[1].vector;
-	mdev->eq_table.eq[MTHCA_EQ_CMD  ].msi_x_vector = entries[2].vector;
 
 	return 0;
 }
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index 71dc84b..3d6591b 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -37,6 +37,7 @@
 
 #include <linux/string.h>
 #include <linux/slab.h>
+#include <linux/hardirq.h>
 
 #include <asm/io.h>
 
@@ -864,6 +865,11 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
 		if (qp->ibqp.send_cq != qp->ibqp.recv_cq)
 			mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq), qp->qpn, NULL);
 
+		if (dev->mthca_flags & MTHCA_FLAG_MSI_X)
+			synchronize_irq(dev->eq_table.eq[MTHCA_EQ_COMP].msi_x_vector);
+		else
+			synchronize_irq(dev->pdev->irq);
+
 		mthca_wq_reset(&qp->sq);
 		qp->sq.last = get_send_wqe(qp, qp->sq.max - 1);
 
@@ -1390,6 +1396,10 @@ void mthca_free_qp(struct mthca_dev *dev,
 	struct mthca_cq *send_cq;
 	struct mthca_cq *recv_cq;
 
+	if (qp->state != IB_QPS_RESET)
+		mthca_MODIFY_QP(dev, qp->state, IB_QPS_RESET, qp->qpn, 0,
+				NULL, 0, &status);
+
 	send_cq = to_mcq(qp->ibqp.send_cq);
 	recv_cq = to_mcq(qp->ibqp.recv_cq);
 
@@ -1403,15 +1413,25 @@ void mthca_free_qp(struct mthca_dev *dev,
 	mthca_array_clear(&dev->qp_table.qp,
 			  qp->qpn & (dev->limits.num_qps - 1));
 	--qp->refcount;
+
+	if (!qp->ibqp.uobject) {
+		__mthca_cq_clean(dev, send_cq, qp->qpn,
+			       qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
+		if (send_cq != recv_cq)
+			__mthca_cq_clean(dev, recv_cq, qp->qpn,
+					 qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
+	}
+
 	spin_unlock(&dev->qp_table.lock);
 
 	mthca_unlock_cqs(send_cq, recv_cq);
 
-	wait_event(qp->wait, !get_qp_refcount(dev, qp));
+	if (dev->mthca_flags & MTHCA_FLAG_MSI_X)
+		synchronize_irq(dev->eq_table.eq[MTHCA_EQ_COMP].msi_x_vector);
+	else
+		synchronize_irq(dev->pdev->irq);
 
-	if (qp->state != IB_QPS_RESET)
-		mthca_MODIFY_QP(dev, qp->state, IB_QPS_RESET, qp->qpn, 0,
-				NULL, 0, &status);
+	wait_event(qp->wait, !get_qp_refcount(dev, qp));
 
 	/*
 	 * If this is a userspace QP, the buffers, MR, CQs and so on
@@ -1419,12 +1439,6 @@ void mthca_free_qp(struct mthca_dev *dev,
 	 * unref the mem-free tables and free the QPN in our table.
 	 */
 	if (!qp->ibqp.uobject) {
-		mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq), qp->qpn,
-			       qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
-		if (qp->ibqp.send_cq != qp->ibqp.recv_cq)
-			mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq), qp->qpn,
-				       qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
-
 		mthca_free_memfree(dev, qp);
 		mthca_free_wqe_buf(dev, qp);
 	}

-- 
MST



More information about the general mailing list