[ofa-general][PATCH 1/3]mlx4: Multiple completion vectors support

Yevgeny Petrilin yevgenyp at mellanox.co.il
Tue Oct 28 08:49:34 PDT 2008


The driver now creates a completion EQ for every CPU.
When allocating a CQ, a ULP specifies the number of the completion
vector it wants the CQ to be attached to. The number of completion
vectors is advertised via ib_device.num_comp_vectors.

Signed-off-by: Yevgeny Petrilin <yevgenyp at mellanox.co.il>
---
 drivers/infiniband/hw/mlx4/cq.c   |    2 +-
 drivers/infiniband/hw/mlx4/main.c |    2 +-
 drivers/net/mlx4/cq.c             |   14 ++++++++--
 drivers/net/mlx4/en_cq.c          |    9 ++++--
 drivers/net/mlx4/en_main.c        |    4 +-
 drivers/net/mlx4/eq.c             |   47 ++++++++++++++++++++++++------------
 drivers/net/mlx4/main.c           |   14 ++++++----
 drivers/net/mlx4/mlx4.h           |    4 +-
 include/linux/mlx4/device.h       |    4 ++-
 9 files changed, 65 insertions(+), 35 deletions(-)

diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index d0866a3..5de41bd 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -222,7 +222,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector
 	}

 	err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, uar,
-			    cq->db.dma, &cq->mcq, 0);
+			    cq->db.dma, &cq->mcq, vector, 0);
 	if (err)
 		goto err_dbmap;

diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 2e80f8f..dcefe1f 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -578,7 +578,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
 	mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
 		ibdev->num_ports++;
 	ibdev->ib_dev.phys_port_cnt     = ibdev->num_ports;
-	ibdev->ib_dev.num_comp_vectors	= 1;
+	ibdev->ib_dev.num_comp_vectors	= dev->caps.num_comp_vectors;
 	ibdev->ib_dev.dma_device	= &dev->pdev->dev;

 	ibdev->ib_dev.uverbs_abi_ver	= MLX4_IB_UVERBS_ABI_VERSION;
diff --git a/drivers/net/mlx4/cq.c b/drivers/net/mlx4/cq.c
index b7ad282..a675e85 100644
--- a/drivers/net/mlx4/cq.c
+++ b/drivers/net/mlx4/cq.c
@@ -189,7 +189,7 @@ EXPORT_SYMBOL_GPL(mlx4_cq_resize);

 int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt,
 		  struct mlx4_uar *uar, u64 db_rec, struct mlx4_cq *cq,
-		  int collapsed)
+		  unsigned vector, int collapsed)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	struct mlx4_cq_table *cq_table = &priv->cq_table;
@@ -227,7 +227,15 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt,

 	cq_context->flags	    = cpu_to_be32(!!collapsed << 18);
 	cq_context->logsize_usrpage = cpu_to_be32((ilog2(nent) << 24) | uar->index);
-	cq_context->comp_eqn        = priv->eq_table.eq[MLX4_EQ_COMP].eqn;
+
+	if (vector >= dev->caps.num_comp_vectors) {
+		err = -EINVAL;
+		goto err_radix;
+	}
+
+	cq->comp_eq_idx		    = MLX4_EQ_COMP_CPU0 + vector;
+	cq_context->comp_eqn	    = priv->eq_table.eq[MLX4_EQ_COMP_CPU0 +
+							vector].eqn;
 	cq_context->log_page_size   = mtt->page_shift - MLX4_ICM_PAGE_SHIFT;

 	mtt_addr = mlx4_mtt_addr(dev, mtt);
@@ -276,7 +284,7 @@ void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq)
 	if (err)
 		mlx4_warn(dev, "HW2SW_CQ failed (%d) for CQN %06x\n", err, cq->cqn);

-	synchronize_irq(priv->eq_table.eq[MLX4_EQ_COMP].irq);
+	synchronize_irq(priv->eq_table.eq[cq->comp_eq_idx].irq);

 	spin_lock_irq(&cq_table->lock);
 	radix_tree_delete(&cq_table->tree, cq->cqn);
diff --git a/drivers/net/mlx4/en_cq.c b/drivers/net/mlx4/en_cq.c
index 1368a80..8f388e8 100644
--- a/drivers/net/mlx4/en_cq.c
+++ b/drivers/net/mlx4/en_cq.c
@@ -51,10 +51,13 @@ int mlx4_en_create_cq(struct mlx4_en_priv *priv,
 	int err;

 	cq->size = entries;
-	if (mode == RX)
+	if (mode == RX) {
 		cq->buf_size = cq->size * sizeof(struct mlx4_cqe);
-	else
+		cq->vector = ring % mdev->dev->caps.num_comp_vectors;
+	} else {
 		cq->buf_size = sizeof(struct mlx4_cqe);
+		cq->vector = 0;
+	}

 	cq->ring = ring;
 	cq->is_tx = mode;
@@ -86,7 +89,7 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq)
 	memset(cq->buf, 0, cq->buf_size);

 	err = mlx4_cq_alloc(mdev->dev, cq->size, &cq->wqres.mtt, &mdev->priv_uar,
-			    cq->wqres.db.dma, &cq->mcq, cq->is_tx);
+			    cq->wqres.db.dma, &cq->mcq, cq->vector, cq->is_tx);
 	if (err)
 		return err;

diff --git a/drivers/net/mlx4/en_main.c b/drivers/net/mlx4/en_main.c
index 1b0eebf..7423bf9 100644
--- a/drivers/net/mlx4/en_main.c
+++ b/drivers/net/mlx4/en_main.c
@@ -171,9 +171,9 @@ static void *mlx4_en_add(struct mlx4_dev *dev)
 		mlx4_info(mdev, "Using %d tx rings for port:%d\n",
 			  mdev->profile.prof[i].tx_ring_num, i);
 		if (!mdev->profile.prof[i].rx_ring_num) {
-			mdev->profile.prof[i].rx_ring_num = 1;
+			mdev->profile.prof[i].rx_ring_num = dev->caps.num_comp_vectors;
 			mlx4_info(mdev, "Defaulting to %d rx rings for port:%d\n",
-				  1, i);
+				  mdev->profile.prof[i].rx_ring_num, i);
 		} else
 			mlx4_info(mdev, "Using %d rx rings for port:%d\n",
 				  mdev->profile.prof[i].rx_ring_num, i);
diff --git a/drivers/net/mlx4/eq.c b/drivers/net/mlx4/eq.c
index de16933..b436234 100644
--- a/drivers/net/mlx4/eq.c
+++ b/drivers/net/mlx4/eq.c
@@ -266,7 +266,7 @@ static irqreturn_t mlx4_interrupt(int irq, void *dev_ptr)

 	writel(priv->eq_table.clr_mask, priv->eq_table.clr_int);

-	for (i = 0; i < MLX4_NUM_EQ; ++i)
+	for (i = 0; i < MLX4_EQ_COMP_CPU0 + dev->caps.num_comp_vectors; ++i)
 		work |= mlx4_eq_int(dev, &priv->eq_table.eq[i]);

 	return IRQ_RETVAL(work);
@@ -483,7 +483,7 @@ static void mlx4_free_irqs(struct mlx4_dev *dev)

 	if (eq_table->have_irq)
 		free_irq(dev->pdev->irq, dev);
-	for (i = 0; i < MLX4_NUM_EQ; ++i)
+	for (i = 0; i < MLX4_EQ_COMP_CPU0 + dev->caps.num_comp_vectors; ++i)
 		if (eq_table->eq[i].have_irq)
 			free_irq(eq_table->eq[i].irq, eq_table->eq + i);
 }
@@ -554,6 +554,7 @@ void mlx4_unmap_eq_icm(struct mlx4_dev *dev)
 int mlx4_init_eq_table(struct mlx4_dev *dev)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
+	int req_eqs;
 	int err;
 	int i;

@@ -574,11 +575,21 @@ int mlx4_init_eq_table(struct mlx4_dev *dev)
 	priv->eq_table.clr_int  = priv->clr_base +
 		(priv->eq_table.inta_pin < 32 ? 4 : 0);

-	err = mlx4_create_eq(dev, dev->caps.num_cqs + MLX4_NUM_SPARE_EQE,
-			     (dev->flags & MLX4_FLAG_MSI_X) ? MLX4_EQ_COMP : 0,
-			     &priv->eq_table.eq[MLX4_EQ_COMP]);
-	if (err)
-		goto err_out_unmap;
+	dev->caps.num_comp_vectors = 0;
+	req_eqs = (dev->flags & MLX4_FLAG_MSI_X) ? num_online_cpus() : 1;
+	while (req_eqs) {
+		err = mlx4_create_eq(
+			dev, dev->caps.num_cqs + MLX4_NUM_SPARE_EQE,
+			(dev->flags & MLX4_FLAG_MSI_X) ?
+			(MLX4_EQ_COMP_CPU0 + dev->caps.num_comp_vectors) : 0,
+			&priv->eq_table.eq[MLX4_EQ_COMP_CPU0 +
+			dev->caps.num_comp_vectors]);
+		if (err)
+			goto err_out_comp;
+
+		dev->caps.num_comp_vectors++;
+		req_eqs--;
+	}

 	err = mlx4_create_eq(dev, MLX4_NUM_ASYNC_EQE + MLX4_NUM_SPARE_EQE,
 			     (dev->flags & MLX4_FLAG_MSI_X) ? MLX4_EQ_ASYNC : 0,
@@ -587,12 +598,16 @@ int mlx4_init_eq_table(struct mlx4_dev *dev)
 		goto err_out_comp;

 	if (dev->flags & MLX4_FLAG_MSI_X) {
-		static const char *eq_name[] = {
-			[MLX4_EQ_COMP]  = DRV_NAME " (comp)",
-			[MLX4_EQ_ASYNC] = DRV_NAME " (async)"
-		};
+		static char eq_name[MLX4_NUM_EQ][20];
+
+		for (i = 0; i < MLX4_EQ_COMP_CPU0 +
+		      dev->caps.num_comp_vectors; ++i) {
+			if (i == 0)
+				snprintf(eq_name[0], 20, DRV_NAME "(async)");
+			else
+				snprintf(eq_name[i], 20, "comp_" DRV_NAME "%d",
+					 i - 1);

-		for (i = 0; i < MLX4_NUM_EQ; ++i) {
 			err = request_irq(priv->eq_table.eq[i].irq,
 					  mlx4_msi_x_interrupt,
 					  0, eq_name[i], priv->eq_table.eq + i);
@@ -617,7 +632,7 @@ int mlx4_init_eq_table(struct mlx4_dev *dev)
 		mlx4_warn(dev, "MAP_EQ for async EQ %d failed (%d)\n",
 			   priv->eq_table.eq[MLX4_EQ_ASYNC].eqn, err);

-	for (i = 0; i < MLX4_NUM_EQ; ++i)
+	for (i = 0; i < MLX4_EQ_COMP_CPU0 + dev->caps.num_comp_vectors; ++i)
 		eq_set_ci(&priv->eq_table.eq[i], 1);

 	return 0;
@@ -626,9 +641,9 @@ err_out_async:
 	mlx4_free_eq(dev, &priv->eq_table.eq[MLX4_EQ_ASYNC]);

 err_out_comp:
-	mlx4_free_eq(dev, &priv->eq_table.eq[MLX4_EQ_COMP]);
+	for (i = 0; i < dev->caps.num_comp_vectors; ++i)
+		mlx4_free_eq(dev, &priv->eq_table.eq[MLX4_EQ_COMP_CPU0 + i]);

-err_out_unmap:
 	mlx4_unmap_clr_int(dev);
 	mlx4_free_irqs(dev);

@@ -647,7 +662,7 @@ void mlx4_cleanup_eq_table(struct mlx4_dev *dev)

 	mlx4_free_irqs(dev);

-	for (i = 0; i < MLX4_NUM_EQ; ++i)
+	for (i = 0; i < MLX4_EQ_COMP_CPU0 + dev->caps.num_comp_vectors; ++i)
 		mlx4_free_eq(dev, &priv->eq_table.eq[i]);

 	mlx4_unmap_clr_int(dev);
diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c
index 468921b..aaf3eec 100644
--- a/drivers/net/mlx4/main.c
+++ b/drivers/net/mlx4/main.c
@@ -901,22 +901,24 @@ static void mlx4_enable_msi_x(struct mlx4_dev *dev)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	struct msix_entry entries[MLX4_NUM_EQ];
+	int needed_vectors = MLX4_EQ_COMP_CPU0 + num_online_cpus();
 	int err;
 	int i;

 	if (msi_x) {
-		for (i = 0; i < MLX4_NUM_EQ; ++i)
+		for (i = 0; i < needed_vectors; ++i)
 			entries[i].entry = i;

-		err = pci_enable_msix(dev->pdev, entries, ARRAY_SIZE(entries));
+		err = pci_enable_msix(dev->pdev, entries, needed_vectors);
 		if (err) {
 			if (err > 0)
-				mlx4_info(dev, "Only %d MSI-X vectors available, "
-					  "not using MSI-X\n", err);
+				mlx4_info(dev, "Only %d MSI-X vectors "
+					  "available, need %d. Not using MSI-X\n",
+					  err, needed_vectors);
 			goto no_msi;
 		}

-		for (i = 0; i < MLX4_NUM_EQ; ++i)
+		for (i = 0; i < needed_vectors; ++i)
 			priv->eq_table.eq[i].irq = entries[i].vector;

 		dev->flags |= MLX4_FLAG_MSI_X;
@@ -924,7 +926,7 @@ static void mlx4_enable_msi_x(struct mlx4_dev *dev)
 	}

 no_msi:
-	for (i = 0; i < MLX4_NUM_EQ; ++i)
+	for (i = 0; i < needed_vectors; ++i)
 		priv->eq_table.eq[i].irq = dev->pdev->irq;
 }

diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index fa431fa..612abe6 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -64,8 +64,8 @@ enum {

 enum {
 	MLX4_EQ_ASYNC,
-	MLX4_EQ_COMP,
-	MLX4_NUM_EQ
+	MLX4_EQ_COMP_CPU0,
+	MLX4_NUM_EQ = MLX4_EQ_COMP_CPU0 + NR_CPUS
 };

 enum {
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index bd9977b..6228b97 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -205,6 +205,7 @@ struct mlx4_caps {
 	int			reserved_cqs;
 	int			num_eqs;
 	int			reserved_eqs;
+	int			num_comp_vectors;
 	int			num_mpts;
 	int			num_mtt_segs;
 	int			fmr_reserved_mtts;
@@ -327,6 +328,7 @@ struct mlx4_cq {
 	int			arm_sn;

 	int			cqn;
+	int			comp_eq_idx;

 	atomic_t		refcount;
 	struct completion	free;
@@ -436,7 +438,7 @@ void mlx4_free_hwq_res(struct mlx4_dev *mdev, struct mlx4_hwq_resources *wqres,

 int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt,
 		  struct mlx4_uar *uar, u64 db_rec, struct mlx4_cq *cq,
-		  int collapsed);
+		  unsigned vector, int collapsed);
 void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq);

 int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, int *base);
-- 
1.5.4






More information about the general mailing list