[ewg] [PATCH] mlx4_core: map sufficient ICM memory for EQs

Eli Cohen eli at mellanox.co.il
Thu Jul 30 06:04:34 PDT 2009


Current implementation allocates a single host page for EQ context memory. As
the number of CPU cores increases, and since the number of required EQs depends
on this number, this patch removes the hard coded limit and makes the
allocation dependent on EQ entry size and the number of required EQs.

Signed-off-by: Eli Cohen <eli at mellanox.co.il>
---
 drivers/net/mlx4/eq.c       |   42 ++++++++++++++++++++++++------------------
 drivers/net/mlx4/main.c     |    1 +
 drivers/net/mlx4/mlx4.h     |    1 +
 include/linux/mlx4/device.h |    1 +
 4 files changed, 27 insertions(+), 18 deletions(-)

diff --git a/drivers/net/mlx4/eq.c b/drivers/net/mlx4/eq.c
index b9ceddd..1d41b1a 100644
--- a/drivers/net/mlx4/eq.c
+++ b/drivers/net/mlx4/eq.c
@@ -530,29 +530,34 @@ int mlx4_map_eq_icm(struct mlx4_dev *dev, u64 icm_virt)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	int ret;
+	int host_pages, icm_pages;
+	int i;
 
-	/*
-	 * We assume that mapping one page is enough for the whole EQ
-	 * context table.  This is fine with all current HCAs, because
-	 * we only use 32 EQs and each EQ uses 64 bytes of context
-	 * memory, or 1 KB total.
-	 */
+	host_pages = ALIGN(min_t(int, dev->caps.num_eqs, num_possible_cpus() + 1) *
+			   dev->caps.eqc_entry_size, PAGE_SIZE) >> PAGE_SHIFT;
+	priv->eq_table.order = ilog2(roundup_pow_of_two(host_pages));
 	priv->eq_table.icm_virt = icm_virt;
-	priv->eq_table.icm_page = alloc_page(GFP_HIGHUSER);
+	priv->eq_table.icm_page = alloc_pages(GFP_HIGHUSER, priv->eq_table.order);
 	if (!priv->eq_table.icm_page)
 		return -ENOMEM;
 	priv->eq_table.icm_dma  = pci_map_page(dev->pdev, priv->eq_table.icm_page, 0,
-					       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+					       PAGE_SIZE << priv->eq_table.order,
+					       PCI_DMA_BIDIRECTIONAL);
 	if (pci_dma_mapping_error(dev->pdev, priv->eq_table.icm_dma)) {
-		__free_page(priv->eq_table.icm_page);
+		__free_pages(priv->eq_table.icm_page, priv->eq_table.order);
 		return -ENOMEM;
 	}
 
-	ret = mlx4_MAP_ICM_page(dev, priv->eq_table.icm_dma, icm_virt);
-	if (ret) {
-		pci_unmap_page(dev->pdev, priv->eq_table.icm_dma, PAGE_SIZE,
-			       PCI_DMA_BIDIRECTIONAL);
-		__free_page(priv->eq_table.icm_page);
+	icm_pages = (PAGE_SIZE / MLX4_ICM_PAGE_SIZE) * (1 << priv->eq_table.order);
+	for (i = 0; i < icm_pages; ++i) {
+		ret = mlx4_MAP_ICM_page(dev, priv->eq_table.icm_dma, icm_virt + i * MLX4_ICM_PAGE_SIZE);
+		if (ret) {
+			mlx4_UNMAP_ICM(dev, priv->eq_table.icm_virt, i);
+			pci_unmap_page(dev->pdev, priv->eq_table.icm_dma, PAGE_SIZE,
+				       PCI_DMA_BIDIRECTIONAL);
+			__free_pages(priv->eq_table.icm_page, priv->eq_table.order);
+			break;
+		}
 	}
 
 	return ret;
@@ -561,11 +566,12 @@ int mlx4_map_eq_icm(struct mlx4_dev *dev, u64 icm_virt)
 void mlx4_unmap_eq_icm(struct mlx4_dev *dev)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
+	int icm_pages = (PAGE_SIZE / MLX4_ICM_PAGE_SIZE) * (1 << priv->eq_table.order);
 
-	mlx4_UNMAP_ICM(dev, priv->eq_table.icm_virt, 1);
-	pci_unmap_page(dev->pdev, priv->eq_table.icm_dma, PAGE_SIZE,
-		       PCI_DMA_BIDIRECTIONAL);
-	__free_page(priv->eq_table.icm_page);
+	mlx4_UNMAP_ICM(dev, priv->eq_table.icm_virt, icm_pages);
+	pci_unmap_page(dev->pdev, priv->eq_table.icm_dma,
+		       PAGE_SIZE << priv->eq_table.order, PCI_DMA_BIDIRECTIONAL);
+	__free_pages(priv->eq_table.icm_page, priv->eq_table.order);
 }
 
 int mlx4_alloc_eq_table(struct mlx4_dev *dev)
diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c
index dac621b..872becd 100644
--- a/drivers/net/mlx4/main.c
+++ b/drivers/net/mlx4/main.c
@@ -207,6 +207,7 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 	dev->caps.max_cqes	     = dev_cap->max_cq_sz - 1;
 	dev->caps.reserved_cqs	     = dev_cap->reserved_cqs;
 	dev->caps.reserved_eqs	     = dev_cap->reserved_eqs;
+	dev->caps.eqc_entry_size     = dev_cap->eqc_entry_sz;
 	dev->caps.mtts_per_seg	     = 1 << log_mtts_per_seg;
 	dev->caps.reserved_mtts	     = DIV_ROUND_UP(dev_cap->reserved_mtts,
 						    dev->caps.mtts_per_seg);
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index 5bd79c2..1a20fa3 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -211,6 +211,7 @@ struct mlx4_eq_table {
 	struct mlx4_icm_table	cmpt_table;
 	int			have_irq;
 	u8			inta_pin;
+	int			order;
 };
 
 struct mlx4_srq_table {
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index ce7cc6c..8923c9b 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -206,6 +206,7 @@ struct mlx4_caps {
 	int			max_cqes;
 	int			reserved_cqs;
 	int			num_eqs;
+	int			eqc_entry_size;
 	int			reserved_eqs;
 	int			num_comp_vectors;
 	int			num_mpts;
-- 
1.6.3.3




More information about the ewg mailing list