[ofw] [PATCH] MLX4: Fix ioremap to use consistent caching attribute

Fab Tillier ftillier at microsoft.com
Thu Sep 9 22:16:27 PDT 2010


When mapping Blue Flame registers to user-mode, the UAR page is first mapped from I/O space to kernel-mode, then from kernel-mode to user-mode.  The mapping from I/O space uses the function ioremap, which is hard-coded to use MmNonCached as the caching attribute.  The subsequent mapping to user-mode specifies MmWriteCombined.  On Windows Server 2008 R2, the memory manager catches the inconsistency and preserves the original MmNonCached attribute.  This leads to a 200-400% regression in small message latencies, because writes of the request to the Blue Flame register are no longer write-combined.

The following patch adds a caching-type parameter to ioremap and changes all existing callers to pass MmNonCached.  The one exception is __map_memory_for_user, which already takes a caching type and now forwards it to ioremap.

With this patch, small message latency is identical on Windows Server 2008 and Windows Server 2008 R2.

Signed-off-by: Fab Tillier <ftillier at microsoft.com>

Index: hw/mlx4/kernel/bus/net/cmd.c
===================================================================
--- hw/mlx4/kernel/bus/net/cmd.c	(revision 2909)
+++ hw/mlx4/kernel/bus/net/cmd.c	(working copy)
@@ -493,7 +493,7 @@ int mlx4_cmd_init(struct mlx4_dev *dev)
 	priv->cmd.toggle     = 1;
 
 	priv->cmd.hcr = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_HCR_BASE,
-		MLX4_HCR_SIZE);
+		MLX4_HCR_SIZE, MmNonCached);
 	if (!priv->cmd.hcr) {
 		mlx4_err(dev, "Couldn't map command register.");
 		return -ENOMEM;
Index: hw/mlx4/kernel/bus/net/eq.c
===================================================================
--- hw/mlx4/kernel/bus/net/eq.c	(revision 2909)
+++ hw/mlx4/kernel/bus/net/eq.c	(working copy)
@@ -432,7 +432,7 @@ static void __iomem *mlx4_get_eq_uar(str
 		priv->eq_table.uar_map[index] =
 			ioremap(pci_resource_start(dev->pdev, 2) +
 				((eq->eqn / 4) << PAGE_SHIFT),
-				PAGE_SIZE);
+				PAGE_SIZE, MmNonCached);
 		if (!priv->eq_table.uar_map[index]) {
 			mlx4_err(dev, "Couldn't map EQ doorbell for EQN 0x%06x\n",
 				 eq->eqn);
@@ -672,7 +672,7 @@ static int mlx4_map_clr_int(struct mlx4_
 	struct mlx4_priv *priv = mlx4_priv(dev);
 
 	priv->clr_base = ioremap(pci_resource_start(dev->pdev, priv->fw.clr_int_bar) +
-				 priv->fw.clr_int_base, MLX4_CLR_INT_SIZE);
+				 priv->fw.clr_int_base, MLX4_CLR_INT_SIZE, MmNonCached);
 	if (!priv->clr_base) {
 		mlx4_err(dev, "Couldn't map interrupt clear register, aborting.\n");
 		return -ENOMEM;
Index: hw/mlx4/kernel/bus/net/main.c
===================================================================
--- hw/mlx4/kernel/bus/net/main.c	(revision 2909)
+++ hw/mlx4/kernel/bus/net/main.c	(working copy)
@@ -780,7 +780,7 @@ static int mlx4_setup_hca(struct mlx4_de
 		goto err_uar_table_free;
 	}
 
-	priv->kar = ioremap((u64)priv->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
+	priv->kar = ioremap((u64)priv->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE, MmNonCached);
 	if (!priv->kar) {
 		mlx4_err(dev, "Couldn't map kernel access region, "
 			 "aborting.\n");
Index: hw/mlx4/kernel/bus/net/catas.c
===================================================================
--- hw/mlx4/kernel/bus/net/catas.c	(revision 2909)
+++ hw/mlx4/kernel/bus/net/catas.c	(working copy)
@@ -189,7 +189,7 @@ int mlx4_start_catas_poll(struct mlx4_de
 	addr = pci_resource_start(dev->pdev, priv->fw.catas_bar) +
 		priv->fw.catas_offset;
 
-	priv->catas_err.map = ioremap(addr, priv->fw.catas_size * 4);
+	priv->catas_err.map = ioremap(addr, priv->fw.catas_size * 4, MmNonCached);
 	if (!priv->catas_err.map) {
 		mlx4_warn(dev, "Failed to map internal error buffer at 0x%lx\n",
 			  addr);
Index: hw/mlx4/kernel/bus/ib/main.c
===================================================================
--- hw/mlx4/kernel/bus/ib/main.c	(revision 2909)
+++ hw/mlx4/kernel/bus/ib/main.c	(working copy)
@@ -631,7 +631,7 @@ static void *mlx4_ib_add(struct mlx4_dev
 	if (mlx4_uar_alloc(dev, &ibdev->priv_uar))
 		goto err_pd;
 
-	ibdev->uar_map = ioremap((u64)ibdev->priv_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
+	ibdev->uar_map = ioremap((u64)ibdev->priv_uar.pfn << PAGE_SHIFT, PAGE_SIZE, MmNonCached);
 	if (!ibdev->uar_map)
 		goto err_uar;
 
Index: hw/mlx4/kernel/inc/l2w_memory.h
===================================================================
--- hw/mlx4/kernel/inc/l2w_memory.h	(revision 2909)
+++ hw/mlx4/kernel/inc/l2w_memory.h	(working copy)
@@ -145,14 +145,14 @@ static inline void kfree (const void *po
 * address is not guaranteed to be usable directly as a virtual
 * address. 
 */
-static inline 	void *ioremap(io_addr_t addr, SIZE_T size)
+static inline void *ioremap(io_addr_t addr, SIZE_T size, MEMORY_CACHING_TYPE cache_type)
 {
 	PHYSICAL_ADDRESS pa;
 	void *va;
 	
 	ASSERT( KeGetCurrentIrql() <= DISPATCH_LEVEL );
 	pa.QuadPart = addr;
-	va = MmMapIoSpace( pa, size, MmNonCached ); 
+	va = MmMapIoSpace( pa, size, cache_type ); 
 	return va;
 }
 
Index: hw/mlx4/kernel/hca/hverbs.c
===================================================================
--- hw/mlx4/kernel/hca/hverbs.c	(revision 2909)
+++ hw/mlx4/kernel/hca/hverbs.c	(working copy)
@@ -479,7 +479,7 @@ static NTSTATUS __map_memory_for_user(
 	p_map->mapped = 0;
 	
 	// map UAR to kernel 
-	p_map->kva = ioremap(addr, size);
+	p_map->kva = ioremap(addr, size, mem_type);
 	if (!p_map->kva) {
 		HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_LOW ,
 			("Couldn't map kernel access region, aborting.\n") );
-------------- next part --------------
A non-text attachment was scrubbed...
Name: ioremap.patch
Type: application/octet-stream
Size: 4524 bytes
Desc: ioremap.patch
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20100910/0de17660/attachment.obj>


More information about the ofw mailing list