[ofw] [PATCH] MLX4: Fix ioremap to use consistent caching attribute

Leonid Keller leonid at mellanox.co.il
Tue Sep 14 02:22:59 PDT 2010


Applied in 2922, thank you.

> -----Original Message-----
> From: ofw-bounces at lists.openfabrics.org [mailto:ofw-
> bounces at lists.openfabrics.org] On Behalf Of Fab Tillier
> Sent: Tuesday, September 14, 2010 1:37 AM
> To: 'ofw at lists.openfabrics.org'
> Cc: Jeff Baxter
> Subject: [ofw] [PATCH] MLX4: Fix ioremap to use consistent caching
> attribute
> 
> Resending, as apparently the mailing list had issues last week...
> 
> When mapping Blue Flame registers to user-mode, the UAR page is first
> mapped from I/O space to kernel-mode, then from kernel-mode to user-
> mode.  The mapping from I/O space uses the function ioremap, which is
> hard coded to use MmNonCached as the caching attribute.  The subsequent
> mapping to user-mode specifies MmWriteCombined.  On Windows Server 2008
> R2, the memory manager catches the inconsistency and preserves the
> original MmNonCached attribute.  This leads to a 200-400% regression in
> small-message latency, because writes of the request to the Blue Flame
> register are no longer write-combined.
> 
> The following patch adds a caching type parameter to ioremap and
> updates all existing callers to pass MmNonCached.  The one exception is
> __map_memory_for_user, which already takes a caching type and now
> forwards it to ioremap.
> 
> With this patch, small message latency is identical on Windows Server
> 2008 and Windows Server 2008 R2.
> 
> Signed-off-by: Fab Tillier <ftillier at microsoft.com>
> 
> Index: hw/mlx4/kernel/bus/net/cmd.c
> ===================================================================
> --- hw/mlx4/kernel/bus/net/cmd.c	(revision 2909)
> +++ hw/mlx4/kernel/bus/net/cmd.c	(working copy)
> @@ -493,7 +493,7 @@ int mlx4_cmd_init(struct mlx4_dev *dev)
>  	priv->cmd.toggle     = 1;
> 
>  	priv->cmd.hcr = ioremap(pci_resource_start(dev->pdev, 0) +
> MLX4_HCR_BASE,
> -		MLX4_HCR_SIZE);
> +		MLX4_HCR_SIZE, MmNonCached);
>  	if (!priv->cmd.hcr) {
>  		mlx4_err(dev, "Couldn't map command register.");
>  		return -ENOMEM;
> Index: hw/mlx4/kernel/bus/net/eq.c
> ===================================================================
> --- hw/mlx4/kernel/bus/net/eq.c	(revision 2909)
> +++ hw/mlx4/kernel/bus/net/eq.c	(working copy)
> @@ -432,7 +432,7 @@ static void __iomem *mlx4_get_eq_uar(str
>  		priv->eq_table.uar_map[index] =
>  			ioremap(pci_resource_start(dev->pdev, 2) +
>  				((eq->eqn / 4) << PAGE_SHIFT),
> -				PAGE_SIZE);
> +				PAGE_SIZE, MmNonCached);
>  		if (!priv->eq_table.uar_map[index]) {
>  			mlx4_err(dev, "Couldn't map EQ doorbell for EQN
> 0x%06x\n",
>  				 eq->eqn);
> @@ -672,7 +672,7 @@ static int mlx4_map_clr_int(struct mlx4_
>  	struct mlx4_priv *priv = mlx4_priv(dev);
> 
>  	priv->clr_base = ioremap(pci_resource_start(dev->pdev, priv-
> >fw.clr_int_bar) +
> -				 priv->fw.clr_int_base, MLX4_CLR_INT_SIZE);
> +				 priv->fw.clr_int_base, MLX4_CLR_INT_SIZE,
> MmNonCached);
>  	if (!priv->clr_base) {
>  		mlx4_err(dev, "Couldn't map interrupt clear register,
> aborting.\n");
>  		return -ENOMEM;
> Index: hw/mlx4/kernel/bus/net/main.c
> ===================================================================
> --- hw/mlx4/kernel/bus/net/main.c	(revision 2909)
> +++ hw/mlx4/kernel/bus/net/main.c	(working copy)
> @@ -780,7 +780,7 @@ static int mlx4_setup_hca(struct mlx4_de
>  		goto err_uar_table_free;
>  	}
> 
> -	priv->kar = ioremap((u64)priv->driver_uar.pfn << PAGE_SHIFT,
> PAGE_SIZE);
> +	priv->kar = ioremap((u64)priv->driver_uar.pfn << PAGE_SHIFT,
> PAGE_SIZE, MmNonCached);
>  	if (!priv->kar) {
>  		mlx4_err(dev, "Couldn't map kernel access region, "
>  			 "aborting.\n");
> Index: hw/mlx4/kernel/bus/net/catas.c
> ===================================================================
> --- hw/mlx4/kernel/bus/net/catas.c	(revision 2909)
> +++ hw/mlx4/kernel/bus/net/catas.c	(working copy)
> @@ -189,7 +189,7 @@ int mlx4_start_catas_poll(struct mlx4_de
>  	addr = pci_resource_start(dev->pdev, priv->fw.catas_bar) +
>  		priv->fw.catas_offset;
> 
> -	priv->catas_err.map = ioremap(addr, priv->fw.catas_size * 4);
> +	priv->catas_err.map = ioremap(addr, priv->fw.catas_size * 4,
> MmNonCached);
>  	if (!priv->catas_err.map) {
>  		mlx4_warn(dev, "Failed to map internal error buffer at
> 0x%lx\n",
>  			  addr);
> Index: hw/mlx4/kernel/bus/ib/main.c
> ===================================================================
> --- hw/mlx4/kernel/bus/ib/main.c	(revision 2909)
> +++ hw/mlx4/kernel/bus/ib/main.c	(working copy)
> @@ -631,7 +631,7 @@ static void *mlx4_ib_add(struct mlx4_dev
>  	if (mlx4_uar_alloc(dev, &ibdev->priv_uar))
>  		goto err_pd;
> 
> -	ibdev->uar_map = ioremap((u64)ibdev->priv_uar.pfn << PAGE_SHIFT,
> PAGE_SIZE);
> +	ibdev->uar_map = ioremap((u64)ibdev->priv_uar.pfn << PAGE_SHIFT,
> PAGE_SIZE, MmNonCached);
>  	if (!ibdev->uar_map)
>  		goto err_uar;
> 
> Index: hw/mlx4/kernel/inc/l2w_memory.h
> ===================================================================
> --- hw/mlx4/kernel/inc/l2w_memory.h	(revision 2909)
> +++ hw/mlx4/kernel/inc/l2w_memory.h	(working copy)
> @@ -145,14 +145,14 @@ static inline void kfree (const void *po
>  * address is not guaranteed to be usable directly as a virtual
>  * address.
>  */
> -static inline 	void *ioremap(io_addr_t addr, SIZE_T size)
> +static inline void *ioremap(io_addr_t addr, SIZE_T size,
> MEMORY_CACHING_TYPE cache_type)
>  {
>  	PHYSICAL_ADDRESS pa;
>  	void *va;
> 
>  	ASSERT( KeGetCurrentIrql() <= DISPATCH_LEVEL );
>  	pa.QuadPart = addr;
> -	va = MmMapIoSpace( pa, size, MmNonCached );
> +	va = MmMapIoSpace( pa, size, cache_type );
>  	return va;
>  }
> 
> Index: hw/mlx4/kernel/hca/hverbs.c
> ===================================================================
> --- hw/mlx4/kernel/hca/hverbs.c	(revision 2909)
> +++ hw/mlx4/kernel/hca/hverbs.c	(working copy)
> @@ -479,7 +479,7 @@ static NTSTATUS __map_memory_for_user(
>  	p_map->mapped = 0;
> 
>  	// map UAR to kernel
> -	p_map->kva = ioremap(addr, size);
> +	p_map->kva = ioremap(addr, size, mem_type);
>  	if (!p_map->kva) {
>  		HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_LOW ,
>  			("Couldn't map kernel access region, aborting.\n") );



More information about the ofw mailing list