[ofa-general] [PATCH 2 of 3 for-2.6.24] mlx4: always fill MTTs from CPU

Michael S. Tsirkin mst at dev.mellanox.co.il
Wed Aug 1 02:28:53 PDT 2007


From: Jack Morgenstein <jackm at dev.mellanox.co.il>

Write MTT entries directly from the driver (eliminating use of the
WRITE_MTT command).  This reduces the number of FW commands needed to
register an MR by at least a factor of 2 and speeds up memory
registration significantly.
This code will be reused by the FMR implementation.

Signed-off-by: Jack Morgenstein <jackm at dev.mellanox.co.il>
Signed-off-by: Michael S. Tsirkin <mst at dev.mellanox.co.il>

---

Note: this code is similar to what we have in mthca, and so
has the same theoretical issue with using dma_sync_single
on addresses mapped with map_sg. Once we decide what
the fix for mthca should be, we'll apply it here, too.

Index: connectx/drivers/net/mlx4/mr.c
===================================================================
--- connectx.orig/drivers/net/mlx4/mr.c	2007-07-18 11:06:15.000000000 +0300
+++ connectx/drivers/net/mlx4/mr.c	2007-07-22 15:22:18.110757000 +0300
@@ -358,58 +358,49 @@ err_table:
 }
 EXPORT_SYMBOL_GPL(mlx4_mr_enable);
 
-static int mlx4_WRITE_MTT(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
-			  int num_mtt)
+static void mlx4_write_mtt_seg(struct mlx4_dev *dev, struct mlx4_mtt *mtt, int start_index,
+				      int npages, u64 *page_list)
 {
-	return mlx4_cmd(dev, mailbox->dma, num_mtt, 0, MLX4_CMD_WRITE_MTT,
-			MLX4_CMD_TIME_CLASS_B);
+	__be64 *mtts;
+	struct mlx4_priv *priv = mlx4_priv(dev);
+	dma_addr_t dma_handle;
+	int i;
+	int s = start_index * sizeof (u64);
+
+	/* For Hermon, all MTTs must fit in the same page. */
+	BUG_ON(s / PAGE_SIZE != (s + npages * sizeof(u64) - 1) / PAGE_SIZE);
+	/* Require full segments */
+	BUG_ON(s % dev->caps.mtt_entry_sz);
+
+	mtts = mlx4_table_find(&priv->mr_table.mtt_table, mtt->first_seg +
+				s / dev->caps.mtt_entry_sz, &dma_handle);
+
+	BUG_ON(!mtts);
+
+	for (i = 0; i < npages; ++i)
+		mtts[i] = cpu_to_be64(page_list[i] | MLX4_MTT_FLAG_PRESENT);
+
+	dma_sync_single(&dev->pdev->dev, dma_handle, npages * sizeof (u64), DMA_TO_DEVICE);
 }
 
 int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
 		   int start_index, int npages, u64 *page_list)
 {
-	struct mlx4_cmd_mailbox *mailbox;
-	__be64 *mtt_entry;
-	int i;
-	int err = 0;
+	int chunk;
 
 	if (mtt->order < 0)
 		return -EINVAL;
 
-	mailbox = mlx4_alloc_cmd_mailbox(dev);
-	if (IS_ERR(mailbox))
-		return PTR_ERR(mailbox);
-
-	mtt_entry = mailbox->buf;
-
 	while (npages > 0) {
-		mtt_entry[0] = cpu_to_be64(mlx4_mtt_addr(dev, mtt) + start_index * 8);
-		mtt_entry[1] = 0;
+		chunk = min((int)(PAGE_SIZE / sizeof(u64)), npages);
+		mlx4_write_mtt_seg(dev, mtt, start_index, chunk, page_list);
 
-		for (i = 0; i < npages && i < MLX4_MAILBOX_SIZE / 8 - 2; ++i)
-			mtt_entry[i + 2] = cpu_to_be64(page_list[i] |
-						       MLX4_MTT_FLAG_PRESENT);
-
-		/*
-		 * If we have an odd number of entries to write, add
-		 * one more dummy entry for firmware efficiency.
-		 */
-		if (i & 1)
-			mtt_entry[i + 2] = 0;
-
-		err = mlx4_WRITE_MTT(dev, mailbox, (i + 1) & ~1);
-		if (err)
-			goto out;
-
-		npages      -= i;
-		start_index += i;
-		page_list   += i;
+		npages      -= chunk;
+		start_index += chunk;
+		page_list   += chunk;
 	}
 
-out:
-	mlx4_free_cmd_mailbox(dev, mailbox);
-
-	return err;
+	return 0;
 }
 EXPORT_SYMBOL_GPL(mlx4_write_mtt);
 
Index: connectx/drivers/net/mlx4/icm.c
===================================================================
--- connectx.orig/drivers/net/mlx4/icm.c	2007-07-19 16:34:38.000000000 +0300
+++ connectx/drivers/net/mlx4/icm.c	2007-07-22 14:43:05.584012000 +0300
@@ -302,9 +302,9 @@ void mlx4_table_put(struct mlx4_dev *dev
 	mutex_unlock(&table->mutex);
 }
 
-void *mlx4_table_find(struct mlx4_icm_table *table, int obj)
+void *mlx4_table_find(struct mlx4_icm_table *table, int obj, dma_addr_t *dma_handle)
 {
-	int idx, offset, i;
+	int idx, offset, dma_offset, i;
 	struct mlx4_icm_chunk *chunk;
 	struct mlx4_icm *icm;
 	struct page *page = NULL;
@@ -314,15 +314,24 @@ void *mlx4_table_find(struct mlx4_icm_ta
 
 	mutex_lock(&table->mutex);
 
-	idx = obj & (table->num_obj - 1);
-	icm = table->icm[idx / (MLX4_TABLE_CHUNK_SIZE / table->obj_size)];
-	offset = idx % (MLX4_TABLE_CHUNK_SIZE / table->obj_size);
+	idx = (obj & (table->num_obj - 1)) * table->obj_size;
+	icm = table->icm[idx / MLX4_TABLE_CHUNK_SIZE];
+	dma_offset = offset = idx % MLX4_TABLE_CHUNK_SIZE;
 
 	if (!icm)
 		goto out;
 
 	list_for_each_entry(chunk, &icm->chunk_list, list) {
 		for (i = 0; i < chunk->npages; ++i) {
+			if (dma_handle && dma_offset >= 0) {
+				if (sg_dma_len(&chunk->mem[i]) > dma_offset)
+					*dma_handle = sg_dma_address(&chunk->mem[i]) +
+						dma_offset;
+				dma_offset -= sg_dma_len(&chunk->mem[i]);
+			}
+			/* DMA mapping can merge pages but not split them,
+			 * so if we found the page, dma_handle has already
+			 * been assigned to. */
 			if (chunk->mem[i].length > offset) {
 				page = chunk->mem[i].page;
 				goto out;
Index: connectx/drivers/net/mlx4/icm.h
===================================================================
--- connectx.orig/drivers/net/mlx4/icm.h	2007-07-19 16:34:38.000000000 +0300
+++ connectx/drivers/net/mlx4/icm.h	2007-07-19 19:29:58.282773000 +0300
@@ -84,7 +84,7 @@ int mlx4_init_icm_table(struct mlx4_dev 
 void mlx4_cleanup_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table);
 int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj);
 void mlx4_table_put(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj);
-void *mlx4_table_find(struct mlx4_icm_table *table, int obj);
+void *mlx4_table_find(struct mlx4_icm_table *table, int obj, dma_addr_t *dma_handle);
 int mlx4_table_get_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
 			 int start, int end);
 void mlx4_table_put_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
Index: connectx/drivers/net/mlx4/main.c
===================================================================
--- connectx.orig/drivers/net/mlx4/main.c	2007-07-19 16:34:38.000000000 +0300
+++ connectx/drivers/net/mlx4/main.c	2007-07-22 15:57:28.378142000 +0300
@@ -41,6 +41,7 @@
 
 #include <linux/mlx4/device.h>
 #include <linux/mlx4/doorbell.h>
+#include <asm/cache.h>
 
 #include "mlx4.h"
 #include "fw.h"
@@ -263,6 +264,7 @@ static int __devinit mlx4_init_icm(struc
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	u64 aux_pages;
 	int err;
+	int num_mtt_res_bytes;
 
 	err = mlx4_SET_ICM_SIZE(dev, icm_size, &aux_pages);
 	if (err) {
@@ -299,11 +301,21 @@ static int __devinit mlx4_init_icm(struc
 		goto err_unmap_cmpt;
 	}
 
+	/* Reserved mtt entries must be aligned up to a cacheline boundary,
+	 * since the FW will write to them, while the driver writes to all
+	 * other mtt entries. (Note that the variable dev->caps.mtt_entry_sz
+	 * below is really the mtt segment size, not the raw entry size)
+	 */
+	num_mtt_res_bytes = ((dev->caps.reserved_mtts *
+			      (dev->caps.mtt_entry_sz / MLX4_MTT_ENTRY_PER_SEG)
+			     + L1_CACHE_BYTES - 1) /
+			     L1_CACHE_BYTES) * L1_CACHE_BYTES;
 	err = mlx4_init_icm_table(dev, &priv->mr_table.mtt_table,
 				  init_hca->mtt_base,
 				  dev->caps.mtt_entry_sz,
 				  dev->caps.num_mtt_segs,
-				  dev->caps.reserved_mtts, 1, 0);
+				  num_mtt_res_bytes / dev->caps.mtt_entry_sz,
+				  1, 0);
 	if (err) {
 		mlx4_err(dev, "Failed to map MTT context memory, aborting.\n");
 		goto err_unmap_eq;
Index: connectx/drivers/infiniband/hw/mlx4/mr.c
===================================================================
--- connectx.orig/drivers/infiniband/hw/mlx4/mr.c	2007-07-18 11:06:15.000000000 +0300
+++ connectx/drivers/infiniband/hw/mlx4/mr.c	2007-07-22 17:00:53.672771000 +0300
@@ -96,11 +96,10 @@ int mlx4_ib_umem_write_mtt(struct mlx4_i
 				pages[i++] = sg_dma_address(&chunk->page_list[j]) +
 					umem->page_size * k;
 				/*
-				 * Be friendly to WRITE_MTT firmware
-				 * command, and pass it chunks of
-				 * appropriate size.
+				 * Be friendly to mlx4_write_mtt
+				 * and pass it chunks of appropriate size.
 				 */
-				if (i == PAGE_SIZE / sizeof (u64) - 2) {
+				if (i == PAGE_SIZE / sizeof (u64)) {
 					err = mlx4_write_mtt(dev->dev, mtt, n,
 							     i, pages);
 					if (err)

-- 
MST



More information about the general mailing list