[ofa-general] [PATCH v1] mlx4_ib: Optimize hugetlab pages support

Eli Cohen eli at mellanox.co.il
Thu Jan 22 08:31:05 PST 2009


Since Linux may not merge adjacent pages into a single scatter entry through
calls to dma_map_sg(), we check the special case of hugetlb pages which are
likely to be mapped to coniguous dma addresses and if they are, take advantage
of this. This will result in a significantly lower number of MTT segments used
for registering hugetlb memory regions.

Signed-off-by: Eli Cohen <eli at mellanox.co.il>
---

In this version I also took care of the case where the kernel is
compiled without hugetlb support.


 drivers/infiniband/hw/mlx4/mr.c |   86 ++++++++++++++++++++++++++++++++++-----
 1 files changed, 75 insertions(+), 11 deletions(-)

diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index 8e4d26d..4c7a5bf 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -119,6 +119,68 @@ out:
 	return err;
 }
 
+static int handle_hugetlb_user_mr(struct ib_pd *pd, struct mlx4_ib_mr *mr,
+				  u64 virt_addr, int access_flags)
+{
+#ifdef CONFIG_HUGETLB_PAGE
+	struct mlx4_ib_dev *dev = to_mdev(pd->device);
+	struct ib_umem_chunk *chunk;
+	unsigned dsize;
+	dma_addr_t daddr;
+	unsigned uninitialized_var(cur_size);
+	dma_addr_t uninitialized_var(cur_addr);
+	int restart;
+	int n;
+	struct ib_umem	*umem = mr->umem;
+	u64 *arr;
+	int err = 0;
+	int i;
+	int j = 0;
+
+        n = PAGE_ALIGN(umem->length + umem->offset) >> HPAGE_SHIFT;
+	arr = kmalloc(n * sizeof *arr, GFP_KERNEL);
+	if (!arr)
+		return -ENOMEM;
+
+	restart = 1;
+	list_for_each_entry(chunk, &umem->chunk_list, list)
+		for (i = 0; i < chunk->nmap; ++i) {
+			daddr = sg_dma_address(&chunk->page_list[i]);
+			dsize = sg_dma_len(&chunk->page_list[i]);
+			if (restart) {
+				cur_addr = daddr;
+				cur_size = dsize;
+				restart = 0;
+			} else if (cur_addr + cur_size != daddr) {
+				err = -EINVAL;
+				goto out;
+			} else
+                                cur_size += dsize;
+
+			if (cur_size > HPAGE_SIZE) {
+				err = -EINVAL;
+				goto out;
+			} else if (cur_size == HPAGE_SIZE) {
+				restart = 1;
+				arr[j++] = cur_addr;
+			}
+		}
+
+	err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, umem->length,
+			    convert_access(access_flags), n, HPAGE_SHIFT, &mr->mmr);
+	if (err)
+		goto out;
+
+        err = mlx4_write_mtt(dev->dev, &mr->mmr.mtt, 0, n, arr);
+
+out:
+	kfree(arr);
+	return err;
+#else
+	return -ENOSYS;
+#endif
+}
+
 struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 				  u64 virt_addr, int access_flags,
 				  struct ib_udata *udata)
@@ -140,17 +202,19 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 		goto err_free;
 	}
 
-	n = ib_umem_page_count(mr->umem);
-	shift = ilog2(mr->umem->page_size);
-
-	err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, length,
-			    convert_access(access_flags), n, shift, &mr->mmr);
-	if (err)
-		goto err_umem;
-
-	err = mlx4_ib_umem_write_mtt(dev, &mr->mmr.mtt, mr->umem);
-	if (err)
-		goto err_mr;
+	if (!mr->umem->hugetlb || handle_hugetlb_user_mr(pd, mr, virt_addr, access_flags)) {
+		n = ib_umem_page_count(mr->umem);
+		shift = ilog2(mr->umem->page_size);
+	
+		err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, length,
+				    convert_access(access_flags), n, shift, &mr->mmr);
+		if (err)
+			goto err_umem;
+	
+		err = mlx4_ib_umem_write_mtt(dev, &mr->mmr.mtt, mr->umem);
+		if (err)
+			goto err_mr;
+	}
 
 	err = mlx4_mr_enable(dev->dev, &mr->mmr);
 	if (err)
-- 
1.6.0.5




More information about the general mailing list