[ewg] Re: [PATCH v3] mlx4_ib: Optimize hugetlb pages support
Yossi Etigin
yosefe at Voltaire.COM
Mon Mar 30 08:49:37 PDT 2009
Eli Cohen wrote:
> Since Linux may not merge adjacent pages into a single scatter entry through
> calls to dma_map_sg(), we check the special case of hugetlb pages which are
> likely to be mapped to contiguous dma addresses and if they are, take advantage
> of this. This will result in a significantly lower number of MTT segments used
> for registering hugetlb memory regions.
>
How about the one below - it fixes bugzilla #1569 (fix mapping for size that is not
on page boundary):
---
Since Linux may not merge adjacent pages into a single scatter entry through
calls to dma_map_sg(), we check the special case of hugetlb pages which are
likely to be mapped to contiguous dma addresses and if they are, take advantage
of this. This will result in a significantly lower number of MTT segments used
for registering hugetlb memory regions.
Signed-off-by: Eli Cohen <eli at mellanox.co.il>
---
drivers/infiniband/hw/mlx4/mr.c | 81 ++++++++++++++++++++++++++++++++++----
1 files changed, 72 insertions(+), 9 deletions(-)
Index: b/drivers/infiniband/hw/mlx4/mr.c
===================================================================
--- a/drivers/infiniband/hw/mlx4/mr.c 2008-11-19 21:32:15.000000000 +0200
+++ b/drivers/infiniband/hw/mlx4/mr.c 2009-03-30 18:29:55.000000000 +0300
@@ -119,6 +119,70 @@ out:
return err;
}
+/*
+ * Try to register a hugetlb-backed user MR with one MTT entry per huge page.
+ *
+ * Walks the DMA-mapped scatter entries of mr->umem, merges runs of
+ * DMA-contiguous entries into units of HPAGE_SIZE and records the start
+ * address of each unit.  The MR is then allocated with a page shift of
+ * HPAGE_SHIFT and the recorded addresses are written to its MTT.
+ *
+ * @pd:           protection domain the MR belongs to
+ * @mr:           MR being registered; mr->umem must already be set
+ * @start:        user virtual start address, used for the huge-page offset
+ * @virt_addr:    I/O virtual address passed on to mlx4_mr_alloc()
+ * @access_flags: IB access flags, translated with convert_access()
+ *
+ * Returns 0 on success.  Returns -ENOMEM if the address array cannot be
+ * allocated, -EINVAL if the mapping cannot be expressed as exactly n
+ * huge-page entries, the error from mlx4_mr_alloc()/mlx4_write_mtt(), or
+ * -ENOSYS when the optimization is compiled out.  On failure any MR
+ * allocated here has been freed again, so the caller can fall back to the
+ * regular per-page registration and call mlx4_mr_alloc() itself.
+ */
+static int handle_hugetlb_user_mr(struct ib_pd *pd, struct mlx4_ib_mr *mr,
+				  u64 start, u64 virt_addr, int access_flags)
+{
+#if defined(CONFIG_HUGETLB_PAGE) && !defined(__powerpc__) && !defined(__ia64__)
+	struct mlx4_ib_dev *dev = to_mdev(pd->device);
+	struct ib_umem_chunk *chunk;
+	unsigned dsize;
+	dma_addr_t daddr;
+	unsigned cur_size = 0;
+	dma_addr_t uninitialized_var(cur_addr);
+	int n;
+	struct ib_umem *umem = mr->umem;
+	u64 *arr;
+	int err = 0;
+	int i;
+	int j = 0;
+	int off = start & (HPAGE_SIZE - 1);
+
+	/* Number of huge pages touched by [start, start + length). */
+	n = DIV_ROUND_UP(off + umem->length, HPAGE_SIZE);
+	arr = kmalloc(n * sizeof *arr, GFP_KERNEL);
+	if (!arr)
+		return -ENOMEM;
+
+	list_for_each_entry(chunk, &umem->chunk_list, list)
+		for (i = 0; i < chunk->nmap; ++i) {
+			daddr = sg_dma_address(&chunk->page_list[i]);
+			dsize = sg_dma_len(&chunk->page_list[i]);
+			if (!cur_size) {
+				/* First entry of a new huge-page unit. */
+				cur_addr = daddr;
+				cur_size = dsize;
+			} else if (cur_addr + cur_size != daddr) {
+				/* Not DMA-contiguous with the current unit. */
+				err = -EINVAL;
+				goto out;
+			} else
+				cur_size += dsize;
+
+			if (cur_size > HPAGE_SIZE) {
+				err = -EINVAL;
+				goto out;
+			} else if (cur_size == HPAGE_SIZE) {
+				cur_size = 0;
+				arr[j++] = cur_addr;
+			}
+		}
+
+	/* Trailing partial unit (length not a multiple of HPAGE_SIZE). */
+	if (cur_size)
+		arr[j++] = cur_addr;
+
+	/*
+	 * mlx4_write_mtt() below is told to write n entries.  If fewer were
+	 * collected, the rest of arr[] is uninitialized kmalloc() memory and
+	 * must not reach the MTT; give up so the caller uses the regular path.
+	 */
+	if (j != n) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, umem->length,
+			    convert_access(access_flags), n, HPAGE_SHIFT, &mr->mmr);
+	if (err)
+		goto out;
+
+	err = mlx4_write_mtt(dev->dev, &mr->mmr.mtt, 0, n, arr);
+	if (err)
+		/*
+		 * The caller reacts to failure by allocating mr->mmr again on
+		 * the fallback path, so release this allocation first.
+		 */
+		mlx4_mr_free(dev->dev, &mr->mmr);
+
+out:
+	kfree(arr);
+	return err;
+#else
+	return -ENOSYS;
+#endif
+}
+
+
struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int access_flags,
struct ib_udata *udata)
@@ -140,17 +204,20 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct
goto err_free;
}
- n = ib_umem_page_count(mr->umem);
- shift = ilog2(mr->umem->page_size);
-
- err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, length,
- convert_access(access_flags), n, shift, &mr->mmr);
- if (err)
- goto err_umem;
-
- err = mlx4_ib_umem_write_mtt(dev, &mr->mmr.mtt, mr->umem);
- if (err)
- goto err_mr;
+ if (!mr->umem->hugetlb ||
+ handle_hugetlb_user_mr(pd, mr, start, virt_addr, access_flags)) {
+ n = ib_umem_page_count(mr->umem);
+ shift = ilog2(mr->umem->page_size);
+
+ err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, length,
+ convert_access(access_flags), n, shift, &mr->mmr);
+ if (err)
+ goto err_umem;
+
+ err = mlx4_ib_umem_write_mtt(dev, &mr->mmr.mtt, mr->umem);
+ if (err)
+ goto err_mr;
+ }
err = mlx4_mr_enable(dev->dev, &mr->mmr);
if (err)
More information about the ewg
mailing list