[ofa-general] [PATCH] IB/ehca: Make sure user pages are from hugetlb before using MR large pages

Joachim Fenkes fenkes at de.ibm.com
Wed Sep 12 05:39:31 PDT 2007


From: Hoang-Nam Nguyen <hnguyen at de.ibm.com>

Only use MR large pages when the user pages come from hugetlb because, on
virtualized hardware like System p, we can't be sure that the physical pages
behind ordinary user pages are contiguous.

Signed-off-by: Joachim Fenkes <fenkes at de.ibm.com>
---

Another patch for 2.6.24 that will apply cleanly on top of my previous
patchset. Please review and apply. Thanks!

 drivers/infiniband/hw/ehca/ehca_classes.h |    8 ++--
 drivers/infiniband/hw/ehca/ehca_mrmw.c    |   82 +++++++++++++++++++++++++----
 2 files changed, 75 insertions(+), 15 deletions(-)

diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
index 206d4eb..c2edd4c 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -99,10 +99,10 @@ struct ehca_sport {
 	struct ehca_sma_attr saved_attr;
 };
 
-#define HCA_CAP_MR_PGSIZE_4K  1
-#define HCA_CAP_MR_PGSIZE_64K 2
-#define HCA_CAP_MR_PGSIZE_1M  4
-#define HCA_CAP_MR_PGSIZE_16M 8
+#define HCA_CAP_MR_PGSIZE_4K  0x80000000
+#define HCA_CAP_MR_PGSIZE_64K 0x40000000
+#define HCA_CAP_MR_PGSIZE_1M  0x20000000
+#define HCA_CAP_MR_PGSIZE_16M 0x10000000
 
 struct ehca_shca {
 	struct ib_device ib_device;
diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c
index 4c8f3b3..1bb9d23 100644
--- a/drivers/infiniband/hw/ehca/ehca_mrmw.c
+++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c
@@ -41,6 +41,8 @@
  */
 
 #include <asm/current.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
 
 #include <rdma/ib_umem.h>
 
@@ -51,6 +53,7 @@
 
 #define NUM_CHUNKS(length, chunk_size) \
 	(((length) + (chunk_size - 1)) / (chunk_size))
+
 /* max number of rpages (per hcall register_rpages) */
 #define MAX_RPAGES 512
 
@@ -279,6 +282,52 @@ reg_phys_mr_exit0:
 } /* end ehca_reg_phys_mr() */
 
 /*----------------------------------------------------------------------*/
+static int ehca_is_mem_hugetlb(unsigned long addr, unsigned long size)
+{
+	struct vm_area_struct **vma_list;
+	unsigned long cur_base;
+	unsigned long npages;
+	int ret, i;
+
+	vma_list = (struct vm_area_struct **) __get_free_page(GFP_KERNEL);
+	if (!vma_list) {
+		ehca_gen_err("Can not alloc vma_list");
+		return -ENOMEM;
+	}
+
+	down_write(&current->mm->mmap_sem);
+	npages = PAGE_ALIGN(size + (addr & ~PAGE_MASK)) >> PAGE_SHIFT;
+	cur_base = addr & PAGE_MASK;
+
+	while (npages) {
+		ret = get_user_pages(current, current->mm, cur_base,
+				     min_t(int, npages,
+					   PAGE_SIZE / sizeof (*vma_list)),
+				     1, 0, NULL, vma_list);
+
+		if (ret < 0) {
+			ehca_gen_err("get_user_pages() failed "
+				     "ret=%x cur_base=%lx", ret, cur_base);
+			goto is_hugetlb_out;
+		}
+
+		for (i = 0; i < ret; ++i)
+			if (!is_vm_hugetlb_page(vma_list[i])) {
+				ret = 0;
+				goto is_hugetlb_out;
+			}
+
+		cur_base += ret * PAGE_SIZE;
+		npages -= ret;
+	}
+	ret = 1;
+
+is_hugetlb_out:
+	up_write(&current->mm->mmap_sem);
+	free_page((unsigned long) vma_list);
+
+	return ret;
+}
 
 struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 			       u64 virt, int mr_access_flags,
@@ -346,18 +395,29 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 	num_kpages = NUM_CHUNKS((virt % PAGE_SIZE) + length, PAGE_SIZE);
 	/* select proper hw_pgsize */
 	if (ehca_mr_largepage &&
-	    (shca->hca_cap_mr_pgsize & HCA_CAP_MR_PGSIZE_16M)) {
-		if (length <= EHCA_MR_PGSIZE4K
-		    && PAGE_SIZE == EHCA_MR_PGSIZE4K)
-			hwpage_size = EHCA_MR_PGSIZE4K;
-		else if (length <= EHCA_MR_PGSIZE64K)
-			hwpage_size = EHCA_MR_PGSIZE64K;
-		else if (length <= EHCA_MR_PGSIZE1M)
-			hwpage_size = EHCA_MR_PGSIZE1M;
-		else
-			hwpage_size = EHCA_MR_PGSIZE16M;
+	    shca->hca_cap_mr_pgsize & HCA_CAP_MR_PGSIZE_16M) {
+		ret = ehca_is_mem_hugetlb(virt, length);
+		switch (ret) {
+		case 0: /* mem is not from hugetlb */
+			hwpage_size = PAGE_SIZE;
+			break;
+		case 1:
+			if (length <= EHCA_MR_PGSIZE4K
+			    && PAGE_SIZE == EHCA_MR_PGSIZE4K)
+				hwpage_size = EHCA_MR_PGSIZE4K;
+			else if (length <= EHCA_MR_PGSIZE64K)
+				hwpage_size = EHCA_MR_PGSIZE64K;
+			else if (length <= EHCA_MR_PGSIZE1M)
+				hwpage_size = EHCA_MR_PGSIZE1M;
+			else
+				hwpage_size = EHCA_MR_PGSIZE16M;
+			break;
+		default: /* out of mem */
+			ib_mr = ERR_PTR(-ENOMEM);
+			goto reg_user_mr_exit1;
+		}
 	} else
-		hwpage_size = EHCA_MR_PGSIZE4K;
+		hwpage_size = EHCA_MR_PGSIZE4K; /* ehca1 can only 4k */
 	ehca_dbg(pd->device, "hwpage_size=%lx", hwpage_size);
 
 reg_user_mr_fallback:
-- 
1.5.2






More information about the general mailing list