[openib-general] [PATCH] huge pages support
Robert Rex
robert.rex at s2001.tu-chemnitz.de
Fri Aug 18 06:23:41 PDT 2006
Hello,
I've also been working on the same topic. Here is what I've done so far;
I have successfully tested it on mthca and ehca. I'd appreciate comments
and suggestions.
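For anyone who wants to try it: the buffer handed to reg_mr has to come
from a hugetlb mapping for the new path to trigger. A userspace test
could look roughly like the sketch below (untested; the /mnt/huge
hugetlbfs mount point, the 2 MB huge page size and the already-created
pd are assumptions of the example, not part of the patch):
#include <fcntl.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/mman.h>
#include <infiniband/verbs.h>
#define HUGE_SZ (2UL * 1024 * 1024) /* HPAGE_SIZE on x86; adjust per arch */
/* Map one huge page from hugetlbfs and register it, so that
 * ib_umem_get() finds a VM_HUGETLB vma for the region. */
static struct ibv_mr *reg_huge_mr(struct ibv_pd *pd)
{
	int fd = open("/mnt/huge/umem_test", O_CREAT | O_RDWR, 0600);
	void *buf;
	if (fd < 0)
		return NULL;
	buf = mmap(NULL, HUGE_SZ, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (buf == MAP_FAILED) {
		close(fd);
		return NULL;
	}
	return ibv_reg_mr(pd, buf, HUGE_SZ,
			  IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE);
}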
diff -Nurp a/drivers/infiniband/core/uverbs_mem.c b/drivers/infiniband/core/uverbs_mem.c
--- a/drivers/infiniband/core/uverbs_mem.c 2006-08-15 05:42:06.000000000 -0700
+++ b/drivers/infiniband/core/uverbs_mem.c 2006-08-18 04:22:22.000000000 -0700
@@ -36,6 +36,7 @@
#include <linux/mm.h>
#include <linux/dma-mapping.h>
+#include <linux/hugetlb.h>
#include "uverbs.h"
@@ -73,6 +74,9 @@ int ib_umem_get(struct ib_device *dev, s
unsigned long lock_limit;
unsigned long cur_base;
unsigned long npages;
+ unsigned long region_page_mask, region_page_shift, region_page_size;
+ struct vm_area_struct *vma;
+ int use_hugepages;
int ret = 0;
int off;
int i;
@@ -84,19 +88,40 @@ int ib_umem_get(struct ib_device *dev, s
if (!page_list)
return -ENOMEM;
+ down_read(&current->mm->mmap_sem);
+ vma = find_vma(current->mm, (unsigned long) addr);
+ if (vma && is_vm_hugetlb_page(vma)) {
+ use_hugepages = 1;
+ region_page_mask = HPAGE_MASK;
+ region_page_size = HPAGE_SIZE;
+ } else {
+ use_hugepages = 0;
+ region_page_mask = PAGE_MASK;
+ region_page_size = PAGE_SIZE;
+ }
+ up_read(&current->mm->mmap_sem);
+
+ region_page_shift = __ffs(region_page_size);
+
mem->user_base = (unsigned long) addr;
mem->length = size;
- mem->offset = (unsigned long) addr & ~PAGE_MASK;
- mem->page_size = PAGE_SIZE;
+ mem->offset = (unsigned long) addr & ~region_page_mask;
+ mem->page_size = region_page_size;
mem->writable = write;
INIT_LIST_HEAD(&mem->chunk_list);
- npages = PAGE_ALIGN(size + mem->offset) >> PAGE_SHIFT;
+ npages = ((size + mem->offset + (region_page_size - 1)) &
+ (~(region_page_size - 1))) >> region_page_shift;
down_write(&current->mm->mmap_sem);
- locked = npages + current->mm->locked_vm;
+ if (use_hugepages)
+ locked = npages * (HPAGE_SIZE / PAGE_SIZE) +
+ current->mm->locked_vm;
+ else
+ locked = npages + current->mm->locked_vm;
+
lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
@@ -104,19 +129,41 @@ int ib_umem_get(struct ib_device *dev, s
goto out;
}
- cur_base = (unsigned long) addr & PAGE_MASK;
+ cur_base = (unsigned long) addr & region_page_mask;
while (npages) {
- ret = get_user_pages(current, current->mm, cur_base,
- min_t(int, npages,
- PAGE_SIZE / sizeof (struct page *)),
- 1, !write, page_list, NULL);
+ if (!use_hugepages) {
+ ret = get_user_pages(current, current->mm, cur_base,
+ min_t(int, npages, PAGE_SIZE
+ / sizeof (struct page *)),
+ 1, !write, page_list, NULL);
- if (ret < 0)
- goto out;
+ if (ret < 0)
+ goto out;
+
+ cur_base += ret * PAGE_SIZE;
+ npages -= ret;
+ } else {
+ /* one get_user_pages() call per huge page */
+ while (npages && (ret < PAGE_SIZE /
+ sizeof (struct page *))) {
+ int n = get_user_pages(current, current->mm,
+ cur_base, 1, 1, !write,
+ &page_list[ret], NULL);
+ if (n < 0) {
+ /* unpin what this batch pinned so far */
+ while (ret)
+ put_page(page_list[--ret]);
+ ret = n;
+ goto out;
+ }
+
+ ret++;
+ cur_base += HPAGE_SIZE;
+ npages--;
+ }
- cur_base += ret * PAGE_SIZE;
- npages -= ret;
+ }
off = 0;
@@ -133,7 +180,7 @@ int ib_umem_get(struct ib_device *dev, s
for (i = 0; i < chunk->nents; ++i) {
chunk->page_list[i].page = page_list[i + off];
chunk->page_list[i].offset = 0;
- chunk->page_list[i].length = PAGE_SIZE;
+ chunk->page_list[i].length = region_page_size;
}
chunk->nmap = dma_map_sg(dev->dma_device,
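Existing consumers should not need changes, since they take the page
size from the umem rather than assuming PAGE_SIZE. mthca, for instance,
derives its MTT shift from region->page_size when it walks the chunk
list, along these lines (paraphrased fragment for illustration only, not
part of this patch; region is the struct ib_umem and pages is the
translation array being filled):
	shift = ffs(region->page_size) - 1;
	list_for_each_entry(chunk, &region->chunk_list, list)
		for (j = 0; j < chunk->nmap; ++j)
			for (k = 0; k < sg_dma_len(&chunk->page_list[j]) >> shift; ++k)
				pages[n++] = sg_dma_address(&chunk->page_list[j]) +
					region->page_size * k;
With huge pages each mapped scatterlist entry therefore yields a single
large translation instead of many PAGE_SIZE ones.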