[ofa-general] [PATCH 1/2][RFC] IB/uverbs: Export ib_umem_get()/ib_umem_release() to modules
Roland Dreier
rdreier at cisco.com
Thu Apr 19 16:02:25 PDT 2007
Export ib_umem_get()/ib_umem_release() and put low-level drivers in
control of when to call ib_umem_get() to pin and DMA map userspace,
rather than always calling it in ib_uverbs_reg_mr() before calling the
low-level driver's reg_user_mr method.
Also move these functions to be in the ib_core module instead of
ib_uverbs, so that driver modules using them do not depend on
ib_uverbs.
This has a number of advantages:
- It is better design from the standpoint of making generic code a
library that can be used or overridden by device-specific code as
the details of specific devices dictate.
- Drivers that do not need to pin userspace memory regions do not
need to take the performance hit of calling ib_mem_get(). For
example, although I have not tried to implement it in this patch,
the ipath driver should be able to avoid pinning memory and just
use copy_{to,from}_user() to access userspace memory regions.
- Buffers that need special mapping treatment can be identified by
the low-level driver. For example, it may be possible to solve
some Altix-specific memory ordering issues with mthca CQs in
userspace by mapping CQ buffers with extra flags.
- Drivers that need to pin and DMA map userspace memory for things
other than memory regions can use ib_umem_get() directly, instead
of hacks using extra parameters to their reg_phys_mr method. For
example, the mlx4 driver that is pending being merged needs to pin
and DMA map QP and CQ buffers, but it does not need to create a
memory key for these buffers. So the cleanest solution is for mlx4
to call ib_umem_get() in the create_qp and create_cq methods.
Signed-off-by: Roland Dreier <rolandd at cisco.com>
---
Here's the updated patch that puts a flush_scheduled_work() in the
ib_core module's exit function.
drivers/infiniband/Kconfig | 5 +
drivers/infiniband/core/Makefile | 4 +-
drivers/infiniband/core/device.c | 2 +
drivers/infiniband/core/umem.c | 273 ++++++++++++++++++++++++++
drivers/infiniband/core/uverbs.h | 6 +-
drivers/infiniband/core/uverbs_cmd.c | 60 ++----
drivers/infiniband/core/uverbs_main.c | 11 +-
drivers/infiniband/core/uverbs_mem.c | 225 ---------------------
drivers/infiniband/hw/amso1100/c2_provider.c | 42 +++--
drivers/infiniband/hw/amso1100/c2_provider.h | 1 +
drivers/infiniband/hw/cxgb3/iwch_provider.c | 28 ++-
drivers/infiniband/hw/cxgb3/iwch_provider.h | 1 +
drivers/infiniband/hw/ehca/ehca_classes.h | 1 +
drivers/infiniband/hw/ehca/ehca_iverbs.h | 3 +-
drivers/infiniband/hw/ehca/ehca_mrmw.c | 69 ++++---
drivers/infiniband/hw/ipath/ipath_mr.c | 38 +++-
drivers/infiniband/hw/ipath/ipath_verbs.h | 5 +-
drivers/infiniband/hw/mthca/mthca_provider.c | 38 +++-
drivers/infiniband/hw/mthca/mthca_provider.h | 1 +
include/rdma/ib_umem.h | 78 ++++++++
include/rdma/ib_verbs.h | 28 +---
21 files changed, 536 insertions(+), 383 deletions(-)
create mode 100644 drivers/infiniband/core/umem.c
delete mode 100644 drivers/infiniband/core/uverbs_mem.c
create mode 100644 include/rdma/ib_umem.h
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index 66b36de..82afba5 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -29,6 +29,11 @@ config INFINIBAND_USER_ACCESS
libibverbs, libibcm and a hardware driver library from
<http://www.openib.org>.
+config INFINIBAND_USER_MEM
+ bool
+ depends on INFINIBAND_USER_ACCESS != n
+ default y
+
config INFINIBAND_ADDR_TRANS
bool
depends on INFINIBAND && INET
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index 189e5d4..cb1ab3e 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -9,6 +9,7 @@ obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \
ib_core-y := packer.o ud_header.o verbs.o sysfs.o \
device.o fmr_pool.o cache.o
+ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
ib_mad-y := mad.o smi.o agent.o mad_rmpp.o
@@ -28,5 +29,4 @@ ib_umad-y := user_mad.o
ib_ucm-y := ucm.o
-ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_mem.o \
- uverbs_marshall.o
+ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 7fabb42..592c90a 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -613,6 +613,8 @@ static void __exit ib_core_cleanup(void)
{
ib_cache_cleanup();
ib_sysfs_cleanup();
+ /* Make sure that any pending umem accounting work is done. */
+ flush_scheduled_work();
}
module_init(ib_core_init);
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
new file mode 100644
index 0000000..48e854c
--- /dev/null
+++ b/drivers/infiniband/core/umem.c
@@ -0,0 +1,273 @@
+/*
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: uverbs_mem.c 2743 2005-06-28 22:27:59Z roland $
+ */
+
+#include <linux/mm.h>
+#include <linux/dma-mapping.h>
+
+#include "uverbs.h"
+
+struct ib_umem_account_work {
+ struct work_struct work;
+ struct mm_struct *mm;
+ unsigned long diff;
+};
+
+
+static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)
+{
+ struct ib_umem_chunk *chunk, *tmp;
+ int i;
+
+ list_for_each_entry_safe(chunk, tmp, &umem->chunk_list, list) {
+ ib_dma_unmap_sg(dev, chunk->page_list,
+ chunk->nents, DMA_BIDIRECTIONAL);
+ for (i = 0; i < chunk->nents; ++i) {
+ if (umem->writable && dirty)
+ set_page_dirty_lock(chunk->page_list[i].page);
+ put_page(chunk->page_list[i].page);
+ }
+
+ kfree(chunk);
+ }
+}
+
+/**
+ * ib_umem_get - Pin and DMA map userspace memory.
+ * @context: userspace context to pin memory for
+ * @addr: userspace virtual address to start at
+ * @size: length of region to pin
+ * @access: IB_ACCESS_xxx flags for memory being pinned
+ */
+struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
+ size_t size, int access)
+{
+ struct ib_umem *umem;
+ struct page **page_list;
+ struct ib_umem_chunk *chunk;
+ unsigned long locked;
+ unsigned long lock_limit;
+ unsigned long cur_base;
+ unsigned long npages;
+ int ret;
+ int off;
+ int i;
+
+ if (!can_do_mlock())
+ return ERR_PTR(-EPERM);
+
+ umem = kmalloc(sizeof *umem, GFP_KERNEL);
+ if (!umem)
+ return ERR_PTR(-ENOMEM);
+
+ umem->context = context;
+ umem->length = size;
+ umem->offset = addr & ~PAGE_MASK;
+ umem->page_size = PAGE_SIZE;
+ /*
+ * We ask for writable memory if any access flags other than
+ * "remote read" are set. "Local write" and "remote write"
+ * obviously require write access. "Remote atomic" can do
+ * things like fetch and add, which will modify memory, and
+ * "MW bind" can change permissions by binding a window.
+ */
+ umem->writable = !!(access & ~IB_ACCESS_REMOTE_READ);
+
+ INIT_LIST_HEAD(&umem->chunk_list);
+
+ page_list = (struct page **) __get_free_page(GFP_KERNEL);
+ if (!page_list) {
+ kfree(umem);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ npages = PAGE_ALIGN(size + umem->offset) >> PAGE_SHIFT;
+
+ down_write(¤t->mm->mmap_sem);
+
+ locked = npages + current->mm->locked_vm;
+ lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
+
+ if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ cur_base = addr & PAGE_MASK;
+
+ while (npages) {
+ ret = get_user_pages(current, current->mm, cur_base,
+ min_t(int, npages,
+ PAGE_SIZE / sizeof (struct page *)),
+ 1, !umem->writable, page_list, NULL);
+
+ if (ret < 0)
+ goto out;
+
+ cur_base += ret * PAGE_SIZE;
+ npages -= ret;
+
+ off = 0;
+
+ while (ret) {
+ chunk = kmalloc(sizeof *chunk + sizeof (struct scatterlist) *
+ min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK),
+ GFP_KERNEL);
+ if (!chunk) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ chunk->nents = min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK);
+ for (i = 0; i < chunk->nents; ++i) {
+ chunk->page_list[i].page = page_list[i + off];
+ chunk->page_list[i].offset = 0;
+ chunk->page_list[i].length = PAGE_SIZE;
+ }
+
+ chunk->nmap = ib_dma_map_sg(context->device,
+ &chunk->page_list[0],
+ chunk->nents,
+ DMA_BIDIRECTIONAL);
+ if (chunk->nmap <= 0) {
+ for (i = 0; i < chunk->nents; ++i)
+ put_page(chunk->page_list[i].page);
+ kfree(chunk);
+
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret -= chunk->nents;
+ off += chunk->nents;
+ list_add_tail(&chunk->list, &umem->chunk_list);
+ }
+
+ ret = 0;
+ }
+
+out:
+ if (ret < 0) {
+ __ib_umem_release(context->device, umem, 0);
+ kfree(umem);
+ } else
+ current->mm->locked_vm = locked;
+
+ up_write(¤t->mm->mmap_sem);
+ free_page((unsigned long) page_list);
+
+ return ret < 0 ? ERR_PTR(ret) : umem;
+}
+EXPORT_SYMBOL(ib_umem_get);
+
+static void ib_umem_account(struct work_struct *_work)
+{
+ struct ib_umem_account_work *work =
+ container_of(_work, struct ib_umem_account_work, work);
+
+ down_write(&work->mm->mmap_sem);
+ work->mm->locked_vm -= work->diff;
+ up_write(&work->mm->mmap_sem);
+ mmput(work->mm);
+ kfree(work);
+}
+
+/**
+ * ib_umem_release - release memory pinned with ib_umem_get
+ * @umem: umem struct to release
+ */
+void ib_umem_release(struct ib_umem *umem)
+{
+ struct ib_umem_account_work *work;
+ struct ib_ucontext *context = umem->context;
+ struct mm_struct *mm;
+ unsigned long diff;
+
+ __ib_umem_release(umem->context->device, umem, 1);
+
+ mm = get_task_mm(current);
+ if (!mm)
+ return;
+
+ diff = PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT;
+ kfree(umem);
+
+ /*
+ * We may be called with the mm's mmap_sem already held. This
+ * can happen when a userspace munmap() is the call that drops
+ * the last reference to our file and calls our release
+ * method. If there are memory regions to destroy, we'll end
+ * up here and not be able to take the mmap_sem. In that case
+ * we defer the vm_locked accounting to the system workqueue.
+ */
+ if (context->closing && !down_write_trylock(&mm->mmap_sem)) {
+ work = kmalloc(sizeof *work, GFP_KERNEL);
+ if (!work) {
+ mmput(mm);
+ return;
+ }
+
+ INIT_WORK(&work->work, ib_umem_account);
+ work->mm = mm;
+ work->diff = diff;
+
+ schedule_work(&work->work);
+ return;
+ } else
+ down_write(&mm->mmap_sem);
+
+ current->mm->locked_vm -= diff;
+ up_write(&mm->mmap_sem);
+ mmput(mm);
+}
+EXPORT_SYMBOL(ib_umem_release);
+
+int ib_umem_page_count(struct ib_umem *umem)
+{
+ struct ib_umem_chunk *chunk;
+ int shift;
+ int i;
+ int n;
+
+ shift = ilog2(umem->page_size);
+
+ n = 0;
+ list_for_each_entry(chunk, &umem->chunk_list, list)
+ for (i = 0; i < chunk->nmap; ++i)
+ n += sg_dma_len(&chunk->page_list[i]) >> shift;
+
+ return n;
+}
+EXPORT_SYMBOL(ib_umem_page_count);
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index 102a59c..c33546f 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -45,6 +45,7 @@
#include <linux/completion.h>
#include <rdma/ib_verbs.h>
+#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
/*
@@ -163,11 +164,6 @@ void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr);
void ib_uverbs_event_handler(struct ib_event_handler *handler,
struct ib_event *event);
-int ib_umem_get(struct ib_device *dev, struct ib_umem *mem,
- void *addr, size_t size, int write);
-void ib_umem_release(struct ib_device *dev, struct ib_umem *umem);
-void ib_umem_release_on_close(struct ib_device *dev, struct ib_umem *umem);
-
#define IB_UVERBS_DECLARE_CMD(name) \
ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \
const char __user *buf, int in_len, \
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 4fd75af..8c338bc 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2005 Topspin Communications. All rights reserved.
- * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
+ * Copyright (c) 2005, 2006, 2007 Cisco Systems. All rights reserved.
* Copyright (c) 2005 PathScale, Inc. All rights reserved.
* Copyright (c) 2006 Mellanox Technologies. All rights reserved.
*
@@ -295,6 +295,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
INIT_LIST_HEAD(&ucontext->qp_list);
INIT_LIST_HEAD(&ucontext->srq_list);
INIT_LIST_HEAD(&ucontext->ah_list);
+ ucontext->closing = 0;
resp.num_comp_vectors = file->device->num_comp_vectors;
@@ -573,7 +574,7 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
struct ib_uverbs_reg_mr cmd;
struct ib_uverbs_reg_mr_resp resp;
struct ib_udata udata;
- struct ib_umem_object *obj;
+ struct ib_uobject *uobj;
struct ib_pd *pd;
struct ib_mr *mr;
int ret;
@@ -599,35 +600,21 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
!(cmd.access_flags & IB_ACCESS_LOCAL_WRITE))
return -EINVAL;
- obj = kmalloc(sizeof *obj, GFP_KERNEL);
- if (!obj)
+ uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
+ if (!uobj)
return -ENOMEM;
- init_uobj(&obj->uobject, 0, file->ucontext, &mr_lock_key);
- down_write(&obj->uobject.mutex);
-
- /*
- * We ask for writable memory if any access flags other than
- * "remote read" are set. "Local write" and "remote write"
- * obviously require write access. "Remote atomic" can do
- * things like fetch and add, which will modify memory, and
- * "MW bind" can change permissions by binding a window.
- */
- ret = ib_umem_get(file->device->ib_dev, &obj->umem,
- (void *) (unsigned long) cmd.start, cmd.length,
- !!(cmd.access_flags & ~IB_ACCESS_REMOTE_READ));
- if (ret)
- goto err_free;
-
- obj->umem.virt_base = cmd.hca_va;
+ init_uobj(uobj, 0, file->ucontext, &mr_lock_key);
+ down_write(&uobj->mutex);
pd = idr_read_pd(cmd.pd_handle, file->ucontext);
if (!pd) {
ret = -EINVAL;
- goto err_release;
+ goto err_free;
}
- mr = pd->device->reg_user_mr(pd, &obj->umem, cmd.access_flags, &udata);
+ mr = pd->device->reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va,
+ cmd.access_flags, &udata);
if (IS_ERR(mr)) {
ret = PTR_ERR(mr);
goto err_put;
@@ -635,19 +622,19 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
mr->device = pd->device;
mr->pd = pd;
- mr->uobject = &obj->uobject;
+ mr->uobject = uobj;
atomic_inc(&pd->usecnt);
atomic_set(&mr->usecnt, 0);
- obj->uobject.object = mr;
- ret = idr_add_uobj(&ib_uverbs_mr_idr, &obj->uobject);
+ uobj->object = mr;
+ ret = idr_add_uobj(&ib_uverbs_mr_idr, uobj);
if (ret)
goto err_unreg;
memset(&resp, 0, sizeof resp);
resp.lkey = mr->lkey;
resp.rkey = mr->rkey;
- resp.mr_handle = obj->uobject.id;
+ resp.mr_handle = uobj->id;
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp)) {
@@ -658,17 +645,17 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
put_pd_read(pd);
mutex_lock(&file->mutex);
- list_add_tail(&obj->uobject.list, &file->ucontext->mr_list);
+ list_add_tail(&uobj->list, &file->ucontext->mr_list);
mutex_unlock(&file->mutex);
- obj->uobject.live = 1;
+ uobj->live = 1;
- up_write(&obj->uobject.mutex);
+ up_write(&uobj->mutex);
return in_len;
err_copy:
- idr_remove_uobj(&ib_uverbs_mr_idr, &obj->uobject);
+ idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
err_unreg:
ib_dereg_mr(mr);
@@ -676,11 +663,8 @@ err_unreg:
err_put:
put_pd_read(pd);
-err_release:
- ib_umem_release(file->device->ib_dev, &obj->umem);
-
err_free:
- put_uobj_write(&obj->uobject);
+ put_uobj_write(uobj);
return ret;
}
@@ -691,7 +675,6 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
struct ib_uverbs_dereg_mr cmd;
struct ib_mr *mr;
struct ib_uobject *uobj;
- struct ib_umem_object *memobj;
int ret = -EINVAL;
if (copy_from_user(&cmd, buf, sizeof cmd))
@@ -701,8 +684,7 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
if (!uobj)
return -EINVAL;
- memobj = container_of(uobj, struct ib_umem_object, uobject);
- mr = uobj->object;
+ mr = uobj->object;
ret = ib_dereg_mr(mr);
if (!ret)
@@ -719,8 +701,6 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
list_del(&uobj->list);
mutex_unlock(&file->mutex);
- ib_umem_release(file->device->ib_dev, &memobj->umem);
-
put_uobj(uobj);
return in_len;
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index f8bc822..41c2065 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -183,6 +183,8 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
if (!context)
return 0;
+ context->closing = 1;
+
list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) {
struct ib_ah *ah = uobj->object;
@@ -230,16 +232,10 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) {
struct ib_mr *mr = uobj->object;
- struct ib_device *mrdev = mr->device;
- struct ib_umem_object *memobj;
idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
ib_dereg_mr(mr);
-
- memobj = container_of(uobj, struct ib_umem_object, uobject);
- ib_umem_release_on_close(mrdev, &memobj->umem);
-
- kfree(memobj);
+ kfree(uobj);
}
list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) {
@@ -906,7 +902,6 @@ static void __exit ib_uverbs_cleanup(void)
unregister_filesystem(&uverbs_event_fs);
class_destroy(uverbs_class);
unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
- flush_scheduled_work();
idr_destroy(&ib_uverbs_pd_idr);
idr_destroy(&ib_uverbs_mr_idr);
idr_destroy(&ib_uverbs_mw_idr);
diff --git a/drivers/infiniband/core/uverbs_mem.c b/drivers/infiniband/core/uverbs_mem.c
deleted file mode 100644
index c95fe95..0000000
--- a/drivers/infiniband/core/uverbs_mem.c
+++ /dev/null
@@ -1,225 +0,0 @@
-/*
- * Copyright (c) 2005 Topspin Communications. All rights reserved.
- * Copyright (c) 2005 Cisco Systems. All rights reserved.
- * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * $Id: uverbs_mem.c 2743 2005-06-28 22:27:59Z roland $
- */
-
-#include <linux/mm.h>
-#include <linux/dma-mapping.h>
-
-#include "uverbs.h"
-
-struct ib_umem_account_work {
- struct work_struct work;
- struct mm_struct *mm;
- unsigned long diff;
-};
-
-
-static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)
-{
- struct ib_umem_chunk *chunk, *tmp;
- int i;
-
- list_for_each_entry_safe(chunk, tmp, &umem->chunk_list, list) {
- ib_dma_unmap_sg(dev, chunk->page_list,
- chunk->nents, DMA_BIDIRECTIONAL);
- for (i = 0; i < chunk->nents; ++i) {
- if (umem->writable && dirty)
- set_page_dirty_lock(chunk->page_list[i].page);
- put_page(chunk->page_list[i].page);
- }
-
- kfree(chunk);
- }
-}
-
-int ib_umem_get(struct ib_device *dev, struct ib_umem *mem,
- void *addr, size_t size, int write)
-{
- struct page **page_list;
- struct ib_umem_chunk *chunk;
- unsigned long locked;
- unsigned long lock_limit;
- unsigned long cur_base;
- unsigned long npages;
- int ret = 0;
- int off;
- int i;
-
- if (!can_do_mlock())
- return -EPERM;
-
- page_list = (struct page **) __get_free_page(GFP_KERNEL);
- if (!page_list)
- return -ENOMEM;
-
- mem->user_base = (unsigned long) addr;
- mem->length = size;
- mem->offset = (unsigned long) addr & ~PAGE_MASK;
- mem->page_size = PAGE_SIZE;
- mem->writable = write;
-
- INIT_LIST_HEAD(&mem->chunk_list);
-
- npages = PAGE_ALIGN(size + mem->offset) >> PAGE_SHIFT;
-
- down_write(¤t->mm->mmap_sem);
-
- locked = npages + current->mm->locked_vm;
- lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
-
- if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
- ret = -ENOMEM;
- goto out;
- }
-
- cur_base = (unsigned long) addr & PAGE_MASK;
-
- while (npages) {
- ret = get_user_pages(current, current->mm, cur_base,
- min_t(int, npages,
- PAGE_SIZE / sizeof (struct page *)),
- 1, !write, page_list, NULL);
-
- if (ret < 0)
- goto out;
-
- cur_base += ret * PAGE_SIZE;
- npages -= ret;
-
- off = 0;
-
- while (ret) {
- chunk = kmalloc(sizeof *chunk + sizeof (struct scatterlist) *
- min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK),
- GFP_KERNEL);
- if (!chunk) {
- ret = -ENOMEM;
- goto out;
- }
-
- chunk->nents = min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK);
- for (i = 0; i < chunk->nents; ++i) {
- chunk->page_list[i].page = page_list[i + off];
- chunk->page_list[i].offset = 0;
- chunk->page_list[i].length = PAGE_SIZE;
- }
-
- chunk->nmap = ib_dma_map_sg(dev,
- &chunk->page_list[0],
- chunk->nents,
- DMA_BIDIRECTIONAL);
- if (chunk->nmap <= 0) {
- for (i = 0; i < chunk->nents; ++i)
- put_page(chunk->page_list[i].page);
- kfree(chunk);
-
- ret = -ENOMEM;
- goto out;
- }
-
- ret -= chunk->nents;
- off += chunk->nents;
- list_add_tail(&chunk->list, &mem->chunk_list);
- }
-
- ret = 0;
- }
-
-out:
- if (ret < 0)
- __ib_umem_release(dev, mem, 0);
- else
- current->mm->locked_vm = locked;
-
- up_write(¤t->mm->mmap_sem);
- free_page((unsigned long) page_list);
-
- return ret;
-}
-
-void ib_umem_release(struct ib_device *dev, struct ib_umem *umem)
-{
- __ib_umem_release(dev, umem, 1);
-
- down_write(¤t->mm->mmap_sem);
- current->mm->locked_vm -=
- PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT;
- up_write(¤t->mm->mmap_sem);
-}
-
-static void ib_umem_account(struct work_struct *_work)
-{
- struct ib_umem_account_work *work =
- container_of(_work, struct ib_umem_account_work, work);
-
- down_write(&work->mm->mmap_sem);
- work->mm->locked_vm -= work->diff;
- up_write(&work->mm->mmap_sem);
- mmput(work->mm);
- kfree(work);
-}
-
-void ib_umem_release_on_close(struct ib_device *dev, struct ib_umem *umem)
-{
- struct ib_umem_account_work *work;
- struct mm_struct *mm;
-
- __ib_umem_release(dev, umem, 1);
-
- mm = get_task_mm(current);
- if (!mm)
- return;
-
- /*
- * We may be called with the mm's mmap_sem already held. This
- * can happen when a userspace munmap() is the call that drops
- * the last reference to our file and calls our release
- * method. If there are memory regions to destroy, we'll end
- * up here and not be able to take the mmap_sem. Therefore we
- * defer the vm_locked accounting to the system workqueue.
- */
-
- work = kmalloc(sizeof *work, GFP_KERNEL);
- if (!work) {
- mmput(mm);
- return;
- }
-
- INIT_WORK(&work->work, ib_umem_account);
- work->mm = mm;
- work->diff = PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT;
-
- schedule_work(&work->work);
-}
diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c
index fef9727..10a085d 100644
--- a/drivers/infiniband/hw/amso1100/c2_provider.c
+++ b/drivers/infiniband/hw/amso1100/c2_provider.c
@@ -56,6 +56,7 @@
#include <asm/byteorder.h>
#include <rdma/ib_smi.h>
+#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
#include "c2.h"
#include "c2_provider.h"
@@ -396,6 +397,7 @@ static struct ib_mr *c2_reg_phys_mr(struct ib_pd *ib_pd,
}
mr->pd = to_c2pd(ib_pd);
+ mr->umem = NULL;
pr_debug("%s - page shift %d, pbl_depth %d, total_len %u, "
"*iova_start %llx, first pa %llx, last pa %llx\n",
__FUNCTION__, page_shift, pbl_depth, total_len,
@@ -428,8 +430,8 @@ static struct ib_mr *c2_get_dma_mr(struct ib_pd *pd, int acc)
return c2_reg_phys_mr(pd, &bl, 1, acc, &kva);
}
-static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
- int acc, struct ib_udata *udata)
+static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt, int acc, struct ib_udata *udata)
{
u64 *pages;
u64 kva = 0;
@@ -441,15 +443,23 @@ static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
struct c2_mr *c2mr;
pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
- shift = ffs(region->page_size) - 1;
c2mr = kmalloc(sizeof(*c2mr), GFP_KERNEL);
if (!c2mr)
return ERR_PTR(-ENOMEM);
c2mr->pd = c2pd;
+ c2mr->umem = ib_umem_get(pd->uobject->context, start, length, acc);
+ if (IS_ERR(c2mr->umem)) {
+ err = PTR_ERR(c2mr->umem);
+ kfree(c2mr);
+ return ERR_PTR(err);
+ }
+
+ shift = ffs(c2mr->umem->page_size) - 1;
+
n = 0;
- list_for_each_entry(chunk, ®ion->chunk_list, list)
+ list_for_each_entry(chunk, &c2mr->umem->chunk_list, list)
n += chunk->nents;
pages = kmalloc(n * sizeof(u64), GFP_KERNEL);
@@ -459,35 +469,34 @@ static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
}
i = 0;
- list_for_each_entry(chunk, ®ion->chunk_list, list) {
+ list_for_each_entry(chunk, &c2mr->umem->chunk_list, list) {
for (j = 0; j < chunk->nmap; ++j) {
len = sg_dma_len(&chunk->page_list[j]) >> shift;
for (k = 0; k < len; ++k) {
pages[i++] =
sg_dma_address(&chunk->page_list[j]) +
- (region->page_size * k);
+ (c2mr->umem->page_size * k);
}
}
}
- kva = (u64)region->virt_base;
+ kva = virt;
err = c2_nsmr_register_phys_kern(to_c2dev(pd->device),
pages,
- region->page_size,
+ c2mr->umem->page_size,
i,
- region->length,
- region->offset,
+ length,
+ c2mr->umem->offset,
&kva,
c2_convert_access(acc),
c2mr);
kfree(pages);
- if (err) {
- kfree(c2mr);
- return ERR_PTR(err);
- }
+ if (err)
+ goto err;
return &c2mr->ibmr;
err:
+ ib_umem_release(c2mr->umem);
kfree(c2mr);
return ERR_PTR(err);
}
@@ -502,8 +511,11 @@ static int c2_dereg_mr(struct ib_mr *ib_mr)
err = c2_stag_dealloc(to_c2dev(ib_mr->device), ib_mr->lkey);
if (err)
pr_debug("c2_stag_dealloc failed: %d\n", err);
- else
+ else {
+ if (mr->umem)
+ ib_umem_release(mr->umem);
kfree(mr);
+ }
return err;
}
diff --git a/drivers/infiniband/hw/amso1100/c2_provider.h b/drivers/infiniband/hw/amso1100/c2_provider.h
index fc90622..1076df2 100644
--- a/drivers/infiniband/hw/amso1100/c2_provider.h
+++ b/drivers/infiniband/hw/amso1100/c2_provider.h
@@ -73,6 +73,7 @@ struct c2_pd {
struct c2_mr {
struct ib_mr ibmr;
struct c2_pd *pd;
+ struct ib_umem *umem;
};
struct c2_av;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index 24e0df0..98cdd13 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -47,6 +47,7 @@
#include <rdma/iw_cm.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_smi.h>
+#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
#include "cxio_hal.h"
@@ -441,6 +442,8 @@ static int iwch_dereg_mr(struct ib_mr *ib_mr)
remove_handle(rhp, &rhp->mmidr, mmid);
if (mhp->kva)
kfree((void *) (unsigned long) mhp->kva);
+ if (mhp->umem)
+ ib_umem_release(mhp->umem);
PDBG("%s mmid 0x%x ptr %p\n", __FUNCTION__, mmid, mhp);
kfree(mhp);
return 0;
@@ -575,8 +578,8 @@ static int iwch_reregister_phys_mem(struct ib_mr *mr,
}
-static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
- int acc, struct ib_udata *udata)
+static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt, int acc, struct ib_udata *udata)
{
__be64 *pages;
int shift, n, len;
@@ -589,7 +592,6 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
struct iwch_reg_user_mr_resp uresp;
PDBG("%s ib_pd %p\n", __FUNCTION__, pd);
- shift = ffs(region->page_size) - 1;
php = to_iwch_pd(pd);
rhp = php->rhp;
@@ -597,8 +599,17 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
if (!mhp)
return ERR_PTR(-ENOMEM);
+ mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc);
+ if (IS_ERR(mhp->umem)) {
+ err = PTR_ERR(mhp->umem);
+ kfree(mhp);
+ return ERR_PTR(err);
+ }
+
+ shift = ffs(mhp->umem->page_size) - 1;
+
n = 0;
- list_for_each_entry(chunk, ®ion->chunk_list, list)
+ list_for_each_entry(chunk, &mhp->umem->chunk_list, list)
n += chunk->nents;
pages = kmalloc(n * sizeof(u64), GFP_KERNEL);
@@ -609,13 +620,13 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
i = n = 0;
- list_for_each_entry(chunk, ®ion->chunk_list, list)
+ list_for_each_entry(chunk, &mhp->umem->chunk_list, list)
for (j = 0; j < chunk->nmap; ++j) {
len = sg_dma_len(&chunk->page_list[j]) >> shift;
for (k = 0; k < len; ++k) {
pages[i++] = cpu_to_be64(sg_dma_address(
&chunk->page_list[j]) +
- region->page_size * k);
+ mhp->umem->page_size * k);
}
}
@@ -623,9 +634,9 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
mhp->attr.pdid = php->pdid;
mhp->attr.zbva = 0;
mhp->attr.perms = iwch_ib_to_tpt_access(acc);
- mhp->attr.va_fbo = region->virt_base;
+ mhp->attr.va_fbo = virt;
mhp->attr.page_size = shift - 12;
- mhp->attr.len = (u32) region->length;
+ mhp->attr.len = (u32) length;
mhp->attr.pbl_size = i;
err = iwch_register_mem(rhp, php, mhp, shift, pages);
kfree(pages);
@@ -648,6 +659,7 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
return &mhp->ibmr;
err:
+ ib_umem_release(mhp->umem);
kfree(mhp);
return ERR_PTR(err);
}
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h
index 93bcc56..48833f3 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.h
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.h
@@ -73,6 +73,7 @@ struct tpt_attributes {
struct iwch_mr {
struct ib_mr ibmr;
+ struct ib_umem *umem;
struct iwch_dev *rhp;
u64 kva;
struct tpt_attributes attr;
diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
index 82ded44..88e7866 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -175,6 +175,7 @@ struct ehca_mr {
struct ib_mr ib_mr; /* must always be first in ehca_mr */
struct ib_fmr ib_fmr; /* must always be first in ehca_mr */
} ib;
+ struct ib_umem *umem;
spinlock_t mrlock;
enum ehca_mr_flag flags;
diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h
index 95fd59f..9b22c5a 100644
--- a/drivers/infiniband/hw/ehca/ehca_iverbs.h
+++ b/drivers/infiniband/hw/ehca/ehca_iverbs.h
@@ -78,8 +78,7 @@ struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
int num_phys_buf,
int mr_access_flags, u64 *iova_start);
-struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd,
- struct ib_umem *region,
+struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt,
int mr_access_flags, struct ib_udata *udata);
int ehca_rereg_phys_mr(struct ib_mr *mr,
diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c
index d22ab56..84c5bb4 100644
--- a/drivers/infiniband/hw/ehca/ehca_mrmw.c
+++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c
@@ -39,6 +39,8 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
+#include <rdma/ib_umem.h>
+
#include <asm/current.h>
#include "ehca_iverbs.h"
@@ -238,10 +240,8 @@ reg_phys_mr_exit0:
/*----------------------------------------------------------------------*/
-struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd,
- struct ib_umem *region,
- int mr_access_flags,
- struct ib_udata *udata)
+struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt,
+ int mr_access_flags, struct ib_udata *udata)
{
struct ib_mr *ib_mr;
struct ehca_mr *e_mr;
@@ -257,11 +257,7 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd,
ehca_gen_err("bad pd=%p", pd);
return ERR_PTR(-EFAULT);
}
- if (!region) {
- ehca_err(pd->device, "bad input values: region=%p", region);
- ib_mr = ERR_PTR(-EINVAL);
- goto reg_user_mr_exit0;
- }
+
if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
!(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
@@ -275,17 +271,10 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd,
ib_mr = ERR_PTR(-EINVAL);
goto reg_user_mr_exit0;
}
- if (region->page_size != PAGE_SIZE) {
- ehca_err(pd->device, "page size not supported, "
- "region->page_size=%x", region->page_size);
- ib_mr = ERR_PTR(-EINVAL);
- goto reg_user_mr_exit0;
- }
- if ((region->length == 0) ||
- ((region->virt_base + region->length) < region->virt_base)) {
+ if (length == 0 || virt + length < virt) {
ehca_err(pd->device, "bad input values: length=%lx "
- "virt_base=%lx", region->length, region->virt_base);
+ "virt_base=%lx", length, virt);
ib_mr = ERR_PTR(-EINVAL);
goto reg_user_mr_exit0;
}
@@ -297,40 +286,55 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd,
goto reg_user_mr_exit0;
}
+ e_mr->umem = ib_umem_get(pd->uobject->context, start, length,
+ mr_access_flags);
+ if (IS_ERR(e_mr->umem)) {
+ ib_mr = (void *) e_mr->umem;
+ goto reg_user_mr_exit1;
+ }
+
+ if (e_mr->umem->page_size != PAGE_SIZE) {
+ ehca_err(pd->device, "page size not supported, "
+ "e_mr->umem->page_size=%x", e_mr->umem->page_size);
+ ib_mr = ERR_PTR(-EINVAL);
+ goto reg_user_mr_exit2;
+ }
+
/* determine number of MR pages */
- num_pages_mr = (((region->virt_base % PAGE_SIZE) + region->length +
- PAGE_SIZE - 1) / PAGE_SIZE);
- num_pages_4k = (((region->virt_base % EHCA_PAGESIZE) + region->length +
- EHCA_PAGESIZE - 1) / EHCA_PAGESIZE);
+ num_pages_mr = (((virt % PAGE_SIZE) + length + PAGE_SIZE - 1) /
+ PAGE_SIZE);
+ num_pages_4k = (((virt % EHCA_PAGESIZE) + length + EHCA_PAGESIZE - 1) /
+ EHCA_PAGESIZE);
/* register MR on HCA */
pginfo.type = EHCA_MR_PGI_USER;
pginfo.num_pages = num_pages_mr;
pginfo.num_4k = num_pages_4k;
- pginfo.region = region;
- pginfo.next_4k = region->offset / EHCA_PAGESIZE;
+ pginfo.region = e_mr->umem;
+ pginfo.next_4k = e_mr->umem->offset / EHCA_PAGESIZE;
pginfo.next_chunk = list_prepare_entry(pginfo.next_chunk,
- (®ion->chunk_list),
+ (&e_mr->umem->chunk_list),
list);
- ret = ehca_reg_mr(shca, e_mr, (u64*)region->virt_base,
- region->length, mr_access_flags, e_pd, &pginfo,
- &e_mr->ib.ib_mr.lkey, &e_mr->ib.ib_mr.rkey);
+ ret = ehca_reg_mr(shca, e_mr, (u64*) virt, length, mr_access_flags, e_pd,
+ &pginfo, &e_mr->ib.ib_mr.lkey, &e_mr->ib.ib_mr.rkey);
if (ret) {
ib_mr = ERR_PTR(ret);
- goto reg_user_mr_exit1;
+ goto reg_user_mr_exit2;
}
/* successful registration of all pages */
return &e_mr->ib.ib_mr;
+reg_user_mr_exit2:
+ ib_umem_release(e_mr->umem);
reg_user_mr_exit1:
ehca_mr_delete(e_mr);
reg_user_mr_exit0:
if (IS_ERR(ib_mr))
- ehca_err(pd->device, "rc=%lx pd=%p region=%p mr_access_flags=%x"
+ ehca_err(pd->device, "rc=%lx pd=%p mr_access_flags=%x"
" udata=%p",
- PTR_ERR(ib_mr), pd, region, mr_access_flags, udata);
+ PTR_ERR(ib_mr), pd, mr_access_flags, udata);
return ib_mr;
} /* end ehca_reg_user_mr() */
@@ -596,6 +600,9 @@ int ehca_dereg_mr(struct ib_mr *mr)
goto dereg_mr_exit0;
}
+ if (e_mr->umem)
+ ib_umem_release(e_mr->umem);
+
/* successful deregistration */
ehca_mr_delete(e_mr);
diff --git a/drivers/infiniband/hw/ipath/ipath_mr.c b/drivers/infiniband/hw/ipath/ipath_mr.c
index 8cc8598..8e91c8b 100644
--- a/drivers/infiniband/hw/ipath/ipath_mr.c
+++ b/drivers/infiniband/hw/ipath/ipath_mr.c
@@ -31,6 +31,7 @@
* SOFTWARE.
*/
+#include <rdma/ib_umem.h>
#include <rdma/ib_pack.h>
#include <rdma/ib_smi.h>
@@ -147,6 +148,7 @@ struct ib_mr *ipath_reg_phys_mr(struct ib_pd *pd,
mr->mr.offset = 0;
mr->mr.access_flags = acc;
mr->mr.max_segs = num_phys_buf;
+ mr->umem = NULL;
m = 0;
n = 0;
@@ -170,50 +172,60 @@ bail:
/**
* ipath_reg_user_mr - register a userspace memory region
* @pd: protection domain for this memory region
- * @region: the user memory region
+ * @start: starting userspace address
+ * @length: length of region to register
+ * @virt_addr: virtual address to use (from HCA's point of view)
* @mr_access_flags: access flags for this memory region
* @udata: unused by the InfiniPath driver
*
* Returns the memory region on success, otherwise returns an errno.
*/
-struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
- int mr_access_flags, struct ib_udata *udata)
+struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt_addr, int mr_access_flags,
+ struct ib_udata *udata)
{
struct ipath_mr *mr;
+ struct ib_umem *umem;
struct ib_umem_chunk *chunk;
int n, m, i;
struct ib_mr *ret;
- if (region->length == 0) {
+ if (length == 0) {
ret = ERR_PTR(-EINVAL);
goto bail;
}
+ umem = ib_umem_get(pd->uobject->context, start, length, mr_access_flags);
+ if (IS_ERR(umem))
+ return (void *) umem;
+
n = 0;
- list_for_each_entry(chunk, ®ion->chunk_list, list)
+ list_for_each_entry(chunk, &umem->chunk_list, list)
n += chunk->nents;
mr = alloc_mr(n, &to_idev(pd->device)->lk_table);
if (!mr) {
ret = ERR_PTR(-ENOMEM);
+ ib_umem_release(umem);
goto bail;
}
mr->mr.pd = pd;
- mr->mr.user_base = region->user_base;
- mr->mr.iova = region->virt_base;
- mr->mr.length = region->length;
- mr->mr.offset = region->offset;
+ mr->mr.user_base = start;
+ mr->mr.iova = virt_addr;
+ mr->mr.length = length;
+ mr->mr.offset = umem->offset;
mr->mr.access_flags = mr_access_flags;
mr->mr.max_segs = n;
+ mr->umem = umem;
m = 0;
n = 0;
- list_for_each_entry(chunk, ®ion->chunk_list, list) {
+ list_for_each_entry(chunk, &umem->chunk_list, list) {
for (i = 0; i < chunk->nmap; i++) {
mr->mr.map[m]->segs[n].vaddr =
page_address(chunk->page_list[i].page);
- mr->mr.map[m]->segs[n].length = region->page_size;
+ mr->mr.map[m]->segs[n].length = umem->page_size;
n++;
if (n == IPATH_SEGSZ) {
m++;
@@ -247,6 +259,10 @@ int ipath_dereg_mr(struct ib_mr *ibmr)
i--;
kfree(mr->mr.map[i]);
}
+
+ if (mr->umem)
+ ib_umem_release(mr->umem);
+
kfree(mr);
return 0;
}
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h
index c0c8d5b..8f7af7a 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.h
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.h
@@ -248,6 +248,7 @@ struct ipath_sge {
/* Memory region */
struct ipath_mr {
struct ib_mr ibmr;
+ struct ib_umem *umem;
struct ipath_mregion mr; /* must be last */
};
@@ -726,8 +727,8 @@ struct ib_mr *ipath_reg_phys_mr(struct ib_pd *pd,
struct ib_phys_buf *buffer_list,
int num_phys_buf, int acc, u64 *iova_start);
-struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
- int mr_access_flags,
+struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt_addr, int mr_access_flags,
struct ib_udata *udata);
int ipath_dereg_mr(struct ib_mr *ibmr);
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 0725ad7..cd5eb60 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -37,6 +37,7 @@
*/
#include <rdma/ib_smi.h>
+#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
#include <linux/mm.h>
@@ -907,6 +908,8 @@ static struct ib_mr *mthca_get_dma_mr(struct ib_pd *pd, int acc)
return ERR_PTR(err);
}
+ mr->umem = NULL;
+
return &mr->ibmr;
}
@@ -1002,11 +1005,13 @@ static struct ib_mr *mthca_reg_phys_mr(struct ib_pd *pd,
}
kfree(page_list);
+ mr->umem = NULL;
+
return &mr->ibmr;
}
-static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
- int acc, struct ib_udata *udata)
+static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt, int acc, struct ib_udata *udata)
{
struct mthca_dev *dev = to_mdev(pd->device);
struct ib_umem_chunk *chunk;
@@ -1017,20 +1022,26 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
int err = 0;
int write_mtt_size;
- shift = ffs(region->page_size) - 1;
-
mr = kmalloc(sizeof *mr, GFP_KERNEL);
if (!mr)
return ERR_PTR(-ENOMEM);
+ mr->umem = ib_umem_get(pd->uobject->context, start, length, acc);
+ if (IS_ERR(mr->umem)) {
+ err = PTR_ERR(mr->umem);
+ goto err;
+ }
+
+ shift = ffs(mr->umem->page_size) - 1;
+
n = 0;
- list_for_each_entry(chunk, ®ion->chunk_list, list)
+ list_for_each_entry(chunk, &mr->umem->chunk_list, list)
n += chunk->nents;
mr->mtt = mthca_alloc_mtt(dev, n);
if (IS_ERR(mr->mtt)) {
err = PTR_ERR(mr->mtt);
- goto err;
+ goto err_umem;
}
pages = (u64 *) __get_free_page(GFP_KERNEL);
@@ -1043,12 +1054,12 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
write_mtt_size = min(mthca_write_mtt_size(dev), (int) (PAGE_SIZE / sizeof *pages));
- list_for_each_entry(chunk, ®ion->chunk_list, list)
+ list_for_each_entry(chunk, &mr->umem->chunk_list, list)
for (j = 0; j < chunk->nmap; ++j) {
len = sg_dma_len(&chunk->page_list[j]) >> shift;
for (k = 0; k < len; ++k) {
pages[i++] = sg_dma_address(&chunk->page_list[j]) +
- region->page_size * k;
+ mr->umem->page_size * k;
/*
* Be friendly to write_mtt and pass it chunks
* of appropriate size.
@@ -1070,8 +1081,8 @@ mtt_done:
if (err)
goto err_mtt;
- err = mthca_mr_alloc(dev, to_mpd(pd)->pd_num, shift, region->virt_base,
- region->length, convert_access(acc), mr);
+ err = mthca_mr_alloc(dev, to_mpd(pd)->pd_num, shift, virt, length,
+ convert_access(acc), mr);
if (err)
goto err_mtt;
@@ -1081,6 +1092,9 @@ mtt_done:
err_mtt:
mthca_free_mtt(dev, mr->mtt);
+err_umem:
+ ib_umem_release(mr->umem);
+
err:
kfree(mr);
return ERR_PTR(err);
@@ -1089,8 +1103,12 @@ err:
static int mthca_dereg_mr(struct ib_mr *mr)
{
struct mthca_mr *mmr = to_mmr(mr);
+
mthca_free_mr(to_mdev(mr->device), mmr);
+ if (mmr->umem)
+ ib_umem_release(mmr->umem);
kfree(mmr);
+
return 0;
}
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h
index 1d266ac..262616c 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.h
+++ b/drivers/infiniband/hw/mthca/mthca_provider.h
@@ -73,6 +73,7 @@ struct mthca_mtt;
struct mthca_mr {
struct ib_mr ibmr;
+ struct ib_umem *umem;
struct mthca_mtt *mtt;
};
diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
new file mode 100644
index 0000000..06307f7
--- /dev/null
+++ b/include/rdma/ib_umem.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2007 Cisco Systems. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef IB_UMEM_H
+#define IB_UMEM_H
+
+#include <linux/list.h>
+#include <linux/scatterlist.h>
+
+struct ib_ucontext;
+
+struct ib_umem {
+ struct ib_ucontext *context;
+ size_t length;
+ int offset;
+ int page_size;
+ int writable;
+ struct list_head chunk_list;
+};
+
+struct ib_umem_chunk {
+ struct list_head list;
+ int nents;
+ int nmap;
+ struct scatterlist page_list[0];
+};
+
+#ifdef CONFIG_INFINIBAND_USER_MEM
+
+struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
+ size_t size, int access);
+void ib_umem_release(struct ib_umem *umem);
+int ib_umem_page_count(struct ib_umem *umem);
+
+#else /* CONFIG_INFINIBAND_USER_MEM */
+
+#include <linux/err.h>
+
+static inline struct ib_umem *ib_umem_get(struct ib_ucontext *context,
+ unsigned long addr, size_t size,
+ int access) {
+ return ERR_PTR(-EINVAL);
+}
+static inline void ib_umem_release(struct ib_umem *umem) { }
+static inline int ib_umem_page_count(struct ib_umem *umem) { return 0; }
+
+#endif /* CONFIG_INFINIBAND_USER_MEM */
+
+#endif /* IB_UMEM_H */
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 765589f..b910baa 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -5,7 +5,7 @@
* Copyright (c) 2004 Topspin Corporation. All rights reserved.
* Copyright (c) 2004 Voltaire Corporation. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
- * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
+ * Copyright (c) 2005, 2006, 2007 Cisco Systems. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -708,6 +708,7 @@ struct ib_ucontext {
struct list_head qp_list;
struct list_head srq_list;
struct list_head ah_list;
+ int closing;
};
struct ib_uobject {
@@ -721,23 +722,6 @@ struct ib_uobject {
int live;
};
-struct ib_umem {
- unsigned long user_base;
- unsigned long virt_base;
- size_t length;
- int offset;
- int page_size;
- int writable;
- struct list_head chunk_list;
-};
-
-struct ib_umem_chunk {
- struct list_head list;
- int nents;
- int nmap;
- struct scatterlist page_list[0];
-};
-
struct ib_udata {
void __user *inbuf;
void __user *outbuf;
@@ -750,11 +734,6 @@ struct ib_udata {
((void *) &((struct ib_umem_chunk *) 0)->page_list[1] - \
(void *) &((struct ib_umem_chunk *) 0)->page_list[0]))
-struct ib_umem_object {
- struct ib_uobject uobject;
- struct ib_umem umem;
-};
-
struct ib_pd {
struct ib_device *device;
struct ib_uobject *uobject;
@@ -998,7 +977,8 @@ struct ib_device {
int mr_access_flags,
u64 *iova_start);
struct ib_mr * (*reg_user_mr)(struct ib_pd *pd,
- struct ib_umem *region,
+ u64 start, u64 length,
+ u64 virt_addr,
int mr_access_flags,
struct ib_udata *udata);
int (*query_mr)(struct ib_mr *mr,
--
1.5.1
More information about the general
mailing list