[Openib-windows] [PATCH] MTHCA: Support kernel virtual memory registration
Leonid Keller
leonid at mellanox.co.il
Sun Aug 13 08:41:39 PDT 2006
Thank you for the patch.
I didn't like that it breaks the iobuf object's data encapsulation, so I've
changed the patch a little.
We'll check it here and I'll commit it, I believe, tomorrow.
It would be nice if you could check it in parallel against SRP.
> -----Original Message-----
> From: Fab Tillier [mailto:ftillier at silverstorm.com]
> Sent: Saturday, August 12, 2006 12:55 AM
> To: Leonid Keller
> Cc: openib-windows at openib.org
> Subject: [PATCH] MTHCA: Support kernel virtual memory registration
>
> Hi Leonid,
>
> Here is a patch that enables kernel clients to register
> virtual memory regions. Note that this change was actually
> quite a bit simpler than I anticipated. I have tested this
> with the SRP driver and validated that it works.
>
> Please apply, or let me know if you have any questions about it.
>
> Thanks!
>
> - Fab
>
> Index: hw/mthca/kernel/ib_verbs.h
> ===================================================================
> --- hw/mthca/kernel/ib_verbs.h (revision 445)
> +++ hw/mthca/kernel/ib_verbs.h (working copy)
> @@ -776,8 +776,9 @@
> int num_phys_buf,
> mthca_qp_access_t
> mr_access_flags,
> u64 *iova_start);
> - struct ib_mr * (*reg_user_mr)(struct ib_pd *pd,
> - void* __ptr64 vaddr,
> uint64_t length, uint64_t hca_va, mthca_qp_access_t acc);
> + struct ib_mr * (*reg_virt_mr)(struct ib_pd *pd,
> + void* __ptr64 vaddr,
> uint64_t length, uint64_t hca_va,
> + mthca_qp_access_t acc,
> boolean_t um_call);
> int (*query_mr)(struct ib_mr *mr,
> struct
> ib_mr_attr *mr_attr);
> int (*dereg_mr)(struct ib_mr *mr);
> Index: hw/mthca/kernel/mt_verbs.c
> ===================================================================
> --- hw/mthca/kernel/mt_verbs.c (revision 445)
> +++ hw/mthca/kernel/mt_verbs.c (working copy)
> @@ -625,14 +625,8 @@
> struct ib_mr *ib_mr;
> int err;
> HCA_ENTER(HCA_DBG_MEMORY);
> - /* sanity check */
> - if (!um_call) {
> - err = -ENOSYS;
> - HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_MEMORY
> ,("ibv_reg_mr for
> kernel mode is not supported (%d)\n", err));
> - goto err_not_supported;
> - }
>
> - ib_mr = pd->device->reg_user_mr(pd, vaddr, length, hca_va,
> mr_access_flags);
> + ib_mr = pd->device->reg_virt_mr(pd, vaddr, length, hca_va,
> mr_access_flags, um_call);
> if (IS_ERR(ib_mr)) {
> err = PTR_ERR(ib_mr);
> HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_MEMORY
> ,("mthca_reg_user_mr failed (%d)\n", err)); @@ -649,7 +643,6 @@
> return ib_mr;
>
> err_reg_user_mr:
> -err_not_supported:
> HCA_EXIT(HCA_DBG_MEMORY);
> return ERR_PTR(err);
> }
> Index: hw/mthca/kernel/hca_memory.c
> ===================================================================
> --- hw/mthca/kernel/hca_memory.c (revision 445)
> +++ hw/mthca/kernel/hca_memory.c (working copy)
> @@ -408,7 +408,7 @@
> if (err) {
> status = errno_to_iberr(err);
> HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_MEMORY ,
> - ("mthca_dereg_mr failed (%d) for mr
> %p\n", err,
> h_fmr));
> + ("ibv_map_phys_fmr failed (%d) for mr
> %p\n", err,
> h_fmr));
> goto err_dealloc_fmr;
> }
>
> @@ -494,7 +494,7 @@
> if (err) {
> status = errno_to_iberr(err);
> HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_MEMORY ,
> - ("mthca_dereg_mr failed (%d) for mr
> %p\n", err,
> h_fmr));
> + ("ibv_dealloc_fmr failed (%d) for mr
> %p\n", err,
> h_fmr));
> goto err_dealloc_fmr;
> }
>
> Index: hw/mthca/kernel/mt_memory.c
> ===================================================================
> --- hw/mthca/kernel/mt_memory.c (revision 445)
> +++ hw/mthca/kernel/mt_memory.c (working copy)
> @@ -375,15 +375,15 @@
> // make context-dependent things
> if (is_user) {
> ASSERT(KeGetCurrentIrql() < DISPATCH_LEVEL);
> - mode = UserMode;
> + mode = UserMode;
> }
> else { /* Mapping to kernel virtual address */
> // MmBuildMdlForNonPagedPool(mdl_p); //
> fill MDL ??? -
> should we do that really ?
> - mode = KernelMode;
> + mode = KernelMode;
> }
>
> __try { /* try */
> - MmProbeAndLockPages( mdl_p, mode, Operation ); /*
> lock memory */
> + MmProbeAndLockPages( mdl_p, mode, Operation ); /* lock
> memory */
> } /* try */
>
> __except (EXCEPTION_EXECUTE_HANDLER) {
> @@ -431,10 +431,6 @@
>
> ASSERT(KeGetCurrentIrql() <= DISPATCH_LEVEL);
>
> - // init IOBUF object
> - InitializeListHead( &iobuf_p->seg_que );
> - iobuf_p->seg_num = 0;
> -
> // Round the seg_va down to a page boundary so that we
> always get a seg_size
> // that is an integral number of pages.
> delta = va & (PAGE_SIZE - 1);
> Index: hw/mthca/kernel/mthca_provider.c
> ===================================================================
> --- hw/mthca/kernel/mthca_provider.c (revision 445)
> +++ hw/mthca/kernel/mthca_provider.c (working copy)
> @@ -876,6 +876,10 @@
> return ERR_PTR(-ENOMEM);
> RtlZeroMemory(mr, sizeof *mr);
>
> + // init IOBUF object
> + InitializeListHead( &mr->iobuf.seg_que );
> + mr->iobuf.seg_num = 0;
> +
> err = mthca_mr_alloc_notrans(to_mdev(pd->device),
> to_mpd(pd)->pd_num,
> map_qp_mpt(acc), mr);
> @@ -945,6 +949,10 @@
> return ERR_PTR(-ENOMEM);
> RtlZeroMemory(mr, sizeof *mr);
>
> + // init IOBUF object
> + InitializeListHead( &mr->iobuf.seg_que );
> + mr->iobuf.seg_num = 0;
> +
> npages = 0;
> for (i = 0; i < num_phys_buf; ++i)
> npages += (int)((buffer_list[i].size + (1Ui64
> << shift) - 1)
> >> shift);
> @@ -988,8 +996,9 @@
> return &mr->ibmr;
> }
>
> -struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd,
> - void* __ptr64 vaddr, uint64_t length, uint64_t hca_va,
> mthca_qp_access_t acc)
> +struct ib_mr *mthca_reg_virt_mr(struct ib_pd *pd,
> + void* __ptr64 vaddr, uint64_t length, uint64_t hca_va,
> + mthca_qp_access_t acc, boolean_t um_call)
> {
> struct mthca_dev *dev = to_mdev(pd->device);
> struct mthca_mr *mr;
> @@ -1007,13 +1016,16 @@
>
> HCA_ENTER(HCA_DBG_MEMORY);
>
> -
> mr = kzalloc(sizeof *mr, GFP_KERNEL);
> if (!mr) {
> err = -ENOMEM;
> goto err_nomem;
> }
>
> + // init IOBUF object
> + InitializeListHead( &mr->iobuf.seg_que );
> + mr->iobuf.seg_num = 0;
> +
> /*
> * We ask for writable memory if any access flags other than
> * "remote read" are set. "Local write" and "remote write"
> @@ -1024,7 +1036,7 @@
>
> // try register the buffer
> iobuf_p = &mr->iobuf;
> - err = iobuf_register( (u64)vaddr, length, TRUE,
> + err = iobuf_register( (u64)vaddr, length, um_call,
> (acc & ~MTHCA_ACCESS_REMOTE_READ) ?
> IB_AC_LOCAL_WRITE : 0, iobuf_p );
> if (err)
> goto err_reg_mem;
> @@ -1117,13 +1129,11 @@
> struct mthca_mr *mmr = to_mmr(mr);
> struct mthca_dev* dev = to_mdev(mr->device);
>
> - if (mr->pd->ucontext) {
> - MmUnsecureVirtualMemory ( mmr->secure_handle );
> - mthca_free_mr(dev, mmr);
> - iobuf_deregister(&mmr->iobuf);
> - }
> - else
> - mthca_free_mr(dev, mmr);
> + if (mr->pd->ucontext)
> + MmUnsecureVirtualMemory( mmr->secure_handle );
> +
> + mthca_free_mr(dev, mmr);
> + iobuf_deregister(&mmr->iobuf);
> kfree(mmr);
> return 0;
> }
> @@ -1272,7 +1282,7 @@
> dev->ib_dev.poll_cq = mthca_poll_cq;
> dev->ib_dev.get_dma_mr = mthca_get_dma_mr;
> dev->ib_dev.reg_phys_mr = mthca_reg_phys_mr;
> - dev->ib_dev.reg_user_mr = mthca_reg_user_mr;
> + dev->ib_dev.reg_virt_mr = mthca_reg_virt_mr;
> dev->ib_dev.dereg_mr = mthca_dereg_mr;
>
> if (dev->mthca_flags & MTHCA_FLAG_FMR) {
> Index: hw/mthca/kernel/mthca_provider.h
> ===================================================================
> --- hw/mthca/kernel/mthca_provider.h (revision 445)
> +++ hw/mthca/kernel/mthca_provider.h (working copy)
> @@ -420,8 +420,9 @@
> mthca_qp_access_t
> acc,
> u64 *iova_start);
>
> -struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd,
> - void* __ptr64 vaddr, uint64_t length, uint64_t hca_va,
> mthca_qp_access_t acc);
> +struct ib_mr *mthca_reg_virt_mr(struct ib_pd *pd,
> + void* __ptr64 vaddr, uint64_t length, uint64_t hca_va,
> + mthca_qp_access_t acc, boolean_t um_call);
>
> int mthca_dereg_mr(struct ib_mr *mr);
>
>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: mthca_reg_virt1.patch
Type: application/octet-stream
Size: 5830 bytes
Desc: mthca_reg_virt1.patch
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20060813/3f535bf1/attachment.obj>
More information about the ofw
mailing list