[ofw] [Patch 36/62] Reference implementation of NDv2
Fab Tillier
ftillier at microsoft.com
Wed Feb 20 18:16:32 PST 2013
Add new API for registering an MDL.
This patch also aligns the value of the flag that requests registered memory not be secured, so that it is the same in the HCA driver, IBAL, WinVerbs, and ND.
Signed-off-by: Fab Tillier <ftillier at microsoft.com>
diff -dwup3 -X excl.txt -r \dev\openib\ofw\gen1\branches\mlx4_30\trunk\hw\mlx4\kernel\bus\inc\ib_verbs.h .\hw\mlx4\kernel\bus\inc\ib_verbs.h
--- \dev\openib\ofw\gen1\branches\mlx4_30\trunk\hw\mlx4\kernel\bus\inc\ib_verbs.h Tue Aug 07 12:46:47 2012
+++ .\hw\mlx4\kernel\bus\inc\ib_verbs.h Wed Aug 08 15:40:24 2012
@@ -754,7 +754,6 @@ enum ib_access_flags {
IB_ACCESS_REMOTE_READ = (1<<2),
IB_ACCESS_REMOTE_ATOMIC = (1<<3),
IB_ACCESS_MW_BIND = (1<<4),
- IB_ACCESS_ADDR_IS_MDL = (1<<5),
IB_ACCESS_NO_SECURE = (1<<31)
};
diff -dwup3 -X excl.txt -r \dev\openib\ofw\gen1\branches\mlx4_30\trunk\hw\mlx4\kernel\hca\hverbs.c .\hw\mlx4\kernel\hca\hverbs.c
--- \dev\openib\ofw\gen1\branches\mlx4_30\trunk\hw\mlx4\kernel\hca\hverbs.c Tue Aug 07 17:06:08 2012
+++ .\hw\mlx4\kernel\hca\hverbs.c Wed Aug 08 15:45:46 2012
@@ -91,42 +91,59 @@ struct ib_mr *ib_reg_phys_mr(struct ib_p
}
+static inline void commit_mr(struct ib_pd *pd, struct ib_mr *ib_mr)
+{
+ ib_mr->device = pd->device;
+ ib_mr->pd = pd;
+ atomic_inc(&pd->usecnt);
+ atomic_set(&ib_mr->usecnt, 0);
+ HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_MEMORY ,("PD%d use cnt %d, pd_handle %p, ctx %p \n",
+ ((struct mlx4_ib_pd*)pd)->pdn, pd->usecnt, pd, pd->p_uctx));
+}
+
+
struct ib_mr *ibv_reg_mr(struct ib_pd *pd,
u64 start, u64 length,
u64 virt_addr,
int mr_access_flags)
{
struct ib_mr *ib_mr;
- int err;
HCA_ENTER(HCA_DBG_MEMORY);
- if ((mr_access_flags & IB_ACCESS_ADDR_IS_MDL) == 0) {
ib_mr = pd->device->reg_user_mr(pd, start, length, virt_addr, mr_access_flags);
+
+ if (IS_ERR(ib_mr)) {
+ HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_MEMORY ,("ibv_reg_mr failed (%d)\n", PTR_ERR(ib_mr)));
}
else {
- /* kernel space call */
- ib_mr = pd->device->x.reg_krnl_mr(pd, (PMDL)(ULONG_PTR)start, length, mr_access_flags);
+ commit_mr(pd, ib_mr);
}
+ HCA_EXIT(HCA_DBG_MEMORY);
+ return ib_mr;
+}
+
+
+struct ib_mr *ibv_reg_mdl(struct ib_pd *pd,
+ MDL *mdl, u64 length,
+ int mr_access_flags)
+{
+ struct ib_mr *ib_mr;
+ HCA_ENTER(HCA_DBG_MEMORY);
+
+ ib_mr = pd->device->x.reg_krnl_mr(pd, mdl, length, mr_access_flags);
+
if (IS_ERR(ib_mr)) {
- err = PTR_ERR(ib_mr);
- HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_MEMORY ,("mthca_reg_user_mr failed (%d)\n", err));
- goto err_reg_user_mr;
+ HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_MEMORY ,("ibv_reg_mdl failed (%d)\n", PTR_ERR(ib_mr)));
+ }
+ else {
+ commit_mr(pd, ib_mr);
}
- ib_mr->device = pd->device;
- ib_mr->pd = pd;
- atomic_inc(&pd->usecnt);
- atomic_set(&ib_mr->usecnt, 0);
- HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_MEMORY ,("PD%d use cnt %d, pd_handle %p, ctx %p \n",
- ((struct mlx4_ib_pd*)pd)->pdn, pd->usecnt, pd, pd->p_uctx));
HCA_EXIT(HCA_DBG_MEMORY);
return ib_mr;
-
-err_reg_user_mr:
- HCA_EXIT(HCA_DBG_MEMORY);
- return ERR_PTR(err);
}
+
struct ib_mr *ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len)
{
diff -dwup3 -X excl.txt -r \dev\openib\ofw\gen1\branches\mlx4_30\trunk\hw\mlx4\kernel\hca\hverbs.h .\hw\mlx4\kernel\hca\hverbs.h
--- \dev\openib\ofw\gen1\branches\mlx4_30\trunk\hw\mlx4\kernel\hca\hverbs.h Thu Aug 02 16:18:42 2012
+++ .\hw\mlx4\kernel\hca\hverbs.h Wed Aug 08 14:24:59 2012
@@ -45,6 +45,10 @@ struct ib_mr *ibv_reg_mr(struct ib_pd *p
u64 virt_addr,
int mr_access_flags);
+struct ib_mr *ibv_reg_mdl(struct ib_pd *pd,
+ MDL *mdl, u64 length,
+ int mr_access_flags);
+
struct ib_mr *ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len);
ib_fast_reg_page_list_t *ib_alloc_fast_reg_page_list(struct ib_device *device,
diff -dwup3 -X excl.txt -r \dev\openib\ofw\gen1\branches\mlx4_30\trunk\hw\mlx4\kernel\hca\mr.c .\hw\mlx4\kernel\hca\mr.c
--- \dev\openib\ofw\gen1\branches\mlx4_30\trunk\hw\mlx4\kernel\hca\mr.c Tue Aug 07 12:37:23 2012
+++ .\hw\mlx4\kernel\hca\mr.c Wed Aug 08 14:35:49 2012
@@ -55,10 +55,11 @@ mlnx_register_mr_remap (
int err;
struct ib_mr *p_ib_mr;
struct ib_pd *p_ib_pd = (struct ib_pd *)h_pd;
- int access_ctrl;
HCA_ENTER(HCA_DBG_MEMORY);
+ UNREFERENCED_PARAMETER(um_call);
+
// sanity checks
if( !cl_is_blockable() ) {
status = IB_UNSUPPORTED;
@@ -90,15 +91,10 @@ mlnx_register_mr_remap (
goto err_invalid_access;
}
- access_ctrl = to_qp_acl(p_mr_create->access_ctrl);
- if (um_call == FALSE) {
- access_ctrl |= IB_ACCESS_ADDR_IS_MDL;
- }
-
// register mr
p_ib_mr = ibv_reg_mr(p_ib_pd, (ULONG_PTR)p_mr_create->vaddr,
p_mr_create->length, mapaddr,
- access_ctrl);
+ to_qp_acl(p_mr_create->access_ctrl));
if (IS_ERR(p_ib_mr)) {
err = PTR_ERR(p_ib_mr);
HCA_PRINT(TRACE_LEVEL_ERROR, HCA_DBG_MEMORY,
@@ -124,7 +120,6 @@ err_unsupported:
return status;
}
-
ib_api_status_t
mlnx_register_mr (
IN const ib_pd_handle_t h_pd,
@@ -155,6 +150,74 @@ err_invalid_parm:
}
ib_api_status_t
+mlnx_register_mdl (
+ IN const ib_pd_handle_t h_pd,
+ IN const ib_mr_create_t *p_mr_create,
+ IN MDL *p_mdl,
+ OUT net32_t* const p_lkey,
+ OUT net32_t* const p_rkey,
+ OUT ib_mr_handle_t *ph_mr )
+{
+ ib_api_status_t status;
+ int err;
+ struct ib_mr *p_ib_mr;
+ struct ib_pd *p_ib_pd = (struct ib_pd *)h_pd;
+
+ HCA_ENTER(HCA_DBG_MEMORY);
+
+ // sanity checks
+ if( !cl_is_blockable() ) {
+ status = IB_UNSUPPORTED;
+ goto err_unsupported;
+ }
+ if (!p_mr_create || 0 == p_mr_create->length) {
+ HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_MEMORY,
+ ("invalid attributes\n"));
+ status = IB_INVALID_PARAMETER;
+ goto err_invalid_parm;
+ }
+
+ /*
+ * Local write permission is required if remote write or
+ * remote atomic permission is also requested.
+ */
+ if (p_mr_create->access_ctrl & (IB_AC_RDMA_WRITE | IB_AC_ATOMIC) &&
+ !(p_mr_create->access_ctrl & IB_AC_LOCAL_WRITE)) {
+ HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_MEMORY,
+ ("invalid access rights\n"));
+ status = IB_INVALID_PERMISSION;
+ goto err_invalid_access;
+ }
+
+ // register mr
+ p_ib_mr = ibv_reg_mdl(p_ib_pd, p_mdl,
+ p_mr_create->length, to_qp_acl(p_mr_create->access_ctrl));
+ if (IS_ERR(p_ib_mr)) {
+ err = PTR_ERR(p_ib_mr);
+ HCA_PRINT(TRACE_LEVEL_ERROR, HCA_DBG_MEMORY,
+ ("ibv_reg_mdl failed (%d)\n", err));
+ status = errno_to_iberr(err);
+ goto err_reg_mr;
+ }
+
+ // results
+ *p_lkey = p_ib_mr->lkey;
+ *p_rkey = cl_hton32( p_ib_mr->rkey );
+ *ph_mr = (ib_mr_handle_t)p_ib_mr;
+ status = IB_SUCCESS;
+
+err_reg_mr:
+err_invalid_access:
+err_invalid_parm:
+err_unsupported:
+ if (status != IB_SUCCESS)
+ HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_MEMORY,
+ ("completes with ERROR status %x\n", status));
+ HCA_EXIT(HCA_DBG_MEMORY);
+ return status;
+}
+
+ib_api_status_t
mlnx_register_pmr (
IN const ib_pd_handle_t h_pd,
IN const ib_phys_create_t* const p_pmr_create,
@@ -703,6 +766,7 @@ mlnx_mr_if(
p_interface->free_fast_reg_page_list = mlnx_free_fast_reg_page_list;
p_interface->register_mr_remap = mlnx_register_mr_remap;
+ p_interface->register_mdl = mlnx_register_mdl;
}
void
diff -dwup3 -X excl.txt -r \dev\openib\ofw\gen1\branches\mlx4_30\trunk\inc\iba\ib_ci.h .\inc\iba\ib_ci.h
--- \dev\openib\ofw\gen1\branches\mlx4_30\trunk\inc\iba\ib_ci.h Tue Aug 07 12:37:23 2012
+++ .\inc\iba\ib_ci.h Wed Aug 08 14:33:02 2012
@@ -72,7 +72,7 @@ extern "C"
* definition.
*/
#define VERBS_MAJOR_VER (0x0002)
-#define VERBS_MINOR_VER (0x0006)
+#define VERBS_MINOR_VER (0x0007)
#define VERBS_EX_MAJOR_VER (0x0001)
#define VERBS_EX_MINOR_VER (0x0000)
@@ -1695,6 +1695,72 @@ typedef ib_api_status_t
*/
+/****f* Verbs/ci_register_mdl
+* NAME
+* ci_register_mdl -- Register memory described by an MDL chain with the HCA.
+* SYNOPSIS
+*/
+
+typedef ib_api_status_t
+(*ci_register_mdl) (
+ IN const ib_pd_handle_t h_pd,
+ IN const ib_mr_create_t *p_mr_create,
+ IN MDL *p_mdl,
+ OUT net32_t* const p_lkey,
+ OUT net32_t* const p_rkey,
+ OUT ib_mr_handle_t *ph_mr );
+/*
+* DESCRIPTION
+* This routine registers a virtually contiguous region of memory with the
+* HCA. All memory regions that need to be used by the HCA must be registered
+* prior to use in data transfer operations. On successful completion
+* the region handle and lkey are returned. If remote access rights are specified
+* then the rkey is also returned.
+* PARAMETERS
+* h_pd
+* [in] Handle to the PD on which memory is being registered
+* p_mr_create
+* [in] Holds attributes for the region being registered. Look at
+* ib_mr_create_t for more details.
+* p_mdl
+* [in] Pointer to an MDL chain that describes the memory to register.
+* The memory referenced by the MDL chain is already pinned and the
+* page numbers in the MDLs are valid.
+* p_lkey
+* [out] Local Key Attributes of the registered memory region
+* p_rkey
+* [out] Remote key of the registered memory region. The verbs provider
+* is required to give this in the expected ordering on the wire. When
+* rkey's are exchanged between remote nodes, no swapping of this data
+* will be performed.
+* ph_mr
+* [out] Handle to the registered memory region. This handle is used when
+* submitting work requests to refer to this region of memory.
+* RETURN VALUE
+* IB_SUCCESS
+* Registration with the adapter was successful.
+* IB_INSUFFICIENT_RESOURCES
+* Insufficient resources to satisfy request.
+* IB_INVALID_PARAMETER
+* One of the input pointers was NULL.
+* IB_INVALID_PD_HANDLE
+* Invalid protection domain handle (h_pd).
+* IB_INVALID_PERMISSION
+* Invalid access rights.
+* NOTES
+* The caller must have pinned the memory referenced by the MDLs, so that the
+* physical page associated with the virtual address does not get swapped
+* out during the time the HCA is attempting to transfer data to this
+* address. If the memory is not pinned, this could lead to data-corruption
+* and unpredictable behavior by the operating environment.
+*
+* SEE ALSO
+* ci_deregister_mr, ci_query_mr, ci_register_pmr, ci_modify_mr,
+* ci_register_smr
+******
+*/
+
+
/****f* Verbs/ci_register_pmr
* NAME
* ci_register_pmr -- Register a physical memory region with the HCA.
@@ -3348,10 +3414,12 @@ typedef struct _ci_interface
/* 2.5 verbs */
ci_create_qp_ex create_qp_ex;
+
/*
* Extended functionality to support kernel clients.
*/
ci_register_mr_remap register_mr_remap;
+ ci_register_mdl register_mdl;
} ci_interface_t;
/********/
diff -dwup3 -X excl.txt -r \dev\openib\ofw\gen1\branches\mlx4_30\trunk\inc\iba\ib_types.h .\inc\iba\ib_types.h
--- \dev\openib\ofw\gen1\branches\mlx4_30\trunk\inc\iba\ib_types.h Fri Aug 03 17:24:07 2012
+++ .\inc\iba\ib_types.h Wed Aug 08 15:41:24 2012
@@ -10468,7 +10468,7 @@ typedef uint32_t ib_access_t;
#define IB_AC_ATOMIC 0x00000004
#define IB_AC_LOCAL_WRITE 0x00000008
#define IB_AC_MW_BIND 0x00000010
-#define IB_AC_NOT_CACHABLE 0x00000020
+#define IB_AC_NOT_CACHABLE 0x80000000
/*
* NOTES
* Users may combine access rights using a bit-wise or operation to specify
diff -dwup3 -X excl.txt -r \dev\openib\ofw\gen1\branches\mlx4_30\trunk\inc\user\rdma\winverbs.h .\inc\user\rdma\winverbs.h
--- \dev\openib\ofw\gen1\branches\mlx4_30\trunk\inc\user\rdma\winverbs.h Thu Aug 02 13:08:45 2012
+++ .\inc\user\rdma\winverbs.h Wed Aug 08 15:42:09 2012
@@ -264,7 +264,7 @@ typedef struct _WV_MEMORY_KEYS
#define WV_ACCESS_REMOTE_ATOMIC 0x00000004
#define WV_ACCESS_LOCAL_WRITE 0x00000008
#define WV_ACCESS_MW_BIND 0x00000010
-#define WV_ACCESS_NON_CACHABLE 0x00000020
+#define WV_ACCESS_NON_CACHABLE 0x80000000
// Send queue operation flags
#define WV_SEND_IMMEDIATE 0x00000001
-------------- next part --------------
A non-text attachment was scrubbed...
Name: ndv2.36.patch
Type: application/octet-stream
Size: 11936 bytes
Desc: ndv2.36.patch
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20130221/532afc84/attachment.obj>
More information about the ofw
mailing list