[ofw] [Patch 36/62] Reference implementation of NDv2

Fab Tillier ftillier at microsoft.com
Wed Feb 20 18:16:32 PST 2013


Add new API for registering an MDL.
This patch also aligns, across the HCA driver, IBAL, WinVerbs, and ND, the value of the access flag that requests that registered memory not be secured.

Signed-off-by: Fab Tillier <ftillier at microsoft.com>

diff -dwup3 -X excl.txt -r \dev\openib\ofw\gen1\branches\mlx4_30\trunk\hw\mlx4\kernel\bus\inc\ib_verbs.h .\hw\mlx4\kernel\bus\inc\ib_verbs.h
--- \dev\openib\ofw\gen1\branches\mlx4_30\trunk\hw\mlx4\kernel\bus\inc\ib_verbs.h	Tue Aug 07 12:46:47 2012
+++ .\hw\mlx4\kernel\bus\inc\ib_verbs.h	Wed Aug 08 15:40:24 2012
@@ -754,7 +754,6 @@ enum ib_access_flags {
 	IB_ACCESS_REMOTE_READ	= (1<<2),
 	IB_ACCESS_REMOTE_ATOMIC	= (1<<3),
 	IB_ACCESS_MW_BIND	    = (1<<4),
-	IB_ACCESS_ADDR_IS_MDL   = (1<<5),
 	IB_ACCESS_NO_SECURE     = (1<<31)
 };
 
diff -dwup3 -X excl.txt -r \dev\openib\ofw\gen1\branches\mlx4_30\trunk\hw\mlx4\kernel\hca\hverbs.c .\hw\mlx4\kernel\hca\hverbs.c
--- \dev\openib\ofw\gen1\branches\mlx4_30\trunk\hw\mlx4\kernel\hca\hverbs.c	Tue Aug 07 17:06:08 2012
+++ .\hw\mlx4\kernel\hca\hverbs.c	Wed Aug 08 15:45:46 2012
@@ -91,42 +91,59 @@ struct ib_mr *ib_reg_phys_mr(struct ib_p
 }
 
 
+static inline void commit_mr(struct ib_pd *pd, struct ib_mr *ib_mr)
+{
+	ib_mr->device  = pd->device;
+	ib_mr->pd      = pd;
+	atomic_inc(&pd->usecnt);
+	atomic_set(&ib_mr->usecnt, 0);
+	HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_MEMORY ,("PD%d use cnt %d, pd_handle %p, ctx %p \n", 
+		((struct mlx4_ib_pd*)pd)->pdn, pd->usecnt, pd, pd->p_uctx));
+}
+
+
  struct ib_mr *ibv_reg_mr(struct ib_pd *pd, 
 	u64 start, u64 length,
 	u64 virt_addr,
 	int mr_access_flags)
 {
 	struct ib_mr *ib_mr;
-	int err;
 	HCA_ENTER(HCA_DBG_MEMORY);
 
-	if ((mr_access_flags & IB_ACCESS_ADDR_IS_MDL) == 0) {
 		ib_mr = pd->device->reg_user_mr(pd, start, length, virt_addr, mr_access_flags);
+
+	if (IS_ERR(ib_mr)) {
+		HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_MEMORY ,("ibv_reg_mr failed (%d)\n", PTR_ERR(ib_mr)));
 	}
 	else {
-		/* kernel space call */
-		ib_mr = pd->device->x.reg_krnl_mr(pd, (PMDL)(ULONG_PTR)start, length, mr_access_flags);
+        commit_mr(pd, ib_mr);
 	}
 
+	HCA_EXIT(HCA_DBG_MEMORY);
+	return ib_mr;
+}
+
+
+struct ib_mr *ibv_reg_mdl(struct ib_pd *pd,
+    MDL *mdl, u64 length,
+    int mr_access_flags)
+{
+	struct ib_mr *ib_mr;
+	HCA_ENTER(HCA_DBG_MEMORY);
+
+	ib_mr = pd->device->x.reg_krnl_mr(pd, mdl, length, mr_access_flags);
+
 	if (IS_ERR(ib_mr)) {
-		err = PTR_ERR(ib_mr);
-		HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_MEMORY ,("mthca_reg_user_mr failed (%d)\n", err));
-		goto err_reg_user_mr;
+		HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_MEMORY ,("ibv_reg_mdl failed (%d)\n", PTR_ERR(ib_mr)));
+	}
+    else {
+        commit_mr(pd, ib_mr);
 	}
 
-	ib_mr->device  = pd->device;
-	ib_mr->pd      = pd;
-	atomic_inc(&pd->usecnt);
-	atomic_set(&ib_mr->usecnt, 0);
-	HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_MEMORY ,("PD%d use cnt %d, pd_handle %p, ctx %p \n", 
-		((struct mlx4_ib_pd*)pd)->pdn, pd->usecnt, pd, pd->p_uctx));
 	HCA_EXIT(HCA_DBG_MEMORY);
 	return ib_mr;
-
-err_reg_user_mr:
-	HCA_EXIT(HCA_DBG_MEMORY);
-	return ERR_PTR(err);
 }
+
 
 struct ib_mr *ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len)
 {
diff -dwup3 -X excl.txt -r \dev\openib\ofw\gen1\branches\mlx4_30\trunk\hw\mlx4\kernel\hca\hverbs.h .\hw\mlx4\kernel\hca\hverbs.h
--- \dev\openib\ofw\gen1\branches\mlx4_30\trunk\hw\mlx4\kernel\hca\hverbs.h	Thu Aug 02 16:18:42 2012
+++ .\hw\mlx4\kernel\hca\hverbs.h	Wed Aug 08 14:24:59 2012
@@ -45,6 +45,10 @@ struct ib_mr *ibv_reg_mr(struct ib_pd *p
 	u64 virt_addr,
 	int mr_access_flags);
 
+struct ib_mr *ibv_reg_mdl(struct ib_pd *pd,
+    MDL *mdl, u64 length,
+    int mr_access_flags);
+
 struct ib_mr *ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len);
 
 ib_fast_reg_page_list_t *ib_alloc_fast_reg_page_list(struct ib_device *device,
diff -dwup3 -X excl.txt -r \dev\openib\ofw\gen1\branches\mlx4_30\trunk\hw\mlx4\kernel\hca\mr.c .\hw\mlx4\kernel\hca\mr.c
--- \dev\openib\ofw\gen1\branches\mlx4_30\trunk\hw\mlx4\kernel\hca\mr.c	Tue Aug 07 12:37:23 2012
+++ .\hw\mlx4\kernel\hca\mr.c	Wed Aug 08 14:35:49 2012
@@ -55,10 +55,11 @@ mlnx_register_mr_remap (
 	int err;
 	struct ib_mr *p_ib_mr;
 	struct ib_pd *p_ib_pd = (struct ib_pd *)h_pd;
-	int access_ctrl;
 
 	HCA_ENTER(HCA_DBG_MEMORY);
 
+	UNREFERENCED_PARAMETER(um_call);
+
 	// sanity checks
 	if( !cl_is_blockable() ) {
 		status = IB_UNSUPPORTED;
@@ -90,15 +91,10 @@ mlnx_register_mr_remap (
 		goto err_invalid_access; 
 	}
 
-	access_ctrl = to_qp_acl(p_mr_create->access_ctrl);
-	if (um_call == FALSE) {
-		access_ctrl |= IB_ACCESS_ADDR_IS_MDL;
-	}
-
 	// register mr 
 	p_ib_mr = ibv_reg_mr(p_ib_pd, (ULONG_PTR)p_mr_create->vaddr, 
 		p_mr_create->length, mapaddr, 
-		access_ctrl);
+		to_qp_acl(p_mr_create->access_ctrl));
 	if (IS_ERR(p_ib_mr)) {
 		err = PTR_ERR(p_ib_mr);
 		HCA_PRINT(TRACE_LEVEL_ERROR, HCA_DBG_MEMORY,
@@ -124,7 +120,6 @@ err_unsupported:
 	return status;
 }
 
-
 ib_api_status_t
 mlnx_register_mr (
 	IN		const	ib_pd_handle_t				h_pd,
@@ -155,6 +150,74 @@ err_invalid_parm:
 }
 
 ib_api_status_t
+mlnx_register_mdl (
+	IN		const	ib_pd_handle_t				h_pd,
+	IN		const	ib_mr_create_t				*p_mr_create,
+	IN				MDL                         *p_mdl,
+	OUT				net32_t* const				p_lkey,
+	OUT				net32_t* const				p_rkey,
+	OUT				ib_mr_handle_t				*ph_mr )
+{
+	ib_api_status_t 	status;
+	int err;
+	struct ib_mr *p_ib_mr;
+	struct ib_pd *p_ib_pd = (struct ib_pd *)h_pd;
+
+	HCA_ENTER(HCA_DBG_MEMORY);
+
+	// sanity checks
+	if( !cl_is_blockable() ) {
+		status = IB_UNSUPPORTED;
+		goto err_unsupported;
+	} 
+	if (!p_mr_create || 0 == p_mr_create->length) {
+		HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_MEMORY,
+			("invalid attributes\n"));
+		status = IB_INVALID_PARAMETER;
+		goto err_invalid_parm; 
+	}
+
+	/*
+	 * Local write permission is required if remote write or
+	 * remote atomic permission is also requested.
+	 */
+	if (p_mr_create->access_ctrl & (IB_AC_RDMA_WRITE | IB_AC_ATOMIC) &&
+		!(p_mr_create->access_ctrl & IB_AC_LOCAL_WRITE)) {
+		HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_MEMORY,
+			("invalid access rights\n"));
+		status = IB_INVALID_PERMISSION;
+		goto err_invalid_access; 
+	}
+
+	// register mr 
+	p_ib_mr = ibv_reg_mdl(p_ib_pd, p_mdl, 
+		p_mr_create->length, to_qp_acl(p_mr_create->access_ctrl));
+	if (IS_ERR(p_ib_mr)) {
+		err = PTR_ERR(p_ib_mr);
+		HCA_PRINT(TRACE_LEVEL_ERROR, HCA_DBG_MEMORY,
+			("ibv_reg_mdl failed (%d)\n", err));
+		status = errno_to_iberr(err);
+		goto err_reg_mr;
+	}
+
+	// results
+	*p_lkey = p_ib_mr->lkey;
+	*p_rkey = cl_hton32( p_ib_mr->rkey );
+	*ph_mr = (ib_mr_handle_t)p_ib_mr;
+	status = IB_SUCCESS;
+
+err_reg_mr:
+err_invalid_access:
+err_invalid_parm:
+err_unsupported:
+	if (status != IB_SUCCESS) 
+		HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_MEMORY,
+			("completes with ERROR status %x\n", status));
+	HCA_EXIT(HCA_DBG_MEMORY);
+	return status;
+}
+
+ib_api_status_t
 mlnx_register_pmr (
 	IN		const	ib_pd_handle_t				h_pd,
 	IN		const	ib_phys_create_t* const		p_pmr_create,
@@ -703,6 +766,7 @@ mlnx_mr_if(
 	p_interface->free_fast_reg_page_list = mlnx_free_fast_reg_page_list;
 
 	p_interface->register_mr_remap = mlnx_register_mr_remap;
+    p_interface->register_mdl = mlnx_register_mdl;
 }
 
 void
diff -dwup3 -X excl.txt -r \dev\openib\ofw\gen1\branches\mlx4_30\trunk\inc\iba\ib_ci.h .\inc\iba\ib_ci.h
--- \dev\openib\ofw\gen1\branches\mlx4_30\trunk\inc\iba\ib_ci.h	Tue Aug 07 12:37:23 2012
+++ .\inc\iba\ib_ci.h	Wed Aug 08 14:33:02 2012
@@ -72,7 +72,7 @@ extern "C"
  * definition.
  */
 #define VERBS_MAJOR_VER			(0x0002)
-#define VERBS_MINOR_VER			(0x0006)
+#define VERBS_MINOR_VER			(0x0007)
 #define VERBS_EX_MAJOR_VER		(0x0001)
 #define VERBS_EX_MINOR_VER		(0x0000)
 
@@ -1695,6 +1695,72 @@ typedef ib_api_status_t
 */
 
 
+/****f* Verbs/ci_register_mdl
+* NAME
+*	ci_register_mdl -- Register memory described by an MDL chain with the HCA.
+* SYNOPSIS
+*/
+
+typedef ib_api_status_t
+(*ci_register_mdl) (
+	IN		const	ib_pd_handle_t				h_pd,
+	IN		const	ib_mr_create_t				*p_mr_create,
+	IN				MDL                         *p_mdl,
+	OUT				net32_t* const				p_lkey,
+	OUT				net32_t* const				p_rkey,
+	OUT				ib_mr_handle_t				*ph_mr );
+/*
+* DESCRIPTION
+*	This routine registers a virtually contiguous region of memory with the
+*	HCA. All memory regions that need to be used by the HCA must be registered
+*	prior to use in data transfer operations. On successful completion
+*	the region handle and lkey are returned. If remote access rights are specified
+*	then the rkey is also returned.
+* PARAMETERS
+*	h_pd
+*		[in] Handle to the PD on which memory is being registered
+*	p_mr_create
+*		[in] Holds attributes for the region being registered. Look at
+*		ib_mr_create_t for more details.
+*   p_mdl
+*       [in] Pointer to an MDL chain that describes the memory to register.
+*       The memory referenced by the MDL chain is already pinned and the
+*       page numbers in the MDLs are valid.
+*	p_lkey
+*		[out] Local Key Attributes of the registered memory region
+*	p_rkey
+*		[out] Remote key of the registered memory region. The verbs provider
+*		is required to give this in the expected ordering on the wire. When
+*		rkey's are exchanged between remote nodes, no swapping of this data
+*		will be performed.
+*	ph_mr
+*		[out] Handle to the registered memory region. This handle is used when
+*		submitting work requests to refer to this region of memory.
+* RETURN VALUE
+*	IB_SUCCESS
+*		Registration with the adapter was successful.
+*	IB_INSUFFICIENT_RESOURCES
+*		Insufficient resources to satisfy request.
+*	IB_INVALID_PARAMETER
+*		One of the input pointers was NULL, or the requested length was zero.
+*	IB_INVALID_PD_HANDLE
+*		Invalid PD handle (h_pd).
+*	IB_INVALID_PERMISSION
+*		Invalid access rights.
+* NOTES
+*	The caller must have pinned the memory referenced by the MDLs, so that the
+*	physical page associated with the virtual address does not get swapped
+*	out during the time the HCA is attempting to transfer data to this
+*	address. If the memory is not pinned, this could lead to data-corruption
+*	and unpredictable behavior by the operating environment.
+*
+* SEE ALSO
+*	ci_deregister_mr, ci_query_mr, ci_register_pmr, ci_modify_mr,
+*	ci_register_smr
+******
+*/
+
+
 /****f* Verbs/ci_register_pmr
 * NAME
 *	ci_register_pmr -- Register a physical memory region with the HCA.
@@ -3348,10 +3414,12 @@ typedef struct _ci_interface
 
 	/* 2.5 verbs */
 	ci_create_qp_ex			   create_qp_ex;
+
 	/*
 	 * Extended functionality to support kernel clients.
 	 */
 	ci_register_mr_remap        register_mr_remap;
+    ci_register_mdl             register_mdl;
 
 } ci_interface_t;
 /********/
diff -dwup3 -X excl.txt -r \dev\openib\ofw\gen1\branches\mlx4_30\trunk\inc\iba\ib_types.h .\inc\iba\ib_types.h
--- \dev\openib\ofw\gen1\branches\mlx4_30\trunk\inc\iba\ib_types.h	Fri Aug 03 17:24:07 2012
+++ .\inc\iba\ib_types.h	Wed Aug 08 15:41:24 2012
@@ -10468,7 +10468,7 @@ typedef uint32_t				ib_access_t;
 #define IB_AC_ATOMIC			0x00000004
 #define IB_AC_LOCAL_WRITE		0x00000008
 #define IB_AC_MW_BIND			0x00000010
-#define IB_AC_NOT_CACHABLE      0x00000020
+#define IB_AC_NOT_CACHABLE      0x80000000
 /*
 * NOTES
 *	Users may combine access rights using a bit-wise or operation to specify
diff -dwup3 -X excl.txt -r \dev\openib\ofw\gen1\branches\mlx4_30\trunk\inc\user\rdma\winverbs.h .\inc\user\rdma\winverbs.h
--- \dev\openib\ofw\gen1\branches\mlx4_30\trunk\inc\user\rdma\winverbs.h	Thu Aug 02 13:08:45 2012
+++ .\inc\user\rdma\winverbs.h	Wed Aug 08 15:42:09 2012
@@ -264,7 +264,7 @@ typedef struct _WV_MEMORY_KEYS
 #define WV_ACCESS_REMOTE_ATOMIC			0x00000004
 #define WV_ACCESS_LOCAL_WRITE			0x00000008
 #define WV_ACCESS_MW_BIND				0x00000010
-#define WV_ACCESS_NON_CACHABLE			0x00000020
+#define WV_ACCESS_NON_CACHABLE			0x80000000
 
 // Send queue operation flags
 #define WV_SEND_IMMEDIATE				0x00000001
-------------- next part --------------
A non-text attachment was scrubbed...
Name: ndv2.36.patch
Type: application/octet-stream
Size: 11936 bytes
Desc: ndv2.36.patch
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20130221/532afc84/attachment.obj>


More information about the ofw mailing list