[openib-general][PATCH][kdapl]: FMR implementation for kdapl
Guy German
guyg at voltaire.com
Thu Aug 11 06:28:08 PDT 2005
James,
This is a Tavor FMR implementation for kDPAL, that works over
gen2's fmr_pool.c implementation.
A few notes:
1. I've added mod params to kdapl_ib to control the fmr size and pool
length. There are still reasonable defaults, but I think we should allow
consumers to control this.
2. the fmr pool allocation is done in the pz_create (if active_fmr =1).
The problem is that at the time of creating the pool we don't
know what the consumer's privileges request is going to be (passed
afterwords in dapl_lmr_kcreate). I think this can be solved by creating
3 pools and taking from the right pool at lmr_kcreate time.
Do you have any "cheaper" solution to this ?
3. Can we get rid of DAT_MEM_TYPE_LMR code ? I don't understand whats it
for.
Signed-off-by: Guy German <guyg at voltaire.com>
Index: dapl_openib_util.c
===================================================================
--- dapl_openib_util.c (revision 3056)
+++ dapl_openib_util.c (working copy)
@@ -196,6 +196,53 @@ int dapl_ib_mr_register_physical(struct
return 0;
}
+int dapl_ib_mr_register_fmr(struct dapl_ia *ia, struct dapl_lmr *lmr,
+ void *phys_addr, u64 length,
+ enum dat_mem_priv_flags privileges)
+{
+ /* FIXME: this phase-1 implementation of fmr doesn't take "privileges"
+ into account. This is a security breech. */
+ u64 io_addr;
+ u64 *page_list;
+ int page_count;
+ struct ib_pool_fmr *mem;
+ int status;
+
+ page_list = (u64 *)phys_addr;
+ page_count = (int)length;
+ io_addr = page_list[0];
+
+ mem = ib_fmr_pool_map_phys (((struct dapl_pz *)lmr->param.pz)->fmr_pool,
+ page_list,
+ page_count,
+ &io_addr);
+ if (IS_ERR(mem)) {
+ status = (int)PTR_ERR(mem);
+ if (status != -EAGAIN)
+ dapl_dbg_log(DAPL_DBG_TYPE_ERR,
+ "fmr_pool_map_phys ret=%d <%d pages>\n",
+ status, page_count);
+
+ lmr->param.registered_address = 0;
+ lmr->fmr = 0;
+ return status;
+ }
+
+ lmr->param.lmr_context = mem->fmr->lkey;
+ lmr->param.rmr_context = mem->fmr->rkey;
+ lmr->param.registered_size = length * PAGE_SIZE;
+ lmr->param.registered_address = io_addr;
+ lmr->fmr = mem;
+
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+ "%s: (addr=%p size=0x%x) lkey=0x%x rkey=0x%x\n", __func__,
+ lmr->param.registered_address,
+ lmr->param.registered_size,
+ lmr->param.lmr_context,
+ lmr->param.rmr_context);
+ return 0;
+}
+
int dapl_ib_mr_register_shared(struct dapl_ia *ia, struct dapl_lmr *lmr,
enum dat_mem_priv_flags privileges)
{
@@ -222,7 +269,10 @@ int dapl_ib_mr_deregister(struct dapl_lm
{
int status;
- status = ib_dereg_mr(lmr->mr);
+ if (DAT_MEM_TYPE_PLATFORM == lmr->param.mem_type && lmr->fmr)
+ status = ib_fmr_pool_unmap(lmr->fmr);
+ else
+ status = ib_dereg_mr(lmr->mr);
if (status < 0) {
dapl_dbg_log(DAPL_DBG_TYPE_ERR,
" ib_dereg_mr error code return = %d\n",
Index: dapl_openib_util.h
===================================================================
--- dapl_openib_util.h (revision 3056)
+++ dapl_openib_util.h (working copy)
@@ -87,6 +87,10 @@ int dapl_ib_mr_register_physical(struct
void *phys_addr, u64 length,
enum dat_mem_priv_flags privileges);
+int dapl_ib_mr_register_fmr(struct dapl_ia *ia, struct dapl_lmr *lmr,
+ void *phys_addr, u64 length,
+ enum dat_mem_priv_flags privileges);
+
int dapl_ib_mr_deregister(struct dapl_lmr *lmr);
int dapl_ib_mr_register_shared(struct dapl_ia *ia, struct dapl_lmr *lmr,
Index: dapl_lmr.c
===================================================================
--- dapl_lmr.c (revision 3056)
+++ dapl_lmr.c (working copy)
@@ -137,7 +137,7 @@ static inline int dapl_lmr_create_physic
u64 *registered_address)
{
struct dapl_lmr *new_lmr;
- int status;
+ int status = 0;
new_lmr = dapl_lmr_alloc(ia, mem_type, phys_addr,
page_count, (struct dat_pz *) pz, privileges);
@@ -151,13 +151,22 @@ static inline int dapl_lmr_create_physic
status = dapl_ib_mr_register_ia(ia, new_lmr, phys_addr,
page_count, privileges);
}
- else {
+ else if (DAT_MEM_TYPE_PHYSICAL == mem_type) {
status = dapl_ib_mr_register_physical(ia, new_lmr,
phys_addr.for_array,
page_count, privileges);
}
+ else if (DAT_MEM_TYPE_PLATFORM == mem_type) {
+ status = dapl_ib_mr_register_fmr(ia, new_lmr,
+ phys_addr.for_array,
+ page_count, privileges);
+ }
+ else {
+ status = -EINVAL;
+ goto error1;
+ }
- if (0 != status)
+ if (status)
goto error2;
atomic_inc(&pz->pz_ref_count);
@@ -243,7 +252,7 @@ int dapl_lmr_kcreate(struct dat_ia *ia,
int status;
dapl_dbg_log(DAPL_DBG_TYPE_API,
- "dapl_lmr_kcreate(ia:%p, mem_type:%x, ...)\n",
+ "dapl_lmr_kcreate(ia:%p, mem_type:%x)\n",
ia, mem_type);
dapl_ia = (struct dapl_ia *)ia;
@@ -258,6 +267,11 @@ int dapl_lmr_kcreate(struct dat_ia *ia,
rmr_context, registered_length,
registered_address);
break;
+ case DAT_MEM_TYPE_PLATFORM: /* used as a proprietary Tavor-FMR */
+ if (!active_fmr) {
+ status = -EINVAL;
+ break;
+ }
case DAT_MEM_TYPE_PHYSICAL:
case DAT_MEM_TYPE_IA:
status = dapl_lmr_create_physical(dapl_ia, region_description,
@@ -307,6 +321,7 @@ int dapl_lmr_free(struct dat_lmr *lmr)
switch (dapl_lmr->param.mem_type) {
case DAT_MEM_TYPE_PHYSICAL:
+ case DAT_MEM_TYPE_PLATFORM:
case DAT_MEM_TYPE_IA:
case DAT_MEM_TYPE_LMR:
{
Index: dapl_pz.c
===================================================================
--- dapl_pz.c (revision 3056)
+++ dapl_pz.c (working copy)
@@ -89,7 +89,17 @@ int dapl_pz_create(struct dat_ia *ia, st
status);
goto error2;
}
-
+
+ if (active_fmr) {
+ struct ib_fmr_pool_param params;
+ set_fmr_params (¶ms);
+ dapl_pz->fmr_pool = ib_create_fmr_pool(dapl_pz->pd, ¶ms);
+ if (IS_ERR(dapl_pz->fmr_pool))
+ dapl_dbg_log(DAPL_DBG_TYPE_WARN,
+ "could not create FMR pool <%ld>",
+ PTR_ERR(dapl_pz->fmr_pool));
+ }
+
*pz = (struct dat_pz *)dapl_pz;
return 0;
@@ -104,7 +114,7 @@ error1:
int dapl_pz_free(struct dat_pz *pz)
{
struct dapl_pz *dapl_pz;
- int status;
+ int status=0;
dapl_dbg_log(DAPL_DBG_TYPE_API, "dapl_pz_free(%p)\n", pz);
@@ -114,8 +124,10 @@ int dapl_pz_free(struct dat_pz *pz)
status = -EINVAL;
goto error;
}
-
- status = ib_dealloc_pd(dapl_pz->pd);
+ if (active_fmr)
+ (void)ib_destroy_fmr_pool(dapl_pz->fmr_pool);
+ else
+ status = ib_dealloc_pd(dapl_pz->pd);
if (status) {
dapl_dbg_log(DAPL_DBG_TYPE_ERR, "ib_dealloc_pd failed: %X\n",
status);
Index: dapl_ia.c
===================================================================
--- dapl_ia.c (revision 3056)
+++ dapl_ia.c (working copy)
@@ -745,7 +745,8 @@ int dapl_ia_query(struct dat_ia *ia_ptr,
provider_attr->provider_version_major = DAPL_PROVIDER_MAJOR;
provider_attr->provider_version_minor = DAPL_PROVIDER_MINOR;
provider_attr->lmr_mem_types_supported =
- DAT_MEM_TYPE_PHYSICAL | DAT_MEM_TYPE_IA;
+ DAT_MEM_TYPE_PHYSICAL | DAT_MEM_TYPE_IA |
+ DAT_MEM_TYPE_PLATFORM;
provider_attr->iov_ownership_on_return = DAT_IOV_CONSUMER;
provider_attr->dat_qos_supported = DAT_QOS_BEST_EFFORT;
provider_attr->completion_flags_supported =
Index: dapl_provider.c
===================================================================
--- dapl_provider.c (revision 3056)
+++ dapl_provider.c (working copy)
@@ -48,8 +48,19 @@ MODULE_AUTHOR("James Lentini");
#ifdef CONFIG_KDAPL_INFINIBAND_DEBUG
static DAPL_DBG_MASK g_dapl_dbg_mask = 0;
+unsigned int active_fmr = 1;
+static unsigned int pool_size = 2048;
+static unsigned int max_pages_per_fmr = 64;
+
module_param_named(dbg_mask, g_dapl_dbg_mask, int, 0644);
-MODULE_PARM_DESC(dbg_mask, "Bitmask to enable debug message types.");
+module_param_named(active_fmr, active_fmr, int, 0644);
+module_param_named(pool_size, pool_size, int, 0644);
+module_param_named(max_pages_per_fmr, max_pages_per_fmr, int, 0644);
+MODULE_PARM_DESC(dbg_mask, "Bitmask to enable debug message types. <default=0>");
+MODULE_PARM_DESC(active_fmr, "if active_fmr==1, creates fmr pool in pz_create <default=0>");
+MODULE_PARM_DESC(pool_size, "num of fmr handles in pool <default=2048>");
+MODULE_PARM_DESC(max_pages_per_fmr, "max size (in pages) of an fmr handle <default=64>");
+
#endif /* CONFIG_KDAPL_INFINIBAND_DEBUG */
static LIST_HEAD(g_dapl_provider_list);
@@ -152,6 +163,18 @@ void dapl_dbg_log(enum dapl_dbg_type typ
#endif /* KDAPL_INFINIBAND_DEBUG */
+void set_fmr_params (struct ib_fmr_pool_param *fmr_param_s)
+{
+ fmr_param_s->max_pages_per_fmr = max_pages_per_fmr;
+ fmr_param_s->pool_size = pool_size;
+ fmr_param_s->dirty_watermark = 32;
+ fmr_param_s->cache = 1;
+ fmr_param_s->flush_function = NULL;
+ fmr_param_s->access = (IB_ACCESS_LOCAL_WRITE |
+ IB_ACCESS_REMOTE_WRITE |
+ IB_ACCESS_REMOTE_READ);
+}
+
static struct dapl_provider *dapl_provider_alloc(const char *name,
struct ib_device *device,
u8 port)
-------------- next part --------------
Index: dapl_openib_util.c
===================================================================
--- dapl_openib_util.c (revision 3056)
+++ dapl_openib_util.c (working copy)
@@ -196,6 +196,53 @@ int dapl_ib_mr_register_physical(struct
return 0;
}
+int dapl_ib_mr_register_fmr(struct dapl_ia *ia, struct dapl_lmr *lmr,
+ void *phys_addr, u64 length,
+ enum dat_mem_priv_flags privileges)
+{
+ /* FIXME: this phase-1 implementation of fmr doesn't take "privileges"
+ into account. This is a security breech. */
+ u64 io_addr;
+ u64 *page_list;
+ int page_count;
+ struct ib_pool_fmr *mem;
+ int status;
+
+ page_list = (u64 *)phys_addr;
+ page_count = (int)length;
+ io_addr = page_list[0];
+
+ mem = ib_fmr_pool_map_phys (((struct dapl_pz *)lmr->param.pz)->fmr_pool,
+ page_list,
+ page_count,
+ &io_addr);
+ if (IS_ERR(mem)) {
+ status = (int)PTR_ERR(mem);
+ if (status != -EAGAIN)
+ dapl_dbg_log(DAPL_DBG_TYPE_ERR,
+ "fmr_pool_map_phys ret=%d <%d pages>\n",
+ status, page_count);
+
+ lmr->param.registered_address = 0;
+ lmr->fmr = 0;
+ return status;
+ }
+
+ lmr->param.lmr_context = mem->fmr->lkey;
+ lmr->param.rmr_context = mem->fmr->rkey;
+ lmr->param.registered_size = length * PAGE_SIZE;
+ lmr->param.registered_address = io_addr;
+ lmr->fmr = mem;
+
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+ "%s: (addr=%p size=0x%x) lkey=0x%x rkey=0x%x\n", __func__,
+ lmr->param.registered_address,
+ lmr->param.registered_size,
+ lmr->param.lmr_context,
+ lmr->param.rmr_context);
+ return 0;
+}
+
int dapl_ib_mr_register_shared(struct dapl_ia *ia, struct dapl_lmr *lmr,
enum dat_mem_priv_flags privileges)
{
@@ -222,7 +269,10 @@ int dapl_ib_mr_deregister(struct dapl_lm
{
int status;
- status = ib_dereg_mr(lmr->mr);
+ if (DAT_MEM_TYPE_PLATFORM == lmr->param.mem_type && lmr->fmr)
+ status = ib_fmr_pool_unmap(lmr->fmr);
+ else
+ status = ib_dereg_mr(lmr->mr);
if (status < 0) {
dapl_dbg_log(DAPL_DBG_TYPE_ERR,
" ib_dereg_mr error code return = %d\n",
Index: dapl_openib_util.h
===================================================================
--- dapl_openib_util.h (revision 3056)
+++ dapl_openib_util.h (working copy)
@@ -87,6 +87,10 @@ int dapl_ib_mr_register_physical(struct
void *phys_addr, u64 length,
enum dat_mem_priv_flags privileges);
+int dapl_ib_mr_register_fmr(struct dapl_ia *ia, struct dapl_lmr *lmr,
+ void *phys_addr, u64 length,
+ enum dat_mem_priv_flags privileges);
+
int dapl_ib_mr_deregister(struct dapl_lmr *lmr);
int dapl_ib_mr_register_shared(struct dapl_ia *ia, struct dapl_lmr *lmr,
Index: dapl_lmr.c
===================================================================
--- dapl_lmr.c (revision 3056)
+++ dapl_lmr.c (working copy)
@@ -137,7 +137,7 @@ static inline int dapl_lmr_create_physic
u64 *registered_address)
{
struct dapl_lmr *new_lmr;
- int status;
+ int status = 0;
new_lmr = dapl_lmr_alloc(ia, mem_type, phys_addr,
page_count, (struct dat_pz *) pz, privileges);
@@ -151,13 +151,22 @@ static inline int dapl_lmr_create_physic
status = dapl_ib_mr_register_ia(ia, new_lmr, phys_addr,
page_count, privileges);
}
- else {
+ else if (DAT_MEM_TYPE_PHYSICAL == mem_type) {
status = dapl_ib_mr_register_physical(ia, new_lmr,
phys_addr.for_array,
page_count, privileges);
}
+ else if (DAT_MEM_TYPE_PLATFORM == mem_type) {
+ status = dapl_ib_mr_register_fmr(ia, new_lmr,
+ phys_addr.for_array,
+ page_count, privileges);
+ }
+ else {
+ status = -EINVAL;
+ goto error1;
+ }
- if (0 != status)
+ if (status)
goto error2;
atomic_inc(&pz->pz_ref_count);
@@ -243,7 +252,7 @@ int dapl_lmr_kcreate(struct dat_ia *ia,
int status;
dapl_dbg_log(DAPL_DBG_TYPE_API,
- "dapl_lmr_kcreate(ia:%p, mem_type:%x, ...)\n",
+ "dapl_lmr_kcreate(ia:%p, mem_type:%x)\n",
ia, mem_type);
dapl_ia = (struct dapl_ia *)ia;
@@ -258,6 +267,11 @@ int dapl_lmr_kcreate(struct dat_ia *ia,
rmr_context, registered_length,
registered_address);
break;
+ case DAT_MEM_TYPE_PLATFORM: /* used as a proprietary Tavor-FMR */
+ if (!active_fmr) {
+ status = -EINVAL;
+ break;
+ }
case DAT_MEM_TYPE_PHYSICAL:
case DAT_MEM_TYPE_IA:
status = dapl_lmr_create_physical(dapl_ia, region_description,
@@ -307,6 +321,7 @@ int dapl_lmr_free(struct dat_lmr *lmr)
switch (dapl_lmr->param.mem_type) {
case DAT_MEM_TYPE_PHYSICAL:
+ case DAT_MEM_TYPE_PLATFORM:
case DAT_MEM_TYPE_IA:
case DAT_MEM_TYPE_LMR:
{
Index: dapl_pz.c
===================================================================
--- dapl_pz.c (revision 3056)
+++ dapl_pz.c (working copy)
@@ -89,7 +89,17 @@ int dapl_pz_create(struct dat_ia *ia, st
status);
goto error2;
}
-
+
+ if (active_fmr) {
+ struct ib_fmr_pool_param params;
+ set_fmr_params (¶ms);
+ dapl_pz->fmr_pool = ib_create_fmr_pool(dapl_pz->pd, ¶ms);
+ if (IS_ERR(dapl_pz->fmr_pool))
+ dapl_dbg_log(DAPL_DBG_TYPE_WARN,
+ "could not create FMR pool <%ld>",
+ PTR_ERR(dapl_pz->fmr_pool));
+ }
+
*pz = (struct dat_pz *)dapl_pz;
return 0;
@@ -104,7 +114,7 @@ error1:
int dapl_pz_free(struct dat_pz *pz)
{
struct dapl_pz *dapl_pz;
- int status;
+ int status=0;
dapl_dbg_log(DAPL_DBG_TYPE_API, "dapl_pz_free(%p)\n", pz);
@@ -114,8 +124,10 @@ int dapl_pz_free(struct dat_pz *pz)
status = -EINVAL;
goto error;
}
-
- status = ib_dealloc_pd(dapl_pz->pd);
+ if (active_fmr)
+ (void)ib_destroy_fmr_pool(dapl_pz->fmr_pool);
+ else
+ status = ib_dealloc_pd(dapl_pz->pd);
if (status) {
dapl_dbg_log(DAPL_DBG_TYPE_ERR, "ib_dealloc_pd failed: %X\n",
status);
Index: dapl_ia.c
===================================================================
--- dapl_ia.c (revision 3056)
+++ dapl_ia.c (working copy)
@@ -745,7 +745,8 @@ int dapl_ia_query(struct dat_ia *ia_ptr,
provider_attr->provider_version_major = DAPL_PROVIDER_MAJOR;
provider_attr->provider_version_minor = DAPL_PROVIDER_MINOR;
provider_attr->lmr_mem_types_supported =
- DAT_MEM_TYPE_PHYSICAL | DAT_MEM_TYPE_IA;
+ DAT_MEM_TYPE_PHYSICAL | DAT_MEM_TYPE_IA |
+ DAT_MEM_TYPE_PLATFORM;
provider_attr->iov_ownership_on_return = DAT_IOV_CONSUMER;
provider_attr->dat_qos_supported = DAT_QOS_BEST_EFFORT;
provider_attr->completion_flags_supported =
Index: dapl_provider.c
===================================================================
--- dapl_provider.c (revision 3056)
+++ dapl_provider.c (working copy)
@@ -48,8 +48,19 @@ MODULE_AUTHOR("James Lentini");
#ifdef CONFIG_KDAPL_INFINIBAND_DEBUG
static DAPL_DBG_MASK g_dapl_dbg_mask = 0;
+unsigned int active_fmr = 1;
+static unsigned int pool_size = 2048;
+static unsigned int max_pages_per_fmr = 64;
+
module_param_named(dbg_mask, g_dapl_dbg_mask, int, 0644);
-MODULE_PARM_DESC(dbg_mask, "Bitmask to enable debug message types.");
+module_param_named(active_fmr, active_fmr, int, 0644);
+module_param_named(pool_size, pool_size, int, 0644);
+module_param_named(max_pages_per_fmr, max_pages_per_fmr, int, 0644);
+MODULE_PARM_DESC(dbg_mask, "Bitmask to enable debug message types. <default=0>");
+MODULE_PARM_DESC(active_fmr, "if active_fmr==1, creates fmr pool in pz_create <default=0>");
+MODULE_PARM_DESC(pool_size, "num of fmr handles in pool <default=2048>");
+MODULE_PARM_DESC(max_pages_per_fmr, "max size (in pages) of an fmr handle <default=64>");
+
#endif /* CONFIG_KDAPL_INFINIBAND_DEBUG */
static LIST_HEAD(g_dapl_provider_list);
@@ -152,6 +163,18 @@ void dapl_dbg_log(enum dapl_dbg_type typ
#endif /* KDAPL_INFINIBAND_DEBUG */
+void set_fmr_params (struct ib_fmr_pool_param *fmr_param_s)
+{
+ fmr_param_s->max_pages_per_fmr = max_pages_per_fmr;
+ fmr_param_s->pool_size = pool_size;
+ fmr_param_s->dirty_watermark = 32;
+ fmr_param_s->cache = 1;
+ fmr_param_s->flush_function = NULL;
+ fmr_param_s->access = (IB_ACCESS_LOCAL_WRITE |
+ IB_ACCESS_REMOTE_WRITE |
+ IB_ACCESS_REMOTE_READ);
+}
+
static struct dapl_provider *dapl_provider_alloc(const char *name,
struct ib_device *device,
u8 port)
More information about the general
mailing list