[openib-general][PATCH][kdapl]: FMR and EVD patch
Guy German
guyg at voltaire.com
Wed Aug 17 09:02:13 PDT 2005
[kdapl]: this is the same FMR and EVD patch, sent before
with modifications, after discussions with James.
Signed-off-by: Guy German <guyg at voltaire.com>
Index: ib/dapl_openib_util.c
===================================================================
--- ib/dapl_openib_util.c (revision 3113)
+++ ib/dapl_openib_util.c (working copy)
@@ -138,7 +138,7 @@ int dapl_ib_mr_register_ia(struct dapl_i
}
int dapl_ib_mr_register_physical(struct dapl_ia *ia, struct dapl_lmr *lmr,
- void *phys_addr, u64 length,
+ void *phys_addr, u32 page_count,
enum dat_mem_priv_flags privileges)
{
int status;
@@ -150,11 +150,11 @@ int dapl_ib_mr_register_physical(struct
u64 *array;
array = (u64 *) phys_addr;
- buf_list = kmalloc(length * sizeof *buf_list, GFP_ATOMIC);
+ buf_list = kmalloc(page_count * sizeof *buf_list, GFP_ATOMIC);
if (!buf_list)
return -ENOMEM;
- for (i = 0; i < length; i++) {
+ for (i = 0; i < page_count; i++) {
buf_list[i].addr = array[i];
buf_list[i].size = PAGE_SIZE;
}
@@ -163,7 +163,7 @@ int dapl_ib_mr_register_physical(struct
acl = dapl_ib_convert_mem_privileges(privileges);
acl |= IB_ACCESS_MW_BIND;
mr = ib_reg_phys_mr(((struct dapl_pz *)lmr->param.pz)->pd,
- buf_list, length, acl, &iova);
+ buf_list, page_count, acl, &iova);
kfree(buf_list);
if (IS_ERR(mr)) {
status = PTR_ERR(mr);
@@ -186,13 +186,58 @@ int dapl_ib_mr_register_physical(struct
lmr->param.lmr_context = mr->lkey;
lmr->param.rmr_context = mr->rkey;
- lmr->param.registered_size = length * PAGE_SIZE;
+ lmr->param.registered_size = page_count * PAGE_SIZE;
lmr->param.registered_address = array[0];
lmr->mr = mr;
dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
"%s: (%p %d) got lkey 0x%x \n", __func__,
- buf_list, length, lmr->param.lmr_context);
+ buf_list, page_count, lmr->param.lmr_context);
+ return 0;
+}
+
+int dapl_ib_mr_register_fmr(struct dapl_ia *ia, struct dapl_lmr *lmr,
+ void *phys_addr, u32 page_count,
+ enum dat_mem_priv_flags privileges)
+{
+ /* FIXME: this phase-1 implementation of fmr doesn't take "privileges"
+ into account. This is a security breach. */
+ u64 io_addr;
+ u64 *page_list;
+ struct ib_pool_fmr *mem;
+ int status;
+
+ page_list = (u64 *)phys_addr;
+ io_addr = page_list[0];
+
+ mem = ib_fmr_pool_map_phys (((struct dapl_pz *)lmr->param.pz)->fmr_pool,
+ page_list,
+ page_count,
+ &io_addr);
+ if (IS_ERR(mem)) {
+ status = (int)PTR_ERR(mem);
+ if (status != -EAGAIN)
+ dapl_dbg_log(DAPL_DBG_TYPE_ERR,
+ "fmr_pool_map_phys ret=%d <%d pages>\n",
+ status, page_count);
+
+ lmr->param.registered_address = 0;
+ lmr->fmr = 0;
+ return status;
+ }
+
+ lmr->param.lmr_context = mem->fmr->lkey;
+ lmr->param.rmr_context = mem->fmr->rkey;
+ lmr->param.registered_size = page_count * PAGE_SIZE;
+ lmr->param.registered_address = io_addr;
+ lmr->fmr = mem;
+
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+ "%s: (addr=%p size=0x%x) lkey=0x%x rkey=0x%x\n", __func__,
+ lmr->param.registered_address,
+ lmr->param.registered_size,
+ lmr->param.lmr_context,
+ lmr->param.rmr_context);
return 0;
}
@@ -222,7 +267,10 @@ int dapl_ib_mr_deregister(struct dapl_lm
{
int status;
- status = ib_dereg_mr(lmr->mr);
+ if (DAT_MEM_TYPE_PLATFORM == lmr->param.mem_type && lmr->fmr)
+ status = ib_fmr_pool_unmap(lmr->fmr);
+ else
+ status = ib_dereg_mr(lmr->mr);
if (status < 0) {
dapl_dbg_log(DAPL_DBG_TYPE_ERR,
" ib_dereg_mr error code return = %d\n",
Index: ib/dapl_evd.c
===================================================================
--- ib/dapl_evd.c (revision 3113)
+++ ib/dapl_evd.c (working copy)
@@ -42,19 +42,30 @@ static void dapl_evd_upcall_trigger(stru
int status = 0;
struct dat_event event;
- /* Only process events if there is an enabled callback function. */
- if ((evd->upcall.upcall_func == (DAT_UPCALL_FUNC) NULL) ||
- (evd->upcall_policy == DAT_UPCALL_DISABLE))
+ /* DAT_UPCALL_MANY is not supported */
+ if (evd->is_triggered)
return;
- for (;;) {
+ spin_lock_irqsave (&evd->common.lock, evd->common.flags);
+ if (evd->is_triggered) {
+ spin_unlock_irqrestore (&evd->common.lock, evd->common.flags);
+ return;
+ }
+ evd->is_triggered = 1;
+ spin_unlock_irqrestore (&evd->common.lock, evd->common.flags);
+ /* Only process events if there is an enabled callback function */
+ while ((evd->upcall.upcall_func != (DAT_UPCALL_FUNC)NULL) &&
+ (evd->upcall_policy != DAT_UPCALL_TEARDOWN) &&
+ (evd->upcall_policy != DAT_UPCALL_DISABLE)) {
status = dapl_evd_dequeue((struct dat_evd *)evd, &event);
- if (0 != status)
- return;
-
+ if (status)
+ break;
evd->upcall.upcall_func(evd->upcall.instance_data, &event,
FALSE);
}
+ evd->is_triggered = 0;
+
+ return;
}
static void dapl_evd_eh_print_wc(struct ib_wc *wc)
@@ -163,6 +174,7 @@ static struct dapl_evd *dapl_evd_alloc(s
evd->cq = NULL;
atomic_set(&evd->evd_ref_count, 0);
evd->catastrophic_overflow = FALSE;
+ evd->is_triggered = 0;
evd->qlen = qlen;
evd->upcall_policy = upcall_policy;
if ( NULL != upcall )
@@ -798,25 +810,28 @@ static void dapl_evd_dto_callback(struct
overflow);
/*
- * This function does not dequeue from the CQ; only the consumer
- * can do that. It rearms the completion only if completions should
- * always occur.
+ * This function does not dequeue from the CQ;
+ * It rearms the completion only if the consumer did not
+ * disable the upcall policy (in order to dequeue the rest
+ * of the events itself)
*/
- if (!overflow && evd->upcall_policy != DAT_UPCALL_DISABLE) {
- /*
- * Re-enable callback, *then* trigger.
- * This guarantees we won't miss any events.
- */
- status = ib_req_notify_cq(evd->cq, IB_CQ_NEXT_COMP);
- if (0 != status)
- (void)dapl_evd_post_async_error_event(
- evd->common.owner_ia->async_error_evd,
- DAT_ASYNC_ERROR_PROVIDER_INTERNAL_ERROR,
- evd->common.owner_ia);
-
+ if (!overflow) {
dapl_evd_upcall_trigger(evd);
+ if ((evd->upcall_policy != DAT_UPCALL_TEARDOWN) &&
+ (evd->upcall_policy != DAT_UPCALL_DISABLE)) {
+ status = ib_req_notify_cq(evd->cq, IB_CQ_NEXT_COMP);
+ if (status)
+ (void)dapl_evd_post_async_error_event(
+ evd->common.owner_ia->async_error_evd,
+ DAT_ASYNC_ERROR_PROVIDER_INTERNAL_ERROR,
+ evd->common.owner_ia);
+ else
+ dapl_evd_upcall_trigger(evd);
+ }
}
+ else
+ dapl_dbg_log(DAPL_DBG_TYPE_ERR, "%s: evd %p overflowed\n", __func__, evd);
dapl_dbg_log(DAPL_DBG_TYPE_RTN, "%s() returns\n", __func__);
}
@@ -868,10 +883,11 @@ int dapl_evd_internal_create(struct dapl
/* reset the qlen in the attributes, it may have changed */
evd->qlen = evd->cq->cqe;
+ if ((evd->upcall_policy != DAT_UPCALL_TEARDOWN) &&
+ (evd->upcall_policy != DAT_UPCALL_DISABLE))
+ status = ib_req_notify_cq(evd->cq, IB_CQ_NEXT_COMP);
- status = ib_req_notify_cq(evd->cq, IB_CQ_NEXT_COMP);
-
- if (status != 0)
+ if (status)
goto bail;
}
@@ -879,14 +895,14 @@ int dapl_evd_internal_create(struct dapl
* the EVD
*/
status = dapl_evd_event_alloc(evd);
- if (status != 0)
+ if (status)
goto bail;
dapl_ia_link_evd(ia, evd);
*evd_ptr = evd;
bail:
- if (status != 0)
+ if (status)
if (evd)
dapl_evd_dealloc(evd);
@@ -1012,15 +1028,40 @@ int dapl_evd_modify_upcall(struct dat_ev
const struct dat_upcall_object *upcall)
{
struct dapl_evd *evd;
-
- dapl_dbg_log(DAPL_DBG_TYPE_API, "dapl_modify_upcall(%p)\n", evd_handle);
+ int status = 0;
+ int pending_events;
evd = (struct dapl_evd *)evd_handle;
+ dapl_dbg_log (DAPL_DBG_TYPE_API, "%s: (evd=%p, upcall_policy=%d)\n",
+ __func__, evd_handle, upcall_policy);
+ spin_lock_irqsave(&evd->common.lock, evd->common.flags);
+ if ((upcall_policy != DAT_UPCALL_TEARDOWN) &&
+ (upcall_policy != DAT_UPCALL_DISABLE)) {
+ pending_events = dapl_rbuf_count(&evd->pending_event_queue);
+ if (pending_events) {
+ dapl_dbg_log (DAPL_DBG_TYPE_WARN,
+ "%s: (evd %p) there are still %d pending "
+ "events in the queue - policy stays disabled\n",
+ __func__, evd_handle, pending_events);
+ status = -EBUSY;
+ goto bail;
+ }
+ if (evd->evd_flags & DAT_EVD_DTO_FLAG) {
+ status = ib_req_notify_cq(evd->cq, IB_CQ_NEXT_COMP);
+ if (status) {
+ printk(KERN_ERR "%s: dapls_ib_completion_notify"
+ " failed (status=0x%x) \n",__func__,
+ status);
+ goto bail;
+ }
+ }
+ }
evd->upcall_policy = upcall_policy;
evd->upcall = *upcall;
-
- return 0;
+bail:
+ spin_unlock_irqrestore(&evd->common.lock, evd->common.flags);
+ return status;
}
int dapl_evd_post_se(struct dat_evd *evd_handle, const struct dat_event *event)
@@ -1090,7 +1131,6 @@ int dapl_evd_dequeue(struct dat_evd *evd
bail:
spin_unlock_irqrestore(&evd->common.lock, evd->common.flags);
-
dapl_dbg_log(DAPL_DBG_TYPE_RTN,
"dapl_evd_dequeue () returns 0x%x\n", status);
Index: ib/dapl_openib_util.h
===================================================================
--- ib/dapl_openib_util.h (revision 3113)
+++ ib/dapl_openib_util.h (working copy)
@@ -84,9 +84,13 @@ int dapl_ib_mr_register_ia(struct dapl_i
enum dat_mem_priv_flags privileges);
int dapl_ib_mr_register_physical(struct dapl_ia *ia, struct dapl_lmr *lmr,
- void *phys_addr, u64 length,
+ void *phys_addr, u32 page_count,
enum dat_mem_priv_flags privileges);
+int dapl_ib_mr_register_fmr(struct dapl_ia *ia, struct dapl_lmr *lmr,
+ void *phys_addr, u32 page_count,
+ enum dat_mem_priv_flags privileges);
+
int dapl_ib_mr_deregister(struct dapl_lmr *lmr);
int dapl_ib_mr_register_shared(struct dapl_ia *ia, struct dapl_lmr *lmr,
Index: ib/dapl.h
===================================================================
--- ib/dapl.h (revision 3113)
+++ ib/dapl.h (working copy)
@@ -40,9 +40,9 @@
#include <asm/atomic.h>
#include <kdapl.h>
-
-#include "ib_verbs.h"
-#include "ib_cm.h"
+#include <ib_verbs.h>
+#include <ib_cm.h>
+#include <ib_fmr_pool.h>
/*********************************************************************
* *
@@ -173,6 +173,7 @@ struct dapl_evd {
struct dapl_ring_buffer pending_event_queue;
enum dat_upcall_policy upcall_policy;
struct dat_upcall_object upcall;
+ int is_triggered;
};
struct dapl_ep {
@@ -229,6 +230,7 @@ struct dapl_pz {
struct list_head list;
struct ib_pd *pd;
atomic_t pz_ref_count;
+ struct ib_fmr_pool *fmr_pool;
};
struct dapl_lmr {
@@ -237,6 +239,7 @@ struct dapl_lmr {
struct list_head list;
struct dat_lmr_param param;
struct ib_mr *mr;
+ struct ib_pool_fmr *fmr;
atomic_t lmr_ref_count;
};
@@ -628,4 +631,6 @@ extern void dapl_dbg_log(enum dapl_dbg_t
#define dapl_dbg_log(...)
#endif /* KDAPL_INFINIBAND_DEBUG */
+extern void set_fmr_params (struct ib_fmr_pool_param *fmr_param_s);
+extern unsigned int g_dapl_active_fmr;
#endif /* DAPL_H */
Index: ib/dapl_pz.c
===================================================================
--- ib/dapl_pz.c (revision 3113)
+++ ib/dapl_pz.c (working copy)
@@ -29,7 +29,7 @@
/*
* $Id$
*/
-
+#include <linux/delay.h>
#include "dapl.h"
#include "dapl_ia.h"
#include "dapl_openib_util.h"
@@ -89,7 +89,17 @@ int dapl_pz_create(struct dat_ia *ia, st
status);
goto error2;
}
-
+
+ if (g_dapl_active_fmr) {
+ struct ib_fmr_pool_param params;
+ set_fmr_params (&params);
+ dapl_pz->fmr_pool = ib_create_fmr_pool(dapl_pz->pd, &params);
+ if (IS_ERR(dapl_pz->fmr_pool))
+ dapl_dbg_log(DAPL_DBG_TYPE_WARN,
+ "could not create FMR pool <%ld>",
+ PTR_ERR(dapl_pz->fmr_pool));
+ }
+
*pz = (struct dat_pz *)dapl_pz;
return 0;
@@ -104,7 +114,7 @@ error1:
int dapl_pz_free(struct dat_pz *pz)
{
struct dapl_pz *dapl_pz;
- int status;
+ int status=0;
dapl_dbg_log(DAPL_DBG_TYPE_API, "dapl_pz_free(%p)\n", pz);
@@ -114,8 +124,15 @@ int dapl_pz_free(struct dat_pz *pz)
status = -EINVAL;
goto error;
}
-
- status = ib_dealloc_pd(dapl_pz->pd);
+
+ if (g_dapl_active_fmr && dapl_pz->fmr_pool) {
+ (void)ib_destroy_fmr_pool(dapl_pz->fmr_pool);
+ dapl_pz->fmr_pool = NULL;
+ }
+
+ if (dapl_pz->pd)
+ status = ib_dealloc_pd(dapl_pz->pd);
+
if (status) {
dapl_dbg_log(DAPL_DBG_TYPE_ERR, "ib_dealloc_pd failed: %X\n",
status);
Index: ib/dapl_ia.c
===================================================================
--- ib/dapl_ia.c (revision 3113)
+++ ib/dapl_ia.c (working copy)
@@ -115,7 +115,6 @@ static int dapl_ia_abrupt_close(struct d
* when we run out of entries, or when we get back to the head
* if we end up skipping an entry.
*/
-
list_for_each_entry(rmr, &ia->rmr_list, list) {
status = dapl_rmr_free((struct dat_rmr *)rmr);
if (status != 0)
@@ -196,7 +195,6 @@ static int dapl_ia_abrupt_close(struct d
"ia_close(ABRUPT): psp_free(%p) returns %x\n",
sp, status);
}
-
list_for_each_entry(pz, &ia->pz_list, list) {
status = dapl_pz_free((struct dat_pz *)pz);
if (status != 0)
@@ -266,7 +264,6 @@ static int dapl_ia_graceful_close(struct
int status = 0;
int cur_status;
struct dapl_evd *evd;
-
if (!list_empty(&ia->rmr_list) ||
!list_empty(&ia->rsp_list) ||
!list_empty(&ia->ep_list) ||
@@ -745,7 +742,8 @@ int dapl_ia_query(struct dat_ia *ia_ptr,
provider_attr->provider_version_major = DAPL_PROVIDER_MAJOR;
provider_attr->provider_version_minor = DAPL_PROVIDER_MINOR;
provider_attr->lmr_mem_types_supported =
- DAT_MEM_TYPE_PHYSICAL | DAT_MEM_TYPE_IA;
+ DAT_MEM_TYPE_PHYSICAL | DAT_MEM_TYPE_IA |
+ DAT_MEM_TYPE_PLATFORM;
provider_attr->iov_ownership_on_return = DAT_IOV_CONSUMER;
provider_attr->dat_qos_supported = DAT_QOS_BEST_EFFORT;
provider_attr->completion_flags_supported =
Index: ib/dapl_provider.c
===================================================================
--- ib/dapl_provider.c (revision 3113)
+++ ib/dapl_provider.c (working copy)
@@ -49,8 +49,17 @@ MODULE_AUTHOR("James Lentini");
#ifdef CONFIG_KDAPL_INFINIBAND_DEBUG
static DAPL_DBG_MASK g_dapl_dbg_mask = 0;
module_param_named(dbg_mask, g_dapl_dbg_mask, int, 0644);
-MODULE_PARM_DESC(dbg_mask, "Bitmask to enable debug message types.");
+MODULE_PARM_DESC(dbg_mask, "Bitmask to enable debug message types. <default=0>");
#endif /* CONFIG_KDAPL_INFINIBAND_DEBUG */
+unsigned int g_dapl_active_fmr = 1;
+static unsigned int g_dapl_pool_size = 2048;
+static unsigned int g_dapl_max_pages_per_fmr = 64;
+module_param_named(active_fmr, g_dapl_active_fmr, int, 0644);
+module_param_named(pool_size, g_dapl_pool_size, int, 0644);
+module_param_named(max_pages_per_fmr, g_dapl_max_pages_per_fmr, int, 0644);
+MODULE_PARM_DESC(active_fmr, "if active_fmr==1, creates fmr pool in pz_create <default=1>");
+MODULE_PARM_DESC(pool_size, "num of fmr handles in pool <default=2048>");
+MODULE_PARM_DESC(max_pages_per_fmr, "max size (in pages) of an fmr handle <default=64>");
static LIST_HEAD(g_dapl_provider_list);
@@ -152,6 +161,18 @@ void dapl_dbg_log(enum dapl_dbg_type typ
#endif /* KDAPL_INFINIBAND_DEBUG */
+void set_fmr_params (struct ib_fmr_pool_param *fmr_params)
+{
+ fmr_params->max_pages_per_fmr = g_dapl_max_pages_per_fmr;
+ fmr_params->pool_size = g_dapl_pool_size;
+ fmr_params->dirty_watermark = 32;
+ fmr_params->cache = 1;
+ fmr_params->flush_function = NULL;
+ fmr_params->access = (IB_ACCESS_LOCAL_WRITE |
+ IB_ACCESS_REMOTE_WRITE |
+ IB_ACCESS_REMOTE_READ);
+}
+
static struct dapl_provider *dapl_provider_alloc(const char *name,
struct ib_device *device,
u8 port)
Index: ib/dapl_lmr.c
===================================================================
--- ib/dapl_lmr.c (revision 3113)
+++ ib/dapl_lmr.c (working copy)
@@ -126,7 +126,7 @@ error1:
static inline int dapl_lmr_create_physical(struct dapl_ia *ia,
union dat_region_description phys_addr,
- u64 page_count,
+ u64 length,
enum dat_mem_type mem_type,
struct dapl_pz *pz,
enum dat_mem_priv_flags privileges,
@@ -137,8 +137,10 @@ static inline int dapl_lmr_create_physic
u64 *registered_address)
{
struct dapl_lmr *new_lmr;
- int status;
+ int status = 0;
+ u32 page_count;
+ page_count = (u32)length;
new_lmr = dapl_lmr_alloc(ia, mem_type, phys_addr,
page_count, (struct dat_pz *) pz, privileges);
@@ -149,15 +151,24 @@ static inline int dapl_lmr_create_physic
if (DAT_MEM_TYPE_IA == mem_type) {
status = dapl_ib_mr_register_ia(ia, new_lmr, phys_addr,
- page_count, privileges);
+ length, privileges);
}
- else {
+ else if (DAT_MEM_TYPE_PHYSICAL == mem_type) {
status = dapl_ib_mr_register_physical(ia, new_lmr,
phys_addr.for_array,
page_count, privileges);
}
+ else if (DAT_MEM_TYPE_PLATFORM == mem_type) {
+ status = dapl_ib_mr_register_fmr(ia, new_lmr,
+ phys_addr.for_array,
+ page_count, privileges);
+ }
+ else {
+ status = -EINVAL;
+ goto error1;
+ }
- if (0 != status)
+ if (status)
goto error2;
atomic_inc(&pz->pz_ref_count);
@@ -258,6 +269,11 @@ int dapl_lmr_kcreate(struct dat_ia *ia,
rmr_context, registered_length,
registered_address);
break;
+ case DAT_MEM_TYPE_PLATFORM: /* used as a proprietary Mellanox-FMR */
+ if (!g_dapl_active_fmr) {
+ status = -EINVAL;
+ break;
+ }
case DAT_MEM_TYPE_PHYSICAL:
case DAT_MEM_TYPE_IA:
status = dapl_lmr_create_physical(dapl_ia, region_description,
@@ -307,6 +323,7 @@ int dapl_lmr_free(struct dat_lmr *lmr)
switch (dapl_lmr->param.mem_type) {
case DAT_MEM_TYPE_PHYSICAL:
+ case DAT_MEM_TYPE_PLATFORM:
case DAT_MEM_TYPE_IA:
case DAT_MEM_TYPE_LMR:
{
More information about the general
mailing list