[openib-general][PATCH][kdapl]: FMR and EVD patch

Guy German guyg at voltaire.com
Wed Aug 17 09:02:13 PDT 2005


[kdapl]: this is the same FMR and EVD patch that was sent before,
now with modifications following discussions with James.

Signed-off-by: Guy German <guyg at voltaire.com>

Index: ib/dapl_openib_util.c
===================================================================
--- ib/dapl_openib_util.c	(revision 3113)
+++ ib/dapl_openib_util.c	(working copy)
@@ -138,7 +138,7 @@ int dapl_ib_mr_register_ia(struct dapl_i
 }
 
 int dapl_ib_mr_register_physical(struct dapl_ia *ia, struct dapl_lmr *lmr,
-				 void *phys_addr, u64 length,
+				 void *phys_addr, u32 page_count,
 				 enum dat_mem_priv_flags privileges)
 {
 	int status;
@@ -150,11 +150,11 @@ int dapl_ib_mr_register_physical(struct 
 	u64 *array;
 
 	array = (u64 *) phys_addr;
-	buf_list = kmalloc(length * sizeof *buf_list, GFP_ATOMIC);
+	buf_list = kmalloc(page_count * sizeof *buf_list, GFP_ATOMIC);
 	if (!buf_list)
 		return -ENOMEM;
 
-	for (i = 0; i < length; i++) {
+	for (i = 0; i < page_count; i++) {
 		buf_list[i].addr = array[i];
 		buf_list[i].size = PAGE_SIZE;
 	}
@@ -163,7 +163,7 @@ int dapl_ib_mr_register_physical(struct 
 	acl = dapl_ib_convert_mem_privileges(privileges);
 	acl |= IB_ACCESS_MW_BIND;
 	mr = ib_reg_phys_mr(((struct dapl_pz *)lmr->param.pz)->pd,
-			    buf_list, length, acl, &iova);
+			    buf_list, page_count, acl, &iova);
 	kfree(buf_list);
 	if (IS_ERR(mr)) {
 		status = PTR_ERR(mr);
@@ -186,13 +186,58 @@ int dapl_ib_mr_register_physical(struct 
 
 	lmr->param.lmr_context = mr->lkey;
 	lmr->param.rmr_context = mr->rkey;
-	lmr->param.registered_size = length * PAGE_SIZE;
+	lmr->param.registered_size = page_count * PAGE_SIZE;
 	lmr->param.registered_address = array[0];
 	lmr->mr = mr;
 
 	dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
 		"%s: (%p %d) got lkey 0x%x \n", __func__,
-		buf_list, length, lmr->param.lmr_context);
+		buf_list, page_count, lmr->param.lmr_context);
+	return 0;
+}
+
+int dapl_ib_mr_register_fmr(struct dapl_ia *ia, struct dapl_lmr *lmr,
+			    void *phys_addr, u32 page_count,
+			    enum dat_mem_priv_flags privileges)
+{
+	/* FIXME: this phase-1 implementation of fmr doesn't take "privileges"
+	   into account. This is a security breach. */
+	u64 io_addr;
+	u64 *page_list;
+	struct ib_pool_fmr *mem;
+        int status;
+
+	page_list = (u64 *)phys_addr;
+	io_addr = page_list[0];
+
+	mem = ib_fmr_pool_map_phys (((struct dapl_pz *)lmr->param.pz)->fmr_pool,
+					page_list,
+					page_count,
+					&io_addr);
+	if (IS_ERR(mem)) {
+		status = (int)PTR_ERR(mem);
+		if (status != -EAGAIN)
+			dapl_dbg_log(DAPL_DBG_TYPE_ERR,
+				    "fmr_pool_map_phys ret=%d <%d pages>\n",
+				    status, page_count);
+
+		lmr->param.registered_address = 0;
+		lmr->fmr = 0;
+		return status;
+	}
+
+	lmr->param.lmr_context = mem->fmr->lkey;
+	lmr->param.rmr_context = mem->fmr->rkey;
+	lmr->param.registered_size = page_count * PAGE_SIZE;
+	lmr->param.registered_address = io_addr;
+	lmr->fmr = mem;
+
+	dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+		"%s: (addr=%p size=0x%x) lkey=0x%x rkey=0x%x\n", __func__,
+		lmr->param.registered_address, 
+		lmr->param.registered_size, 
+		lmr->param.lmr_context,
+		lmr->param.rmr_context);
 	return 0;
 }
 
@@ -222,7 +267,10 @@ int dapl_ib_mr_deregister(struct dapl_lm
 {
 	int status;
 
-	status = ib_dereg_mr(lmr->mr);
+	if (DAT_MEM_TYPE_PLATFORM == lmr->param.mem_type && lmr->fmr)
+		status = ib_fmr_pool_unmap(lmr->fmr);
+	else
+		status = ib_dereg_mr(lmr->mr);
 	if (status < 0) {
 		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
 			     " ib_dereg_mr error code return = %d\n",
Index: ib/dapl_evd.c
===================================================================
--- ib/dapl_evd.c	(revision 3113)
+++ ib/dapl_evd.c	(working copy)
@@ -42,19 +42,30 @@ static void dapl_evd_upcall_trigger(stru
 	int status = 0;
 	struct dat_event event;
 
-	/* Only process events if there is an enabled callback function. */
-	if ((evd->upcall.upcall_func == (DAT_UPCALL_FUNC) NULL) ||
-	    (evd->upcall_policy == DAT_UPCALL_DISABLE))
+	/* DAT_UPCALL_MANY is not supported */
+	if (evd->is_triggered)
 		return;
 
-	for (;;) {
+	spin_lock_irqsave (&evd->common.lock, evd->common.flags);
+	if (evd->is_triggered) {
+		spin_unlock_irqrestore (&evd->common.lock, evd->common.flags);
+		return;
+	}
+	evd->is_triggered = 1;
+	spin_unlock_irqrestore (&evd->common.lock, evd->common.flags);
+	/* Only process events if there is an enabled callback function */
+	while ((evd->upcall.upcall_func != (DAT_UPCALL_FUNC)NULL) &&
+	       (evd->upcall_policy != DAT_UPCALL_TEARDOWN) &&
+	       (evd->upcall_policy != DAT_UPCALL_DISABLE)) {
 		status = dapl_evd_dequeue((struct dat_evd *)evd, &event);
-		if (0 != status) 
-			return;
-
+		if (status)
+			break;
 		evd->upcall.upcall_func(evd->upcall.instance_data, &event,
 					FALSE);
 	}
+	evd->is_triggered = 0;
+
+	return;
 }
 
 static void dapl_evd_eh_print_wc(struct ib_wc *wc)
@@ -163,6 +174,7 @@ static struct dapl_evd *dapl_evd_alloc(s
 	evd->cq = NULL;
 	atomic_set(&evd->evd_ref_count, 0);
 	evd->catastrophic_overflow = FALSE;
+	evd->is_triggered = 0;
 	evd->qlen = qlen;
 	evd->upcall_policy = upcall_policy;
 	if ( NULL != upcall )
@@ -798,25 +810,28 @@ static void dapl_evd_dto_callback(struct
 		     overflow);
 
 	/*
-	 * This function does not dequeue from the CQ; only the consumer
-	 * can do that. It rearms the completion only if completions should 
-	 * always occur.
+	 * This function does not dequeue from the CQ; 
+	 * It rearms the completion only if the consumer did not
+	 * disable the upcall policy (in order to dequeue the rest 
+	 * of the events itself)
 	 */
 
-	if (!overflow && evd->upcall_policy != DAT_UPCALL_DISABLE) {
-		/*
-		 * Re-enable callback, *then* trigger.
-		 * This guarantees we won't miss any events.
-		 */
-		status = ib_req_notify_cq(evd->cq, IB_CQ_NEXT_COMP);
-		if (0 != status) 
-			(void)dapl_evd_post_async_error_event(
-				evd->common.owner_ia->async_error_evd,
-				DAT_ASYNC_ERROR_PROVIDER_INTERNAL_ERROR,
-				evd->common.owner_ia);
-
+	if (!overflow) {
 		dapl_evd_upcall_trigger(evd);
+		if ((evd->upcall_policy != DAT_UPCALL_TEARDOWN) &&
+		    (evd->upcall_policy != DAT_UPCALL_DISABLE)) {
+			status = ib_req_notify_cq(evd->cq, IB_CQ_NEXT_COMP);
+			if (status) 
+				(void)dapl_evd_post_async_error_event(
+					evd->common.owner_ia->async_error_evd,
+					DAT_ASYNC_ERROR_PROVIDER_INTERNAL_ERROR,
+					evd->common.owner_ia);
+			else
+				dapl_evd_upcall_trigger(evd);
+		}
 	}
+	else
+		dapl_dbg_log(DAPL_DBG_TYPE_ERR, "%s: evd %p overflowed\n",evd);
 	dapl_dbg_log(DAPL_DBG_TYPE_RTN, "%s() returns\n", __func__);
 }
 
@@ -868,10 +883,11 @@ int dapl_evd_internal_create(struct dapl
 
 		/* reset the qlen in the attributes, it may have changed */
 		evd->qlen = evd->cq->cqe;
+		if ((evd->upcall_policy != DAT_UPCALL_TEARDOWN) && 
+		    (evd->upcall_policy != DAT_UPCALL_DISABLE))
+			status = ib_req_notify_cq(evd->cq, IB_CQ_NEXT_COMP);
 
-		status = ib_req_notify_cq(evd->cq, IB_CQ_NEXT_COMP);
-
-		if (status != 0)
+		if (status)
 			goto bail;
 	}
 
@@ -879,14 +895,14 @@ int dapl_evd_internal_create(struct dapl
 	 * the EVD
 	 */
 	status = dapl_evd_event_alloc(evd);
-	if (status != 0)
+	if (status)
 		goto bail;
 
 	dapl_ia_link_evd(ia, evd);
 	*evd_ptr = evd;
 
 bail:
-	if (status != 0) 
+	if (status)
 		if (evd) 
 			dapl_evd_dealloc(evd);
 
@@ -1012,15 +1028,40 @@ int dapl_evd_modify_upcall(struct dat_ev
 			   const struct dat_upcall_object *upcall)
 {
 	struct dapl_evd *evd;
-
-	dapl_dbg_log(DAPL_DBG_TYPE_API, "dapl_modify_upcall(%p)\n", evd_handle);
+	int status = 0;
+	int pending_events;
 
 	evd = (struct dapl_evd *)evd_handle;
+	dapl_dbg_log (DAPL_DBG_TYPE_API, "%s: (evd=%p, upcall_policy=%d)\n",
+			__func__, evd_handle, upcall_policy);
 
+	spin_lock_irqsave(&evd->common.lock, evd->common.flags);
+	if ((upcall_policy != DAT_UPCALL_TEARDOWN) &&
+	    (upcall_policy != DAT_UPCALL_DISABLE)) {
+		pending_events = dapl_rbuf_count(&evd->pending_event_queue);
+		if (pending_events) {
+			dapl_dbg_log (DAPL_DBG_TYPE_WARN,
+				"%s: (evd %p) there are still %d pending "
+				"events in the queue - policy stays disabled\n",
+				__func__, evd_handle, pending_events);
+			status = -EBUSY;
+			goto bail;
+		}
+		if (evd->evd_flags & DAT_EVD_DTO_FLAG) {
+			status = ib_req_notify_cq(evd->cq, IB_CQ_NEXT_COMP);
+			if (status) {
+				printk(KERN_ERR "%s: dapls_ib_completion_notify"
+					" failed (status=0x%x) \n",__func__,
+					status);
+				goto bail;
+			}
+		}
+	}
 	evd->upcall_policy = upcall_policy;
 	evd->upcall = *upcall;
-
-	return 0;
+bail:
+	spin_unlock_irqrestore(&evd->common.lock, evd->common.flags);
+	return status;
 }
 
 int dapl_evd_post_se(struct dat_evd *evd_handle, const struct dat_event *event)
@@ -1090,7 +1131,6 @@ int dapl_evd_dequeue(struct dat_evd *evd
 
 bail:
 	spin_unlock_irqrestore(&evd->common.lock, evd->common.flags);
-
 	dapl_dbg_log(DAPL_DBG_TYPE_RTN,
 		     "dapl_evd_dequeue () returns 0x%x\n", status);
 
Index: ib/dapl_openib_util.h
===================================================================
--- ib/dapl_openib_util.h	(revision 3113)
+++ ib/dapl_openib_util.h	(working copy)
@@ -84,9 +84,13 @@ int dapl_ib_mr_register_ia(struct dapl_i
 			   enum dat_mem_priv_flags privileges);
 
 int dapl_ib_mr_register_physical(struct dapl_ia *ia, struct dapl_lmr *lmr,
-				 void *phys_addr, u64 length,
+				 void *phys_addr, u32 page_count,
 				 enum dat_mem_priv_flags privileges);
 
+int dapl_ib_mr_register_fmr(struct dapl_ia *ia, struct dapl_lmr *lmr,
+			    void *phys_addr, u32 page_count,
+			    enum dat_mem_priv_flags privileges);
+
 int dapl_ib_mr_deregister(struct dapl_lmr *lmr);
 
 int dapl_ib_mr_register_shared(struct dapl_ia *ia, struct dapl_lmr *lmr,
Index: ib/dapl.h
===================================================================
--- ib/dapl.h	(revision 3113)
+++ ib/dapl.h	(working copy)
@@ -40,9 +40,9 @@
 #include <asm/atomic.h>
 
 #include <kdapl.h>
-
-#include "ib_verbs.h"
-#include "ib_cm.h"
+#include <ib_verbs.h>
+#include <ib_cm.h>
+#include <ib_fmr_pool.h>
 
 /*********************************************************************
  *                                                                   *
@@ -173,6 +173,7 @@ struct dapl_evd {
 	struct dapl_ring_buffer pending_event_queue;
 	enum dat_upcall_policy  upcall_policy;
 	struct dat_upcall_object upcall;
+	int is_triggered;
 };
 
 struct dapl_ep {
@@ -229,6 +230,7 @@ struct dapl_pz {
 	struct list_head list;
 	struct ib_pd *pd;
 	atomic_t pz_ref_count;
+	struct ib_fmr_pool *fmr_pool;
 };
 
 struct dapl_lmr {
@@ -237,6 +239,7 @@ struct dapl_lmr {
 	struct list_head list;
 	struct dat_lmr_param param;
 	struct ib_mr *mr;
+	struct ib_pool_fmr *fmr;
 	atomic_t lmr_ref_count;
 };
 
@@ -628,4 +631,6 @@ extern void dapl_dbg_log(enum dapl_dbg_t
 #define dapl_dbg_log(...)
 #endif /* KDAPL_INFINIBAND_DEBUG */
 
+extern void set_fmr_params (struct ib_fmr_pool_param *fmr_param_s);
+extern unsigned int g_dapl_active_fmr;
 #endif /* DAPL_H */
Index: ib/dapl_pz.c
===================================================================
--- ib/dapl_pz.c	(revision 3113)
+++ ib/dapl_pz.c	(working copy)
@@ -29,7 +29,7 @@
 /*
  * $Id$
  */
-
+#include <linux/delay.h>
 #include "dapl.h"
 #include "dapl_ia.h"
 #include "dapl_openib_util.h"
@@ -89,7 +89,17 @@ int dapl_pz_create(struct dat_ia *ia, st
 			     status);
 		goto error2;
 	}
-	
+
+        if (g_dapl_active_fmr) {
+		struct ib_fmr_pool_param params;
+		set_fmr_params (&params);
+		dapl_pz->fmr_pool = ib_create_fmr_pool(dapl_pz->pd, &params);
+		if (IS_ERR(dapl_pz->fmr_pool))
+			dapl_dbg_log(DAPL_DBG_TYPE_WARN, 
+				     "could not create FMR pool <%ld>",
+				     PTR_ERR(dapl_pz->fmr_pool));
+	}
+
 	*pz = (struct dat_pz *)dapl_pz;
 	return 0;
 
@@ -104,7 +114,7 @@ error1:
 int dapl_pz_free(struct dat_pz *pz)
 {
 	struct dapl_pz *dapl_pz;
-	int status;
+	int status=0;
 
 	dapl_dbg_log(DAPL_DBG_TYPE_API, "dapl_pz_free(%p)\n", pz);
 
@@ -114,8 +124,15 @@ int dapl_pz_free(struct dat_pz *pz)
 		status = -EINVAL;
 		goto error;
 	}
-	
-	status = ib_dealloc_pd(dapl_pz->pd);
+
+	if (g_dapl_active_fmr && dapl_pz->fmr_pool) {
+		(void)ib_destroy_fmr_pool(dapl_pz->fmr_pool);
+		dapl_pz->fmr_pool = NULL;
+	}
+
+	if (dapl_pz->pd)
+		status = ib_dealloc_pd(dapl_pz->pd);
+
 	if (status) {
 		dapl_dbg_log(DAPL_DBG_TYPE_ERR, "ib_dealloc_pd failed: %X\n",
 			     status);
Index: ib/dapl_ia.c
===================================================================
--- ib/dapl_ia.c	(revision 3113)
+++ ib/dapl_ia.c	(working copy)
@@ -115,7 +115,6 @@ static int dapl_ia_abrupt_close(struct d
 	 * when we run out of entries, or when we get back to the head
 	 * if we end up skipping an entry.
 	 */
-
 	list_for_each_entry(rmr, &ia->rmr_list, list) {
 		status = dapl_rmr_free((struct dat_rmr *)rmr);
 		if (status != 0)
@@ -196,7 +195,6 @@ static int dapl_ia_abrupt_close(struct d
 				     "ia_close(ABRUPT): psp_free(%p) returns %x\n",
 				     sp, status);
 	}
-
 	list_for_each_entry(pz, &ia->pz_list, list) {
 		status = dapl_pz_free((struct dat_pz *)pz);
 		if (status != 0)
@@ -266,7 +264,6 @@ static int dapl_ia_graceful_close(struct
 	int status = 0;
 	int cur_status;
 	struct dapl_evd *evd;
-
 	if (!list_empty(&ia->rmr_list) ||
 	    !list_empty(&ia->rsp_list) ||
 	    !list_empty(&ia->ep_list) ||
@@ -745,7 +742,8 @@ int dapl_ia_query(struct dat_ia *ia_ptr,
 		provider_attr->provider_version_major = DAPL_PROVIDER_MAJOR;
 		provider_attr->provider_version_minor = DAPL_PROVIDER_MINOR;
 		provider_attr->lmr_mem_types_supported =
-		    DAT_MEM_TYPE_PHYSICAL | DAT_MEM_TYPE_IA;
+			DAT_MEM_TYPE_PHYSICAL | DAT_MEM_TYPE_IA | 
+			DAT_MEM_TYPE_PLATFORM;
 		provider_attr->iov_ownership_on_return = DAT_IOV_CONSUMER;
 		provider_attr->dat_qos_supported = DAT_QOS_BEST_EFFORT;
 		provider_attr->completion_flags_supported =
Index: ib/dapl_provider.c
===================================================================
--- ib/dapl_provider.c	(revision 3113)
+++ ib/dapl_provider.c	(working copy)
@@ -49,8 +49,17 @@ MODULE_AUTHOR("James Lentini");
 #ifdef CONFIG_KDAPL_INFINIBAND_DEBUG
 static DAPL_DBG_MASK g_dapl_dbg_mask = 0;
 module_param_named(dbg_mask, g_dapl_dbg_mask, int, 0644);
-MODULE_PARM_DESC(dbg_mask, "Bitmask to enable debug message types.");
+MODULE_PARM_DESC(dbg_mask, "Bitmask to enable debug message types. <default=0>");
 #endif /* CONFIG_KDAPL_INFINIBAND_DEBUG */
+unsigned int g_dapl_active_fmr = 1;
+static unsigned int g_dapl_pool_size = 2048;
+static unsigned int g_dapl_max_pages_per_fmr = 64;
+module_param_named(active_fmr, g_dapl_active_fmr, int, 0644);
+module_param_named(pool_size, g_dapl_pool_size, int, 0644);
+module_param_named(max_pages_per_fmr, g_dapl_max_pages_per_fmr, int, 0644);
+MODULE_PARM_DESC(active_fmr, "if active_fmr==1, creates fmr pool in pz_create <default=1>");
+MODULE_PARM_DESC(pool_size, "num of fmr handles in pool <default=2048>");
+MODULE_PARM_DESC(max_pages_per_fmr, "max size (in pages) of an fmr handle <default=64>");
 
 static LIST_HEAD(g_dapl_provider_list);
 
@@ -152,6 +161,18 @@ void dapl_dbg_log(enum dapl_dbg_type typ
 
 #endif /* KDAPL_INFINIBAND_DEBUG */
 
+void set_fmr_params (struct ib_fmr_pool_param *fmr_params)
+{
+	fmr_params->max_pages_per_fmr = g_dapl_max_pages_per_fmr;
+	fmr_params->pool_size = g_dapl_pool_size;
+	fmr_params->dirty_watermark = 32;
+	fmr_params->cache = 1;
+	fmr_params->flush_function = NULL;
+	fmr_params->access = (IB_ACCESS_LOCAL_WRITE  |
+			      IB_ACCESS_REMOTE_WRITE |
+			      IB_ACCESS_REMOTE_READ);
+}
+		
 static struct dapl_provider *dapl_provider_alloc(const char *name, 
 						 struct ib_device *device, 
 						 u8 port)
Index: ib/dapl_lmr.c
===================================================================
--- ib/dapl_lmr.c	(revision 3113)
+++ ib/dapl_lmr.c	(working copy)
@@ -126,7 +126,7 @@ error1:
 
 static inline int dapl_lmr_create_physical(struct dapl_ia *ia,
 					   union dat_region_description phys_addr,
-					   u64 page_count, 
+					   u64 length,
 					   enum dat_mem_type mem_type,
 					   struct dapl_pz *pz,
 					   enum dat_mem_priv_flags privileges,
@@ -137,8 +137,10 @@ static inline int dapl_lmr_create_physic
 					   u64 *registered_address)
 {
 	struct dapl_lmr *new_lmr;
-	int status;
+	int status = 0;
+	u32 page_count;
 
+	page_count = (u32)length;
 	new_lmr = dapl_lmr_alloc(ia, mem_type, phys_addr,
 				 page_count, (struct dat_pz *) pz, privileges);
 
@@ -149,15 +151,24 @@ static inline int dapl_lmr_create_physic
 
 	if (DAT_MEM_TYPE_IA == mem_type) {
 		status = dapl_ib_mr_register_ia(ia, new_lmr, phys_addr,
-						page_count, privileges);
+						length, privileges);
 	}
-	else {
+	else if (DAT_MEM_TYPE_PHYSICAL == mem_type) {
 		status = dapl_ib_mr_register_physical(ia, new_lmr, 
 						      phys_addr.for_array,
 						      page_count, privileges);
 	}
+	else if (DAT_MEM_TYPE_PLATFORM == mem_type) {
+		status = dapl_ib_mr_register_fmr(ia, new_lmr,
+						 phys_addr.for_array,
+						 page_count, privileges);
+	}
+	else {
+		status = -EINVAL;
+		goto error1;
+	}
 
-	if (0 != status)
+	if (status)
 		goto error2;
 
 	atomic_inc(&pz->pz_ref_count);
@@ -258,6 +269,11 @@ int dapl_lmr_kcreate(struct dat_ia *ia, 
 						 rmr_context, registered_length,
 						 registered_address);
 		break;
+	case DAT_MEM_TYPE_PLATFORM: /* used as a proprietary Mellanox-FMR */
+		if (!g_dapl_active_fmr) {
+			status = -EINVAL;
+			break;
+		}
 	case DAT_MEM_TYPE_PHYSICAL:
 	case DAT_MEM_TYPE_IA:
 		status = dapl_lmr_create_physical(dapl_ia, region_description,
@@ -307,6 +323,7 @@ int dapl_lmr_free(struct dat_lmr *lmr)
 
 	switch (dapl_lmr->param.mem_type) {
 	case DAT_MEM_TYPE_PHYSICAL:
+	case DAT_MEM_TYPE_PLATFORM:
 	case DAT_MEM_TYPE_IA:
 	case DAT_MEM_TYPE_LMR:
 	{



More information about the general mailing list