[ofw] [HW] memory allocation improvement in user space

Leonid Keller leonid at mellanox.co.il
Mon Jun 23 06:44:07 PDT 2008


Investigation of INSUFFICIENT_MEMORY failures in our stress tests
brought us to the "revelation" that the VirtualAlloc function, used for
the implementation of posix_memalign, is a very "greedy" one: it
allocates at least 64KB of memory.
Since we usually ask for only one page, this is 16 times more than necessary.
 
Presented below is a patch which implements posix_memalign with
(ultimately) HeapAlloc functions.
The patch was tested and worked OK.
 
An important nuance that was revealed during testing is as follows:
A system function, which releases the resources of an exiting process,
somehow disrupts the work of the MmSecureVirtualMemory function, which we
use today to secure CQ\QP\SRQ circular buffers and user buffers.
If an application gets killed or exits without releasing the resources,
IBBUS catches this event, starts its cascading destroy of resources and
crashes in MmUnsecureVirtualMemory.
Putting MmUnsecureVirtualMemory in a try-except block saves us from the
crash, but an async thread releasing QPs freezes in
MmUnsecureVirtualMemory, which fails to acquire some mutex.
Since there is no real reason to secure the circular buffers, I've
solved the problem by skipping securing for IB objects.
User buffers are still secured during memory registration.
 
Therefore the patch contains 3 kinds of changes:
1) a new implementation of posix_memalign and everything related to it;
2) a try-except block around MmUnsecureVirtualMemory;
3) a new parameter in ib_umem_get and mthca_reg_virt_mr for skipping
memory securing, and everything related to it;
 
Index: hw/mlx4/kernel/bus/core/l2w_umem.c
===================================================================
--- hw/mlx4/kernel/bus/core/l2w_umem.c (revision 1294)
+++ hw/mlx4/kernel/bus/core/l2w_umem.c (working copy)
@@ -9,8 +9,20 @@
 void ib_umem_release(struct ib_umem *p_ib_umem)
 {
  MLX4_ENTER(MLX4_DBG_MEMORY);
- if (p_ib_umem->secure_handle)
-  MmUnsecureVirtualMemory( p_ib_umem->secure_handle );
+ if (p_ib_umem->secure_handle) {
+  __try {
+   MmUnsecureVirtualMemory( p_ib_umem->secure_handle );
+   p_ib_umem->secure_handle = NULL;
+  }
+  __except (EXCEPTION_EXECUTE_HANDLER) {
+   NTSTATUS Status = GetExceptionCode();
+   UNUSED_PARAM_WOWPP(Status);
+   MLX4_PRINT(TRACE_LEVEL_ERROR ,MLX4_DBG_MEMORY ,
+    ("Exception 0x%x on MmUnsecureVirtualMemory(), addr %I64x, size
%I64x, seg_num %d, nr_pages %d\n", 
+    Status, p_ib_umem->iobuf.va, (u64)p_ib_umem->iobuf.size, 
+    p_ib_umem->iobuf.seg_num, p_ib_umem->iobuf.nr_pages ));
+  }
+ }
  if (p_ib_umem->iobuf_used)
   iobuf_deregister_with_cash(&p_ib_umem->iobuf);
  kfree(p_ib_umem);
@@ -26,7 +38,7 @@
  * @access: IB_ACCESS_xxx flags for memory being pinned
  */
 struct ib_umem *ib_umem_get(struct ib_ucontext *context, u64 addr,
-       size_t size, enum ib_access_flags access)
+       size_t size, enum ib_access_flags access, boolean_t secure)
 {
  int err;
  struct ib_umem *p_ib_umem;
@@ -52,7 +64,7 @@
  // TODO: map the memory for DMA
  
  // secure memory
- if (!context)
+ if (!context || !secure)
   goto done;
  __try {
   p_ib_umem->secure_handle = MmSecureVirtualMemory ( 
Index: hw/mlx4/kernel/bus/ib/cq.c
===================================================================
--- hw/mlx4/kernel/bus/ib/cq.c (revision 1294)
+++ hw/mlx4/kernel/bus/ib/cq.c (working copy)
@@ -142,7 +142,7 @@
   }
 
   cq->umem = ib_umem_get(context, ucmd.buf_addr, buf_size,
-           IB_ACCESS_LOCAL_WRITE);
+           IB_ACCESS_LOCAL_WRITE, FALSE);
   if (IS_ERR(cq->umem)) {
    err = PTR_ERR(cq->umem);
    goto err_cq;
Index: hw/mlx4/kernel/bus/ib/doorbell.c
===================================================================
--- hw/mlx4/kernel/bus/ib/doorbell.c (revision 1294)
+++ hw/mlx4/kernel/bus/ib/doorbell.c (working copy)
@@ -182,7 +182,7 @@
  page->user_virt = virt & (u64)PAGE_MASK;
  page->refcnt    = 0;
  page->umem      = ib_umem_get(&context->ibucontext, virt &
(u64)PAGE_MASK,
-          PAGE_SIZE, 0);
+          PAGE_SIZE, 0, FALSE);
  if (IS_ERR(page->umem)) {
   err = PTR_ERR(page->umem);
   kfree(page);
Index: hw/mlx4/kernel/bus/ib/mr.c
===================================================================
--- hw/mlx4/kernel/bus/ib/mr.c (revision 1294)
+++ hw/mlx4/kernel/bus/ib/mr.c (working copy)
@@ -129,7 +129,7 @@
  if (!mr)
   return ERR_PTR(-ENOMEM);
 
- mr->umem = ib_umem_get(pd->p_uctx, start, (size_t)length,
access_flags);
+ mr->umem = ib_umem_get(pd->p_uctx, start, (size_t)length,
access_flags, TRUE);
  if (IS_ERR(mr->umem)) {
   // there can be also second reason of failue - insufficient memory,
   // but we can't get awared of that without changing ib_umem_get
prototype
Index: hw/mlx4/kernel/bus/ib/qp.c
===================================================================
--- hw/mlx4/kernel/bus/ib/qp.c (revision 1294)
+++ hw/mlx4/kernel/bus/ib/qp.c (working copy)
@@ -360,7 +360,7 @@
    goto err;
 
   qp->umem = ib_umem_get(pd->p_uctx, ucmd.buf_addr,
-           qp->buf_size, 0);
+           qp->buf_size, 0, FALSE);
   if (IS_ERR(qp->umem)) {
    err = PTR_ERR(qp->umem);
    goto err;
Index: hw/mlx4/kernel/bus/ib/srq.c
===================================================================
--- hw/mlx4/kernel/bus/ib/srq.c (revision 1294)
+++ hw/mlx4/kernel/bus/ib/srq.c (working copy)
@@ -116,7 +116,7 @@
   }
 
   srq->umem = ib_umem_get(pd->p_uctx, ucmd.buf_addr,
-     buf_size, 0);
+     buf_size, 0, FALSE);
   if (IS_ERR(srq->umem)) {
    err = PTR_ERR(srq->umem);
    goto err_srq;
Index: hw/mlx4/kernel/hca/mr.c
===================================================================
--- hw/mlx4/kernel/hca/mr.c (revision 1294)
+++ hw/mlx4/kernel/hca/mr.c (working copy)
@@ -170,7 +170,7 @@
  if (IS_ERR(p_ib_mr)) {
   err = PTR_ERR(p_ib_mr);
   HCA_PRINT(TRACE_LEVEL_ERROR, HCA_DBG_MEMORY,
-   ("mthca_reg_phys_mr failed (%d)\n", err));
+   ("ib_reg_phys_mr failed (%d)\n", err));
   status = errno_to_iberr(err);
   goto err_reg_phys_mr;
  }
Index: hw/mlx4/kernel/inc/l2w_umem.h
===================================================================
--- hw/mlx4/kernel/inc/l2w_umem.h (revision 1294)
+++ hw/mlx4/kernel/inc/l2w_umem.h (working copy)
@@ -15,7 +15,7 @@
 void ib_umem_release(struct ib_umem *p_ib_umem);
 
 struct ib_umem *ib_umem_get(struct ib_ucontext *context, u64 addr,
-       size_t size, enum ib_access_flags access);
+       size_t size, enum ib_access_flags access, boolean_t secure);
 
 int ib_umem_page_count(struct ib_umem *p_ib_umem);
 
Index: hw/mlx4/user/hca/buf.c
===================================================================
--- hw/mlx4/user/hca/buf.c (revision 1294)
+++ hw/mlx4/user/hca/buf.c (working copy)
@@ -35,17 +35,13 @@
 int mlx4_alloc_buf(struct mlx4_buf *buf, int size, int page_size)
 {
  int ret;
-
  ret = posix_memalign(&buf->buf, page_size, align(size, page_size));
- if (ret)
-  return ret;
-
- buf->length = size;
-
- return 0;
+ if (!ret)
+  buf->length = size;
+ return ret;
 }
 
 void mlx4_free_buf(struct mlx4_buf *buf)
 {
- VirtualFree(buf->buf, 0, MEM_RELEASE);
+ posix_memfree(buf->buf);
 }
Index: hw/mlx4/user/hca/l2w.h
===================================================================
--- hw/mlx4/user/hca/l2w.h (revision 1294)
+++ hw/mlx4/user/hca/l2w.h (working copy)
@@ -74,17 +74,49 @@
 // FUNCTIONS
 // ===========================================
 
+static inline BOOLEAN is_power_of_2(uint32_t n)
+{
+ return (!!n & !(n & (n-1))) ? TRUE : FALSE;
+}
+
+// Allocated memory is zeroed !
 static inline int posix_memalign(void **memptr, int alignment, int
size)
 {
- UNREFERENCED_PARAMETER(alignment);
+ int aligned_size, desc_size = sizeof(int);
+ char *real_addr, *aligned_addr;
 
- *memptr = VirtualAlloc( NULL, size, MEM_COMMIT | MEM_RESERVE,
PAGE_READWRITE );
- if (*memptr) 
-  return 0;
- else 
-  return ENOMEM;
+ // sanity check: alignment should a power of 2 and more then 2
+ if ( alignment < desc_size || !is_power_of_2((uint32_t)alignment) )
+  return -EINVAL;
+
+ // calculate size, needed for aligned allocation
+ aligned_size = size + alignment + desc_size;
+
+ // allocate
+ real_addr = cl_zalloc(aligned_size);
+ if ( real_addr == NULL )
+  return -ENOMEM;
+
+ // calculate aligned address
+ aligned_addr = (char *)(((ULONG_PTR)(real_addr + alignment-1)) &
~(alignment - 1));
+ if ( aligned_addr < real_addr + desc_size )
+  aligned_addr += alignment;
+
+ // store the descriptor
+ *(int*)(aligned_addr - desc_size) = (int)(aligned_addr - real_addr);
+ 
+ *memptr = aligned_addr;
+ return 0;
 }
 
+// there is no such POSIX function. Called so to be similar to the
allocation one.
+static inline void posix_memfree(void *memptr)
+{
+ int *desc_addr = (int*)((char*)memptr - sizeof(int));
+ char *real_addr = (char*)memptr - *desc_addr;
+ cl_free(real_addr);
+}
+
 static inline int ffsl(uint32_t x)
 {
        int r = 0;
Index: hw/mlx4/user/hca/qp.c
===================================================================
--- hw/mlx4/user/hca/qp.c (revision 1294)
+++ hw/mlx4/user/hca/qp.c (working copy)
@@ -685,7 +685,6 @@
   return -1;
  }
 
- memset(qp->buf.buf, 0, qp->buf_size);
  mlx4_qp_init_sq_ownership(qp);
 
  return 0;
Index: hw/mlx4/user/hca/srq.c
===================================================================
--- hw/mlx4/user/hca/srq.c (revision 1294)
+++ hw/mlx4/user/hca/srq.c (working copy)
@@ -146,8 +146,6 @@
   return -1;
  }
 
- // srq->buf.buf is zeroed in posix_memalign - memset(srq->buf.buf, 0,
buf_size);
-
  /*
   * Now initialize the SRQ buffer so that all of the WQEs are
   * linked into the list of free WQEs.
Index: hw/mlx4/user/hca/verbs.c
===================================================================
--- hw/mlx4/user/hca/verbs.c (revision 1294)
+++ hw/mlx4/user/hca/verbs.c (working copy)
@@ -373,8 +373,6 @@
       context->page_size))
   goto err_alloc_buf;
 
- // cq->buf.buf is zeroed in posix_memalign - memset(cq->buf.buf, 0,
buf_size);
-
  cq->ibv_cq.context = context;
  cq->cons_index = 0;
   
@@ -718,7 +716,7 @@
  attr.cap.max_recv_wr  = p_create_attr->rq_depth;
  attr.cap.max_send_sge  = p_create_attr->sq_sge;
  attr.cap.max_recv_sge  = p_create_attr->rq_sge;
- attr.cap.max_inline_data = p_create_attr->sq_max_inline;  /* absent in
IBAL */
+ attr.cap.max_inline_data = p_create_attr->sq_max_inline;
  attr.qp_type    = __to_qp_type(p_create_attr->qp_type);
  attr.sq_sig_all    = p_create_attr->sq_signaled;
 
Index: hw/mthca/kernel/hca_memory.c
===================================================================
--- hw/mthca/kernel/hca_memory.c (revision 1294)
+++ hw/mthca/kernel/hca_memory.c (working copy)
@@ -88,7 +88,7 @@
  // register mr 
  mr_p = ibv_reg_mr(ib_pd_p, map_qp_ibal_acl(p_mr_create->access_ctrl), 
   p_mr_create->vaddr, p_mr_create->length, 
-  (uint64_t)p_mr_create->vaddr, um_call );
+  (uint64_t)p_mr_create->vaddr, um_call, TRUE );
  if (IS_ERR(mr_p)) {
   err = PTR_ERR(mr_p);
   HCA_PRINT(TRACE_LEVEL_ERROR, HCA_DBG_MEMORY,
Index: hw/mthca/kernel/ib_verbs.h
===================================================================
--- hw/mthca/kernel/ib_verbs.h (revision 1294)
+++ hw/mthca/kernel/ib_verbs.h (working copy)
@@ -729,7 +729,7 @@
         u64 *iova_start);
  struct ib_mr *      (*reg_virt_mr)(struct ib_pd *pd, 
       void* FUNC_PTR64 vaddr, uint64_t length, uint64_t hca_va,
-      mthca_qp_access_t acc, boolean_t um_call);
+      mthca_qp_access_t acc, boolean_t um_call, boolean_t secure);
  int                        (*query_mr)(struct ib_mr *mr,
             struct ib_mr_attr *mr_attr);
  int                        (*dereg_mr)(struct ib_mr *mr);
@@ -1140,13 +1140,15 @@
  * @hca_va: virtual address in HCA
  * @mr_access_flags: Specifies the memory access rights.
  * @um_call: call from user, when TRUE.
+ * @secure: secure the memory from releasing (only for um_call == TRUE)
  */
 struct ib_mr *ibv_reg_mr(struct ib_pd *pd, 
  mthca_qp_access_t mr_access_flags,
  void* FUNC_PTR64   vaddr,
  uint64_t    length,
  uint64_t     hca_va,
- boolean_t   um_call
+ boolean_t    um_call,
+ boolean_t    secure
  );
 
 /**
Index: hw/mthca/kernel/mt_verbs.c
===================================================================
--- hw/mthca/kernel/mt_verbs.c (revision 1294)
+++ hw/mthca/kernel/mt_verbs.c (working copy)
@@ -171,7 +171,7 @@
    pd, 
    create_ah->mr.access_flags, 
    (void*)(ULONG_PTR)create_ah->mr.start,
-   create_ah->mr.length, create_ah->mr.hca_va, TRUE );
+   create_ah->mr.length, create_ah->mr.hca_va, TRUE, FALSE );
   if (IS_ERR(ib_mr)) {
    err = PTR_ERR(ib_mr);
    HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_AV ,("ibv_reg_mr failed (%d)\n",
err));
@@ -331,7 +331,7 @@
    (struct ib_pd *)(ULONG_PTR)create_srp->mr.pd_handle, 
    create_srp->mr.access_flags, 
    (void*)(ULONG_PTR)create_srp->mr.start,
-   create_srp->mr.length, create_srp->mr.hca_va, TRUE );
+   create_srp->mr.length, create_srp->mr.hca_va, TRUE, FALSE );
   if (IS_ERR(ib_mr)) {
    err = PTR_ERR(ib_mr);
    HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP ,("ibv_reg_mr failed
(%d)\n", err));
@@ -453,7 +453,7 @@
    (struct ib_pd *)(ULONG_PTR)create_qp->mr.pd_handle, 
    create_qp->mr.access_flags, 
    (void*)(ULONG_PTR)create_qp->mr.start,
-   create_qp->mr.length, create_qp->mr.hca_va, TRUE );
+   create_qp->mr.length, create_qp->mr.hca_va, TRUE, FALSE );
   if (IS_ERR(ib_mr)) {
    err = PTR_ERR(ib_mr);
    HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP ,("ibv_reg_mr failed
(%d)\n", err));
@@ -598,7 +598,7 @@
    (struct ib_pd *)(ULONG_PTR)create_cq->mr.pd_handle, 
    create_cq->mr.access_flags, 
    (void*)(ULONG_PTR)create_cq->mr.start,
-   create_cq->mr.length, create_cq->mr.hca_va, TRUE );
+   create_cq->mr.length, create_cq->mr.hca_va, TRUE, FALSE );
   if (IS_ERR(ib_mr)) {
    err = PTR_ERR(ib_mr);
    HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_CQ ,("ibv_reg_mr failed
(%d)\n", err));
@@ -688,14 +688,15 @@
  void* FUNC_PTR64   vaddr,
  uint64_t    length,
  uint64_t     hca_va,
- boolean_t   um_call
+ boolean_t    um_call,
+ boolean_t    secure
  )
 {
  struct ib_mr *ib_mr;
  int                          err;
  HCA_ENTER(HCA_DBG_MEMORY);
 
- ib_mr = pd->device->reg_virt_mr(pd, vaddr, length, hca_va,
mr_access_flags, um_call);
+ ib_mr = pd->device->reg_virt_mr(pd, vaddr, length, hca_va,
mr_access_flags, um_call, secure);
  if (IS_ERR(ib_mr)) {
   err = PTR_ERR(ib_mr);
   HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_MEMORY ,("mthca_reg_user_mr
failed (%d)\n", err));
Index: hw/mthca/kernel/mthca_provider.c
===================================================================
--- hw/mthca/kernel/mthca_provider.c (revision 1294)
+++ hw/mthca/kernel/mthca_provider.c (working copy)
@@ -996,7 +996,7 @@
 
 static struct ib_mr *mthca_reg_virt_mr(struct ib_pd *pd, 
  void* FUNC_PTR64 vaddr, uint64_t length, uint64_t hca_va,
- mthca_qp_access_t acc, boolean_t um_call)
+ mthca_qp_access_t acc, boolean_t um_call, boolean_t secure)
 {
  struct mthca_dev *dev = to_mdev(pd->device);
  struct mthca_mr *mr;
@@ -1082,7 +1082,7 @@
   goto err_mt_alloc;
 
  // secure memory
- if (!pd->ucontext)
+ if (!pd->ucontext || !secure)
   goto done;
  __try {
   mr->secure_handle = MmSecureVirtualMemory ( vaddr, (SIZE_T)length,
@@ -1129,8 +1129,19 @@
  struct mthca_mr *mmr = to_mmr(mr);
  struct mthca_dev* dev = to_mdev(mr->device);
 
- if (mmr->secure_handle)
-  MmUnsecureVirtualMemory ( mmr->secure_handle );
+ if (mmr->secure_handle) {
+  __try {
+   MmUnsecureVirtualMemory( mmr->secure_handle );
+   mmr->secure_handle = NULL;
+  }
+  __except (EXCEPTION_EXECUTE_HANDLER) {
+   NTSTATUS Status = GetExceptionCode();
+   UNUSED_PARAM_WOWPP(Status);
+   HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_MEMORY ,
+    ("Exception 0x%x on MmUnsecureVirtualMemory(), addr %I64x, size
%I64x, seg_num %d, nr_pages %d\n", 
+    Status, mmr->iobuf.va, (u64)mmr->iobuf.size, mmr->iobuf.seg_num,
mmr->iobuf.nr_pages ));
+  }
+ }
  mthca_free_mr(dev, mmr);
  if (mmr->iobuf_used)
   iobuf_deregister_with_cash(&mmr->iobuf);
Index: hw/mthca/user/mlnx_ual_srq.c
===================================================================
--- hw/mthca/user/mlnx_ual_srq.c (revision 1294)
+++ hw/mthca/user/mlnx_ual_srq.c (working copy)
@@ -54,11 +54,7 @@
  }
 
  if (srq->buf) {
-#ifdef NOT_USE_VIRTUAL_ALLOC 
-  cl_free(srq->buf);
-#else
-  VirtualFree( srq->buf, 0, MEM_RELEASE);
-#endif
+  posix_memfree(srq->buf);
  }
 
  if (srq->wrid) 
@@ -158,11 +154,7 @@
  goto end;
 
 err_alloc_db:
-#ifdef NOT_USE_VIRTUAL_ALLOC 
-  cl_free(srq->buf);
-#else
-  VirtualFree( srq->buf, 0, MEM_RELEASE);
-#endif
+ posix_memfree(srq->buf);
  cl_free(srq->wrid);
 err_alloc_buf:
  cl_spinlock_destroy(&srq->lock);
Index: hw/mthca/user/mlnx_uvp_memfree.c
===================================================================
--- hw/mthca/user/mlnx_uvp_memfree.c (revision 1294)
+++ hw/mthca/user/mlnx_uvp_memfree.c (working copy)
@@ -201,11 +201,7 @@
 
  for (i = 0; i < db_tab->npages; ++i)
   if (db_tab->page[i].db_rec)
-#ifdef NOT_USE_VIRTUAL_ALLOC 
-   cl_free(db_tab->page[i].db_rec);
-#else
-   VirtualFree( db_tab->page[i].db_rec, 0, MEM_RELEASE);
-#endif
+   posix_memfree( db_tab->page[i].db_rec);
 
  cl_free(db_tab);
 }
Index: hw/mthca/user/mlnx_uvp_verbs.c
===================================================================
--- hw/mthca/user/mlnx_uvp_verbs.c (revision 1294)
+++ hw/mthca/user/mlnx_uvp_verbs.c (working copy)
@@ -80,11 +80,7 @@
   WaitForSingleObject( pd->ah_mutex, INFINITE );
   for (page = pd->ah_list; page; page = next_page) {
    next_page = page->next;
-   #ifdef NOT_USE_VIRTUAL_ALLOC 
-    cl_free(page->buf);
-   #else
-    VirtualFree( page->buf, 0, MEM_RELEASE);
-   #endif
+   posix_memfree(page->buf);
    cl_free(page);
   }
   ReleaseMutex( pd->ah_mutex );
@@ -181,7 +177,7 @@
    cq->set_ci_db_index);
 
 err_unreg:
- cl_free(cq->buf);
+ posix_memfree(cq->buf);
 
 err_memalign:
  cl_spinlock_destroy(&cq->lock);
@@ -233,12 +229,7 @@
          to_mcq(cq)->arm_db_index);
  }
 
-#ifdef NOT_USE_VIRTUAL_ALLOC 
- cl_free(to_mcq(cq)->buf);
-#else
- VirtualFree( to_mcq(cq)->buf, 0, MEM_RELEASE);
-#endif
-
+ posix_memfree(to_mcq(cq)->buf);
  
  cl_spinlock_destroy(&((struct mthca_cq *)cq)->lock);
  cl_free(to_mcq(cq));
@@ -380,11 +371,7 @@
  
 err_spinlock_sq:
  cl_free(qp->wrid);
-#ifdef NOT_USE_VIRTUAL_ALLOC 
- cl_free(qp->buf);
-#else
- VirtualFree( qp->buf, 0, MEM_RELEASE);
-#endif
+ posix_memfree(qp->buf);
 
 err_nomem:
  cl_free(qp);
@@ -501,11 +488,7 @@
   cl_spinlock_destroy(&((struct mthca_qp *)qp)->sq.lock);
   cl_spinlock_destroy(&((struct mthca_qp *)qp)->rq.lock);
 
-#ifdef NOT_USE_VIRTUAL_ALLOC 
-  cl_free(to_mqp(qp)->buf);
-#else
-  VirtualFree( to_mqp(qp)->buf, 0, MEM_RELEASE);
-#endif
+  posix_memfree(to_mqp(qp)->buf);
   cl_free(to_mqp(qp)->wrid);
   cl_free(to_mqp(qp));
  }
Index: hw/mthca/user/mt_l2w.h
===================================================================
--- hw/mthca/user/mt_l2w.h (revision 1294)
+++ hw/mthca/user/mt_l2w.h (working copy)
@@ -52,32 +52,49 @@
 
 extern size_t g_page_size;
 
-static inline int posix_memalign(void **memptr, size_t alignment,
size_t size)
+static inline BOOLEAN is_power_of_2(uint32_t n)
 {
-#ifdef NOT_USE_VIRTUAL_ALLOC 
- // sanity checks
- if (alignment % sizeof(void*))
-  return EINVAL;
- if (alignment < g_page_size) {
-  fprintf(stderr, "mthca: Fatal (posix_memalign): alignment too small -
%d \n",  alignment );
-  return EINVAL;
- }
+ return (!!n & !(n & (n-1))) ? TRUE : FALSE;
+}
 
- // allocation
- *memptr = cl_malloc(size);
- if (*memptr) 
-  return 0;
- else 
-  return ENOMEM;
-#else
- *memptr = VirtualAlloc( NULL, size, MEM_COMMIT | MEM_RESERVE,
PAGE_READWRITE );
- if (*memptr) 
-  return 0;
- else 
-  return ENOMEM;
-#endif
+// Allocated memory is zeroed !
+static inline int posix_memalign(void **memptr, int alignment, int
size)
+{
+ int aligned_size, desc_size = sizeof(int);
+ char *real_addr, *aligned_addr;
+
+ // sanity check: alignment should a power of 2 and more then 2
+ if ( alignment < desc_size || !is_power_of_2((uint32_t)alignment) )
+  return -EINVAL;
+
+ // calculate size, needed for aligned allocation
+ aligned_size = size + alignment + desc_size;
+
+ // allocate
+ real_addr = cl_zalloc(aligned_size);
+ if ( real_addr == NULL )
+  return -ENOMEM;
+
+ // calculate aligned address
+ aligned_addr = (char *)(((ULONG_PTR)(real_addr + alignment-1)) &
~(alignment - 1));
+ if ( aligned_addr < real_addr + desc_size )
+  aligned_addr += alignment;
+
+ // store the descriptor
+ *(int*)(aligned_addr - desc_size) = (int)(aligned_addr - real_addr);
+ 
+ *memptr = aligned_addr;
+ return 0;
 }
 
+// there is no such POSIX function. Called so to be similar to the
allocation one.
+static inline void posix_memfree(void *memptr)
+{
+ int *desc_addr = (int*)((char*)memptr - sizeof(int));
+ char *real_addr = (char*)memptr - *desc_addr;
+ cl_free(real_addr);
+}
+
 // ===========================================
 // FUNCTIONS
 // ===========================================



--
Leonid Keller
Mellanox Technologies LTD.
SW- Windows
Phone: +972 (4) 909 7200 (ext 372)
Mobile: +972 (54) 464 7702
E-mail: leonid at mellanox.co.il

----------------------------------------------------------------------
Emails belong on computers, trees belong in forests; if you must print
this, do it on recycled paper.
http://www.greenpeace.org/international/
----------------------------------------------------------------------


Disclaimer added by CodeTwo Exchange Rules
http://www.codetwo.com
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20080623/75bdfda7/attachment.html>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: improve_memory.patch
Type: application/octet-stream
Size: 19431 bytes
Desc: improve_memory.patch
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20080623/75bdfda7/attachment.obj>


More information about the ofw mailing list