[openib-general] [RFC/BUG] libibverbs: DMA vs. CQ race

akepner at sgi.com akepner at sgi.com
Sun Jan 28 15:17:14 PST 2007


Here's a first cut at a patch. I'd appreciate comments. 
(The patch is against 1.1-rc7, and doesn't quite apply to 
1.2.)

The attached patches cause CQ allocation to (unconditionally) 
be done using dma_alloc_coherent(). The mmap() interface is 
(ab)used to allow access to user-level CQs.

Is this going in the right direction? Should the allocations 
be done conditionally (i.e., should user-level CQs continue 
to be allocated with a plain old malloc(), or something 
similar, unless the platform requires otherwise)?

This is the first time I've done anything beyond minor first
aid to OFED code, so please let me know if I've broken
anything, too.

-- 
Arthur
-------------- next part --------------
diff -rup openib-1.1/drivers/infiniband/hw/mthca/mthca_allocator.c openib-1.1.cq/drivers/infiniband/hw/mthca/mthca_allocator.c
--- openib-1.1/drivers/infiniband/hw/mthca/mthca_allocator.c	2006-10-05 06:07:01.000000000 -0700
+++ openib-1.1.cq/drivers/infiniband/hw/mthca/mthca_allocator.c	2007-01-28 14:16:41.859588954 -0800
@@ -194,7 +194,7 @@ void mthca_array_cleanup(struct mthca_ar
  */
 
 int mthca_buf_alloc(struct mthca_dev *dev, int size, int max_direct,
-		    union mthca_buf *buf, int *is_direct, struct mthca_pd *pd,
+		    union mthca_buf *buf, int *is_direct, u32 pdn,
 		    int hca_write, struct mthca_mr *mr)
 {
 	int err = -ENOMEM;
@@ -259,9 +259,7 @@ int mthca_buf_alloc(struct mthca_dev *de
 		}
 	}
 
-	err = mthca_mr_alloc_phys(dev, pd->pd_num,
-				  dma_list, shift, npages,
-				  0, size,
+	err = mthca_mr_alloc_phys(dev, pdn, dma_list, shift, npages, 0, size,
 				  MTHCA_MPT_FLAG_LOCAL_READ |
 				  (hca_write ? MTHCA_MPT_FLAG_LOCAL_WRITE : 0),
 				  mr);
diff -rup openib-1.1/drivers/infiniband/hw/mthca/mthca_cq.c openib-1.1.cq/drivers/infiniband/hw/mthca/mthca_cq.c
--- openib-1.1/drivers/infiniband/hw/mthca/mthca_cq.c	2006-10-05 06:07:01.000000000 -0700
+++ openib-1.1.cq/drivers/infiniband/hw/mthca/mthca_cq.c	2007-01-28 14:05:18.585901589 -0800
@@ -342,7 +342,8 @@ void mthca_cq_resize_copy_cqes(struct mt
 		       get_cqe(cq, i & cq->ibcq.cqe), MTHCA_CQ_ENTRY_SIZE);
 }
 
-int mthca_alloc_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int nent)
+int mthca_alloc_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, 
+		       int nent, u32 pdn)
 {
 	int ret;
 	int i;
@@ -350,7 +351,7 @@ int mthca_alloc_cq_buf(struct mthca_dev 
 	ret = mthca_buf_alloc(dev, nent * MTHCA_CQ_ENTRY_SIZE,
 			      MTHCA_MAX_DIRECT_CQ_SIZE,
 			      &buf->queue, &buf->is_direct,
-			      &dev->driver_pd, 1, &buf->mr);
+			      pdn, 1, &buf->mr);
 	if (ret)
 		return ret;
 
@@ -813,11 +814,10 @@ int mthca_init_cq(struct mthca_dev *dev,
 
 	cq_context = mailbox->buf;
 
-	if (cq->is_kernel) {
-		err = mthca_alloc_cq_buf(dev, &cq->buf, nent);
-		if (err)
-			goto err_out_mailbox;
-	}
+	err = mthca_alloc_cq_buf(dev, &cq->buf, nent, 
+				 ctx ? pdn : dev->driver_pd.pd_num);
+	if (err)
+		goto err_out_mailbox;
 
 	spin_lock_init(&cq->lock);
 	cq->refcount = 1;
@@ -873,8 +873,7 @@ int mthca_init_cq(struct mthca_dev *dev,
 	return 0;
 
 err_out_free_mr:
-	if (cq->is_kernel)
-		mthca_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe);
+	mthca_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe);
 
 err_out_mailbox:
 	mthca_free_mailbox(dev, mailbox);
@@ -950,12 +949,10 @@ void mthca_free_cq(struct mthca_dev *dev
 
 	wait_event(cq->wait, !get_cq_refcount(dev, cq));
 
-	if (cq->is_kernel) {
-		mthca_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe);
-		if (mthca_is_memfree(dev)) {
-			mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM,    cq->arm_db_index);
-			mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index);
-		}
+	mthca_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe);
+	if (mthca_is_memfree(dev)) {
+		mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM,    cq->arm_db_index);
+		mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index);
 	}
 
 	mthca_table_put(dev, dev->cq_table.table, cq->cqn);
diff -rup openib-1.1/drivers/infiniband/hw/mthca/mthca_dev.h openib-1.1.cq/drivers/infiniband/hw/mthca/mthca_dev.h
--- openib-1.1/drivers/infiniband/hw/mthca/mthca_dev.h	2006-10-05 06:07:01.000000000 -0700
+++ openib-1.1.cq/drivers/infiniband/hw/mthca/mthca_dev.h	2007-01-28 13:58:46.069861105 -0800
@@ -120,6 +120,8 @@ enum {
 	MTHCA_CMD_NUM_DBELL_DWORDS = 8
 };
 
+#define MTHCA_MAGIC_CQ_OFFSET 0xcffe
+
 struct mthca_cmd {
 	struct pci_pool          *pool;
 	struct mutex              hcr_mutex;
@@ -420,7 +422,7 @@ void mthca_array_clear(struct mthca_arra
 int mthca_array_init(struct mthca_array *array, int nent);
 void mthca_array_cleanup(struct mthca_array *array, int nent);
 int mthca_buf_alloc(struct mthca_dev *dev, int size, int max_direct,
-		    union mthca_buf *buf, int *is_direct, struct mthca_pd *pd,
+		    union mthca_buf *buf, int *is_direct, u32 pdn,
 		    int hca_write, struct mthca_mr *mr);
 void mthca_buf_free(struct mthca_dev *dev, int size, union mthca_buf *buf,
 		    int is_direct, struct mthca_mr *mr);
@@ -499,7 +501,8 @@ void mthca_cq_event(struct mthca_dev *de
 void mthca_cq_clean(struct mthca_dev *dev, struct mthca_cq *cq, u32 qpn,
 		    struct mthca_srq *srq);
 void mthca_cq_resize_copy_cqes(struct mthca_cq *cq);
-int mthca_alloc_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int nent);
+int mthca_alloc_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, 
+		       int nent, u32 pdn);
 void mthca_free_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int cqe);
 
 int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd,
diff -rup openib-1.1/drivers/infiniband/hw/mthca/mthca_provider.c openib-1.1.cq/drivers/infiniband/hw/mthca/mthca_provider.c
--- openib-1.1/drivers/infiniband/hw/mthca/mthca_provider.c	2006-10-05 06:07:01.000000000 -0700
+++ openib-1.1.cq/drivers/infiniband/hw/mthca/mthca_provider.c	2007-01-28 14:14:26.753475417 -0800
@@ -377,20 +377,74 @@ static int mthca_dealloc_ucontext(struct
 	return 0;
 }
 
-static int mthca_mmap_uar(struct ib_ucontext *context,
+/*
+ * Map a CQ buffer into vma: one contiguous chunk if is_direct, otherwise one
+ * page per page_list entry.  Returns 0, or -EAGAIN if remap_pfn_range() fails.
+ * XXX the vma size is still not sanity checked against the buffer size.
+ */
+static int mthca_remap_buf(struct vm_area_struct *vma, union mthca_buf *buf,
+			   int is_direct)
+{
+	unsigned long size = vma->vm_end - vma->vm_start;
+	unsigned long addr = vma->vm_start;
+	unsigned long pfn;
+	int npages, i;
+
+	if (is_direct) {
+		pfn = __pa(buf->direct.buf) >> PAGE_SHIFT;
+		if (remap_pfn_range(vma, addr, pfn, size, vma->vm_page_prot))
+			return -EAGAIN;
+		return 0;
+	}
+
+	npages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
+	/* Advance addr by exactly one page per entry, not by i pages each time. */
+	for (i = 0; i < npages; ++i, addr += PAGE_SIZE) {
+		pfn = __pa(buf->page_list[i].buf) >> PAGE_SHIFT;
+		if (remap_pfn_range(vma, addr, pfn, PAGE_SIZE,
+				    vma->vm_page_prot))
+			return -EAGAIN;
+	}
+
+	return 0;
+}
+
+/* mmap(): low 16 pgoff bits == MTHCA_MAGIC_CQ_OFFSET maps the buffer of the
+ * CQ numbered in the upper offset bits; any other offset maps the UAR page. */
+static int mthca_mmap(struct ib_ucontext *context,
 			  struct vm_area_struct *vma)
 {
-	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
-		return -EINVAL;
+	unsigned long pgoff = vma->vm_pgoff & 0xffff;
+	struct mthca_dev *dev = to_mdev(context->device);
+	struct mthca_cq *cq;
+	int cqn;
 
-	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+	switch (pgoff) {
+	case MTHCA_MAGIC_CQ_OFFSET:
+		cqn = vma->vm_pgoff >> (32 - PAGE_SHIFT);
+		spin_lock_irq(&dev->cq_table.lock);
+		cq = mthca_array_get(&dev->cq_table.cq,
+				     cqn & (dev->limits.num_cqs - 1));
+		spin_unlock_irq(&dev->cq_table.lock);	/* must re-enable irqs */
+		if (!cq)
+			return -EINVAL;
 
-	if (io_remap_pfn_range(vma, vma->vm_start,
-			       to_mucontext(context)->uar.pfn,
-			       PAGE_SIZE, vma->vm_page_prot))
-		return -EAGAIN;
+		/* NOTE(review): nothing pins cq or ties it to this context. */
 
-	return 0;
+		return mthca_remap_buf(vma, &cq->buf.queue, cq->buf.is_direct);
+	default: /* uar */
+		if (vma->vm_end - vma->vm_start != PAGE_SIZE)
+			return -EINVAL;
+
+		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+		if (io_remap_pfn_range(vma, vma->vm_start,
+				       to_mucontext(context)->uar.pfn,
+				       PAGE_SIZE, vma->vm_page_prot))
+			return -EAGAIN;
+		return 0;
+	}
 }
 
 static struct ib_pd *mthca_alloc_pd(struct ib_device *ibdev,
@@ -696,7 +750,6 @@ static struct ib_cq *mthca_create_cq(str
 	}
 
 	if (context) {
-		cq->buf.mr.ibmr.lkey = ucmd.lkey;
 		cq->set_ci_db_index  = ucmd.set_db_index;
 		cq->arm_db_index     = ucmd.arm_db_index;
 	}
@@ -737,7 +790,7 @@ err_unmap_set:
 }
 
 static int mthca_alloc_resize_buf(struct mthca_dev *dev, struct mthca_cq *cq,
-				  int entries)
+				  int entries, u32 pdn)
 {
 	int ret;
 
@@ -763,7 +816,8 @@ unlock:
 	if (ret)
 		return ret;
 
-	ret = mthca_alloc_cq_buf(dev, &cq->resize_buf->buf, entries);
+	ret = mthca_alloc_cq_buf(dev, &cq->resize_buf->buf, entries, pdn);
+
 	if (ret) {
 		spin_lock_irq(&cq->lock);
 		kfree(cq->resize_buf);
@@ -786,9 +840,10 @@ static int mthca_resize_cq(struct ib_cq 
 	struct mthca_dev *dev = to_mdev(ibcq->device);
 	struct mthca_cq *cq = to_mcq(ibcq);
 	struct mthca_resize_cq ucmd;
-	u32 lkey;
+	struct mthca_cq_buf tbuf;
+	u32 lkey, pdn;
 	u8 status;
-	int ret;
+	int ret, tcqe;
 
 	if (entries < 1 || entries > dev->limits.max_cqes)
 		return -EINVAL;
@@ -801,18 +856,22 @@ static int mthca_resize_cq(struct ib_cq 
 		goto out;
 	}
 
-	if (cq->is_kernel) {
-		ret = mthca_alloc_resize_buf(dev, cq, entries);
-		if (ret)
-			goto out;
-		lkey = cq->resize_buf->buf.mr.ibmr.lkey;
-	} else {
+	if (cq->is_kernel) 
+		pdn = dev->driver_pd.pd_num;
+	else {
 		if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
 			ret = -EFAULT;
 			goto out;
 		}
-		lkey = ucmd.lkey;
-	}
+		pdn = ucmd.pdn;
+	} 
+
+	ret = mthca_alloc_resize_buf(dev, cq, entries, pdn);
+
+	if (ret)
+		goto out;
+
+	lkey = cq->resize_buf->buf.mr.ibmr.lkey;
 
 	ret = mthca_RESIZE_CQ(dev, cq->cqn, lkey, long_log2(entries), &status);
 	if (status)
@@ -830,29 +889,24 @@ static int mthca_resize_cq(struct ib_cq 
 		goto out;
 	}
 
-	if (cq->is_kernel) {
-		struct mthca_cq_buf tbuf;
-		int tcqe;
 
-		spin_lock_irq(&cq->lock);
-		if (cq->resize_buf->state == CQ_RESIZE_READY) {
-			mthca_cq_resize_copy_cqes(cq);
-			tbuf         = cq->buf;
-			tcqe         = cq->ibcq.cqe;
-			cq->buf      = cq->resize_buf->buf;
-			cq->ibcq.cqe = cq->resize_buf->cqe;
-		} else {
-			tbuf = cq->resize_buf->buf;
-			tcqe = cq->resize_buf->cqe;
-		}
+	spin_lock_irq(&cq->lock);
+	if (cq->resize_buf->state == CQ_RESIZE_READY) {
+		mthca_cq_resize_copy_cqes(cq);
+		tbuf         = cq->buf;
+		tcqe         = cq->ibcq.cqe;
+		cq->buf      = cq->resize_buf->buf;
+		cq->ibcq.cqe = cq->resize_buf->cqe;
+	} else {
+		tbuf = cq->resize_buf->buf;
+		tcqe = cq->resize_buf->cqe;
+	}
 
-		kfree(cq->resize_buf);
-		cq->resize_buf = NULL;
-		spin_unlock_irq(&cq->lock);
+	kfree(cq->resize_buf);
+	cq->resize_buf = NULL;
+	spin_unlock_irq(&cq->lock);
 
-		mthca_free_cq_buf(dev, &tbuf, tcqe);
-	} else
-		ibcq->cqe = entries - 1;
+	mthca_free_cq_buf(dev, &tbuf, tcqe);
 
 out:
 	mutex_unlock(&cq->mutex);
@@ -1300,7 +1354,7 @@ int mthca_register_device(struct mthca_d
 	dev->ib_dev.query_gid            = mthca_query_gid;
 	dev->ib_dev.alloc_ucontext       = mthca_alloc_ucontext;
 	dev->ib_dev.dealloc_ucontext     = mthca_dealloc_ucontext;
-	dev->ib_dev.mmap                 = mthca_mmap_uar;
+	dev->ib_dev.mmap                 = mthca_mmap;
 	dev->ib_dev.alloc_pd             = mthca_alloc_pd;
 	dev->ib_dev.dealloc_pd           = mthca_dealloc_pd;
 	dev->ib_dev.create_ah            = mthca_ah_create;
diff -rup openib-1.1/drivers/infiniband/hw/mthca/mthca_qp.c openib-1.1.cq/drivers/infiniband/hw/mthca/mthca_qp.c
--- openib-1.1/drivers/infiniband/hw/mthca/mthca_qp.c	2006-10-05 06:07:01.000000000 -0700
+++ openib-1.1.cq/drivers/infiniband/hw/mthca/mthca_qp.c	2007-01-28 13:58:46.082556115 -0800
@@ -1008,7 +1008,8 @@ static int mthca_alloc_wqe_buf(struct mt
 		goto err_out;
 
 	err = mthca_buf_alloc(dev, size, MTHCA_MAX_DIRECT_QP_SIZE,
-			      &qp->queue, &qp->is_direct, pd, 0, &qp->mr);
+			      &qp->queue, &qp->is_direct, pd->pd_num, 0, 
+			      &qp->mr);
 	if (err)
 		goto err_out;
 
diff -rup openib-1.1/drivers/infiniband/hw/mthca/mthca_srq.c openib-1.1.cq/drivers/infiniband/hw/mthca/mthca_srq.c
--- openib-1.1/drivers/infiniband/hw/mthca/mthca_srq.c	2006-10-05 06:07:01.000000000 -0700
+++ openib-1.1.cq/drivers/infiniband/hw/mthca/mthca_srq.c	2007-01-28 13:58:46.086462272 -0800
@@ -155,7 +155,8 @@ static int mthca_alloc_srq_buf(struct mt
 
 	err = mthca_buf_alloc(dev, srq->max << srq->wqe_shift,
 			      MTHCA_MAX_DIRECT_SRQ_SIZE,
-			      &srq->queue, &srq->is_direct, pd, 1, &srq->mr);
+			      &srq->queue, &srq->is_direct, pd->pd_num, 1, 
+			      &srq->mr);
 	if (err) {
 		kfree(srq->wrid);
 		return err;
diff -rup openib-1.1/drivers/infiniband/hw/mthca/mthca_user.h openib-1.1.cq/drivers/infiniband/hw/mthca/mthca_user.h
--- openib-1.1/drivers/infiniband/hw/mthca/mthca_user.h	2006-10-05 06:07:01.000000000 -0700
+++ openib-1.1.cq/drivers/infiniband/hw/mthca/mthca_user.h	2007-01-28 13:58:46.095251125 -0800
@@ -77,7 +77,7 @@ struct mthca_create_cq_resp {
 
 struct mthca_resize_cq {
 	__u32 lkey;
-	__u32 reserved;
+	__u32 pdn;	/* PD number used to register the kernel-allocated resize buffer */
 };
 
 struct mthca_create_srq {
-------------- next part --------------
diff -rup openib-1.1/src/userspace/libmthca/src/mthca-abi.h openib-1.1.cq/src/userspace/libmthca/src/mthca-abi.h
--- openib-1.1/src/userspace/libmthca/src/mthca-abi.h	2006-10-05 06:07:02.000000000 -0700
+++ openib-1.1.cq/src/userspace/libmthca/src/mthca-abi.h	2007-01-28 13:58:46.056189555 -0800
@@ -69,7 +69,7 @@ struct mthca_create_cq_resp {
 struct mthca_resize_cq {
 	struct ibv_resize_cq		ibv_cmd;
 	__u32				lkey;
-	__u32				reserved;
+	__u32				pdn;	/* PD number of the context's PD, set by mthca_resize_cq() */
 };
 
 struct mthca_create_srq {
diff -rup openib-1.1/src/userspace/libmthca/src/mthca.h openib-1.1.cq/src/userspace/libmthca/src/mthca.h
--- openib-1.1/src/userspace/libmthca/src/mthca.h	2006-10-05 06:07:02.000000000 -0700
+++ openib-1.1.cq/src/userspace/libmthca/src/mthca.h	2007-01-28 13:58:46.053259937 -0800
@@ -88,6 +88,8 @@ enum {
 	MTHCA_OPCODE_INVALID        = 0xff
 };
 
+#define MTHCA_MAGIC_CQ_OFFSET 0xcffe
+
 struct mthca_ah_page;
 
 struct mthca_device {
diff -rup openib-1.1/src/userspace/libmthca/src/verbs.c openib-1.1.cq/src/userspace/libmthca/src/verbs.c
--- openib-1.1/src/userspace/libmthca/src/verbs.c	2006-10-05 06:07:02.000000000 -0700
+++ openib-1.1.cq/src/userspace/libmthca/src/verbs.c	2007-01-28 14:02:07.830680091 -0800
@@ -43,6 +43,7 @@
 #include <pthread.h>
 #include <errno.h>
 #include <netinet/in.h>
+#include <sys/mman.h>
 
 #include "mthca.h"
 #include "mthca-abi.h"
@@ -172,7 +173,9 @@ struct ibv_cq *mthca_create_cq(struct ib
 	struct mthca_create_cq      cmd;
 	struct mthca_create_cq_resp resp;
 	struct mthca_cq      	   *cq;
+	int                  	    page_size, npages;
 	int                  	    ret;
+	off_t                  	    offset;
 
 	/* Sanity check CQ size before proceeding */
 	if (cqe > 131072)
@@ -188,16 +191,6 @@ struct ibv_cq *mthca_create_cq(struct ib
 		goto err;
 
 	cqe = align_cq_size(cqe);
-	if (mthca_alloc_cq_buf(to_mdev(context->device), &cq->buf, cqe))
-		goto err;
-
-	cq->mr = __mthca_reg_mr(to_mctx(context)->pd, cq->buf.buf,
-				cqe * MTHCA_CQ_ENTRY_SIZE,
-				0, IBV_ACCESS_LOCAL_WRITE);
-	if (!cq->mr)
-		goto err_buf;
-
-	cq->mr->context = context;
 
 	if (mthca_is_memfree(context)) {
 		cq->arm_sn          = 1;
@@ -205,7 +198,7 @@ struct ibv_cq *mthca_create_cq(struct ib
 						     MTHCA_DB_TYPE_CQ_SET_CI,
 						     &cq->set_ci_db);
 		if (cq->set_ci_db_index < 0)
-			goto err_unreg;
+			goto err;
 
 		cq->arm_db_index    = mthca_alloc_db(to_mctx(context)->db_tab,
 						     MTHCA_DB_TYPE_CQ_ARM,
@@ -219,7 +212,6 @@ struct ibv_cq *mthca_create_cq(struct ib
 		cmd.set_db_index = cq->set_ci_db_index;
 	}
 
-	cmd.lkey   = cq->mr->lkey;
 	cmd.pdn    = to_mpd(to_mctx(context)->pd)->pdn;
 	ret = ibv_cmd_create_cq(context, cqe - 1, channel, comp_vector,
 				&cq->ibv_cq, &cmd.ibv_cmd, sizeof cmd,
@@ -229,6 +221,22 @@ struct ibv_cq *mthca_create_cq(struct ib
 
 	cq->cqn = resp.cqn;
 
+	page_size = to_mdev(context->device)->page_size;
+	npages = (cqe * MTHCA_CQ_ENTRY_SIZE + (page_size - 1))/page_size;
+
+	/* offset encodes CQ and cqn; lower PAGE_SHIFT bits MBZ */
+	offset = cq->cqn;
+	offset <<= 32;
+	offset += MTHCA_MAGIC_CQ_OFFSET * page_size;
+
+	cq->buf.buf = mmap(NULL, npages * page_size, PROT_READ | PROT_WRITE, 
+			   MAP_SHARED, context->cmd_fd, offset);
+
+	if (cq->buf.buf == MAP_FAILED) 
+		goto err_cmd_create_cq;
+
+	cq->buf.length = npages * page_size;
+
 	if (mthca_is_memfree(context)) {
 		mthca_set_db_qn(cq->set_ci_db, MTHCA_DB_TYPE_CQ_SET_CI, cq->cqn);
 		mthca_set_db_qn(cq->arm_db,    MTHCA_DB_TYPE_CQ_ARM,    cq->cqn);
@@ -236,6 +244,9 @@ struct ibv_cq *mthca_create_cq(struct ib
 
 	return &cq->ibv_cq;
 
+err_cmd_create_cq:
+	ibv_cmd_destroy_cq(&cq->ibv_cq);
+
 err_arm_db:
 	if (mthca_is_memfree(context))
 		mthca_free_db(to_mctx(context)->db_tab, MTHCA_DB_TYPE_CQ_ARM,
@@ -246,12 +257,6 @@ err_set_db:
 		mthca_free_db(to_mctx(context)->db_tab, MTHCA_DB_TYPE_CQ_SET_CI,
 			      cq->set_ci_db_index);
 
-err_unreg:
-	mthca_dereg_mr(cq->mr);
-
-err_buf:
-	mthca_free_buf(&cq->buf);
-
 err:
 	free(cq);
 
@@ -260,12 +265,12 @@ err:
 
 int mthca_resize_cq(struct ibv_cq *ibcq, int cqe)
 {
+	struct ibv_context *context = ibcq->context;
 	struct mthca_cq *cq = to_mcq(ibcq);
 	struct mthca_resize_cq cmd;
-	struct ibv_mr *mr;
-	struct mthca_buf buf;
-	int old_cqe;
-	int ret;
+	int    page_size, npages;
+	off_t  offset;
+	int    ret;
 
 	/* Sanity check CQ size before proceeding */
 	if (cqe > 131072)
@@ -279,38 +284,34 @@ int mthca_resize_cq(struct ibv_cq *ibcq,
 		goto out;
 	}
 
-	ret = mthca_alloc_cq_buf(to_mdev(ibcq->context->device), &buf, cqe);
-	if (ret)
-		goto out;
-
-	mr = __mthca_reg_mr(to_mctx(ibcq->context)->pd, buf.buf,
-			    cqe * MTHCA_CQ_ENTRY_SIZE,
-			    0, IBV_ACCESS_LOCAL_WRITE);
-	if (!mr) {
-		mthca_free_buf(&buf);
-		ret = ENOMEM;
+	if (munmap(cq->buf.buf, cq->buf.length) != 0) {
+		ret = errno; 
 		goto out;
 	}
 
-	mr->context = ibcq->context;
-
-	old_cqe = ibcq->cqe;
+	cmd.pdn = to_mpd(to_mctx(context)->pd)->pdn;
 
-	cmd.lkey = mr->lkey;
 	ret = ibv_cmd_resize_cq(ibcq, cqe - 1, &cmd.ibv_cmd, sizeof cmd);
-	if (ret) {
-		mthca_dereg_mr(mr);
-		mthca_free_buf(&buf);
+	if (ret)
 		goto out;
-	}
 
-	mthca_cq_resize_copy_cqes(cq, buf.buf, old_cqe);
+	page_size = to_mdev(context->device)->page_size;
+	npages = (cqe * MTHCA_CQ_ENTRY_SIZE + (page_size - 1))/page_size;
+
+	/* offset encodes CQ and cqn; lower PAGE_SHIFT bits MBZ */
+	offset = cq->cqn;
+	offset <<= 32;
+	offset += MTHCA_MAGIC_CQ_OFFSET * page_size;
 
-	mthca_dereg_mr(cq->mr);
-	mthca_free_buf(&cq->buf);
+	cq->buf.buf = mmap(NULL, npages * page_size, PROT_READ | PROT_WRITE, 
+			   MAP_SHARED, context->cmd_fd, offset);
+
+	if (cq->buf.buf == MAP_FAILED) {
+		ret = errno;
+		goto out;
+	}
 
-	cq->buf = buf;
-	cq->mr  = mr;
+	cq->buf.length = npages * page_size;
 
 out:
 	pthread_spin_unlock(&cq->lock);
@@ -332,8 +333,6 @@ int mthca_destroy_cq(struct ibv_cq *cq)
 			      to_mcq(cq)->arm_db_index);
 	}
 
-	mthca_dereg_mr(to_mcq(cq)->mr);
-	mthca_free_buf(&to_mcq(cq)->buf);
 	free(to_mcq(cq));
 
 	return 0;


More information about the general mailing list