[openib-general] [PATCH] fmr support in mthca

Michael S. Tsirkin mst at mellanox.co.il
Thu Mar 17 12:16:46 PST 2005


This patch implements FMR support. I also rolled into it two fixes
for regular MRs that I posted previously; let me know if that's a problem.

This seems to be working fine for me, although I have only done relatively
basic tests (roughly the consumer-side sequence sketched after the list
below). Both Tavor and Arbel Native modes are supported. I made some
tradeoffs for simplicity; let me know what you think:
- for Tavor, I keep two pages mapped for each FMR: one for MPT and one for
  MTT access. This uses more kernel virtual memory than strictly necessary,
  since many MPTs could share one page. Alternatives are:
  map/unmap the I/O memory on each FMR map/unmap request, or
  keep an intermediate table so that each page is mapped only once
  (a rough sketch of such a table follows the list).
- the ICM that holds the MPTs/MTTs is scanned linearly, and the scan is
  repeated for each MTT on each FMR map. This could be improved somewhat
  with some kind of iterator, but to really speed things up
  the ICM data structure (a list of arrays) would have to
  be replaced by some kind of tree.
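
A rough sketch of what such an intermediate mapping table could look like
(untested; all of the names below are made up for illustration and are not
part of this patch):

/* One entry per ioremapped page of MPT/MTT space (hypothetical). */
struct mthca_io_page {
	struct list_head list;
	u64              base;		/* page-aligned device address */
	void __iomem    *va;
	int              refcount;
};

/* Return a kernel mapping covering addr, reusing an existing page mapping
 * when possible.  The caller would serialize with a mutex and call a
 * matching put() that iounmaps once the refcount drops to zero.  Mapping
 * one page at a time keeps things simple. */
static void __iomem *mthca_iomap_get(struct list_head *cache, u64 addr)
{
	struct mthca_io_page *p;
	u64 base = addr & ~(u64) (PAGE_SIZE - 1);

	list_for_each_entry(p, cache, list) {
		if (p->base == base) {
			++p->refcount;
			return p->va + (addr & (PAGE_SIZE - 1));
		}
	}

	p = kmalloc(sizeof *p, GFP_KERNEL);
	if (!p)
		return NULL;

	p->va = ioremap(base, PAGE_SIZE);
	if (!p->va) {
		kfree(p);
		return NULL;
	}
	p->base     = base;
	p->refcount = 1;
	list_add(&p->list, cache);

	return p->va + (addr & (PAGE_SIZE - 1));
}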
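
For reference, the consumer-side sequence for exercising the new entry points
looks roughly like this (assuming the usual ib_alloc_fmr / ib_map_phys_fmr /
ib_unmap_fmr / ib_dealloc_fmr verbs; pd, dma_pages[], npages, iova and the
attribute values are made-up placeholders):

struct ib_fmr_attr attr = {
	.max_pages = 16,		/* illustrative values only */
	.max_maps  = 32,
	.page_size = 12,		/* page shift, i.e. 4K pages */
};
LIST_HEAD(fmr_list);
struct ib_fmr *fmr;
int ret;

fmr = ib_alloc_fmr(pd, IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE,
		   &attr);
if (IS_ERR(fmr))
	return PTR_ERR(fmr);

/* dma_pages[] holds page-aligned bus addresses, npages <= max_pages,
 * and iova is aligned to the fmr page size. */
ret = ib_map_phys_fmr(fmr, dma_pages, npages, iova);
if (ret) {
	ib_dealloc_fmr(fmr);
	return ret;
}

/* ... post work requests using fmr->lkey / fmr->rkey ... */

/* Before exceeding max_maps remaps, unmap; a single ib_unmap_fmr call
 * flushes every fmr on the list with one SYNC_TPT. */
list_add_tail(&fmr->list, &fmr_list);
ib_unmap_fmr(&fmr_list);
ib_dealloc_fmr(fmr);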

Signed-off-by: Michael S. Tsirkin <mst at mellanox.co.il>

Index: hw/mthca/mthca_dev.h
===================================================================
--- hw/mthca/mthca_dev.h	(revision 2012)
+++ hw/mthca/mthca_dev.h	(working copy)
@@ -163,6 +163,7 @@ struct mthca_mr_table {
 	int                     max_mtt_order;
 	unsigned long         **mtt_buddy;
 	u64                     mtt_base;
+	u64                     mpt_base; /* Tavor only */
 	struct mthca_icm_table *mtt_table;
 	struct mthca_icm_table *mpt_table;
 };
@@ -363,7 +364,17 @@ int mthca_mr_alloc_phys(struct mthca_dev
 			u64 *buffer_list, int buffer_size_shift,
 			int list_len, u64 iova, u64 total_size,
 			u32 access, struct mthca_mr *mr);
-void mthca_free_mr(struct mthca_dev *dev, struct mthca_mr *mr);
+void mthca_free_mr(struct mthca_dev *dev,  struct mthca_mr *mr);
+
+int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
+			u32 access, struct mthca_fmr *fmr);
+
+int mthca_fmr_map(struct mthca_dev *dev, struct mthca_fmr *fmr,
+		  u64 *page_list, int list_len, u64 iova);
+
+void mthca_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr);
+
+void mthca_free_fmr(struct mthca_dev *dev, struct mthca_fmr *fmr);
 
 int mthca_map_eq_icm(struct mthca_dev *dev, u64 icm_virt);
 void mthca_unmap_eq_icm(struct mthca_dev *dev);
Index: hw/mthca/mthca_memfree.h
===================================================================
--- hw/mthca/mthca_memfree.h	(revision 2012)
+++ hw/mthca/mthca_memfree.h	(working copy)
@@ -90,6 +90,11 @@ int mthca_table_get_range(struct mthca_d
 void mthca_table_put_range(struct mthca_dev *dev, struct mthca_icm_table *table,
 			   int start, int end);
 
+/* Nonblocking. Callers must make sure the object exists by serializing against
+ * callers of get/put. */
+void *mthca_table_find(struct mthca_dev *dev, struct mthca_icm_table *table,
+		       int obj);
+
 static inline void mthca_icm_first(struct mthca_icm *icm,
 				   struct mthca_icm_iter *iter)
 {
Index: hw/mthca/mthca_provider.c
===================================================================
--- hw/mthca/mthca_provider.c	(revision 2012)
+++ hw/mthca/mthca_provider.c	(working copy)
@@ -568,8 +568,101 @@ static struct ib_mr *mthca_reg_phys_mr(s
 
 static int mthca_dereg_mr(struct ib_mr *mr)
 {
-	mthca_free_mr(to_mdev(mr->device), to_mmr(mr));
-	kfree(mr);
+	struct mthca_mr *mmr = to_mmr(mr);
+	mthca_free_mr(to_mdev(mr->device), mmr);
+	kfree(mmr);
+	return 0;
+}
+
+static struct ib_fmr *mthca_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
+			   struct ib_fmr_attr *fmr_attr)
+{
+	struct mthca_fmr *fmr;
+	int err;
+	fmr = kmalloc(sizeof *fmr, GFP_KERNEL);
+	if (!fmr)
+		return ERR_PTR(-ENOMEM);
+
+	memcpy(&fmr->attr, fmr_attr, sizeof *fmr_attr);
+	err = mthca_fmr_alloc(to_mdev(pd->device), to_mpd(pd)->pd_num,
+			      convert_access(mr_access_flags), fmr);
+
+	if (err) {
+		kfree(fmr);
+		return ERR_PTR(err);
+	}
+	return &fmr->ibmr;
+}
+
+static int mthca_dealloc_fmr(struct ib_fmr *fmr)
+{
+	struct mthca_fmr *mfmr = to_mfmr(fmr);
+	mthca_free_fmr(to_mdev(fmr->device), mfmr);
+	kfree(mfmr);
+	return 0;
+}
+
+static int mthca_map_phys_fmr(struct ib_fmr *fmr, u64 *page_list, int list_len,
+			      u64 iova)
+{
+	int i, page_mask;
+	struct mthca_fmr *mfmr;
+
+	mfmr = to_mfmr(fmr);
+
+	if (list_len > mfmr->attr.max_pages) {
+		mthca_warn(to_mdev(fmr->device), "Attempt to map list length "
+			   "%d to fmr with max %d pages\n",
+			   list_len, mfmr->attr.max_pages);
+		return -EINVAL;
+	}
+
+	page_mask = (1 << mfmr->attr.page_size) - 1;
+
+	/* We are getting page lists, so va must be page aligned. */
+	if (iova & page_mask) {
+		mthca_warn(to_mdev(fmr->device), "Attempt to map fmr with page "
+			   "shift %d at misaligned virtual address %016llx\n",
+			   mfmr->attr.page_size, iova);
+		return -EINVAL;
+	}
+
+	/* Trust the user not to pass misaligned data in page_list */
+	if (0)
+		for (i = 0; i < list_len; ++i) {
+			if (page_list[i] & page_mask) {
+				mthca_warn(to_mdev(fmr->device), "Attempt to "
+					   "map fmr with page shift %d at "
+					   "address %016llx\n",
+					   mfmr->attr.page_size, page_list[i]);
+				return -EINVAL;
+			}
+		}
+
+	return mthca_fmr_map(to_mdev(fmr->device), to_mfmr(fmr), page_list,
+			     list_len, iova);
+}
+
+static int mthca_unmap_fmr(struct list_head *fmr_list)
+{
+	struct mthca_dev *mdev = NULL;
+	struct ib_fmr *fmr;
+	int err;
+	u8 status;
+
+	list_for_each_entry(fmr, fmr_list, list) {
+		mdev = to_mdev(fmr->device);
+		mthca_fmr_unmap(mdev, to_mfmr(fmr));
+	}
+
+	if (!mdev)
+		return 0;
+
+	err = mthca_SYNC_TPT(mdev, &status);
+	if (err)
+		return err;
+	if (status)
+		return -EINVAL;
 	return 0;
 }
 
@@ -636,6 +729,15 @@ int mthca_register_device(struct mthca_d
 	dev->ib_dev.get_dma_mr           = mthca_get_dma_mr;
 	dev->ib_dev.reg_phys_mr          = mthca_reg_phys_mr;
 	dev->ib_dev.dereg_mr             = mthca_dereg_mr;
+
+	if (dev->hca_type == ARBEL_NATIVE ||
+	    !(dev->mthca_flags & MTHCA_FLAG_DDR_HIDDEN)) {
+		dev->ib_dev.alloc_fmr            = mthca_alloc_fmr;
+		dev->ib_dev.map_phys_fmr         = mthca_map_phys_fmr;
+		dev->ib_dev.unmap_fmr            = mthca_unmap_fmr;
+		dev->ib_dev.dealloc_fmr          = mthca_dealloc_fmr;
+	}
+
 	dev->ib_dev.attach_mcast         = mthca_multicast_attach;
 	dev->ib_dev.detach_mcast         = mthca_multicast_detach;
 	dev->ib_dev.process_mad          = mthca_process_mad;
Index: hw/mthca/mthca_provider.h
===================================================================
--- hw/mthca/mthca_provider.h	(revision 2012)
+++ hw/mthca/mthca_provider.h	(working copy)
@@ -60,6 +60,16 @@ struct mthca_mr {
 	u32 first_seg;
 };
 
+struct mthca_fmr {
+	struct ib_fmr ibmr;
+	struct ib_fmr_attr attr;
+	int order;
+	u32 first_seg;
+	int maps;
+	struct mthca_mpt_entry __iomem *mpt;  /* Tavor only */
+	u64 __iomem *mtts; /* Tavor only */
+};
+
 struct mthca_pd {
 	struct ib_pd    ibpd;
 	u32             pd_num;
@@ -218,6 +228,11 @@ struct mthca_sqp {
 	dma_addr_t      header_dma;
 };
 
+static inline struct mthca_fmr *to_mfmr(struct ib_fmr *ibmr)
+{
+	return container_of(ibmr, struct mthca_fmr, ibmr);
+}
+
 static inline struct mthca_mr *to_mmr(struct ib_mr *ibmr)
 {
 	return container_of(ibmr, struct mthca_mr, ibmr);
Index: hw/mthca/mthca_profile.c
===================================================================
--- hw/mthca/mthca_profile.c	(revision 2012)
+++ hw/mthca/mthca_profile.c	(working copy)
@@ -223,9 +223,10 @@ u64 mthca_make_profile(struct mthca_dev 
 			init_hca->mc_hash_sz      = 1 << (profile[i].log_num - 1);
 			break;
 		case MTHCA_RES_MPT:
-			dev->limits.num_mpts = profile[i].num;
-			init_hca->mpt_base   = profile[i].start;
-			init_hca->log_mpt_sz = profile[i].log_num;
+			dev->limits.num_mpts   = profile[i].num;
+			dev->mr_table.mpt_base = profile[i].start;
+			init_hca->mpt_base     = profile[i].start;
+			init_hca->log_mpt_sz   = profile[i].log_num;
 			break;
 		case MTHCA_RES_MTT:
 			dev->limits.num_mtt_segs = profile[i].num;
Index: hw/mthca/mthca_cmd.c
===================================================================
--- hw/mthca/mthca_cmd.c	(revision 2012)
+++ hw/mthca/mthca_cmd.c	(working copy)
@@ -1406,6 +1406,11 @@ int mthca_WRITE_MTT(struct mthca_dev *de
 	return err;
 }
 
+int mthca_SYNC_TPT(struct mthca_dev *dev, u8 *status)
+{
+	return mthca_cmd(dev, 0, 0, 0, CMD_SYNC_TPT, CMD_TIME_CLASS_B, status);
+}
+
 int mthca_MAP_EQ(struct mthca_dev *dev, u64 event_mask, int unmap,
 		 int eq_num, u8 *status)
 {
Index: hw/mthca/mthca_cmd.h
===================================================================
--- hw/mthca/mthca_cmd.h	(revision 2012)
+++ hw/mthca/mthca_cmd.h	(working copy)
@@ -277,6 +277,7 @@ int mthca_HW2SW_MPT(struct mthca_dev *de
 		    int mpt_index, u8 *status);
 int mthca_WRITE_MTT(struct mthca_dev *dev, u64 *mtt_entry,
 		    int num_mtt, u8 *status);
+int mthca_SYNC_TPT(struct mthca_dev *dev, u8 *status);
 int mthca_MAP_EQ(struct mthca_dev *dev, u64 event_mask, int unmap,
 		 int eq_num, u8 *status);
 int mthca_SW2HW_EQ(struct mthca_dev *dev, void *eq_context,
Index: hw/mthca/mthca_mr.c
===================================================================
--- hw/mthca/mthca_mr.c	(revision 2012)
+++ hw/mthca/mthca_mr.c	(working copy)
@@ -368,11 +368,13 @@ err_out_table:
 		mthca_table_put(dev, dev->mr_table.mpt_table, key);
 
 err_out_mpt_free:
-	mthca_free(&dev->mr_table.mpt_alloc, mr->ibmr.lkey);
+	mthca_free(&dev->mr_table.mpt_alloc, key);
 	return err;
 }
 
-void mthca_free_mr(struct mthca_dev *dev, struct mthca_mr *mr)
+/* Free mr or fmr */
+static void mthca_free_region(struct mthca_dev *dev, u32 lkey, int order,
+			      u32 first_seg)
 {
 	int err;
 	u8 status;
@@ -380,7 +382,7 @@ void mthca_free_mr(struct mthca_dev *dev
 	might_sleep();
 
 	err = mthca_HW2SW_MPT(dev, NULL,
-			      key_to_hw_index(dev, mr->ibmr.lkey) &
+			      key_to_hw_index(dev, lkey) &
 			      (dev->limits.num_mpts - 1),
 			      &status);
 	if (err)
@@ -389,15 +391,276 @@ void mthca_free_mr(struct mthca_dev *dev
 		mthca_warn(dev, "HW2SW_MPT returned status 0x%02x\n",
 			   status);
 
-	if (mr->order >= 0)
-		mthca_free_mtt(dev, mr->first_seg, mr->order);
+	if (order >= 0)
+		mthca_free_mtt(dev, first_seg, order);
 
 	if (dev->hca_type == ARBEL_NATIVE)
 		mthca_table_put(dev, dev->mr_table.mpt_table,
-				key_to_hw_index(dev, mr->ibmr.lkey));
-	mthca_free(&dev->mr_table.mpt_alloc, key_to_hw_index(dev, mr->ibmr.lkey));
+				key_to_hw_index(dev, lkey));
+	mthca_free(&dev->mr_table.mpt_alloc, key_to_hw_index(dev, lkey));
+}
+
+void mthca_free_mr(struct mthca_dev *dev, struct mthca_mr *mr)
+{
+	mthca_free_region(dev, mr->ibmr.lkey, mr->order, mr->first_seg);
+}
+
+int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
+			u32 access, struct mthca_fmr *mr)
+{
+	void *mailbox;
+	u64 mtt_seg;
+	struct mthca_mpt_entry *mpt_entry;
+	u32 key, idx;
+	int err = -ENOMEM;
+	u8 status;
+	int i;
+	int list_len = mr->attr.max_pages;
+
+	might_sleep();
+
+	BUG_ON(mr->attr.page_size < 12);
+	WARN_ON(mr->attr.page_size >= 32);
+
+	mr->maps = 0;
+
+	key = mthca_alloc(&dev->mr_table.mpt_alloc);
+	if (key == -1)
+		return -ENOMEM;
+	idx = key & (dev->limits.num_mpts - 1);
+	mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key);
+
+	if (dev->hca_type == ARBEL_NATIVE) {
+		err = mthca_table_get(dev, dev->mr_table.mpt_table, key);
+		if (err)
+			goto err_out_mpt_free;
+	}
+	for (i = dev->limits.mtt_seg_size / 8, mr->order = 0;
+	     i < list_len;
+	     i <<= 1, ++mr->order)
+		; /* nothing */
+
+	mr->first_seg = mthca_alloc_mtt(dev, mr->order);
+	if (mr->first_seg == -1)
+		goto err_out_table;
+
+	mtt_seg = dev->mr_table.mtt_base +
+		mr->first_seg * dev->limits.mtt_seg_size;
+
+	mr->mpt = NULL;
+	mr->mtts = NULL;
+
+	if (dev->hca_type != ARBEL_NATIVE) {
+		mr->mpt = ioremap(dev->mr_table.mpt_base +
+				  sizeof *(mr->mpt) * idx, sizeof *(mr->mpt));
+		if (!mr->mpt) {
+			mthca_dbg(dev, "Couldn't map MPT entry for fmr %08x.\n",
+				  mr->ibmr.lkey);
+			goto err_out_free_mtt;
+		}
+		mr->mtts = ioremap(mtt_seg, list_len * sizeof *(mr->mtts));
+		if (!mr->mtts) {
+			mthca_dbg(dev, "Couldn't map MTT entry %016llx "
+				  "(size %x) for fmr %08x.\n", mtt_seg,
+				  list_len * sizeof *(mr->mtts), mr->ibmr.lkey);
+			goto err_out_free_mtt;
+		}
+	}
+
+	mailbox = kmalloc(sizeof *mpt_entry + MTHCA_CMD_MAILBOX_EXTRA,
+			  GFP_KERNEL);
+	if (!mailbox)
+		goto err_out_free_mtt;
+
+	mpt_entry = MAILBOX_ALIGN(mailbox);
+
+	mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS     |
+				       MTHCA_MPT_FLAG_MIO         |
+				       MTHCA_MPT_FLAG_REGION      |
+				       access);
+
+	mpt_entry->page_size = cpu_to_be32(mr->attr.page_size - 12);
+	mpt_entry->key       = cpu_to_be32(key);
+	mpt_entry->pd        = cpu_to_be32(pd);
+	memset(&mpt_entry->start, 0,
+	       sizeof *mpt_entry - offsetof(struct mthca_mpt_entry, start));
+	mpt_entry->mtt_seg   = cpu_to_be64(mtt_seg);
+
+	if (0) {
+		mthca_dbg(dev, "Dumping MPT entry %08x:\n", mr->ibmr.lkey);
+		for (i = 0; i < sizeof (struct mthca_mpt_entry) / 4; ++i) {
+			if (i % 4 == 0)
+				printk("[%02x] ", i * 4);
+			printk(" %08x", be32_to_cpu(((u32 *) mpt_entry)[i]));
+			if ((i + 1) % 4 == 0)
+				printk("\n");
+		}
+	}
+
+	err = mthca_SW2HW_MPT(dev, mpt_entry,
+			      key & (dev->limits.num_mpts - 1),
+			      &status);
+	if (err)
+		mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err);
+	else if (status) {
+		mthca_warn(dev, "SW2HW_MPT returned status 0x%02x\n",
+			   status);
+		err = -EINVAL;
+	}
+
+	kfree(mailbox);
+	return err;
+
+err_out_free_mtt:
+	if (mr->mtts)
+		iounmap(mr->mtts);
+	if (mr->mpt)
+		iounmap(mr->mpt);
+	mthca_free_mtt(dev, mr->first_seg, mr->order);
+
+err_out_table:
+	if (dev->hca_type == ARBEL_NATIVE)
+		mthca_table_put(dev, dev->mr_table.mpt_table, key);
+
+err_out_mpt_free:
+	mthca_free(&dev->mr_table.mpt_alloc, key);
+	return err;
+}
+
+void mthca_free_fmr(struct mthca_dev *dev, struct mthca_fmr *fmr)
+{
+	mthca_free_region(dev, fmr->ibmr.lkey, fmr->order, fmr->first_seg);
+}
+
+#define MTHCA_MPT_STATUS_SW 0xF
+#define MTHCA_MPT_STATUS_HW 0x0
+
+static void mthca_tavor_fmr_map(struct mthca_dev *dev, struct mthca_fmr *fmr,
+		  u64 *page_list, int list_len, u64 iova, u32 key)
+{
+	struct mthca_mpt_entry mpt_entry;
+	int i;
+
+	writeb(MTHCA_MPT_STATUS_SW, fmr->mpt);
+
+	wmb();
+
+	for (i = 0; i < list_len; ++i) {
+		u64 mtt_entry = cpu_to_be64(page_list[i] |
+					    MTHCA_MTT_FLAG_PRESENT);
+		writeq(mtt_entry, fmr->mtts + i);
+	}
+	mpt_entry.lkey = cpu_to_be32(key);
+	mpt_entry.length = cpu_to_be64(((u64)list_len) *
+				       (1 << fmr->attr.page_size));
+	mpt_entry.start = cpu_to_be64(iova);
+
+	writel(mpt_entry.lkey, &fmr->mpt->key);
+	memcpy_toio(&fmr->mpt->start, &mpt_entry.start, 
+		    offsetof(struct mthca_mpt_entry, window_count) -
+		    offsetof(struct mthca_mpt_entry, start));
+
+	wmb();
+
+	writeb(MTHCA_MPT_STATUS_HW, fmr->mpt);
+
+	wmb();
+}
+
+static int mthca_arbel_fmr_map(struct mthca_dev *dev, struct mthca_fmr *fmr,
+		  u64 *page_list, int list_len, u64 iova, u32 key)
+{
+	void *mpt;
+	struct mthca_mpt_entry *mpt_entry;
+	u8 *mpt_status;
+	int i;
+
+	mpt = mthca_table_find(dev, dev->mr_table.mpt_table, key);
+	if (!mpt)
+		return -EINVAL;
+
+	mpt_status = mpt;
+	*mpt_status = MTHCA_MPT_STATUS_SW;
+
+	wmb();
+
+	/* This is really dumb. We are rescanning the ICM for
+	 * each MTT entry. We want some kind of iterator here.
+	 * Maybe fine for now, while things are small. */
+	for (i = 0; i < list_len; ++i) {
+		u64 *mtt_entry = mthca_table_find(dev, dev->mr_table.mtt_table,
+						  fmr->first_seg + i);
+		if (!mtt_entry)
+			return -EINVAL;
+
+		*mtt_entry = cpu_to_be64(page_list[i] | MTHCA_MTT_FLAG_PRESENT);
+	}
+
+
+	mpt_entry = mpt;
+	mpt_entry->lkey = mpt_entry->key = cpu_to_be32(key);
+	mpt_entry->length = cpu_to_be64(((u64)list_len) *
+				       	(1 << fmr->attr.page_size));
+	mpt_entry->start = cpu_to_be64(iova);
+
+	wmb();
+
+	*mpt_status = MTHCA_MPT_STATUS_HW;
+
+	wmb();
+	return 0;
+}
+
+
+int mthca_fmr_map(struct mthca_dev *dev, struct mthca_fmr *fmr,
+		  u64 *page_list, int list_len, u64 iova)
+{
+	u32 key;
+
+	if (fmr->maps >= fmr->attr.max_maps) {
+		mthca_warn(dev, "Attempt to map fmr %d times, max_maps is %d\n",
+			   fmr->maps, fmr->attr.max_maps);
+		return -EINVAL;
+	}
+
+	key = key_to_hw_index(dev, fmr->ibmr.lkey) + dev->limits.num_mpts;
+	fmr->ibmr.lkey = fmr->ibmr.rkey = hw_index_to_key(dev, key);
+	fmr->maps++;
+
+	if (dev->hca_type == ARBEL_NATIVE) {
+		return mthca_arbel_fmr_map(dev, fmr, page_list, list_len,
+					   iova, key);
+	} else {
+		mthca_tavor_fmr_map(dev, fmr, page_list, list_len,
+					   iova, key);
+		return 0;
+	}
+}
+
+void mthca_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr)
+{
+	if (!fmr->maps) {
+		return;
+	}
+
+	fmr->maps = 0;
+
+	if (dev->hca_type == ARBEL_NATIVE) {
+		u32 key = key_to_hw_index(dev, fmr->ibmr.lkey);
+		u8 *mpt_status = mthca_table_find(dev, dev->mr_table.mpt_table,
+						  key);
+		if (!mpt_status)
+			return;
+
+		*mpt_status = MTHCA_MPT_STATUS_SW;
+		wmb();
+	} else {
+		writeb(MTHCA_MPT_STATUS_SW, fmr->mpt);
+		wmb();
+	}
 }
 
+
 int __devinit mthca_init_mr_table(struct mthca_dev *dev)
 {
 	int err;
Index: hw/mthca/mthca_memfree.c
===================================================================
--- hw/mthca/mthca_memfree.c	(revision 2012)
+++ hw/mthca/mthca_memfree.c	(working copy)
@@ -192,6 +192,47 @@ void mthca_table_put(struct mthca_dev *d
 	up(&table->mutex);
 }
 
+/* Nonblocking. Callers must make sure the object exists by serializing against
+ * callers of get/put. */
+void *mthca_table_find(struct mthca_dev *dev, struct mthca_icm_table *table,
+		       int obj)
+{
+	int idx, offset, i;
+	struct mthca_icm_chunk *chunk;
+	struct mthca_icm *icm;
+	struct page *page = NULL;
+
+	/* Supported only for low mem tables for now. */
+	if (!table->lowmem)
+		return NULL;
+
+	idx    = (obj & (table->num_obj - 1)) * table->obj_size;
+	icm    = table->icm[idx / MTHCA_TABLE_CHUNK_SIZE];
+	offset = idx % MTHCA_TABLE_CHUNK_SIZE;
+
+	if (!icm)
+		return NULL;
+
+	/* Linear scan of ICM on each access. Since this is called on fmr
+	 * registration which is on data path, eventually we may want to
+	 * rearrange things to use some kind of tree. */
+
+	list_for_each_entry(chunk, &icm->chunk_list, list) {
+		for (i = 0; i < chunk->npages; ++i) {
+			if (chunk->mem[i].length > offset) {
+				page = chunk->mem[i].page;
+				goto out;
+			}
+			offset -= chunk->mem[i].length;
+		}
+	}
+out:
+	if (!page)
+		return NULL;
+
+	return lowmem_page_address(page) + offset;
+}
+
 int mthca_table_get_range(struct mthca_dev *dev, struct mthca_icm_table *table,
 			  int start, int end)
 {

-- 
MST - Michael S. Tsirkin


