[openib-general] [PATCHv2] mthca: speed up memory registration by filling MTTs directly

Michael S. Tsirkin mst at mellanox.co.il
Wed Dec 13 10:09:16 PST 2006


Speed up memory registration by filling in MTTs directly.  This reduces the
number of FW commands needed to register an MR by at least a factor of 2.  This
applies to all memfree cards, and to Tavor mode on 64-bit systems with the patch
I posted earlier.

Signed-off-by: Michael S. Tsirkin <mst at mellanox.co.il>

---

Roland, the previous version of this patch had a bug on memfree.
I noticed you didn't push these patches out to Linus yet so I did a
re-spin. Let me know if you prefer an incremental patch.

This applies on top of "make all MRs accessible for FMR mapping".

Index: linux-2.6/drivers/infiniband/hw/mthca/mthca_dev.h
===================================================================
--- linux-2.6.orig/drivers/infiniband/hw/mthca/mthca_dev.h
+++ linux-2.6/drivers/infiniband/hw/mthca/mthca_dev.h
@@ -464,6 +464,8 @@ void mthca_uar_free(struct mthca_dev *de
 int mthca_pd_alloc(struct mthca_dev *dev, int privileged, struct mthca_pd *pd);
 void mthca_pd_free(struct mthca_dev *dev, struct mthca_pd *pd);
 
+int mthca_write_mtt_size(struct mthca_dev *dev);
+
 struct mthca_mtt *mthca_alloc_mtt(struct mthca_dev *dev, int size);
 void mthca_free_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt);
 int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt,
Index: linux-2.6/drivers/infiniband/hw/mthca/mthca_mr.c
===================================================================
--- linux-2.6.orig/drivers/infiniband/hw/mthca/mthca_mr.c
+++ linux-2.6/drivers/infiniband/hw/mthca/mthca_mr.c
@@ -244,8 +244,8 @@ void mthca_free_mtt(struct mthca_dev *de
 	kfree(mtt);
 }
 
-int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt,
-		    int start_index, u64 *buffer_list, int list_len)
+static int __mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt,
+			     int start_index, u64 *buffer_list, int list_len)
 {
 	struct mthca_mailbox *mailbox;
 	__be64 *mtt_entry;
@@ -296,6 +296,84 @@ out:
 	return err;
 }
 
+/* Tavor: write MTT entries via the I/O mapping at tavor_fmr.mtt_base. */
+static void mthca_tavor_write_mtt_seg(struct mthca_dev *dev,
+				      struct mthca_mtt *mtt, int start_index,
+				      u64 *buffer_list, int list_len)
+{
+	u64 __iomem *mtts;
+	u32 mtt_seg = mtt->first_seg * MTHCA_MTT_SEG_SIZE;
+	int i;
+
+	mtts = dev->mr_table.tavor_fmr.mtt_base + mtt_seg +
+		start_index * sizeof (u64);
+	for (i = 0; i < list_len; ++i, ++mtts)
+		mthca_write64_raw(cpu_to_be64(buffer_list[i] |
+					      MTHCA_MTT_FLAG_PRESENT), mtts);
+}
+
+/* Arbel: store MTT entries in the memory mthca_table_find() returns. */
+static void mthca_arbel_write_mtt_seg(struct mthca_dev *dev,
+				      struct mthca_mtt *mtt, int start_index,
+				      u64 *buffer_list, int list_len)
+{
+	__be64 *mtts;
+	int i;
+	int s = start_index * sizeof (u64);
+
+	/* For Arbel, all MTTs must fit in the same page. */
+	BUG_ON(s / PAGE_SIZE != (s + list_len * sizeof(u64) - 1) / PAGE_SIZE);
+	/* Require full segments */
+	BUG_ON(s % MTHCA_MTT_SEG_SIZE);
+
+	mtts = mthca_table_find(dev->mr_table.mtt_table, mtt->first_seg +
+				s / MTHCA_MTT_SEG_SIZE);
+	BUG_ON(!mtts);
+	for (i = 0; i < list_len; ++i)
+		mtts[i] = cpu_to_be64(buffer_list[i] | MTHCA_MTT_FLAG_PRESENT);
+}
+
+/* Largest list_len that callers should hand to mthca_write_mtt() at once. */
+int mthca_write_mtt_size(struct mthca_dev *dev)
+{
+	const int page_entries = PAGE_SIZE / sizeof (u64);
+
+	/* WRITE_MTT command: keep two mailbox slots for index and reserved. */
+	if (dev->mr_table.fmr_mtt_buddy != &dev->mr_table.mtt_buddy)
+		return page_entries - 2;
+
+	/* For Arbel, all MTTs must fit in the same page. */
+	if (mthca_is_memfree(dev))
+		return page_entries;
+	return 0x7ffffff;
+}
+
+/* Write MTT entries, directly in chunks or through __mthca_write_mtt(). */
+int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt,
+		    int start_index, u64 *buffer_list, int list_len)
+{
+	int max_chunk, n;
+
+	/* Separate FMR MTT buddy: hand the whole list to the mailbox writer. */
+	if (dev->mr_table.fmr_mtt_buddy != &dev->mr_table.mtt_buddy)
+		return __mthca_write_mtt(dev, mtt, start_index,
+					 buffer_list, list_len);
+
+	max_chunk = mthca_write_mtt_size(dev);
+	for (; list_len > 0; list_len -= n) {
+		n = min(max_chunk, list_len);
+		if (mthca_is_memfree(dev))
+			mthca_arbel_write_mtt_seg(dev, mtt, start_index,
+						  buffer_list, n);
+		else
+			mthca_tavor_write_mtt_seg(dev, mtt, start_index,
+						  buffer_list, n);
+		start_index += n;
+		buffer_list += n;
+	}
+	return 0;
+}
+
 static inline u32 tavor_hw_index_to_key(u32 ind)
 {
 	return ind;
Index: linux-2.6/drivers/infiniband/hw/mthca/mthca_provider.c
===================================================================
--- linux-2.6.orig/drivers/infiniband/hw/mthca/mthca_provider.c
+++ linux-2.6/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -1015,6 +1015,7 @@ static struct ib_mr *mthca_reg_user_mr(s
 	int shift, n, len;
 	int i, j, k;
 	int err = 0;
+	int write_mtt_size;
 
 	shift = ffs(region->page_size) - 1;
 
@@ -1040,6 +1041,8 @@ static struct ib_mr *mthca_reg_user_mr(s
 
 	i = n = 0;
 
+	write_mtt_size = min(mthca_write_mtt_size(dev), (int)(PAGE_SIZE / sizeof *pages));
+
 	list_for_each_entry(chunk, &region->chunk_list, list)
 		for (j = 0; j < chunk->nmap; ++j) {
 			len = sg_dma_len(&chunk->page_list[j]) >> shift;
@@ -1047,14 +1050,11 @@ static struct ib_mr *mthca_reg_user_mr(s
 				pages[i++] = sg_dma_address(&chunk->page_list[j]) +
 					region->page_size * k;
 				/*
-				 * Be friendly to WRITE_MTT command
-				 * and leave two empty slots for the
-				 * index and reserved fields of the
-				 * mailbox.
+				 * Be friendly to write_mtt and pass it chunks
+				 * of appropriate size.
 				 */
-				if (i == PAGE_SIZE / sizeof (u64) - 2) {
-					err = mthca_write_mtt(dev, mr->mtt,
-							      n, pages, i);
+				if (i == write_mtt_size) {
+					err = mthca_write_mtt(dev, mr->mtt, n, pages, i);
 					if (err)
 						goto mtt_done;
 					n += i;
-- 
MST




More information about the general mailing list