[openib-general] [PATCH untested] IB/mthca: avoid wasting MTT entries on memfree

Michael S. Tsirkin mst at mellanox.co.il
Mon Jan 8 11:24:07 PST 2007


I looked at what would be the clean fix for the MTT segment handling in
mthca, and came up with the following (applies on top of the series I
posted earlier). I think this gives us an important optimization.
Roland, could you please give me a hint whether something
like this is too big a change to get into 2.6.20?


Arbel does not actually have a concept of an MTT segment.
So for memfree we should set the MTT segment size to 64 bits (a single entry);
otherwise we might be wasting as much as 87.5% of the MTT entries.
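To make that figure concrete, here is a small, purely illustrative userspace
sketch (not part of the patch) that works out the numbers, assuming the old
0x40-byte segment size and 8-byte (u64) MTT entries:

	/* Illustrative only: how the ~87.5% worst-case waste arises.
	 * Assumes the old 0x40-byte MTT segment size and 8-byte entries. */
	#include <stdio.h>

	int main(void)
	{
		unsigned int seg_size   = 0x40;	/* old MTHCA_MTT_SEG_SIZE, bytes */
		unsigned int entry_size = sizeof(unsigned long long); /* one MTT entry */
		unsigned int per_seg    = seg_size / entry_size;      /* 8 entries/segment */

		/* A region that needs a single MTT entry still consumes a whole
		 * segment, so 7 of the 8 entries go unused in the worst case. */
		printf("entries per segment: %u, worst-case waste: %.1f%%\n",
		       per_seg, 100.0 * (per_seg - 1) / per_seg);
		return 0;
	}

With one-entry segments on memfree, a registration consumes exactly the
MTT entries it needs.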

Signed-off-by: Michael S. Tsirkin <mst at mellanox.co.il>

---

diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c
index 7131446..968d151 100644
--- a/drivers/infiniband/hw/mthca/mthca_cmd.c
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.c
@@ -1051,11 +1051,7 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev,
 	MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_EQ_OFFSET);
 	dev_lim->max_eqs = 1 << (field & 0x7);
 	MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_MTT_OFFSET);
-	if (mthca_is_memfree(dev))
-		dev_lim->reserved_mtts = ALIGN((1 << (field >> 4)) * sizeof(u64),
-					       MTHCA_MTT_SEG_SIZE) / MTHCA_MTT_SEG_SIZE;
-	else
-		dev_lim->reserved_mtts = 1 << (field >> 4);
+	dev_lim->reserved_mtts = 1 << (field >> 4);
 	MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_MRW_SZ_OFFSET);
 	dev_lim->max_mrw_sz = 1 << field;
 	MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_MRW_OFFSET);
diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h
index b7e42ef..0973359 100644
--- a/drivers/infiniband/hw/mthca/mthca_dev.h
+++ b/drivers/infiniband/hw/mthca/mthca_dev.h
@@ -78,16 +78,17 @@ enum {
 };
 
 enum {
-	MTHCA_EQ_CONTEXT_SIZE =  0x40,
-	MTHCA_CQ_CONTEXT_SIZE =  0x40,
-	MTHCA_QP_CONTEXT_SIZE = 0x200,
-	MTHCA_RDB_ENTRY_SIZE  =  0x20,
-	MTHCA_AV_SIZE         =  0x20,
-	MTHCA_MGM_ENTRY_SIZE  =  0x40,
+	MTHCA_EQ_CONTEXT_SIZE    =  0x40,
+	MTHCA_CQ_CONTEXT_SIZE    =  0x40,
+	MTHCA_QP_CONTEXT_SIZE    = 0x200,
+	MTHCA_RDB_ENTRY_SIZE     =  0x20,
+	MTHCA_AV_SIZE            =  0x20,
+	MTHCA_MGM_ENTRY_SIZE     =  0x40,
+
+	MTHCA_TAVOR_MTT_SEG_SIZE =  0x40,
 
 	/* Arbel FW gives us these, but we need them for Tavor */
 	MTHCA_MPT_ENTRY_SIZE  =  0x40,
-	MTHCA_MTT_SEG_SIZE    =  0x40,
 
 	MTHCA_QP_PER_MGM      = 4 * (MTHCA_MGM_ENTRY_SIZE / 16 - 2)
 };
@@ -595,4 +596,8 @@ static inline int mthca_is_memfree(struct mthca_dev *dev)
 	return dev->mthca_flags & MTHCA_FLAG_MEMFREE;
 }
 
+static inline unsigned mthca_mtt_seg_size(struct mthca_dev *dev)
+{
+	return mthca_is_memfree(dev) ? sizeof(u64) : MTHCA_TAVOR_MTT_SEG_SIZE;
+}
 #endif /* MTHCA_DEV_H */
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index bbe9143..d9d5b89 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -465,11 +465,11 @@ static int mthca_init_icm(struct mthca_dev *mdev,
 	}
 
 	/* CPU writes to non-reserved MTTs, while HCA might DMA to reserved mtts */
-	mdev->limits.reserved_mtts = ALIGN(mdev->limits.reserved_mtts * MTHCA_MTT_SEG_SIZE,
-					   dma_get_cache_alignment()) / MTHCA_MTT_SEG_SIZE;
+	mdev->limits.reserved_mtts = ALIGN(mdev->limits.reserved_mtts * sizeof(u64),
+					   dma_get_cache_alignment()) / sizeof(u64);
 
 	mdev->mr_table.mtt_table = mthca_alloc_icm_table(mdev, init_hca->mtt_base,
-							 MTHCA_MTT_SEG_SIZE,
+							 sizeof(u64),
 							 mdev->limits.num_mtt_segs,
 							 mdev->limits.reserved_mtts,
 							 1, 0);
diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c
index 88f9dc2..0357dbe 100644
--- a/drivers/infiniband/hw/mthca/mthca_mr.c
+++ b/drivers/infiniband/hw/mthca/mthca_mr.c
@@ -212,7 +212,7 @@ static struct mthca_mtt *__mthca_alloc_mtt(struct mthca_dev *dev, int size,
 
 	mtt->buddy = buddy;
 	mtt->order = 0;
-	for (i = MTHCA_MTT_SEG_SIZE / 8; i < size; i <<= 1)
+	for (i = mthca_mtt_seg_size(dev) / sizeof(u64); i < size; i <<= 1)
 		++mtt->order;
 
 	mtt->first_seg = mthca_alloc_mtt_range(dev, mtt->order, buddy);
@@ -259,7 +259,7 @@ static int __mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt,
 
 	while (list_len > 0) {
 		mtt_entry[0] = cpu_to_be64(dev->mr_table.mtt_base +
-					   mtt->first_seg * MTHCA_MTT_SEG_SIZE +
+					   mtt->first_seg * mthca_mtt_seg_size(dev) +
 					   start_index * 8);
 		mtt_entry[1] = 0;
 		for (i = 0; i < list_len && i < MTHCA_MAILBOX_SIZE / 8 - 2; ++i)
@@ -302,7 +302,7 @@ void mthca_tavor_write_mtt_seg(struct mthca_dev *dev, struct mthca_mtt *mtt,
 	u32 mtt_seg;
 	int i;
 
-	mtt_seg = mtt->first_seg * MTHCA_MTT_SEG_SIZE;
+	mtt_seg = mtt->first_seg * MTHCA_TAVOR_MTT_SEG_SIZE;
        	mtts = dev->mr_table.tavor_fmr.mtt_base + mtt_seg + start_index * sizeof (u64);
 	for (i = 0; i < list_len; ++i) {
 		__be64 mtt_entry = cpu_to_be64(buffer_list[i] |
@@ -321,11 +321,9 @@ void mthca_arbel_write_mtt_seg(struct mthca_dev *dev, struct mthca_mtt *mtt,
 
 	/* For Arbel, all MTTs must fit in the same page. */
 	BUG_ON(s / PAGE_SIZE != (s + list_len * sizeof(u64) - 1) / PAGE_SIZE);
-	/* Require full segments */
-	BUG_ON(s % MTHCA_MTT_SEG_SIZE);
 
 	mtts = mthca_table_find(dev->mr_table.mtt_table, mtt->first_seg +
-				s / MTHCA_MTT_SEG_SIZE, &dma_handle);
+				s / sizeof(u64), &dma_handle);
 
 	BUG_ON(!mtts);
 
@@ -470,7 +468,7 @@ int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift,
 	if (mr->mtt)
 		mpt_entry->mtt_seg =
 			cpu_to_be64(dev->mr_table.mtt_base +
-				    mr->mtt->first_seg * MTHCA_MTT_SEG_SIZE);
+				    mr->mtt->first_seg * mthca_mtt_seg_size(dev));
 
 	if (0) {
 		mthca_dbg(dev, "Dumping MPT entry %08x:\n", mr->ibmr.lkey);
@@ -615,7 +613,7 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
 	if (IS_ERR(mr->mtt))
 		goto err_out_table;
 
-	mtt_seg = mr->mtt->first_seg * MTHCA_MTT_SEG_SIZE;
+	mtt_seg = mr->mtt->first_seg * mthca_mtt_seg_size(dev);
 
 	if (mthca_is_memfree(dev)) {
 		mr->mem.arbel.mtts = mthca_table_find(dev->mr_table.mtt_table,
@@ -907,7 +905,7 @@ int mthca_init_mr_table(struct mthca_dev *dev)
 			 dev->mr_table.mtt_base);
 
 		dev->mr_table.tavor_fmr.mtt_base =
-			ioremap(addr, mtts * MTHCA_MTT_SEG_SIZE);
+			ioremap(addr, mtts * MTHCA_TAVOR_MTT_SEG_SIZE);
 		if (!dev->mr_table.tavor_fmr.mtt_base) {
 			mthca_warn(dev, "MTT ioremap for FMR failed.\n");
 			err = -ENOMEM;
diff --git a/drivers/infiniband/hw/mthca/mthca_profile.c b/drivers/infiniband/hw/mthca/mthca_profile.c
index 26bf86d..7367150 100644
--- a/drivers/infiniband/hw/mthca/mthca_profile.c
+++ b/drivers/infiniband/hw/mthca/mthca_profile.c
@@ -96,7 +96,7 @@ u64 mthca_make_profile(struct mthca_dev *dev,
 	profile[MTHCA_RES_RDB].size  = MTHCA_RDB_ENTRY_SIZE;
 	profile[MTHCA_RES_MCG].size  = MTHCA_MGM_ENTRY_SIZE;
 	profile[MTHCA_RES_MPT].size  = dev_lim->mpt_entry_sz;
-	profile[MTHCA_RES_MTT].size  = MTHCA_MTT_SEG_SIZE;
+	profile[MTHCA_RES_MTT].size  = mthca_mtt_seg_size(dev);
 	profile[MTHCA_RES_UAR].size  = dev_lim->uar_scratch_entry_sz;
 	profile[MTHCA_RES_UDAV].size = MTHCA_AV_SIZE;
 	profile[MTHCA_RES_UARC].size = request->uarc_size;
@@ -234,7 +234,8 @@ u64 mthca_make_profile(struct mthca_dev *dev,
 			dev->limits.num_mtt_segs = profile[i].num;
 			dev->mr_table.mtt_base   = profile[i].start;
 			init_hca->mtt_base       = profile[i].start;
-			init_hca->mtt_seg_sz     = ffs(MTHCA_MTT_SEG_SIZE) - 7;
+			if (!mthca_is_memfree(dev))
+				init_hca->mtt_seg_sz = ffs(MTHCA_TAVOR_MTT_SEG_SIZE) - 7;
 			break;
 		case MTHCA_RES_UAR:
 			dev->limits.num_uars       = profile[i].num;

-- 
MST



