[openib-general] [PATCH untested] IB/mthca: avoid wasting MTT entries on memfree
Michael S. Tsirkin
mst at mellanox.co.il
Mon Jan 8 11:24:07 PST 2007
I looked at what would be the clean fix for the MTT segment handling in mthca,
and I came up with the following (it applies on top of the series I posted
earlier). I think this gives us an important optimization.
Roland, could you please give me a hint whether something
like this is too big a change to get into 2.6.20?
Arbel does not actually have a concept of an MTT segment,
so for mem-free HCAs we should set the MTT segment size to a single
64-bit entry; otherwise a registration that needs only one entry still
consumes a whole 64-byte segment, and we might be wasting as much as
87% of the MTT entries.
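To make the arithmetic behind that number concrete, here is a small
stand-alone sketch (not part of the patch; the names are illustrative)
that computes the worst-case waste when a fixed 64-byte Tavor-style
segment holds a registration that needs only one 64-bit entry:

/*
 * Worst-case MTT waste with a fixed 64-byte segment: eight 8-byte
 * entries per segment, only one of them actually used.
 */
#include <stdio.h>

#define TAVOR_MTT_SEG_SIZE  0x40    /* 64 bytes per segment */
#define MTT_ENTRY_SIZE      8       /* one 64-bit MTT entry */

int main(void)
{
	unsigned entries_per_seg = TAVOR_MTT_SEG_SIZE / MTT_ENTRY_SIZE;
	unsigned used = 1;              /* single-entry registration */
	unsigned wasted = entries_per_seg - used;

	printf("entries per segment: %u\n", entries_per_seg);
	printf("worst-case waste: %u/%u = %.1f%%\n",
	       wasted, entries_per_seg, 100.0 * wasted / entries_per_seg);
	return 0;
}

This prints 7/8 = 87.5%, which is where the figure above comes from;
with a 1-entry segment size on memfree that waste goes away.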
Signed-off-by: Michael S. Tsirkin <mst at mellanox.co.il>
---
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c
index 7131446..968d151 100644
--- a/drivers/infiniband/hw/mthca/mthca_cmd.c
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.c
@@ -1051,11 +1051,7 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev,
MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_EQ_OFFSET);
dev_lim->max_eqs = 1 << (field & 0x7);
MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_MTT_OFFSET);
- if (mthca_is_memfree(dev))
- dev_lim->reserved_mtts = ALIGN((1 << (field >> 4)) * sizeof(u64),
- MTHCA_MTT_SEG_SIZE) / MTHCA_MTT_SEG_SIZE;
- else
- dev_lim->reserved_mtts = 1 << (field >> 4);
+ dev_lim->reserved_mtts = 1 << (field >> 4);
MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_MRW_SZ_OFFSET);
dev_lim->max_mrw_sz = 1 << field;
MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_MRW_OFFSET);
diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h
index b7e42ef..0973359 100644
--- a/drivers/infiniband/hw/mthca/mthca_dev.h
+++ b/drivers/infiniband/hw/mthca/mthca_dev.h
@@ -78,16 +78,17 @@ enum {
};
enum {
- MTHCA_EQ_CONTEXT_SIZE = 0x40,
- MTHCA_CQ_CONTEXT_SIZE = 0x40,
- MTHCA_QP_CONTEXT_SIZE = 0x200,
- MTHCA_RDB_ENTRY_SIZE = 0x20,
- MTHCA_AV_SIZE = 0x20,
- MTHCA_MGM_ENTRY_SIZE = 0x40,
+ MTHCA_EQ_CONTEXT_SIZE = 0x40,
+ MTHCA_CQ_CONTEXT_SIZE = 0x40,
+ MTHCA_QP_CONTEXT_SIZE = 0x200,
+ MTHCA_RDB_ENTRY_SIZE = 0x20,
+ MTHCA_AV_SIZE = 0x20,
+ MTHCA_MGM_ENTRY_SIZE = 0x40,
+
+ MTHCA_TAVOR_MTT_SEG_SIZE = 0x40,
/* Arbel FW gives us these, but we need them for Tavor */
MTHCA_MPT_ENTRY_SIZE = 0x40,
- MTHCA_MTT_SEG_SIZE = 0x40,
MTHCA_QP_PER_MGM = 4 * (MTHCA_MGM_ENTRY_SIZE / 16 - 2)
};
@@ -595,4 +596,8 @@ static inline int mthca_is_memfree(struct mthca_dev *dev)
return dev->mthca_flags & MTHCA_FLAG_MEMFREE;
}
+static inline unsigned mthca_mtt_seg_size(struct mthca_dev *dev)
+{
+ return mthca_is_memfree(dev) ? sizeof(u64) : MTHCA_TAVOR_MTT_SEG_SIZE;
+}
#endif /* MTHCA_DEV_H */
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index bbe9143..d9d5b89 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -465,11 +465,11 @@ static int mthca_init_icm(struct mthca_dev *mdev,
}
/* CPU writes to non-reserved MTTs, while HCA might DMA to reserved mtts */
- mdev->limits.reserved_mtts = ALIGN(mdev->limits.reserved_mtts * MTHCA_MTT_SEG_SIZE,
- dma_get_cache_alignment()) / MTHCA_MTT_SEG_SIZE;
+ mdev->limits.reserved_mtts = ALIGN(mdev->limits.reserved_mtts * sizeof(u64),
+ dma_get_cache_alignment()) / sizeof(u64);
mdev->mr_table.mtt_table = mthca_alloc_icm_table(mdev, init_hca->mtt_base,
- MTHCA_MTT_SEG_SIZE,
+ sizeof(u64),
mdev->limits.num_mtt_segs,
mdev->limits.reserved_mtts,
1, 0);
diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c
index 88f9dc2..0357dbe 100644
--- a/drivers/infiniband/hw/mthca/mthca_mr.c
+++ b/drivers/infiniband/hw/mthca/mthca_mr.c
@@ -212,7 +212,7 @@ static struct mthca_mtt *__mthca_alloc_mtt(struct mthca_dev *dev, int size,
mtt->buddy = buddy;
mtt->order = 0;
- for (i = MTHCA_MTT_SEG_SIZE / 8; i < size; i <<= 1)
+ for (i = mthca_mtt_seg_size(dev) / sizeof(u64); i < size; i <<= 1)
++mtt->order;
mtt->first_seg = mthca_alloc_mtt_range(dev, mtt->order, buddy);
@@ -259,7 +259,7 @@ static int __mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt,
while (list_len > 0) {
mtt_entry[0] = cpu_to_be64(dev->mr_table.mtt_base +
- mtt->first_seg * MTHCA_MTT_SEG_SIZE +
+ mtt->first_seg * mthca_mtt_seg_size(dev) +
start_index * 8);
mtt_entry[1] = 0;
for (i = 0; i < list_len && i < MTHCA_MAILBOX_SIZE / 8 - 2; ++i)
@@ -302,7 +302,7 @@ void mthca_tavor_write_mtt_seg(struct mthca_dev *dev, struct mthca_mtt *mtt,
u32 mtt_seg;
int i;
- mtt_seg = mtt->first_seg * MTHCA_MTT_SEG_SIZE;
+ mtt_seg = mtt->first_seg * MTHCA_TAVOR_MTT_SEG_SIZE;
mtts = dev->mr_table.tavor_fmr.mtt_base + mtt_seg + start_index * sizeof (u64);
for (i = 0; i < list_len; ++i) {
__be64 mtt_entry = cpu_to_be64(buffer_list[i] |
@@ -321,11 +321,9 @@ void mthca_arbel_write_mtt_seg(struct mthca_dev *dev, struct mthca_mtt *mtt,
/* For Arbel, all MTTs must fit in the same page. */
BUG_ON(s / PAGE_SIZE != (s + list_len * sizeof(u64) - 1) / PAGE_SIZE);
- /* Require full segments */
- BUG_ON(s % MTHCA_MTT_SEG_SIZE);
mtts = mthca_table_find(dev->mr_table.mtt_table, mtt->first_seg +
- s / MTHCA_MTT_SEG_SIZE, &dma_handle);
+ s / sizeof(u64), &dma_handle);
BUG_ON(!mtts);
@@ -470,7 +468,7 @@ int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift,
if (mr->mtt)
mpt_entry->mtt_seg =
cpu_to_be64(dev->mr_table.mtt_base +
- mr->mtt->first_seg * MTHCA_MTT_SEG_SIZE);
+ mr->mtt->first_seg * mthca_mtt_seg_size(dev));
if (0) {
mthca_dbg(dev, "Dumping MPT entry %08x:\n", mr->ibmr.lkey);
@@ -615,7 +613,7 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
if (IS_ERR(mr->mtt))
goto err_out_table;
- mtt_seg = mr->mtt->first_seg * MTHCA_MTT_SEG_SIZE;
+ mtt_seg = mr->mtt->first_seg * mthca_mtt_seg_size(dev);
if (mthca_is_memfree(dev)) {
mr->mem.arbel.mtts = mthca_table_find(dev->mr_table.mtt_table,
@@ -907,7 +905,7 @@ int mthca_init_mr_table(struct mthca_dev *dev)
dev->mr_table.mtt_base);
dev->mr_table.tavor_fmr.mtt_base =
- ioremap(addr, mtts * MTHCA_MTT_SEG_SIZE);
+ ioremap(addr, mtts * MTHCA_TAVOR_MTT_SEG_SIZE);
if (!dev->mr_table.tavor_fmr.mtt_base) {
mthca_warn(dev, "MTT ioremap for FMR failed.\n");
err = -ENOMEM;
diff --git a/drivers/infiniband/hw/mthca/mthca_profile.c b/drivers/infiniband/hw/mthca/mthca_profile.c
index 26bf86d..7367150 100644
--- a/drivers/infiniband/hw/mthca/mthca_profile.c
+++ b/drivers/infiniband/hw/mthca/mthca_profile.c
@@ -96,7 +96,7 @@ u64 mthca_make_profile(struct mthca_dev *dev,
profile[MTHCA_RES_RDB].size = MTHCA_RDB_ENTRY_SIZE;
profile[MTHCA_RES_MCG].size = MTHCA_MGM_ENTRY_SIZE;
profile[MTHCA_RES_MPT].size = dev_lim->mpt_entry_sz;
- profile[MTHCA_RES_MTT].size = MTHCA_MTT_SEG_SIZE;
+ profile[MTHCA_RES_MTT].size = mthca_mtt_seg_size(dev);
profile[MTHCA_RES_UAR].size = dev_lim->uar_scratch_entry_sz;
profile[MTHCA_RES_UDAV].size = MTHCA_AV_SIZE;
profile[MTHCA_RES_UARC].size = request->uarc_size;
@@ -234,7 +234,8 @@ u64 mthca_make_profile(struct mthca_dev *dev,
dev->limits.num_mtt_segs = profile[i].num;
dev->mr_table.mtt_base = profile[i].start;
init_hca->mtt_base = profile[i].start;
- init_hca->mtt_seg_sz = ffs(MTHCA_MTT_SEG_SIZE) - 7;
+ if (!mthca_is_memfree(dev))
+ init_hca->mtt_seg_sz = ffs(MTHCA_TAVOR_MTT_SEG_SIZE) - 7;
break;
case MTHCA_RES_UAR:
dev->limits.num_uars = profile[i].num;
--
MST