[openib-general] [PATCH] mthca - optimize sinai large message
Eli Cohen
eli at mellanox.co.il
Tue Feb 28 00:08:35 PST 2006
Modify memory key generation to optimize large message transfers
on Sinai.
This implementation restricts the MPT table size for Sinai to a
maximum of 2^23 entries.
Signed-off-by: Eli Cohen <eli at mellanox.co.il>
Index: linux-2.6.14.2/drivers/infiniband/hw/mthca/mthca_profile.c
===================================================================
--- linux-2.6.14.2.orig/drivers/infiniband/hw/mthca/mthca_profile.c
+++ linux-2.6.14.2/drivers/infiniband/hw/mthca/mthca_profile.c
@@ -153,8 +153,8 @@ u64 mthca_make_profile(struct mthca_dev
"won't in 0x%llx bytes of context memory.\n",
(unsigned long long) total_size,
(unsigned long long) mem_avail);
- kfree(profile);
- return -ENOMEM;
+ total_size = -ENOMEM;
+ goto exit;
}
if (profile[i].size)
@@ -260,6 +260,13 @@ u64 mthca_make_profile(struct mthca_dev
*/
dev->limits.num_pds = MTHCA_NUM_PDS;
+ /* for Sinai, the MPT table must be smaller than 2^24 entries for optimized operation */
+ if ((dev->mthca_flags & MTHCA_FLAG_SINAI_OPT) && init_hca->log_mpt_sz > 23) {
+ total_size = -ENOSYS;
+ mthca_err(dev, "MPT table too large\n");
+ goto exit;
+ }
+
/*
* For Tavor, FMRs use ioremapped PCI memory. For 32 bit
* systems it may use too much vmalloc space to map all MTT
@@ -272,6 +279,7 @@ u64 mthca_make_profile(struct mthca_dev
else
dev->limits.fmr_reserved_mtts = request->fmr_reserved_mtts;
+exit:
kfree(profile);
return total_size;
}
Index: linux-2.6.14.2/drivers/infiniband/hw/mthca/mthca_main.c
===================================================================
--- linux-2.6.14.2.orig/drivers/infiniband/hw/mthca/mthca_main.c
+++ linux-2.6.14.2/drivers/infiniband/hw/mthca/mthca_main.c
@@ -937,11 +937,12 @@ static struct {
u64 latest_fw;
int is_memfree;
int is_pcie;
+ int mkey_opt;
} mthca_hca_table[] = {
- [TAVOR] = { .latest_fw = MTHCA_FW_VER(3, 3, 3), .is_memfree = 0, .is_pcie = 0 },
- [ARBEL_COMPAT] = { .latest_fw = MTHCA_FW_VER(4, 7, 0), .is_memfree = 0, .is_pcie = 1 },
- [ARBEL_NATIVE] = { .latest_fw = MTHCA_FW_VER(5, 1, 0), .is_memfree = 1, .is_pcie = 1 },
- [SINAI] = { .latest_fw = MTHCA_FW_VER(1, 0, 1), .is_memfree = 1, .is_pcie = 1 }
+ [TAVOR] = { .latest_fw = MTHCA_FW_VER(3, 3, 3), .is_memfree = 0, .is_pcie = 0, .mkey_opt = 0 },
+ [ARBEL_COMPAT] = { .latest_fw = MTHCA_FW_VER(4, 7, 0), .is_memfree = 0, .is_pcie = 1, .mkey_opt = 0 },
+ [ARBEL_NATIVE] = { .latest_fw = MTHCA_FW_VER(5, 1, 0), .is_memfree = 1, .is_pcie = 1, .mkey_opt = 0 },
+ [SINAI] = { .latest_fw = MTHCA_FW_VER(1, 0, 1), .is_memfree = 1, .is_pcie = 1, .mkey_opt = 1 }
};
static int __devinit mthca_init_one(struct pci_dev *pdev,
@@ -1037,6 +1038,9 @@ static int __devinit mthca_init_one(stru
mdev->mthca_flags |= MTHCA_FLAG_MEMFREE;
if (mthca_hca_table[id->driver_data].is_pcie)
mdev->mthca_flags |= MTHCA_FLAG_PCIE;
+ if (mthca_hca_table[id->driver_data].mkey_opt)
+ mdev->mthca_flags |= MTHCA_FLAG_SINAI_OPT;
+
/*
* Now reset the HCA before we touch the PCI capabilities or
Index: linux-2.6.14.2/drivers/infiniband/hw/mthca/mthca_dev.h
===================================================================
--- linux-2.6.14.2.orig/drivers/infiniband/hw/mthca/mthca_dev.h
+++ linux-2.6.14.2/drivers/infiniband/hw/mthca/mthca_dev.h
@@ -71,7 +71,8 @@ enum {
MTHCA_FLAG_NO_LAM = 1 << 5,
MTHCA_FLAG_FMR = 1 << 6,
MTHCA_FLAG_MEMFREE = 1 << 7,
- MTHCA_FLAG_PCIE = 1 << 8
+ MTHCA_FLAG_PCIE = 1 << 8,
+ MTHCA_FLAG_SINAI_OPT = 1 << 9
};
enum {
Index: linux-2.6.14.2/drivers/infiniband/hw/mthca/mthca_mr.c
===================================================================
--- linux-2.6.14.2.orig/drivers/infiniband/hw/mthca/mthca_mr.c
+++ linux-2.6.14.2/drivers/infiniband/hw/mthca/mthca_mr.c
@@ -76,6 +76,8 @@ struct mthca_mpt_entry {
#define MTHCA_MPT_STATUS_SW 0xF0
#define MTHCA_MPT_STATUS_HW 0x00
+#define SINAI_FMR_KEY_INC 0x1000000
+
/*
* Buddy allocator for MTT segments (currently not very efficient
* since it doesn't keep a free list and just searches linearly
@@ -330,6 +332,14 @@ static inline u32 key_to_hw_index(struct
return tavor_key_to_hw_index(key);
}
+static inline u32 adjust_key(struct mthca_dev *dev, u32 key)
+{
+ if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT)
+ return ((key << 20) & 0x800000) | (key & 0x7fffff);
+ else
+ return key;
+}
+
int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift,
u64 iova, u64 total_size, u32 access, struct mthca_mr *mr)
{
@@ -345,6 +355,7 @@ int mthca_mr_alloc(struct mthca_dev *dev
key = mthca_alloc(&dev->mr_table.mpt_alloc);
if (key == -1)
return -ENOMEM;
+ key = adjust_key(dev, key);
mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key);
if (mthca_is_memfree(dev)) {
@@ -504,6 +515,7 @@ int mthca_fmr_alloc(struct mthca_dev *de
key = mthca_alloc(&dev->mr_table.mpt_alloc);
if (key == -1)
return -ENOMEM;
+ key = adjust_key(dev, key);
idx = key & (dev->limits.num_mpts - 1);
mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key);
@@ -687,7 +699,10 @@ int mthca_arbel_map_phys_fmr(struct ib_f
++fmr->maps;
key = arbel_key_to_hw_index(fmr->ibmr.lkey);
- key += dev->limits.num_mpts;
+ if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT)
+ key += SINAI_FMR_KEY_INC;
+ else
+ key += dev->limits.num_mpts;
fmr->ibmr.lkey = fmr->ibmr.rkey = arbel_hw_index_to_key(key);
*(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_SW;
Index: linux-2.6.14.2/drivers/infiniband/hw/mthca/mthca_cmd.c
===================================================================
--- linux-2.6.14.2.orig/drivers/infiniband/hw/mthca/mthca_cmd.c
+++ linux-2.6.14.2/drivers/infiniband/hw/mthca/mthca_cmd.c
@@ -1173,7 +1173,8 @@ int mthca_INIT_HCA(struct mthca_dev *dev
int err;
#define INIT_HCA_IN_SIZE 0x200
-#define INIT_HCA_FLAGS_OFFSET 0x014
+#define INIT_HCA_FLAGS1_OFFSET 0x00c
+#define INIT_HCA_FLAGS2_OFFSET 0x014
#define INIT_HCA_QPC_OFFSET 0x020
#define INIT_HCA_QPC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x10)
#define INIT_HCA_LOG_QP_OFFSET (INIT_HCA_QPC_OFFSET + 0x17)
@@ -1216,15 +1217,18 @@ int mthca_INIT_HCA(struct mthca_dev *dev
memset(inbox, 0, INIT_HCA_IN_SIZE);
+ if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT)
+ MTHCA_PUT(inbox, 0x1, INIT_HCA_FLAGS1_OFFSET);
+
#if defined(__LITTLE_ENDIAN)
- *(inbox + INIT_HCA_FLAGS_OFFSET / 4) &= ~cpu_to_be32(1 << 1);
+ *(inbox + INIT_HCA_FLAGS2_OFFSET / 4) &= ~cpu_to_be32(1 << 1);
#elif defined(__BIG_ENDIAN)
- *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 1);
+ *(inbox + INIT_HCA_FLAGS2_OFFSET / 4) |= cpu_to_be32(1 << 1);
#else
#error Host endianness not defined
#endif
/* Check port for UD address vector: */
- *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1);
+ *(inbox + INIT_HCA_FLAGS2_OFFSET / 4) |= cpu_to_be32(1);
/* We leave wqe_quota, responder_exu, etc as 0 (default) */
More information about the general
mailing list