[openib-general] [PATCH] fmr support in mthca
Roland Dreier
roland at topspin.com
Thu Mar 17 19:59:31 PST 2005
Thanks for implementing this. You saved me a lot of work and Libor
will be very happy when he gets back next week.
Some comments from my first read through:
> This patch implements FMR support. I also rolled into it two fixes
> for regular mrs that I posted previously, let me know if it's a problem.
No problem although I'll apply them separately.
> This seems to be working fine for me, although I only did relatively basic
> tests. Both Tavor and Arbel Native modes are supported. I made some tradeoffs
> for simplicity, let me know what you think:
> - for tavor, I keep two pages mapped for each fmr: one for mpt and one for
> mtt access. This spends more kernel virtual memory than necessary,
> since many mpts could share one page. Alternatives are:
> map/unmap io memory on each fmr map/unmap request, or
> keep an intermediate table to map each page only once.
I don't think this is acceptable. Each ioremap has to map at least
one page plus a guard page. With two ioremaps per FMR, every FMR is
using 16K (or more) of vmalloc space. On 64 bit archs, this doesn't
matter, but on a large memory i386 machine, there's less than 128 MB
of vmalloc space available (possibly a lot less if someone is using a
video card with a big frame buffer or something). That means we're
limited to a few thousand FMRs, which isn't enough.
What if we just reserve something like 64K MPTs and MTTs for FMRs and
ioremap everything at driver startup? That would only use a few MB of
vmalloc space and probably simplify the code too.
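Something like this in mthca_init_mr_table() (rough, untested sketch;
fmr_mpt_base is a hypothetical new field in struct mthca_mr_table):

	/* Map the MPTs reserved for FMRs once at driver startup
	 * instead of doing two ioremaps per FMR. */
	dev->mr_table.fmr_mpt_base =
		ioremap(dev->mr_table.mpt_base,
			(1 << 16) * sizeof (struct mthca_mpt_entry));
	if (!dev->mr_table.fmr_mpt_base)
		return -ENOMEM;

(and the same for the corresponding MTT range), so that
mthca_fmr_alloc() just computes an offset from fmr_mpt_base instead of
calling ioremap().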
> - icm that has the mpts/mtts is linearly scanned and this is repeated
> for each mtt on each fmr map. This may be improved somewhat
> with some kind of iterator, but to really speed things up
> the icm data structure (list of arrays) would have to
> be replaced by some kind of tree.
I don't understand this. I'm probably missing something, but the
addresses don't change after we allocate the FMR, right? It seems we
could just store the MPT/MTT address in the FMR structure the same way
we do for Tavor mode.
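For example, mthca_fmr_alloc() could do something like this for Arbel
(sketch; this assumes struct mthca_fmr grows plain-pointer mpt/mtts
fields for the Arbel case, alongside the __iomem ones used for Tavor):

	mr->mpt = mthca_table_find(dev, dev->mr_table.mpt_table, key);
	mr->mtts = mthca_table_find(dev, dev->mr_table.mtt_table,
				    mr->first_seg);

and then mthca_arbel_fmr_map() would just dereference the saved
pointers instead of rescanning the ICM on every map.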
Some more nitpicky comments below...
>
> Signed-off-by: Michael S. Tsirkin <mst at mellanox.co.il>
>
> Index: hw/mthca/mthca_dev.h
> ===================================================================
> --- hw/mthca/mthca_dev.h (revision 2012)
> +++ hw/mthca/mthca_dev.h (working copy)
> @@ -163,6 +163,7 @@ struct mthca_mr_table {
> int max_mtt_order;
> unsigned long **mtt_buddy;
> u64 mtt_base;
> + u64 mpt_base; /* Tavor only */
> struct mthca_icm_table *mtt_table;
> struct mthca_icm_table *mpt_table;
> };
> @@ -363,7 +364,17 @@ int mthca_mr_alloc_phys(struct mthca_dev
> u64 *buffer_list, int buffer_size_shift,
> int list_len, u64 iova, u64 total_size,
> u32 access, struct mthca_mr *mr);
> -void mthca_free_mr(struct mthca_dev *dev, struct mthca_mr *mr);
> +void mthca_free_mr(struct mthca_dev *dev, struct mthca_mr *mr);
> +
> +int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
> + u32 access, struct mthca_fmr *fmr);
> +
> +int mthca_fmr_map(struct mthca_dev *dev, struct mthca_fmr *fmr,
> + u64 *page_list, int list_len, u64 iova);
> +
> +void mthca_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr);
> +
> +void mthca_free_fmr(struct mthca_dev *dev, struct mthca_fmr *fmr);
>
> int mthca_map_eq_icm(struct mthca_dev *dev, u64 icm_virt);
> void mthca_unmap_eq_icm(struct mthca_dev *dev);
> Index: hw/mthca/mthca_memfree.h
> ===================================================================
> --- hw/mthca/mthca_memfree.h (revision 2012)
> +++ hw/mthca/mthca_memfree.h (working copy)
> @@ -90,6 +90,11 @@ int mthca_table_get_range(struct mthca_d
> void mthca_table_put_range(struct mthca_dev *dev, struct mthca_icm_table *table,
> int start, int end);
>
> +/* Nonblocking. Callers must make sure the object exists by serializing against
> + * callers of get/put. */
> +void *mthca_table_find(struct mthca_dev *dev, struct mthca_icm_table *table,
> + int obj);
Can we just make this use the table mutex and only call it when
allocating an FMR?
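i.e. something like (sketch; mthca_table_find_locked is a hypothetical
name for the current lockless chunk-list walk):

	void *mthca_table_find(struct mthca_dev *dev,
			       struct mthca_icm_table *table, int obj)
	{
		void *addr;

		down(&table->mutex);
		addr = mthca_table_find_locked(dev, table, obj);
		up(&table->mutex);
		return addr;
	}

with mthca_fmr_alloc() caching the result as suggested above.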
> +
> static inline void mthca_icm_first(struct mthca_icm *icm,
> struct mthca_icm_iter *iter)
> {
> Index: hw/mthca/mthca_provider.c
> ===================================================================
> --- hw/mthca/mthca_provider.c (revision 2012)
> +++ hw/mthca/mthca_provider.c (working copy)
> @@ -568,8 +568,101 @@ static struct ib_mr *mthca_reg_phys_mr(s
>
> static int mthca_dereg_mr(struct ib_mr *mr)
> {
> - mthca_free_mr(to_mdev(mr->device), to_mmr(mr));
> - kfree(mr);
> + struct mthca_mr *mmr = to_mmr(mr);
> + mthca_free_mr(to_mdev(mr->device), mmr);
> + kfree(mmr);
> + return 0;
> +}
> +
> +static struct ib_fmr *mthca_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
> + struct ib_fmr_attr *fmr_attr)
> +{
> + struct mthca_fmr *fmr;
> + int err;
> + fmr = kmalloc(sizeof *fmr, GFP_KERNEL);
> + if (!fmr)
> + return ERR_PTR(-ENOMEM);
> +
> + memcpy(&fmr->attr, fmr_attr, sizeof *fmr_attr);
> + err = mthca_fmr_alloc(to_mdev(pd->device), to_mpd(pd)->pd_num,
> + convert_access(mr_access_flags), fmr);
> +
> + if (err) {
> + kfree(fmr);
> + return ERR_PTR(err);
> + }
> + return &fmr->ibmr;
> +}
> +
> +static int mthca_dealloc_fmr(struct ib_fmr *fmr)
> +{
> + struct mthca_fmr *mfmr = to_mfmr(fmr);
> + mthca_free_fmr(to_mdev(fmr->device), mfmr);
> + kfree(mfmr);
> + return 0;
> +}
> +
> +static int mthca_map_phys_fmr(struct ib_fmr *fmr, u64 *page_list, int list_len,
> + u64 iova)
> +{
> + int i, page_mask;
> + struct mthca_fmr *mfmr;
> +
> + mfmr = to_mfmr(fmr);
> +
> + if (list_len > mfmr->attr.max_pages) {
> + mthca_warn(to_mdev(fmr->device), "Attempt to map list length "
> + "%d to fmr with max %d pages\n",
> + list_len, mfmr->attr.max_pages);
> + return -EINVAL;
> + }
> +
> + page_mask = (1 << mfmr->attr.page_size) - 1;
> +
> + /* We are getting page lists, so va must be page aligned. */
> + if (iova & page_mask) {
> + mthca_warn(to_mdev(fmr->device), "Attempt to map fmr with page "
> + "shift %d at misaligned virtual address %016llx\n",
> + mfmr->attr.page_size, iova);
> + return -EINVAL;
> + }
> +
> + /* Trust the user not to pass misaligned data in page_list */
> + if (0)
> + for (i = 0; i < list_len; ++i) {
> + if (page_list[i] & page_mask) {
> + mthca_warn(to_mdev(fmr->device), "Attempt to "
> + "map fmr with page shift %d at "
> + "address %016llx\n",
> + mfmr->attr.page_size, page_list[i]);
> + return -EINVAL;
> + }
> + }
> +
> + return mthca_fmr_map(to_mdev(fmr->device), to_mfmr(fmr), page_list,
> + list_len, iova);
> +}
> +
> +static int mthca_unmap_fmr(struct list_head *fmr_list)
> +{
> + struct mthca_dev* mdev = NULL;
> + struct ib_fmr *fmr;
> + int err;
> + u8 status;
> +
> + list_for_each_entry(fmr, fmr_list, list) {
> + mdev = to_mdev(fmr->device);
> + mthca_fmr_unmap(mdev, to_mfmr(fmr));
> + }
> +
> + if (!mdev)
> + return 0;
> +
> + err = mthca_SYNC_TPT(mdev, &status);
> + if (err)
> + return err;
> + if (status)
> + return -EINVAL;
> return 0;
> }
>
> @@ -636,6 +729,15 @@ int mthca_register_device(struct mthca_d
> dev->ib_dev.get_dma_mr = mthca_get_dma_mr;
> dev->ib_dev.reg_phys_mr = mthca_reg_phys_mr;
> dev->ib_dev.dereg_mr = mthca_dereg_mr;
> +
> + if (dev->hca_type == ARBEL_NATIVE ||
> + !(dev->mthca_flags & MTHCA_FLAG_DDR_HIDDEN)) {
> + dev->ib_dev.alloc_fmr = mthca_alloc_fmr;
> + dev->ib_dev.map_phys_fmr = mthca_map_phys_fmr;
> + dev->ib_dev.unmap_fmr = mthca_unmap_fmr;
> + dev->ib_dev.dealloc_fmr = mthca_dealloc_fmr;
> + }
> +
> dev->ib_dev.attach_mcast = mthca_multicast_attach;
> dev->ib_dev.detach_mcast = mthca_multicast_detach;
> dev->ib_dev.process_mad = mthca_process_mad;
> Index: hw/mthca/mthca_provider.h
> ===================================================================
> --- hw/mthca/mthca_provider.h (revision 2012)
> +++ hw/mthca/mthca_provider.h (working copy)
> @@ -60,6 +60,16 @@ struct mthca_mr {
> u32 first_seg;
> };
>
> +struct mthca_fmr {
> + struct ib_fmr ibmr;
> + struct ib_fmr_attr attr;
> + int order;
> + u32 first_seg;
> + int maps;
> + struct mthca_mpt_entry __iomem *mpt; /* Tavor only */
> + u64 __iomem *mtts; /* Tavor only */
> +};
> +
> struct mthca_pd {
> struct ib_pd ibpd;
> u32 pd_num;
> @@ -218,6 +228,11 @@ struct mthca_sqp {
> dma_addr_t header_dma;
> };
>
> +static inline struct mthca_fmr *to_mfmr(struct ib_fmr *ibmr)
> +{
> + return container_of(ibmr, struct mthca_fmr, ibmr);
> +}
> +
> static inline struct mthca_mr *to_mmr(struct ib_mr *ibmr)
> {
> return container_of(ibmr, struct mthca_mr, ibmr);
> Index: hw/mthca/mthca_profile.c
> ===================================================================
> --- hw/mthca/mthca_profile.c (revision 2012)
> +++ hw/mthca/mthca_profile.c (working copy)
> @@ -223,9 +223,10 @@ u64 mthca_make_profile(struct mthca_dev
> init_hca->mc_hash_sz = 1 << (profile[i].log_num - 1);
> break;
> case MTHCA_RES_MPT:
> - dev->limits.num_mpts = profile[i].num;
> - init_hca->mpt_base = profile[i].start;
> - init_hca->log_mpt_sz = profile[i].log_num;
> + dev->limits.num_mpts = profile[i].num;
> + dev->mr_table.mpt_base = profile[i].start;
> + init_hca->mpt_base = profile[i].start;
> + init_hca->log_mpt_sz = profile[i].log_num;
> break;
> case MTHCA_RES_MTT:
> dev->limits.num_mtt_segs = profile[i].num;
> Index: hw/mthca/mthca_cmd.c
> ===================================================================
> --- hw/mthca/mthca_cmd.c (revision 2012)
> +++ hw/mthca/mthca_cmd.c (working copy)
> @@ -1406,6 +1406,11 @@ int mthca_WRITE_MTT(struct mthca_dev *de
> return err;
> }
>
> +int mthca_SYNC_TPT(struct mthca_dev *dev, u8 *status)
> +{
> + return mthca_cmd(dev, 0, 0, 0, CMD_SYNC_TPT, CMD_TIME_CLASS_B, status);
> +}
> +
> int mthca_MAP_EQ(struct mthca_dev *dev, u64 event_mask, int unmap,
> int eq_num, u8 *status)
> {
> Index: hw/mthca/mthca_cmd.h
> ===================================================================
> --- hw/mthca/mthca_cmd.h (revision 2012)
> +++ hw/mthca/mthca_cmd.h (working copy)
> @@ -277,6 +277,7 @@ int mthca_HW2SW_MPT(struct mthca_dev *de
> int mpt_index, u8 *status);
> int mthca_WRITE_MTT(struct mthca_dev *dev, u64 *mtt_entry,
> int num_mtt, u8 *status);
> +int mthca_SYNC_TPT(struct mthca_dev *dev, u8 *status);
> int mthca_MAP_EQ(struct mthca_dev *dev, u64 event_mask, int unmap,
> int eq_num, u8 *status);
> int mthca_SW2HW_EQ(struct mthca_dev *dev, void *eq_context,
> Index: hw/mthca/mthca_mr.c
> ===================================================================
> --- hw/mthca/mthca_mr.c (revision 2012)
> +++ hw/mthca/mthca_mr.c (working copy)
> @@ -368,11 +368,13 @@ err_out_table:
> mthca_table_put(dev, dev->mr_table.mpt_table, key);
>
> err_out_mpt_free:
> - mthca_free(&dev->mr_table.mpt_alloc, mr->ibmr.lkey);
> + mthca_free(&dev->mr_table.mpt_alloc, key);
> return err;
> }
>
> -void mthca_free_mr(struct mthca_dev *dev, struct mthca_mr *mr)
> +/* Free mr or fmr */
> +static void mthca_free_region(struct mthca_dev *dev, u32 lkey, int order,
> + u32 first_seg)
> {
> int err;
> u8 status;
> @@ -380,7 +382,7 @@ void mthca_free_mr(struct mthca_dev *dev
> might_sleep();
>
> err = mthca_HW2SW_MPT(dev, NULL,
> - key_to_hw_index(dev, mr->ibmr.lkey) &
> + key_to_hw_index(dev, lkey) &
> (dev->limits.num_mpts - 1),
> &status);
> if (err)
> @@ -389,15 +391,276 @@ void mthca_free_mr(struct mthca_dev *dev
> mthca_warn(dev, "HW2SW_MPT returned status 0x%02x\n",
> status);
>
> - if (mr->order >= 0)
> - mthca_free_mtt(dev, mr->first_seg, mr->order);
> + if (order >= 0)
> + mthca_free_mtt(dev, first_seg, order);
>
> if (dev->hca_type == ARBEL_NATIVE)
> mthca_table_put(dev, dev->mr_table.mpt_table,
> - key_to_hw_index(dev, mr->ibmr.lkey));
> - mthca_free(&dev->mr_table.mpt_alloc, key_to_hw_index(dev, mr->ibmr.lkey));
> + key_to_hw_index(dev, lkey));
> + mthca_free(&dev->mr_table.mpt_alloc, key_to_hw_index(dev, lkey));
> +}
> +
> +void mthca_free_mr(struct mthca_dev *dev, struct mthca_mr *mr)
> +{
> + mthca_free_region(dev, mr->ibmr.lkey, mr->order, mr->first_seg);
> +}
> +
> +int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
> + u32 access, struct mthca_fmr *mr)
> +{
> + void *mailbox;
> + u64 mtt_seg;
> + struct mthca_mpt_entry *mpt_entry;
> + u32 key, idx;
> + int err = -ENOMEM;
> + u8 status;
> + int i;
> + int list_len = mr->attr.max_pages;
> +
> + might_sleep();
> +
> + BUG_ON(mr->attr.page_size < 12);
> + WARN_ON(mr->attr.page_size >= 32);
Why is one a BUG and one a WARN? Why not just return an error in both
cases? Something like:
	if (mr->attr.page_size < 12 || mr->attr.page_size >= 32)
		return -EINVAL;
> +
> + mr->maps = 0;
> +
> + key = mthca_alloc(&dev->mr_table.mpt_alloc);
> + if (key == -1)
> + return -ENOMEM;
> + idx = key & (dev->limits.num_mpts - 1);
> + mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key);
> +
> + if (dev->hca_type == ARBEL_NATIVE) {
> + err = mthca_table_get(dev, dev->mr_table.mpt_table, key);
> + if (err)
> + goto err_out_mpt_free;
> + }
> + for (i = dev->limits.mtt_seg_size / 8, mr->order = 0;
> + i < list_len;
> + i <<= 1, ++mr->order)
> + ; /* nothing */
> +
> + mr->first_seg = mthca_alloc_mtt(dev, mr->order);
> + if (mr->first_seg == -1)
> + goto err_out_table;
> +
> + mtt_seg = dev->mr_table.mtt_base +
> + mr->first_seg * dev->limits.mtt_seg_size;
> +
> + mr->mpt = NULL;
> + mr->mtts = NULL;
> +
> + if (dev->hca_type != ARBEL_NATIVE) {
> + mr->mpt = ioremap(dev->mr_table.mpt_base +
> + sizeof *(mr->mpt) * idx, sizeof *(mr->mpt));
> + if (!mr->mpt) {
> + mthca_dbg(dev, "Couldn't map MPT entry for fmr %08x.\n",
> + mr->ibmr.lkey);
> + goto err_out_free_mtt;
> + }
> + mr->mtts = ioremap(mtt_seg, list_len * sizeof *(mr->mtts));
> + if (!mr->mtts) {
> + mthca_dbg(dev, "Couldn't map MTT entry %016llx "
> + "(size %x) for fmr %08x.\n", mtt_seg,
> + list_len * sizeof u64, mr->ibmr.lkey);
> + goto err_out_free_mtt;
> + }
> + }
> +
> + mailbox = kmalloc(sizeof *mpt_entry + MTHCA_CMD_MAILBOX_EXTRA,
> + GFP_KERNEL);
> + if (!mailbox)
> + goto err_out_free_mtt;
> +
> + mpt_entry = MAILBOX_ALIGN(mailbox);
> +
> + mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS |
> + MTHCA_MPT_FLAG_MIO |
> + MTHCA_MPT_FLAG_REGION |
> + access);
> +
> + mpt_entry->page_size = cpu_to_be32(mr->attr.page_size - 12);
> + mpt_entry->key = cpu_to_be32(key);
> + mpt_entry->pd = cpu_to_be32(pd);
> + memset(&mpt_entry->start, 0,
> + sizeof *mpt_entry - offsetof(struct mthca_mpt_entry, start));
> + mpt_entry->mtt_seg = cpu_to_be64(mtt_seg);
> +
> + if (0) {
> + mthca_dbg(dev, "Dumping MPT entry %08x:\n", mr->ibmr.lkey);
> + for (i = 0; i < sizeof (struct mthca_mpt_entry) / 4; ++i) {
> + if (i % 4 == 0)
> + printk("[%02x] ", i * 4);
> + printk(" %08x", be32_to_cpu(((u32 *) mpt_entry)[i]));
> + if ((i + 1) % 4 == 0)
> + printk("\n");
> + }
> + }
> +
> + err = mthca_SW2HW_MPT(dev, mpt_entry,
> + key & (dev->limits.num_mpts - 1),
> + &status);
> + if (err)
> + mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err);
> + else if (status) {
> + mthca_warn(dev, "SW2HW_MPT returned status 0x%02x\n",
> + status);
> + err = -EINVAL;
> + }
> +
> + kfree(mailbox);
> + return err;
> +
> +err_out_free_mtt:
> + if (mr->mtts)
> + iounmap(mr->mtts);
> + if (mr->mpt)
> + iounmap(mr->mpt);
> + mthca_free_mtt(dev, mr->first_seg, mr->order);
> +
> +err_out_table:
> + if (dev->hca_type == ARBEL_NATIVE)
> + mthca_table_put(dev, dev->mr_table.mpt_table, key);
> +
> +err_out_mpt_free:
> + mthca_free(&dev->mr_table.mpt_alloc, key);
> + return err;
> +}
> +
> +void mthca_free_fmr(struct mthca_dev *dev, struct mthca_fmr *fmr)
> +{
> + mthca_free_region(dev, fmr->ibmr.lkey, fmr->order, fmr->first_seg);
> +}
> +
> +#define MTHCA_MPT_STATUS_SW 0xF
> +#define MTHCA_MPT_STATUS_HW 0x0
> +
> +static void mthca_tavor_fmr_map(struct mthca_dev *dev, struct mthca_fmr *fmr,
> + u64 *page_list, int list_len, u64 iova, u32 key)
> +{
> + struct mthca_mpt_entry mpt_entry;
> + int i;
> +
> + writeb(MTHCA_MPT_STATUS_SW,fmr->mpt);
It's a minor nitpick, but please put a space after the ",". (This
appears a few more times below too.)
> +
> + wmb();
Are the wmb()s required in this function? writeX() is already ordered.
> +
> + for (i = 0; i < list_len; ++i) {
> + u64 mtt_entry = cpu_to_be64(page_list[i] |
> + MTHCA_MTT_FLAG_PRESENT);
> + writeq(mtt_entry, fmr->mtts + i);
Don't use writeq() unconditionally. 32 bit archs don't have it.
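Something like this should work on both (sketch; name is just a
suggestion -- since the value was already byte-swapped by
cpu_to_be64(), writing the two halves in memory order with the
non-swapping __raw_writel() keeps the same byte layout on the bus):

	static inline void mthca_write64_raw(u64 val, void __iomem *dest)
	{
		__raw_writel(((u32 *) &val)[0], dest);
		__raw_writel(((u32 *) &val)[1], dest + 4);
	}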
> + }
> + mpt_entry.lkey = cpu_to_be32(key);
> + mpt_entry.length = cpu_to_be64(((u64)list_len) *
> + (1 << fmr->attr.page_size));
> + mpt_entry.start = cpu_to_be64(iova);
> +
> + writel(mpt_entry.lkey, &fmr->mpt->key);
> + memcpy_toio(&fmr->mpt->start, &mpt_entry.start,
> + offsetof(struct mthca_mpt_entry, window_count) -
> + offsetof(struct mthca_mpt_entry, start));
> +
> + wmb();
> +
> + writeb(MTHCA_MPT_STATUS_SW,fmr->mpt);
Should this be STATUS_HW here? It seems we exit the function with the
MPT marked invalid.
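i.e. I'd expect the function to end with:

	writeb(MTHCA_MPT_STATUS_HW, fmr->mpt);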
> +
> + wmb();
> +}
> +
> +static int mthca_arbel_fmr_map(struct mthca_dev *dev, struct mthca_fmr *fmr,
> + u64 *page_list, int list_len, u64 iova, u32 key)
> +{
> + void* mpt;
Write this as "void *mpt" instead please.
> + struct mthca_mpt_entry *mpt_entry;
> + u8 *mpt_status;
> + int i;
> +
> + mpt = mthca_table_find(dev, dev->mr_table.mpt_table, key);
> + if (!mpt)
> + return -EINVAL;
> +
> + mpt_status = mpt;
> + *mpt_status = MTHCA_MPT_STATUS_SW;
> +
> + wmb();
> +
> + /* This is really dumb. We are rescanning the ICM on
> + * each mpt entry. We want some kind of iterator here.
> + * May be fine meanwhile, while things are small. */
> + for (i = 0; i < list_len; ++i) {
> + u64 *mtt_entry = mthca_table_find(dev, dev->mr_table.mtt_table,
> + fmr->first_seg + i);
> + if (!mtt_entry)
> + return -EINVAL;
> +
> + *mtt_entry = cpu_to_be64(page_list[i] | MTHCA_MTT_FLAG_PRESENT);
> + }
> +
> +
> + mpt_entry = mpt;
> + mpt_entry->lkey = mpt_entry->key = cpu_to_be32(key);
> + mpt_entry->length = cpu_to_be64(((u64)list_len) *
> + (1 << fmr->attr.page_size));
> + mpt_entry->start = cpu_to_be64(iova);
> +
> + wmb();
> +
> + *mpt_status = MTHCA_MPT_STATUS_HW;
> +
> + wmb();
> + return 0;
> +}
> +
> +
> +int mthca_fmr_map(struct mthca_dev *dev, struct mthca_fmr *fmr,
> + u64 *page_list, int list_len, u64 iova)
> +{
> + u32 key;
> +
> + if (fmr->maps >= fmr->attr.max_maps) {
> + mthca_warn(dev, "Attempt to map fmr %d times, max_maps is %d\n",
> + fmr->maps, fmr->attr.max_maps);
> + return -EINVAL;
> + }
> +
> + key = key_to_hw_index(dev, fmr->ibmr.lkey) + dev->limits.num_mpts;
> + fmr->ibmr.lkey = fmr->ibmr.rkey = hw_index_to_key(dev, key);
> + fmr->maps++;
Maybe put this common part in a static function and have
mthca_{arbel,tavor}_fmr_map() call it. Then we could just set the
map_phys_fmr method in the device struct to the right function at
initialization time.
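Something like (sketch; names are just suggestions, and the
ib_fmr/mthca_fmr wrappers in mthca_provider.c would need the same
split):

	static int mthca_fmr_map_prep(struct mthca_dev *dev,
				      struct mthca_fmr *fmr, u32 *key)
	{
		if (fmr->maps >= fmr->attr.max_maps)
			return -EINVAL;

		*key = key_to_hw_index(dev, fmr->ibmr.lkey) +
			dev->limits.num_mpts;
		fmr->ibmr.lkey = fmr->ibmr.rkey = hw_index_to_key(dev, *key);
		++fmr->maps;
		return 0;
	}

and in mthca_register_device():

	dev->ib_dev.map_phys_fmr = dev->hca_type == ARBEL_NATIVE ?
		mthca_arbel_map_phys_fmr : mthca_tavor_map_phys_fmr;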
> +
> + if (dev->hca_type == ARBEL_NATIVE) {
> + return mthca_arbel_fmr_map(dev, fmr, page_list, list_len,
> + iova, key);
> + } else {
> + mthca_tavor_fmr_map(dev, fmr, page_list, list_len,
> + iova, key);
> + return 0;
> + }
> +}
> +
> +void mthca_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr)
> +{
> + if (!fmr->maps) {
> + return;
> + }
Nitpick -- no { } needed here.
> +
> + fmr->maps = 0;
> +
> + if (dev->hca_type == ARBEL_NATIVE) {
> + u32 key = key_to_hw_index(dev, fmr->ibmr.lkey);
> + u8 *mpt_status = mthca_table_find(dev, dev->mr_table.mpt_table,
> + key);
> + if (!mpt_status)
> + return;
> +
> + *mpt_status = MTHCA_MPT_STATUS_SW;
> + wmb();
> + } else {
> + writeb(MTHCA_MPT_STATUS_SW,fmr->mpt);
> + wmb();
> + }
> }
>
> +
> int __devinit mthca_init_mr_table(struct mthca_dev *dev)
> {
> int err;
> Index: hw/mthca/mthca_memfree.c
> ===================================================================
> --- hw/mthca/mthca_memfree.c (revision 2012)
> +++ hw/mthca/mthca_memfree.c (working copy)
> @@ -192,6 +192,47 @@ void mthca_table_put(struct mthca_dev *d
> up(&table->mutex);
> }
>
> +/* Nonblocking. Callers must make sure the object exists by serializing against
> + * callers of get/put. */
> +void *mthca_table_find(struct mthca_dev *dev, struct mthca_icm_table *table,
> + int obj)
> +{
> + int idx, offset, i;
> + struct mthca_icm_chunk *chunk;
> + struct mthca_icm *icm;
> + struct page *page = NULL;
> +
> + /* Supported only for low mem tables for now. */
> + if (!table->lowmem)
> + return NULL;
> +
> + idx = (obj & (table->num_obj - 1)) * table->obj_size;
> + icm = table->icm[idx / MTHCA_TABLE_CHUNK_SIZE];
> + offset = idx % MTHCA_TABLE_CHUNK_SIZE;
What happened to the indentation here?
> +
> + if(!icm)
> + return NULL;
> +
> + /* Linear scan of ICM on each access. Since this is called on fmr
> + * registration which is on data path, eventually we may want to
> + * rearrange things to use some kind of tree. */
> +
> + list_for_each_entry(chunk, &icm->chunk_list, list) {
> + for (i = 0; i < chunk->npages; ++i) {
> + if (chunk->mem[i].length >= offset) {
> + page = chunk->mem[i].page;
> + break;
> + }
> + offset -= chunk->mem[i].length;
> + }
> + }
> +
> + if (!page)
> + return NULL;
> +
> + return lowmem_page_address(page) + offset;
> +}
> +
> int mthca_table_get_range(struct mthca_dev *dev, struct mthca_icm_table *table,
> int start, int end)
> {