[openib-general] Re: [PATCH] Ref count user doorbell pages in mthca
Michael S. Tsirkin
mst at mellanox.co.il
Thu Jun 16 13:43:52 PDT 2005
Quoting r. Roland Dreier <roland at topspin.com>:
> Subject: [PATCH] Ref count user doorbell pages in mthca
>
> This patch keeps a reference count on the doorbell pages that are
> mapped into userspace for direct verbs access. This prevents a
> buggy/malicious app from closing its context but keeping a doorbell
> page mapped, and then improperly accessing the doorbell page after it
> gets allocated again to a new process.
How does one close the context? I thought that's done by closing the
file descriptor, isn't that right? And if not, why not?
I think closing the file descriptor will kill the vmas without any
extra work in mthca, avoiding this issue altogether.
> Anyone see any issues with this?
>
> - R.
For hotswap we'll need a way to find and unmap all uars anyway, and I
think the same should be done when the context is closed by the
application. IMO, when the app closes the context, all of its resources
should go away.
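Roughly what I have in mind for the forced teardown - a sketch only,
assuming the doorbell page is mapped at offset 0 of the uverbs file
(mthca_zap_uar_mappings is a made-up name, not actual mthca code):

	/* Zap every user PTE mapping the uar page, so a later access
	 * faults instead of touching a doorbell page that may already
	 * have been handed out to someone else. */
	static void mthca_zap_uar_mappings(struct file *filp)
	{
		unmap_mapping_range(filp->f_mapping, 0, PAGE_SIZE, 1);
	}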
> +void mthca_uar_put(struct mthca_uar *uar)
> {
> - mthca_free(&dev->uar_table.alloc, uar->index);
> + kref_put(&uar->ref, mthca_uar_free);
> }
What lock prevents the reference count from dropping to 0 (with the
driver then deciding to free the uar object) while, at the same time, a
vma (through vm_ops) holds a pointer to this object and is calling
kref_get?
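For reference, the usual rule (Documentation/kref.txt) is that you may
only kref_get from a shared pointer while holding a lock that the
release path also takes, or while you already hold a reference
yourself. Schematically - illustrative only, uar_lock is a made-up
lock, not something in the patch:

	spinlock_t uar_lock = SPIN_LOCK_UNLOCKED;

	spin_lock(&uar_lock);
	uar = context->uar;		/* shared pointer */
	if (uar)
		kref_get(&uar->ref);	/* safe only if the release path
					 * clears context->uar under
					 * uar_lock before freeing */
	spin_unlock(&uar_lock);

That is, either the vma must be guaranteed to already hold a reference
whenever open() runs, or a lock like the above is needed.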
MST
> --- infiniband/hw/mthca/mthca_dev.h (revision 2631)
> +++ infiniband/hw/mthca/mthca_dev.h (working copy)
> @@ -301,7 +301,7 @@ struct mthca_dev {
> struct mthca_av_table av_table;
> struct mthca_mcg_table mcg_table;
>
> - struct mthca_uar driver_uar;
> + struct mthca_uar *driver_uar;
> struct mthca_db_table *db_tab;
> struct mthca_pd driver_pd;
> struct mthca_mr driver_mr;
> @@ -382,8 +382,8 @@ void mthca_cleanup_mcg_table(struct mthc
> int mthca_register_device(struct mthca_dev *dev);
> void mthca_unregister_device(struct mthca_dev *dev);
>
> -int mthca_uar_alloc(struct mthca_dev *dev, struct mthca_uar *uar);
> -void mthca_uar_free(struct mthca_dev *dev, struct mthca_uar *uar);
> +struct mthca_uar *mthca_uar_alloc(struct mthca_dev *dev);
> +void mthca_uar_put(struct mthca_uar *uar);
>
> int mthca_pd_alloc(struct mthca_dev *dev, int privileged, struct mthca_pd *pd);
> void mthca_pd_free(struct mthca_dev *dev, struct mthca_pd *pd);
> --- infiniband/hw/mthca/mthca_main.c (revision 2631)
> +++ infiniband/hw/mthca/mthca_main.c (working copy)
> @@ -636,14 +636,15 @@ static int __devinit mthca_setup_hca(str
> return err;
> }
>
> - err = mthca_uar_alloc(dev, &dev->driver_uar);
> - if (err) {
> + dev->driver_uar = mthca_uar_alloc(dev);
> + if (IS_ERR(dev->driver_uar)) {
> mthca_err(dev, "Failed to allocate driver access region, "
> "aborting.\n");
> + err = PTR_ERR(dev->driver_uar);
> goto err_uar_table_free;
> }
>
> - dev->kar = ioremap(dev->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
> + dev->kar = ioremap(dev->driver_uar->pfn << PAGE_SHIFT, PAGE_SIZE);
> if (!dev->kar) {
> mthca_err(dev, "Couldn't map kernel access region, "
> "aborting.\n");
> @@ -760,7 +761,7 @@ err_kar_unmap:
> iounmap(dev->kar);
>
> err_uar_free:
> - mthca_uar_free(dev, &dev->driver_uar);
> + mthca_uar_put(dev->driver_uar);
>
> err_uar_table_free:
> mthca_cleanup_uar_table(dev);
> @@ -1117,7 +1118,7 @@ static void __devexit mthca_remove_one(s
> mthca_cleanup_pd_table(mdev);
>
> iounmap(mdev->kar);
> - mthca_uar_free(mdev, &mdev->driver_uar);
> + mthca_uar_put(mdev->driver_uar);
> mthca_cleanup_uar_table(mdev);
>
> mthca_close_hca(mdev);
> --- infiniband/hw/mthca/mthca_uar.c (revision 2631)
> +++ infiniband/hw/mthca/mthca_uar.c (working copy)
> @@ -35,20 +35,35 @@
> #include "mthca_dev.h"
> #include "mthca_memfree.h"
>
> -int mthca_uar_alloc(struct mthca_dev *dev, struct mthca_uar *uar)
> +struct mthca_uar *mthca_uar_alloc(struct mthca_dev *dev)
> {
> + struct mthca_uar *uar = kmalloc(sizeof *uar, GFP_KERNEL);
> + if (!uar)
> + return ERR_PTR(-ENOMEM);
> +
> + kref_init(&uar->ref);
> +
> uar->index = mthca_alloc(&dev->uar_table.alloc);
> if (uar->index == -1)
> - return -ENOMEM;
> + return ERR_PTR(-ENOMEM);
>
> uar->pfn = (pci_resource_start(dev->pdev, 2) >> PAGE_SHIFT) + uar->index;
> + uar->dev = dev;
> +
> + return uar;
> +}
> +
> +static void mthca_uar_free(struct kref *ref)
> +{
> + struct mthca_uar *uar = container_of(ref, struct mthca_uar, ref);
>
> - return 0;
> + mthca_free(&uar->dev->uar_table.alloc, uar->index);
> + kfree(uar);
> }
>
> -void mthca_uar_free(struct mthca_dev *dev, struct mthca_uar *uar)
> +void mthca_uar_put(struct mthca_uar *uar)
> {
> - mthca_free(&dev->uar_table.alloc, uar->index);
> + kref_put(&uar->ref, mthca_uar_free);
> }
>
> int mthca_init_uar_table(struct mthca_dev *dev)
> --- infiniband/hw/mthca/mthca_provider.c (revision 2631)
> +++ infiniband/hw/mthca/mthca_provider.c (working copy)
> @@ -309,8 +309,9 @@ static struct ib_ucontext *mthca_alloc_u
> if (!context)
> return ERR_PTR(-ENOMEM);
>
> - err = mthca_uar_alloc(to_mdev(ibdev), &context->uar);
> - if (err) {
> + context->uar = mthca_uar_alloc(to_mdev(ibdev));
> + if (IS_ERR(context->uar)) {
> + err = PTR_ERR(context->uar);
> kfree(context);
> return ERR_PTR(err);
> }
> @@ -318,15 +319,15 @@ static struct ib_ucontext *mthca_alloc_u
> context->db_tab = mthca_init_user_db_tab(to_mdev(ibdev));
> if (IS_ERR(context->db_tab)) {
> err = PTR_ERR(context->db_tab);
> - mthca_uar_free(to_mdev(ibdev), &context->uar);
> + mthca_uar_put(context->uar);
> kfree(context);
> return ERR_PTR(err);
> }
>
> if (copy_to_user((void __user *) (unsigned long) ucmd.respbuf,
> &uresp, sizeof uresp)) {
> - mthca_cleanup_user_db_tab(to_mdev(ibdev), &context->uar, context->db_tab);
> - mthca_uar_free(to_mdev(ibdev), &context->uar);
> + mthca_cleanup_user_db_tab(to_mdev(ibdev), context->uar, context->db_tab);
> + mthca_uar_put(context->uar);
> kfree(context);
> return ERR_PTR(-EFAULT);
> }
> @@ -336,14 +337,30 @@ static struct ib_ucontext *mthca_alloc_u
>
> static int mthca_dealloc_ucontext(struct ib_ucontext *context)
> {
> - mthca_cleanup_user_db_tab(to_mdev(context->device), &to_mucontext(context)->uar,
> + mthca_cleanup_user_db_tab(to_mdev(context->device), to_mucontext(context)->uar,
> to_mucontext(context)->db_tab);
> - mthca_uar_free(to_mdev(context->device), &to_mucontext(context)->uar);
> + mthca_uar_put(to_mucontext(context)->uar);
> kfree(to_mucontext(context));
>
> return 0;
> }
>
> +static void mthca_open_vma(struct vm_area_struct *vma)
> +{
> + struct mthca_uar *uar = vma->vm_private_data;
> + kref_get(&uar->ref);
> +}
> +
> +static void mthca_close_vma(struct vm_area_struct *vma)
> +{
> + mthca_uar_put(vma->vm_private_data);
> +}
> +
> +static struct vm_operations_struct mthca_vm_ops = {
> + .open = mthca_open_vma,
> + .close = mthca_close_vma
> +};
> +
> static int mthca_mmap_uar(struct ib_ucontext *context,
> struct vm_area_struct *vma)
> {
> @@ -353,10 +370,15 @@ static int mthca_mmap_uar(struct ib_ucon
> vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
>
> if (remap_pfn_range(vma, vma->vm_start,
> - to_mucontext(context)->uar.pfn,
> + to_mucontext(context)->uar->pfn,
> PAGE_SIZE, vma->vm_page_prot))
> return -EAGAIN;
>
> + kref_get(&to_mucontext(context)->uar->ref);
> +
> + vma->vm_private_data = to_mucontext(context)->uar;
> + vma->vm_ops = &mthca_vm_ops;
> +
> return 0;
> }
>
> @@ -460,7 +482,7 @@ static struct ib_qp *mthca_create_qp(str
> if (copy_from_user(&ucmd, udata, sizeof ucmd))
> return ERR_PTR(-EFAULT);
>
> - err = mthca_map_user_db(to_mdev(pd->device), &context->uar,
> + err = mthca_map_user_db(to_mdev(pd->device), context->uar,
> context->db_tab,
> ucmd.sq_db_index, ucmd.sq_db_page);
> if (err) {
> @@ -468,12 +490,12 @@ static struct ib_qp *mthca_create_qp(str
> return ERR_PTR(err);
> }
>
> - err = mthca_map_user_db(to_mdev(pd->device), &context->uar,
> + err = mthca_map_user_db(to_mdev(pd->device), context->uar,
> context->db_tab,
> ucmd.rq_db_index, ucmd.rq_db_page);
> if (err) {
> mthca_unmap_user_db(to_mdev(pd->device),
> - &context->uar,
> + context->uar,
> context->db_tab,
> ucmd.sq_db_index);
> kfree(qp);
> @@ -495,11 +517,11 @@ static struct ib_qp *mthca_create_qp(str
> context = to_mucontext(pd->uobject->context);
>
> mthca_unmap_user_db(to_mdev(pd->device),
> - &context->uar,
> + context->uar,
> context->db_tab,
> ucmd.sq_db_index);
> mthca_unmap_user_db(to_mdev(pd->device),
> - &context->uar,
> + context->uar,
> context->db_tab,
> ucmd.rq_db_index);
> }
> @@ -551,11 +573,11 @@ static int mthca_destroy_qp(struct ib_qp
> {
> if (qp->uobject) {
> mthca_unmap_user_db(to_mdev(qp->device),
> - &to_mucontext(qp->uobject->context)->uar,
> + to_mucontext(qp->uobject->context)->uar,
> to_mucontext(qp->uobject->context)->db_tab,
> to_mqp(qp)->sq.db_index);
> mthca_unmap_user_db(to_mdev(qp->device),
> - &to_mucontext(qp->uobject->context)->uar,
> + to_mucontext(qp->uobject->context)->uar,
> to_mucontext(qp->uobject->context)->db_tab,
> to_mqp(qp)->rq.db_index);
> }
> @@ -580,13 +602,13 @@ static struct ib_cq *mthca_create_cq(str
> if (copy_from_user(&ucmd, udata, sizeof ucmd))
> return ERR_PTR(-EFAULT);
>
> - err = mthca_map_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
> + err = mthca_map_user_db(to_mdev(ibdev), to_mucontext(context)->uar,
> to_mucontext(context)->db_tab,
> ucmd.set_db_index, ucmd.set_db_page);
> if (err)
> return ERR_PTR(err);
>
> - err = mthca_map_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
> + err = mthca_map_user_db(to_mdev(ibdev), to_mucontext(context)->uar,
> to_mucontext(context)->db_tab,
> ucmd.arm_db_index, ucmd.arm_db_page);
> if (err)
> @@ -627,12 +649,12 @@ err_free:
>
> err_unmap_arm:
> if (context)
> - mthca_unmap_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
> + mthca_unmap_user_db(to_mdev(ibdev), to_mucontext(context)->uar,
> to_mucontext(context)->db_tab, ucmd.arm_db_index);
>
> err_unmap_set:
> if (context)
> - mthca_unmap_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
> + mthca_unmap_user_db(to_mdev(ibdev), to_mucontext(context)->uar,
> to_mucontext(context)->db_tab, ucmd.set_db_index);
>
> return ERR_PTR(err);
> @@ -642,11 +664,11 @@ static int mthca_destroy_cq(struct ib_cq
> {
> if (cq->uobject) {
> mthca_unmap_user_db(to_mdev(cq->device),
> - &to_mucontext(cq->uobject->context)->uar,
> + to_mucontext(cq->uobject->context)->uar,
> to_mucontext(cq->uobject->context)->db_tab,
> to_mcq(cq)->arm_db_index);
> mthca_unmap_user_db(to_mdev(cq->device),
> - &to_mucontext(cq->uobject->context)->uar,
> + to_mucontext(cq->uobject->context)->uar,
> to_mucontext(cq->uobject->context)->db_tab,
> to_mcq(cq)->set_ci_db_index);
> }
> --- infiniband/hw/mthca/mthca_provider.h (revision 2631)
> +++ infiniband/hw/mthca/mthca_provider.h (working copy)
> @@ -50,15 +50,17 @@ struct mthca_buf_list {
> };
>
> struct mthca_uar {
> - unsigned long pfn;
> - int index;
> + struct mthca_dev *dev;
> + unsigned long pfn;
> + int index;
> + struct kref ref;
> };
>
> struct mthca_user_db_table;
>
> struct mthca_ucontext {
> struct ib_ucontext ibucontext;
> - struct mthca_uar uar;
> + struct mthca_uar *uar;
> struct mthca_user_db_table *db_tab;
> };
>
> --- infiniband/hw/mthca/mthca_cq.c (revision 2631)
> +++ infiniband/hw/mthca/mthca_cq.c (working copy)
> @@ -809,9 +809,9 @@ int mthca_init_cq(struct mthca_dev *dev,
> cq_context->start = cpu_to_be64(0);
> cq_context->logsize_usrpage = cpu_to_be32((ffs(nent) - 1) << 24);
> if (ctx)
> - cq_context->logsize_usrpage |= cpu_to_be32(ctx->uar.index);
> + cq_context->logsize_usrpage |= cpu_to_be32(ctx->uar->index);
> else
> - cq_context->logsize_usrpage |= cpu_to_be32(dev->driver_uar.index);
> + cq_context->logsize_usrpage |= cpu_to_be32(dev->driver_uar->index);
> cq_context->error_eqn = cpu_to_be32(dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn);
> cq_context->comp_eqn = cpu_to_be32(dev->eq_table.eq[MTHCA_EQ_COMP].eqn);
> cq_context->pd = cpu_to_be32(pdn);
> --- infiniband/hw/mthca/mthca_eq.c (revision 2631)
> +++ infiniband/hw/mthca/mthca_eq.c (working copy)
> @@ -542,7 +542,7 @@ static int __devinit mthca_create_eq(str
> if (mthca_is_memfree(dev)) {
> eq_context->arbel_pd = cpu_to_be32(dev->driver_pd.pd_num);
> } else {
> - eq_context->logsize_usrpage |= cpu_to_be32(dev->driver_uar.index);
> + eq_context->logsize_usrpage |= cpu_to_be32(dev->driver_uar->index);
> eq_context->tavor_pd = cpu_to_be32(dev->driver_pd.pd_num);
> }
> eq_context->intr = intr;
> --- infiniband/hw/mthca/mthca_qp.c (revision 2631)
> +++ infiniband/hw/mthca/mthca_qp.c (working copy)
> @@ -693,9 +693,9 @@ int mthca_modify_qp(struct ib_qp *ibqp,
>
> if (qp->ibqp.uobject)
> qp_context->usr_page =
> - cpu_to_be32(to_mucontext(qp->ibqp.uobject->context)->uar.index);
> + cpu_to_be32(to_mucontext(qp->ibqp.uobject->context)->uar->index);
> else
> - qp_context->usr_page = cpu_to_be32(dev->driver_uar.index);
> + qp_context->usr_page = cpu_to_be32(dev->driver_uar->index);
> qp_context->local_qpn = cpu_to_be32(qp->qpn);
> if (attr_mask & IB_QP_DEST_QPN) {
> qp_context->remote_qpn = cpu_to_be32(attr->dest_qp_num);
> --- infiniband/hw/mthca/mthca_memfree.c (revision 2631)
> +++ infiniband/hw/mthca/mthca_memfree.c (working copy)
> @@ -527,7 +527,7 @@ int mthca_alloc_db(struct mthca_dev *dev
> memset(page->db_rec, 0, 4096);
>
> ret = mthca_MAP_ICM_page(dev, page->mapping,
> - mthca_uarc_virt(dev, &dev->driver_uar, i), &status);
> + mthca_uarc_virt(dev, dev->driver_uar, i), &status);
> if (!ret && status)
> ret = -EINVAL;
> if (ret) {
> @@ -581,7 +581,7 @@ void mthca_free_db(struct mthca_dev *dev
>
> if (bitmap_empty(page->used, MTHCA_DB_REC_PER_PAGE) &&
> i >= dev->db_tab->max_group1 - 1) {
> - mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, &dev->driver_uar, i), 1, &status);
> + mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, dev->driver_uar, i), 1, &status);
>
> dma_free_coherent(&dev->pdev->dev, 4096,
> page->db_rec, page->mapping);
> @@ -650,7 +650,7 @@ void mthca_cleanup_db_tab(struct mthca_d
> if (!bitmap_empty(dev->db_tab->page[i].used, MTHCA_DB_REC_PER_PAGE))
> mthca_warn(dev, "Kernel UARC page %d not empty\n", i);
>
> - mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, &dev->driver_uar, i), 1, &status);
> + mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, dev->driver_uar, i), 1, &status);
>
> dma_free_coherent(&dev->pdev->dev, 4096,
> dev->db_tab->page[i].db_rec,
--
MST