[openib-general] Re: [PATCH] Ref count user doorbell pages in mthca

Michael S. Tsirkin mst at mellanox.co.il
Thu Jun 16 13:43:52 PDT 2005


Quoting r. Roland Dreier <roland at topspin.com>:
> Subject: [PATCH] Ref count user doorbell pages in mthca
> 
> This patch keeps a reference count on the doorbell pages that are
> mapped into userspace for direct verbs access.  This prevents a
> buggy/malicious app from closing its context but keeping a doorbell
> page mapped, and then improperly accessing the doorbell page after it
> gets allocated again to a new process.

How does one close the context? I thought that's done by closing the
file descriptor, isn't that right? And if not - why not?
I think closing the file descriptor will kill the vmas without any extra
work in mthca, avoiding this issue altogether.

> Anyone see any issues with this?
> 
>  - R.

For hotswap we'll need a way to find and unmap all uars, anyway,
and I think the same shall be done when context is closed by the
application. IMO, when the app closes the context, all resources shall go.

> +void mthca_uar_put(struct mthca_uar *uar)
>  {
> -	mthca_free(&dev->uar_table.alloc, uar->index);
> +	kref_put(&uar->ref, mthca_uar_free);
>  }

What lock prevents the reference count from going to 0, and the driver
deciding to kill the uar object, while at the same time a vma (via vm_ops)
holds a pointer to this object and is calling kref_get?

MST


> --- infiniband/hw/mthca/mthca_dev.h	(revision 2631)
> +++ infiniband/hw/mthca/mthca_dev.h	(working copy)
> @@ -301,7 +301,7 @@ struct mthca_dev {
>  	struct mthca_av_table  av_table;
>  	struct mthca_mcg_table mcg_table;
>  
> -	struct mthca_uar       driver_uar;
> +	struct mthca_uar      *driver_uar;
>  	struct mthca_db_table *db_tab;
>  	struct mthca_pd        driver_pd;
>  	struct mthca_mr        driver_mr;
> @@ -382,8 +382,8 @@ void mthca_cleanup_mcg_table(struct mthc
>  int mthca_register_device(struct mthca_dev *dev);
>  void mthca_unregister_device(struct mthca_dev *dev);
>  
> -int mthca_uar_alloc(struct mthca_dev *dev, struct mthca_uar *uar);
> -void mthca_uar_free(struct mthca_dev *dev, struct mthca_uar *uar);
> +struct mthca_uar *mthca_uar_alloc(struct mthca_dev *dev);
> +void mthca_uar_put(struct mthca_uar *uar);
>  
>  int mthca_pd_alloc(struct mthca_dev *dev, int privileged, struct mthca_pd *pd);
>  void mthca_pd_free(struct mthca_dev *dev, struct mthca_pd *pd);
> --- infiniband/hw/mthca/mthca_main.c	(revision 2631)
> +++ infiniband/hw/mthca/mthca_main.c	(working copy)
> @@ -636,14 +636,15 @@ static int __devinit mthca_setup_hca(str
>  		return err;
>  	}
>  
> -	err = mthca_uar_alloc(dev, &dev->driver_uar);
> -	if (err) {
> +	dev->driver_uar = mthca_uar_alloc(dev);
> +	if (IS_ERR(dev->driver_uar)) {
>  		mthca_err(dev, "Failed to allocate driver access region, "
>  			  "aborting.\n");
> +		err = PTR_ERR(dev->driver_uar);
>  		goto err_uar_table_free;
>  	}
>  
> -	dev->kar = ioremap(dev->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
> +	dev->kar = ioremap(dev->driver_uar->pfn << PAGE_SHIFT, PAGE_SIZE);
>  	if (!dev->kar) {
>  		mthca_err(dev, "Couldn't map kernel access region, "
>  			  "aborting.\n");
> @@ -760,7 +761,7 @@ err_kar_unmap:
>  	iounmap(dev->kar);
>  
>  err_uar_free:
> -	mthca_uar_free(dev, &dev->driver_uar);
> +	mthca_uar_put(dev->driver_uar);
>  
>  err_uar_table_free:
>  	mthca_cleanup_uar_table(dev);
> @@ -1117,7 +1118,7 @@ static void __devexit mthca_remove_one(s
>  		mthca_cleanup_pd_table(mdev);
>  
>  		iounmap(mdev->kar);
> -		mthca_uar_free(mdev, &mdev->driver_uar);
> +		mthca_uar_put(mdev->driver_uar);
>  		mthca_cleanup_uar_table(mdev);
>  
>  		mthca_close_hca(mdev);
> --- infiniband/hw/mthca/mthca_uar.c	(revision 2631)
> +++ infiniband/hw/mthca/mthca_uar.c	(working copy)
> @@ -35,20 +35,35 @@
>  #include "mthca_dev.h"
>  #include "mthca_memfree.h"
>  
> -int mthca_uar_alloc(struct mthca_dev *dev, struct mthca_uar *uar)
> +struct mthca_uar *mthca_uar_alloc(struct mthca_dev *dev)
>  {
> +	struct mthca_uar *uar = kmalloc(sizeof *uar, GFP_KERNEL);
> +	if (!uar)
> +		return ERR_PTR(-ENOMEM);
> +
> +	kref_init(&uar->ref);
> +
>  	uar->index = mthca_alloc(&dev->uar_table.alloc);
>  	if (uar->index == -1)
> -		return -ENOMEM;
> +		return ERR_PTR(-ENOMEM);
>  
>  	uar->pfn = (pci_resource_start(dev->pdev, 2) >> PAGE_SHIFT) + uar->index;
> +	uar->dev = dev;
> +
> +	return uar;
> +}
> +
> +static void mthca_uar_free(struct kref *ref)
> +{
> +	struct mthca_uar *uar = container_of(ref, struct mthca_uar, ref);
>  
> -	return 0;
> +	mthca_free(&uar->dev->uar_table.alloc, uar->index);
> +	kfree(uar);
>  }
>  
> -void mthca_uar_free(struct mthca_dev *dev, struct mthca_uar *uar)
> +void mthca_uar_put(struct mthca_uar *uar)
>  {
> -	mthca_free(&dev->uar_table.alloc, uar->index);
> +	kref_put(&uar->ref, mthca_uar_free);
>  }
>  
>  int mthca_init_uar_table(struct mthca_dev *dev)
> --- infiniband/hw/mthca/mthca_provider.c	(revision 2631)
> +++ infiniband/hw/mthca/mthca_provider.c	(working copy)
> @@ -309,8 +309,9 @@ static struct ib_ucontext *mthca_alloc_u
>  	if (!context)
>  		return ERR_PTR(-ENOMEM);
>  
> -	err = mthca_uar_alloc(to_mdev(ibdev), &context->uar);
> -	if (err) {
> +	context->uar = mthca_uar_alloc(to_mdev(ibdev));
> +	if (IS_ERR(context->uar)) {
> +		err = PTR_ERR(context->uar);
>  		kfree(context);
>  		return ERR_PTR(err);
>  	}
> @@ -318,15 +319,15 @@ static struct ib_ucontext *mthca_alloc_u
>  	context->db_tab = mthca_init_user_db_tab(to_mdev(ibdev));
>  	if (IS_ERR(context->db_tab)) {
>  		err = PTR_ERR(context->db_tab);
> -		mthca_uar_free(to_mdev(ibdev), &context->uar);
> +		mthca_uar_put(context->uar);
>  		kfree(context);
>  		return ERR_PTR(err);
>  	}
>  
>  	if (copy_to_user((void __user *) (unsigned long) ucmd.respbuf,
>  			 &uresp, sizeof uresp)) {
> -		mthca_cleanup_user_db_tab(to_mdev(ibdev), &context->uar, context->db_tab);
> -		mthca_uar_free(to_mdev(ibdev), &context->uar);
> +		mthca_cleanup_user_db_tab(to_mdev(ibdev), context->uar, context->db_tab);
> +		mthca_uar_put(context->uar);
>  		kfree(context);
>  		return ERR_PTR(-EFAULT);
>  	}
> @@ -336,14 +337,30 @@ static struct ib_ucontext *mthca_alloc_u
>  
>  static int mthca_dealloc_ucontext(struct ib_ucontext *context)
>  {
> -	mthca_cleanup_user_db_tab(to_mdev(context->device), &to_mucontext(context)->uar,
> +	mthca_cleanup_user_db_tab(to_mdev(context->device), to_mucontext(context)->uar,
>  				  to_mucontext(context)->db_tab);
> -	mthca_uar_free(to_mdev(context->device), &to_mucontext(context)->uar);
> +	mthca_uar_put(to_mucontext(context)->uar);
>  	kfree(to_mucontext(context));
>  
>  	return 0;
>  }
>  
> +static void mthca_open_vma(struct vm_area_struct *vma)
> +{
> +	struct mthca_uar *uar = vma->vm_private_data;
> +	kref_get(&uar->ref);
> +}
> +
> +static void mthca_close_vma(struct vm_area_struct *vma)
> +{
> +	mthca_uar_put(vma->vm_private_data);
> +}
> +
> +static struct vm_operations_struct mthca_vm_ops = {
> +	.open  = mthca_open_vma,
> +	.close = mthca_close_vma
> +};
> +
>  static int mthca_mmap_uar(struct ib_ucontext *context,
>  			  struct vm_area_struct *vma)
>  {
> @@ -353,10 +370,15 @@ static int mthca_mmap_uar(struct ib_ucon
>  	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
>  
>  	if (remap_pfn_range(vma, vma->vm_start,
> -			    to_mucontext(context)->uar.pfn,
> +			    to_mucontext(context)->uar->pfn,
>  			    PAGE_SIZE, vma->vm_page_prot))
>  		return -EAGAIN;
>  
> +	kref_get(&to_mucontext(context)->uar->ref);
> +
> +	vma->vm_private_data = to_mucontext(context)->uar;
> +	vma->vm_ops          = &mthca_vm_ops;
> +
>  	return 0;
>  }
>  
> @@ -460,7 +482,7 @@ static struct ib_qp *mthca_create_qp(str
>  			if (copy_from_user(&ucmd, udata, sizeof ucmd))
>  				return ERR_PTR(-EFAULT);
>  
> -			err = mthca_map_user_db(to_mdev(pd->device), &context->uar,
> +			err = mthca_map_user_db(to_mdev(pd->device), context->uar,
>  						context->db_tab,
>  						ucmd.sq_db_index, ucmd.sq_db_page);
>  			if (err) {
> @@ -468,12 +490,12 @@ static struct ib_qp *mthca_create_qp(str
>  				return ERR_PTR(err);
>  			}
>  
> -			err = mthca_map_user_db(to_mdev(pd->device), &context->uar,
> +			err = mthca_map_user_db(to_mdev(pd->device), context->uar,
>  						context->db_tab,
>  						ucmd.rq_db_index, ucmd.rq_db_page);
>  			if (err) {
>  				mthca_unmap_user_db(to_mdev(pd->device),
> -						    &context->uar,
> +						    context->uar,
>  						    context->db_tab,
>  						    ucmd.sq_db_index);
>  				kfree(qp);
> @@ -495,11 +517,11 @@ static struct ib_qp *mthca_create_qp(str
>  			context = to_mucontext(pd->uobject->context);
>  
>  			mthca_unmap_user_db(to_mdev(pd->device),
> -					    &context->uar,
> +					    context->uar,
>  					    context->db_tab,
>  					    ucmd.sq_db_index);
>  			mthca_unmap_user_db(to_mdev(pd->device),
> -					    &context->uar,
> +					    context->uar,
>  					    context->db_tab,
>  					    ucmd.rq_db_index);
>  		}
> @@ -551,11 +573,11 @@ static int mthca_destroy_qp(struct ib_qp
>  {
>  	if (qp->uobject) {
>  		mthca_unmap_user_db(to_mdev(qp->device),
> -				    &to_mucontext(qp->uobject->context)->uar,
> +				    to_mucontext(qp->uobject->context)->uar,
>  				    to_mucontext(qp->uobject->context)->db_tab,
>  				    to_mqp(qp)->sq.db_index);
>  		mthca_unmap_user_db(to_mdev(qp->device),
> -				    &to_mucontext(qp->uobject->context)->uar,
> +				    to_mucontext(qp->uobject->context)->uar,
>  				    to_mucontext(qp->uobject->context)->db_tab,
>  				    to_mqp(qp)->rq.db_index);
>  	}
> @@ -580,13 +602,13 @@ static struct ib_cq *mthca_create_cq(str
>  		if (copy_from_user(&ucmd, udata, sizeof ucmd))
>  			return ERR_PTR(-EFAULT);
>  
> -		err = mthca_map_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
> +		err = mthca_map_user_db(to_mdev(ibdev), to_mucontext(context)->uar,
>  					to_mucontext(context)->db_tab,
>  					ucmd.set_db_index, ucmd.set_db_page);
>  		if (err)
>  			return ERR_PTR(err);
>  
> -		err = mthca_map_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
> +		err = mthca_map_user_db(to_mdev(ibdev), to_mucontext(context)->uar,
>  					to_mucontext(context)->db_tab,
>  					ucmd.arm_db_index, ucmd.arm_db_page);
>  		if (err)
> @@ -627,12 +649,12 @@ err_free:
>  
>  err_unmap_arm:
>  	if (context)
> -		mthca_unmap_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
> +		mthca_unmap_user_db(to_mdev(ibdev), to_mucontext(context)->uar,
>  				    to_mucontext(context)->db_tab, ucmd.arm_db_index);
>  
>  err_unmap_set:
>  	if (context)
> -		mthca_unmap_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
> +		mthca_unmap_user_db(to_mdev(ibdev), to_mucontext(context)->uar,
>  				    to_mucontext(context)->db_tab, ucmd.set_db_index);
>  
>  	return ERR_PTR(err);
> @@ -642,11 +664,11 @@ static int mthca_destroy_cq(struct ib_cq
>  {
>  	if (cq->uobject) {
>  		mthca_unmap_user_db(to_mdev(cq->device),
> -				    &to_mucontext(cq->uobject->context)->uar,
> +				    to_mucontext(cq->uobject->context)->uar,
>  				    to_mucontext(cq->uobject->context)->db_tab,
>  				    to_mcq(cq)->arm_db_index);
>  		mthca_unmap_user_db(to_mdev(cq->device),
> -				    &to_mucontext(cq->uobject->context)->uar,
> +				    to_mucontext(cq->uobject->context)->uar,
>  				    to_mucontext(cq->uobject->context)->db_tab,
>  				    to_mcq(cq)->set_ci_db_index);
>  	}
> --- infiniband/hw/mthca/mthca_provider.h	(revision 2631)
> +++ infiniband/hw/mthca/mthca_provider.h	(working copy)
> @@ -50,15 +50,17 @@ struct mthca_buf_list {
>  };
>  
>  struct mthca_uar {
> -	unsigned long pfn;
> -	int           index;
> +	struct mthca_dev *dev;
> +	unsigned long     pfn;
> +	int               index;
> +	struct kref       ref;
>  };
>  
>  struct mthca_user_db_table;
>  
>  struct mthca_ucontext {
>  	struct ib_ucontext          ibucontext;
> -	struct mthca_uar            uar;
> +	struct mthca_uar           *uar;
>  	struct mthca_user_db_table *db_tab;
>  };
>  
> --- infiniband/hw/mthca/mthca_cq.c	(revision 2631)
> +++ infiniband/hw/mthca/mthca_cq.c	(working copy)
> @@ -809,9 +809,9 @@ int mthca_init_cq(struct mthca_dev *dev,
>  	cq_context->start           = cpu_to_be64(0);
>  	cq_context->logsize_usrpage = cpu_to_be32((ffs(nent) - 1) << 24);
>  	if (ctx)
> -		cq_context->logsize_usrpage |= cpu_to_be32(ctx->uar.index);
> +		cq_context->logsize_usrpage |= cpu_to_be32(ctx->uar->index);
>  	else
> -		cq_context->logsize_usrpage |= cpu_to_be32(dev->driver_uar.index);
> +		cq_context->logsize_usrpage |= cpu_to_be32(dev->driver_uar->index);
>  	cq_context->error_eqn       = cpu_to_be32(dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn);
>  	cq_context->comp_eqn        = cpu_to_be32(dev->eq_table.eq[MTHCA_EQ_COMP].eqn);
>  	cq_context->pd              = cpu_to_be32(pdn);
> --- infiniband/hw/mthca/mthca_eq.c	(revision 2631)
> +++ infiniband/hw/mthca/mthca_eq.c	(working copy)
> @@ -542,7 +542,7 @@ static int __devinit mthca_create_eq(str
>  	if (mthca_is_memfree(dev)) {
>  		eq_context->arbel_pd = cpu_to_be32(dev->driver_pd.pd_num);
>  	} else {
> -		eq_context->logsize_usrpage |= cpu_to_be32(dev->driver_uar.index);
> +		eq_context->logsize_usrpage |= cpu_to_be32(dev->driver_uar->index);
>  		eq_context->tavor_pd         = cpu_to_be32(dev->driver_pd.pd_num);
>  	}
>  	eq_context->intr            = intr;
> --- infiniband/hw/mthca/mthca_qp.c	(revision 2631)
> +++ infiniband/hw/mthca/mthca_qp.c	(working copy)
> @@ -693,9 +693,9 @@ int mthca_modify_qp(struct ib_qp *ibqp, 
>  
>  	if (qp->ibqp.uobject)
>  		qp_context->usr_page =
> -			cpu_to_be32(to_mucontext(qp->ibqp.uobject->context)->uar.index);
> +			cpu_to_be32(to_mucontext(qp->ibqp.uobject->context)->uar->index);
>  	else
> -		qp_context->usr_page = cpu_to_be32(dev->driver_uar.index);
> +		qp_context->usr_page = cpu_to_be32(dev->driver_uar->index);
>  	qp_context->local_qpn  = cpu_to_be32(qp->qpn);
>  	if (attr_mask & IB_QP_DEST_QPN) {
>  		qp_context->remote_qpn = cpu_to_be32(attr->dest_qp_num);
> --- infiniband/hw/mthca/mthca_memfree.c	(revision 2631)
> +++ infiniband/hw/mthca/mthca_memfree.c	(working copy)
> @@ -527,7 +527,7 @@ int mthca_alloc_db(struct mthca_dev *dev
>  	memset(page->db_rec, 0, 4096);
>  
>  	ret = mthca_MAP_ICM_page(dev, page->mapping,
> -				 mthca_uarc_virt(dev, &dev->driver_uar, i), &status);
> +				 mthca_uarc_virt(dev, dev->driver_uar, i), &status);
>  	if (!ret && status)
>  		ret = -EINVAL;
>  	if (ret) {
> @@ -581,7 +581,7 @@ void mthca_free_db(struct mthca_dev *dev
>  
>  	if (bitmap_empty(page->used, MTHCA_DB_REC_PER_PAGE) &&
>  	    i >= dev->db_tab->max_group1 - 1) {
> -		mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, &dev->driver_uar, i), 1, &status);
> +		mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, dev->driver_uar, i), 1, &status);
>  		
>  		dma_free_coherent(&dev->pdev->dev, 4096,
>  				  page->db_rec, page->mapping);
> @@ -650,7 +650,7 @@ void mthca_cleanup_db_tab(struct mthca_d
>  		if (!bitmap_empty(dev->db_tab->page[i].used, MTHCA_DB_REC_PER_PAGE))
>  			mthca_warn(dev, "Kernel UARC page %d not empty\n", i);
>  
> -		mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, &dev->driver_uar, i), 1, &status);
> +		mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, dev->driver_uar, i), 1, &status);
>  		
>  		dma_free_coherent(&dev->pdev->dev, 4096,
>  				  dev->db_tab->page[i].db_rec,

-- 
MST



More information about the general mailing list