[ofa-general] [RFC 0/1] libmthca: CQ/DMA race on Altix

akepner at sgi.com akepner at sgi.com
Sun Jul 15 14:21:46 PDT 2007


Here's a first cut at OFED 1.3/Linux 2.6.23 patches to
address the "CQ/DMA race" that's possible on Altix systems
when CQs are allocated in user space.

(A description of this bug appears here:
http://lists.openfabrics.org/pipermail/general/2006-December/030251.html)

I'll post the kernel patch to lkml, but I'd appreciate any
comments from this list before doing that.

Obviously this is only a subset of the kernel changes that
would be required, since every caller of dma_map_sg() would
need to be updated for the new argument. Comments?

 arch/ia64/sn/pci/pci_dma.c                   |   19 ++++++++++++++-----
 drivers/infiniband/core/umem.c               |    5 +++--
 drivers/infiniband/hw/mthca/mthca_provider.c |   11 ++++++++++-
 drivers/infiniband/hw/mthca/mthca_user.h     |    8 +++++++-
 drivers/infiniband/ulp/srp/ib_srp.c          |    2 +-
 include/asm-generic/dma-mapping.h            |    4 ++--
 include/asm-generic/pci-dma-compat.h         |    2 +-
 include/asm-ia64/machvec.h                   |    2 +-
 include/rdma/ib_umem.h                       |    2 +-
 include/rdma/ib_verbs.h                      |    5 +++--

-- 
diff --git a/arch/ia64/sn/pci/pci_dma.c b/arch/ia64/sn/pci/pci_dma.c
index d79ddac..d942390 100644
--- a/arch/ia64/sn/pci/pci_dma.c
+++ b/arch/ia64/sn/pci/pci_dma.c
@@ -245,7 +245,7 @@ EXPORT_SYMBOL(sn_dma_unmap_sg);
  * Maps each entry of @sg for DMA.
  */
 int sn_dma_map_sg(struct device *dev, struct scatterlist *sg, int nhwentries,
-		  int direction)
+		  int direction, int coherent)
 {
 	unsigned long phys_addr;
 	struct scatterlist *saved_sg = sg;
@@ -259,12 +259,21 @@ int sn_dma_map_sg(struct device *dev, struct scatterlist *sg, int nhwentries,
 	 * Setup a DMA address for each entry in the scatterlist.
 	 */
 	for (i = 0; i < nhwentries; i++, sg++) {
+		dma_addr_t dma_addr;
 		phys_addr = SG_ENT_PHYS_ADDRESS(sg);
-		sg->dma_address = provider->dma_map(pdev,
-						    phys_addr, sg->length,
-						    SN_DMA_ADDR_PHYS);
 
-		if (!sg->dma_address) {
+		if (coherent) {
+			dma_addr= provider->dma_map_consistent(pdev,
+							       phys_addr, 	
+							       sg->length,
+							       SN_DMA_ADDR_PHYS);
+		} else {
+			dma_addr = provider->dma_map(pdev,
+						     phys_addr, sg->length,
+						     SN_DMA_ADDR_PHYS);
+		}
+
+		if (!(sg->dma_address = dma_addr)) {
 			printk(KERN_ERR "%s: out of ATEs\n", __FUNCTION__);
 
 			/*
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index d40652a..e9f9f42 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -66,7 +66,7 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
  * @access: IB_ACCESS_xxx flags for memory being pinned
  */
 struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
-			    size_t size, int access)
+			    size_t size, int access, int coherent)
 {
 	struct ib_umem *umem;
 	struct page **page_list;
@@ -154,7 +154,8 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 			chunk->nmap = ib_dma_map_sg(context->device,
 						    &chunk->page_list[0],
 						    chunk->nents,
-						    DMA_BIDIRECTIONAL);
+						    DMA_BIDIRECTIONAL,
+						    coherent);
 			if (chunk->nmap <= 0) {
 				for (i = 0; i < chunk->nents; ++i)
 					put_page(chunk->page_list[i].page);
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 6bcde1c..c0cf5f1 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -1017,6 +1017,8 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 	struct mthca_dev *dev = to_mdev(pd->device);
 	struct ib_umem_chunk *chunk;
 	struct mthca_mr *mr;
+	struct mthca_reg_mr ucmd;
+	int coherent;
 	u64 *pages;
 	int shift, n, len;
 	int i, j, k;
@@ -1027,7 +1029,14 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 	if (!mr)
 		return ERR_PTR(-ENOMEM);
 
-	mr->umem = ib_umem_get(pd->uobject->context, start, length, acc);
+	if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
+		err = -EFAULT;
+		goto err;
+	}
+	coherent = (int) ucmd.mr_attrs & MTHCA_MR_COHERENT;
+
+	mr->umem = ib_umem_get(pd->uobject->context, start, length, acc, 
+			       coherent);
 	if (IS_ERR(mr->umem)) {
 		err = PTR_ERR(mr->umem);
 		goto err;
diff --git a/drivers/infiniband/hw/mthca/mthca_user.h b/drivers/infiniband/hw/mthca/mthca_user.h
index 02cc0a7..f46773e 100644
--- a/drivers/infiniband/hw/mthca/mthca_user.h
+++ b/drivers/infiniband/hw/mthca/mthca_user.h
@@ -41,7 +41,7 @@
  * Increment this value if any changes that break userspace ABI
  * compatibility are made.
  */
-#define MTHCA_UVERBS_ABI_VERSION	1
+#define MTHCA_UVERBS_ABI_VERSION	2
 
 /*
  * Make sure that all structs defined in this file remain laid out so
@@ -61,6 +61,12 @@ struct mthca_alloc_pd_resp {
 	__u32 reserved;
 };
 
+struct mthca_reg_mr {
+	__u32 mr_attrs;
+#define MTHCA_MR_COHERENT 0x1
+	__u32 reserved;
+};
+
 struct mthca_create_cq {
 	__u32 lkey;
 	__u32 pdn;
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 39bf057..b7a4301 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -699,7 +699,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target,
 	dev = target->srp_host->dev;
 	ibdev = dev->dev;
 
-	count = ib_dma_map_sg(ibdev, scat, nents, scmnd->sc_data_direction);
+	count = ib_dma_map_sg(ibdev, scat, nents, scmnd->sc_data_direction, 0);
 
 	fmt = SRP_DATA_DESC_DIRECT;
 	len = sizeof (struct srp_cmd) +	sizeof (struct srp_direct_buf);
diff --git a/include/asm-generic/dma-mapping.h b/include/asm-generic/dma-mapping.h
index 783ab99..34e8357 100644
--- a/include/asm-generic/dma-mapping.h
+++ b/include/asm-generic/dma-mapping.h
@@ -89,7 +89,7 @@ dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
 
 static inline int
 dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
-	   enum dma_data_direction direction)
+	   enum dma_data_direction direction, int coherent)
 {
 	BUG_ON(dev->bus != &pci_bus_type);
 
@@ -213,7 +213,7 @@ dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
 
 static inline int
 dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
-	   enum dma_data_direction direction)
+	   enum dma_data_direction direction, int coherent)
 {
 	BUG();
 	return 0;
diff --git a/include/asm-generic/pci-dma-compat.h b/include/asm-generic/pci-dma-compat.h
index 25c10e9..3e85b8e 100644
--- a/include/asm-generic/pci-dma-compat.h
+++ b/include/asm-generic/pci-dma-compat.h
@@ -60,7 +60,7 @@ static inline int
 pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg,
 	   int nents, int direction)
 {
-	return dma_map_sg(hwdev == NULL ? NULL : &hwdev->dev, sg, nents, (enum dma_data_direction)direction);
+	return dma_map_sg(hwdev == NULL ? NULL : &hwdev->dev, sg, nents, (enum dma_data_direction)direction, 0);
 }
 
 static inline void
diff --git a/include/asm-ia64/machvec.h b/include/asm-ia64/machvec.h
index ca33eb1..34e9a58 100644
--- a/include/asm-ia64/machvec.h
+++ b/include/asm-ia64/machvec.h
@@ -46,7 +46,7 @@ typedef void *ia64_mv_dma_alloc_coherent (struct device *, size_t, dma_addr_t *,
 typedef void ia64_mv_dma_free_coherent (struct device *, size_t, void *, dma_addr_t);
 typedef dma_addr_t ia64_mv_dma_map_single (struct device *, void *, size_t, int);
 typedef void ia64_mv_dma_unmap_single (struct device *, dma_addr_t, size_t, int);
-typedef int ia64_mv_dma_map_sg (struct device *, struct scatterlist *, int, int);
+typedef int ia64_mv_dma_map_sg (struct device *, struct scatterlist *, int, int, int);
 typedef void ia64_mv_dma_unmap_sg (struct device *, struct scatterlist *, int, int);
 typedef void ia64_mv_dma_sync_single_for_cpu (struct device *, dma_addr_t, size_t, int);
 typedef void ia64_mv_dma_sync_sg_for_cpu (struct device *, struct scatterlist *, int, int);
diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
index c533d6c..08aeb87 100644
--- a/include/rdma/ib_umem.h
+++ b/include/rdma/ib_umem.h
@@ -61,7 +61,7 @@ struct ib_umem_chunk {
 #ifdef CONFIG_INFINIBAND_USER_MEM
 
 struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
-			    size_t size, int access);
+			    size_t size, int access, int coherent);
 void ib_umem_release(struct ib_umem *umem);
 int ib_umem_page_count(struct ib_umem *umem);
 
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 0627a6a..d5d3180 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1555,11 +1555,12 @@ static inline void ib_dma_unmap_page(struct ib_device *dev,
  */
 static inline int ib_dma_map_sg(struct ib_device *dev,
 				struct scatterlist *sg, int nents,
-				enum dma_data_direction direction)
+				enum dma_data_direction direction, 
+				int coherent)
 {
 	if (dev->dma_ops)
 		return dev->dma_ops->map_sg(dev, sg, nents, direction);
-	return dma_map_sg(dev->dma_device, sg, nents, direction);
+	return dma_map_sg(dev->dma_device, sg, nents, direction, coherent);
 }
 
 /**

-- 
Arthur




More information about the general mailing list