[openib-general] How to support IOMMUs for ipath driver

Ralph Campbell ralphc at pathscale.com
Tue Sep 12 17:40:10 PDT 2006


Problem:

The interface between the IB kernel midlayer and IB device drivers
uses dma_map_single() and dma_map_sg() to map buffers to device bus
addresses for HW DMA. These bus addresses are then passed to the IB
device driver via ib_post_send() and ib_post_recv().
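
For reference, a typical consumer of the current interface does
something like the following (a minimal sketch; buffer allocation and
error handling omitted, and "mr" is assumed to come from
ib_get_dma_mr()):

	struct ib_sge list;
	struct ib_recv_wr wr, *bad_wr;

	list.addr   = dma_map_single(dev->dma_device, buf, size,
				     DMA_FROM_DEVICE);
	list.length = size;
	list.lkey   = mr->lkey;	/* mr from ib_get_dma_mr() */
	wr.next     = NULL;
	wr.wr_id    = (u64) (unsigned long) buf;
	wr.sg_list  = &list;
	wr.num_sge  = 1;
	ib_post_recv(qp, &wr, &bad_wr);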

The ib_ipath driver needs kernel virtual addresses in order to
copy data to/from the posted work requests, since it does not
use HW DMA. It currently relies on the bus-to-physical mapping
being one-to-one and cannot reasonably reverse the mapping when
an IOMMU is present.
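
To make the one-to-one assumption concrete, the only way for a
non-DMA driver to recover a usable pointer today is something like
the following (the helper name is hypothetical, not from the driver):

	/* Only valid when bus address == physical address, i.e. no
	 * IOMMU.  With an IOMMU, the bus address returned by
	 * dma_map_single() is a translated address, and phys_to_virt()
	 * on it yields a bogus pointer.
	 */
	static void *ipath_bus_to_virt(dma_addr_t bus_addr)
	{
		return phys_to_virt((unsigned long) bus_addr);
	}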

History:

I first proposed modifying the dma_*() routines to allow a device
driver to interpose on the function calls.  This was not well
received by the Linux kernel maintainers since it would have had
too much impact on the existing code.

I also proposed adding a flag to the ib_device structure
and modifying the kernel IB code to check the flag and pass
either the dma_*() mapped address or a kernel virtual address.
This works OK for kmalloc() buffers, where dma_map_single() is
called, but it doesn't work well for SRP, which has lists of
physical pages and calls dma_map_sg().
It also means the kernel IB layer has to explicitly handle two
different kinds of addresses.

Current Proposal:

My current proposal is to provide wrapper routines for the
dma_*() routines, to be used only by the IB kernel code.
These ib_dma_*() variants would allow a device driver to interpose
on the call and convert the kernel virtual or physical page
addresses into something the device driver can handle.
For ib_mthca and ib_ehca, they would simply call the corresponding
dma_*() routine. For ib_ipath, a different implementation
would be needed.
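
As a rough sketch of the ib_ipath side (the function names here are
hypothetical and not part of the patch below), the driver could hand
back the kernel virtual address itself as the "bus address", since
no HW DMA ever occurs:

	static int ipath_mapping_error(dma_addr_t dma_addr)
	{
		return dma_addr == 0;
	}

	static dma_addr_t ipath_map_single(struct ib_device *dev,
					   void *ptr, size_t size,
					   enum dma_data_direction direction)
	{
		/* No HW DMA; the driver's copy loops use the
		 * pointer directly. */
		return (dma_addr_t) (unsigned long) ptr;
	}

These would be assigned to the new ib_device function pointers before
ib_register_device() is called. ib_mthca and ib_ehca would leave the
pointers NULL and fall through to the normal dma_*() routines, at no
cost beyond a pointer test.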

My expectation is that this would add little overhead, would be
easy to explain and document, and that converting existing code to
the new convention would be straightforward (see the sample patch
below).
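
For example, converting a typical call site would look something
like this (illustrative only, not taken from an actual driver):

-	addr = dma_map_single(dev->dma_device, buf, size, DMA_TO_DEVICE);
-	if (dma_mapping_error(addr))
+	addr = ib_dma_map_single(dev, buf, size, DMA_TO_DEVICE);
+	if (ib_dma_mapping_error(dev, addr))
 		return -ENOMEM;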

I would like to get some consensus that this is an acceptable
approach before I spend a bunch of time developing it further.

Index: ib_verbs.h
===================================================================
--- ib_verbs.h	(revision 9441)
+++ ib_verbs.h	(working copy)
@@ -43,6 +43,7 @@
 
 #include <linux/types.h>
 #include <linux/device.h>
+#include <linux/dma-mapping.h>
 
 #include <asm/atomic.h>
 #include <asm/scatterlist.h>
@@ -984,6 +985,19 @@ struct ib_device {
 						  struct ib_grh *in_grh,
 						  struct ib_mad *in_mad,
 						  struct ib_mad *out_mad);
+	int                        (*mapping_error)(dma_addr_t dma_addr);
+	dma_addr_t                 (*map_single)(struct ib_device *dev,
+						 void *ptr, size_t size,
+						 enum dma_data_direction direction);
+	void                       (*unmap_single)(struct ib_device *dev,
+						   dma_addr_t addr, size_t size,
+						   enum dma_data_direction direction);
+	int                        (*map_sg)(struct ib_device *dev,
+					     struct scatterlist *sg, int nents,
+					     enum dma_data_direction direction);
+	void                       (*unmap_sg)(struct ib_device *dev,
+					       struct scatterlist *sg, int nents,
+					       enum dma_data_direction direction);
 
 	struct module               *owner;
 	struct class_device          class_dev;
@@ -1392,6 +1406,66 @@ static inline int ib_req_ncomp_notif(str
 struct ib_mr *ib_get_dma_mr(struct ib_pd *pd, int mr_access_flags);
 
 /**
+ * ib_dma_mapping_error - check a DMA address for a failed mapping
+ */
+static inline int ib_dma_mapping_error(struct ib_device *dev,
+				       dma_addr_t dma_addr)
+{
+	return dev->mapping_error ?
+		dev->mapping_error(dma_addr) : dma_mapping_error(dma_addr);
+}
+
+/**
+ * ib_dma_map_single - map a kernel virtual address for DMA
+ */
+static inline dma_addr_t ib_dma_map_single(struct ib_device *dev,
+					   void *cpu_addr, size_t size,
+					   enum dma_data_direction direction)
+{
+	return dev->map_single ?
+		dev->map_single(dev, cpu_addr, size, direction) :
+		dma_map_single(dev->dma_device, cpu_addr, size, direction);
+}
+
+/**
+ * ib_dma_unmap_single - unmap a buffer mapped with ib_dma_map_single
+ */
+static inline void ib_dma_unmap_single(struct ib_device *dev,
+				       dma_addr_t addr, size_t size,
+				       enum dma_data_direction direction)
+{
+	if (dev->unmap_single)
+		dev->unmap_single(dev, addr, size, direction);
+	else
+		dma_unmap_single(dev->dma_device, addr, size, direction);
+}
+
+/**
+ * ib_dma_map_sg - map a scatter/gather list for DMA
+ */
+static inline int ib_dma_map_sg(struct ib_device *dev,
+				       struct scatterlist *sg, int nents,
+				       enum dma_data_direction direction)
+{
+	return dev->map_sg ?
+		dev->map_sg(dev, sg, nents, direction) :
+		dma_map_sg(dev->dma_device, sg, nents, direction);
+}
+
+/**
+ * ib_dma_unmap_sg - unmap a scatter/gather list mapped with ib_dma_map_sg
+ */
+static inline void ib_dma_unmap_sg(struct ib_device *dev,
+				   struct scatterlist *sg, int nents,
+				   enum dma_data_direction direction)
+{
+	if (dev->unmap_sg)
+		dev->unmap_sg(dev, sg, nents, direction);
+	else
+		dma_unmap_sg(dev->dma_device, sg, nents, direction);
+}
+
+/**
  * ib_reg_phys_mr - Prepares a virtually addressed memory region for use
  *   by an HCA.
  * @pd: The protection domain associated assigned to the registered region.
