[ofa-general] Re: [PATCH] ib_core: Use weak ordering for data registered memory

Eli Cohen eli at dev.mellanox.co.il
Wed Oct 22 06:38:39 PDT 2008


On Mon, Oct 20, 2008 at 07:43:04AM -0700, Roland Dreier wrote:
>  > Some architectures support weak ordering in which case better
>  > performance is possible. IB registered memory used for data can be
>  > weakly ordered because the completion queues' buffers are
>  > registered as strongly ordered. This will result in flushing all data
>  > related outstanding DMA requests by the HCA when a completion is DMAed
>  > to a completion queue buffer.
> 
> This would break the Mellanox HW's guarantee of writing the last byte of
> an RDMA last, right?  So on platforms where this has an effect (only
> Cell at the moment) some applications could be subtly broken?
>
In theory it would break Mellanox's guarantee for strict ordering on
data, but in practice it will not since the only architecture that
supports weak ordering is CELL. As Arnd suggested in his response
email, here is the patch with a module parameter which by default will
not configure weak ordering for data. Anyone wishing to benefit from
weak ordering will have to set the module parameter accordingly.


>From 2c1e0f4d8138c1fbd675e7ada4384f59269acb1f Mon Sep 17 00:00:00 2001
From: Eli Cohen <eli at mellanox.co.il>
Date: Mon, 20 Oct 2008 15:52:22 +0200
Subject: [PATCH] ib_core: Use weak ordering for data registered memory

Some architectures support weak ordering in which case better
performance is possible. IB registered memory used for data can be
weakly ordered because the completion queues' buffers are
registered as strongly ordered. This will result in flushing all data
related outstanding DMA requests by the HCA when a completion is DMAed
to a completion queue buffer.
This patch will allow weak ordering for data if ib_core is loaded with
the module parameter, allow_weak_ordering, set to a nonzero value.

Signed-off-by: Eli Cohen <eli at mellanox.co.il>
Signed-off-by: Arnd Bergmann <arnd at arndb.de>
---
 drivers/infiniband/core/umem.c |   12 ++++++++++--
 include/rdma/ib_umem.h         |    2 ++
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 6f7c096..d21853d 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -40,6 +40,10 @@
 
 #include "uverbs.h"
 
+static int allow_weak_ordering;
+module_param(allow_weak_ordering, bool, 0444);
+MODULE_PARM_DESC(allow_weak_ordering,  "Allow weak ordering for data registered memory");
+
 #define IB_UMEM_MAX_PAGE_CHUNK						\
 	((PAGE_SIZE - offsetof(struct ib_umem_chunk, page_list)) /	\
 	 ((void *) &((struct ib_umem_chunk *) 0)->page_list[1] -	\
@@ -51,8 +55,8 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
 	int i;
 
 	list_for_each_entry_safe(chunk, tmp, &umem->chunk_list, list) {
-		ib_dma_unmap_sg(dev, chunk->page_list,
-				chunk->nents, DMA_BIDIRECTIONAL);
+		ib_dma_unmap_sg_attrs(dev, chunk->page_list,
+				      chunk->nents, DMA_BIDIRECTIONAL, &chunk->attrs);
 		for (i = 0; i < chunk->nents; ++i) {
 			struct page *page = sg_page(&chunk->page_list[i]);
 
@@ -91,6 +95,9 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 
 	if (dmasync)
 		dma_set_attr(DMA_ATTR_WRITE_BARRIER, &attrs);
+	else if (allow_weak_ordering)
+		dma_set_attr(DMA_ATTR_WEAK_ORDERING, &attrs);
+
 
 	if (!can_do_mlock())
 		return ERR_PTR(-EPERM);
@@ -155,6 +162,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 		if (ret < 0)
 			goto out;
 
+		chunk->attrs = attrs;
 		cur_base += ret * PAGE_SIZE;
 		npages   -= ret;
 
diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
index 9ee0d2e..90f3712 100644
--- a/include/rdma/ib_umem.h
+++ b/include/rdma/ib_umem.h
@@ -36,6 +36,7 @@
 #include <linux/list.h>
 #include <linux/scatterlist.h>
 #include <linux/workqueue.h>
+#include <linux/dma-attrs.h>
 
 struct ib_ucontext;
 
@@ -56,6 +57,7 @@ struct ib_umem_chunk {
 	struct list_head	list;
 	int                     nents;
 	int                     nmap;
+	struct dma_attrs	attrs;
 	struct scatterlist      page_list[0];
 };
 
-- 
1.6.0.2




More information about the general mailing list