[ofa-general] Re: [PATCH 1 of 2] IB/mlx4: For 64-bit systems, use large virtually contiguous queue buffers (vmap)
Roland Dreier
rdreier at cisco.com
Wed Feb 6 21:18:36 PST 2008
OK, I applied this along with a couple of cleanup patches of my own.
I decided to use the vmap() approach for CQ and SRQ buffers too, since I
think that the CPU's MMU should be faster than walking our own page
tables in software.
Here's what I applied (I still need to test tomorrow, so there may be
silly bugs):
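To make the trade-off concrete, here is a minimal userspace model of the two
lookup paths: the software page-list walk we do today versus plain pointer
arithmetic into one contiguous mapping, which is what vmap() gives us on
64-bit systems. This is only an illustrative sketch -- struct model_buf,
MODEL_PAGE_SHIFT and the rest are made-up stand-ins, not the driver's types:

    /* Simplified userspace model of the two lookup schemes. */
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    #define MODEL_PAGE_SHIFT 12
    #define MODEL_PAGE_SIZE  (1 << MODEL_PAGE_SHIFT)

    struct model_buf {
            void *direct;      /* one contiguous mapping (vmap() case)   */
            void **page_list;  /* per-page pointers (software-walk case) */
            int   nbufs;
    };

    /* Today's scheme: walk our own page table in software. */
    static void *lookup_page_list(struct model_buf *buf, int offset)
    {
            return (char *)buf->page_list[offset >> MODEL_PAGE_SHIFT] +
                   (offset & (MODEL_PAGE_SIZE - 1));
    }

    /* After the patches (64-bit): let the CPU's MMU do the work. */
    static void *lookup_direct(struct model_buf *buf, int offset)
    {
            return (char *)buf->direct + offset;
    }

    int main(void)
    {
            struct model_buf buf = { .nbufs = 4 };
            int i, offset = 3 * MODEL_PAGE_SIZE + 100;

            /* Fake a "vmapped" buffer: here the pages really are contiguous. */
            buf.direct = calloc(buf.nbufs, MODEL_PAGE_SIZE);
            buf.page_list = calloc(buf.nbufs, sizeof(void *));
            for (i = 0; i < buf.nbufs; ++i)
                    buf.page_list[i] = (char *)buf.direct + i * MODEL_PAGE_SIZE;

            /* Both paths resolve to the same byte. */
            strcpy(lookup_direct(&buf, offset), "hello");
            printf("%s\n", (char *)lookup_page_list(&buf, offset));

            free(buf.page_list);
            free(buf.direct);
            return 0;
    }

Both functions return the same address; the difference is just whether the
per-page indirection happens in our code or in the hardware page tables.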
commit b57aacfa7a95328f469d0360e49289b023c47e9e
Author: Roland Dreier <rolandd at cisco.com>
Date: Wed Feb 6 21:17:59 2008 -0800
mlx4_core: Clean up struct mlx4_buf
Now that struct mlx4_buf.u is a struct instead of a union because of
the vmap() changes, there's no point in having a struct at all. So
move .direct and .page_list directly into struct mlx4_buf and get rid
of a bunch of unnecessary ".u"s.
Signed-off-by: Roland Dreier <rolandd at cisco.com>
---
drivers/net/mlx4/alloc.c | 40 ++++++++++++++++++++--------------------
drivers/net/mlx4/mr.c | 4 ++--
include/linux/mlx4/device.h | 10 ++++------
3 files changed, 26 insertions(+), 28 deletions(-)
diff --git a/drivers/net/mlx4/alloc.c b/drivers/net/mlx4/alloc.c
index 2da2c2e..521dc03 100644
--- a/drivers/net/mlx4/alloc.c
+++ b/drivers/net/mlx4/alloc.c
@@ -116,40 +116,40 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
buf->nbufs = 1;
buf->npages = 1;
buf->page_shift = get_order(size) + PAGE_SHIFT;
- buf->u.direct.buf = dma_alloc_coherent(&dev->pdev->dev,
+ buf->direct.buf = dma_alloc_coherent(&dev->pdev->dev,
size, &t, GFP_KERNEL);
- if (!buf->u.direct.buf)
+ if (!buf->direct.buf)
return -ENOMEM;
- buf->u.direct.map = t;
+ buf->direct.map = t;
while (t & ((1 << buf->page_shift) - 1)) {
--buf->page_shift;
buf->npages *= 2;
}
- memset(buf->u.direct.buf, 0, size);
+ memset(buf->direct.buf, 0, size);
} else {
int i;
buf->nbufs = (size + PAGE_SIZE - 1) / PAGE_SIZE;
buf->npages = buf->nbufs;
buf->page_shift = PAGE_SHIFT;
- buf->u.page_list = kzalloc(buf->nbufs * sizeof *buf->u.page_list,
+ buf->page_list = kzalloc(buf->nbufs * sizeof *buf->page_list,
GFP_KERNEL);
- if (!buf->u.page_list)
+ if (!buf->page_list)
return -ENOMEM;
for (i = 0; i < buf->nbufs; ++i) {
- buf->u.page_list[i].buf =
+ buf->page_list[i].buf =
dma_alloc_coherent(&dev->pdev->dev, PAGE_SIZE,
&t, GFP_KERNEL);
- if (!buf->u.page_list[i].buf)
+ if (!buf->page_list[i].buf)
goto err_free;
- buf->u.page_list[i].map = t;
+ buf->page_list[i].map = t;
- memset(buf->u.page_list[i].buf, 0, PAGE_SIZE);
+ memset(buf->page_list[i].buf, 0, PAGE_SIZE);
}
if (BITS_PER_LONG == 64) {
@@ -158,10 +158,10 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
if (!pages)
goto err_free;
for (i = 0; i < buf->nbufs; ++i)
- pages[i] = virt_to_page(buf->u.page_list[i].buf);
- buf->u.direct.buf = vmap(pages, buf->nbufs, VM_MAP, PAGE_KERNEL);
+ pages[i] = virt_to_page(buf->page_list[i].buf);
+ buf->direct.buf = vmap(pages, buf->nbufs, VM_MAP, PAGE_KERNEL);
kfree(pages);
- if (!buf->u.direct.buf)
+ if (!buf->direct.buf)
goto err_free;
}
}
@@ -180,18 +180,18 @@ void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf)
int i;
if (buf->nbufs == 1)
- dma_free_coherent(&dev->pdev->dev, size, buf->u.direct.buf,
- buf->u.direct.map);
+ dma_free_coherent(&dev->pdev->dev, size, buf->direct.buf,
+ buf->direct.map);
else {
if (BITS_PER_LONG == 64)
- vunmap(buf->u.direct.buf);
+ vunmap(buf->direct.buf);
for (i = 0; i < buf->nbufs; ++i)
- if (buf->u.page_list[i].buf)
+ if (buf->page_list[i].buf)
dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
- buf->u.page_list[i].buf,
- buf->u.page_list[i].map);
- kfree(buf->u.page_list);
+ buf->page_list[i].buf,
+ buf->page_list[i].map);
+ kfree(buf->page_list);
}
}
EXPORT_SYMBOL_GPL(mlx4_buf_free);
diff --git a/drivers/net/mlx4/mr.c b/drivers/net/mlx4/mr.c
index 9c9e308..679dfdb 100644
--- a/drivers/net/mlx4/mr.c
+++ b/drivers/net/mlx4/mr.c
@@ -419,9 +419,9 @@ int mlx4_buf_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
for (i = 0; i < buf->npages; ++i)
if (buf->nbufs == 1)
- page_list[i] = buf->u.direct.map + (i << buf->page_shift);
+ page_list[i] = buf->direct.map + (i << buf->page_shift);
else
- page_list[i] = buf->u.page_list[i].map;
+ page_list[i] = buf->page_list[i].map;
err = mlx4_write_mtt(dev, mtt, 0, buf->npages, page_list);
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 6316077..4210ac4 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -189,10 +189,8 @@ struct mlx4_buf_list {
};
struct mlx4_buf {
- struct {
- struct mlx4_buf_list direct;
- struct mlx4_buf_list *page_list;
- } u;
+ struct mlx4_buf_list direct;
+ struct mlx4_buf_list *page_list;
int nbufs;
int npages;
int page_shift;
@@ -311,9 +309,9 @@ void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf);
static inline void *mlx4_buf_offset(struct mlx4_buf *buf, int offset)
{
if (BITS_PER_LONG == 64 || buf->nbufs == 1)
- return buf->u.direct.buf + offset;
+ return buf->direct.buf + offset;
else
- return buf->u.page_list[offset >> PAGE_SHIFT].buf +
+ return buf->page_list[offset >> PAGE_SHIFT].buf +
(offset & (PAGE_SIZE - 1));
}
commit 313abe55a87bc10e55d00f337d609e17ad5f8c9a
Author: Jack Morgenstein <jackm at dev.mellanox.co.il>
Date: Mon Jan 28 10:40:51 2008 +0200
mlx4_core: For 64-bit systems, vmap() kernel queue buffers
Since kernel virtual memory is not a problem on 64-bit systems, there
is no reason to use our own 2-layer page mapping scheme for large
kernel queue buffers on such systems. Instead, map the page list to a
single virtually contiguous buffer with vmap(), so that we can access
buffer memory via direct indexing.
Signed-off-by: Michael S. Tsirkin <mst at dev.mellanox.co.il>
Signed-off-by: Jack Morgenstein <jackm at dev.mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd at cisco.com>
---
drivers/net/mlx4/alloc.c | 16 ++++++++++++++++
include/linux/mlx4/device.h | 4 ++--
2 files changed, 18 insertions(+), 2 deletions(-)
diff --git a/drivers/net/mlx4/alloc.c b/drivers/net/mlx4/alloc.c
index b226e01..2da2c2e 100644
--- a/drivers/net/mlx4/alloc.c
+++ b/drivers/net/mlx4/alloc.c
@@ -151,6 +151,19 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
memset(buf->u.page_list[i].buf, 0, PAGE_SIZE);
}
+
+ if (BITS_PER_LONG == 64) {
+ struct page **pages;
+ pages = kmalloc(sizeof *pages * buf->nbufs, GFP_KERNEL);
+ if (!pages)
+ goto err_free;
+ for (i = 0; i < buf->nbufs; ++i)
+ pages[i] = virt_to_page(buf->u.page_list[i].buf);
+ buf->u.direct.buf = vmap(pages, buf->nbufs, VM_MAP, PAGE_KERNEL);
+ kfree(pages);
+ if (!buf->u.direct.buf)
+ goto err_free;
+ }
}
return 0;
@@ -170,6 +183,9 @@ void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf)
dma_free_coherent(&dev->pdev->dev, size, buf->u.direct.buf,
buf->u.direct.map);
else {
+ if (BITS_PER_LONG == 64)
+ vunmap(buf->u.direct.buf);
+
for (i = 0; i < buf->nbufs; ++i)
if (buf->u.page_list[i].buf)
dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index a0afa75..6316077 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -189,7 +189,7 @@ struct mlx4_buf_list {
};
struct mlx4_buf {
- union {
+ struct {
struct mlx4_buf_list direct;
struct mlx4_buf_list *page_list;
} u;
@@ -310,7 +310,7 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf);
static inline void *mlx4_buf_offset(struct mlx4_buf *buf, int offset)
{
- if (buf->nbufs == 1)
+ if (BITS_PER_LONG == 64 || buf->nbufs == 1)
return buf->u.direct.buf + offset;
else
return buf->u.page_list[offset >> PAGE_SHIFT].buf +
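For reference, the core kernel pattern Jack's patch above relies on is:
collect the struct page pointers behind each piece of the buffer, vmap()
them into one virtually contiguous kernel mapping, and vunmap() that mapping
before freeing the pages. A minimal sketch follows; vmap_demo() is a
hypothetical helper, and it uses alloc_page() where the driver actually uses
per-page dma_alloc_coherent(), but the vmap()/vunmap() bracketing is the same:

    #include <linux/errno.h>
    #include <linux/gfp.h>
    #include <linux/mm.h>
    #include <linux/string.h>
    #include <linux/vmalloc.h>

    /* Sketch only: allocate a few independent pages, vmap() them into one
     * virtually contiguous kernel buffer, use it, then tear it all down. */
    static int vmap_demo(void)
    {
            struct page *pages[4];
            void *buf;
            int i;

            for (i = 0; i < 4; ++i) {
                    pages[i] = alloc_page(GFP_KERNEL);
                    if (!pages[i])
                            goto err_pages;
            }

            /* VM_MAP + PAGE_KERNEL is the combination the patch uses. */
            buf = vmap(pages, 4, VM_MAP, PAGE_KERNEL);
            if (!buf)
                    goto err_pages;

            memset(buf, 0, 4 * PAGE_SIZE);  /* direct indexing now works */

            vunmap(buf);                    /* drop the mapping first ... */
            for (i = 0; i < 4; ++i)
                    __free_page(pages[i]);  /* ... then free the pages    */
            return 0;

    err_pages:
            while (--i >= 0)
                    __free_page(pages[i]);
            return -ENOMEM;
    }

Note that the temporary page array only needs to live for the duration of the
vmap() call, which is why the patch can kfree() it immediately afterwards.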
commit 1c69fc2a9012e160c8d459f63df74a6b01db8322
Author: Roland Dreier <rolandd at cisco.com>
Date: Wed Feb 6 21:07:54 2008 -0800
IB/mlx4: Consolidate code to get an entry from a struct mlx4_buf
We use struct mlx4_buf for kernel QP, CQ and SRQ buffers, and the code
to look up an entry is duplicated in get_cqe_from_buf() and the QP and
SRQ versions of get_wqe(). Factor this out into mlx4_buf_offset().
This will also make it easier to switch over to using vmap() for buffers.
Signed-off-by: Roland Dreier <rolandd at cisco.com>
---
drivers/infiniband/hw/mlx4/cq.c | 8 +-------
drivers/infiniband/hw/mlx4/qp.c | 6 +-----
drivers/infiniband/hw/mlx4/srq.c | 8 +-------
include/linux/mlx4/device.h | 8 ++++++++
4 files changed, 11 insertions(+), 19 deletions(-)
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index 7950aa6..8ac7b97 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -64,13 +64,7 @@ static void mlx4_ib_cq_event(struct mlx4_cq *cq, enum mlx4_event type)
static void *get_cqe_from_buf(struct mlx4_ib_cq_buf *buf, int n)
{
- int offset = n * sizeof (struct mlx4_cqe);
-
- if (buf->buf.nbufs == 1)
- return buf->buf.u.direct.buf + offset;
- else
- return buf->buf.u.page_list[offset >> PAGE_SHIFT].buf +
- (offset & (PAGE_SIZE - 1));
+ return mlx4_buf_offset(&buf->buf, n * sizeof (struct mlx4_cqe));
}
static void *get_cqe(struct mlx4_ib_cq *cq, int n)
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 8cba9c5..376db73 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -96,11 +96,7 @@ static int is_qp0(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
static void *get_wqe(struct mlx4_ib_qp *qp, int offset)
{
- if (qp->buf.nbufs == 1)
- return qp->buf.u.direct.buf + offset;
- else
- return qp->buf.u.page_list[offset >> PAGE_SHIFT].buf +
- (offset & (PAGE_SIZE - 1));
+ return mlx4_buf_offset(&qp->buf, offset);
}
static void *get_recv_wqe(struct mlx4_ib_qp *qp, int n)
diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c
index e7e9a3d..beaa3b0 100644
--- a/drivers/infiniband/hw/mlx4/srq.c
+++ b/drivers/infiniband/hw/mlx4/srq.c
@@ -38,13 +38,7 @@
static void *get_wqe(struct mlx4_ib_srq *srq, int n)
{
- int offset = n << srq->msrq.wqe_shift;
-
- if (srq->buf.nbufs == 1)
- return srq->buf.u.direct.buf + offset;
- else
- return srq->buf.u.page_list[offset >> PAGE_SHIFT].buf +
- (offset & (PAGE_SIZE - 1));
+ return mlx4_buf_offset(&srq->buf, n << srq->msrq.wqe_shift);
}
static void mlx4_ib_srq_event(struct mlx4_srq *srq, enum mlx4_event type)
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 222815d..a0afa75 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -308,6 +308,14 @@ struct mlx4_init_port_param {
int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
struct mlx4_buf *buf);
void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf);
+static inline void *mlx4_buf_offset(struct mlx4_buf *buf, int offset)
+{
+ if (buf->nbufs == 1)
+ return buf->u.direct.buf + offset;
+ else
+ return buf->u.page_list[offset >> PAGE_SHIFT].buf +
+ (offset & (PAGE_SIZE - 1));
+}
int mlx4_pd_alloc(struct mlx4_dev *dev, u32 *pdn);
void mlx4_pd_free(struct mlx4_dev *dev, u32 pdn);