[ofa-general] [PATCH v2] IB/ipoib: use vmap with allocation of tx ring

David Wilder dwilder at us.ibm.com
Thu Feb 14 11:50:38 PST 2008


Eli Cohen wrote:
>  From 2f1870f76ddbfc948aea4847c25d05ae70dd43cf Mon Sep 17 00:00:00 2001
> From: Eli Cohen <eli at mellanox.co.il>
> Date: Thu, 14 Feb 2008 15:46:33 +0200
> Subject: [PATCH] IB/ipoib: use vmap with allocation of tx ring
> 
> With the introduction of s/g support in IPoIB, the size of struct
> ipoib_tx_buf has increased since it reserves room for the fragments.
> This caused allocations to fail when large send queues are required.
> This patch uses an array of pages and maps them with vmap to make the
> allocation much more likely to succeed.
> 
> Signed-off-by: Eli Cohen <eli at mellanox.co.il>
> ---
>  drivers/infiniband/ulp/ipoib/ipoib.h      |    9 +++++
>  drivers/infiniband/ulp/ipoib/ipoib_main.c |   54 ++++++++++++++++++++++++++---
>  2 files changed, 58 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
> index f9b7caa..78a99d6 100644
> --- a/drivers/infiniband/ulp/ipoib/ipoib.h
> +++ b/drivers/infiniband/ulp/ipoib/ipoib.h
> @@ -186,6 +186,12 @@ enum ipoib_cm_state {
>      IPOIB_CM_RX_FLUSH  /* Last WQE Reached event observed */
>  };
> 
> +struct ipoib_vmap {
> +       void           *ptr;
> +       struct page   **page_arr;
> +       int             npages;
> +};
> +
>  struct ipoib_cm_rx {
>      struct ib_cm_id           *id;
>      struct ib_qp           *qp;
> @@ -293,6 +299,7 @@ struct ipoib_dev_priv {
>      struct ipoib_rx_buf *rx_ring;
> 
>      spinlock_t         tx_lock;
> +    struct ipoib_vmap    tx_vmap_ring;
>      struct ipoib_tx_buf *tx_ring;
>      unsigned         tx_head;
>      unsigned         tx_tail;
> @@ -458,6 +465,8 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey);
>  void ipoib_pkey_poll(struct work_struct *work);
>  int ipoib_pkey_dev_delay_open(struct net_device *dev);
>  void ipoib_drain_cq(struct net_device *dev);
> +int ipoib_vmalloc(struct ipoib_vmap *buf, int size);
> +void ipoib_vfree(struct ipoib_vmap *buf);
> 
>  #ifdef CONFIG_INFINIBAND_IPOIB_CM
> 
> diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
> index f96477a..3a44a42 100644
> --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
> +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
> @@ -92,6 +92,50 @@ static struct ib_client ipoib_client = {
>      .remove = ipoib_remove_one
>  };
> 
> +int ipoib_vmalloc(struct ipoib_vmap *buf, int size)
> +{
> +    int     i;
> +    int     npages = ALIGN(size, PAGE_SIZE) / PAGE_SIZE;
> +    int     ret = -ENOMEM;
> +
> +    buf->page_arr = kmalloc(npages * sizeof buf->page_arr[0], GFP_KERNEL);
> +    if (!buf->page_arr)
> +        goto out;
> +
> +    for (i = 0; i < npages; ++i) {
> +        buf->page_arr[i] = alloc_page(GFP_KERNEL);
> +        if (!buf->page_arr[i])
> +            goto page_fail;
> +    }
> +
> +    buf->npages = npages;
> +    buf->ptr = vmap(buf->page_arr, buf->npages, VM_MAP, PAGE_KERNEL);
> +    if (!buf->ptr)
> +        goto page_fail;
> +
> +    memset(buf->ptr, 0, size);
> +    return 0;
> +
> +page_fail:
> +    for (; i > 0; --i)
> +        __free_page(buf->page_arr[i - 1]);
> +
> +    kfree(buf->page_arr);
> +out:
> +    return ret;
> +}
> +
> +void ipoib_vfree(struct ipoib_vmap *buf)
> +{
> +    int     i;
> +
> +    vunmap(buf->ptr);
> +    for (i = 0; i < buf->npages; ++i)
> +        __free_page(buf->page_arr[i]);
> +
> +    kfree(buf->page_arr);
> +}
> +
>  int ipoib_open(struct net_device *dev)
>  {
>      struct ipoib_dev_priv *priv = netdev_priv(dev);
> @@ -887,13 +931,13 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
>          goto out;
>      }
> 
> -    priv->tx_ring = kzalloc(ipoib_sendq_size * sizeof *priv->tx_ring,
> -                GFP_KERNEL);
> -    if (!priv->tx_ring) {
> +    if (ipoib_vmalloc(&priv->tx_vmap_ring, ipoib_sendq_size *
> +              sizeof *priv->tx_ring)) {
>          printk(KERN_WARNING "%s: failed to allocate TX ring (%d entries)\n",
>                 ca->name, ipoib_sendq_size);
>          goto out_rx_ring_cleanup;
>      }
> +    priv->tx_ring = priv->tx_vmap_ring.ptr;
> 
>      /* priv->tx_head, tx_tail & tx_outstanding are already 0 */
> 
> @@ -903,7 +947,7 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
>      return 0;
> 
>  out_tx_ring_cleanup:
> -    kfree(priv->tx_ring);
> +    ipoib_vfree(&priv->tx_vmap_ring);
> 
>  out_rx_ring_cleanup:
>      kfree(priv->rx_ring);
> @@ -928,7 +972,7 @@ void ipoib_dev_cleanup(struct net_device *dev)
>      ipoib_ib_dev_cleanup(dev);
> 
>      kfree(priv->rx_ring);
> -    kfree(priv->tx_ring);
> +    ipoib_vfree(&priv->tx_vmap_ring);
> 
>      priv->rx_ring = NULL;
>      priv->tx_ring = NULL;
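
A side note for anyone converting another ring to these helpers: the
vmap() approach only needs the ring to be virtually contiguous, so the
allocation no longer depends on finding one large physically contiguous
region the way kzalloc() does. Converting a kzalloc() call site is
mechanical. A minimal sketch (my_vmap and my_ring are illustrative
names, not from the patch):

    /* Sketch only: convert a kzalloc()'d ring to the new helpers.
     * 'my_vmap' and 'my_ring' are made-up names for illustration. */
    struct ipoib_vmap my_vmap;
    struct ipoib_tx_buf *my_ring;

    if (ipoib_vmalloc(&my_vmap, ipoib_sendq_size * sizeof *my_ring))
            return -ENOMEM;        /* page alloc or vmap() failed */
    my_ring = my_vmap.ptr;         /* zeroed, virtually contiguous */

    /* ... use my_ring[0 .. ipoib_sendq_size - 1] ... */

    ipoib_vfree(&my_vmap);         /* vunmap() and free each page */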

I tested with OFED-1.3-20080214-0725.tgz.  This build looks to have
fixes for both the tx_ring and the rx_ring.

This build fixes our problem with send_queue_size=1024.

But recv_queue_size=2048 is still failing.

[dmesg]
ib%d: failed allocating SRQ wr array
ib%d: failed allocating SRQ wr array

kernel:  2.6.16.57-0.9
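
If the SRQ failure is the same kind of large kzalloc() of the receive
ring in ipoib_cm.c, the new helper should cover it as well. An untested
sketch, assuming the ring is priv->cm.srq_ring as in mainline and that
a srq_vmap_ring field is added next to it (the OFED backport may
differ):

    /* Untested sketch: apply the same vmap scheme to the SRQ ring.
     * 'srq_vmap_ring' is an assumed new field alongside srq_ring. */
    if (ipoib_vmalloc(&priv->cm.srq_vmap_ring,
                      ipoib_recvq_size * sizeof *priv->cm.srq_ring)) {
            printk(KERN_WARNING "%s: failed allocating SRQ wr array\n",
                   priv->ca->name);
            return;
    }
    priv->cm.srq_ring = priv->cm.srq_vmap_ring.ptr;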