[ofa-general] Re: [UPDATE] [V3] [PATCH 3/3] ib/ipoib: IPoIB-UD RX S/G support for 4K MTU

Eli Cohen eli at mellanox.co.il
Sun Feb 3 02:07:29 PST 2008


Hi Shirley,

Your patches cannot be applied cleanly. It seems like your email client
wraps long lines. Can you please check whether this is the case?

On Sat, 2008-02-02 at 16:02 -0800, Shirley Ma wrote:
> I have fixed a bug found in 4K MTU testing. Here is the new patch. I am
> running stress tests tonight.
> 
> Signed-off-by: Shirley Ma <xma at us.ibm.com>
> ---
> 
>  drivers/infiniband/ulp/ipoib/ipoib.h    |    7 +--
>  drivers/infiniband/ulp/ipoib/ipoib_ib.c |   93 +++++++++++++++++++-----------
>  2 files changed, 60 insertions(+), 40 deletions(-)
> 
> diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
> index 6b5e108..faee740 100644
> --- a/drivers/infiniband/ulp/ipoib/ipoib.h
> +++ b/drivers/infiniband/ulp/ipoib/ipoib.h
> @@ -145,11 +145,6 @@ struct ipoib_sg_rx_buf {
>  	u64		mapping[IPOIB_UD_RX_SG];
>  };
>  
> -struct ipoib_rx_buf {
> -	struct sk_buff *skb;
> -	u64		mapping;
> -};
> -
>  struct ipoib_tx_buf {
>  	struct sk_buff *skb;
>  	u64		mapping;
> @@ -299,7 +294,7 @@ struct ipoib_dev_priv {
>  	unsigned int admin_mtu;
>  	unsigned int mcast_mtu;
>  
> -	struct ipoib_rx_buf *rx_ring;
> +	struct ipoib_sg_rx_buf *rx_ring;
>  
>  	spinlock_t	     tx_lock;
>  	struct ipoib_tx_buf *tx_ring;
> diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
> index 9ca3d34..dfb5cc2 100644
> --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
> +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
> @@ -155,29 +155,25 @@ partial_error:
>  static int ipoib_ib_post_receive(struct net_device *dev, int id)
>  {
>  	struct ipoib_dev_priv *priv = netdev_priv(dev);
> -	struct ib_sge list;
> -	struct ib_recv_wr param;
>  	struct ib_recv_wr *bad_wr;
>  	int ret;
>  
> -	list.addr     = priv->rx_ring[id].mapping;
> -	list.length   = IPOIB_BUF_SIZE;
> -	list.lkey     = priv->mr->lkey;
> +	priv->rx_wr.wr_id = id | IPOIB_OP_RECV;
> +	priv->rx_sge[0].addr = priv->rx_ring[id].mapping[0];
> +	priv->rx_sge[1].addr = priv->rx_ring[id].mapping[1];	
>  
> -	param.next    = NULL;
> -	param.wr_id   = id | IPOIB_OP_RECV;
> -	param.sg_list = &list;
> -	param.num_sge = 1;
> -
> -	ret = ib_post_recv(priv->qp, &param, &bad_wr);
> +	ret = ib_post_recv(priv->qp, &priv->rx_wr, &bad_wr);
>  	if (unlikely(ret)) {
> +		if (ipoib_ud_need_sg(priv->max_ib_mtu))
> +			ipoib_sg_dma_unmap_rx(priv, priv->rx_ring[id].mapping);
> +		else 
> +			ib_dma_unmap_single(priv->ca, priv->rx_ring[id].mapping[0],
> +					    IPOIB_UD_BUF_SIZE(priv->max_ib_mtu),
> +					    DMA_FROM_DEVICE);
>  		ipoib_warn(priv, "receive failed for buf %d (%d)\n", id, ret);
> -		ib_dma_unmap_single(priv->ca, priv->rx_ring[id].mapping,
> -				    IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
>  		dev_kfree_skb_any(priv->rx_ring[id].skb);
>  		priv->rx_ring[id].skb = NULL;
>  	}
> -
>  	return ret;
>  }
>  
> @@ -187,7 +183,7 @@ static int ipoib_alloc_rx_skb(struct net_device *dev, int id)
>  	struct sk_buff *skb;
>  	u64 addr;
>  
> -	skb = dev_alloc_skb(IPOIB_BUF_SIZE + 4);
> +	skb = dev_alloc_skb(IPOIB_UD_BUF_SIZE(priv->max_ib_mtu) + 4);
>  	if (!skb)
>  		return -ENOMEM;
>  
> @@ -198,7 +194,8 @@ static int ipoib_alloc_rx_skb(struct net_device *dev, int id)
>  	 */
>  	skb_reserve(skb, 4);
>  
> -	addr = ib_dma_map_single(priv->ca, skb->data, IPOIB_BUF_SIZE,
> +	addr = ib_dma_map_single(priv->ca, skb->data,
> +				 IPOIB_UD_BUF_SIZE(priv->max_ib_mtu),
>  				 DMA_FROM_DEVICE);
>  	if (unlikely(ib_dma_mapping_error(priv->ca, addr))) {
>  		dev_kfree_skb_any(skb);
> @@ -206,7 +203,7 @@ static int ipoib_alloc_rx_skb(struct net_device *dev, int id)
>  	}
>  
>  	priv->rx_ring[id].skb     = skb;
> -	priv->rx_ring[id].mapping = addr;
> +	priv->rx_ring[id].mapping[0] = addr;
>  
>  	return 0;
>  }
> @@ -214,10 +211,15 @@ static int ipoib_alloc_rx_skb(struct net_device *dev, int id)
>  static int ipoib_ib_post_receives(struct net_device *dev)
>  {
>  	struct ipoib_dev_priv *priv = netdev_priv(dev);
> -	int i;
> +	int i, ret;
>  
>  	for (i = 0; i < ipoib_recvq_size; ++i) {
> -		if (ipoib_alloc_rx_skb(dev, i)) {
> +		if (ipoib_ud_need_sg(priv->max_ib_mtu)) 
> +			ret = !(ipoib_sg_alloc_rx_skb(dev, i,
> +						      priv->rx_ring[i].mapping));
> +		else
> +			ret = ipoib_alloc_rx_skb(dev, i);
> +		if (ret) {
>  			ipoib_warn(priv, "failed to allocate receive buffer %d\n", i);
>  			return -ENOMEM;
>  		}
> @@ -234,8 +236,9 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
>  {
>  	struct ipoib_dev_priv *priv = netdev_priv(dev);
>  	unsigned int wr_id = wc->wr_id & ~IPOIB_OP_RECV;
> -	struct sk_buff *skb;
> -	u64 addr;
> +	struct sk_buff *skb, *newskb = NULL;
> +	u64 mapping[IPOIB_UD_RX_SG];
> +	u64 addr = 0;
>  
>  	ipoib_dbg_data(priv, "recv completion: id %d, status: %d\n",
>  		       wr_id, wc->status);
> @@ -247,15 +250,20 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
>  	}
>  
>  	skb  = priv->rx_ring[wr_id].skb;
> -	addr = priv->rx_ring[wr_id].mapping;
> +	if (!ipoib_ud_need_sg(priv->max_ib_mtu))
> +		addr = priv->rx_ring[wr_id].mapping[0];
>  
>  	if (unlikely(wc->status != IB_WC_SUCCESS)) {
>  		if (wc->status != IB_WC_WR_FLUSH_ERR)
>  			ipoib_warn(priv, "failed recv event "
>  				   "(status=%d, wrid=%d vend_err %x)\n",
>  				   wc->status, wr_id, wc->vendor_err);
> -		ib_dma_unmap_single(priv->ca, addr,
> -				    IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
> +		if (ipoib_ud_need_sg(priv->max_ib_mtu))
> +			ipoib_sg_dma_unmap_rx(priv, priv->rx_ring[wr_id].mapping);
> +		else 
> +			ib_dma_unmap_single(priv->ca, addr,
> +					    IPOIB_UD_BUF_SIZE(priv->max_ib_mtu),
> +					    DMA_FROM_DEVICE);
>  		dev_kfree_skb_any(skb);
>  		priv->rx_ring[wr_id].skb = NULL;
>  		return;
> @@ -272,17 +280,30 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
>  	 * If we can't allocate a new RX buffer, dump
>  	 * this packet and reuse the old buffer.
>  	 */
> -	if (unlikely(ipoib_alloc_rx_skb(dev, wr_id))) {
> +	if (ipoib_ud_need_sg(priv->max_ib_mtu)) {
> +		newskb = ipoib_sg_alloc_rx_skb(dev, wr_id, mapping);
> +		if (unlikely(!newskb)) {
> +			++dev->stats.rx_dropped;
> +			goto repost;
> +		}
> +	} else if (unlikely(ipoib_alloc_rx_skb(dev, wr_id))) {
>  		++dev->stats.rx_dropped;
>  		goto repost;
>  	}
>  
>  	ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n",
>  		       wc->byte_len, wc->slid);
> -
> -	ib_dma_unmap_single(priv->ca, addr, IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
> -
> -	skb_put(skb, wc->byte_len);
> +	if (ipoib_ud_need_sg(priv->max_ib_mtu)) {
> +		ipoib_sg_dma_unmap_rx(priv, priv->rx_ring[wr_id].mapping);
> +		memcpy(priv->rx_ring[wr_id].mapping, mapping,
> +		       IPOIB_UD_RX_SG * sizeof *mapping);
> +		ipoib_ud_skb_put_frags(skb, wc->byte_len, newskb);
> +	} else {
> +		ib_dma_unmap_single(priv->ca, addr,
> +				    IPOIB_UD_BUF_SIZE(priv->max_ib_mtu),
> +				    DMA_FROM_DEVICE);
> +		skb_put(skb, wc->byte_len);
> +	}
>  	skb_pull(skb, IB_GRH_BYTES);
>  
>  	skb->protocol = ((struct ipoib_header *) skb->data)->proto;
> @@ -690,15 +711,19 @@ int ipoib_ib_dev_stop(struct net_device *dev, int flush)
>  			}
>  
>  			for (i = 0; i < ipoib_recvq_size; ++i) {
> -				struct ipoib_rx_buf *rx_req;
> +				struct ipoib_sg_rx_buf *rx_req;
>  
>  				rx_req = &priv->rx_ring[i];
>  				if (!rx_req->skb)
>  					continue;
> -				ib_dma_unmap_single(priv->ca,
> -						    rx_req->mapping,
> -						    IPOIB_BUF_SIZE,
> -						    DMA_FROM_DEVICE);
> +				if (ipoib_ud_need_sg(priv->max_ib_mtu))
> +					ipoib_sg_dma_unmap_rx(priv,
> +							      priv->rx_ring[i].mapping);
> +				else
> +					ib_dma_unmap_single(priv->ca,
> +							    rx_req->mapping[0],
> +							    IPOIB_UD_BUF_SIZE(priv->max_ib_mtu),
> +							    DMA_FROM_DEVICE);
>  				dev_kfree_skb_any(rx_req->skb);
>  				rx_req->skb = NULL;
>  			}
> 
> 
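[Editorial note: the S/G receive path in the patch above relies on helpers
introduced earlier in this series (ipoib_ud_need_sg(), IPOIB_UD_BUF_SIZE(),
ipoib_sg_alloc_rx_skb(), ipoib_sg_dma_unmap_rx(), ipoib_ud_skb_put_frags())
which are not shown in this hunk. The sketch below is only an illustration
of the size logic those helpers imply, assuming the standard 40-byte GRH
and a one-page head buffer; the names and values here are not taken from
the actual series and may differ from the real definitions.]

	/*
	 * Illustrative sketch only -- the real definitions live in the
	 * earlier patches of this series and may differ.
	 */
	#define IPOIB_UD_HEAD_SIZE	(PAGE_SIZE - IB_GRH_BYTES)

	/* GRH + 4-byte IPoIB encapsulation header + IB payload (MTU). */
	#define IPOIB_UD_BUF_SIZE(ib_mtu)	((ib_mtu) + IB_GRH_BYTES + 4)

	/*
	 * Scatter/gather receive is needed only when the UD buffer no
	 * longer fits in a single page, i.e. for the 4K IB MTU case;
	 * the 2K MTU case keeps posting a single SGE as before.
	 */
	static inline int ipoib_ud_need_sg(unsigned int ib_mtu)
	{
		return IPOIB_UD_BUF_SIZE(ib_mtu) > PAGE_SIZE;
	}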
