[ewg] Re: split CQs for IPOIB UD

Or Gerlitz ogerlitz at voltaire.com
Mon Apr 28 01:37:42 PDT 2008


Eli Cohen wrote:
>> OK, so we start a review here, good! I see now (that you made a day later a v1 post for this patch @ http://lists.openfabrics.org/pipermail/general/2008-March/048381.html fixing some receive size calculations, and that some variations plus fixes exist in the copy and related patches in ofed 1.3. For example the call to skb_orphan() was moved from where it is placed here, there's a bug fix for the CQ size calculations, ipoib_ib_handle_tx_wc() gets a "need_lock" flag, etc.
>>
>>     
> thanks for noticing the v1 post. However, the v1 post only fixed CQ size
> calculation and all the other changes you mention I can't see. Also v1
> matches what I have in my git tree. Did I miss something?
>   
Yes.

I don't know about your tree, but this is the copy used by ofed 1.3 - 
see the need_lock param,  usage of MAX_SEND_CQE + 1, etc. The call to 
skb_orphan() was moved in a different patch (the unsig udqp), etc.

Or.
> IB/ipoib: Split CQs for IPOIB UD
>
> This comes as a preparation for using unsignalled QP in UD mode. It
> uses a dedicated CQ for the UD send. The CQ is not armed and is polled
> for completion right after sending a packet.
> This patch and the following patches fix bugs 760 and 761.
>
> Signed-off-by: Eli Cohen <eli at mellanox.co.il>
> ---
>
> Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c
> ===================================================================
> --- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_ib.c	2008-02-14 12:26:22.000000000 +0200
> +++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c	2008-02-14 14:40:38.000000000 +0200
> @@ -254,7 +254,7 @@ repost:
>  			   "for buf %d\n", wr_id);
>  }
>  
> -static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
> +static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc, int need_lock)
>  {
>  	struct ipoib_dev_priv *priv = netdev_priv(dev);
>  	unsigned int wr_id = wc->wr_id;
> @@ -279,13 +279,17 @@ static void ipoib_ib_handle_tx_wc(struct
>  
>  	dev_kfree_skb_any(tx_req->skb);
>  
> -	spin_lock_irqsave(&priv->tx_lock, flags);
> +	if (need_lock)
> +		spin_lock_irqsave(&priv->tx_lock, flags);
> +
>  	++priv->tx_tail;
>  	if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) &&
>  	    netif_queue_stopped(dev) &&
>  	    test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
>  		netif_wake_queue(dev);
> -	spin_unlock_irqrestore(&priv->tx_lock, flags);
> +
> +	if (need_lock)
> +		spin_unlock_irqrestore(&priv->tx_lock, flags);
>  
>  	if (wc->status != IB_WC_SUCCESS &&
>  	    wc->status != IB_WC_WR_FLUSH_ERR)
> @@ -294,6 +298,15 @@ static void ipoib_ib_handle_tx_wc(struct
>  			   wc->status, wr_id, wc->vendor_err);
>  }
>  
> +static void poll_tx(struct ipoib_dev_priv *priv, int need_lock)
> +{
> +	int n, i;
> +
> +	n = ib_poll_cq(priv->scq, MAX_SEND_CQE, priv->send_wc);
> +	for (i = 0; i < n; ++i)
> +		ipoib_ib_handle_tx_wc(priv->dev, priv->send_wc + i, need_lock);
> +}
> +
>  int ipoib_poll(struct napi_struct *napi, int budget)
>  {
>  	struct ipoib_dev_priv *priv = container_of(napi, struct ipoib_dev_priv, napi);
> @@ -309,7 +322,7 @@ poll_more:
>  		int max = (budget - done);
>  
>  		t = min(IPOIB_NUM_WC, max);
> -		n = ib_poll_cq(priv->cq, t, priv->ibwc);
> +		n = ib_poll_cq(priv->rcq, t, priv->ibwc);
>  
>  		for (i = 0; i < n; i++) {
>  			struct ib_wc *wc = priv->ibwc + i;
> @@ -320,12 +333,8 @@ poll_more:
>  					ipoib_cm_handle_rx_wc(dev, wc);
>  				else
>  					ipoib_ib_handle_rx_wc(dev, wc);
> -			} else {
> -				if (wc->wr_id & IPOIB_OP_CM)
> -					ipoib_cm_handle_tx_wc(dev, wc);
> -				else
> -					ipoib_ib_handle_tx_wc(dev, wc);
> -			}
> +			} else
> +                                ipoib_cm_handle_tx_wc(priv->dev, wc);
>  		}
>  
>  		if (n != t)
> @@ -334,7 +343,7 @@ poll_more:
>  
>  	if (done < budget) {
>  		netif_rx_complete(dev, napi);
> -		if (unlikely(ib_req_notify_cq(priv->cq,
> +		if (unlikely(ib_req_notify_cq(priv->rcq,
>  					      IB_CQ_NEXT_COMP |
>  					      IB_CQ_REPORT_MISSED_EVENTS)) &&
>  		    netif_rx_reschedule(dev, napi))
> @@ -344,7 +353,7 @@ poll_more:
>  	return done;
>  }
>  
> -void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr)
> +void ipoib_ib_rx_completion(struct ib_cq *cq, void *dev_ptr)
>  {
>  	struct net_device *dev = dev_ptr;
>  	struct ipoib_dev_priv *priv = netdev_priv(dev);
> @@ -352,6 +361,13 @@ void ipoib_ib_completion(struct ib_cq *c
>  	netif_rx_schedule(dev, &priv->napi);
>  }
>  
> +void ipoib_ib_tx_completion(struct ib_cq *cq, void *dev_ptr)
> +{
> +	struct ipoib_dev_priv *priv = netdev_priv(dev_ptr);
> +
> +	poll_tx(priv, 1);
> +}
> +
>  static inline int post_send(struct ipoib_dev_priv *priv,
>  			    unsigned int wr_id,
>  			    struct ib_ah *address, u32 qpn,
> @@ -471,6 +487,10 @@ void ipoib_send(struct net_device *dev, 
>  			netif_stop_queue(dev);
>  		}
>  	}
> +
> +	if (unlikely(priv->tx_outstanding > MAX_SEND_CQE + 1))
> +		poll_tx(priv, 0);
> +
>  	return;
>  
>  drop:
> @@ -623,7 +643,7 @@ void ipoib_drain_cq(struct net_device *d
>  	struct ipoib_dev_priv *priv = netdev_priv(dev);
>  	int i, n;
>  	do {
> -		n = ib_poll_cq(priv->cq, IPOIB_NUM_WC, priv->ibwc);
> +		n = ib_poll_cq(priv->rcq, IPOIB_NUM_WC, priv->ibwc);
>  		for (i = 0; i < n; ++i) {
>  			/*
>  			 * Convert any successful completions to flush
> @@ -642,7 +662,7 @@ void ipoib_drain_cq(struct net_device *d
>  				if (priv->ibwc[i].wr_id & IPOIB_OP_CM)
>  					ipoib_cm_handle_tx_wc(dev, priv->ibwc + i);
>  				else
> -					ipoib_ib_handle_tx_wc(dev, priv->ibwc + i);
> +					ipoib_ib_handle_tx_wc(dev, priv->ibwc + i, 1);
>  			}
>  		}
>  	} while (n == IPOIB_NUM_WC);
> @@ -737,7 +757,7 @@ timeout:
>  		msleep(1);
>  	}
>  
> -	ib_req_notify_cq(priv->cq, IB_CQ_NEXT_COMP);
> +	ib_req_notify_cq(priv->rcq, IB_CQ_NEXT_COMP);
>  
>  	return 0;
>  }
> Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib.h
> ===================================================================
> --- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib.h	2008-02-14 12:26:23.000000000 +0200
> +++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib.h	2008-02-14 14:40:38.000000000 +0200
> @@ -94,6 +94,8 @@ enum {
>  	IPOIB_MCAST_FLAG_SENDONLY = 1,
>  	IPOIB_MCAST_FLAG_BUSY 	  = 2,	/* joining or already joined */
>  	IPOIB_MCAST_FLAG_ATTACHED = 3,
> +
> +	MAX_SEND_CQE              = 16,
>  };
>  
>  #define	IPOIB_OP_RECV   (1ul << 31)
> @@ -348,7 +350,8 @@ struct ipoib_dev_priv {
>  	u16               pkey_index;
>  	struct ib_pd  	 *pd;
>  	struct ib_mr  	 *mr;
> -	struct ib_cq  	 *cq;
> +	struct ib_cq  	 *rcq;
> +	struct ib_cq  	 *scq;
>  	struct ib_qp  	 *qp;
>  	u32           	  qkey;
>  
> @@ -368,7 +371,8 @@ struct ipoib_dev_priv {
>  	struct ib_send_wr    tx_wr;
>  	unsigned             tx_outstanding;
>  
> -	struct ib_wc ibwc[IPOIB_NUM_WC];
> +	struct ib_wc 	     ibwc[IPOIB_NUM_WC];
> +	struct ib_wc         send_wc[MAX_SEND_CQE];
>  
>  	struct list_head dead_ahs;
>  
> @@ -449,7 +453,8 @@ extern struct workqueue_struct *ipoib_wo
>  /* functions */
>  
>  int ipoib_poll(struct napi_struct *napi, int budget);
> -void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr);
> +void ipoib_ib_rx_completion(struct ib_cq *cq, void *dev_ptr);
> +void ipoib_ib_tx_completion(struct ib_cq *cq, void *dev_ptr);
>  
>  struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
>  				 struct ib_pd *pd, struct ib_ah_attr *attr);
> @@ -697,7 +702,6 @@ static inline int ipoib_register_debugfs
>  static inline void ipoib_unregister_debugfs(void) { }
>  #endif
>  
> -
>  #define ipoib_printk(level, priv, format, arg...)	\
>  	printk(level "%s: " format, ((struct ipoib_dev_priv *) priv)->dev->name , ## arg)
>  #define ipoib_warn(priv, format, arg...)		\
> Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
> ===================================================================
> --- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_verbs.c	2008-02-14 12:26:23.000000000 +0200
> +++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_verbs.c	2008-02-14 14:40:50.000000000 +0200
> @@ -173,37 +173,42 @@ int ipoib_transport_dev_init(struct net_
>  		goto out_free_pd;
>  	}
>  
> -	size = ipoib_sendq_size + ipoib_recvq_size + 1;
> +	size = ipoib_sendq_size + ipoib_recvq_size;
>  	ret = ipoib_cm_dev_init(dev);
>  	if (!ret)
> -		size += ipoib_recvq_size + 1 /* 1 extra for rx_drain_qp */;
> +		size += ipoib_recvq_size + 1; /* 1 extra for rx_drain_qp */
>  
> -	priv->cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size, 0);
> -	if (IS_ERR(priv->cq)) {
> -		printk(KERN_WARNING "%s: failed to create CQ\n", ca->name);
> +	priv->rcq = ib_create_cq(priv->ca, ipoib_ib_rx_completion, NULL, dev, size, 0);
> +	if (IS_ERR(priv->rcq)) {
> +		printk(KERN_WARNING "%s: failed to create receive CQ\n", ca->name);
>  		goto out_free_mr;
>  	}
>  
> +	priv->scq = ib_create_cq(priv->ca, ipoib_ib_tx_completion, NULL, dev, ipoib_sendq_size, 0);
> +	if (IS_ERR(priv->scq)) {
> +		printk(KERN_WARNING "%s: failed to create send CQ\n", ca->name);
> +		goto out_free_rcq;
> +	}
> +
> +
>  	coal = kzalloc(sizeof *coal, GFP_KERNEL);
>  	if (coal) {
>  		coal->rx_coalesce_usecs = 10;
> -		coal->tx_coalesce_usecs = 10;
>  		coal->rx_max_coalesced_frames = 16;
> -		coal->tx_max_coalesced_frames = 16;
>  		dev->ethtool_ops->set_coalesce(dev, coal);
>  		kfree(coal);
>  	}
>  
> -	if (ib_req_notify_cq(priv->cq, IB_CQ_NEXT_COMP))
> -		goto out_free_cq;
> +	if (ib_req_notify_cq(priv->rcq, IB_CQ_NEXT_COMP))
> +		goto out_free_scq;
>  
> -	init_attr.send_cq = priv->cq;
> -	init_attr.recv_cq = priv->cq;
> +	init_attr.send_cq = priv->scq;
> +	init_attr.recv_cq = priv->rcq;
>  
>  	priv->qp = ib_create_qp(priv->pd, &init_attr);
>  	if (IS_ERR(priv->qp)) {
>  		printk(KERN_WARNING "%s: failed to create QP\n", ca->name);
> -		goto out_free_cq;
> +		goto out_free_rcq;
>  	}
>  
>  	priv->dev->dev_addr[1] = (priv->qp->qp_num >> 16) & 0xff;
> @@ -219,8 +224,11 @@ int ipoib_transport_dev_init(struct net_
>  
>  	return 0;
>  
> -out_free_cq:
> -	ib_destroy_cq(priv->cq);
> +out_free_scq:
> +	ib_destroy_cq(priv->scq);
> +
> +out_free_rcq:
> +	ib_destroy_cq(priv->rcq);
>  
>  out_free_mr:
>  	ib_dereg_mr(priv->mr);
> @@ -243,7 +251,10 @@ void ipoib_transport_dev_cleanup(struct 
>  		clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
>  	}
>  
> -	if (ib_destroy_cq(priv->cq))
> +	if (ib_destroy_cq(priv->scq))
> +		ipoib_warn(priv, "ib_cq_destroy failed\n");
> +
> +	if (ib_destroy_cq(priv->rcq))
>  		ipoib_warn(priv, "ib_cq_destroy failed\n");
>  
>  	ipoib_cm_dev_cleanup(dev);
> Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_cm.c
> ===================================================================
> --- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_cm.c	2008-02-14 12:26:22.000000000 +0200
> +++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_cm.c	2008-02-14 14:40:37.000000000 +0200
> @@ -199,8 +199,8 @@ static struct ib_qp *ipoib_cm_create_rx_
>  	struct ipoib_dev_priv *priv = netdev_priv(dev);
>  	struct ib_qp_init_attr attr = {
>  		.event_handler = ipoib_cm_rx_event_handler,
> -		.send_cq = priv->cq, /* For drain WR */
> -		.recv_cq = priv->cq,
> +		.send_cq = priv->rcq, /* For drain WR */
> +		.recv_cq = priv->rcq,
>  		.srq = priv->cm.srq,
>  		.cap.max_send_wr = 1, /* For drain WR */
>  		.cap.max_send_sge = 1, /* FIXME: 0 Seems not to work */
> @@ -791,8 +791,8 @@ static struct ib_qp *ipoib_cm_create_tx_
>  {
>  	struct ipoib_dev_priv *priv = netdev_priv(dev);
>  	struct ib_qp_init_attr attr = {
> -		.send_cq		= priv->cq,
> -		.recv_cq		= priv->cq,
> +		.send_cq		= priv->rcq,
> +		.recv_cq		= priv->rcq,
>  		.srq			= priv->cm.srq,
>  		.cap.max_send_wr	= ipoib_sendq_size,
>  		.cap.max_send_sge	= 1,
> Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_etool.c
> ===================================================================
> --- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_etool.c	2008-02-14 12:26:23.000000000 +0200
> +++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_etool.c	2008-02-14 12:26:23.000000000 +0200
> @@ -69,7 +69,7 @@ static int ipoib_set_coalesce(struct net
>  	    coal->tx_max_coalesced_frames > 0xffff)
>  		return -EINVAL;
>  
> -	ret = ib_modify_cq(priv->cq, coal->rx_max_coalesced_frames,
> +	ret = ib_modify_cq(priv->rcq, coal->rx_max_coalesced_frames,
>  	coal->rx_coalesce_usecs);
>  	if (ret) {
>  			ipoib_dbg(priv, "failed modifying CQ\n");
>   








More information about the ewg mailing list