[ewg] Re: split CQs for IPOIB UD
Or Gerlitz
ogerlitz at voltaire.com
Mon Apr 28 01:37:42 PDT 2008
Eli Cohen wrote:
>> OK, so we start a review here, good! I see now (that you made a day later a v1 post for this patch @ http://lists.openfabrics.org/pipermail/general/2008-March/048381.html fixing some receive size calculations, and that some variations plus fixes exist in the copy and related patches in ofed 1.3. For example the call to skb_orphan() was moved from where it is placed here, there's a bug fix for the CQ size calculations, ipoib_ib_handle_tx_wc() gets a "need_lock" flag, etc.
>>
>>
> thanks for noticing the v1 post. However, the v1 post only fixed CQ size
> calculation and all the other changes you mention I can't see. Also v1
> matches what I have in my git tree. Did I miss something?
>
Yes.
I don't know about your tree, but this is the copy used by ofed 1.3 -
see the need_lock param, usage of MAX_SEND_CQE + 1, etc. The call to
skb_orphan() was moved in a different patch (the unsig udqp), etc.
Or.
> IB/ipoib: Split CQs for IPOIB UD
>
> This comes as a preparation for using unsignalled QP in UD mode. It
> uses a dedicated CQ for the UD send. The CQ is not armed and is polled
> for completion right after sending a packet.
> This patch and the following patches fix bugs 760 and 761.
>
> Signed-off-by: Eli Cohen <eli at mellanox.co.il>
> ---
>
> Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c
> ===================================================================
> --- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_ib.c 2008-02-14 12:26:22.000000000 +0200
> +++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c 2008-02-14 14:40:38.000000000 +0200
> @@ -254,7 +254,7 @@ repost:
> "for buf %d\n", wr_id);
> }
>
> -static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
> +static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc, int need_lock)
> {
> struct ipoib_dev_priv *priv = netdev_priv(dev);
> unsigned int wr_id = wc->wr_id;
> @@ -279,13 +279,17 @@ static void ipoib_ib_handle_tx_wc(struct
>
> dev_kfree_skb_any(tx_req->skb);
>
> - spin_lock_irqsave(&priv->tx_lock, flags);
> + if (need_lock)
> + spin_lock_irqsave(&priv->tx_lock, flags);
> +
> ++priv->tx_tail;
> if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) &&
> netif_queue_stopped(dev) &&
> test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
> netif_wake_queue(dev);
> - spin_unlock_irqrestore(&priv->tx_lock, flags);
> +
> + if (need_lock)
> + spin_unlock_irqrestore(&priv->tx_lock, flags);
>
> if (wc->status != IB_WC_SUCCESS &&
> wc->status != IB_WC_WR_FLUSH_ERR)
> @@ -294,6 +298,15 @@ static void ipoib_ib_handle_tx_wc(struct
> wc->status, wr_id, wc->vendor_err);
> }
>
> +static void poll_tx(struct ipoib_dev_priv *priv, int need_lock)
> +{
> + int n, i;
> +
> + n = ib_poll_cq(priv->scq, MAX_SEND_CQE, priv->send_wc);
> + for (i = 0; i < n; ++i)
> + ipoib_ib_handle_tx_wc(priv->dev, priv->send_wc + i, need_lock);
> +}
> +
> int ipoib_poll(struct napi_struct *napi, int budget)
> {
> struct ipoib_dev_priv *priv = container_of(napi, struct ipoib_dev_priv, napi);
> @@ -309,7 +322,7 @@ poll_more:
> int max = (budget - done);
>
> t = min(IPOIB_NUM_WC, max);
> - n = ib_poll_cq(priv->cq, t, priv->ibwc);
> + n = ib_poll_cq(priv->rcq, t, priv->ibwc);
>
> for (i = 0; i < n; i++) {
> struct ib_wc *wc = priv->ibwc + i;
> @@ -320,12 +333,8 @@ poll_more:
> ipoib_cm_handle_rx_wc(dev, wc);
> else
> ipoib_ib_handle_rx_wc(dev, wc);
> - } else {
> - if (wc->wr_id & IPOIB_OP_CM)
> - ipoib_cm_handle_tx_wc(dev, wc);
> - else
> - ipoib_ib_handle_tx_wc(dev, wc);
> - }
> + } else
> + ipoib_cm_handle_tx_wc(priv->dev, wc);
> }
>
> if (n != t)
> @@ -334,7 +343,7 @@ poll_more:
>
> if (done < budget) {
> netif_rx_complete(dev, napi);
> - if (unlikely(ib_req_notify_cq(priv->cq,
> + if (unlikely(ib_req_notify_cq(priv->rcq,
> IB_CQ_NEXT_COMP |
> IB_CQ_REPORT_MISSED_EVENTS)) &&
> netif_rx_reschedule(dev, napi))
> @@ -344,7 +353,7 @@ poll_more:
> return done;
> }
>
> -void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr)
> +void ipoib_ib_rx_completion(struct ib_cq *cq, void *dev_ptr)
> {
> struct net_device *dev = dev_ptr;
> struct ipoib_dev_priv *priv = netdev_priv(dev);
> @@ -352,6 +361,13 @@ void ipoib_ib_completion(struct ib_cq *c
> netif_rx_schedule(dev, &priv->napi);
> }
>
> +void ipoib_ib_tx_completion(struct ib_cq *cq, void *dev_ptr)
> +{
> + struct ipoib_dev_priv *priv = netdev_priv(dev_ptr);
> +
> + poll_tx(priv, 1);
> +}
> +
> static inline int post_send(struct ipoib_dev_priv *priv,
> unsigned int wr_id,
> struct ib_ah *address, u32 qpn,
> @@ -471,6 +487,10 @@ void ipoib_send(struct net_device *dev,
> netif_stop_queue(dev);
> }
> }
> +
> + if (unlikely(priv->tx_outstanding > MAX_SEND_CQE + 1))
> + poll_tx(priv, 0);
> +
> return;
>
> drop:
> @@ -623,7 +643,7 @@ void ipoib_drain_cq(struct net_device *d
> struct ipoib_dev_priv *priv = netdev_priv(dev);
> int i, n;
> do {
> - n = ib_poll_cq(priv->cq, IPOIB_NUM_WC, priv->ibwc);
> + n = ib_poll_cq(priv->rcq, IPOIB_NUM_WC, priv->ibwc);
> for (i = 0; i < n; ++i) {
> /*
> * Convert any successful completions to flush
> @@ -642,7 +662,7 @@ void ipoib_drain_cq(struct net_device *d
> if (priv->ibwc[i].wr_id & IPOIB_OP_CM)
> ipoib_cm_handle_tx_wc(dev, priv->ibwc + i);
> else
> - ipoib_ib_handle_tx_wc(dev, priv->ibwc + i);
> + ipoib_ib_handle_tx_wc(dev, priv->ibwc + i, 1);
> }
> }
> } while (n == IPOIB_NUM_WC);
> @@ -737,7 +757,7 @@ timeout:
> msleep(1);
> }
>
> - ib_req_notify_cq(priv->cq, IB_CQ_NEXT_COMP);
> + ib_req_notify_cq(priv->rcq, IB_CQ_NEXT_COMP);
>
> return 0;
> }
> Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib.h
> ===================================================================
> --- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib.h 2008-02-14 12:26:23.000000000 +0200
> +++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib.h 2008-02-14 14:40:38.000000000 +0200
> @@ -94,6 +94,8 @@ enum {
> IPOIB_MCAST_FLAG_SENDONLY = 1,
> IPOIB_MCAST_FLAG_BUSY = 2, /* joining or already joined */
> IPOIB_MCAST_FLAG_ATTACHED = 3,
> +
> + MAX_SEND_CQE = 16,
> };
>
> #define IPOIB_OP_RECV (1ul << 31)
> @@ -348,7 +350,8 @@ struct ipoib_dev_priv {
> u16 pkey_index;
> struct ib_pd *pd;
> struct ib_mr *mr;
> - struct ib_cq *cq;
> + struct ib_cq *rcq;
> + struct ib_cq *scq;
> struct ib_qp *qp;
> u32 qkey;
>
> @@ -368,7 +371,8 @@ struct ipoib_dev_priv {
> struct ib_send_wr tx_wr;
> unsigned tx_outstanding;
>
> - struct ib_wc ibwc[IPOIB_NUM_WC];
> + struct ib_wc ibwc[IPOIB_NUM_WC];
> + struct ib_wc send_wc[MAX_SEND_CQE];
>
> struct list_head dead_ahs;
>
> @@ -449,7 +453,8 @@ extern struct workqueue_struct *ipoib_wo
> /* functions */
>
> int ipoib_poll(struct napi_struct *napi, int budget);
> -void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr);
> +void ipoib_ib_rx_completion(struct ib_cq *cq, void *dev_ptr);
> +void ipoib_ib_tx_completion(struct ib_cq *cq, void *dev_ptr);
>
> struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
> struct ib_pd *pd, struct ib_ah_attr *attr);
> @@ -697,7 +702,6 @@ static inline int ipoib_register_debugfs
> static inline void ipoib_unregister_debugfs(void) { }
> #endif
>
> -
> #define ipoib_printk(level, priv, format, arg...) \
> printk(level "%s: " format, ((struct ipoib_dev_priv *) priv)->dev->name , ## arg)
> #define ipoib_warn(priv, format, arg...) \
> Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
> ===================================================================
> --- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_verbs.c 2008-02-14 12:26:23.000000000 +0200
> +++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_verbs.c 2008-02-14 14:40:50.000000000 +0200
> @@ -173,37 +173,42 @@ int ipoib_transport_dev_init(struct net_
> goto out_free_pd;
> }
>
> - size = ipoib_sendq_size + ipoib_recvq_size + 1;
> + size = ipoib_sendq_size + ipoib_recvq_size;
> ret = ipoib_cm_dev_init(dev);
> if (!ret)
> - size += ipoib_recvq_size + 1 /* 1 extra for rx_drain_qp */;
> + size += ipoib_recvq_size + 1; /* 1 extra for rx_drain_qp */
>
> - priv->cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size, 0);
> - if (IS_ERR(priv->cq)) {
> - printk(KERN_WARNING "%s: failed to create CQ\n", ca->name);
> + priv->rcq = ib_create_cq(priv->ca, ipoib_ib_rx_completion, NULL, dev, size, 0);
> + if (IS_ERR(priv->rcq)) {
> + printk(KERN_WARNING "%s: failed to create receive CQ\n", ca->name);
> goto out_free_mr;
> }
>
> + priv->scq = ib_create_cq(priv->ca, ipoib_ib_tx_completion, NULL, dev, ipoib_sendq_size, 0);
> + if (IS_ERR(priv->scq)) {
> + printk(KERN_WARNING "%s: failed to create send CQ\n", ca->name);
> + goto out_free_rcq;
> + }
> +
> +
> coal = kzalloc(sizeof *coal, GFP_KERNEL);
> if (coal) {
> coal->rx_coalesce_usecs = 10;
> - coal->tx_coalesce_usecs = 10;
> coal->rx_max_coalesced_frames = 16;
> - coal->tx_max_coalesced_frames = 16;
> dev->ethtool_ops->set_coalesce(dev, coal);
> kfree(coal);
> }
>
> - if (ib_req_notify_cq(priv->cq, IB_CQ_NEXT_COMP))
> - goto out_free_cq;
> + if (ib_req_notify_cq(priv->rcq, IB_CQ_NEXT_COMP))
> + goto out_free_scq;
>
> - init_attr.send_cq = priv->cq;
> - init_attr.recv_cq = priv->cq;
> + init_attr.send_cq = priv->scq;
> + init_attr.recv_cq = priv->rcq;
>
> priv->qp = ib_create_qp(priv->pd, &init_attr);
> if (IS_ERR(priv->qp)) {
> printk(KERN_WARNING "%s: failed to create QP\n", ca->name);
> - goto out_free_cq;
> + goto out_free_rcq;
> }
>
> priv->dev->dev_addr[1] = (priv->qp->qp_num >> 16) & 0xff;
> @@ -219,8 +224,11 @@ int ipoib_transport_dev_init(struct net_
>
> return 0;
>
> -out_free_cq:
> - ib_destroy_cq(priv->cq);
> +out_free_scq:
> + ib_destroy_cq(priv->scq);
> +
> +out_free_rcq:
> + ib_destroy_cq(priv->rcq);
>
> out_free_mr:
> ib_dereg_mr(priv->mr);
> @@ -243,7 +251,10 @@ void ipoib_transport_dev_cleanup(struct
> clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
> }
>
> - if (ib_destroy_cq(priv->cq))
> + if (ib_destroy_cq(priv->scq))
> + ipoib_warn(priv, "ib_cq_destroy failed\n");
> +
> + if (ib_destroy_cq(priv->rcq))
> ipoib_warn(priv, "ib_cq_destroy failed\n");
>
> ipoib_cm_dev_cleanup(dev);
> Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_cm.c
> ===================================================================
> --- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_cm.c 2008-02-14 12:26:22.000000000 +0200
> +++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_cm.c 2008-02-14 14:40:37.000000000 +0200
> @@ -199,8 +199,8 @@ static struct ib_qp *ipoib_cm_create_rx_
> struct ipoib_dev_priv *priv = netdev_priv(dev);
> struct ib_qp_init_attr attr = {
> .event_handler = ipoib_cm_rx_event_handler,
> - .send_cq = priv->cq, /* For drain WR */
> - .recv_cq = priv->cq,
> + .send_cq = priv->rcq, /* For drain WR */
> + .recv_cq = priv->rcq,
> .srq = priv->cm.srq,
> .cap.max_send_wr = 1, /* For drain WR */
> .cap.max_send_sge = 1, /* FIXME: 0 Seems not to work */
> @@ -791,8 +791,8 @@ static struct ib_qp *ipoib_cm_create_tx_
> {
> struct ipoib_dev_priv *priv = netdev_priv(dev);
> struct ib_qp_init_attr attr = {
> - .send_cq = priv->cq,
> - .recv_cq = priv->cq,
> + .send_cq = priv->rcq,
> + .recv_cq = priv->rcq,
> .srq = priv->cm.srq,
> .cap.max_send_wr = ipoib_sendq_size,
> .cap.max_send_sge = 1,
> Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_etool.c
> ===================================================================
> --- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_etool.c 2008-02-14 12:26:23.000000000 +0200
> +++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_etool.c 2008-02-14 12:26:23.000000000 +0200
> @@ -69,7 +69,7 @@ static int ipoib_set_coalesce(struct net
> coal->tx_max_coalesced_frames > 0xffff)
> return -EINVAL;
>
> - ret = ib_modify_cq(priv->cq, coal->rx_max_coalesced_frames,
> + ret = ib_modify_cq(priv->rcq, coal->rx_max_coalesced_frames,
> coal->rx_coalesce_usecs);
> if (ret) {
> ipoib_dbg(priv, "failed modifying CQ\n");
>
More information about the ewg
mailing list