[openib-general] [PATCH][3/7]ipoib performance patches -- remove tx_ring

Shirley Ma xma at us.ibm.com
Wed May 24 22:01:33 PDT 2006


Here is the new patch.

diff -urpN infiniband-ah/ulp/ipoib/ipoib.h infiniband-tx/ulp/ipoib/ipoib.h
--- infiniband-ah/ulp/ipoib/ipoib.h     2006-05-23 10:09:05.000000000 -0700
+++ infiniband-tx/ulp/ipoib/ipoib.h     2006-05-24 11:45:52.000000000 -0700
@@ -114,11 +114,19 @@ struct ipoib_rx_buf {
        dma_addr_t      mapping;
 };
 
-struct ipoib_tx_buf {
-       struct sk_buff *skb;
-       DECLARE_PCI_UNMAP_ADDR(mapping)
+struct ipoib_skb_prv {
+       dma_addr_t              addr;
+       struct ipoib_ah         *ah;
+       struct sk_buff          *skb;
+       struct list_head        list;
 };
 
+#define IPOIB_SKB_PRV_ADDR(skb)        (((struct ipoib_skb_prv *)(skb)->cb)->addr)
+#define IPOIB_SKB_PRV_AH(skb)  (((struct ipoib_skb_prv *)(skb)->cb)->ah)
+#define IPOIB_SKB_PRV_SKB(skb) (((struct ipoib_skb_prv *)(skb)->cb)->skb)
+#define IPOIB_SKB_PRV_LIST(skb)        (((struct ipoib_skb_prv *)(skb)->cb)->list)
+
+
 /*
  * Device private locking: tx_lock protects members used in TX fast
  * path (and we use LLTX so upper layers don't do extra locking).
@@ -166,12 +174,11 @@ struct ipoib_dev_priv {
 
        struct ipoib_rx_buf *rx_ring;
 
-       spinlock_t           tx_lock    ____cacheline_aligned_in_smp;
-       struct ipoib_tx_buf *tx_ring;
-       unsigned             tx_head;
-       unsigned             tx_tail;
+       spinlock_t           tx_lock;
        struct ib_sge        tx_sge;
        struct ib_send_wr    tx_wr;
+       spinlock_t           slist_lock;
+       struct list_head     send_list;
 
        struct list_head dead_ahs;
 
diff -urpN infiniband-ah/ulp/ipoib/ipoib_ib.c infiniband-tx/ulp/ipoib/ipoib_ib.c
--- infiniband-ah/ulp/ipoib/ipoib_ib.c  2006-05-23 10:14:08.000000000 -0700
+++ infiniband-tx/ulp/ipoib/ipoib_ib.c  2006-05-24 14:57:33.000000000 -0700
@@ -243,45 +243,39 @@ static void ipoib_ib_handle_send_wc(stru
                                    struct ib_wc *wc)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
-       unsigned int wr_id = wc->wr_id;
-       struct ipoib_tx_buf *tx_req;
+       struct sk_buff *skb;
        unsigned long flags;
+       unsigned long wr_id = wc->wr_id;
 
-       ipoib_dbg_data(priv, "called: id %d, op %d, status: %d\n",
-                      wr_id, wc->opcode, wc->status);
-
-       if (wr_id >= ipoib_sendq_size) {
-               ipoib_warn(priv, "completion event with wrid %d (> %d)\n",
-                          wr_id, ipoib_sendq_size);
-               return;
-       }
-
-       ipoib_dbg_data(priv, "send complete, wrid %d\n", wr_id);
-
-       tx_req = &priv->tx_ring[wr_id];
-
-       dma_unmap_single(priv->ca->dma_device,
-                        pci_unmap_addr(tx_req, mapping),
-                        tx_req->skb->len,
-                        DMA_TO_DEVICE);
+       skb = (struct sk_buff *)wr_id;
+       kref_put(&IPOIB_SKB_PRV_AH(skb)->ref, ipoib_free_ah);
 
+       if (IS_ERR(skb) || skb != IPOIB_SKB_PRV_SKB(skb)) {
+               ipoib_warn(priv, "send completion event with corrupted wrid\n");
+               return;
+       }
+       spin_lock_irqsave(&priv->slist_lock, flags);
+       list_del(&IPOIB_SKB_PRV_LIST(skb));
+       spin_unlock_irqrestore(&priv->slist_lock, flags);
+ 
+       ipoib_dbg_data(priv, "send complete, wrid %lu\n", wr_id);
+ 
+       dma_unmap_single(priv->ca->dma_device,
+                        IPOIB_SKB_PRV_ADDR(skb),
+                        skb->len,
+                        DMA_TO_DEVICE);
+ 
        ++priv->stats.tx_packets;
-       priv->stats.tx_bytes += tx_req->skb->len;
-
-       dev_kfree_skb_any(tx_req->skb);
-
-       spin_lock_irqsave(&priv->tx_lock, flags);
-       ++priv->tx_tail;
-       if (netif_queue_stopped(dev) &&
-           priv->tx_head - priv->tx_tail <= ipoib_sendq_size >> 1)
-               netif_wake_queue(dev);
-       spin_unlock_irqrestore(&priv->tx_lock, flags);
-
-       if (wc->status != IB_WC_SUCCESS &&
-           wc->status != IB_WC_WR_FLUSH_ERR)
-               ipoib_warn(priv, "failed send event "
-                          "(status=%d, wrid=%d vend_err %x)\n",
-                          wc->status, wr_id, wc->vendor_err);
+       priv->stats.tx_bytes += skb->len;
+       dev_kfree_skb_any(skb);
+ 
+       if (netif_queue_stopped(dev))
+               netif_wake_queue(dev);
+       if (wc->status != IB_WC_SUCCESS &&
+           wc->status != IB_WC_WR_FLUSH_ERR)
+               ipoib_warn(priv, "failed send event "
+                          "(status=%d, wrid=%lu vend_err %x)\n",
+                          wc->status, wr_id, wc->vendor_err);
 }
 
 void ipoib_ib_send_completion(struct ib_cq *cq, void *dev_ptr)
@@ -313,7 +307,7 @@ void ipoib_ib_recv_completion(struct ib_
 }
 
 static inline int post_send(struct ipoib_dev_priv *priv,
-                           unsigned int wr_id,
+                           unsigned long wr_id,
                            struct ib_ah *address, u32 qpn,
                            dma_addr_t addr, int len)
 {
@@ -333,8 +327,8 @@ void ipoib_send(struct net_device *dev, 
                struct ipoib_ah *address, u32 qpn)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
-       struct ipoib_tx_buf *tx_req;
        dma_addr_t addr;
+       unsigned long wr_id;
        int err;
 
        kref_get(&address->ref);
@@ -350,38 +344,31 @@ void ipoib_send(struct net_device *dev, 
 
        ipoib_dbg_data(priv, "sending packet, length=%d address=%p qpn=0x%06x\n",
                       skb->len, address, qpn);
-
-       /*
-        * We put the skb into the tx_ring _before_ we call post_send()
-        * because it's entirely possible that the completion handler will
-        * run before we execute anything after the post_send().  That
-        * means we have to make sure everything is properly recorded and
-        * our state is consistent before we call post_send().
-        */
-       tx_req = &priv->tx_ring[priv->tx_head & (ipoib_sendq_size - 1)];
-       tx_req->skb = skb;
        addr = dma_map_single(priv->ca->dma_device, skb->data, skb->len,
                              DMA_TO_DEVICE);
-       pci_unmap_addr_set(tx_req, mapping, addr);
-
-       err = post_send(priv, priv->tx_head & (ipoib_sendq_size - 1), 
-                       address->ah, qpn, addr, skb->len); 
-       kref_put(&address->ref, ipoib_free_ah);
-       if (unlikely(err)) {
-               ipoib_warn(priv, "post_send failed\n");
-               ++priv->stats.tx_errors;
-               dma_unmap_single(priv->ca->dma_device, addr, skb->len,
-                                DMA_TO_DEVICE);
-               dev_kfree_skb_any(skb);
-       } else {
-               dev->trans_start = jiffies;
-
-               ++priv->tx_head;
-
-               if (priv->tx_head - priv->tx_tail == ipoib_sendq_size) {
-                       ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n");
+       wr_id = (unsigned long)skb;
+       err = post_send(priv, wr_id, address->ah, qpn, addr, skb->len);
+       if (!err) {
+               dev->trans_start = jiffies;
+               IPOIB_SKB_PRV_ADDR(skb) = addr;
+               IPOIB_SKB_PRV_AH(skb) = address;
+               IPOIB_SKB_PRV_SKB(skb) = skb;
+               spin_lock(&priv->slist_lock);
+               list_add_tail(&IPOIB_SKB_PRV_LIST(skb), &priv->send_list);
+               spin_unlock(&priv->slist_lock);
+               return;
+       } else {
+               if (!netif_queue_stopped(dev)) {
                        netif_stop_queue(dev);
+                       ipoib_warn(priv, "stopping kernel net queue\n");
                }
+               dma_unmap_single(priv->ca->dma_device, addr, skb->len,
+                                DMA_TO_DEVICE);
+               ipoib_warn(priv, "post_send failed\n");
+               ++priv->stats.tx_dropped;
+               ++priv->stats.tx_errors;
+               dev_kfree_skb_any(skb);
+               kref_put(&address->ref, ipoib_free_ah);
        }
 }
 
@@ -480,7 +467,9 @@ int ipoib_ib_dev_stop(struct net_device 
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        struct ib_qp_attr qp_attr;
        unsigned long begin;
-       struct ipoib_tx_buf *tx_req;
+       unsigned long flags;
+       struct ipoib_skb_prv *cb, *tcb;
+       struct sk_buff *skb;
        int i;
 
        clear_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
@@ -496,25 +485,25 @@ int ipoib_ib_dev_stop(struct net_device 
        /* Wait for all sends and receives to complete */
        begin = jiffies;
 
-       while (priv->tx_head != priv->tx_tail || recvs_pending(dev)) {
+       while (!list_empty(&priv->send_list) || recvs_pending(dev)) {
                if (time_after(jiffies, begin + 5 * HZ)) {
-                       ipoib_warn(priv, "timing out; %d sends %d receives not completed\n",
-                                  priv->tx_head - priv->tx_tail, recvs_pending(dev));
+                       ipoib_warn(priv, "timing out; %d receives not completed\n",
+                                 recvs_pending(dev));
 
                        /*
                         * assume the HW is wedged and just free up
                         * all our pending work requests.
                         */
-                       while ((int) priv->tx_tail - (int) priv->tx_head < 0) {
-                               tx_req = &priv->tx_ring[priv->tx_tail &
-                                                       (ipoib_sendq_size - 1)];
-                               dma_unmap_single(priv->ca->dma_device,
-                                                pci_unmap_addr(tx_req, mapping),
-                                                tx_req->skb->len,
-                                                DMA_TO_DEVICE);
-                               dev_kfree_skb_any(tx_req->skb);
-                               ++priv->tx_tail;
-                       }
+                       spin_lock_irqsave(&priv->slist_lock, flags);
+                       list_for_each_entry_safe(cb, tcb, &priv->send_list,
+                                           list) {
+                               skb = cb->skb;
+                               dma_unmap_single(priv->ca->dma_device,
+                                                IPOIB_SKB_PRV_ADDR(skb),
+                                                skb->len, DMA_TO_DEVICE);
+                               dev_kfree_skb_any(skb);
+                       }
+                       spin_unlock_irqrestore(&priv->slist_lock, flags);
 
                        for (i = 0; i < ipoib_recvq_size; ++i)
                                if (priv->rx_ring[i].skb) {
diff -urpN infiniband-ah/ulp/ipoib/ipoib_main.c infiniband-tx/ulp/ipoib/ipoib_main.c
--- infiniband-ah/ulp/ipoib/ipoib_main.c        2006-05-23 09:31:49.000000000 -0700
+++ infiniband-tx/ulp/ipoib/ipoib_main.c        2006-05-24 11:47:06.000000000 -0700
@@ -708,9 +708,7 @@ static void ipoib_timeout(struct net_dev
 
        ipoib_warn(priv, "transmit timeout: latency %d msecs\n",
                   jiffies_to_msecs(jiffies - dev->trans_start));
-       ipoib_warn(priv, "queue stopped %d, tx_head %u, tx_tail %u\n",
-                  netif_queue_stopped(dev),
-                  priv->tx_head, priv->tx_tail);
+       ipoib_warn(priv, "queue stopped %d\n", netif_queue_stopped(dev));
        /* XXX reset QP, etc. */
 }
 
@@ -846,7 +844,7 @@ int ipoib_dev_init(struct net_device *de
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
 
-       /* Allocate RX/TX "rings" to hold queued skbs */
+       /* Allocate RX "rings" to hold queued skbs */
        priv->rx_ring = kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring,
                                GFP_KERNEL);
        if (!priv->rx_ring) {
@@ -855,24 +853,11 @@ int ipoib_dev_init(struct net_device *de
                goto out;
        }
 
-       priv->tx_ring = kzalloc(ipoib_sendq_size * sizeof *priv->tx_ring,
-                               GFP_KERNEL);
-       if (!priv->tx_ring) {
-               printk(KERN_WARNING "%s: failed to allocate TX ring (%d entries)\n",
-                      ca->name, ipoib_sendq_size);
-               goto out_rx_ring_cleanup;
-       }
-
-       /* priv->tx_head & tx_tail are already 0 */
-
        if (ipoib_ib_dev_init(dev, ca, port))
-               goto out_tx_ring_cleanup;
+               goto out_rx_ring_cleanup;
 
        return 0;
 
-out_tx_ring_cleanup:
-       kfree(priv->tx_ring);
-
 out_rx_ring_cleanup:
        kfree(priv->rx_ring);
 
@@ -896,10 +881,8 @@ void ipoib_dev_cleanup(struct net_device
        ipoib_ib_dev_cleanup(dev);
 
        kfree(priv->rx_ring);
-       kfree(priv->tx_ring);
 
        priv->rx_ring = NULL;
-       priv->tx_ring = NULL;
 }
 
 static void ipoib_setup(struct net_device *dev)
@@ -944,6 +927,7 @@ static void ipoib_setup(struct net_devic
 
        spin_lock_init(&priv->lock);
        spin_lock_init(&priv->tx_lock);
+       spin_lock_init(&priv->slist_lock);
 
        mutex_init(&priv->mcast_mutex);
        mutex_init(&priv->vlan_mutex);
@@ -952,6 +936,7 @@ static void ipoib_setup(struct net_devic
        INIT_LIST_HEAD(&priv->child_intfs);
        INIT_LIST_HEAD(&priv->dead_ahs);
        INIT_LIST_HEAD(&priv->multicast_list);
+       INIT_LIST_HEAD(&priv->send_list); 
 
        INIT_WORK(&priv->pkey_task,    ipoib_pkey_poll, priv->dev);
        INIT_WORK(&priv->mcast_task,   ipoib_mcast_join_task, priv->dev);


Thanks
Shirley Ma
IBM Linux Technology Center
15300 SW Koll Parkway
Beaverton, OR 97006-6063
Phone(Fax): (503) 578-7638

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openfabrics.org/pipermail/general/attachments/20060524/4996acd8/attachment.html>


More information about the general mailing list