[openib-general] [PATCH][3/7]ipoib performance patches -- remove tx_ring
Shirley Ma
xma at us.ibm.com
Wed May 24 22:01:33 PDT 2006
Here is the new patch.
diff -urpN infiniband-ah/ulp/ipoib/ipoib.h infiniband-tx/ulp/ipoib/ipoib.h
--- infiniband-ah/ulp/ipoib/ipoib.h 2006-05-23 10:09:05.000000000 -0700
+++ infiniband-tx/ulp/ipoib/ipoib.h 2006-05-24 11:45:52.000000000 -0700
@@ -114,11 +114,19 @@ struct ipoib_rx_buf {
dma_addr_t mapping;
};
-struct ipoib_tx_buf {
- struct sk_buff *skb;
- DECLARE_PCI_UNMAP_ADDR(mapping)
+struct ipoib_skb_prv {
+ dma_addr_t addr;
+ struct ipoib_ah *ah;
+ struct sk_buff *skb;
+ struct list_head list;
};
+#define IPOIB_SKB_PRV_ADDR(skb) (((struct ipoib_skb_prv *)(skb)->cb)->addr)
+#define IPOIB_SKB_PRV_AH(skb) (((struct ipoib_skb_prv *)(skb)->cb)->ah)
+#define IPOIB_SKB_PRV_SKB(skb) (((struct ipoib_skb_prv *)(skb)->cb)->skb)
+#define IPOIB_SKB_PRV_LIST(skb) (((struct ipoib_skb_prv *)(skb)->cb)->list)
+
+
/*
* Device private locking: tx_lock protects members used in TX fast
* path (and we use LLTX so upper layers don't do extra locking).
@@ -166,12 +174,11 @@ struct ipoib_dev_priv {
struct ipoib_rx_buf *rx_ring;
- spinlock_t tx_lock ____cacheline_aligned_in_smp;
- struct ipoib_tx_buf *tx_ring;
- unsigned tx_head;
- unsigned tx_tail;
+ spinlock_t tx_lock;
struct ib_sge tx_sge;
struct ib_send_wr tx_wr;
+ spinlock_t slist_lock;
+ struct list_head send_list;
struct list_head dead_ahs;
diff -urpN infiniband-ah/ulp/ipoib/ipoib_ib.c
infiniband-tx/ulp/ipoib/ipoib_ib.c
--- infiniband-ah/ulp/ipoib/ipoib_ib.c 2006-05-23 10:14:08.000000000 -0700
+++ infiniband-tx/ulp/ipoib/ipoib_ib.c 2006-05-24 14:57:33.000000000 -0700
@@ -243,45 +243,39 @@ static void ipoib_ib_handle_send_wc(stru
struct ib_wc *wc)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
- unsigned int wr_id = wc->wr_id;
- struct ipoib_tx_buf *tx_req;
+ struct sk_buff *skb;
unsigned long flags;
+ unsigned long wr_id = wc->wr_id;
- ipoib_dbg_data(priv, "called: id %d, op %d, status: %d\n",
- wr_id, wc->opcode, wc->status);
-
- if (wr_id >= ipoib_sendq_size) {
- ipoib_warn(priv, "completion event with wrid %d (> %d)\n",
- wr_id, ipoib_sendq_size);
- return;
- }
-
- ipoib_dbg_data(priv, "send complete, wrid %d\n", wr_id);
-
- tx_req = &priv->tx_ring[wr_id];
-
- dma_unmap_single(priv->ca->dma_device,
- pci_unmap_addr(tx_req, mapping),
- tx_req->skb->len,
- DMA_TO_DEVICE);
+ skb = (struct sk_buff *)wr_id;
+ kref_put(&IPOIB_SKB_PRV_AH(skb)->ref, ipoib_free_ah);
+ if (IS_ERR(skb) || skb != IPOIB_SKB_PRV_SKB(skb)) {
+ ipoib_warn(priv, "send completion event with corrupted wrid\n");
+ return;
+ }
+ spin_lock_irqsave(&priv->slist_lock, flags);
+ list_del(&IPOIB_SKB_PRV_LIST(skb));
+ spin_unlock_irqrestore(&priv->slist_lock, flags);
+
+ ipoib_dbg_data(priv, "send complete, wrid %lu\n", wr_id);
+
+ dma_unmap_single(priv->ca->dma_device,
+ IPOIB_SKB_PRV_ADDR(skb),
+ skb->len,
+ DMA_TO_DEVICE);
+
++priv->stats.tx_packets;
- priv->stats.tx_bytes += tx_req->skb->len;
-
- dev_kfree_skb_any(tx_req->skb);
-
- spin_lock_irqsave(&priv->tx_lock, flags);
- ++priv->tx_tail;
- if (netif_queue_stopped(dev) &&
- priv->tx_head - priv->tx_tail <= ipoib_sendq_size >> 1)
- netif_wake_queue(dev);
- spin_unlock_irqrestore(&priv->tx_lock, flags);
-
- if (wc->status != IB_WC_SUCCESS &&
- wc->status != IB_WC_WR_FLUSH_ERR)
- ipoib_warn(priv, "failed send event "
- "(status=%d, wrid=%d vend_err %x)\n",
- wc->status, wr_id, wc->vendor_err);
+ priv->stats.tx_bytes += skb->len;
+ dev_kfree_skb_any(skb);
+
+ if (netif_queue_stopped(dev))
+ netif_wake_queue(dev);
+ if (wc->status != IB_WC_SUCCESS &&
+ wc->status != IB_WC_WR_FLUSH_ERR)
+ ipoib_warn(priv, "failed send event "
+ "(status=%d, wrid=%lu vend_err %x)\n",
+ wc->status, wr_id, wc->vendor_err);
}
void ipoib_ib_send_completion(struct ib_cq *cq, void *dev_ptr)
@@ -313,7 +307,7 @@ void ipoib_ib_recv_completion(struct ib_
}
static inline int post_send(struct ipoib_dev_priv *priv,
- unsigned int wr_id,
+ unsigned long wr_id,
struct ib_ah *address, u32 qpn,
dma_addr_t addr, int len)
{
@@ -333,8 +327,8 @@ void ipoib_send(struct net_device *dev,
struct ipoib_ah *address, u32 qpn)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
- struct ipoib_tx_buf *tx_req;
dma_addr_t addr;
+ unsigned long wr_id;
int err;
kref_get(&address->ref);
@@ -350,38 +344,31 @@ void ipoib_send(struct net_device *dev,
ipoib_dbg_data(priv, "sending packet, length=%d address=%p qpn=0x%06x\n",
skb->len, address, qpn);
-
- /*
- * We put the skb into the tx_ring _before_ we call post_send()
- * because it's entirely possible that the completion handler will
- * run before we execute anything after the post_send(). That
- * means we have to make sure everything is properly recorded and
- * our state is consistent before we call post_send().
- */
- tx_req = &priv->tx_ring[priv->tx_head & (ipoib_sendq_size - 1)];
- tx_req->skb = skb;
addr = dma_map_single(priv->ca->dma_device, skb->data, skb->len,
DMA_TO_DEVICE);
- pci_unmap_addr_set(tx_req, mapping, addr);
-
- err = post_send(priv, priv->tx_head & (ipoib_sendq_size - 1),
- address->ah, qpn, addr, skb->len);
- kref_put(&address->ref, ipoib_free_ah);
- if (unlikely(err)) {
- ipoib_warn(priv, "post_send failed\n");
- ++priv->stats.tx_errors;
- dma_unmap_single(priv->ca->dma_device, addr, skb->len,
- DMA_TO_DEVICE);
- dev_kfree_skb_any(skb);
- } else {
- dev->trans_start = jiffies;
-
- ++priv->tx_head;
-
- if (priv->tx_head - priv->tx_tail == ipoib_sendq_size) {
- ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n");
+ wr_id = (unsigned long)skb;
+ err = post_send(priv, wr_id, address->ah, qpn, addr, skb->len);
+ if (!err) {
+ dev->trans_start = jiffies;
+ IPOIB_SKB_PRV_ADDR(skb) = addr;
+ IPOIB_SKB_PRV_AH(skb) = address;
+ IPOIB_SKB_PRV_SKB(skb) = skb;
+ spin_lock(&priv->slist_lock);
+ list_add_tail(&IPOIB_SKB_PRV_LIST(skb), &priv->send_list);
+ spin_unlock(&priv->slist_lock);
+ return;
+ } else {
+ if (!netif_queue_stopped(dev)) {
netif_stop_queue(dev);
+ ipoib_warn(priv, "stopping kernel net queue\n");
}
+ dma_unmap_single(priv->ca->dma_device, addr, skb->len,
+ DMA_TO_DEVICE);
+ ipoib_warn(priv, "post_send failed\n");
+ ++priv->stats.tx_dropped;
+ ++priv->stats.tx_errors;
+ dev_kfree_skb_any(skb);
+ kref_put(&address->ref, ipoib_free_ah);
}
}
@@ -480,7 +467,9 @@ int ipoib_ib_dev_stop(struct net_device
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ib_qp_attr qp_attr;
unsigned long begin;
- struct ipoib_tx_buf *tx_req;
+ unsigned long flags;
+ struct ipoib_skb_prv *cb, *tcb;
+ struct sk_buff *skb;
int i;
clear_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
@@ -496,25 +485,25 @@ int ipoib_ib_dev_stop(struct net_device
/* Wait for all sends and receives to complete */
begin = jiffies;
- while (priv->tx_head != priv->tx_tail || recvs_pending(dev)) {
+ while (!list_empty(&priv->send_list) || recvs_pending(dev)) {
if (time_after(jiffies, begin + 5 * HZ)) {
- ipoib_warn(priv, "timing out; %d sends %d receives not completed\n",
- priv->tx_head - priv->tx_tail, recvs_pending(dev));
+ ipoib_warn(priv, "timing out; %d receives not completed\n",
+ recvs_pending(dev));
/*
* assume the HW is wedged and just free up
* all our pending work requests.
*/
- while ((int) priv->tx_tail - (int) priv->tx_head < 0) {
- tx_req = &priv->tx_ring[priv->tx_tail & (ipoib_sendq_size - 1)];
- dma_unmap_single(priv->ca->dma_device,
- pci_unmap_addr(tx_req, mapping),
- tx_req->skb->len,
- DMA_TO_DEVICE);
- dev_kfree_skb_any(tx_req->skb);
- ++priv->tx_tail;
- }
+ spin_lock_irqsave(&priv->slist_lock, flags);
+ list_for_each_entry_safe(cb, tcb, &priv->send_list, list) {
+ skb = cb->skb;
+ dma_unmap_single(priv->ca->dma_device,
+ IPOIB_SKB_PRV_ADDR(skb),
+ skb->len, DMA_TO_DEVICE);
+ dev_kfree_skb_any(skb);
+ }
+ spin_unlock_irqrestore(&priv->slist_lock, flags);
for (i = 0; i < ipoib_recvq_size; ++i)
if (priv->rx_ring[i].skb) {
diff -urpN infiniband-ah/ulp/ipoib/ipoib_main.c
infiniband-tx/ulp/ipoib/ipoib_main.c
--- infiniband-ah/ulp/ipoib/ipoib_main.c 2006-05-23 09:31:49.000000000 -0700
+++ infiniband-tx/ulp/ipoib/ipoib_main.c 2006-05-24 11:47:06.000000000 -0700
@@ -708,9 +708,7 @@ static void ipoib_timeout(struct net_dev
ipoib_warn(priv, "transmit timeout: latency %d msecs\n",
jiffies_to_msecs(jiffies - dev->trans_start));
- ipoib_warn(priv, "queue stopped %d, tx_head %u, tx_tail %u\n",
- netif_queue_stopped(dev),
- priv->tx_head, priv->tx_tail);
+ ipoib_warn(priv, "queue stopped %d\n", netif_queue_stopped(dev));
/* XXX reset QP, etc. */
}
@@ -846,7 +844,7 @@ int ipoib_dev_init(struct net_device *de
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
- /* Allocate RX/TX "rings" to hold queued skbs */
+ /* Allocate RX "rings" to hold queued skbs */
priv->rx_ring = kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring,
GFP_KERNEL);
if (!priv->rx_ring) {
@@ -855,24 +853,11 @@ int ipoib_dev_init(struct net_device *de
goto out;
}
- priv->tx_ring = kzalloc(ipoib_sendq_size * sizeof *priv->tx_ring,
- GFP_KERNEL);
- if (!priv->tx_ring) {
- printk(KERN_WARNING "%s: failed to allocate TX ring (%d entries)\n",
- ca->name, ipoib_sendq_size);
- goto out_rx_ring_cleanup;
- }
-
- /* priv->tx_head & tx_tail are already 0 */
-
if (ipoib_ib_dev_init(dev, ca, port))
- goto out_tx_ring_cleanup;
+ goto out_rx_ring_cleanup;
return 0;
-out_tx_ring_cleanup:
- kfree(priv->tx_ring);
-
out_rx_ring_cleanup:
kfree(priv->rx_ring);
@@ -896,10 +881,8 @@ void ipoib_dev_cleanup(struct net_device
ipoib_ib_dev_cleanup(dev);
kfree(priv->rx_ring);
- kfree(priv->tx_ring);
priv->rx_ring = NULL;
- priv->tx_ring = NULL;
}
static void ipoib_setup(struct net_device *dev)
@@ -944,6 +927,7 @@ static void ipoib_setup(struct net_devic
spin_lock_init(&priv->lock);
spin_lock_init(&priv->tx_lock);
+ spin_lock_init(&priv->slist_lock);
mutex_init(&priv->mcast_mutex);
mutex_init(&priv->vlan_mutex);
@@ -952,6 +936,7 @@ static void ipoib_setup(struct net_devic
INIT_LIST_HEAD(&priv->child_intfs);
INIT_LIST_HEAD(&priv->dead_ahs);
INIT_LIST_HEAD(&priv->multicast_list);
+ INIT_LIST_HEAD(&priv->send_list);
INIT_WORK(&priv->pkey_task, ipoib_pkey_poll, priv->dev);
INIT_WORK(&priv->mcast_task, ipoib_mcast_join_task, priv->dev);
Thanks
Shirley Ma
IBM Linux Technology Center
15300 SW Koll Parkway
Beaverton, OR 97006-6063
Phone(Fax): (503) 578-7638
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openfabrics.org/pipermail/general/attachments/20060524/4996acd8/attachment.html>
More information about the general
mailing list