[openib-general] [PATCH] Move IPoIB to use LockLess TX
Roland Dreier
roland at topspin.com
Wed Dec 1 09:32:50 PST 2004
This changes IPoIB's locking to use the new NETIF_F_LLTX (lockless
TX) scheme. It adds about 2-3% to throughput in my NetPIPE tests.
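(For anyone who hasn't run into NETIF_F_LLTX yet: with that flag set,
the core network stack no longer takes dev->xmit_lock around
hard_start_xmit, so the driver does its own TX serialization and may
return NETDEV_TX_LOCKED to ask the stack to requeue the packet.  A
minimal sketch of the pattern, illustrative only: my_priv and
my_start_xmit are made-up names, not IPoIB code.

static int my_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct my_priv *priv = netdev_priv(dev);
	unsigned long flags;

	/* The stack no longer holds dev->xmit_lock for us, so take
	 * our own TX lock.  If another CPU already holds it, return
	 * NETDEV_TX_LOCKED and the core will retry this skb later. */
	local_irq_save(flags);
	if (!spin_trylock(&priv->tx_lock)) {
		local_irq_restore(flags);
		return NETDEV_TX_LOCKED;
	}

	/* ... post skb to the hardware ... */

	spin_unlock_irqrestore(&priv->tx_lock, flags);
	return NETDEV_TX_OK;
}

That's the shape ipoib_start_xmit takes in the patch below.)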
- R.
Index: infiniband/ulp/ipoib/ipoib_main.c
===================================================================
--- infiniband/ulp/ipoib/ipoib_main.c (revision 1304)
+++ infiniband/ulp/ipoib/ipoib_main.c (working copy)
@@ -204,7 +204,7 @@
kfree(path);
}
-static int path_rec_start(struct sk_buff *skb, struct net_device *dev)
+static void path_rec_start(struct sk_buff *skb, struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ipoib_path *path = kmalloc(sizeof *path, GFP_ATOMIC);
@@ -244,23 +244,23 @@
path->neighbour = skb->dst->neighbour;
*to_ipoib_path(skb->dst->neighbour) = path;
- return 0;
+ return;
err:
kfree(path);
++priv->stats.tx_dropped;
dev_kfree_skb_any(skb);
-
- return 0;
}
-static int path_lookup(struct sk_buff *skb, struct net_device *dev)
+static void path_lookup(struct sk_buff *skb, struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(skb->dev);
/* Look up path record for unicasts */
- if (skb->dst->neighbour->ha[4] != 0xff)
- return path_rec_start(skb, dev);
+ if (skb->dst->neighbour->ha[4] != 0xff) {
+ path_rec_start(skb, dev);
+ return;
+ }
/* Add in the P_Key */
skb->dst->neighbour->ha[8] = (priv->pkey >> 8) & 0xff;
@@ -268,7 +268,6 @@
ipoib_mcast_send(dev,
(union ib_gid *) (skb->dst->neighbour->ha + 4),
skb);
- return 0;
}
static void unicast_arp_completion(int status,
@@ -336,8 +335,8 @@
* still go through (since we'll get the new path from the SM for
* these queries) so we'll never update the neighbour.
*/
-static int unicast_arp_start(struct sk_buff *skb, struct net_device *dev,
- struct ipoib_pseudoheader *phdr)
+static void unicast_arp_start(struct sk_buff *skb, struct net_device *dev,
+ struct ipoib_pseudoheader *phdr)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct sk_buff *tmp_skb;
@@ -352,7 +351,7 @@
dev_kfree_skb_any(tmp_skb);
if (!skb) {
++priv->stats.tx_dropped;
- return 0;
+ return;
}
}
@@ -381,25 +380,32 @@
++priv->stats.tx_dropped;
dev_kfree_skb_any(skb);
}
-
- return 0;
}
static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ipoib_path *path;
+ unsigned long flags;
+ local_irq_save(flags);
+ if (!spin_trylock(&priv->tx_lock)) {
+ local_irq_restore(flags);
+ return NETDEV_TX_LOCKED;
+ }
+
if (skb->dst && skb->dst->neighbour) {
- if (unlikely(!*to_ipoib_path(skb->dst->neighbour)))
- return path_lookup(skb, dev);
+ if (unlikely(!*to_ipoib_path(skb->dst->neighbour))) {
+ path_lookup(skb, dev);
+ goto out;
+ }
path = *to_ipoib_path(skb->dst->neighbour);
if (likely(path->ah)) {
ipoib_send(dev, skb, path->ah,
be32_to_cpup((__be32 *) skb->dst->neighbour->ha));
- return 0;
+ goto out;
}
if (skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE)
@@ -417,8 +423,7 @@
phdr->hwaddr[9] = priv->pkey & 0xff;
ipoib_mcast_send(dev, (union ib_gid *) (phdr->hwaddr + 4), skb);
- }
- else {
+ } else {
/* unicast GID -- ARP reply?? */
/*
@@ -429,7 +434,7 @@
if (skb->destructor == unicast_arp_finish) {
ipoib_send(dev, skb, *(struct ipoib_ah **) skb->cb,
be32_to_cpup((u32 *) phdr->hwaddr));
- return 0;
+ goto out;
}
if (be16_to_cpup((u16 *) skb->data) != ETH_P_ARP) {
@@ -441,22 +446,25 @@
IPOIB_GID_ARG(*(union ib_gid *) (phdr->hwaddr + 4)));
dev_kfree_skb_any(skb);
++priv->stats.tx_dropped;
- return 0;
+ goto out;
}
/* put the pseudoheader back on */
skb_push(skb, sizeof *phdr);
- return unicast_arp_start(skb, dev, phdr);
+ unicast_arp_start(skb, dev, phdr);
}
}
- return 0;
+ goto out;
err:
++priv->stats.tx_dropped;
dev_kfree_skb_any(skb);
- return 0;
+out:
+ spin_unlock_irqrestore(&priv->tx_lock, flags);
+
+ return NETDEV_TX_OK;
}
struct net_device_stats *ipoib_get_stats(struct net_device *dev)
@@ -641,7 +649,7 @@
dev->addr_len = INFINIBAND_ALEN;
dev->type = ARPHRD_INFINIBAND;
dev->tx_queue_len = IPOIB_TX_RING_SIZE * 2;
- dev->features = NETIF_F_VLAN_CHALLENGED;
+ dev->features = NETIF_F_VLAN_CHALLENGED | NETIF_F_LLTX;
/* MTU will be reset when mcast join happens */
dev->mtu = IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN;
@@ -656,6 +664,7 @@
priv->dev = dev;
spin_lock_init(&priv->lock);
+ spin_lock_init(&priv->tx_lock);
init_MUTEX(&priv->mcast_mutex);
init_MUTEX(&priv->vlan_mutex);
Index: infiniband/ulp/ipoib/ipoib.h
===================================================================
--- infiniband/ulp/ipoib/ipoib.h (revision 1304)
+++ infiniband/ulp/ipoib/ipoib.h (working copy)
@@ -130,6 +130,7 @@
struct ipoib_buf *rx_ring;
+ spinlock_t tx_lock;
struct ipoib_buf *tx_ring;
unsigned tx_head;
unsigned tx_tail;
Index: infiniband/ulp/ipoib/ipoib_ib.c
===================================================================
--- infiniband/ulp/ipoib/ipoib_ib.c (revision 1304)
+++ infiniband/ulp/ipoib/ipoib_ib.c (working copy)
@@ -70,14 +70,15 @@
unsigned long flags;
- spin_lock_irqsave(&priv->lock, flags);
if (ah->last_send <= priv->tx_tail) {
ipoib_dbg(priv, "Freeing ah %p\n", ah->ah);
ib_destroy_ah(ah->ah);
kfree(ah);
- } else
+ } else {
+ spin_lock_irqsave(&priv->lock, flags);
list_add_tail(&ah->list, &priv->dead_ahs);
- spin_unlock_irqrestore(&priv->lock, flags);
+ spin_unlock_irqrestore(&priv->lock, flags);
+ }
}
static inline int ipoib_ib_receive(struct ipoib_dev_priv *priv,
@@ -235,11 +236,11 @@
dev_kfree_skb_any(tx_req->skb);
- spin_lock_irqsave(&priv->lock, flags);
+ spin_lock_irqsave(&priv->tx_lock, flags);
++priv->tx_tail;
if (priv->tx_head - priv->tx_tail <= IPOIB_TX_RING_SIZE / 2)
netif_wake_queue(dev);
- spin_unlock_irqrestore(&priv->lock, flags);
+ spin_unlock_irqrestore(&priv->tx_lock, flags);
if (wc->status != IB_WC_SUCCESS &&
wc->status != IB_WC_WR_FLUSH_ERR)
@@ -338,19 +339,15 @@
++priv->stats.tx_errors;
dev_kfree_skb_any(skb);
} else {
- unsigned long flags;
-
dev->trans_start = jiffies;
address->last_send = priv->tx_head;
++priv->tx_head;
- spin_lock_irqsave(&priv->lock, flags);
if (priv->tx_head - priv->tx_tail == IPOIB_TX_RING_SIZE) {
ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n");
netif_stop_queue(dev);
}
- spin_unlock_irqrestore(&priv->lock, flags);
}
}
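One note on the ipoib_ib.c hunks: tx_head and tx_tail are now
consistently guarded by tx_lock rather than priv->lock.  ipoib_send()
runs entirely under tx_lock (ipoib_start_xmit() takes it before
calling down), which is why the lock/unlock around the queue-stop
check can simply be deleted, and the completion path takes the same
tx_lock before advancing tx_tail and waking the queue.  Spelled out
as a made-up helper, purely for illustration and not part of the
patch:

/* Both the "ring full -> stop queue" test in ipoib_send() and the
 * "half empty -> wake queue" test in the completion handler compare
 * tx_head against tx_tail, so both must run under priv->tx_lock to
 * see a consistent head/tail pair. */
static inline int ipoib_tx_ring_full(struct ipoib_dev_priv *priv)
{
	return priv->tx_head - priv->tx_tail == IPOIB_TX_RING_SIZE;
}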