[openib-general] [PATCH] ipoib: Free AHs
Roland Dreier
roland at topspin.com
Thu Nov 11 08:31:57 PST 2004
This patch corrects the fact that IPoIB leaks all of its address
handles by creating a list of dead AHs and freeing an AH once all the
sends using it complete.
Index: ulp/ipoib/ipoib_verbs.c
===================================================================
--- ulp/ipoib/ipoib_verbs.c (revision 1201)
+++ ulp/ipoib/ipoib_verbs.c (working copy)
@@ -171,16 +171,6 @@
return -EINVAL;
}
-void ipoib_qp_destroy(struct net_device *dev)
-{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
-
- if (ib_destroy_qp(priv->qp))
- ipoib_warn(priv, "ib_qp_destroy failed\n");
-
- priv->qp = NULL;
-}
-
int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
Index: ulp/ipoib/ipoib_main.c
===================================================================
--- ulp/ipoib/ipoib_main.c (revision 1201)
+++ ulp/ipoib/ipoib_main.c (working copy)
@@ -177,7 +177,7 @@
struct ipoib_path *path = path_ptr;
struct ipoib_dev_priv *priv = netdev_priv(path->dev);
struct sk_buff *skb;
- struct ib_ah *ah;
+ struct ipoib_ah *ah;
ipoib_dbg(priv, "status %d, LID 0x%04x for GID " IPOIB_GID_FMT "\n",
status, be16_to_cpu(pathrec->dlid), IPOIB_GID_ARG(pathrec->dgid));
@@ -195,10 +195,10 @@
.port_num = priv->port
};
- ah = ib_create_ah(priv->pd, &av);
+ ah = ipoib_create_ah(path->dev, priv->pd, &av);
}
- if (IS_ERR(ah))
+ if (!ah)
goto err;
path->ah = ah;
@@ -299,7 +299,7 @@
{
struct sk_buff *skb = skb_ptr;
struct ipoib_dev_priv *priv = netdev_priv(skb->dev);
- struct ib_ah *ah;
+ struct ipoib_ah *ah;
ipoib_dbg(priv, "status %d, LID 0x%04x for GID " IPOIB_GID_FMT "\n",
status, be16_to_cpu(pathrec->dlid), IPOIB_GID_ARG(pathrec->dgid));
@@ -307,6 +307,10 @@
if (status)
goto err;
+ ah = kmalloc(sizeof *ah, GFP_KERNEL);
+ if (!ah)
+ goto err;
+
{
struct ib_ah_attr av = {
.dlid = be16_to_cpu(pathrec->dlid),
@@ -317,13 +321,15 @@
.port_num = priv->port
};
- ah = ib_create_ah(priv->pd, &av);
+ ah->ah = ib_create_ah(priv->pd, &av);
}
- if (IS_ERR(ah))
+ if (IS_ERR(ah->ah)) {
+ kfree(ah);
goto err;
+ }
- *(struct ib_ah **) skb->cb = ah;
+ *(struct ipoib_ah **) skb->cb = ah;
if (dev_queue_xmit(skb))
ipoib_warn(priv, "dev_queue_xmit failed "
@@ -337,10 +343,15 @@
static void unicast_arp_finish(struct sk_buff *skb)
{
- struct ib_ah *ah = *(struct ib_ah **) skb->cb;
+ struct ipoib_dev_priv *priv = netdev_priv(skb->dev);
+ struct ipoib_ah *ah = *(struct ipoib_ah **) skb->cb;
+ unsigned long flags;
- if (ah)
- ib_destroy_ah(ah);
+ if (ah) {
+ spin_lock_irqsave(&priv->lock, flags);
+ list_add_tail(&ah->list, &priv->dead_ahs);
+ spin_unlock_irqrestore(&priv->lock, flags);
+ }
}
/*
@@ -443,7 +454,7 @@
* now we can just send the packet.
*/
if (skb->destructor == unicast_arp_finish) {
- ipoib_send(dev, skb, *(struct ib_ah **) skb->cb,
+ ipoib_send(dev, skb, *(struct ipoib_ah **) skb->cb,
be32_to_cpup((u32 *) phdr->hwaddr));
return 0;
}
@@ -454,14 +465,7 @@
skb->dst ? "neigh" : "dst",
be16_to_cpup((u16 *) skb->data),
be32_to_cpup((u32 *) phdr->hwaddr),
- phdr->hwaddr[ 4], phdr->hwaddr[ 5],
- phdr->hwaddr[ 6], phdr->hwaddr[ 7],
- phdr->hwaddr[ 8], phdr->hwaddr[ 9],
- phdr->hwaddr[10], phdr->hwaddr[11],
- phdr->hwaddr[12], phdr->hwaddr[13],
- phdr->hwaddr[14], phdr->hwaddr[15],
- phdr->hwaddr[16], phdr->hwaddr[17],
- phdr->hwaddr[18], phdr->hwaddr[19]);
+ IPOIB_GID_ARG(*(union ib_gid *) (phdr->hwaddr + 4)));
/* put the pseudoheader back on */
skb_push(skb, sizeof *phdr);
@@ -529,10 +533,17 @@
static void ipoib_neigh_destructor(struct neighbour *neigh)
{
- ipoib_dbg(netdev_priv(neigh->dev),
- "neigh_destructor for %06x " IPOIB_GID_FMT "\n",
+ struct ipoib_dev_priv *priv = netdev_priv(neigh->dev);
+ struct ipoib_path *path = IPOIB_PATH(neigh);
+
+ ipoib_dbg(priv, "neigh_destructor for %06x " IPOIB_GID_FMT "\n",
be32_to_cpup((__be32 *) neigh->ha),
IPOIB_GID_ARG(*((union ib_gid *) (neigh->ha + 4))));
+
+ if (path && path->ah) {
+ ipoib_put_ah(path->ah);
+ kfree(path);
+ }
}
static int ipoib_neigh_setup(struct neighbour *neigh)
@@ -683,12 +694,14 @@
sema_init(&priv->mcast_mutex, 1);
INIT_LIST_HEAD(&priv->child_intfs);
+ INIT_LIST_HEAD(&priv->dead_ahs);
INIT_LIST_HEAD(&priv->multicast_list);
INIT_WORK(&priv->pkey_task, ipoib_pkey_poll, priv->dev);
INIT_WORK(&priv->mcast_task, ipoib_mcast_join_task, priv->dev);
INIT_WORK(&priv->flush_task, ipoib_ib_dev_flush, priv->dev);
INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task, priv->dev);
+ INIT_WORK(&priv->ah_reap_task, ipoib_reap_ah, priv->dev);
}
struct ipoib_dev_priv *ipoib_intf_alloc(const char *name)
Index: ulp/ipoib/ipoib_multicast.c
===================================================================
--- ulp/ipoib/ipoib_multicast.c (revision 1201)
+++ ulp/ipoib/ipoib_multicast.c (working copy)
@@ -36,7 +36,7 @@
/* Used for all multicast joins (broadcast, IPv4 mcast and IPv6 mcast) */
struct ipoib_mcast {
struct ib_sa_mcmember_rec mcmember;
- struct ib_ah *address_handle;
+ struct ipoib_ah *ah;
struct rb_node rb_node;
struct list_head list;
@@ -69,11 +69,8 @@
ipoib_dbg_mcast(priv, "deleting multicast group " IPOIB_GID_FMT "\n",
IPOIB_GID_ARG(mcast->mcmember.mgid));
- if (mcast->address_handle != NULL) {
- int ret = ib_destroy_ah(mcast->address_handle);
- if (ret < 0)
- ipoib_warn(priv, "ib_destroy_ah failed (ret = %d)\n", ret);
- }
+ if (mcast->ah)
+ ipoib_put_ah(mcast->ah);
while (!skb_queue_empty(&mcast->pkt_queue)) {
struct sk_buff *skb = skb_dequeue(&mcast->pkt_queue);
@@ -108,7 +105,7 @@
INIT_LIST_HEAD(&mcast->list);
skb_queue_head_init(&mcast->pkt_queue);
- mcast->address_handle = NULL;
+ mcast->ah = NULL;
mcast->query = NULL;
return mcast;
@@ -224,14 +221,14 @@
av.grh.dgid = mcast->mcmember.mgid;
- mcast->address_handle = ib_create_ah(priv->pd, &av);
- if (IS_ERR(mcast->address_handle)) {
+ mcast->ah = ipoib_create_ah(dev, priv->pd, &av);
+ if (!mcast->ah) {
ipoib_warn(priv, "ib_address_create failed\n");
} else {
ipoib_dbg_mcast(priv, "MGID " IPOIB_GID_FMT
" AV %p, LID 0x%04x, SL %d\n",
IPOIB_GID_ARG(mcast->mcmember.mgid),
- mcast->address_handle,
+ mcast->ah->ah,
be16_to_cpu(mcast->mcmember.mlid),
mcast->mcmember.sl);
}
@@ -661,7 +658,7 @@
list_add_tail(&mcast->list, &priv->multicast_list);
}
- if (!mcast->address_handle) {
+ if (!mcast->ah) {
if (skb_queue_len(&mcast->pkt_queue) < IPOIB_MAX_MCAST_QUEUE)
skb_queue_tail(&mcast->pkt_queue, skb);
else
@@ -682,14 +679,15 @@
out:
spin_unlock_irqrestore(&priv->lock, flags);
- if (mcast && mcast->address_handle) {
+ if (mcast && mcast->ah) {
if (skb->dst &&
skb->dst->neighbour &&
!IPOIB_PATH(skb->dst->neighbour)) {
struct ipoib_path *path = kmalloc(sizeof *path, GFP_ATOMIC);
if (path) {
- path->ah = mcast->address_handle;
+ kref_get(&mcast->ah->ref);
+ path->ah = mcast->ah;
path->qpn = IB_MULTICAST_QPN;
path->dev = dev;
path->neighbour = skb->dst->neighbour;
@@ -697,7 +695,7 @@
}
}
- ipoib_send(dev, skb, mcast->address_handle, IB_MULTICAST_QPN);
+ ipoib_send(dev, skb, mcast->ah, IB_MULTICAST_QPN);
}
}
@@ -951,9 +949,9 @@
mcast = rb_entry(iter->rb_node, struct ipoib_mcast, rb_node);
- *mgid = mcast->mcmember.mgid;
- *created = mcast->created;
- *queuelen = skb_queue_len(&mcast->pkt_queue);
- *complete = mcast->address_handle != NULL;
+ *mgid = mcast->mcmember.mgid;
+ *created = mcast->created;
+ *queuelen = skb_queue_len(&mcast->pkt_queue);
+ *complete = !!mcast->ah;
*send_only = (mcast->flags & (1 << IPOIB_MCAST_FLAG_SENDONLY)) ? 1 : 0;
}
Index: ulp/ipoib/ipoib.h
===================================================================
--- ulp/ipoib/ipoib.h (revision 1201)
+++ ulp/ipoib/ipoib.h (working copy)
@@ -31,6 +31,7 @@
#include <linux/workqueue.h>
#include <linux/pci.h>
#include <linux/config.h>
+#include <linux/kref.h>
#include <asm/atomic.h>
#include <asm/semaphore.h>
@@ -65,6 +66,7 @@
IPOIB_PKEY_STOP = 4,
IPOIB_FLAG_SUBINTERFACE = 5,
IPOIB_MCAST_RUN = 6,
+ IPOIB_STOP_REAPER = 7,
IPOIB_MAX_BACKOFF_SECONDS = 16,
@@ -109,6 +111,7 @@
struct work_struct mcast_task;
struct work_struct flush_task;
struct work_struct restart_task;
+ struct work_struct ah_reap_task;
struct ib_device *ca;
u8 port;
@@ -134,18 +137,28 @@
struct ib_wc ibwc[IPOIB_NUM_WC];
+ struct list_head dead_ahs;
+
struct proc_dir_entry *mcast_proc_entry;
struct ib_event_handler event_handler;
struct net_device_stats stats;
+ struct list_head child_intfs;
struct list_head list;
- struct list_head child_intfs;
};
+struct ipoib_ah {
+ struct net_device *dev;
+ struct ib_ah *ah;
+ struct list_head list;
+ struct kref ref;
+ unsigned last_send;
+};
+
struct ipoib_path {
- struct ib_ah *ah;
+ struct ipoib_ah *ah;
u32 qpn;
struct sk_buff_head queue;
@@ -166,8 +179,17 @@
void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr);
+struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
+ struct ib_pd *pd, struct ib_ah_attr *attr);
+void ipoib_free_ah(struct kref *kref);
+static inline void ipoib_put_ah(struct ipoib_ah *ah)
+{
+ kref_put(&ah->ref, ipoib_free_ah);
+}
+
void ipoib_send(struct net_device *dev, struct sk_buff *skb,
- struct ib_ah *address, u32 qpn);
+ struct ipoib_ah *address, u32 qpn);
+void ipoib_reap_ah(void *dev_ptr);
struct ipoib_dev_priv *ipoib_intf_alloc(const char *format);
@@ -213,7 +235,6 @@
union ib_gid *mgid);
int ipoib_qp_create(struct net_device *dev);
-void ipoib_qp_destroy(struct net_device *dev);
int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca);
void ipoib_transport_dev_cleanup(struct net_device *dev);
Index: ulp/ipoib/ipoib_ib.c
===================================================================
--- ulp/ipoib/ipoib_ib.c (revision 1201)
+++ ulp/ipoib/ipoib_ib.c (working copy)
@@ -29,10 +29,44 @@
static DECLARE_MUTEX(pkey_sem);
-static int _ipoib_ib_receive(struct ipoib_dev_priv *priv,
- u64 work_request_id,
- dma_addr_t addr)
+struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
+ struct ib_pd *pd, struct ib_ah_attr *attr)
{
+ struct ipoib_ah *ah;
+
+ ah = kmalloc(sizeof *ah, GFP_KERNEL);
+ if (!ah)
+ return NULL;
+
+ ah->dev = dev;
+ ah->last_send = 0;
+ kref_init(&ah->ref);
+
+ ah->ah = ib_create_ah(pd, attr);
+ if (IS_ERR(ah->ah)) {
+ kfree(ah);
+ ah = NULL;
+ }
+
+ return ah;
+}
+
+void ipoib_free_ah(struct kref *kref)
+{
+ struct ipoib_ah *ah = container_of(kref, struct ipoib_ah, ref);
+ struct ipoib_dev_priv *priv = netdev_priv(ah->dev);
+
+ unsigned long flags;
+
+ spin_lock_irqsave(&priv->lock, flags);
+ list_add_tail(&ah->list, &priv->dead_ahs);
+ spin_unlock_irqrestore(&priv->lock, flags);
+}
+
+static int ipoib_ib_receive(struct ipoib_dev_priv *priv,
+ u64 work_request_id,
+ dma_addr_t addr)
+{
struct ib_sge list = {
.addr = addr,
.length = IPOIB_BUF_SIZE,
@@ -50,8 +84,8 @@
}
/* =============================================================== */
-/*.._ipoib_ib_post_receive -- post a receive buffer */
-static int _ipoib_ib_post_receive(struct net_device *dev, int id)
+/*..ipoib_ib_post_receive -- post a receive buffer */
+static int ipoib_ib_post_receive(struct net_device *dev, int id)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct sk_buff *skb;
@@ -72,24 +106,24 @@
PCI_DMA_FROMDEVICE);
pci_unmap_addr_set(&priv->rx_ring[id], mapping, addr);
- ret = _ipoib_ib_receive(priv, id, addr);
+ ret = ipoib_ib_receive(priv, id, addr);
if (ret)
- ipoib_warn(priv, "_ipoib_ib_receive failed for buf %d (%d)\n",
+ ipoib_warn(priv, "ipoib_ib_receive failed for buf %d (%d)\n",
id, ret);
return ret;
}
/* =============================================================== */
-/*.._ipoib_ib_post_receives -- post all receive buffers */
-static int _ipoib_ib_post_receives(struct net_device *dev)
+/*..ipoib_ib_post_receives -- post all receive buffers */
+static int ipoib_ib_post_receives(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
int i;
for (i = 0; i < IPOIB_RX_RING_SIZE; ++i) {
- if (_ipoib_ib_post_receive(dev, i)) {
- ipoib_warn(priv, "_ipoib_ib_post_receive failed for buf %d\n", i);
+ if (ipoib_ib_post_receive(dev, i)) {
+ ipoib_warn(priv, "ipoib_ib_post_receive failed for buf %d\n", i);
return -EIO;
}
}
@@ -108,7 +142,7 @@
if (entry->status != IB_WC_SUCCESS) {
ipoib_warn(priv, "got failed completion event "
- "(status=%d, wrid=%d, op=%d)",
+ "(status=%d, wrid=%d, op=%d)\n",
entry->status, wr_id, entry->opcode);
if (entry->opcode == IB_WC_SEND) {
@@ -163,8 +197,8 @@
}
/* repost receive */
- if (_ipoib_ib_post_receive(dev, wr_id))
- ipoib_warn(priv, "_ipoib_ib_post_receive failed "
+ if (ipoib_ib_post_receive(dev, wr_id))
+ ipoib_warn(priv, "ipoib_ib_post_receive failed "
"for buf %d\n", wr_id);
} else
ipoib_warn(priv, "completion event with wrid %d\n",
@@ -262,7 +296,7 @@
/* =============================================================== */
/*..ipoib_send -- schedule an IB send work request */
void ipoib_send(struct net_device *dev, struct sk_buff *skb,
- struct ib_ah *address, u32 qpn)
+ struct ipoib_ah *address, u32 qpn)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ipoib_buf *tx_req;
@@ -302,7 +336,7 @@
pci_unmap_addr_set(tx_req, mapping, addr);
if (post_send(priv, priv->tx_head & (IPOIB_TX_RING_SIZE - 1),
- address, qpn, addr, skb->len)) {
+ address->ah, qpn, addr, skb->len)) {
ipoib_warn(priv, "post_send failed\n");
++priv->stats.tx_errors;
tx_req->skb = NULL;
@@ -312,6 +346,7 @@
dev->trans_start = jiffies;
+ address->last_send = priv->tx_head;
++priv->tx_head;
spin_lock_irqsave(&priv->lock, flags);
@@ -323,6 +358,38 @@
}
}
+void __ipoib_reap_ah(struct net_device *dev)
+{
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_ah *ah, *tah;
+ LIST_HEAD(remove_list);
+
+ spin_lock_irq(&priv->lock);
+ list_for_each_entry_safe(ah, tah, &priv->dead_ahs, list)
+ if (ah->last_send <= priv->tx_tail) {
+ list_del(&ah->list);
+ list_add_tail(&ah->list, &remove_list);
+ }
+ spin_unlock_irq(&priv->lock);
+
+ list_for_each_entry_safe(ah, tah, &remove_list, list) {
+ ipoib_dbg(priv, "Reaping ah %p\n", ah->ah);
+ ib_destroy_ah(ah->ah);
+ kfree(ah);
+ }
+}
+
+void ipoib_reap_ah(void *dev_ptr)
+{
+ struct net_device *dev = dev_ptr;
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+
+ __ipoib_reap_ah(dev);
+
+ if (!test_bit(IPOIB_STOP_REAPER, &priv->flags))
+ queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task, HZ);
+}
+
int ipoib_ib_dev_open(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -334,12 +401,15 @@
return -1;
}
- ret = _ipoib_ib_post_receives(dev);
+ ret = ipoib_ib_post_receives(dev);
if (ret) {
- ipoib_warn(priv, "_ipoib_ib_post_receives returned %d\n", ret);
+ ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret);
return -1;
}
+ clear_bit(IPOIB_STOP_REAPER, &priv->flags);
+ queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task, HZ);
+
return 0;
}
@@ -395,8 +465,10 @@
int i;
/* Kill the existing QP and allocate a new one */
- if (priv->qp != NULL)
- ipoib_qp_destroy(dev);
+ if (priv->qp != NULL) {
+ ib_destroy_qp(priv->qp);
+ priv->qp = NULL;
+ }
for (i = 0; i < IPOIB_RX_RING_SIZE; ++i) {
if (priv->rx_ring[i].skb) {
@@ -463,14 +535,24 @@
/*..ipoib_ib_dev_cleanup -- clean up IB resources for iface */
void ipoib_ib_dev_cleanup(struct net_device *dev)
{
- ipoib_dbg(netdev_priv(dev), "cleaning up ib_dev\n");
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+ ipoib_dbg(priv, "cleaning up ib_dev\n");
+
ipoib_mcast_stop_thread(dev);
/* Delete the broadcast address and the local address */
ipoib_mcast_dev_down(dev);
ipoib_transport_dev_cleanup(dev);
+
+ set_bit(IPOIB_STOP_REAPER, &priv->flags);
+ cancel_delayed_work(&priv->ah_reap_task);
+ flush_workqueue(ipoib_workqueue);
+ while (!list_empty(&priv->dead_ahs)) {
+ __ipoib_reap_ah(dev);
+ yield();
+ }
}
/*
More information about the general
mailing list