[openib-general] [PATCH] ipoib: Free AHs

Roland Dreier roland at topspin.com
Thu Nov 11 08:31:57 PST 2004


This patch corrects the fact that IPoIB leaks all of its address
handles by creating a list of dead AHs and freeing an AH once all the
sends using it complete.

Index: ulp/ipoib/ipoib_verbs.c
===================================================================
--- ulp/ipoib/ipoib_verbs.c	(revision 1201)
+++ ulp/ipoib/ipoib_verbs.c	(working copy)
@@ -171,16 +171,6 @@
 	return -EINVAL;
 }
 
-void ipoib_qp_destroy(struct net_device *dev)
-{
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
-
-	if (ib_destroy_qp(priv->qp))
-		ipoib_warn(priv, "ib_qp_destroy failed\n");
-
-	priv->qp = NULL;
-}
-
 int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
Index: ulp/ipoib/ipoib_main.c
===================================================================
--- ulp/ipoib/ipoib_main.c	(revision 1201)
+++ ulp/ipoib/ipoib_main.c	(working copy)
@@ -177,7 +177,7 @@
 	struct ipoib_path *path = path_ptr;
 	struct ipoib_dev_priv *priv = netdev_priv(path->dev);
 	struct sk_buff *skb;
-	struct ib_ah *ah;
+	struct ipoib_ah *ah;
 
 	ipoib_dbg(priv, "status %d, LID 0x%04x for GID " IPOIB_GID_FMT "\n",
 		  status, be16_to_cpu(pathrec->dlid), IPOIB_GID_ARG(pathrec->dgid));
@@ -195,10 +195,10 @@
 			.port_num      = priv->port
 		};
 
-		ah = ib_create_ah(priv->pd, &av);
+		ah = ipoib_create_ah(path->dev, priv->pd, &av);
 	}
 
-	if (IS_ERR(ah))
+	if (!ah)
 		goto err;
 
 	path->ah = ah;
@@ -299,7 +299,7 @@
 {
 	struct sk_buff *skb = skb_ptr;
 	struct ipoib_dev_priv *priv = netdev_priv(skb->dev);
-	struct ib_ah *ah;
+	struct ipoib_ah *ah;
 
 	ipoib_dbg(priv, "status %d, LID 0x%04x for GID " IPOIB_GID_FMT "\n",
 		  status, be16_to_cpu(pathrec->dlid), IPOIB_GID_ARG(pathrec->dgid));
@@ -307,6 +307,10 @@
 	if (status)
 		goto err;
 
+	ah = kmalloc(sizeof *ah, GFP_KERNEL);
+	if (!ah)
+		goto err;
+
 	{
 		struct ib_ah_attr av = {
 			.dlid 	       = be16_to_cpu(pathrec->dlid),
@@ -317,13 +321,15 @@
 			.port_num      = priv->port
 		};
 
-		ah = ib_create_ah(priv->pd, &av);
+		ah->ah = ib_create_ah(priv->pd, &av);
 	}
 
-	if (IS_ERR(ah))
+	if (IS_ERR(ah->ah)) {
+		kfree(ah);
 		goto err;
+	}
 
-	*(struct ib_ah **) skb->cb = ah;
+	*(struct ipoib_ah **) skb->cb = ah;
 
 	if (dev_queue_xmit(skb))
 		ipoib_warn(priv, "dev_queue_xmit failed "
@@ -337,10 +343,15 @@
 
 static void unicast_arp_finish(struct sk_buff *skb)
 {
-	struct ib_ah *ah = *(struct ib_ah **) skb->cb;
+	struct ipoib_dev_priv *priv = netdev_priv(skb->dev);
+	struct ipoib_ah *ah = *(struct ipoib_ah **) skb->cb;
+	unsigned long flags;
 
-	if (ah)
-		ib_destroy_ah(ah);
+	if (ah) {
+		spin_lock_irqsave(&priv->lock, flags);
+		list_add_tail(&ah->list, &priv->dead_ahs);
+		spin_unlock_irqrestore(&priv->lock, flags);
+	}
 }
 
 /*
@@ -443,7 +454,7 @@
 			 * now we can just send the packet.
 			 */
 			if (skb->destructor == unicast_arp_finish) {
-				ipoib_send(dev, skb, *(struct ib_ah **) skb->cb,
+				ipoib_send(dev, skb, *(struct ipoib_ah **) skb->cb,
 					   be32_to_cpup((u32 *) phdr->hwaddr));
 				return 0;
 			}
@@ -454,14 +465,7 @@
 					   skb->dst ? "neigh" : "dst",
 					   be16_to_cpup((u16 *) skb->data),
 					   be32_to_cpup((u32 *) phdr->hwaddr),
-					   phdr->hwaddr[ 4], phdr->hwaddr[ 5],
-					   phdr->hwaddr[ 6], phdr->hwaddr[ 7],
-					   phdr->hwaddr[ 8], phdr->hwaddr[ 9],
-					   phdr->hwaddr[10], phdr->hwaddr[11],
-					   phdr->hwaddr[12], phdr->hwaddr[13],
-					   phdr->hwaddr[14], phdr->hwaddr[15],
-					   phdr->hwaddr[16], phdr->hwaddr[17],
-					   phdr->hwaddr[18], phdr->hwaddr[19]);
+					   IPOIB_GID_ARG(*(union ib_gid *) (phdr->hwaddr + 4)));
 
 			/* put the pseudoheader back on */			  
 			skb_push(skb, sizeof *phdr);
@@ -529,10 +533,17 @@
 
 static void ipoib_neigh_destructor(struct neighbour *neigh)
 {
-	ipoib_dbg(netdev_priv(neigh->dev),
-		  "neigh_destructor for %06x " IPOIB_GID_FMT "\n",
+	struct ipoib_dev_priv *priv = netdev_priv(neigh->dev);
+	struct ipoib_path     *path = IPOIB_PATH(neigh);
+
+	ipoib_dbg(priv, "neigh_destructor for %06x " IPOIB_GID_FMT "\n",
 		  be32_to_cpup((__be32 *) neigh->ha),
 		  IPOIB_GID_ARG(*((union ib_gid *) (neigh->ha + 4))));
+
+	if (path && path->ah) {
+		ipoib_put_ah(path->ah);
+		kfree(path);
+	}
 }
 
 static int ipoib_neigh_setup(struct neighbour *neigh)
@@ -683,12 +694,14 @@
 	sema_init(&priv->mcast_mutex, 1);
 
 	INIT_LIST_HEAD(&priv->child_intfs);
+	INIT_LIST_HEAD(&priv->dead_ahs);
 	INIT_LIST_HEAD(&priv->multicast_list);
 
 	INIT_WORK(&priv->pkey_task,    ipoib_pkey_poll,          priv->dev);
 	INIT_WORK(&priv->mcast_task,   ipoib_mcast_join_task,    priv->dev);
 	INIT_WORK(&priv->flush_task,   ipoib_ib_dev_flush,       priv->dev);
 	INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task, priv->dev);
+	INIT_WORK(&priv->ah_reap_task, ipoib_reap_ah,            priv->dev);
 }
 
 struct ipoib_dev_priv *ipoib_intf_alloc(const char *name)
Index: ulp/ipoib/ipoib_multicast.c
===================================================================
--- ulp/ipoib/ipoib_multicast.c	(revision 1201)
+++ ulp/ipoib/ipoib_multicast.c	(working copy)
@@ -36,7 +36,7 @@
 /* Used for all multicast joins (broadcast, IPv4 mcast and IPv6 mcast) */
 struct ipoib_mcast {
 	struct ib_sa_mcmember_rec mcmember;
-	struct ib_ah             *address_handle;
+	struct ipoib_ah          *ah;
 
 	struct rb_node    rb_node;
 	struct list_head  list;
@@ -69,11 +69,8 @@
 	ipoib_dbg_mcast(priv, "deleting multicast group " IPOIB_GID_FMT "\n",
 			IPOIB_GID_ARG(mcast->mcmember.mgid));
 
-	if (mcast->address_handle != NULL) {
-		int ret = ib_destroy_ah(mcast->address_handle);
-		if (ret < 0)
-			ipoib_warn(priv, "ib_destroy_ah failed (ret = %d)\n", ret);
-	}
+	if (mcast->ah)
+		ipoib_put_ah(mcast->ah);
 
 	while (!skb_queue_empty(&mcast->pkt_queue)) {
 		struct sk_buff *skb = skb_dequeue(&mcast->pkt_queue);
@@ -108,7 +105,7 @@
 	INIT_LIST_HEAD(&mcast->list);
 	skb_queue_head_init(&mcast->pkt_queue);
 
-	mcast->address_handle = NULL;
+	mcast->ah    = NULL;
 	mcast->query = NULL;
 
 	return mcast;
@@ -224,14 +221,14 @@
 
 		av.grh.dgid = mcast->mcmember.mgid;
 
-		mcast->address_handle = ib_create_ah(priv->pd, &av);
-		if (IS_ERR(mcast->address_handle)) {
+		mcast->ah = ipoib_create_ah(dev, priv->pd, &av);
+		if (!mcast->ah) {
 			ipoib_warn(priv, "ib_address_create failed\n");
 		} else {
 			ipoib_dbg_mcast(priv, "MGID " IPOIB_GID_FMT
 					" AV %p, LID 0x%04x, SL %d\n",
 					IPOIB_GID_ARG(mcast->mcmember.mgid),
-					mcast->address_handle,
+					mcast->ah->ah,
 					be16_to_cpu(mcast->mcmember.mlid),
 					mcast->mcmember.sl);
 		}
@@ -661,7 +658,7 @@
 		list_add_tail(&mcast->list, &priv->multicast_list);
 	}
 
-	if (!mcast->address_handle) {
+	if (!mcast->ah) {
 		if (skb_queue_len(&mcast->pkt_queue) < IPOIB_MAX_MCAST_QUEUE)
 			skb_queue_tail(&mcast->pkt_queue, skb);
 		else
@@ -682,14 +679,15 @@
 
 out:
 	spin_unlock_irqrestore(&priv->lock, flags);
-	if (mcast && mcast->address_handle) {
+	if (mcast && mcast->ah) {
 		if (skb->dst            &&
 		    skb->dst->neighbour &&
 		    !IPOIB_PATH(skb->dst->neighbour)) {
 			struct ipoib_path *path = kmalloc(sizeof *path, GFP_ATOMIC);
 
 			if (path) {
-				path->ah  	= mcast->address_handle;
+				kref_get(&mcast->ah->ref);
+				path->ah  	= mcast->ah;
 				path->qpn 	= IB_MULTICAST_QPN;
 				path->dev 	= dev;
 				path->neighbour = skb->dst->neighbour;
@@ -697,7 +695,7 @@
 			}
 		}
 
-		ipoib_send(dev, skb, mcast->address_handle, IB_MULTICAST_QPN);
+		ipoib_send(dev, skb, mcast->ah, IB_MULTICAST_QPN);
 	}
 }
 
@@ -951,9 +949,9 @@
 
 	mcast = rb_entry(iter->rb_node, struct ipoib_mcast, rb_node);
 
-	*mgid = mcast->mcmember.mgid;
-	*created = mcast->created;
-	*queuelen = skb_queue_len(&mcast->pkt_queue);
-	*complete = mcast->address_handle != NULL;
+	*mgid      = mcast->mcmember.mgid;
+	*created   = mcast->created;
+	*queuelen  = skb_queue_len(&mcast->pkt_queue);
+	*complete  = !!mcast->ah;
 	*send_only = (mcast->flags & (1 << IPOIB_MCAST_FLAG_SENDONLY)) ? 1 : 0;
 }
Index: ulp/ipoib/ipoib.h
===================================================================
--- ulp/ipoib/ipoib.h	(revision 1201)
+++ ulp/ipoib/ipoib.h	(working copy)
@@ -31,6 +31,7 @@
 #include <linux/workqueue.h>
 #include <linux/pci.h>
 #include <linux/config.h>
+#include <linux/kref.h>
 
 #include <asm/atomic.h>
 #include <asm/semaphore.h>
@@ -65,6 +66,7 @@
 	IPOIB_PKEY_STOP 	  = 4,
 	IPOIB_FLAG_SUBINTERFACE   = 5,
 	IPOIB_MCAST_RUN 	  = 6,
+	IPOIB_STOP_REAPER         = 7,
 
 	IPOIB_MAX_BACKOFF_SECONDS = 16,
 
@@ -109,6 +111,7 @@
 	struct work_struct mcast_task;
 	struct work_struct flush_task;
 	struct work_struct restart_task;
+	struct work_struct ah_reap_task;
 
 	struct ib_device *ca;
 	u8            	  port;
@@ -134,18 +137,28 @@
 
 	struct ib_wc ibwc[IPOIB_NUM_WC];
 
+	struct list_head dead_ahs;
+
 	struct proc_dir_entry *mcast_proc_entry;
 
 	struct ib_event_handler event_handler;
 
 	struct net_device_stats stats;
 
+	struct list_head child_intfs;
 	struct list_head list;
-	struct list_head child_intfs;
 };
 
+struct ipoib_ah {
+	struct net_device *dev;
+	struct ib_ah      *ah;
+	struct list_head   list;
+	struct kref        ref;
+	unsigned           last_send;
+};
+
 struct ipoib_path {
-	struct ib_ah       *ah;
+	struct ipoib_ah    *ah;
 	u32                 qpn;
 	struct sk_buff_head queue;
 
@@ -166,8 +179,17 @@
 
 void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr);
 
+struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
+				 struct ib_pd *pd, struct ib_ah_attr *attr);
+void ipoib_free_ah(struct kref *kref);
+static inline void ipoib_put_ah(struct ipoib_ah *ah)
+{
+	kref_put(&ah->ref, ipoib_free_ah);
+}
+
 void ipoib_send(struct net_device *dev, struct sk_buff *skb,
-		struct ib_ah *address, u32 qpn);
+		struct ipoib_ah *address, u32 qpn);
+void ipoib_reap_ah(void *dev_ptr);
 
 struct ipoib_dev_priv *ipoib_intf_alloc(const char *format);
 
@@ -213,7 +235,6 @@
 		       union ib_gid *mgid);
 
 int ipoib_qp_create(struct net_device *dev);
-void ipoib_qp_destroy(struct net_device *dev);
 int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca);
 void ipoib_transport_dev_cleanup(struct net_device *dev);
 
Index: ulp/ipoib/ipoib_ib.c
===================================================================
--- ulp/ipoib/ipoib_ib.c	(revision 1201)
+++ ulp/ipoib/ipoib_ib.c	(working copy)
@@ -29,10 +29,44 @@
 
 static DECLARE_MUTEX(pkey_sem);
 
-static int _ipoib_ib_receive(struct ipoib_dev_priv *priv,
-			     u64 work_request_id,
-			     dma_addr_t addr)
+struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
+				 struct ib_pd *pd, struct ib_ah_attr *attr)
 {
+	struct ipoib_ah *ah;
+
+	ah = kmalloc(sizeof *ah, GFP_KERNEL);
+	if (!ah)
+		return NULL;
+
+	ah->dev       = dev;
+	ah->last_send = 0;
+	kref_init(&ah->ref);
+
+	ah->ah = ib_create_ah(pd, attr);
+	if (IS_ERR(ah->ah)) {
+		kfree(ah);
+		ah = NULL;
+	}
+
+	return ah;
+}
+
+void ipoib_free_ah(struct kref *kref)
+{
+	struct ipoib_ah *ah = container_of(kref, struct ipoib_ah, ref);
+	struct ipoib_dev_priv *priv = netdev_priv(ah->dev);
+
+	unsigned long flags;
+
+	spin_lock_irqsave(&priv->lock, flags);
+	list_add_tail(&ah->list, &priv->dead_ahs);
+	spin_unlock_irqrestore(&priv->lock, flags);
+}
+
+static int ipoib_ib_receive(struct ipoib_dev_priv *priv,
+			    u64 work_request_id,
+			    dma_addr_t addr)
+{
 	struct ib_sge list = {
 		.addr    = addr,
 		.length  = IPOIB_BUF_SIZE,
@@ -50,8 +84,8 @@
 }
 
 /* =============================================================== */
-/*.._ipoib_ib_post_receive -- post a receive buffer                */
-static int _ipoib_ib_post_receive(struct net_device *dev, int id)
+/*..ipoib_ib_post_receive -- post a receive buffer                */
+static int ipoib_ib_post_receive(struct net_device *dev, int id)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct sk_buff *skb;
@@ -72,24 +106,24 @@
 			      PCI_DMA_FROMDEVICE);
 	pci_unmap_addr_set(&priv->rx_ring[id], mapping, addr);
 
-	ret = _ipoib_ib_receive(priv, id, addr);
+	ret = ipoib_ib_receive(priv, id, addr);
 	if (ret)
-		ipoib_warn(priv, "_ipoib_ib_receive failed for buf %d (%d)\n",
+		ipoib_warn(priv, "ipoib_ib_receive failed for buf %d (%d)\n",
 			   id, ret);
 
 	return ret;
 }
 
 /* =============================================================== */
-/*.._ipoib_ib_post_receives -- post all receive buffers            */
-static int _ipoib_ib_post_receives(struct net_device *dev)
+/*..ipoib_ib_post_receives -- post all receive buffers            */
+static int ipoib_ib_post_receives(struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	int i;
 
 	for (i = 0; i < IPOIB_RX_RING_SIZE; ++i) {
-		if (_ipoib_ib_post_receive(dev, i)) {
-			ipoib_warn(priv, "_ipoib_ib_post_receive failed for buf %d\n", i);
+		if (ipoib_ib_post_receive(dev, i)) {
+			ipoib_warn(priv, "ipoib_ib_post_receive failed for buf %d\n", i);
 			return -EIO;
 		}
 	}
@@ -108,7 +142,7 @@
 
 	if (entry->status != IB_WC_SUCCESS) {
 		ipoib_warn(priv, "got failed completion event "
-			   "(status=%d, wrid=%d, op=%d)",
+			   "(status=%d, wrid=%d, op=%d)\n",
 			   entry->status, wr_id, entry->opcode);
 
 		if (entry->opcode == IB_WC_SEND) {
@@ -163,8 +197,8 @@
 			}
 
 			/* repost receive */
-			if (_ipoib_ib_post_receive(dev, wr_id))
-				ipoib_warn(priv, "_ipoib_ib_post_receive failed "
+			if (ipoib_ib_post_receive(dev, wr_id))
+				ipoib_warn(priv, "ipoib_ib_post_receive failed "
 					   "for buf %d\n", wr_id);
 		} else
 			ipoib_warn(priv, "completion event with wrid %d\n",
@@ -262,7 +296,7 @@
 /* =============================================================== */
 /*..ipoib_send -- schedule an IB send work request                 */
 void ipoib_send(struct net_device *dev, struct sk_buff *skb,
-		struct ib_ah *address, u32 qpn)
+		struct ipoib_ah *address, u32 qpn)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct ipoib_buf *tx_req;
@@ -302,7 +336,7 @@
 	pci_unmap_addr_set(tx_req, mapping, addr);
 
 	if (post_send(priv, priv->tx_head & (IPOIB_TX_RING_SIZE - 1),
-		      address, qpn, addr, skb->len)) {
+		      address->ah, qpn, addr, skb->len)) {
 		ipoib_warn(priv, "post_send failed\n");
 		++priv->stats.tx_errors;
 		tx_req->skb = NULL;
@@ -312,6 +346,7 @@
 
 		dev->trans_start = jiffies;
 
+		address->last_send = priv->tx_head;
 		++priv->tx_head;
 
 		spin_lock_irqsave(&priv->lock, flags);
@@ -323,6 +358,38 @@
 	}
 }
 
+void __ipoib_reap_ah(struct net_device *dev)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_ah *ah, *tah;
+	LIST_HEAD(remove_list);
+
+	spin_lock_irq(&priv->lock);
+	list_for_each_entry_safe(ah, tah, &priv->dead_ahs, list)
+		if (ah->last_send <= priv->tx_tail) {
+			list_del(&ah->list);
+			list_add_tail(&ah->list, &remove_list);
+		}
+	spin_unlock_irq(&priv->lock);
+
+	list_for_each_entry_safe(ah, tah, &remove_list, list) {
+		ipoib_dbg(priv, "Reaping ah %p\n", ah->ah);
+		ib_destroy_ah(ah->ah);
+		kfree(ah);
+	}
+}
+
+void ipoib_reap_ah(void *dev_ptr)
+{
+	struct net_device *dev = dev_ptr;
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+
+	__ipoib_reap_ah(dev);
+
+	if (!test_bit(IPOIB_STOP_REAPER, &priv->flags))
+		queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task, HZ);
+}
+
 int ipoib_ib_dev_open(struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -334,12 +401,15 @@
 		return -1;
 	}
 
-	ret = _ipoib_ib_post_receives(dev);
+	ret = ipoib_ib_post_receives(dev);
 	if (ret) {
-		ipoib_warn(priv, "_ipoib_ib_post_receives returned %d\n", ret);
+		ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret);
 		return -1;
 	}
 
+	clear_bit(IPOIB_STOP_REAPER, &priv->flags);
+	queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task, HZ);
+
 	return 0;
 }
 
@@ -395,8 +465,10 @@
 	int i;
 
 	/* Kill the existing QP and allocate a new one */
-	if (priv->qp != NULL)
-		ipoib_qp_destroy(dev);
+	if (priv->qp != NULL) {
+		ib_destroy_qp(priv->qp);
+		priv->qp = NULL;
+	}
 
 	for (i = 0; i < IPOIB_RX_RING_SIZE; ++i) {
 		if (priv->rx_ring[i].skb) {
@@ -463,14 +535,24 @@
 /*..ipoib_ib_dev_cleanup -- clean up IB resources for iface        */
 void ipoib_ib_dev_cleanup(struct net_device *dev)
 {
-	ipoib_dbg(netdev_priv(dev), "cleaning up ib_dev\n");
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
 
+	ipoib_dbg(priv, "cleaning up ib_dev\n");
+
 	ipoib_mcast_stop_thread(dev);
 
 	/* Delete the broadcast address and the local address */
 	ipoib_mcast_dev_down(dev);
 
 	ipoib_transport_dev_cleanup(dev);
+
+	set_bit(IPOIB_STOP_REAPER, &priv->flags);
+	cancel_delayed_work(&priv->ah_reap_task);
+	flush_workqueue(ipoib_workqueue);
+	while (!list_empty(&priv->dead_ahs)) {
+		__ipoib_reap_ah(dev);
+		yield();
+	}
 }
 
 /*



More information about the general mailing list