[ofa-general] [RFC] ipoib: avoid using stale ipoib_neigh* in ipoib_neigh_cleanup()

akepner at sgi.com akepner at sgi.com
Mon Jun 1 15:04:13 PDT 2009


On Sun, May 31, 2009 at 02:34:56PM +0300, Or Gerlitz wrote:
> .... Looking at the code, I'm not sure why the non-success/flush path 
> of ipoib_cm_handle_tx_wc() must access the neighbour while 
> ipoib_ib_handle_tx_wc() can get away with only a warning print... 
> do we agree that accessing the neighbour from the cm tx completion 
> flow is buggy?

But the fundamental problem is that the ipoib_neigh structures 
aren't read and written using consistent locking (particularly 
in ipoib_neigh_cleanup()). 

What about introducing a set of locks for no other purpose 
than protecting ipoib_neigh structures? Something like this 
(which is only intended to convey the idea - it's obviously 
incomplete; a rough sketch of the pieces it leaves out follows 
the patch):


NOT-Signed-off-by: Arthur Kepner <akepner at sgi.com>

--- 

diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 753a983..cc20074 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -403,6 +403,30 @@ static inline int ipoib_ud_need_sg(unsigned int ib_mtu)
 	return IPOIB_UD_BUF_SIZE(ib_mtu) > PAGE_SIZE;
 }
 
+struct ipoib_neigh_lock
+{
+	spinlock_t sl;
+} __attribute__((__aligned__(SMP_CACHE_BYTES)));
+
+#define IPOIB_LOCK_SHIFT	6
+#define IPOIB_LOCK_SIZE		(1 << IPOIB_LOCK_SHIFT)
+#define IPOIB_LOCK_MASK		(IPOIB_LOCK_SIZE - 1)
+
+extern struct ipoib_neigh_lock
+ipoib_neigh_locks[IPOIB_LOCK_SIZE];
+
+static inline void lock_ipoib_neigh(unsigned int hval)
+{
+	spin_lock(&ipoib_neigh_locks[hval & IPOIB_LOCK_MASK].sl);
+}
+
+static inline void unlock_ipoib_neigh(unsigned int hval)
+{
+	spin_unlock(&ipoib_neigh_locks[hval & IPOIB_LOCK_MASK].sl);
+}
+
+unsigned int ipoib_neigh_hval(struct neighbour *n);
+
 /*
  * We stash a pointer to our private neighbour information after our
  * hardware address in neigh->ha.  The ALIGN() expression here makes
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index ab2c192..685b664 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -45,6 +45,7 @@
 
 #include <linux/ip.h>
 #include <linux/in.h>
+#include <linux/jhash.h>
 
 #include <net/dst.h>
 
@@ -844,6 +845,9 @@ static void ipoib_neigh_cleanup(struct neighbour *n)
 	struct ipoib_dev_priv *priv = netdev_priv(n->dev);
 	unsigned long flags;
 	struct ipoib_ah *ah = NULL;
+	unsigned int hval = ipoib_neigh_hval(n);
+
+	lock_ipoib_neigh(hval);
 
 	neigh = *to_ipoib_neigh(n);
 	if (neigh)
@@ -864,6 +868,8 @@ static void ipoib_neigh_cleanup(struct neighbour *n)
 
 	spin_unlock_irqrestore(&priv->lock, flags);
 
+	unlock_ipoib_neigh(hval);
+
 	if (ah)
 		ipoib_put_ah(ah);
 }
@@ -899,6 +905,13 @@ void ipoib_neigh_free(struct net_device *dev, struct ipoib_neigh *neigh)
 	kfree(neigh);
 }
 
+static unsigned int ipoib_neigh_rnd;
+
+unsigned int ipoib_neigh_hval(struct neighbour *n)
+{
+	return jhash(n->primary_key, n->tbl->key_len, ipoib_neigh_rnd);
+}
+
 static int ipoib_neigh_setup_dev(struct net_device *dev, struct neigh_parms *parms)
 {
 	parms->neigh_cleanup = ipoib_neigh_cleanup;
@@ -1408,6 +1421,8 @@ static int __init ipoib_init_module(void)
 	ipoib_max_conn_qp = min(ipoib_max_conn_qp, IPOIB_CM_MAX_CONN_QP);
 #endif
 
+	get_random_bytes(&ipoib_neigh_rnd, sizeof(ipoib_neigh_rnd));
+
 	/*
 	 * When copying small received packets, we only copy from the
 	 * linear data part of the SKB, so we rely on this condition.
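
For completeness, here's roughly what the pieces left out above would 
look like - untested, and the exact placement is only a guess. Since 
the header only declares the lock array, a single definition (and 
spinlock initialization) would go in ipoib_main.c:

	/* single definition of the array declared extern in ipoib.h */
	struct ipoib_neigh_lock ipoib_neigh_locks[IPOIB_LOCK_SIZE] __cacheline_aligned;

	/* in ipoib_init_module(), next to the get_random_bytes() call */
	int i;

	for (i = 0; i < IPOIB_LOCK_SIZE; i++)
		spin_lock_init(&ipoib_neigh_locks[i].sl);

The other half, which the patch doesn't show, is that every path that 
dereferences *to_ipoib_neigh(n) - including the cm tx completion flow 
that started this thread - would bracket the access with the same 
per-hash lock, along these lines (illustrative only):

	unsigned int hval = ipoib_neigh_hval(n);
	struct ipoib_neigh *neigh;

	lock_ipoib_neigh(hval);
	neigh = *to_ipoib_neigh(n);
	if (neigh) {
		/* neigh can't be freed here - ipoib_neigh_cleanup()
		 * holds the same lock while tearing it down */
	}
	unlock_ipoib_neigh(hval);

Since the hash is computed from n->primary_key, the cleanup path and 
any reader of the same neighbour always contend on the same lock, and 
the power-of-two array size keeps the mask cheap.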


