[ofa-general] [RFC] ipoib: avoid using stale ipoib_neigh* in ipoib_neigh_cleanup()
akepner at sgi.com
Mon Jun 1 15:04:13 PDT 2009
On Sun, May 31, 2009 at 02:34:56PM +0300, Or Gerlitz wrote:
> .... Looking at the code, I'm not sure why the non-success/flush path
> of ipoib_cm_handle_tx_wc() must access the neighbour while
> ipoib_ib_handle_tx_wc() can get away with only a warning print...
> do we agree that accessing the neighbour from the cm tx completion
> flow is buggy?
But the fundamental problem is that the ipoib_neigh structures
aren't read and written using consistent locking (particularly
in ipoib_neigh_cleanup()).
What about introducing a set of locks for no other purpose
than protecting ipoib_neigh structures? Something like the
patch below (which is only intended to convey the idea - it's
obviously incomplete).
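To make the intended usage concrete, here's a sketch of how a
reader would bracket its dereference of *to_ipoib_neigh() with
the new per-bucket lock. (This function is made up purely for
illustration - it isn't part of the patch.)

/*
 * Illustration only, not part of the patch: a reader (e.g. the
 * xmit path) keys the bucket lock off the neighbour before
 * looking at the stashed ipoib_neigh pointer.
 */
static void ipoib_neigh_reader_example(struct neighbour *n)
{
	struct ipoib_neigh *neigh;
	unsigned int hval = ipoib_neigh_hval(n);

	lock_ipoib_neigh(hval);
	neigh = *to_ipoib_neigh(n);
	if (neigh) {
		/*
		 * neigh (neigh->ah, etc.) can't be freed by
		 * ipoib_neigh_cleanup() while we hold the same
		 * bucket lock.
		 */
	}
	unlock_ipoib_neigh(hval);
}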
NOT-Signed-off-by: Arthur Kepner <akepner at sgi.com>
---
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 753a983..cc20074 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -403,6 +403,30 @@ static inline int ipoib_ud_need_sg(unsigned int ib_mtu)
return IPOIB_UD_BUF_SIZE(ib_mtu) > PAGE_SIZE;
}
+struct ipoib_neigh_lock {
+	spinlock_t sl;
+} __attribute__((__aligned__(SMP_CACHE_BYTES)));
+
+#define IPOIB_LOCK_SHIFT 6
+#define IPOIB_LOCK_SIZE (1 << IPOIB_LOCK_SHIFT)
+#define IPOIB_LOCK_MASK (IPOIB_LOCK_SIZE - 1)
+
+static struct ipoib_neigh_lock
+ipoib_neigh_locks[IPOIB_LOCK_SIZE] __cacheline_aligned;
+
+static inline void lock_ipoib_neigh(unsigned int hval)
+{
+	spin_lock(&ipoib_neigh_locks[hval & IPOIB_LOCK_MASK].sl);
+}
+
+static inline void unlock_ipoib_neigh(unsigned int hval)
+{
+	spin_unlock(&ipoib_neigh_locks[hval & IPOIB_LOCK_MASK].sl);
+}
+
+unsigned int ipoib_neigh_hval(struct neighbour *n);
+
/*
* We stash a pointer to our private neighbour information after our
* hardware address in neigh->ha. The ALIGN() expression here makes
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index ab2c192..685b664 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -45,6 +45,7 @@
#include <linux/ip.h>
#include <linux/in.h>
+#include <linux/jhash.h>
#include <net/dst.h>
@@ -844,6 +845,9 @@ static void ipoib_neigh_cleanup(struct neighbour *n)
struct ipoib_dev_priv *priv = netdev_priv(n->dev);
unsigned long flags;
struct ipoib_ah *ah = NULL;
+	unsigned int hval = ipoib_neigh_hval(n);
+
+	lock_ipoib_neigh(hval);
neigh = *to_ipoib_neigh(n);
if (neigh)
@@ -864,6 +868,8 @@ static void ipoib_neigh_cleanup(struct neighbour *n)
spin_unlock_irqrestore(&priv->lock, flags);
+	unlock_ipoib_neigh(hval);
+
if (ah)
ipoib_put_ah(ah);
}
@@ -899,6 +905,13 @@ void ipoib_neigh_free(struct net_device *dev, struct ipoib_neigh *neigh)
kfree(neigh);
}
+static unsigned int ipoib_neigh_rnd;
+
+unsigned int ipoib_neigh_hval(struct neighbour *n)
+{
+	return jhash(n->primary_key, n->tbl->key_len, ipoib_neigh_rnd);
+}
+
static int ipoib_neigh_setup_dev(struct net_device *dev, struct neigh_parms *parms)
{
parms->neigh_cleanup = ipoib_neigh_cleanup;
@@ -1408,6 +1421,8 @@ static int __init ipoib_init_module(void)
ipoib_max_conn_qp = min(ipoib_max_conn_qp, IPOIB_CM_MAX_CONN_QP);
#endif
+	get_random_bytes(&ipoib_neigh_rnd, sizeof(ipoib_neigh_rnd));
+
/*
* When copying small received packets, we only copy from the
* linear data part of the SKB, so we rely on this condition.
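The teardown paths that call ipoib_neigh_free() would take the
same bucket lock, in the same order relative to priv->lock as
ipoib_neigh_cleanup() above (bucket lock first). Roughly (again
just a sketch - this helper isn't in the patch, and the existing
call sites would need rearranging to use it):

static void ipoib_neigh_free_locked(struct net_device *dev,
				    struct ipoib_neigh *neigh)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	unsigned int hval = ipoib_neigh_hval(neigh->neighbour);
	unsigned long flags;

	/* bucket lock before priv->lock, matching ipoib_neigh_cleanup() */
	lock_ipoib_neigh(hval);
	spin_lock_irqsave(&priv->lock, flags);

	ipoib_neigh_free(dev, neigh);

	spin_unlock_irqrestore(&priv->lock, flags);
	unlock_ipoib_neigh(hval);
}

Since the hash is keyed on the neighbour's primary key, all the
paths touching the same neighbour serialize on the same bucket,
while unrelated neighbours mostly don't contend. And to be clear
about the "incomplete" part: the lock array is static in ipoib.h
here only for brevity - it would have to move into a single .c
file (with an extern declaration in the header), and the
spinlocks would need spin_lock_init() at module load.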