[ofa-general] [RFC] [PATCH v3] IB/ipoib: Add bonding support to IPoIB
Moni Shoua
monis at voltaire.com
Sun Mar 11 08:27:57 PDT 2007
Hi,
The post follows the discussions in
http://openib.org/pipermail/openib-general/2007-February/032598.html (version 1)
and
http://openib.org/pipermail/openib-general/2007-February/033434.html (version 2)
I got there some comments from Michael Tsirkin that helped me find bugs
in the bonding code.
I decided to stay with version #1 since IPoIB cleans up its paths and MC lists when
a device is stopped. Therefore I assume that hotplug is safe and that using bonding
with IPoIB doesn't break it.
With the current solution it is still unsafe to unload ib_ipoib before bonding but
I tend to agree with Michael and my opinion now is that this should be fixed in the
upper layer (i.e. bonding).
-------------------------------------------------------
IPoIB uses a two layer neighboring scheme, such that for each struct neighbour
whose device is an ipoib one, there is a struct ipoib_neigh buddy which is
created on demand at the tx flow by an ipoib_neigh_alloc(skb->dst->neighbour)
call.
When using the bonding driver, neighbours are created by the net stack on behalf
of the bonding (master) device. On the tx flow the bonding code gets an skb such
that skb->dev points to the master device, it changes this skb to point on the
slave device and calls the slave hard_start_xmit function.
Combing these two flows, there is a hole if some code at ipoib
(ipoib_neigh_destructor) assumes that for each struct neighbour it gets, n->dev
is an ipoib device so for example netdev_priv(n->dev) would be of type struct
ipoib_dev_priv.
To fix it, this patch adds a dev field to struct ipoib_neigh which is used
instead of the struct neighbour dev one.
Signed-off-by: Moni Shoua <monis at voltaire.com>
Signed-off-by: Or Gerlitz <ogerlitz at voltaire.com>
---
ipoib.h | 4 +++-
ipoib_main.c | 26 +++++++++++++-------------
ipoib_multicast.c | 2 +-
3 files changed, 17 insertions(+), 15 deletions(-)
Index: linux-2.6/drivers/infiniband/ulp/ipoib/ipoib.h
===================================================================
--- linux-2.6.orig/drivers/infiniband/ulp/ipoib/ipoib.h 2007-01-25 11:05:32.000000000 +0200
+++ linux-2.6/drivers/infiniband/ulp/ipoib/ipoib.h 2007-03-04 19:32:55.000000000 +0200
@@ -216,6 +216,7 @@ struct ipoib_neigh {
struct sk_buff_head queue;
struct neighbour *neighbour;
+ struct net_device *dev;
struct list_head list;
};
@@ -232,7 +233,8 @@ static inline struct ipoib_neigh **to_ip
INFINIBAND_ALEN, sizeof(void *));
}
-struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neigh);
+struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neigh,
+ struct net_device *dev);
void ipoib_neigh_free(struct net_device *dev, struct ipoib_neigh *neigh);
extern struct workqueue_struct *ipoib_workqueue;
Index: linux-2.6/drivers/infiniband/ulp/ipoib/ipoib_main.c
===================================================================
--- linux-2.6.orig/drivers/infiniband/ulp/ipoib/ipoib_main.c 2007-01-25 11:05:32.000000000 +0200
+++ linux-2.6/drivers/infiniband/ulp/ipoib/ipoib_main.c 2007-03-04 19:32:55.000000000 +0200
@@ -248,7 +248,6 @@ static void path_free(struct net_device
struct ipoib_neigh *neigh, *tn;
struct sk_buff *skb;
unsigned long flags;
-
while ((skb = __skb_dequeue(&path->queue)))
dev_kfree_skb_irq(skb);
@@ -490,7 +489,7 @@ static void neigh_add_path(struct sk_buf
struct ipoib_path *path;
struct ipoib_neigh *neigh;
- neigh = ipoib_neigh_alloc(skb->dst->neighbour);
+ neigh = ipoib_neigh_alloc(skb->dst->neighbour, skb->dev);
if (!neigh) {
++priv->stats.tx_dropped;
dev_kfree_skb_any(skb);
@@ -769,32 +768,32 @@ static void ipoib_set_mcast_list(struct
static void ipoib_neigh_destructor(struct neighbour *n)
{
struct ipoib_neigh *neigh;
- struct ipoib_dev_priv *priv = netdev_priv(n->dev);
+ struct ipoib_dev_priv *priv;
unsigned long flags;
struct ipoib_ah *ah = NULL;
- ipoib_dbg(priv,
- "neigh_destructor for %06x " IPOIB_GID_FMT "\n",
- IPOIB_QPN(n->ha),
- IPOIB_GID_RAW_ARG(n->ha + 4));
-
- spin_lock_irqsave(&priv->lock, flags);
neigh = *to_ipoib_neigh(n);
if (neigh) {
+ priv = netdev_priv(neigh->dev);
+ ipoib_dbg(priv,
+ "neigh_destructor for %06x " IPOIB_GID_FMT "\n",
+ IPOIB_QPN(n->ha),
+ IPOIB_GID_RAW_ARG(n->ha + 4));
+
+ spin_lock_irqsave(&priv->lock, flags);
if (neigh->ah)
ah = neigh->ah;
list_del(&neigh->list);
ipoib_neigh_free(n->dev, neigh);
+ spin_unlock_irqrestore(&priv->lock, flags);
}
-
- spin_unlock_irqrestore(&priv->lock, flags);
-
if (ah)
ipoib_put_ah(ah);
}
-struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neighbour)
+struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neighbour,
+ struct net_device *dev)
{
struct ipoib_neigh *neigh;
@@ -803,6 +802,7 @@ struct ipoib_neigh *ipoib_neigh_alloc(st
return NULL;
neigh->neighbour = neighbour;
+ neigh->dev = dev;
*to_ipoib_neigh(neighbour) = neigh;
skb_queue_head_init(&neigh->queue);
Index: linux-2.6/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
===================================================================
--- linux-2.6.orig/drivers/infiniband/ulp/ipoib/ipoib_multicast.c 2007-01-25 11:05:32.000000000 +0200
+++ linux-2.6/drivers/infiniband/ulp/ipoib/ipoib_multicast.c 2007-03-04 12:21:46.000000000 +0200
@@ -774,7 +774,7 @@ out:
if (skb->dst &&
skb->dst->neighbour &&
!*to_ipoib_neigh(skb->dst->neighbour)) {
- struct ipoib_neigh *neigh = ipoib_neigh_alloc(skb->dst->neighbour);
+ struct ipoib_neigh *neigh = ipoib_neigh_alloc(skb->dst->neighbour, skb->dev);
if (neigh) {
kref_get(&mcast->ah->ref);
More information about the general
mailing list