[ofa-general] [RFC V4 PATCH 4/5] rdma/cma: implement RDMA_CM_EVENT_ADDR_CHANGE notification

Or Gerlitz ogerlitz at voltaire.com
Wed May 28 04:36:30 PDT 2008


The RDMA_CM_EVENT_ADDR_CHANGE event can be used by rdma-cm consumers that wish
to have their RDMA sessions always use the same links (e.g. <hca/port>) as the
IP stack does. In the current code, this does not happen when bonding is used
and a fail-over has happened while the IB link used by an already existing
session is still operating fine.

Use netdevice notification for sensing that a change has happened in the IP stack,
then scan the rdma-cm IDs list to see if there is an ID that is "misaligned" in
that respect with the IP stack, and deliver RDMA_CM_EVENT_ADDR_CHANGE for
this ID. The user can act on the event or just ignore it.

Signed-off-by: Or Gerlitz <ogerlitz at voltaire.com>

changes from v2 -
- took the approach of unconditionally notifying the user
- use the handler_mutex of the ID to serialize with other callbacks

changes from v3 -
- check in cma_ndev_work_handler to make sure the ID is not getting destroyed
- change the event name to be RDMA_CM_EVENT_ADDR_CHANGE
- cma_netdev_align_id --> cma_netdev_change

As for the locking issues, the double loop in cma_netdev_callback() is still
wrapped with the rdma-cm global mutex, as I explained in the thread.

 drivers/infiniband/core/cma.c |   88 ++++++++++++++++++++++++++++++++++++++++++
 include/rdma/rdma_cm.h        |    3 -
 2 files changed, 90 insertions(+), 1 deletion(-)

Index: linux-2.6.26-rc3/drivers/infiniband/core/cma.c
===================================================================
--- linux-2.6.26-rc3.orig/drivers/infiniband/core/cma.c	2008-05-28 11:08:24.000000000 +0300
+++ linux-2.6.26-rc3/drivers/infiniband/core/cma.c	2008-05-28 13:03:43.000000000 +0300
@@ -164,6 +164,12 @@ struct cma_work {
 	struct rdma_cm_event	event;
 };

+struct cma_ndev_work {
+	struct work_struct	work;
+	struct rdma_id_private	*id;
+	struct rdma_cm_event	event;
+};
+
 union cma_ip_addr {
 	struct in6_addr ip6;
 	struct {
@@ -1598,6 +1604,28 @@ out:
 	kfree(work);
 }

+static void cma_ndev_work_handler(struct work_struct *_work)
+{
+	struct cma_ndev_work *work = container_of(_work, struct cma_ndev_work, work);
+	struct rdma_id_private *id_priv = work->id;
+	int destroy = 0;
+
+	mutex_lock(&id_priv->handler_mutex);
+	if (id_priv->state == CMA_DESTROYING)
+		goto out;
+
+	if (id_priv->id.event_handler(&id_priv->id, &work->event)) {
+		cma_exch(id_priv, CMA_DESTROYING);
+		destroy = 1;
+	}
+out:
+	mutex_unlock(&id_priv->handler_mutex);
+	cma_deref_id(id_priv);
+	if (destroy)
+		rdma_destroy_id(&id_priv->id);
+	kfree(work);
+}
+
 static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms)
 {
 	struct rdma_route *route = &id_priv->id.route;
@@ -2723,6 +2751,63 @@ void rdma_leave_multicast(struct rdma_cm
 }
 EXPORT_SYMBOL(rdma_leave_multicast);

+static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id_priv)
+{
+	struct rdma_dev_addr *dev_addr;
+	struct cma_ndev_work *work;
+
+	dev_addr = &id_priv->id.route.addr.dev_addr;
+
+	if (!memcmp(dev_addr->src_dev_name, ndev->name, IFNAMSIZ) &&
+	  memcmp(dev_addr->src_dev_addr, ndev->dev_addr, ndev->addr_len)) {
+		printk(KERN_ERR "addr change for device %s used by id %p, notifying\n",
+				ndev->name, &id_priv->id);
+		work = kzalloc(sizeof *work, GFP_ATOMIC);
+		if (!work)
+			return -ENOMEM;
+		INIT_WORK(&work->work, cma_ndev_work_handler);
+		work->id = id_priv;
+		work->event.event = RDMA_CM_EVENT_ADDR_CHANGE;
+		atomic_inc(&id_priv->refcount);
+		queue_work(cma_wq, &work->work);
+	}
+
+	return 0;
+}
+
+static int cma_netdev_callback(struct notifier_block *self, unsigned long event,
+	void *ctx)
+{
+	struct net_device *ndev = (struct net_device *)ctx;
+	struct cma_device *cma_dev;
+	struct rdma_id_private *id_priv;
+	int ret = NOTIFY_DONE;
+
+	if (dev_net(ndev) != &init_net)
+		return NOTIFY_DONE;
+
+	if (event != NETDEV_BONDING_FAILOVER)
+		return NOTIFY_DONE;
+
+	if (!(ndev->flags & IFF_MASTER) || !(ndev->priv_flags & IFF_BONDING))
+		return NOTIFY_DONE;
+
+	mutex_lock(&lock);
+	list_for_each_entry(cma_dev, &dev_list, list)
+		list_for_each_entry(id_priv, &cma_dev->id_list, list) {
+			ret = cma_netdev_change(ndev, id_priv);
+			if (ret)
+				break;
+		}
+	mutex_unlock(&lock);
+
+	return ret;
+}
+
+static struct notifier_block cma_nb = {
+	.notifier_call = cma_netdev_callback
+};
+
 static void cma_add_one(struct ib_device *device)
 {
 	struct cma_device *cma_dev;
@@ -2831,6 +2916,7 @@ static int cma_init(void)

 	ib_sa_register_client(&sa_client);
 	rdma_addr_register_client(&addr_client);
+	register_netdevice_notifier(&cma_nb);

 	ret = ib_register_client(&cma_client);
 	if (ret)
@@ -2838,6 +2924,7 @@ static int cma_init(void)
 	return 0;

 err:
+	unregister_netdevice_notifier(&cma_nb);
 	rdma_addr_unregister_client(&addr_client);
 	ib_sa_unregister_client(&sa_client);
 	destroy_workqueue(cma_wq);
@@ -2847,6 +2934,7 @@ err:
 static void cma_cleanup(void)
 {
 	ib_unregister_client(&cma_client);
+	unregister_netdevice_notifier(&cma_nb);
 	rdma_addr_unregister_client(&addr_client);
 	ib_sa_unregister_client(&sa_client);
 	destroy_workqueue(cma_wq);
Index: linux-2.6.26-rc3/include/rdma/rdma_cm.h
===================================================================
--- linux-2.6.26-rc3.orig/include/rdma/rdma_cm.h	2008-05-28 10:34:27.000000000 +0300
+++ linux-2.6.26-rc3/include/rdma/rdma_cm.h	2008-05-28 12:55:31.000000000 +0300
@@ -53,7 +53,8 @@ enum rdma_cm_event_type {
 	RDMA_CM_EVENT_DISCONNECTED,
 	RDMA_CM_EVENT_DEVICE_REMOVAL,
 	RDMA_CM_EVENT_MULTICAST_JOIN,
-	RDMA_CM_EVENT_MULTICAST_ERROR
+	RDMA_CM_EVENT_MULTICAST_ERROR,
+	RDMA_CM_EVENT_ADDR_CHANGE
 };

 enum rdma_port_space {



More information about the general mailing list