[ofa-general] [RFC v2 PATCH 4/5] rdma/cma: implement RDMA_ALIGN_WITH_NETDEVICE ha mode

Or Gerlitz ogerlitz at voltaire.com
Thu May 15 07:25:34 PDT 2008


RDMA_ALIGN_WITH_NETDEVICE high availability (ha) mode means that the consumer
of the rdma-cm wants RDMA sessions to always use the same links (e.g. <hca/port>)
as the IP stack does. In the current code, this does not happen when bonding has
failed over but the IB link used by an already existing session is still operating fine.

Use netevent notification for sensing that a change has happened in the IP stack,
then scan the rdma-cm IDs list to see if there is an ID that is misaligned
in that respect with the IP stack, and deliver RDMA_CM_EVENT_NETDEV_CHANGE for this
ID, in case this is what the user asked for by setting this mode for the ID.

Signed-off-by: Or Gerlitz <ogerlitz at voltaire.com>

changes from v1 -

- took the approach of notifying the user vs disconnecting the ID
- this change bought us support also for the datagram (unconnected) services!
- I used the cma_work_handler existing mechanism and decided to leave the ID state unchanged.

As for the locking/protection issues, I assume the netdev notifiers protect against net
device removal etc. while processing the event, so dev_get/put calls are not needed. Other than
that, there's a need to protect against (rdma) device removal and ID destruction. Spending
some time on the code, I couldn't see how to do it in a finer grain than the global mutex
being locked/unlocked over the execution of the double (dev list / id list) loops.

Taking into account that this event is --rare-- and that I changed the logic to first see
if this ID wanted ha notification and only then do the more expensive memcmp calls,
maybe this global locking is acceptable, and if not, I'd be happy to get some directions,
e.g. if/how cma_disable_remove() and cma_enable_remove() can help to hold the lock
for a shorter time, etc.

Index: linux-2.6.26-rc2/drivers/infiniband/core/cma.c
===================================================================
--- linux-2.6.26-rc2.orig/drivers/infiniband/core/cma.c	2008-05-15 16:30:42.000000000 +0300
+++ linux-2.6.26-rc2/drivers/infiniband/core/cma.c	2008-05-15 16:36:34.000000000 +0300
@@ -2743,6 +2743,64 @@ void rdma_leave_multicast(struct rdma_cm
 }
 EXPORT_SYMBOL(rdma_leave_multicast);

+/* If @id_priv's source device is @ndev but its recorded source address differs,
+ * queue RDMA_CM_EVENT_NETDEV_CHANGE (state unchanged). Returns 0 or -ENOMEM. */
+static int cma_netdev_align_id(struct net_device *ndev, struct rdma_id_private *id_priv)
+{
+	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
+	struct cma_work *work;
+
+	if (!memcmp(dev_addr->src_dev_name, ndev->name, IFNAMSIZ) &&
+	  memcmp(dev_addr->src_dev_addr, ndev->dev_addr, ndev->addr_len)) {
+		printk(KERN_ERR "addr change for device %s used by id %p, notifying\n",
+				ndev->name, &id_priv->id);
+		work = kzalloc(sizeof *work, GFP_KERNEL);
+		if (!work)
+			return -ENOMEM;
+		work->id = id_priv;
+		INIT_WORK(&work->work, cma_work_handler);
+		work->old_state = work->new_state = id_priv->state;
+		work->event.event = RDMA_CM_EVENT_NETDEV_CHANGE;
+		atomic_inc(&id_priv->refcount);
+		queue_work(cma_wq, &work->work);
+	}
+	return 0;
+}
+
+/* Netdevice notifier: when a bonding master in init_net fails over, run
+ * cma_netdev_align_id() on each RDMA_ALIGN_WITH_NETDEVICE ID; stop on error. */
+static int cma_netdev_callback(struct notifier_block *self, unsigned long event,
+	void *ctx)
+{
+	struct net_device *ndev = (struct net_device *)ctx;
+	struct cma_device *cma_dev;
+	struct rdma_id_private *id_priv;
+	int ret = NOTIFY_DONE;
+
+	if (dev_net(ndev) != &init_net || event != NETDEV_BONDING_FAILOVER)
+		return NOTIFY_DONE;
+
+	if (!(ndev->flags & IFF_MASTER) || !(ndev->priv_flags & IFF_BONDING))
+		return NOTIFY_DONE;
+
+	mutex_lock(&lock);
+	list_for_each_entry(cma_dev, &dev_list, list)
+		list_for_each_entry(id_priv, &cma_dev->id_list, list) {
+			if (id_priv->ha_mode != RDMA_ALIGN_WITH_NETDEVICE)
+				continue;
+			ret = cma_netdev_align_id(ndev, id_priv);
+			if (ret)
+				goto out; /* a bare break would only exit the inner loop */
+		}
+out:
+	mutex_unlock(&lock);
+	return ret;
+}
+
+/* Notifier block handed to register_netdevice_notifier() in cma_init();
+ * it routes netdevice events to cma_netdev_callback(). */
+static struct notifier_block cma_nb = { .notifier_call = cma_netdev_callback };
+
 static void cma_add_one(struct ib_device *device)
 {
 	struct cma_device *cma_dev;
@@ -2847,6 +2905,7 @@ static int cma_init(void)

 	ib_sa_register_client(&sa_client);
 	rdma_addr_register_client(&addr_client);
+	register_netdevice_notifier(&cma_nb);

 	ret = ib_register_client(&cma_client);
 	if (ret)
@@ -2854,6 +2913,7 @@ static int cma_init(void)
 	return 0;

 err:
+	unregister_netdevice_notifier(&cma_nb);
 	rdma_addr_unregister_client(&addr_client);
 	ib_sa_unregister_client(&sa_client);
 	destroy_workqueue(cma_wq);
@@ -2863,6 +2923,7 @@ err:
 static void cma_cleanup(void)
 {
 	ib_unregister_client(&cma_client);
+	unregister_netdevice_notifier(&cma_nb);
 	rdma_addr_unregister_client(&addr_client);
 	ib_sa_unregister_client(&sa_client);
 	destroy_workqueue(cma_wq);



More information about the general mailing list