[ofa-general] [RFC PATCH 4/4] rdma/cma: implement RDMA_ALIGN_WITH_NETDEVICE ha mode

Or Gerlitz ogerlitz at voltaire.com
Tue May 13 07:13:58 PDT 2008


The RDMA_ALIGN_WITH_NETDEVICE high availability (ha) mode means that the consumer
of the rdma-cm wants its RDMA sessions to always use the same link (e.g. <hca/port>)
as the IP stack does. In the current code, this does not happen when bonding has
failed over but the IB link used by an already existing session is still operating fine.

Use a netdevice notifier to sense that a change has happened in the IP stack,
then scan the rdma-cm ID lists for IDs that are "misaligned" with the IP stack
in that respect, and disconnect each such ID if that is what the user asked
for when setting the ha mode of the ID.

Signed-off-by: Or Gerlitz <ogerlitz at voltaire.com>

Index: linux-2.6.26-rc2/drivers/infiniband/core/cma.c
===================================================================
--- linux-2.6.26-rc2.orig/drivers/infiniband/core/cma.c	2008-05-13 16:57:47.000000000 +0300
+++ linux-2.6.26-rc2/drivers/infiniband/core/cma.c	2008-05-13 16:58:55.000000000 +0300
@@ -144,6 +144,7 @@ struct rdma_id_private {
 	u8			srq;
 	u8			tos;
 	enum rdma_ha_mode	ha_mode;
+	struct work_struct	ha_work;
 };

 struct cma_multicast {
@@ -268,6 +269,14 @@ static inline int cma_is_ud_ps(enum rdma
 	return (ps == RDMA_PS_UDP || ps == RDMA_PS_IPOIB);
 }

+static void cma_ha_work_handler(struct work_struct *work)	/* ha_work callback */
+{
+	struct rdma_id_private *id_priv;	/* the ID that embeds @work */
+
+	id_priv = container_of(work, struct rdma_id_private, ha_work);
+	rdma_disconnect(&id_priv->id);	/* NOTE(review): nothing here pins id_priv until the work runs - confirm */
+}
+
 static void cma_attach_to_dev(struct rdma_id_private *id_priv,
 			      struct cma_device *cma_dev)
 {
@@ -401,7 +410,8 @@ struct rdma_cm_id *rdma_create_id(rdma_c
 	INIT_LIST_HEAD(&id_priv->listen_list);
 	INIT_LIST_HEAD(&id_priv->mc_list);
 	get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num);
-
+	INIT_WORK(&id_priv->ha_work, cma_ha_work_handler);
+
 	return &id_priv->id;
 }
 EXPORT_SYMBOL(rdma_create_id);
@@ -2743,6 +2753,38 @@ void rdma_leave_multicast(struct rdma_cm
 }
 EXPORT_SYMBOL(rdma_leave_multicast);

+static int cma_netdev_callback(struct notifier_block *self, unsigned long event,
+	void *ctx)	/* ctx is used as the net_device the event refers to */
+{
+	struct net_device *ndev = (struct net_device *)ctx;
+	struct cma_device *cma_dev;
+	struct rdma_id_private *id_priv;
+	struct rdma_dev_addr *dev_addr;
+
+	if (dev_net(ndev) != &init_net)	/* ignore devices outside init_net */
+		return NOTIFY_DONE;
+	/* Only bonding fail-over events are acted upon. */
+	if (event != NETDEV_BONDING_FAILOVER)
+		return NOTIFY_DONE;
+	/* The device must carry both the master and the bonding flags. */
+	if (!(ndev->flags & IFF_MASTER) || !(ndev->priv_flags & IFF_BONDING))
+		return NOTIFY_DONE;
+	/* Queue ha_work for IDs on this netdev whose saved source address differs. */
+	list_for_each_entry(cma_dev, &dev_list, list)	/* NOTE(review): no lock taken here - confirm safe */
+		list_for_each_entry(id_priv, &cma_dev->id_list, list) {
+			dev_addr = &id_priv->id.route.addr.dev_addr;
+			if (!memcmp(dev_addr->src_netdev_name, ndev->name, IFNAMSIZ) &&
+				memcmp(dev_addr->src_dev_addr, ndev->dev_addr, ndev->addr_len))
+					if (id_priv->ha_mode == RDMA_ALIGN_WITH_NETDEVICE)	/* only IDs that asked for this mode */
+						schedule_work(&id_priv->ha_work);
+		}
+	return NOTIFY_DONE;	/* same result whether or not any work was queued */
+}
+
+static struct notifier_block cma_nb = {
+	.notifier_call = cma_netdev_callback	/* handler for the netdevice notifier */
+};
+
 static void cma_add_one(struct ib_device *device)
 {
 	struct cma_device *cma_dev;
@@ -2847,6 +2889,7 @@ static int cma_init(void)

 	ib_sa_register_client(&sa_client);
 	rdma_addr_register_client(&addr_client);
+	register_netdevice_notifier(&cma_nb);

 	ret = ib_register_client(&cma_client);
 	if (ret)
@@ -2854,6 +2897,7 @@ static int cma_init(void)
 	return 0;

 err:
+	unregister_netdevice_notifier(&cma_nb);
 	rdma_addr_unregister_client(&addr_client);
 	ib_sa_unregister_client(&sa_client);
 	destroy_workqueue(cma_wq);
@@ -2863,6 +2907,7 @@ err:
 static void cma_cleanup(void)
 {
 	ib_unregister_client(&cma_client);
+	unregister_netdevice_notifier(&cma_nb);
 	rdma_addr_unregister_client(&addr_client);
 	ib_sa_unregister_client(&sa_client);
 	destroy_workqueue(cma_wq);



More information about the general mailing list