[openib-general] [PATCH] [CMA] add support for listening on any RDMA device

Sean Hefty sean.hefty at intel.com
Wed Oct 12 12:22:20 PDT 2005


The following patch adds support for listening on a port number only,
i.e. binding to the wildcard address rather than to a specific device.
All connection requests received on that port number, on any RDMA
device in the system, are routed to the listening client.
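
For reference, a minimal sketch of how a kernel client could use the
wildcard support below.  rdma_create_id(), rdma_bind_addr(),
rdma_listen(), and rdma_destroy_id() are the entry points touched by
this patch; the handler body, function names, and port number are
illustrative only:

/* Assumes the usual rdma_cm headers; with a wildcard listen the
 * handler may be invoked for requests arriving on any RDMA device. */
static int my_cm_handler(struct rdma_cm_id *id,
			 struct rdma_cm_event *event)
{
	/* id->device identifies the device that received the request;
	 * it may differ from request to request. */
	return 0;
}

static int listen_any_example(void)
{
	struct rdma_cm_id *id;
	struct sockaddr_in addr;
	int ret;

	id = rdma_create_id(my_cm_handler, NULL);
	if (IS_ERR(id))
		return PTR_ERR(id);

	memset(&addr, 0, sizeof addr);
	addr.sin_family = AF_INET;
	addr.sin_addr.s_addr = INADDR_ANY;	/* zero: wildcard address */
	addr.sin_port = htons(1234);		/* illustrative port */

	/* With a zero address, rdma_bind_addr() records the address but
	 * does not acquire a device; rdma_listen() then listens on all
	 * RDMA devices in the system. */
	ret = rdma_bind_addr(id, (struct sockaddr *) &addr);
	if (!ret)
		ret = rdma_listen(id);
	if (ret)
		rdma_destroy_id(id);
	return ret;
}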

Signed-off-by: Sean Hefty <sean.hefty at intel.com>


Index: core/cma.c
===================================================================
--- core/cma.c	(revision 3724)
+++ core/cma.c	(working copy)
@@ -51,8 +51,9 @@ static struct ib_client cma_client = {
 	.remove = cma_remove_one
 };
 
-static DEFINE_SPINLOCK(lock);
 static LIST_HEAD(dev_list);
+static LIST_HEAD(listen_any_list);
+static DECLARE_MUTEX(mutex);
 
 struct cma_device {
 	struct list_head	list;
@@ -86,6 +87,7 @@ struct rdma_id_private {
 	struct rdma_cm_id	id;
 
 	struct list_head	list;
+	struct list_head	listen_list;
 	struct cma_device	*cma_dev;
 
 	enum cma_state		state;
@@ -168,26 +170,39 @@ static inline void cma_set_vers(struct c
 	addr->version = (cma_ver << 4) + (ip_ver & 0xF);
 }
 
+static void cma_attach_to_dev(struct rdma_id_private *id_priv,
+			      struct cma_device *cma_dev)
+{
+	atomic_inc(&cma_dev->refcount);
+	id_priv->cma_dev = cma_dev;
+	id_priv->id.device = cma_dev->device;
+	list_add_tail(&id_priv->list, &cma_dev->id_list);
+}
+
+static void cma_detach_from_dev(struct rdma_id_private *id_priv)
+{
+	list_del(&id_priv->list);
+	if (atomic_dec_and_test(&id_priv->cma_dev->refcount))
+		wake_up(&id_priv->cma_dev->wait);
+	id_priv->cma_dev = NULL;
+}
+
 static int cma_acquire_ib_dev(struct rdma_id_private *id_priv,
 			      union ib_gid *gid)
 {
 	struct cma_device *cma_dev;
-	unsigned long flags;
 	int ret = -ENODEV;
 	u8 port;
 
-	spin_lock_irqsave(&lock, flags);
+	down(&mutex);
 	list_for_each_entry(cma_dev, &dev_list, list) {
 		ret = ib_find_cached_gid(cma_dev->device, gid, &port, NULL);
 		if (!ret) {
-			atomic_inc(&cma_dev->refcount);
-			id_priv->cma_dev = cma_dev;
-			id_priv->id.device = cma_dev->device;
-			list_add_tail(&id_priv->list, &cma_dev->id_list);
+			cma_attach_to_dev(id_priv, cma_dev);
 			break;
 		}
 	}
-	spin_unlock_irqrestore(&lock, flags);
+	up(&mutex);
 	return ret;
 }
 
@@ -221,6 +236,7 @@ struct rdma_cm_id* rdma_create_id(rdma_c
 	atomic_set(&id_priv->refcount, 1);
 	init_waitqueue_head(&id_priv->wait_remove);
 	atomic_set(&id_priv->dev_remove, 0);
+	INIT_LIST_HEAD(&id_priv->listen_list);
 
 	return &id_priv->id;
 }
@@ -353,6 +369,11 @@ static int cma_verify_addr(struct cma_ad
 	return 0;
 }
 
+static inline int cma_any_addr(struct sockaddr *addr)
+{
+	return ((struct sockaddr_in *) addr)->sin_addr.s_addr == 0;
+}
+
 static int cma_notify_user(struct rdma_id_private *id_priv,
 			   enum rdma_cm_event_type type, int status,
 			   void *data, u8 data_len)
@@ -389,6 +410,44 @@ static void cma_cancel_route(struct rdma
 	}
 }
 
+static inline int cma_internal_listen(struct rdma_id_private *id_priv)
+{
+	return (id_priv->state == CMA_LISTEN) && id_priv->cma_dev &&
+	       cma_any_addr(&id_priv->id.route.addr.src_addr);
+}
+
+static void cma_destroy_listen(struct rdma_id_private *id_priv)
+{
+	cma_exch(id_priv, CMA_DESTROYING);
+
+	if (id_priv->cm_id && !IS_ERR(id_priv->cm_id))
+		ib_destroy_cm_id(id_priv->cm_id);
+
+	list_del(&id_priv->listen_list);
+	if (id_priv->cma_dev)
+		cma_detach_from_dev(id_priv);
+
+	atomic_dec(&id_priv->refcount);
+	wait_event(id_priv->wait, !atomic_read(&id_priv->refcount));
+
+	kfree(id_priv);
+}
+
+static void cma_cancel_listens(struct rdma_id_private *id_priv)
+{
+	struct rdma_id_private *dev_id_priv;
+
+	down(&mutex);
+	list_del(&id_priv->list);
+
+	while (!list_empty(&id_priv->listen_list)) {
+		dev_id_priv = list_entry(id_priv->listen_list.next,
+					 struct rdma_id_private, listen_list);
+		cma_destroy_listen(dev_id_priv);
+	}
+	up(&mutex);
+}
+
 static void cma_cancel_operation(struct rdma_id_private *id_priv,
 				 enum cma_state state)
 {
@@ -399,6 +458,11 @@ static void cma_cancel_operation(struct 
 	case CMA_ROUTE_QUERY:
 		cma_cancel_route(id_priv);
 		break;
+	case CMA_LISTEN:
+		if (cma_any_addr(&id_priv->id.route.addr.src_addr) &&
+		    !id_priv->cma_dev)
+			cma_cancel_listens(id_priv);
+		break;
 	default:
 		break;
 	}
@@ -408,7 +472,6 @@ void rdma_destroy_id(struct rdma_cm_id *
 {
 	struct rdma_id_private *id_priv;
 	enum cma_state state;
-	unsigned long flags;
 
 	id_priv = container_of(id, struct rdma_id_private, id);
 	state = cma_exch(id_priv, CMA_DESTROYING);
@@ -418,12 +481,9 @@ void rdma_destroy_id(struct rdma_cm_id *
 		ib_destroy_cm_id(id_priv->cm_id);
 
 	if (id_priv->cma_dev) {
-	  	spin_lock_irqsave(&lock, flags);
-		list_del(&id_priv->list);
-		spin_unlock_irqrestore(&lock, flags);
-
-		if (atomic_dec_and_test(&id_priv->cma_dev->refcount))
-			wake_up(&id_priv->cma_dev->wait);
+		down(&mutex);
+		cma_detach_from_dev(id_priv);
+		up(&mutex);
 	}
 
 	atomic_dec(&id_priv->refcount);
@@ -660,6 +720,77 @@ static int cma_ib_listen(struct rdma_id_
 	return ret;
 }
 
+static int cma_duplicate_listen(struct rdma_id_private *id_priv)
+{
+	struct rdma_id_private *cur_id_priv;
+	struct sockaddr_in *cur_addr, *new_addr;
+
+	new_addr = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
+	list_for_each_entry(cur_id_priv, &listen_any_list, listen_list) {
+		cur_addr = (struct sockaddr_in *)
+			    &cur_id_priv->id.route.addr.src_addr;
+		if (cur_addr->sin_port == new_addr->sin_port)
+			return -EADDRINUSE;
+	}
+	return 0;
+}
+
+static int cma_listen_handler(struct rdma_cm_id *id,
+			      struct rdma_cm_event *event)
+{
+	struct rdma_id_private *id_priv = id->context;
+
+	id->context = id_priv->id.context;
+	id->event_handler = id_priv->id.event_handler;
+	return id_priv->id.event_handler(id, event);
+}
+
+static void cma_listen_on_dev(struct rdma_id_private *id_priv,
+			      struct cma_device *cma_dev)
+{
+	struct rdma_id_private *dev_id_priv;
+	struct rdma_cm_id *id;
+	int ret;
+
+	id = rdma_create_id(cma_listen_handler, id_priv);
+	if (IS_ERR(id))
+		return;
+
+	dev_id_priv = container_of(id, struct rdma_id_private, id);
+	ret = rdma_bind_addr(id, &id_priv->id.route.addr.src_addr);
+	if (ret)
+		goto err;
+
+	cma_attach_to_dev(dev_id_priv, cma_dev);
+	list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list);
+
+	ret = rdma_listen(id);
+	if (ret)
+		goto err;
+
+	return;
+err:
+	cma_destroy_listen(dev_id_priv);
+}
+
+static int cma_listen_on_all(struct rdma_id_private *id_priv)
+{
+	struct cma_device *cma_dev;
+	int ret;
+
+	down(&mutex);
+	ret = cma_duplicate_listen(id_priv);
+	if (ret)
+		goto out;
+
+	list_add_tail(&id_priv->list, &listen_any_list);
+	list_for_each_entry(cma_dev, &dev_list, list)
+		cma_listen_on_dev(id_priv, cma_dev);
+out:
+	up(&mutex);
+	return ret;
+}
+
 int rdma_listen(struct rdma_cm_id *id)
 {
 	struct rdma_id_private *id_priv;
@@ -669,20 +800,18 @@ int rdma_listen(struct rdma_cm_id *id)
 	if (!cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_LISTEN))
 		return -EINVAL;
 
-	/* TODO: handle listen across multiple devices */
-	if (!id->device) {
-		ret = -ENOSYS;
-		goto err;
-	}
+	if (id->device) {
+		switch (id->device->node_type) {
+		case IB_NODE_CA:
+			ret = cma_ib_listen(id_priv);
+			break;
+		default:
+			ret = -ENOSYS;
+			break;
+		}
+	} else
+		ret = cma_listen_on_all(id_priv);
 
-	switch (id->device->node_type) {
-	case IB_NODE_CA:
-		ret = cma_ib_listen(id_priv);
-		break;
-	default:
-		ret = -ENOSYS;
-		break;
-	}
 	if (ret)
 		goto err;
 
@@ -850,7 +979,6 @@ EXPORT_SYMBOL(rdma_resolve_addr);
 int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
 {
 	struct rdma_id_private *id_priv;
-	struct sockaddr_in *ip_addr = (struct sockaddr_in *) addr;
 	struct ib_addr *ibaddr = &id->route.addr.addr.ibaddr;
 	int ret;
 
@@ -861,12 +989,14 @@ int rdma_bind_addr(struct rdma_cm_id *id
 	if (!cma_comp_exch(id_priv, CMA_IDLE, CMA_ADDR_BOUND))
 		return -EINVAL;
 
-	if (ip_addr->sin_addr.s_addr) {
+	if (cma_any_addr(addr)) {
+		id->route.addr.src_addr = *addr;
+		ret = 0;
+	} else {
 		ret = ib_translate_addr(addr, &ibaddr->sgid, &ibaddr->pkey);
 		if (!ret)
 			ret = cma_acquire_ib_dev(id_priv, &ibaddr->sgid);
-	} else
-		ret = -ENOSYS; /* TODO: support wild card addresses */
+	}
 
 	if (ret)
 		goto err;
@@ -1102,7 +1232,7 @@ static __be64 get_ca_guid(struct ib_devi
 static void cma_add_one(struct ib_device *device)
 {
 	struct cma_device *cma_dev;
-	unsigned long flags;
+	struct rdma_id_private *id_priv;
 
 	cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL);
 	if (!cma_dev)
@@ -1118,9 +1248,11 @@ static void cma_add_one(struct ib_device
 	INIT_LIST_HEAD(&cma_dev->id_list);
 	ib_set_client_data(device, &cma_client, cma_dev);
 
-	spin_lock_irqsave(&lock, flags);
+	down(&mutex);
 	list_add_tail(&cma_dev->list, &dev_list);
-	spin_unlock_irqrestore(&lock, flags);
+	list_for_each_entry(id_priv, &listen_any_list, list)
+		cma_listen_on_dev(id_priv, cma_dev);
+	up(&mutex);
 	return;
 err:
 	kfree(cma_dev);
@@ -1150,28 +1282,33 @@ static void cma_process_remove(struct cm
 {
 	struct list_head remove_list;
 	struct rdma_id_private *id_priv;
-	unsigned long flags;
 	int ret;
 
 	INIT_LIST_HEAD(&remove_list);
 
-	spin_lock_irqsave(&lock, flags);
+	down(&mutex);
 	while (!list_empty(&cma_dev->id_list)) {
 		id_priv = list_entry(cma_dev->id_list.next,
 				     struct rdma_id_private, list);
+
+		if (cma_internal_listen(id_priv)) {
+			cma_destroy_listen(id_priv);
+			continue;
+		}
+
 		list_del(&id_priv->list);
 		list_add_tail(&id_priv->list, &remove_list);
 		atomic_inc(&id_priv->refcount);
-		spin_unlock_irqrestore(&lock, flags);
+		up(&mutex);
 
 		ret = cma_remove_id_dev(id_priv);
 		cma_deref_id(id_priv);
 		if (ret)
 			rdma_destroy_id(&id_priv->id);
 
-		spin_lock_irqsave(&lock, flags);
+		down(&mutex);
 	}
-	spin_unlock_irqrestore(&lock, flags);
+	up(&mutex);
 
 	atomic_dec(&cma_dev->refcount);
 	wait_event(cma_dev->wait, !atomic_read(&cma_dev->refcount));
@@ -1180,15 +1317,14 @@ static void cma_process_remove(struct cm
 static void cma_remove_one(struct ib_device *device)
 {
 	struct cma_device *cma_dev;
-	unsigned long flags;
 
 	cma_dev = ib_get_client_data(device, &cma_client);
 	if (!cma_dev)
 		return;
 
-	spin_lock_irqsave(&lock, flags);
+	down(&mutex);
 	list_del(&cma_dev->list);
-	spin_unlock_irqrestore(&lock, flags);
+	up(&mutex);
 
 	cma_process_remove(cma_dev);
 	kfree(cma_dev);
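
A note on the new behavior that is easy to miss in the diff: a
wildcard listen does not acquire a device at bind time.  Instead,
rdma_listen() calls cma_listen_on_all(), which creates one internal
listen id per device and dispatches events through
cma_listen_handler(), restoring the user's handler and context before
upcalling.  cma_add_one() extends existing wildcard listens to
hot-plugged devices.  Since cma_duplicate_listen() compares ports
only, at most one wildcard listen may exist per port.  A sketch of the
expected behavior, where wildcard_listen() stands in for the example
bind/listen sequence above and is not part of this patch:

	struct rdma_cm_id *id1, *id2;

	id1 = wildcard_listen(1234);	/* succeeds */
	id2 = wildcard_listen(1234);	/* fails: cma_listen_on_all()
					 * returns -EADDRINUSE */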
