[openib-general] [PATCH] RDMA CM: updates to 2.6.18 branch

Sean Hefty sean.hefty at intel.com
Mon May 15 15:32:35 PDT 2006


I'm assuming that since the CMA isn't upstream yet, a single patch will
work.

The patch below should contain everything that makes sense to merge
upstream for the CMA.  In summary, it:

- converts the wait queue based reference counting to completions
- adds TCP and SDP port space management using idr tables, with
  socket-like address/port sharing checks
- validates only the SDP major version in hello/hello ack headers,
  rather than the full version byte
- binds listens on an idle id to the IPv4 wildcard address, replacing
  the old duplicate-listen check
- rejects non-AF_INET binds with -EAFNOSUPPORT and verifies the IP
  version of incoming headers

Signed-off-by: Sean Hefty <sean.hefty at intel.com>
---
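[Reviewer note, not part of the patch.]  A minimal sketch of how a kernel
ULP would exercise the new binding logic; the handler and function names
here are hypothetical.  Binding with sin_port set to 0 now allocates an
ephemeral port from the idr-based port space, while an explicit port goes
through the socket-like sharing checks in cma_use_port():

#include <linux/err.h>
#include <linux/in.h>
#include <linux/string.h>
#include <rdma/rdma_cm.h>

static int my_cm_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
	/* Returning non-zero here destroys the passed in id. */
	return 0;
}

static int example_listen(unsigned short port)
{
	struct rdma_cm_id *id;
	struct sockaddr_in addr;
	int ret;

	id = rdma_create_id(my_cm_handler, NULL, RDMA_PS_TCP);
	if (IS_ERR(id))
		return PTR_ERR(id);

	memset(&addr, 0, sizeof addr);
	addr.sin_family = AF_INET;
	addr.sin_port = htons(port);	/* 0 selects an ephemeral port */

	ret = rdma_bind_addr(id, (struct sockaddr *) &addr);
	if (!ret)
		ret = rdma_listen(id, 10);
	if (ret)
		rdma_destroy_id(id);
	return ret;
}

With this patch, calling rdma_listen() on an idle id skips the explicit
bind entirely: cma_bind_any() binds it to the IPv4 wildcard address and
an ephemeral port.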
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 2c1386b..0003b87 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -2,7 +2,7 @@
  * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
  * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
  * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
- * Copyright (c) 2005 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2005-2006 Intel Corporation.  All rights reserved.
  *
  * This Software is licensed under one of the following licenses:
  *
@@ -29,9 +29,15 @@
  *
  */
 
+#include <linux/completion.h>
 #include <linux/in.h>
 #include <linux/in6.h>
+#include <linux/mutex.h>
 #include <linux/random.h>
+#include <linux/idr.h>
+
+#include <net/tcp.h>
+
 #include <rdma/rdma_cm.h>
 #include <rdma/ib_cache.h>
 #include <rdma/ib_cm.h>
@@ -57,12 +63,14 @@ static LIST_HEAD(dev_list);
 static LIST_HEAD(listen_any_list);
 static DEFINE_MUTEX(lock);
 static struct workqueue_struct *cma_wq;
+static DEFINE_IDR(sdp_ps);
+static DEFINE_IDR(tcp_ps);
 
 struct cma_device {
 	struct list_head	list;
 	struct ib_device	*device;
 	__be64			node_guid;
-	wait_queue_head_t	wait;
+	struct completion	comp;
 	atomic_t		refcount;
 	struct list_head	id_list;
 };
@@ -80,6 +88,12 @@ enum cma_state {
 	CMA_DESTROYING
 };
 
+struct rdma_bind_list {
+	struct idr		*ps;
+	struct hlist_head	owners;
+	unsigned short		port;
+};
+
 /*
  * Device removal can occur at anytime, so we need extra handling to
  * serialize notifying the user of device removal with other callbacks.
@@ -89,13 +103,15 @@ enum cma_state {
 struct rdma_id_private {
 	struct rdma_cm_id	id;
 
+	struct rdma_bind_list	*bind_list;
+	struct hlist_node	node;
 	struct list_head	list;
 	struct list_head	listen_list;
 	struct cma_device	*cma_dev;
 
 	enum cma_state		state;
 	spinlock_t		lock;
-	wait_queue_head_t	wait;
+	struct completion	comp;
 	atomic_t		refcount;
 	wait_queue_head_t	wait_remove;
 	atomic_t		dev_remove;
@@ -140,7 +156,7 @@ struct cma_hdr {
 
 struct sdp_hh {
 	u8 bsdh[16];
-	u8 sdp_version;
+	u8 sdp_version; /* Major version: 7:4 */
 	u8 ip_version;	/* IP version: 7:4 */
 	u8 sdp_specific1[10];
 	__u16 port;
@@ -149,8 +165,13 @@ struct sdp_hh {
 	union cma_ip_addr dst_addr;
 };
 
+struct sdp_hah {
+	u8 bsdh[16];
+	u8 sdp_version;
+};
+
 #define CMA_VERSION 0x00
-#define SDP_VERSION 0x22
+#define SDP_MAJ_VERSION 0x2
 
 static int cma_comp(struct rdma_id_private *id_priv, enum cma_state comp)
 {
@@ -199,6 +220,11 @@ static inline void cma_set_ip_ver(struct
 	hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);
 }
 
+static inline u8 sdp_get_majv(u8 sdp_version)
+{
+	return sdp_version >> 4;
+}
+
 static inline u8 sdp_get_ip_ver(struct sdp_hh *hh)
 {
 	return hh->ip_version >> 4;
@@ -218,11 +244,16 @@ static void cma_attach_to_dev(struct rdm
 	list_add_tail(&id_priv->list, &cma_dev->id_list);
 }
 
+static inline void cma_deref_dev(struct cma_device *cma_dev)
+{
+	if (atomic_dec_and_test(&cma_dev->refcount))
+		complete(&cma_dev->comp);
+}
+
 static void cma_detach_from_dev(struct rdma_id_private *id_priv)
 {
 	list_del(&id_priv->list);
-	if (atomic_dec_and_test(&id_priv->cma_dev->refcount))
-		wake_up(&id_priv->cma_dev->wait);
+	cma_deref_dev(id_priv->cma_dev);
 	id_priv->cma_dev = NULL;
 }
 
@@ -260,7 +291,7 @@ static int cma_acquire_dev(struct rdma_i
 static void cma_deref_id(struct rdma_id_private *id_priv)
 {
 	if (atomic_dec_and_test(&id_priv->refcount))
-		wake_up(&id_priv->wait);
+		complete(&id_priv->comp);
 }
 
 static void cma_release_remove(struct rdma_id_private *id_priv)
@@ -283,7 +314,7 @@ struct rdma_cm_id *rdma_create_id(rdma_c
 	id_priv->id.event_handler = event_handler;
 	id_priv->id.ps = ps;
 	spin_lock_init(&id_priv->lock);
-	init_waitqueue_head(&id_priv->wait);
+	init_completion(&id_priv->comp);
 	atomic_set(&id_priv->refcount, 1);
 	init_waitqueue_head(&id_priv->wait_remove);
 	atomic_set(&id_priv->dev_remove, 0);
@@ -457,13 +488,19 @@ static inline int cma_any_addr(struct so
 	return cma_zero_addr(addr) || cma_loopback_addr(addr);
 }
 
+static inline int cma_any_port(struct sockaddr *addr)
+{
+	return !((struct sockaddr_in *) addr)->sin_port;
+}
+
 static int cma_get_net_info(void *hdr, enum rdma_port_space ps,
 			    u8 *ip_ver, __u16 *port,
 			    union cma_ip_addr **src, union cma_ip_addr **dst)
 {
 	switch (ps) {
 	case RDMA_PS_SDP:
-		if (((struct sdp_hh *) hdr)->sdp_version != SDP_VERSION)
+		if (sdp_get_majv(((struct sdp_hh *) hdr)->sdp_version) !=
+		    SDP_MAJ_VERSION)
 			return -EINVAL;
 
 		*ip_ver	= sdp_get_ip_ver(hdr);
@@ -481,6 +518,9 @@ static int cma_get_net_info(void *hdr, e
 		*dst	= &((struct cma_hdr *) hdr)->dst_addr;
 		break;
 	}
+
+	if (*ip_ver != 4 && *ip_ver != 6)
+		return -EINVAL;
 	return 0;
 }
 
@@ -581,8 +621,8 @@ static void cma_destroy_listen(struct rd
 	}
 	list_del(&id_priv->listen_list);
 
-	atomic_dec(&id_priv->refcount);
-	wait_event(id_priv->wait, !atomic_read(&id_priv->refcount));
+	cma_deref_id(id_priv);
+	wait_for_completion(&id_priv->comp);
 
 	kfree(id_priv);
 }
@@ -622,6 +662,22 @@ static void cma_cancel_operation(struct 
 	}
 }
 
+static void cma_release_port(struct rdma_id_private *id_priv)
+{
+	struct rdma_bind_list *bind_list = id_priv->bind_list;
+
+	if (!bind_list)
+		return;
+
+	mutex_lock(&lock);
+	hlist_del(&id_priv->node);
+	if (hlist_empty(&bind_list->owners)) {
+		idr_remove(bind_list->ps, bind_list->port);
+		kfree(bind_list);
+	}
+	mutex_unlock(&lock);
+}
+
 void rdma_destroy_id(struct rdma_cm_id *id)
 {
 	struct rdma_id_private *id_priv;
@@ -645,8 +701,9 @@ void rdma_destroy_id(struct rdma_cm_id *
 		mutex_unlock(&lock);
 	}
 
-	atomic_dec(&id_priv->refcount);
-	wait_event(id_priv->wait, !atomic_read(&id_priv->refcount));
+	cma_release_port(id_priv);
+	cma_deref_id(id_priv);
+	wait_for_completion(&id_priv->comp);
 
 	kfree(id_priv->id.route.path_rec);
 	kfree(id_priv);
@@ -677,6 +734,16 @@ reject:
 	return ret;
 }
 
+static int cma_verify_rep(struct rdma_id_private *id_priv, void *data)
+{
+	if (id_priv->id.ps == RDMA_PS_SDP &&
+	    sdp_get_majv(((struct sdp_hah *) data)->sdp_version) !=
+	    SDP_MAJ_VERSION)
+		return -EINVAL;
+
+	return 0;
+}
+
 static int cma_rtu_recv(struct rdma_id_private *id_priv)
 {
 	int ret;
@@ -711,7 +778,10 @@ static int cma_ib_handler(struct ib_cm_i
 		status = -ETIMEDOUT;
 		break;
 	case IB_CM_REP_RECEIVED:
-		if (id_priv->id.qp) {
+		status = cma_verify_rep(id_priv, ib_event->private_data);
+		if (status)
+			event = RDMA_CM_EVENT_CONNECT_ERROR;
+		else if (id_priv->id.qp) {
 			status = cma_rep_recv(id_priv);
 			event = status ? RDMA_CM_EVENT_CONNECT_ERROR :
 					 RDMA_CM_EVENT_ESTABLISHED;
@@ -915,21 +985,6 @@ static int cma_ib_listen(struct rdma_id_
 	return ret;
 }
 
-static int cma_duplicate_listen(struct rdma_id_private *id_priv)
-{
-	struct rdma_id_private *cur_id_priv;
-	struct sockaddr_in *cur_addr, *new_addr;
-
-	new_addr = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
-	list_for_each_entry(cur_id_priv, &listen_any_list, listen_list) {
-		cur_addr = (struct sockaddr_in *)
-			    &cur_id_priv->id.route.addr.src_addr;
-		if (cur_addr->sin_port == new_addr->sin_port)
-			return -EADDRINUSE;
-	}
-	return 0;
-}
-
 static int cma_listen_handler(struct rdma_cm_id *id,
 			      struct rdma_cm_event *event)
 {
@@ -952,9 +1007,10 @@ static void cma_listen_on_dev(struct rdm
 		return;
 
 	dev_id_priv = container_of(id, struct rdma_id_private, id);
-	ret = rdma_bind_addr(id, &id_priv->id.route.addr.src_addr);
-	if (ret)
-		goto err;
+
+	dev_id_priv->state = CMA_ADDR_BOUND;
+	memcpy(&id->route.addr.src_addr, &id_priv->id.route.addr.src_addr,
+	       ip_addr_size(&id_priv->id.route.addr.src_addr));
 
 	cma_attach_to_dev(dev_id_priv, cma_dev);
 	list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list);
@@ -968,22 +1024,24 @@ err:
 	cma_destroy_listen(dev_id_priv);
 }
 
-static int cma_listen_on_all(struct rdma_id_private *id_priv)
+static void cma_listen_on_all(struct rdma_id_private *id_priv)
 {
 	struct cma_device *cma_dev;
-	int ret;
 
 	mutex_lock(&lock);
-	ret = cma_duplicate_listen(id_priv);
-	if (ret)
-		goto out;
-
 	list_add_tail(&id_priv->list, &listen_any_list);
 	list_for_each_entry(cma_dev, &dev_list, list)
 		cma_listen_on_dev(id_priv, cma_dev);
-out:
 	mutex_unlock(&lock);
-	return ret;
+}
+
+static int cma_bind_any(struct rdma_cm_id *id, sa_family_t af)
+{
+	struct sockaddr_in addr_in;
+
+	memset(&addr_in, 0, sizeof addr_in);
+	addr_in.sin_family = af;
+	return rdma_bind_addr(id, (struct sockaddr *) &addr_in);
 }
 
 int rdma_listen(struct rdma_cm_id *id, int backlog)
@@ -992,6 +1050,12 @@ int rdma_listen(struct rdma_cm_id *id, i
 	int ret;
 
 	id_priv = container_of(id, struct rdma_id_private, id);
+	if (id_priv->state == CMA_IDLE) {
+		ret = cma_bind_any(id, AF_INET);
+		if (ret)
+			return ret;
+	}
+
 	if (!cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_LISTEN))
 		return -EINVAL;
 
@@ -999,23 +1063,22 @@ int rdma_listen(struct rdma_cm_id *id, i
 		switch (id->device->node_type) {
 		case IB_NODE_CA:
 			ret = cma_ib_listen(id_priv);
+			if (ret)
+				goto err;
 			break;
 		default:
 			ret = -ENOSYS;
-			break;
+			goto err;
 		}
 	} else
-		ret = cma_listen_on_all(id_priv);
-
-	if (ret)
-		goto err;
+		cma_listen_on_all(id_priv);
 
 	id_priv->backlog = backlog;
 	return 0;
 err:
 	cma_comp_exch(id_priv, CMA_LISTEN, CMA_ADDR_BOUND);
 	return ret;
-};
+}
 EXPORT_SYMBOL(rdma_listen);
 
 static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec,
@@ -1252,15 +1315,10 @@ err:
 static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
 			 struct sockaddr *dst_addr)
 {
-	struct sockaddr_in addr_in;
-
 	if (src_addr && src_addr->sa_family)
 		return rdma_bind_addr(id, src_addr);
-	else {
-		memset(&addr_in, 0, sizeof addr_in);
-		addr_in.sin_family = dst_addr->sa_family;
-		return rdma_bind_addr(id, (struct sockaddr *) &addr_in);
-	}
+	else
+		return cma_bind_any(id, dst_addr->sa_family);
 }
 
 int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
@@ -1281,7 +1339,7 @@ int rdma_resolve_addr(struct rdma_cm_id 
 
 	atomic_inc(&id_priv->refcount);
 	memcpy(&id->route.addr.dst_addr, dst_addr, ip_addr_size(dst_addr));
-	if (cma_loopback_addr(dst_addr))
+	if (cma_any_addr(dst_addr))
 		ret = cma_resolve_loopback(id_priv);
 	else
 		ret = rdma_resolve_ip(&id->route.addr.src_addr, dst_addr,
@@ -1298,32 +1356,140 @@ err:
 }
 EXPORT_SYMBOL(rdma_resolve_addr);
 
+static void cma_bind_port(struct rdma_bind_list *bind_list,
+			  struct rdma_id_private *id_priv)
+{
+	struct sockaddr_in *sin;
+
+	sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
+	sin->sin_port = htons(bind_list->port);
+	id_priv->bind_list = bind_list;
+	hlist_add_head(&id_priv->node, &bind_list->owners);
+}
+
+static int cma_alloc_port(struct idr *ps, struct rdma_id_private *id_priv,
+			  unsigned short snum)
+{
+	struct rdma_bind_list *bind_list;
+	int port, start, ret;
+
+	bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL);
+	if (!bind_list)
+		return -ENOMEM;
+
+	start = snum ? snum : sysctl_local_port_range[0];
+
+	do {
+		ret = idr_get_new_above(ps, bind_list, start, &port);
+	} while ((ret == -EAGAIN) && idr_pre_get(ps, GFP_KERNEL));
+
+	if (ret)
+		goto err;
+
+	if ((snum && port != snum) ||
+	    (!snum && port > sysctl_local_port_range[1])) {
+		idr_remove(ps, port);
+		ret = -EADDRNOTAVAIL;
+		goto err;
+	}
+
+	bind_list->ps = ps;
+	bind_list->port = (unsigned short) port;
+	cma_bind_port(bind_list, id_priv);
+	return 0;
+err:
+	kfree(bind_list);
+	return ret;
+}
+
+static int cma_use_port(struct idr *ps, struct rdma_id_private *id_priv)
+{
+	struct rdma_id_private *cur_id;
+	struct sockaddr_in *sin, *cur_sin;
+	struct rdma_bind_list *bind_list;
+	struct hlist_node *node;
+	unsigned short snum;
+
+	sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
+	snum = ntohs(sin->sin_port);
+	if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
+		return -EACCES;
+
+	bind_list = idr_find(ps, snum);
+	if (!bind_list)
+		return cma_alloc_port(ps, id_priv, snum);
+
+	/*
+	 * We don't support binding to any address if anyone is bound to
+	 * a specific address on the same port.
+	 */
+	if (cma_any_addr(&id_priv->id.route.addr.src_addr))
+		return -EADDRNOTAVAIL;
+
+	hlist_for_each_entry(cur_id, node, &bind_list->owners, node) {
+		if (cma_any_addr(&cur_id->id.route.addr.src_addr))
+			return -EADDRNOTAVAIL;
+		
+		cur_sin = (struct sockaddr_in *) &cur_id->id.route.addr.src_addr;
+		if (sin->sin_addr.s_addr == cur_sin->sin_addr.s_addr)
+			return -EADDRINUSE;
+	}
+
+	cma_bind_port(bind_list, id_priv);
+	return 0;
+}
+
+static int cma_get_port(struct rdma_id_private *id_priv)
+{
+	struct idr *ps;
+	int ret;
+
+	switch (id_priv->id.ps) {
+	case RDMA_PS_SDP:
+		ps = &sdp_ps;
+		break;
+	case RDMA_PS_TCP:
+		ps = &tcp_ps;
+		break;
+	default:
+		return -EPROTONOSUPPORT;
+	}
+
+	mutex_lock(&lock);
+	if (cma_any_port(&id_priv->id.route.addr.src_addr))
+		ret = cma_alloc_port(ps, id_priv, 0);
+	else
+		ret = cma_use_port(ps, id_priv);
+	mutex_unlock(&lock);
+
+	return ret;
+}
+
 int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
 {
 	struct rdma_id_private *id_priv;
-	struct rdma_dev_addr *dev_addr;
 	int ret;
 
 	if (addr->sa_family != AF_INET)
-		return -EINVAL;
+		return -EAFNOSUPPORT;
 
 	id_priv = container_of(id, struct rdma_id_private, id);
 	if (!cma_comp_exch(id_priv, CMA_IDLE, CMA_ADDR_BOUND))
 		return -EINVAL;
 
-	if (cma_any_addr(addr))
-		ret = 0;
-	else {
-		dev_addr = &id->route.addr.dev_addr;
-		ret = rdma_translate_ip(addr, dev_addr);
+	if (!cma_any_addr(addr)) {
+		ret = rdma_translate_ip(addr, &id->route.addr.dev_addr);
 		if (!ret)
 			ret = cma_acquire_dev(id_priv);
+		if (ret)
+			goto err;
 	}
 
+	memcpy(&id->route.addr.src_addr, addr, ip_addr_size(addr));
+	ret = cma_get_port(id_priv);
 	if (ret)
 		goto err;
 
-	memcpy(&id->route.addr.src_addr, addr, ip_addr_size(addr));
 	return 0;
 err:
 	cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_IDLE);
@@ -1331,8 +1497,8 @@ err:
 }
 EXPORT_SYMBOL(rdma_bind_addr);
 
-static void cma_format_hdr(void *hdr, enum rdma_port_space ps,
-			   struct rdma_route *route)
+static int cma_format_hdr(void *hdr, enum rdma_port_space ps,
+			  struct rdma_route *route)
 {
 	struct sockaddr_in *src4, *dst4;
 	struct cma_hdr *cma_hdr;
@@ -1344,7 +1510,8 @@ static void cma_format_hdr(void *hdr, en
 	switch (ps) {
 	case RDMA_PS_SDP:
 		sdp_hdr = hdr;
-		sdp_hdr->sdp_version = SDP_VERSION;
+		if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION)
+			return -EINVAL;
 		sdp_set_ip_ver(sdp_hdr, 4);
 		sdp_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
 		sdp_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
@@ -1359,6 +1526,7 @@ static void cma_format_hdr(void *hdr, en
 		cma_hdr->port = src4->sin_port;
 		break;
 	}
+	return 0;
 }
 
 static int cma_connect_ib(struct rdma_id_private *id_priv,
@@ -1388,7 +1556,9 @@ static int cma_connect_ib(struct rdma_id
 	}
 
 	route = &id_priv->id.route;
-	cma_format_hdr(private_data, id_priv->id.ps, route);
+	ret = cma_format_hdr(private_data, id_priv->id.ps, route);
+	if (ret)
+		goto out;
 	req.private_data = private_data;
 
 	req.primary_path = &route->path_rec[0];
@@ -1534,7 +1704,7 @@ int rdma_reject(struct rdma_cm_id *id, c
 		break;
 	}
 	return ret;
-};
+}
 EXPORT_SYMBOL(rdma_reject);
 
 int rdma_disconnect(struct rdma_cm_id *id)
@@ -1578,7 +1748,7 @@ static void cma_add_one(struct ib_device
 	if (!cma_dev->node_guid)
 		goto err;
 
-	init_waitqueue_head(&cma_dev->wait);
+	init_completion(&cma_dev->comp);
 	atomic_set(&cma_dev->refcount, 1);
 	INIT_LIST_HEAD(&cma_dev->id_list);
 	ib_set_client_data(device, &cma_client, cma_dev);
@@ -1645,8 +1815,8 @@ static void cma_process_remove(struct cm
 	}
 	mutex_unlock(&lock);
 
-	atomic_dec(&cma_dev->refcount);
-	wait_event(cma_dev->wait, !atomic_read(&cma_dev->refcount));
+	cma_deref_dev(cma_dev);
+	wait_for_completion(&cma_dev->comp);
 }
 
 static void cma_remove_one(struct ib_device *device)
@@ -1687,6 +1857,8 @@ static void cma_cleanup(void)
 {
 	ib_unregister_client(&cma_client);
 	destroy_workqueue(cma_wq);
+	idr_destroy(&sdp_ps);
+	idr_destroy(&tcp_ps);
 }
 
 module_init(cma_init);
diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
index 2e56f25..402c63d 100644
--- a/include/rdma/rdma_cm.h
+++ b/include/rdma/rdma_cm.h
@@ -91,7 +91,7 @@ struct rdma_cm_id;
  *
  * Notes: Users may not call rdma_destroy_id from this callback to destroy
  *   the passed in id, or a corresponding listen id.  Returning a
- *   non-zero value from the callback will destroy the corresponding id.
+ *   non-zero value from the callback will destroy the passed in id.
  */
 typedef int (*rdma_cm_event_handler)(struct rdma_cm_id *id,
 				     struct rdma_cm_event *event);
@@ -241,13 +241,14 @@ int rdma_listen(struct rdma_cm_id *id, i
 int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param);
 
 /**
- * rdma_reject - Called on the passive side to reject a connection request.
+ * rdma_reject - Called to reject a connection request or response.
  */
 int rdma_reject(struct rdma_cm_id *id, const void *private_data,
 		u8 private_data_len);
 
 /**
- * rdma_disconnect - This function disconnects the associated QP.
+ * rdma_disconnect - This function disconnects the associated QP and
+ *   transitions it into the error state.
  */
 int rdma_disconnect(struct rdma_cm_id *id);
 