[openib-general] [PATCH] RDMA CM: assign port numbers when binding a cm_id to an address

Tom Tucker tom at opengridcomputing.com
Tue Apr 18 20:42:34 PDT 2006


This looks like a great start.

One part I didn't understand, however, was where the local port is
assigned for children of the listening endpoint. The local port for
these endpoints will be the same as for the listening parent. So if
cma_use_port is used to bind these child endpoints (i.e., to add them to
the owners list), then the logic will need to distinguish between an
rdma_cm_id attempting to bind as a listener and an rdma_cm_id binding as
a connected child.


On Tue, 2006-04-18 at 15:00 -0700, Sean Hefty wrote:
> Assign/reserve a port number when binding a cm_id.  If no port number is
> given, assign one from the local port space.  If a port number is given,
> reserve it.
> 
> The RDMA port space is separate from that used for TCP.  iWarp devices
> will need to coordinate between the port values assigned by the rdma_cm
> and those in use by TCP.  SDP also has its own port space.
> 
> Signed-off-by: Sean Hefty <sean.hefty at intel.com>
> ---
> Index: cma.c
> ===================================================================
> --- cma.c	(revision 6479)
> +++ cma.c	(working copy)
> @@ -33,6 +33,9 @@
>  #include <linux/in6.h>
>  #include <linux/mutex.h>
>  #include <linux/random.h>
> +#include <linux/idr.h>
> +#include <net/inet_hashtables.h>
> +#include <net/tcp.h>
>  #include <rdma/rdma_cm.h>
>  #include <rdma/ib_cache.h>
>  #include <rdma/ib_cm.h>
> @@ -58,6 +61,8 @@ static LIST_HEAD(dev_list);
>  static LIST_HEAD(listen_any_list);
>  static DEFINE_MUTEX(lock);
>  static struct workqueue_struct *cma_wq;
> +static DEFINE_IDR(sdp_ps);
> +static DEFINE_IDR(tcp_ps);
>  
>  struct cma_device {
>  	struct list_head	list;
> @@ -81,6 +86,12 @@ enum cma_state {
>  	CMA_DESTROYING
>  };
>  
> +struct rdma_bind_list {
> +	struct idr		*ps;
> +	struct hlist_head	owners;
> +	unsigned short		port;
> +};
> +
>  /*
>   * Device removal can occur at anytime, so we need extra handling to
>   * serialize notifying the user of device removal with other callbacks.
> @@ -90,6 +101,8 @@ enum cma_state {
>  struct rdma_id_private {
>  	struct rdma_cm_id	id;
>  
> +	struct rdma_bind_list	*bind_list;
> +	struct hlist_node	node;
>  	struct list_head	list;
>  	struct list_head	listen_list;
>  	struct cma_device	*cma_dev;
> @@ -460,6 +473,11 @@ static inline int cma_any_addr(struct so
>  	return cma_zero_addr(addr) || cma_loopback_addr(addr);
>  }
>  
> +static inline int cma_any_port(struct sockaddr *addr)
> +{
> +	return !((struct sockaddr_in *) addr)->sin_port;
> +}
> +
>  static int cma_get_net_info(void *hdr, enum rdma_port_space ps,
>  			    u8 *ip_ver, __u16 *port,
>  			    union cma_ip_addr **src, union cma_ip_addr **dst)
> @@ -625,6 +643,22 @@ static void cma_cancel_operation(struct 
>  	}
>  }
>  
> +static void cma_release_port(struct rdma_id_private *id_priv)
> +{
> +	struct rdma_bind_list *bind_list = id_priv->bind_list;
> +
> +	if (!bind_list)
> +		return;
> +
> +	mutex_lock(&lock);
> +	hlist_del(&id_priv->node);
> +	if (hlist_empty(&bind_list->owners)) {
> +		idr_remove(bind_list->ps, bind_list->port);
> +		kfree(bind_list);
> +	}
> +	mutex_unlock(&lock);
> +}
> +
>  void rdma_destroy_id(struct rdma_cm_id *id)
>  {
>  	struct rdma_id_private *id_priv;
> @@ -648,6 +682,7 @@ void rdma_destroy_id(struct rdma_cm_id *
>  		mutex_unlock(&lock);
>  	}
>  
> +	cma_release_port(id_priv);
>  	atomic_dec(&id_priv->refcount);
>  	wait_event(id_priv->wait, !atomic_read(&id_priv->refcount));
>  
> @@ -918,21 +953,6 @@ static int cma_ib_listen(struct rdma_id_
>  	return ret;
>  }
>  
> -static int cma_duplicate_listen(struct rdma_id_private *id_priv)
> -{
> -	struct rdma_id_private *cur_id_priv;
> -	struct sockaddr_in *cur_addr, *new_addr;
> -
> -	new_addr = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
> -	list_for_each_entry(cur_id_priv, &listen_any_list, listen_list) {
> -		cur_addr = (struct sockaddr_in *)
> -			    &cur_id_priv->id.route.addr.src_addr;
> -		if (cur_addr->sin_port == new_addr->sin_port)
> -			return -EADDRINUSE;
> -	}
> -	return 0;
> -}
> -
>  static int cma_listen_handler(struct rdma_cm_id *id,
>  			      struct rdma_cm_event *event)
>  {
> @@ -955,9 +975,10 @@ static void cma_listen_on_dev(struct rdm
>  		return;
>  
>  	dev_id_priv = container_of(id, struct rdma_id_private, id);
> -	ret = rdma_bind_addr(id, &id_priv->id.route.addr.src_addr);
> -	if (ret)
> -		goto err;
> +
> +	dev_id_priv->state = CMA_ADDR_BOUND;
> +	memcpy(&id->route.addr.src_addr, &id_priv->id.route.addr.src_addr,
> +	       ip_addr_size(&id_priv->id.route.addr.src_addr));
>  
>  	cma_attach_to_dev(dev_id_priv, cma_dev);
>  	list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list);
> @@ -971,22 +992,15 @@ err:
>  	cma_destroy_listen(dev_id_priv);
>  }
>  
> -static int cma_listen_on_all(struct rdma_id_private *id_priv)
> +static void cma_listen_on_all(struct rdma_id_private *id_priv)
>  {
>  	struct cma_device *cma_dev;
> -	int ret;
>  
>  	mutex_lock(&lock);
> -	ret = cma_duplicate_listen(id_priv);
> -	if (ret)
> -		goto out;
> -
>  	list_add_tail(&id_priv->list, &listen_any_list);
>  	list_for_each_entry(cma_dev, &dev_list, list)
>  		cma_listen_on_dev(id_priv, cma_dev);
> -out:
>  	mutex_unlock(&lock);
> -	return ret;
>  }
>  
>  int rdma_listen(struct rdma_cm_id *id, int backlog)
> @@ -1002,16 +1016,15 @@ int rdma_listen(struct rdma_cm_id *id, i
>  		switch (rdma_node_get_transport(id->device->node_type)) {
>  		case RDMA_TRANSPORT_IB:
>  			ret = cma_ib_listen(id_priv);
> +			if (ret)
> +				goto err;
>  			break;
>  		default:
>  			ret = -ENOSYS;
> -			break;
> +			goto err;
>  		}
>  	} else
> -		ret = cma_listen_on_all(id_priv);
> -
> -	if (ret)
> -		goto err;
> +		cma_listen_on_all(id_priv);
>  
>  	id_priv->backlog = backlog;
>  	return 0;
> @@ -1310,32 +1323,135 @@ err:
>  }
>  EXPORT_SYMBOL(rdma_resolve_addr);
>  
> +static void cma_bind_port(struct rdma_bind_list *bind_list,
> +			  struct rdma_id_private *id_priv)
> +{
> +	struct sockaddr_in *sin;
> +
> +	sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
> +	sin->sin_port = htons(bind_list->port);
> +	id_priv->bind_list = bind_list;
> +	hlist_add_head(&id_priv->node, &bind_list->owners);
> +}
> +
> +static int cma_alloc_port(struct idr *ps, struct rdma_id_private *id_priv,
> +			  unsigned short snum)
> +{
> +	struct rdma_bind_list *bind_list;
> +	int port, start, ret;
> +
> +	bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL);
> +	if (!bind_list)
> +		return -ENOMEM;
> +
> +	start = snum ? snum : sysctl_local_port_range[0];
> +
> +	do {
> +		ret = idr_get_new_above(ps, bind_list, start, &port);
> +	} while ((ret == -EAGAIN) && idr_pre_get(ps, GFP_KERNEL));
> +
> +	if (ret)
> +		goto err;
> +
> +	if ((snum && port != snum) ||
> +	    (!snum && port > sysctl_local_port_range[1])) {
> +		idr_remove(ps, port);
> +		ret = -EADDRNOTAVAIL;
> +		goto err;
> +	}
> +
> +	bind_list->ps = ps;
> +	bind_list->port = (unsigned short) port;
> +	cma_bind_port(bind_list, id_priv);
> +	return 0;
> +err:
> +	kfree(bind_list);
> +	return ret;
> +}
> +
> +static int cma_use_port(struct idr *ps, struct rdma_id_private *id_priv)
> +{
> +	struct rdma_id_private *cur_id;
> +	struct sockaddr_in *sin, *cur_sin;
> +	struct rdma_bind_list *bind_list;
> +	struct hlist_node *node;
> +
> +	sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
> +	bind_list = idr_find(ps, ntohs(sin->sin_port));
> +	if (!bind_list)
> +		return cma_alloc_port(ps, id_priv, ntohs(sin->sin_port));
> +
> +	/*
> +	 * We don't support binding to any address if anyone is bound to
> +	 * a specific address on the same port.
> +	 */
> +	if (cma_any_addr(&id_priv->id.route.addr.src_addr))
> +		return -EADDRNOTAVAIL;
> +
> +	hlist_for_each_entry(cur_id, node, &bind_list->owners, node) {
> +		if (cma_any_addr(&cur_id->id.route.addr.src_addr))
> +			return -EADDRNOTAVAIL;
> +		
> +		cur_sin = (struct sockaddr_in *) &cur_id->id.route.addr.src_addr;
> +		if (sin->sin_addr.s_addr == cur_sin->sin_addr.s_addr)
> +			return -EADDRINUSE;
> +	}
> +
> +	cma_bind_port(bind_list, id_priv);
> +	return 0;
> +}
> +
> +static int cma_get_port(struct rdma_id_private *id_priv)
> +{
> +	struct idr *ps;
> +	int ret;
> +
> +	switch (id_priv->id.ps) {
> +	case RDMA_PS_SDP:
> +		ps = &sdp_ps;
> +		break;
> +	case RDMA_PS_TCP:
> +		ps = &tcp_ps;
> +		break;
> +	default:
> +		return -EPROTONOSUPPORT;
> +	}
> +
> +	mutex_lock(&lock);
> +	if (cma_any_port(&id_priv->id.route.addr.src_addr))
> +		ret = cma_alloc_port(ps, id_priv, 0);
> +	else
> +		ret = cma_use_port(ps, id_priv);
> +	mutex_unlock(&lock);
> +
> +	return ret;
> +}
> +
>  int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
>  {
>  	struct rdma_id_private *id_priv;
> -	struct rdma_dev_addr *dev_addr;
>  	int ret;
>  
>  	if (addr->sa_family != AF_INET)
> -		return -EINVAL;
> +		return -EAFNOSUPPORT;
>  
>  	id_priv = container_of(id, struct rdma_id_private, id);
>  	if (!cma_comp_exch(id_priv, CMA_IDLE, CMA_ADDR_BOUND))
>  		return -EINVAL;
>  
> -	if (cma_any_addr(addr))
> -		ret = 0;
> -	else {
> -		dev_addr = &id->route.addr.dev_addr;
> -		ret = rdma_translate_ip(addr, dev_addr);
> +	if (!cma_any_addr(addr)) {
> +		ret = rdma_translate_ip(addr, &id->route.addr.dev_addr);
>  		if (!ret)
>  			ret = cma_acquire_dev(id_priv);
> +		if (ret)
> +			goto err;
>  	}
>  
> +	memcpy(&id->route.addr.src_addr, addr, ip_addr_size(addr));
> +	ret = cma_get_port(id_priv);
>  	if (ret)
>  		goto err;
>  
> -	memcpy(&id->route.addr.src_addr, addr, ip_addr_size(addr));
>  	return 0;
>  err:
>  	cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_IDLE);
> @@ -1699,6 +1815,8 @@ static void cma_cleanup(void)
>  {
>  	ib_unregister_client(&cma_client);
>  	destroy_workqueue(cma_wq);
> +	idr_destroy(&sdp_ps);
> +	idr_destroy(&tcp_ps);
>  }
>  
>  module_init(cma_init);
> 
> _______________________________________________
> openib-general mailing list
> openib-general at openib.org
> http://openib.org/mailman/listinfo/openib-general
> 
> To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general




More information about the general mailing list