[openib-general] [PATCH] RDMA CM: updates to 2.6.18 branch
Sean Hefty
sean.hefty at intel.com
Mon May 15 15:32:35 PDT 2006
I'm assuming that since the CMA isn't upstream yet, a single patch will
work.
The patch below should contain everything that makes sense to merge
upstream for the CMA.
Signed-off-by: Sean Hefty <sean.hefty at intel.com>
---
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 2c1386b..0003b87 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -2,7 +2,7 @@
* Copyright (c) 2005 Voltaire Inc. All rights reserved.
* Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
* Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
- * Copyright (c) 2005 Intel Corporation. All rights reserved.
+ * Copyright (c) 2005-2006 Intel Corporation. All rights reserved.
*
* This Software is licensed under one of the following licenses:
*
@@ -29,9 +29,15 @@
*
*/
+#include <linux/completion.h>
#include <linux/in.h>
#include <linux/in6.h>
+#include <linux/mutex.h>
#include <linux/random.h>
+#include <linux/idr.h>
+
+#include <net/tcp.h>
+
#include <rdma/rdma_cm.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_cm.h>
@@ -57,12 +63,14 @@ static LIST_HEAD(dev_list);
static LIST_HEAD(listen_any_list);
static DEFINE_MUTEX(lock);
static struct workqueue_struct *cma_wq;
+static DEFINE_IDR(sdp_ps);
+static DEFINE_IDR(tcp_ps);
struct cma_device {
struct list_head list;
struct ib_device *device;
__be64 node_guid;
- wait_queue_head_t wait;
+ struct completion comp;
atomic_t refcount;
struct list_head id_list;
};
@@ -80,6 +88,12 @@ enum cma_state {
CMA_DESTROYING
};
+struct rdma_bind_list {
+ struct idr *ps;
+ struct hlist_head owners;
+ unsigned short port;
+};
+
/*
* Device removal can occur at anytime, so we need extra handling to
* serialize notifying the user of device removal with other callbacks.
@@ -89,13 +103,15 @@ enum cma_state {
struct rdma_id_private {
struct rdma_cm_id id;
+ struct rdma_bind_list *bind_list;
+ struct hlist_node node;
struct list_head list;
struct list_head listen_list;
struct cma_device *cma_dev;
enum cma_state state;
spinlock_t lock;
- wait_queue_head_t wait;
+ struct completion comp;
atomic_t refcount;
wait_queue_head_t wait_remove;
atomic_t dev_remove;
@@ -140,7 +156,7 @@ struct cma_hdr {
struct sdp_hh {
u8 bsdh[16];
- u8 sdp_version;
+ u8 sdp_version; /* Major version: 7:4 */
u8 ip_version; /* IP version: 7:4 */
u8 sdp_specific1[10];
__u16 port;
@@ -149,8 +165,13 @@ struct sdp_hh {
union cma_ip_addr dst_addr;
};
+struct sdp_hah {
+ u8 bsdh[16];
+ u8 sdp_version;
+};
+
#define CMA_VERSION 0x00
-#define SDP_VERSION 0x22
+#define SDP_MAJ_VERSION 0x2
static int cma_comp(struct rdma_id_private *id_priv, enum cma_state comp)
{
@@ -199,6 +220,11 @@ static inline void cma_set_ip_ver(struct
hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);
}
+static inline u8 sdp_get_majv(u8 sdp_version)
+{
+ return sdp_version >> 4;
+}
+
static inline u8 sdp_get_ip_ver(struct sdp_hh *hh)
{
return hh->ip_version >> 4;
@@ -218,11 +244,16 @@ static void cma_attach_to_dev(struct rdm
list_add_tail(&id_priv->list, &cma_dev->id_list);
}
+static inline void cma_deref_dev(struct cma_device *cma_dev)
+{
+ if (atomic_dec_and_test(&cma_dev->refcount))
+ complete(&cma_dev->comp);
+}
+
static void cma_detach_from_dev(struct rdma_id_private *id_priv)
{
list_del(&id_priv->list);
- if (atomic_dec_and_test(&id_priv->cma_dev->refcount))
- wake_up(&id_priv->cma_dev->wait);
+ cma_deref_dev(id_priv->cma_dev);
id_priv->cma_dev = NULL;
}
@@ -260,7 +291,7 @@ static int cma_acquire_dev(struct rdma_i
static void cma_deref_id(struct rdma_id_private *id_priv)
{
if (atomic_dec_and_test(&id_priv->refcount))
- wake_up(&id_priv->wait);
+ complete(&id_priv->comp);
}
static void cma_release_remove(struct rdma_id_private *id_priv)
@@ -283,7 +314,7 @@ struct rdma_cm_id *rdma_create_id(rdma_c
id_priv->id.event_handler = event_handler;
id_priv->id.ps = ps;
spin_lock_init(&id_priv->lock);
- init_waitqueue_head(&id_priv->wait);
+ init_completion(&id_priv->comp);
atomic_set(&id_priv->refcount, 1);
init_waitqueue_head(&id_priv->wait_remove);
atomic_set(&id_priv->dev_remove, 0);
@@ -457,13 +488,19 @@ static inline int cma_any_addr(struct so
return cma_zero_addr(addr) || cma_loopback_addr(addr);
}
+static inline int cma_any_port(struct sockaddr *addr)
+{
+ return !((struct sockaddr_in *) addr)->sin_port;
+}
+
static int cma_get_net_info(void *hdr, enum rdma_port_space ps,
u8 *ip_ver, __u16 *port,
union cma_ip_addr **src, union cma_ip_addr **dst)
{
switch (ps) {
case RDMA_PS_SDP:
- if (((struct sdp_hh *) hdr)->sdp_version != SDP_VERSION)
+ if (sdp_get_majv(((struct sdp_hh *) hdr)->sdp_version) !=
+ SDP_MAJ_VERSION)
return -EINVAL;
*ip_ver = sdp_get_ip_ver(hdr);
@@ -481,6 +518,9 @@ static int cma_get_net_info(void *hdr, e
*dst = &((struct cma_hdr *) hdr)->dst_addr;
break;
}
+
+ if (*ip_ver != 4 && *ip_ver != 6)
+ return -EINVAL;
return 0;
}
@@ -581,8 +621,8 @@ static void cma_destroy_listen(struct rd
}
list_del(&id_priv->listen_list);
- atomic_dec(&id_priv->refcount);
- wait_event(id_priv->wait, !atomic_read(&id_priv->refcount));
+ cma_deref_id(id_priv);
+ wait_for_completion(&id_priv->comp);
kfree(id_priv);
}
@@ -622,6 +662,22 @@ static void cma_cancel_operation(struct
}
}
+static void cma_release_port(struct rdma_id_private *id_priv)
+{
+ struct rdma_bind_list *bind_list = id_priv->bind_list;
+
+ if (!bind_list)
+ return;
+
+ mutex_lock(&lock);
+ hlist_del(&id_priv->node);
+ if (hlist_empty(&bind_list->owners)) {
+ idr_remove(bind_list->ps, bind_list->port);
+ kfree(bind_list);
+ }
+ mutex_unlock(&lock);
+}
+
void rdma_destroy_id(struct rdma_cm_id *id)
{
struct rdma_id_private *id_priv;
@@ -645,8 +701,9 @@ void rdma_destroy_id(struct rdma_cm_id *
mutex_unlock(&lock);
}
- atomic_dec(&id_priv->refcount);
- wait_event(id_priv->wait, !atomic_read(&id_priv->refcount));
+ cma_release_port(id_priv);
+ cma_deref_id(id_priv);
+ wait_for_completion(&id_priv->comp);
kfree(id_priv->id.route.path_rec);
kfree(id_priv);
@@ -677,6 +734,16 @@ reject:
return ret;
}
+static int cma_verify_rep(struct rdma_id_private *id_priv, void *data)
+{
+ if (id_priv->id.ps == RDMA_PS_SDP &&
+ sdp_get_majv(((struct sdp_hah *) data)->sdp_version) !=
+ SDP_MAJ_VERSION)
+ return -EINVAL;
+
+ return 0;
+}
+
static int cma_rtu_recv(struct rdma_id_private *id_priv)
{
int ret;
@@ -711,7 +778,10 @@ static int cma_ib_handler(struct ib_cm_i
status = -ETIMEDOUT;
break;
case IB_CM_REP_RECEIVED:
- if (id_priv->id.qp) {
+ status = cma_verify_rep(id_priv, ib_event->private_data);
+ if (status)
+ event = RDMA_CM_EVENT_CONNECT_ERROR;
+ else if (id_priv->id.qp) {
status = cma_rep_recv(id_priv);
event = status ? RDMA_CM_EVENT_CONNECT_ERROR :
RDMA_CM_EVENT_ESTABLISHED;
@@ -915,21 +985,6 @@ static int cma_ib_listen(struct rdma_id_
return ret;
}
-static int cma_duplicate_listen(struct rdma_id_private *id_priv)
-{
- struct rdma_id_private *cur_id_priv;
- struct sockaddr_in *cur_addr, *new_addr;
-
- new_addr = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
- list_for_each_entry(cur_id_priv, &listen_any_list, listen_list) {
- cur_addr = (struct sockaddr_in *)
- &cur_id_priv->id.route.addr.src_addr;
- if (cur_addr->sin_port == new_addr->sin_port)
- return -EADDRINUSE;
- }
- return 0;
-}
-
static int cma_listen_handler(struct rdma_cm_id *id,
struct rdma_cm_event *event)
{
@@ -952,9 +1007,10 @@ static void cma_listen_on_dev(struct rdm
return;
dev_id_priv = container_of(id, struct rdma_id_private, id);
- ret = rdma_bind_addr(id, &id_priv->id.route.addr.src_addr);
- if (ret)
- goto err;
+
+ dev_id_priv->state = CMA_ADDR_BOUND;
+ memcpy(&id->route.addr.src_addr, &id_priv->id.route.addr.src_addr,
+ ip_addr_size(&id_priv->id.route.addr.src_addr));
cma_attach_to_dev(dev_id_priv, cma_dev);
list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list);
@@ -968,22 +1024,24 @@ err:
cma_destroy_listen(dev_id_priv);
}
-static int cma_listen_on_all(struct rdma_id_private *id_priv)
+static void cma_listen_on_all(struct rdma_id_private *id_priv)
{
struct cma_device *cma_dev;
- int ret;
mutex_lock(&lock);
- ret = cma_duplicate_listen(id_priv);
- if (ret)
- goto out;
-
list_add_tail(&id_priv->list, &listen_any_list);
list_for_each_entry(cma_dev, &dev_list, list)
cma_listen_on_dev(id_priv, cma_dev);
-out:
mutex_unlock(&lock);
- return ret;
+}
+
+static int cma_bind_any(struct rdma_cm_id *id, sa_family_t af)
+{
+ struct sockaddr_in addr_in;
+
+ memset(&addr_in, 0, sizeof addr_in);
+ addr_in.sin_family = af;
+ return rdma_bind_addr(id, (struct sockaddr *) &addr_in);
}
int rdma_listen(struct rdma_cm_id *id, int backlog)
@@ -992,6 +1050,12 @@ int rdma_listen(struct rdma_cm_id *id, i
int ret;
id_priv = container_of(id, struct rdma_id_private, id);
+ if (id_priv->state == CMA_IDLE) {
+ ret = cma_bind_any(id, AF_INET);
+ if (ret)
+ return ret;
+ }
+
if (!cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_LISTEN))
return -EINVAL;
@@ -999,23 +1063,22 @@ int rdma_listen(struct rdma_cm_id *id, i
switch (id->device->node_type) {
case IB_NODE_CA:
ret = cma_ib_listen(id_priv);
+ if (ret)
+ goto err;
break;
default:
ret = -ENOSYS;
- break;
+ goto err;
}
} else
- ret = cma_listen_on_all(id_priv);
-
- if (ret)
- goto err;
+ cma_listen_on_all(id_priv);
id_priv->backlog = backlog;
return 0;
err:
cma_comp_exch(id_priv, CMA_LISTEN, CMA_ADDR_BOUND);
return ret;
-};
+}
EXPORT_SYMBOL(rdma_listen);
static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec,
@@ -1252,15 +1315,10 @@ err:
static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
struct sockaddr *dst_addr)
{
- struct sockaddr_in addr_in;
-
if (src_addr && src_addr->sa_family)
return rdma_bind_addr(id, src_addr);
- else {
- memset(&addr_in, 0, sizeof addr_in);
- addr_in.sin_family = dst_addr->sa_family;
- return rdma_bind_addr(id, (struct sockaddr *) &addr_in);
- }
+ else
+ return cma_bind_any(id, dst_addr->sa_family);
}
int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
@@ -1281,7 +1339,7 @@ int rdma_resolve_addr(struct rdma_cm_id
atomic_inc(&id_priv->refcount);
memcpy(&id->route.addr.dst_addr, dst_addr, ip_addr_size(dst_addr));
- if (cma_loopback_addr(dst_addr))
+ if (cma_any_addr(dst_addr))
ret = cma_resolve_loopback(id_priv);
else
ret = rdma_resolve_ip(&id->route.addr.src_addr, dst_addr,
@@ -1298,32 +1356,140 @@ err:
}
EXPORT_SYMBOL(rdma_resolve_addr);
+static void cma_bind_port(struct rdma_bind_list *bind_list,
+ struct rdma_id_private *id_priv)
+{
+ struct sockaddr_in *sin;
+
+ sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
+ sin->sin_port = htons(bind_list->port);
+ id_priv->bind_list = bind_list;
+ hlist_add_head(&id_priv->node, &bind_list->owners);
+}
+
+static int cma_alloc_port(struct idr *ps, struct rdma_id_private *id_priv,
+ unsigned short snum)
+{
+ struct rdma_bind_list *bind_list;
+ int port, start, ret;
+
+ bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL);
+ if (!bind_list)
+ return -ENOMEM;
+
+ start = snum ? snum : sysctl_local_port_range[0];
+
+ do {
+ ret = idr_get_new_above(ps, bind_list, start, &port);
+ } while ((ret == -EAGAIN) && idr_pre_get(ps, GFP_KERNEL));
+
+ if (ret)
+ goto err;
+
+ if ((snum && port != snum) ||
+ (!snum && port > sysctl_local_port_range[1])) {
+ idr_remove(ps, port);
+ ret = -EADDRNOTAVAIL;
+ goto err;
+ }
+
+ bind_list->ps = ps;
+ bind_list->port = (unsigned short) port;
+ cma_bind_port(bind_list, id_priv);
+ return 0;
+err:
+ kfree(bind_list);
+ return ret;
+}
+
+static int cma_use_port(struct idr *ps, struct rdma_id_private *id_priv)
+{
+ struct rdma_id_private *cur_id;
+ struct sockaddr_in *sin, *cur_sin;
+ struct rdma_bind_list *bind_list;
+ struct hlist_node *node;
+ unsigned short snum;
+
+ sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
+ snum = ntohs(sin->sin_port);
+ if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
+ return -EACCES;
+
+ bind_list = idr_find(ps, snum);
+ if (!bind_list)
+ return cma_alloc_port(ps, id_priv, snum);
+
+ /*
+ * We don't support binding to any address if anyone is bound to
+ * a specific address on the same port.
+ */
+ if (cma_any_addr(&id_priv->id.route.addr.src_addr))
+ return -EADDRNOTAVAIL;
+
+ hlist_for_each_entry(cur_id, node, &bind_list->owners, node) {
+ if (cma_any_addr(&cur_id->id.route.addr.src_addr))
+ return -EADDRNOTAVAIL;
+
+ cur_sin = (struct sockaddr_in *) &cur_id->id.route.addr.src_addr;
+ if (sin->sin_addr.s_addr == cur_sin->sin_addr.s_addr)
+ return -EADDRINUSE;
+ }
+
+ cma_bind_port(bind_list, id_priv);
+ return 0;
+}
+
+static int cma_get_port(struct rdma_id_private *id_priv)
+{
+ struct idr *ps;
+ int ret;
+
+ switch (id_priv->id.ps) {
+ case RDMA_PS_SDP:
+ ps = &sdp_ps;
+ break;
+ case RDMA_PS_TCP:
+ ps = &tcp_ps;
+ break;
+ default:
+ return -EPROTONOSUPPORT;
+ }
+
+ mutex_lock(&lock);
+ if (cma_any_port(&id_priv->id.route.addr.src_addr))
+ ret = cma_alloc_port(ps, id_priv, 0);
+ else
+ ret = cma_use_port(ps, id_priv);
+ mutex_unlock(&lock);
+
+ return ret;
+}
+
int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
{
struct rdma_id_private *id_priv;
- struct rdma_dev_addr *dev_addr;
int ret;
if (addr->sa_family != AF_INET)
- return -EINVAL;
+ return -EAFNOSUPPORT;
id_priv = container_of(id, struct rdma_id_private, id);
if (!cma_comp_exch(id_priv, CMA_IDLE, CMA_ADDR_BOUND))
return -EINVAL;
- if (cma_any_addr(addr))
- ret = 0;
- else {
- dev_addr = &id->route.addr.dev_addr;
- ret = rdma_translate_ip(addr, dev_addr);
+ if (!cma_any_addr(addr)) {
+ ret = rdma_translate_ip(addr, &id->route.addr.dev_addr);
if (!ret)
ret = cma_acquire_dev(id_priv);
+ if (ret)
+ goto err;
}
+ memcpy(&id->route.addr.src_addr, addr, ip_addr_size(addr));
+ ret = cma_get_port(id_priv);
if (ret)
goto err;
- memcpy(&id->route.addr.src_addr, addr, ip_addr_size(addr));
return 0;
err:
cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_IDLE);
@@ -1331,8 +1497,8 @@ err:
}
EXPORT_SYMBOL(rdma_bind_addr);
-static void cma_format_hdr(void *hdr, enum rdma_port_space ps,
- struct rdma_route *route)
+static int cma_format_hdr(void *hdr, enum rdma_port_space ps,
+ struct rdma_route *route)
{
struct sockaddr_in *src4, *dst4;
struct cma_hdr *cma_hdr;
@@ -1344,7 +1510,8 @@ static void cma_format_hdr(void *hdr, en
switch (ps) {
case RDMA_PS_SDP:
sdp_hdr = hdr;
- sdp_hdr->sdp_version = SDP_VERSION;
+ if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION)
+ return -EINVAL;
sdp_set_ip_ver(sdp_hdr, 4);
sdp_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
sdp_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
@@ -1359,6 +1526,7 @@ static void cma_format_hdr(void *hdr, en
cma_hdr->port = src4->sin_port;
break;
}
+ return 0;
}
static int cma_connect_ib(struct rdma_id_private *id_priv,
@@ -1388,7 +1556,9 @@ static int cma_connect_ib(struct rdma_id
}
route = &id_priv->id.route;
- cma_format_hdr(private_data, id_priv->id.ps, route);
+ ret = cma_format_hdr(private_data, id_priv->id.ps, route);
+ if (ret)
+ goto out;
req.private_data = private_data;
req.primary_path = &route->path_rec[0];
@@ -1534,7 +1704,7 @@ int rdma_reject(struct rdma_cm_id *id, c
break;
}
return ret;
-};
+}
EXPORT_SYMBOL(rdma_reject);
int rdma_disconnect(struct rdma_cm_id *id)
@@ -1578,7 +1748,7 @@ static void cma_add_one(struct ib_device
if (!cma_dev->node_guid)
goto err;
- init_waitqueue_head(&cma_dev->wait);
+ init_completion(&cma_dev->comp);
atomic_set(&cma_dev->refcount, 1);
INIT_LIST_HEAD(&cma_dev->id_list);
ib_set_client_data(device, &cma_client, cma_dev);
@@ -1645,8 +1815,8 @@ static void cma_process_remove(struct cm
}
mutex_unlock(&lock);
- atomic_dec(&cma_dev->refcount);
- wait_event(cma_dev->wait, !atomic_read(&cma_dev->refcount));
+ cma_deref_dev(cma_dev);
+ wait_for_completion(&cma_dev->comp);
}
static void cma_remove_one(struct ib_device *device)
@@ -1687,6 +1857,8 @@ static void cma_cleanup(void)
{
ib_unregister_client(&cma_client);
destroy_workqueue(cma_wq);
+ idr_destroy(&sdp_ps);
+ idr_destroy(&tcp_ps);
}
module_init(cma_init);
diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
index 2e56f25..402c63d 100644
--- a/include/rdma/rdma_cm.h
+++ b/include/rdma/rdma_cm.h
@@ -91,7 +91,7 @@ struct rdma_cm_id;
*
* Notes: Users may not call rdma_destroy_id from this callback to destroy
* the passed in id, or a corresponding listen id. Returning a
- * non-zero value from the callback will destroy the corresponding id.
+ * non-zero value from the callback will destroy the passed in id.
*/
typedef int (*rdma_cm_event_handler)(struct rdma_cm_id *id,
struct rdma_cm_event *event);
@@ -241,13 +241,14 @@ int rdma_listen(struct rdma_cm_id *id, i
int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param);
/**
- * rdma_reject - Called on the passive side to reject a connection request.
+ * rdma_reject - Called to reject a connection request or response.
*/
int rdma_reject(struct rdma_cm_id *id, const void *private_data,
u8 private_data_len);
/**
- * rdma_disconnect - This function disconnects the associated QP.
+ * rdma_disconnect - This function disconnects the associated QP and
+ * transitions it into the error state.
*/
int rdma_disconnect(struct rdma_cm_id *id);
More information about the general mailing list