[openib-general] [PATCH] [CMA] support for SDP + standard protocol
Dan Bar Dov
bardov at gmail.com
Sun Dec 11 07:57:04 PST 2005
I would have preferred not to add upper-layer-aware code to the CMA,
but I guess I'm late for that discussion.
Regarding the patch below, it makes sense. Are you going to apply it to all
affected modules?
Dan
On 12/9/05, Sean Hefty <sean.hefty at intel.com> wrote:
> The following patch updates the CMA to support the IB socket-based
> protocol standard and SDP's private data format.
>
> The CMA now defines RDMA "port spaces". RDMA identifiers are associated
> with a user-specified port space at creation time.
>
> Please respond with any comments on the approach. Note that these
> changes have not been pushed up to userspace yet.
>
> Signed-off-by: Sean Hefty <sean.hefty at intel.com>
>
>
>
> Index: ulp/iser/iser_verbs.c
> ===================================================================
> --- ulp/iser/iser_verbs.c (revision 4356)
> +++ ulp/iser/iser_verbs.c (working copy)
> @@ -428,7 +428,8 @@ iser_connect(struct iser_conn *p_iser_co
> return -1;
> }
> p_iser_conn->cma_id = rdma_create_id(iser_cma_handler,
> - (void *)p_iser_conn);
> + (void *)p_iser_conn,
> + RDMA_PS_TCP);
> if (IS_ERR(p_iser_conn->cma_id)) {
> ret = PTR_ERR(p_iser_conn->cma_id);
> iser_err("rdma_create_id failed: %d\n", ret);
> Index: include/rdma/rdma_cm.h
> ===================================================================
> --- include/rdma/rdma_cm.h (revision 4356)
> +++ include/rdma/rdma_cm.h (working copy)
> @@ -54,6 +54,13 @@ enum rdma_cm_event_type {
> RDMA_CM_EVENT_DEVICE_REMOVAL,
> };
>
> +enum rdma_port_space {
> + RDMA_PS_SDP = 0x0001,
> + RDMA_PS_TCP = 0x0106,
> + RDMA_PS_UDP = 0x0111,
> + RDMA_PS_SCTP = 0x0183
> +};
> +
> struct rdma_addr {
> struct sockaddr src_addr;
> u8 src_pad[sizeof(struct sockaddr_in6) -
> @@ -97,11 +104,20 @@ struct rdma_cm_id {
> struct ib_qp *qp;
> rdma_cm_event_handler event_handler;
> struct rdma_route route;
> + enum rdma_port_space ps;
> u8 port_num;
> };
>
> +/**
> + * rdma_create_id - Create an RDMA identifier.
> + *
> + * @event_handler: User callback invoked to report events associated with the
> + * returned rdma_id.
> + * @context: User specified context associated with the id.
> + * @ps: RDMA port space.
> + */
> struct rdma_cm_id* rdma_create_id(rdma_cm_event_handler event_handler,
> - void *context);
> + void *context, enum rdma_port_space ps);
>
> void rdma_destroy_id(struct rdma_cm_id *id);
>
> Index: core/cma.c
> ===================================================================
> --- core/cma.c (revision 4356)
> +++ core/cma.c (working copy)
> @@ -110,21 +110,35 @@ struct rdma_id_private {
> u8 srq;
> };
>
> -struct cma_addr {
> - u8 version; /* CMA version: 7:4, IP version: 3:0 */
> - u8 reserved;
> - __u16 port;
> +union cma_ip_addr {
> + struct in6_addr ip6;
> struct {
> - union {
> - struct in6_addr ip6;
> - struct {
> - __u32 pad[3];
> - __u32 addr;
> - } ip4;
> - } ver;
> - } src_addr, dst_addr;
> + __u32 pad[3];
> + __u32 addr;
> + } ip4;
> +};
> +
> +struct cma_hdr {
> + u8 cma_version;
> + u8 ip_version; /* IP version: 7:4 */
> + __u16 port;
> + union cma_ip_addr src_addr;
> + union cma_ip_addr dst_addr;
> };
>
> +struct sdp_hh {
> + u8 sdp_version;
> + u8 ip_version; /* IP version: 7:4 */
> + u8 sdp_specific1[10];
> + __u16 port;
> + __u16 sdp_specific2;
> + union cma_ip_addr src_addr;
> + union cma_ip_addr dst_addr;
> +};
> +
> +#define CMA_VERSION 0x10
> +#define SDP_VERSION 0x22
> +
> static int cma_comp(struct rdma_id_private *id_priv, enum cma_state comp)
> {
> unsigned long flags;
> @@ -162,19 +176,24 @@ static enum cma_state cma_exch(struct rd
> return old;
> }
>
> -static inline u8 cma_get_ip_ver(struct cma_addr *addr)
> +static inline u8 cma_get_ip_ver(struct cma_hdr *hdr)
> {
> - return addr->version & 0xF;
> + return hdr->ip_version >> 4;
> }
>
> -static inline u8 cma_get_cma_ver(struct cma_addr *addr)
> +static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
> {
> - return addr->version >> 4;
> + hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);
> }
>
> -static inline void cma_set_vers(struct cma_addr *addr, u8 cma_ver, u8 ip_ver)
> +static inline u8 sdp_get_ip_ver(struct sdp_hh *hh)
> {
> - addr->version = (cma_ver << 4) + (ip_ver & 0xF);
> + return hh->ip_version >> 4;
> +}
> +
> +static inline void sdp_set_ip_ver(struct sdp_hh *hh, u8 ip_ver)
> +{
> + hh->ip_version = (ip_ver << 4) | (hh->ip_version & 0xF);
> }
>
> static void cma_attach_to_dev(struct rdma_id_private *id_priv,
> @@ -226,17 +245,18 @@ static void cma_release_remove(struct rd
> }
>
> struct rdma_cm_id* rdma_create_id(rdma_cm_event_handler event_handler,
> - void *context)
> + void *context, enum rdma_port_space ps)
> {
> struct rdma_id_private *id_priv;
>
> id_priv = kzalloc(sizeof *id_priv, GFP_KERNEL);
> if (!id_priv)
> - return NULL;
> + return ERR_PTR(-ENOMEM);
>
> id_priv->state = CMA_IDLE;
> id_priv->id.context = context;
> id_priv->id.event_handler = event_handler;
> + id_priv->id.ps = ps;
> spin_lock_init(&id_priv->lock);
> init_waitqueue_head(&id_priv->wait);
> atomic_set(&id_priv->refcount, 1);
> @@ -387,25 +407,93 @@ int rdma_init_qp_attr(struct rdma_cm_id
> }
> EXPORT_SYMBOL(rdma_init_qp_attr);
>
> -static int cma_verify_addr(struct cma_addr *addr,
> - struct sockaddr_in *ip_addr)
> +static inline int cma_any_addr(struct sockaddr *addr)
> {
> - if (cma_get_cma_ver(addr) != 1 || cma_get_ip_ver(addr) != 4)
> - return -EINVAL;
> + struct in6_addr *ip6;
>
> - if (ip_addr->sin_port != addr->port)
> - return -EINVAL;
> + if (addr->sa_family == AF_INET)
> + return ((struct sockaddr_in *) addr)->sin_addr.s_addr ==
> + INADDR_ANY;
> + else {
> + ip6 = &((struct sockaddr_in6 *) addr)->sin6_addr;
> + return (ip6->s6_addr32[0] | ip6->s6_addr32[1] |
> + ip6->s6_addr32[3] | ip6->s6_addr32[4]) == 0;
> + }
> +}
>
> - if (ip_addr->sin_addr.s_addr &&
> - (ip_addr->sin_addr.s_addr != addr->dst_addr.ver.ip4.addr))
> - return -EINVAL;
> +static int cma_get_net_info(void *hdr, enum rdma_port_space ps,
> + u8 *ip_ver, __u16 *port,
> + union cma_ip_addr **src, union cma_ip_addr **dst)
> +{
> + switch (ps) {
> + case RDMA_PS_SDP:
> + if (((struct sdp_hh *) hdr)->sdp_version != SDP_VERSION)
> + return -EINVAL;
>
> + *ip_ver = sdp_get_ip_ver(hdr);
> + *port = ((struct sdp_hh *) hdr)->port;
> + *src = &((struct sdp_hh *) hdr)->src_addr;
> + *dst = &((struct sdp_hh *) hdr)->dst_addr;
> + break;
> + default:
> + if (((struct cma_hdr *) hdr)->cma_version != CMA_VERSION)
> + return -EINVAL;
> +
> + *ip_ver = cma_get_ip_ver(hdr);
> + *port = ((struct cma_hdr *) hdr)->port;
> + *src = &((struct cma_hdr *) hdr)->src_addr;
> + *dst = &((struct cma_hdr *) hdr)->dst_addr;
> + break;
> + }
> return 0;
> }
>
> -static inline int cma_any_addr(struct sockaddr *addr)
> +static void cma_save_net_info(struct rdma_addr *addr,
> + struct rdma_addr *listen_addr,
> + u8 ip_ver, __u16 port,
> + union cma_ip_addr *src, union cma_ip_addr *dst)
> +{
> + struct sockaddr_in *listen4, *ip4;
> + struct sockaddr_in6 *listen6, *ip6;
> +
> + switch (ip_ver) {
> + case 4:
> + listen4 = (struct sockaddr_in *) &listen_addr->src_addr;
> + ip4 = (struct sockaddr_in *) &addr->src_addr;
> + ip4->sin_family = listen4->sin_family;
> + ip4->sin_addr.s_addr = dst->ip4.addr;
> + ip4->sin_port = listen4->sin_port;
> +
> + ip4 = (struct sockaddr_in *) &addr->dst_addr;
> + ip4->sin_family = listen4->sin_family;
> + ip4->sin_addr.s_addr = src->ip4.addr;
> + ip4->sin_port = port;
> + break;
> + case 6:
> + listen6 = (struct sockaddr_in6 *) &listen_addr->src_addr;
> + ip6 = (struct sockaddr_in6 *) &addr->src_addr;
> + ip6->sin6_family = listen6->sin6_family;
> + ip6->sin6_addr = dst->ip6;
> + ip6->sin6_port = listen6->sin6_port;
> +
> + ip6 = (struct sockaddr_in6 *) &addr->dst_addr;
> + ip6->sin6_family = listen6->sin6_family;
> + ip6->sin6_addr = src->ip6;
> + ip6->sin6_port = port;
> + break;
> + default:
> + break;
> + }
> +}
> +
> +static inline int cma_user_data_offset(enum rdma_port_space ps)
> {
> - return ((struct sockaddr_in *) addr)->sin_addr.s_addr == 0;
> + switch (ps) {
> + case RDMA_PS_SDP:
> + return 0;
> + default:
> + return sizeof(struct cma_hdr);
> + }
> }
>
> static int cma_notify_user(struct rdma_id_private *id_priv,
> @@ -640,53 +728,41 @@ static struct rdma_id_private* cma_new_i
> {
> struct rdma_id_private *id_priv;
> struct rdma_cm_id *id;
> - struct rdma_route *route;
> - struct sockaddr_in *ip_addr, *listen_addr;
> - struct ib_sa_path_rec *path_rec;
> - struct cma_addr *addr;
> - int num_paths;
> -
> - listen_addr = (struct sockaddr_in *) &listen_id->route.addr.src_addr;
> - if (cma_verify_addr(ib_event->private_data, listen_addr))
> - return NULL;
> + struct rdma_route *rt;
> + union cma_ip_addr *src, *dst;
> + __u16 port;
> + u8 ip_ver;
>
> - num_paths = 1 + (ib_event->param.req_rcvd.alternate_path != NULL);
> - path_rec = kmalloc(sizeof *path_rec * num_paths, GFP_KERNEL);
> - if (!path_rec)
> + id = rdma_create_id(listen_id->event_handler, listen_id->context,
> + listen_id->ps);
> + if (IS_ERR(id))
> return NULL;
>
> - id = rdma_create_id(listen_id->event_handler, listen_id->context);
> - if (!id)
> + rt = &id->route;
> + rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1;
> + rt->path_rec = kmalloc(sizeof *rt->path_rec * rt->num_paths, GFP_KERNEL);
> + if (!rt->path_rec)
> goto err;
>
> - addr = ib_event->private_data;
> - route = &id->route;
> + if (cma_get_net_info(ib_event->private_data, listen_id->ps,
> + &ip_ver, &port, &src, &dst))
> + goto err;
>
> - ip_addr = (struct sockaddr_in *) &route->addr.src_addr;
> - ip_addr->sin_family = listen_addr->sin_family;
> - ip_addr->sin_addr.s_addr = addr->dst_addr.ver.ip4.addr;
> - ip_addr->sin_port = listen_addr->sin_port;
> -
> - ip_addr = (struct sockaddr_in *) &route->addr.dst_addr;
> - ip_addr->sin_family = listen_addr->sin_family;
> - ip_addr->sin_addr.s_addr = addr->src_addr.ver.ip4.addr;
> - ip_addr->sin_port = addr->port;
> -
> - route->num_paths = num_paths;
> - route->path_rec = path_rec;
> - path_rec[0] = *ib_event->param.req_rcvd.primary_path;
> - if (num_paths == 2)
> - path_rec[1] = *ib_event->param.req_rcvd.alternate_path;
> -
> - route->addr.addr.ibaddr.sgid = path_rec->sgid;
> - route->addr.addr.ibaddr.dgid = path_rec->dgid;
> - route->addr.addr.ibaddr.pkey = be16_to_cpu(path_rec->pkey);
> + cma_save_net_info(&id->route.addr, &listen_id->route.addr,
> + ip_ver, port, src, dst);
> + rt->path_rec[0] = *ib_event->param.req_rcvd.primary_path;
> + if (rt->num_paths == 2)
> + rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;
> +
> + rt->addr.addr.ibaddr.sgid = rt->path_rec[0].sgid;
> + rt->addr.addr.ibaddr.dgid = rt->path_rec[0].dgid;
> + rt->addr.addr.ibaddr.pkey = be16_to_cpu(rt->path_rec[0].pkey);
>
> id_priv = container_of(id, struct rdma_id_private, id);
> id_priv->state = CMA_CONNECT;
> return id_priv;
> err:
> - kfree(path_rec);
> + rdma_destroy_id(id);
> return NULL;
> }
>
> @@ -708,7 +784,6 @@ static int cma_req_handler(struct ib_cm_
> goto out;
> }
>
> - conn_id->state = CMA_CONNECT;
> atomic_inc(&conn_id->dev_remove);
> ret = cma_acquire_ib_dev(conn_id, &conn_id->id.route.path_rec[0].sgid);
> if (ret) {
> @@ -722,7 +797,7 @@ static int cma_req_handler(struct ib_cm_
> cm_id->context = conn_id;
> cm_id->cm_handler = cma_ib_handler;
>
> - offset = sizeof(struct cma_addr);
> + offset = cma_user_data_offset(listen_id->id.ps);
> ret = cma_notify_user(conn_id, RDMA_CM_EVENT_CONNECT_REQUEST, 0,
> ib_event->private_data + offset,
> IB_CM_REQ_PRIVATE_DATA_SIZE - offset);
> @@ -738,16 +813,16 @@ out:
> return ret;
> }
>
> -static __be64 cma_get_service_id(struct sockaddr *addr)
> +static __be64 cma_get_service_id(enum rdma_port_space ps, struct sockaddr *addr)
> {
> - return cpu_to_be64(((u64)IB_OPENIB_OUI << 48) +
> + return cpu_to_be64(((u64)ps << 16) +
> ((struct sockaddr_in *) addr)->sin_port);
> }
>
> static void cma_set_compare_data(struct sockaddr *addr,
> struct ib_cm_private_data_compare *compare)
> {
> - struct cma_addr *data, *mask;
> + struct cma_hdr *data, *mask;
>
> memset(compare, 0, sizeof *compare);
> data = (void *) compare->data;
> @@ -755,19 +830,18 @@ static void cma_set_compare_data(struct
>
> switch (addr->sa_family) {
> case AF_INET:
> - cma_set_vers(data, 0, 4);
> - cma_set_vers(mask, 0, 0xF);
> - data->dst_addr.ver.ip4.addr = ((struct sockaddr_in *) addr)->
> - sin_addr.s_addr;
> - mask->dst_addr.ver.ip4.addr = ~0;
> + cma_set_ip_ver(data, 4);
> + cma_set_ip_ver(mask, 0xF);
> + data->dst_addr.ip4.addr = ((struct sockaddr_in *) addr)->
> + sin_addr.s_addr;
> + mask->dst_addr.ip4.addr = ~0;
> break;
> case AF_INET6:
> - cma_set_vers(data, 0, 6);
> - cma_set_vers(mask, 0, 0xF);
> - data->dst_addr.ver.ip6 = ((struct sockaddr_in6 *) addr)->
> - sin6_addr;
> - memset(&mask->dst_addr.ver.ip6, 1,
> - sizeof mask->dst_addr.ver.ip6);
> + cma_set_ip_ver(data, 6);
> + cma_set_ip_ver(mask, 0xF);
> + data->dst_addr.ip6 = ((struct sockaddr_in6 *) addr)->
> + sin6_addr;
> + memset(&mask->dst_addr.ip6, 1, sizeof mask->dst_addr.ip6);
> break;
> default:
> break;
> @@ -787,7 +861,7 @@ static int cma_ib_listen(struct rdma_id_
> return PTR_ERR(id_priv->cm_id);
>
> addr = &id_priv->id.route.addr.src_addr;
> - svc_id = cma_get_service_id(addr);
> + svc_id = cma_get_service_id(id_priv->id.ps, addr);
> if (cma_any_addr(addr))
> ret = ib_cm_listen(id_priv->cm_id, svc_id, 0, NULL);
> else {
> @@ -835,7 +909,7 @@ static void cma_listen_on_dev(struct rdm
> struct rdma_cm_id *id;
> int ret;
>
> - id = rdma_create_id(cma_listen_handler, id_priv);
> + id = rdma_create_id(cma_listen_handler, id_priv, id_priv->id.ps);
> if (IS_ERR(id))
> return;
>
> @@ -1099,19 +1173,34 @@ err:
> }
> EXPORT_SYMBOL(rdma_bind_addr);
>
> -static void cma_format_addr(struct cma_addr *addr, struct rdma_route *route)
> +static void cma_format_hdr(void *hdr, enum rdma_port_space ps,
> + struct rdma_route *route)
> {
> - struct sockaddr_in *ip_addr;
> -
> - memset(addr, 0, sizeof *addr);
> - cma_set_vers(addr, 1, 4);
> -
> - ip_addr = (struct sockaddr_in *) &route->addr.src_addr;
> - addr->src_addr.ver.ip4.addr = ip_addr->sin_addr.s_addr;
> -
> - ip_addr = (struct sockaddr_in *) &route->addr.dst_addr;
> - addr->dst_addr.ver.ip4.addr = ip_addr->sin_addr.s_addr;
> - addr->port = ip_addr->sin_port;
> + struct sockaddr_in *src4, *dst4;
> + struct cma_hdr *cma_hdr;
> + struct sdp_hh *sdp_hdr;
> +
> + src4 = (struct sockaddr_in *) &route->addr.src_addr;
> + dst4 = (struct sockaddr_in *) &route->addr.dst_addr;
> +
> + switch (ps) {
> + case RDMA_PS_SDP:
> + sdp_hdr = hdr;
> + sdp_hdr->sdp_version = SDP_VERSION;
> + sdp_set_ip_ver(sdp_hdr, 4);
> + sdp_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
> + sdp_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
> + sdp_hdr->port = src4->sin_port;
> + break;
> + default:
> + cma_hdr = hdr;
> + cma_hdr->cma_version = CMA_VERSION;
> + cma_set_ip_ver(cma_hdr, 4);
> + cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
> + cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
> + cma_hdr->port = src4->sin_port;
> + break;
> + }
> }
>
> static int cma_connect_ib(struct rdma_id_private *id_priv,
> @@ -1119,17 +1208,20 @@ static int cma_connect_ib(struct rdma_id
> {
> struct ib_cm_req_param req;
> struct rdma_route *route;
> - struct cma_addr *addr;
> void *private_data;
> - int ret;
> + int offset, ret;
>
> memset(&req, 0, sizeof req);
> - req.private_data_len = sizeof *addr + conn_param->private_data_len;
> -
> - private_data = kmalloc(req.private_data_len, GFP_ATOMIC);
> + offset = cma_user_data_offset(id_priv->id.ps);
> + req.private_data_len = offset + conn_param->private_data_len;
> + private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
> if (!private_data)
> return -ENOMEM;
>
> + if (conn_param->private_data && conn_param->private_data_len)
> + memcpy(private_data + offset, conn_param->private_data,
> + conn_param->private_data_len);
> +
> id_priv->cm_id = ib_create_cm_id(id_priv->id.device, cma_ib_handler,
> id_priv);
> if (IS_ERR(id_priv->cm_id)) {
> @@ -1137,20 +1229,16 @@ static int cma_connect_ib(struct rdma_id
> goto out;
> }
>
> - addr = private_data;
> route = &id_priv->id.route;
> - cma_format_addr(addr, route);
> -
> - if (conn_param->private_data && conn_param->private_data_len)
> - memcpy(addr + 1, conn_param->private_data,
> - conn_param->private_data_len);
> + cma_format_hdr(private_data, id_priv->id.ps, route);
> req.private_data = private_data;
>
> req.primary_path = &route->path_rec[0];
> if (route->num_paths == 2)
> req.alternate_path = &route->path_rec[1];
>
> - req.service_id = cma_get_service_id(&route->addr.dst_addr);
> + req.service_id = cma_get_service_id(id_priv->id.ps,
> + &route->addr.dst_addr);
> req.qp_num = id_priv->qp_num;
> req.qp_type = id_priv->qp_type;
> req.starting_psn = id_priv->seq_num;
> @@ -1317,23 +1405,6 @@ out:
> }
> EXPORT_SYMBOL(rdma_disconnect);
>
> -/* TODO: add this to the device structure - see Roland's patch */
> -static __be64 get_ca_guid(struct ib_device *device)
> -{
> - struct ib_device_attr *device_attr;
> - __be64 guid;
> - int ret;
> -
> - device_attr = kmalloc(sizeof *device_attr, GFP_KERNEL);
> - if (!device_attr)
> - return 0;
> -
> - ret = ib_query_device(device, device_attr);
> - guid = ret ? 0 : device_attr->node_guid;
> - kfree(device_attr);
> - return guid;
> -}
> -
> static void cma_add_one(struct ib_device *device)
> {
> struct cma_device *cma_dev;
> @@ -1344,7 +1415,7 @@ static void cma_add_one(struct ib_device
> return;
>
> cma_dev->device = device;
> - cma_dev->node_guid = get_ca_guid(device);
> + cma_dev->node_guid = device->node_guid;
> if (!cma_dev->node_guid)
> goto err;
>
> Index: core/ucma.c
> ===================================================================
> --- core/ucma.c (revision 4356)
> +++ core/ucma.c (working copy)
> @@ -287,7 +287,7 @@ static ssize_t ucma_create_id(struct ucm
> return -ENOMEM;
>
> ctx->uid = cmd.uid;
> - ctx->cm_id = rdma_create_id(ucma_event_handler, ctx);
> + ctx->cm_id = rdma_create_id(ucma_event_handler, ctx, RDMA_PS_TCP);
> if (IS_ERR(ctx->cm_id)) {
> ret = PTR_ERR(ctx->cm_id);
> goto err1;
>
>
>
> _______________________________________________
> openib-general mailing list
> openib-general at openib.org
> http://openib.org/mailman/listinfo/openib-general
>
> To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
>
More information about the general
mailing list