[ofa-general][RFC] IPv6 support for RDMA CM

Aleksey Senin alekseys at voltaire.com
Tue Jul 29 10:11:01 PDT 2008


This patch adds IPv6 support for RDMA CM operations. It provides basic
support and has the limitations listed below:

No support for link-local addresses.
Mixed IPv6 and IPv4 addresses cannot be used as source and destination.
No checks are performed against the ANYCAST address type.


Details:

struct addr_req is extended to support the sockaddr_in6 family.
Functions like addr_resolve_local and addr_resolve_remote are changed to take
a pointer to a generic sockaddr struct instead of sockaddr_in. These functions
act as the upper layer: after parsing sa_family they call the corresponding
IPv6 or IPv4 function.
To perform neighbor discovery, two symbols must be exported:
    nd_tbl
    ndisc_send_ns

Points that must be improved, in addition to the limitations specified above:
    recognition of the local device by a given IP address
    sending neighbor discovery requests without exporting symbols


Waiting for feedback, suggestions.  


-----------------

diff --git a/drivers/infiniband/core/addr.c
b/drivers/infiniband/core/addr.c
index 09a2bec..3c3f665 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -38,10 +38,13 @@
 #include <linux/workqueue.h>
 #include <linux/if_arp.h>
 #include <net/arp.h>
+#include <net/ndisc.h>
+#include <net/ip6_route.h>
 #include <net/neighbour.h>
 #include <net/route.h>
 #include <net/netevent.h>
 #include <rdma/ib_addr.h>
+#include <net/addrconf.h>
 
 MODULE_AUTHOR("Sean Hefty");
 MODULE_DESCRIPTION("IB Address Translation");
@@ -50,7 +53,9 @@ MODULE_LICENSE("Dual BSD/GPL");
 struct addr_req {
 	struct list_head list;
 	struct sockaddr src_addr;
+	u8 src_pad[sizeof(struct sockaddr_in6) - sizeof(struct sockaddr)];
 	struct sockaddr dst_addr;
+	u8 dst_pad[sizeof(struct sockaddr_in6) - sizeof(struct sockaddr)];
 	struct rdma_dev_addr *addr;
 	struct rdma_addr_client *client;
 	void *context;
@@ -113,15 +118,24 @@ EXPORT_SYMBOL(rdma_copy_addr);
 int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr
*dev_addr)
 {
 	struct net_device *dev;
-	__be32 ip = ((struct sockaddr_in *) addr)->sin_addr.s_addr;
 	int ret;
 
-	dev = ip_dev_find(&init_net, ip);
-	if (!dev)
+	if (addr->sa_family  == AF_INET ) {
+	    __be32 ip = ((struct sockaddr_in *) addr)->sin_addr.s_addr;
+	    dev = ip_dev_find(&init_net, ip);
+	    if (!dev)
 		return -EADDRNOTAVAIL;
 
-	ret = rdma_copy_addr(dev_addr, dev, NULL);
-	dev_put(dev);
+	    ret = rdma_copy_addr(dev_addr, dev, NULL);
+	    dev_put(dev);
+	} else {
+	    for_each_netdev(&init_net, dev)
+		if ( ipv6_chk_addr(&init_net,  &((struct
sockaddr_in6*)addr)->sin6_addr, dev, 1) ) {
+		    ret = rdma_copy_addr(dev_addr, dev, NULL);
+		    break;
+		}
+	}
+
 	return ret;
 }
 EXPORT_SYMBOL(rdma_translate_ip);
@@ -171,7 +185,8 @@ static void addr_send_arp(struct sockaddr_in
*dst_in)
 	ip_rt_put(rt);
 }
 
-static int addr_resolve_remote(struct sockaddr_in *src_in,
+
+static int addr4_resolve_remote(struct sockaddr_in *src_in,
 			       struct sockaddr_in *dst_in,
 			       struct rdma_dev_addr *addr)
 {
@@ -220,10 +235,78 @@ out:
 	return ret;
 }
 
+static int addr6_resolve_remote(struct sockaddr_in6 *src_in,
+			       struct sockaddr_in6 *dst_in,
+			       struct rdma_dev_addr *addr)
+{
+
+	struct neighbour *neigh;
+	int ret=-ENODATA;
+	struct dst_entry *dst;
+	struct in6_addr *target;
+	struct in6_addr mcaddr;
+
+	struct flowi fl = {
+		.nl_u = {
+			.ip6_u = {
+				.daddr = dst_in->sin6_addr,
+				.saddr = src_in->sin6_addr,
+			},
+		},
+	};
+
+	dst = ip6_route_output(&init_net, NULL, &fl);
+	if(!dst)
+		goto out;
+
+	/* If the device does ARP internally, return 'done' */
+	if (dst->dev->flags & IFF_NOARP) {
+		ret = rdma_copy_addr(addr, dst->dev, NULL);
+		goto put;
+	}
+
+	neigh = ndisc_get_neigh(dst->dev, &dst_in->sin6_addr);
+	if (!neigh) {
+		ret = -ENOMEM;
+		goto put;
+	}
+
+	if (!(neigh->nud_state & NUD_VALID)) {
+		target = (struct in6_addr*)&neigh->primary_key;
+		addrconf_addr_solict_mult(target, &mcaddr);
+		ndisc_send_ns(dst->dev, neigh, target, &mcaddr , NULL);
+		/* ret = rdma_copy_addr(addr, neigh->dev, neigh->ha); */
+
+		ret = -ENODATA;
+		goto release;
+	}
+	ret = rdma_copy_addr(addr, neigh->dev, neigh->ha);
+
+
+release:
+	neigh_release(neigh);
+put:
+	dst_release(dst);
+out:
+	return ret;
+}
+
+static int addr_resolve_remote(struct sockaddr *src_in,
+					struct sockaddr *dst_in,
+					struct rdma_dev_addr *addr)
+{
+	int ret = -ENODATA;
+	if ( src_in->sa_family == AF_INET ) {
+		ret = addr4_resolve_remote((struct sockaddr_in*)src_in, (struct
sockaddr_in*)dst_in, addr);
+	} else if ( src_in->sa_family == AF_INET6 ) {
+		ret = addr6_resolve_remote((struct sockaddr_in6*)src_in, (struct
sockaddr_in6*)dst_in, addr);
+	}
+	return ret;
+}
+
 static void process_req(struct work_struct *work)
 {
 	struct addr_req *req, *temp_req;
-	struct sockaddr_in *src_in, *dst_in;
 	struct list_head done_list;
 
 	INIT_LIST_HEAD(&done_list);
@@ -231,10 +314,7 @@ static void process_req(struct work_struct *work)
 	mutex_lock(&lock);
 	list_for_each_entry_safe(req, temp_req, &req_list, list) {
 		if (req->status == -ENODATA) {
-			src_in = (struct sockaddr_in *) &req->src_addr;
-			dst_in = (struct sockaddr_in *) &req->dst_addr;
-			req->status = addr_resolve_remote(src_in, dst_in,
-							  req->addr);
+		    req->status = addr_resolve_remote(&req->src_addr, &req->dst_addr,
req->addr);
 			if (req->status && time_after_eq(jiffies, req->timeout))
 				req->status = -ETIMEDOUT;
 			else if (req->status == -ENODATA)
@@ -258,7 +338,7 @@ static void process_req(struct work_struct *work)
 	}
 }
 
-static int addr_resolve_local(struct sockaddr_in *src_in,
+static int addr4_resolve_local(struct sockaddr_in *src_in,
 			      struct sockaddr_in *dst_in,
 			      struct rdma_dev_addr *addr)
 {
@@ -289,6 +369,24 @@ static int addr_resolve_local(struct sockaddr_in
*src_in,
 	return ret;
 }
 
+static int addr6_resolve_local(struct sockaddr_in6 *src_in,
+				struct sockaddr_in6 *dst_in,
+				struct rdma_dev_addr *addr)
+{
+	return -EADDRNOTAVAIL;
+}
+
+static int addr_resolve_local(struct sockaddr *src_in,
+			      struct sockaddr *dst_in,
+			      struct rdma_dev_addr *addr)
+{
+	if ( src_in->sa_family == AF_INET ) {
+		 return addr4_resolve_local((struct sockaddr_in*)src_in, (struct
sockaddr_in*)dst_in, addr);
+	} else
+		 return addr6_resolve_local((struct sockaddr_in6*)src_in, (struct
sockaddr_in6*)dst_in, addr);
+}
+
+
 int rdma_resolve_ip(struct rdma_addr_client *client,
 		    struct sockaddr *src_addr, struct sockaddr *dst_addr,
 		    struct rdma_dev_addr *addr, int timeout_ms,
@@ -296,7 +394,6 @@ int rdma_resolve_ip(struct rdma_addr_client *client,
 				     struct rdma_dev_addr *addr, void *context),
 		    void *context)
 {
-	struct sockaddr_in *src_in, *dst_in;
 	struct addr_req *req;
 	int ret = 0;
 
@@ -313,12 +410,10 @@ int rdma_resolve_ip(struct rdma_addr_client
*client,
 	req->client = client;
 	atomic_inc(&client->refcount);
 
-	src_in = (struct sockaddr_in *) &req->src_addr;
-	dst_in = (struct sockaddr_in *) &req->dst_addr;
-
-	req->status = addr_resolve_local(src_in, dst_in, addr);
-	if (req->status == -EADDRNOTAVAIL)
-		req->status = addr_resolve_remote(src_in, dst_in, addr);
+	req->status = addr_resolve_local(&req->src_addr, &req->dst_addr,
addr);
+	if (req->status == -EADDRNOTAVAIL) {
+		req->status = addr_resolve_remote(&req->src_addr, &req->dst_addr,
addr);
+	}
 
 	switch (req->status) {
 	case 0:
@@ -328,7 +423,8 @@ int rdma_resolve_ip(struct rdma_addr_client *client,
 	case -ENODATA:
 		req->timeout = msecs_to_jiffies(timeout_ms) + jiffies;
 		queue_req(req);
-		addr_send_arp(dst_in);
+		if(req->dst_addr.sa_family == AF_INET )
+			addr_send_arp((struct sockaddr_in*)&req->dst_addr);
 		break;
 	default:
 		ret = req->status;
diff --git a/drivers/infiniband/core/cma.c
b/drivers/infiniband/core/cma.c
index e980ff3..87ae995 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -2074,7 +2074,7 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct
sockaddr *addr)
 	struct rdma_id_private *id_priv;
 	int ret;
 
-	if (addr->sa_family != AF_INET)
+	if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6 )
 		return -EAFNOSUPPORT;
 
 	id_priv = container_of(id, struct rdma_id_private, id);
@@ -2115,30 +2115,58 @@ static int cma_format_hdr(void *hdr, enum
rdma_port_space ps,
 			  struct rdma_route *route)
 {
 	struct sockaddr_in *src4, *dst4;
+	struct sockaddr_in6 *src6, *dst6;
 	struct cma_hdr *cma_hdr;
 	struct sdp_hh *sdp_hdr;
-
-	src4 = (struct sockaddr_in *) &route->addr.src_addr;
-	dst4 = (struct sockaddr_in *) &route->addr.dst_addr;
-
-	switch (ps) {
-	case RDMA_PS_SDP:
-		sdp_hdr = hdr;
-		if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION)
-			return -EINVAL;
-		sdp_set_ip_ver(sdp_hdr, 4);
-		sdp_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
-		sdp_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
-		sdp_hdr->port = src4->sin_port;
-		break;
-	default:
-		cma_hdr = hdr;
-		cma_hdr->cma_version = CMA_VERSION;
-		cma_set_ip_ver(cma_hdr, 4);
-		cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
-		cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
-		cma_hdr->port = src4->sin_port;
-		break;
+	if ( route->addr.src_addr.sa_family == AF_INET) {
+		src4 = (struct sockaddr_in *) &route->addr.src_addr;
+		dst4 = (struct sockaddr_in *) &route->addr.dst_addr;
+	}else if (route->addr.src_addr.sa_family == AF_INET6) {
+		src6 = (struct sockaddr_in6 *) &route->addr.src_addr;
+		dst6 = (struct sockaddr_in6 *) &route->addr.dst_addr;
+	}else
+	    return -EINVAL;
+
+	if ( route->addr.src_addr.sa_family == AF_INET) {
+	    switch (ps) {
+		case RDMA_PS_SDP:
+                        sdp_hdr = hdr;
+                        if (sdp_get_majv(sdp_hdr->sdp_version) !=
SDP_MAJ_VERSION)
+			   return -EINVAL;
+                        sdp_set_ip_ver(sdp_hdr, 4);
+                        sdp_hdr->src_addr.ip4.addr =
src4->sin_addr.s_addr;
+                        sdp_hdr->dst_addr.ip4.addr =
dst4->sin_addr.s_addr;
+                        sdp_hdr->port = src4->sin_port;
+                        break;
+		default:
+			cma_hdr = hdr;
+			cma_hdr->cma_version = CMA_VERSION;
+			cma_set_ip_ver(cma_hdr, 4);
+			cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
+			cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
+			cma_hdr->port = src4->sin_port;
+			break;
+	    }
+	}else {
+	    switch (ps) {
+		case RDMA_PS_SDP:
+			sdp_hdr = hdr;
+			if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION)
+			    return -EINVAL;
+			sdp_set_ip_ver(sdp_hdr, 6);
+			sdp_hdr->src_addr.ip6 = src6->sin6_addr;
+			sdp_hdr->dst_addr.ip6 = dst6->sin6_addr;
+			sdp_hdr->port = src6->sin6_port;
+			break;
+		default:
+			cma_hdr = hdr;
+			cma_hdr->cma_version = CMA_VERSION;
+			cma_set_ip_ver(cma_hdr, 6);
+			cma_hdr->src_addr.ip6 = src6->sin6_addr;
+			cma_hdr->dst_addr.ip6 = dst6->sin6_addr;
+			cma_hdr->port = src6->sin6_port;
+			break;
+	}
 	}
 	return 0;
 }
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index beb48e3..fb1f59c 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -157,6 +157,7 @@ struct neigh_table nd_tbl = {
 	.gc_thresh3 =	1024,
 };
 
+EXPORT_SYMBOL(nd_tbl);
 /* ND options */
 struct ndisc_options {
 	struct nd_opt_hdr *nd_opt_array[__ND_OPT_ARRAY_MAX];
@@ -586,6 +587,8 @@ void ndisc_send_ns(struct net_device *dev, struct
neighbour *neigh,
 		     !ipv6_addr_any(saddr) ? ND_OPT_SOURCE_LL_ADDR : 0);
 }
 
+EXPORT_SYMBOL(ndisc_send_ns);
+
 void ndisc_send_rs(struct net_device *dev, const struct in6_addr
*saddr,
 		   const struct in6_addr *daddr)
 {

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openfabrics.org/pipermail/general/attachments/20080729/545cfba3/attachment.html>


More information about the general mailing list