[ofa-general][RFC] IPv6 support for RDMA CM

Aleksey Senin alekseys at voltaire.com
Wed Jul 30 04:22:04 PDT 2008


Here is a better-formatted version of the patch.

This patch adds IPv6 support for RDMA CM operations. It provides basic
support and has the limitations listed below:

Link-local addresses are not supported.
IPv6 and IPv4 addresses cannot be mixed as source and destination.
No checks are performed against the ANYCAST address type.


Details:

struct addr_req is extended to hold sockaddr_in6 addresses.
Functions such as addr_resolve_local and addr_resolve_remote now take a
pointer to the generic sockaddr struct instead of sockaddr_in. They act
as an upper layer: after checking sa_family they call the corresponding
IPv6 or IPv4 function.
To perform neighbour discovery, two symbols have to be exported:
    nd_tbl
    ndisc_send_ns

In addition to the limitations listed above, the following points must be
improved:
    recognition of the local device for a given IP address
    sending neighbour discovery requests without exporting symbols


Feedback and suggestions are welcome.

------------

drivers/infiniband/core/addr.c |  158 +++++++++++++++++++++++++++-------
drivers/infiniband/core/cma.c  |   72 +++++++++++++-----
net/ipv6/ndisc.c               |    2
3 files changed, 186 insertions(+), 46 deletions(-)



diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 09a2bec..05ab1d0 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -38,10 +38,13 @@
 #include <linux/workqueue.h>
 #include <linux/if_arp.h>
 #include <net/arp.h>
+#include <net/ndisc.h>
+#include <net/ip6_route.h>
 #include <net/neighbour.h>
 #include <net/route.h>
 #include <net/netevent.h>
 #include <rdma/ib_addr.h>
+#include <net/addrconf.h>
 
 MODULE_AUTHOR("Sean Hefty");
 MODULE_DESCRIPTION("IB Address Translation");
@@ -50,7 +53,9 @@ MODULE_LICENSE("Dual BSD/GPL");
 struct addr_req {
 	struct list_head list;
 	struct sockaddr src_addr;
+	u8 src_pad[sizeof(struct sockaddr_in6) - sizeof(struct sockaddr)];
 	struct sockaddr dst_addr;
+	u8 dst_pad[sizeof(struct sockaddr_in6) - sizeof(struct sockaddr)];
 	struct rdma_dev_addr *addr;
 	struct rdma_addr_client *client;
 	void *context;
@@ -113,15 +118,27 @@ EXPORT_SYMBOL(rdma_copy_addr);
 int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
 {
 	struct net_device *dev;
-	__be32 ip = ((struct sockaddr_in *) addr)->sin_addr.s_addr;
 	int ret;
 
-	dev = ip_dev_find(&init_net, ip);
-	if (!dev)
-		return -EADDRNOTAVAIL;
+	if (addr->sa_family == AF_INET) {
+		__be32 ip = ((struct sockaddr_in *)addr)->sin_addr.s_addr;
+		dev = ip_dev_find(&init_net, ip);
+
+		if (!dev)
+			return -EADDRNOTAVAIL;
+
+		ret = rdma_copy_addr(dev_addr, dev, NULL);
+		dev_put(dev);
+	} else {
+		struct in6_addr *i = &((struct sockaddr_in6 *)addr)->sin6_addr;
+		for_each_netdev(&init_net, dev) {
+			if (ipv6_chk_addr(&init_net, i , dev, 1)) {
+				ret = rdma_copy_addr(dev_addr, dev, NULL);
+				break;
+			}
+		}
+	}
 
-	ret = rdma_copy_addr(dev_addr, dev, NULL);
-	dev_put(dev);
 	return ret;
 }
 EXPORT_SYMBOL(rdma_translate_ip);
@@ -171,7 +188,8 @@ static void addr_send_arp(struct sockaddr_in *dst_in)
 	ip_rt_put(rt);
 }
 
-static int addr_resolve_remote(struct sockaddr_in *src_in,
+
+static int addr4_resolve_remote(struct sockaddr_in *src_in,
 			       struct sockaddr_in *dst_in,
 			       struct rdma_dev_addr *addr)
 {
@@ -220,10 +238,78 @@ out:
 	return ret;
 }
 
+static int addr6_resolve_remote(struct sockaddr_in6 *src_in,
+			       struct sockaddr_in6 *dst_in,
+			       struct rdma_dev_addr *addr)
+{
+
+	struct neighbour *neigh;
+	int ret = -ENODATA;
+	struct dst_entry *dst;
+	struct in6_addr *target;
+	struct in6_addr mcaddr;
+
+	struct flowi fl = {
+		.nl_u = {
+			.ip6_u = {
+				.daddr = dst_in->sin6_addr,
+				.saddr = src_in->sin6_addr,
+			},
+		},
+	};
+
+	dst = ip6_route_output(&init_net, NULL, &fl);
+	if (!dst)
+		goto out;
+
+	/* If the device does ARP internally, return 'done' */
+	if (dst->dev->flags & IFF_NOARP) {
+		ret = rdma_copy_addr(addr, dst->dev, NULL);
+		goto put;
+	}
+
+	neigh = ndisc_get_neigh(dst->dev, &dst_in->sin6_addr);
+	if (!neigh) {
+		ret = -ENOMEM;
+		goto put;
+	}
+
+	if (!(neigh->nud_state & NUD_VALID)) {
+		target = (struct in6_addr *)&neigh->primary_key;
+		addrconf_addr_solict_mult(target, &mcaddr);
+		ndisc_send_ns(dst->dev, neigh, target, &mcaddr , NULL);
+		ret = -ENODATA;
+		goto release;
+	}
+	ret = rdma_copy_addr(addr, neigh->dev, neigh->ha);
+
+
+release:
+	neigh_release(neigh);
+put:
+	dst_release(dst);
+out:
+	return ret;
+}
+
+static int addr_resolve_remote(struct sockaddr *src_in,
+					struct sockaddr *dst_in,
+					struct rdma_dev_addr *addr)
+{
+	int ret = -ENODATA;
+	if (src_in->sa_family == AF_INET) {
+		ret = addr4_resolve_remote((struct sockaddr_in *)src_in, \
+		(struct sockaddr_in *)dst_in, addr);
+	} else if (src_in->sa_family == AF_INET6) {
+		ret = addr6_resolve_remote((struct sockaddr_in6 *)src_in, \
+		(struct sockaddr_in6 *)dst_in, addr);
+	}
+	return ret;
+}
+
 static void process_req(struct work_struct *work)
 {
 	struct addr_req *req, *temp_req;
-	struct sockaddr_in *src_in, *dst_in;
 	struct list_head done_list;
 
 	INIT_LIST_HEAD(&done_list);
@@ -231,14 +317,13 @@ static void process_req(struct work_struct *work)
 	mutex_lock(&lock);
 	list_for_each_entry_safe(req, temp_req, &req_list, list) {
 		if (req->status == -ENODATA) {
-			src_in = (struct sockaddr_in *) &req->src_addr;
-			dst_in = (struct sockaddr_in *) &req->dst_addr;
-			req->status = addr_resolve_remote(src_in, dst_in,
-							  req->addr);
-			if (req->status && time_after_eq(jiffies, req->timeout))
-				req->status = -ETIMEDOUT;
-			else if (req->status == -ENODATA)
-				continue;
+		    req->status = addr_resolve_remote(&req->src_addr, \
+			    &req->dst_addr, req->addr);
+
+		    if (req->status && time_after_eq(jiffies, req->timeout))
+			req->status = -ETIMEDOUT;
+		    else if (req->status == -ENODATA)
+			continue;
 		}
 		list_move_tail(&req->list, &done_list);
 	}
@@ -258,7 +343,7 @@ static void process_req(struct work_struct *work)
 	}
 }
 
-static int addr_resolve_local(struct sockaddr_in *src_in,
+static int addr4_resolve_local(struct sockaddr_in *src_in,
 			      struct sockaddr_in *dst_in,
 			      struct rdma_dev_addr *addr)
 {
@@ -289,6 +374,30 @@ static int addr_resolve_local(struct sockaddr_in *src_in,
 	return ret;
 }
 
+static int addr6_resolve_local(struct sockaddr_in6 *src_in,
+				struct sockaddr_in6 *dst_in,
+				struct rdma_dev_addr *addr)
+{
+	return -EADDRNOTAVAIL;
+}
+
+static int addr_resolve_local(struct sockaddr *src_in,
+			      struct sockaddr *dst_in,
+			      struct rdma_dev_addr *addr)
+{
+	int ret;
+
+	if (src_in->sa_family == AF_INET) {
+		ret = addr4_resolve_local((struct sockaddr_in *)src_in, \
+		(struct sockaddr_in *)dst_in, addr);
+	} else {
+		ret = addr6_resolve_local((struct sockaddr_in6 *)src_in, \
+		(struct sockaddr_in6 *)dst_in, addr);
+	}
+	return ret;
+}
+
+
 int rdma_resolve_ip(struct rdma_addr_client *client,
 		    struct sockaddr *src_addr, struct sockaddr *dst_addr,
 		    struct rdma_dev_addr *addr, int timeout_ms,
@@ -296,7 +405,6 @@ int rdma_resolve_ip(struct rdma_addr_client *client,
 				     struct rdma_dev_addr *addr, void *context),
 		    void *context)
 {
-	struct sockaddr_in *src_in, *dst_in;
 	struct addr_req *req;
 	int ret = 0;
 
@@ -313,12 +421,11 @@ int rdma_resolve_ip(struct rdma_addr_client *client,
 	req->client = client;
 	atomic_inc(&client->refcount);
 
-	src_in = (struct sockaddr_in *) &req->src_addr;
-	dst_in = (struct sockaddr_in *) &req->dst_addr;
-
-	req->status = addr_resolve_local(src_in, dst_in, addr);
-	if (req->status == -EADDRNOTAVAIL)
-		req->status = addr_resolve_remote(src_in, dst_in, addr);
+	req->status = addr_resolve_local(&req->src_addr, &req->dst_addr, addr);
+	if (req->status == -EADDRNOTAVAIL) {
+		req->status = addr_resolve_remote(&req->src_addr, \
+		    &req->dst_addr,  addr);
+	}
 
 	switch (req->status) {
 	case 0:
@@ -328,7 +435,8 @@ int rdma_resolve_ip(struct rdma_addr_client *client,
 	case -ENODATA:
 		req->timeout = msecs_to_jiffies(timeout_ms) + jiffies;
 		queue_req(req);
-		addr_send_arp(dst_in);
+		if (req->dst_addr.sa_family == AF_INET)
+			addr_send_arp((struct sockaddr_in *)&req->dst_addr);
 		break;
 	default:
 		ret = req->status;
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index e980ff3..0a1a413 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -2074,7 +2074,7 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
 	struct rdma_id_private *id_priv;
 	int ret;
 
-	if (addr->sa_family != AF_INET)
+	if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6)
 		return -EAFNOSUPPORT;
 
 	id_priv = container_of(id, struct rdma_id_private, id);
@@ -2115,30 +2115,60 @@ static int cma_format_hdr(void *hdr, enum rdma_port_space ps,
 			  struct rdma_route *route)
 {
 	struct sockaddr_in *src4, *dst4;
+	struct sockaddr_in6 *src6, *dst6;
 	struct cma_hdr *cma_hdr;
 	struct sdp_hh *sdp_hdr;
+	if (route->addr.src_addr.sa_family == AF_INET) {
+		src4 = (struct sockaddr_in *) &route->addr.src_addr;
+		dst4 = (struct sockaddr_in *) &route->addr.dst_addr;
+	} else if (route->addr.src_addr.sa_family == AF_INET6) {
+		src6 = (struct sockaddr_in6 *) &route->addr.src_addr;
+		dst6 = (struct sockaddr_in6 *) &route->addr.dst_addr;
+	} else
+	    return -EINVAL;
 
-	src4 = (struct sockaddr_in *) &route->addr.src_addr;
-	dst4 = (struct sockaddr_in *) &route->addr.dst_addr;
+	if (route->addr.src_addr.sa_family == AF_INET) {
+		switch (ps) {
+		case RDMA_PS_SDP:
+			sdp_hdr = hdr;
 
-	switch (ps) {
-	case RDMA_PS_SDP:
-		sdp_hdr = hdr;
-		if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION)
-			return -EINVAL;
-		sdp_set_ip_ver(sdp_hdr, 4);
-		sdp_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
-		sdp_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
-		sdp_hdr->port = src4->sin_port;
-		break;
-	default:
-		cma_hdr = hdr;
-		cma_hdr->cma_version = CMA_VERSION;
-		cma_set_ip_ver(cma_hdr, 4);
-		cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
-		cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
-		cma_hdr->port = src4->sin_port;
-		break;
+			if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION)
+				return -EINVAL;
+
+			sdp_set_ip_ver(sdp_hdr, 4);
+			sdp_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
+			sdp_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
+			sdp_hdr->port = src4->sin_port;
+			break;
+		default:
+			cma_hdr = hdr;
+			cma_hdr->cma_version = CMA_VERSION;
+			cma_set_ip_ver(cma_hdr, 4);
+			cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
+			cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
+			cma_hdr->port = src4->sin_port;
+			break;
+	    }
+	} else {
+		switch (ps) {
+		case RDMA_PS_SDP:
+			sdp_hdr = hdr;
+			if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION)
+				return -EINVAL;
+			sdp_set_ip_ver(sdp_hdr, 6);
+			sdp_hdr->src_addr.ip6 = src6->sin6_addr;
+			sdp_hdr->dst_addr.ip6 = dst6->sin6_addr;
+			sdp_hdr->port = src6->sin6_port;
+			break;
+		default:
+			cma_hdr = hdr;
+			cma_hdr->cma_version = CMA_VERSION;
+			cma_set_ip_ver(cma_hdr, 6);
+			cma_hdr->src_addr.ip6 = src6->sin6_addr;
+			cma_hdr->dst_addr.ip6 = dst6->sin6_addr;
+			cma_hdr->port = src6->sin6_port;
+			break;
+		}
 	}
 	return 0;
 }
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index beb48e3..014edd6 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -156,6 +156,7 @@ struct neigh_table nd_tbl = {
 	.gc_thresh2 =	 512,
 	.gc_thresh3 =	1024,
 };
+EXPORT_SYMBOL(nd_tbl);
 
 /* ND options */
 struct ndisc_options {
@@ -585,6 +586,7 @@ void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh,
 		     &icmp6h, solicit,
 		     !ipv6_addr_any(saddr) ? ND_OPT_SOURCE_LL_ADDR : 0);
 }
+EXPORT_SYMBOL(ndisc_send_ns);
 
 void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr,
 		   const struct in6_addr *daddr)




More information about the general mailing list