[ofa-general][RFC] IPv6 support for RDMA CM
Aleksey Senin
alekseys at voltaire.com
Wed Jul 30 04:22:04 PDT 2008
Here is a better-formatted version of the patch.
This patch adds IPv6 support for RDMA CM operations. It provides basic
support and has the limitations below:
No support for link-local addresses.
Can't mix IPv6 and IPv4 addresses as source and destination.
No checks are performed against the ANYCAST address type.
Details:
struct addr_req extended to support sockaddr_in6 family
Functions like addr_resolve_local and addr_resolve_remote are changed to take
a pointer to the generic sockaddr struct instead of sockaddr_in. These functions
now act as the upper layer: after checking sa_family they call the corresponding
IPv6 or IPv4 function.
To perform neighbour discovery, two symbols must be exported:
nd_tbl
ndisc_send_ns
In addition to the limitations above, these points must be improved:
recognition of the local device for a given IP address
sending neighbour discovery without exporting symbols
Waiting for feedback, suggestions.
------------
drivers/infiniband/core/addr.c | 158 +++++++++++++++++++++++++++-------
drivers/infiniband/core/cma.c | 72 +++++++++++++-----
net/ipv6/ndisc.c | 2
3 files changed, 186 insertions(+), 46 deletions(-)
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 09a2bec..05ab1d0 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -38,10 +38,13 @@
#include <linux/workqueue.h>
#include <linux/if_arp.h>
#include <net/arp.h>
+#include <net/ndisc.h>
+#include <net/ip6_route.h>
#include <net/neighbour.h>
#include <net/route.h>
#include <net/netevent.h>
#include <rdma/ib_addr.h>
+#include <net/addrconf.h>
MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("IB Address Translation");
@@ -50,7 +53,9 @@ MODULE_LICENSE("Dual BSD/GPL");
struct addr_req {
struct list_head list;
struct sockaddr src_addr;
+ u8 src_pad[sizeof(struct sockaddr_in6) - sizeof(struct sockaddr)];
struct sockaddr dst_addr;
+ u8 dst_pad[sizeof(struct sockaddr_in6) - sizeof(struct sockaddr)];
struct rdma_dev_addr *addr;
struct rdma_addr_client *client;
void *context;
@@ -113,15 +118,27 @@ EXPORT_SYMBOL(rdma_copy_addr);
+/*
+ * rdma_translate_ip - fill @dev_addr from the local device owning @addr.
+ *
+ * AF_INET addresses are looked up with ip_dev_find(); AF_INET6 addresses
+ * are matched against every net device in init_net with ipv6_chk_addr().
+ *
+ * Returns the result of rdma_copy_addr() for the matching device,
+ * -EADDRNOTAVAIL when no device owns the address, or -EAFNOSUPPORT for
+ * any other address family.
+ */
int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
{
struct net_device *dev;
- __be32 ip = ((struct sockaddr_in *) addr)->sin_addr.s_addr;
int ret;
- dev = ip_dev_find(&init_net, ip);
- if (!dev)
- return -EADDRNOTAVAIL;
+	if (addr->sa_family == AF_INET) {
+		__be32 ip = ((struct sockaddr_in *)addr)->sin_addr.s_addr;
+
+		dev = ip_dev_find(&init_net, ip);
+		if (!dev)
+			return -EADDRNOTAVAIL;
+
+		ret = rdma_copy_addr(dev_addr, dev, NULL);
+		/* drop the reference taken by ip_dev_find() */
+		dev_put(dev);
+	} else if (addr->sa_family == AF_INET6) {
+		struct in6_addr *i = &((struct sockaddr_in6 *)addr)->sin6_addr;
+
+		/*
+		 * Preset the result: if no device owns the address the loop
+		 * below never assigns ret, and it must not be returned
+		 * uninitialised.
+		 */
+		ret = -EADDRNOTAVAIL;
+
+		/*
+		 * NOTE(review): the device list is walked without taking any
+		 * lock here - confirm what for_each_netdev() requires of its
+		 * caller in this context.
+		 */
+		for_each_netdev(&init_net, dev) {
+			if (ipv6_chk_addr(&init_net, i, dev, 1)) {
+				ret = rdma_copy_addr(dev_addr, dev, NULL);
+				break;
+			}
+		}
+	} else {
+		/* never reinterpret an unknown sockaddr as sockaddr_in6 */
+		ret = -EAFNOSUPPORT;
+	}
- ret = rdma_copy_addr(dev_addr, dev, NULL);
- dev_put(dev);
return ret;
}
EXPORT_SYMBOL(rdma_translate_ip);
@@ -171,7 +188,8 @@ static void addr_send_arp(struct sockaddr_in *dst_in)
ip_rt_put(rt);
}
-static int addr_resolve_remote(struct sockaddr_in *src_in,
+
+static int addr4_resolve_remote(struct sockaddr_in *src_in,
struct sockaddr_in *dst_in,
struct rdma_dev_addr *addr)
{
@@ -220,10 +238,78 @@ out:
return ret;
}
+/*
+ * addr6_resolve_remote - resolve an IPv6 destination to a device address.
+ *
+ * Routes @dst_in (using @src_in as the source), then looks up the
+ * neighbour entry for the destination on the output device. If the entry
+ * is valid its hardware address is copied into @addr. Otherwise a
+ * neighbour solicitation is sent and -ENODATA is returned.
+ *
+ * Returns 0 on success, -ENODATA while resolution is pending, the routing
+ * error if the route lookup failed, -ENOMEM if no neighbour entry could be
+ * obtained, or the error from rdma_copy_addr().
+ */
+static int addr6_resolve_remote(struct sockaddr_in6 *src_in,
+				struct sockaddr_in6 *dst_in,
+				struct rdma_dev_addr *addr)
+{
+	struct neighbour *neigh;
+	struct dst_entry *dst;
+	struct in6_addr *target;
+	struct in6_addr mcaddr;
+	int ret;
+
+	struct flowi fl = {
+		.nl_u = {
+			.ip6_u = {
+				.daddr = dst_in->sin6_addr,
+				.saddr = src_in->sin6_addr,
+			},
+		},
+	};
+
+	dst = ip6_route_output(&init_net, NULL, &fl);
+	if (!dst)
+		return -ENODATA;
+
+	/*
+	 * A failed IPv6 route lookup is reported through dst->error rather
+	 * than a NULL return; without this check we would go on to solicit
+	 * a neighbour on the error route's device.
+	 */
+	ret = dst->error;
+	if (ret)
+		goto put;
+
+	/* IFF_NOARP device: no neighbour lookup, copy the device address only */
+	if (dst->dev->flags & IFF_NOARP) {
+		ret = rdma_copy_addr(addr, dst->dev, NULL);
+		goto put;
+	}
+
+	neigh = ndisc_get_neigh(dst->dev, &dst_in->sin6_addr);
+	if (!neigh) {
+		ret = -ENOMEM;
+		goto put;
+	}
+
+	if (!(neigh->nud_state & NUD_VALID)) {
+		/*
+		 * Entry not resolved yet: send a solicitation to the
+		 * solicited-node multicast address and report "pending".
+		 */
+		target = (struct in6_addr *)&neigh->primary_key;
+		addrconf_addr_solict_mult(target, &mcaddr);
+		ndisc_send_ns(dst->dev, neigh, target, &mcaddr, NULL);
+		ret = -ENODATA;
+		goto release;
+	}
+
+	ret = rdma_copy_addr(addr, neigh->dev, neigh->ha);
+
+release:
+	neigh_release(neigh);
+put:
+	dst_release(dst);
+	return ret;
+}
+
+/*
+ * addr_resolve_remote - family-independent front end for remote resolution.
+ *
+ * Dispatches to addr4_resolve_remote() or addr6_resolve_remote() according
+ * to the address family and returns their result.
+ *
+ * Returns -EAFNOSUPPORT when the source and destination families differ or
+ * the family is neither AF_INET nor AF_INET6. -ENODATA is deliberately not
+ * used for that case: process_req() treats -ENODATA as "still pending" and
+ * would keep retrying the request until it times out.
+ */
+static int addr_resolve_remote(struct sockaddr *src_in,
+			       struct sockaddr *dst_in,
+			       struct rdma_dev_addr *addr)
+{
+	/*
+	 * Both addresses are cast to the same concrete type below, so a
+	 * mixed IPv4/IPv6 pair would be misinterpreted.
+	 *
+	 * NOTE(review): a source left as AF_UNSPEC is rejected here too; if
+	 * callers may omit the source address, its family should be set from
+	 * the destination before dispatching - confirm against the callers.
+	 */
+	if (src_in->sa_family != dst_in->sa_family)
+		return -EAFNOSUPPORT;
+
+	switch (dst_in->sa_family) {
+	case AF_INET:
+		return addr4_resolve_remote((struct sockaddr_in *)src_in,
+					    (struct sockaddr_in *)dst_in,
+					    addr);
+	case AF_INET6:
+		return addr6_resolve_remote((struct sockaddr_in6 *)src_in,
+					    (struct sockaddr_in6 *)dst_in,
+					    addr);
+	default:
+		return -EAFNOSUPPORT;
+	}
+}
+
static void process_req(struct work_struct *work)
{
struct addr_req *req, *temp_req;
- struct sockaddr_in *src_in, *dst_in;
struct list_head done_list;
INIT_LIST_HEAD(&done_list);
@@ -231,14 +317,13 @@ static void process_req(struct work_struct *work)
mutex_lock(&lock);
list_for_each_entry_safe(req, temp_req, &req_list, list) {
if (req->status == -ENODATA) {
- src_in = (struct sockaddr_in *) &req->src_addr;
- dst_in = (struct sockaddr_in *) &req->dst_addr;
- req->status = addr_resolve_remote(src_in, dst_in,
- req->addr);
- if (req->status && time_after_eq(jiffies, req->timeout))
- req->status = -ETIMEDOUT;
- else if (req->status == -ENODATA)
- continue;
+ req->status = addr_resolve_remote(&req->src_addr, \
+ &req->dst_addr, req->addr);
+
+ if (req->status && time_after_eq(jiffies, req->timeout))
+ req->status = -ETIMEDOUT;
+ else if (req->status == -ENODATA)
+ continue;
}
list_move_tail(&req->list, &done_list);
}
@@ -258,7 +343,7 @@ static void process_req(struct work_struct *work)
}
}
-static int addr_resolve_local(struct sockaddr_in *src_in,
+static int addr4_resolve_local(struct sockaddr_in *src_in,
struct sockaddr_in *dst_in,
struct rdma_dev_addr *addr)
{
@@ -289,6 +374,30 @@ static int addr_resolve_local(struct sockaddr_in *src_in,
return ret;
}
+/*
+ * addr6_resolve_local - placeholder for local IPv6 resolution.
+ *
+ * Local IPv6 destinations are not recognised yet: this always returns
+ * -EADDRNOTAVAIL and ignores all of its arguments. rdma_resolve_ip()
+ * reacts to -EADDRNOTAVAIL by trying addr_resolve_remote() next.
+ */
+static int addr6_resolve_local(struct sockaddr_in6 *src_in,
+ struct sockaddr_in6 *dst_in,
+ struct rdma_dev_addr *addr)
+{
+ return -EADDRNOTAVAIL;
+}
+
+/*
+ * addr_resolve_local - family-independent front end for local resolution.
+ *
+ * Dispatches to addr4_resolve_local() or addr6_resolve_local() according
+ * to the address family and returns their result.
+ *
+ * Returns -EAFNOSUPPORT when the source and destination families differ or
+ * the family is neither AF_INET nor AF_INET6, so that an unknown sockaddr
+ * is never reinterpreted as a sockaddr_in6.
+ */
+static int addr_resolve_local(struct sockaddr *src_in,
+			      struct sockaddr *dst_in,
+			      struct rdma_dev_addr *addr)
+{
+	/*
+	 * Both addresses are cast to the same concrete type below, so a
+	 * mixed IPv4/IPv6 pair would be misinterpreted.
+	 *
+	 * NOTE(review): a source left as AF_UNSPEC is rejected here too; if
+	 * callers may omit the source address, its family should be set from
+	 * the destination before dispatching - confirm against the callers.
+	 */
+	if (src_in->sa_family != dst_in->sa_family)
+		return -EAFNOSUPPORT;
+
+	switch (dst_in->sa_family) {
+	case AF_INET:
+		return addr4_resolve_local((struct sockaddr_in *)src_in,
+					   (struct sockaddr_in *)dst_in,
+					   addr);
+	case AF_INET6:
+		return addr6_resolve_local((struct sockaddr_in6 *)src_in,
+					   (struct sockaddr_in6 *)dst_in,
+					   addr);
+	default:
+		return -EAFNOSUPPORT;
+	}
+}
+
+
int rdma_resolve_ip(struct rdma_addr_client *client,
struct sockaddr *src_addr, struct sockaddr *dst_addr,
struct rdma_dev_addr *addr, int timeout_ms,
@@ -296,7 +405,6 @@ int rdma_resolve_ip(struct rdma_addr_client *client,
struct rdma_dev_addr *addr, void *context),
void *context)
{
- struct sockaddr_in *src_in, *dst_in;
struct addr_req *req;
int ret = 0;
@@ -313,12 +421,11 @@ int rdma_resolve_ip(struct rdma_addr_client *client,
req->client = client;
atomic_inc(&client->refcount);
- src_in = (struct sockaddr_in *) &req->src_addr;
- dst_in = (struct sockaddr_in *) &req->dst_addr;
-
- req->status = addr_resolve_local(src_in, dst_in, addr);
- if (req->status == -EADDRNOTAVAIL)
- req->status = addr_resolve_remote(src_in, dst_in, addr);
+ req->status = addr_resolve_local(&req->src_addr, &req->dst_addr, addr);
+ if (req->status == -EADDRNOTAVAIL) {
+ req->status = addr_resolve_remote(&req->src_addr, \
+ &req->dst_addr, addr);
+ }
switch (req->status) {
case 0:
@@ -328,7 +435,8 @@ int rdma_resolve_ip(struct rdma_addr_client *client,
case -ENODATA:
req->timeout = msecs_to_jiffies(timeout_ms) + jiffies;
queue_req(req);
- addr_send_arp(dst_in);
+ if (req->dst_addr.sa_family == AF_INET)
+ addr_send_arp((struct sockaddr_in *)&req->dst_addr);
break;
default:
ret = req->status;
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index e980ff3..0a1a413 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -2074,7 +2074,7 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
struct rdma_id_private *id_priv;
int ret;
- if (addr->sa_family != AF_INET)
+ if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6)
return -EAFNOSUPPORT;
id_priv = container_of(id, struct rdma_id_private, id);
@@ -2115,30 +2115,60 @@ static int cma_format_hdr(void *hdr, enum rdma_port_space ps,
struct rdma_route *route)
{
struct sockaddr_in *src4, *dst4;
+ struct sockaddr_in6 *src6, *dst6;
struct cma_hdr *cma_hdr;
struct sdp_hh *sdp_hdr;
+ if (route->addr.src_addr.sa_family == AF_INET) {
+ src4 = (struct sockaddr_in *) &route->addr.src_addr;
+ dst4 = (struct sockaddr_in *) &route->addr.dst_addr;
+ } else if (route->addr.src_addr.sa_family == AF_INET6) {
+ src6 = (struct sockaddr_in6 *) &route->addr.src_addr;
+ dst6 = (struct sockaddr_in6 *) &route->addr.dst_addr;
+ } else
+ return -EINVAL;
- src4 = (struct sockaddr_in *) &route->addr.src_addr;
- dst4 = (struct sockaddr_in *) &route->addr.dst_addr;
+ if (route->addr.src_addr.sa_family == AF_INET) {
+ switch (ps) {
+ case RDMA_PS_SDP:
+ sdp_hdr = hdr;
- switch (ps) {
- case RDMA_PS_SDP:
- sdp_hdr = hdr;
- if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION)
- return -EINVAL;
- sdp_set_ip_ver(sdp_hdr, 4);
- sdp_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
- sdp_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
- sdp_hdr->port = src4->sin_port;
- break;
- default:
- cma_hdr = hdr;
- cma_hdr->cma_version = CMA_VERSION;
- cma_set_ip_ver(cma_hdr, 4);
- cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
- cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
- cma_hdr->port = src4->sin_port;
- break;
+ if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION)
+ return -EINVAL;
+
+ sdp_set_ip_ver(sdp_hdr, 4);
+ sdp_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
+ sdp_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
+ sdp_hdr->port = src4->sin_port;
+ break;
+ default:
+ cma_hdr = hdr;
+ cma_hdr->cma_version = CMA_VERSION;
+ cma_set_ip_ver(cma_hdr, 4);
+ cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
+ cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
+ cma_hdr->port = src4->sin_port;
+ break;
+ }
+ } else {
+ switch (ps) {
+ case RDMA_PS_SDP:
+ sdp_hdr = hdr;
+ if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION)
+ return -EINVAL;
+ sdp_set_ip_ver(sdp_hdr, 6);
+ sdp_hdr->src_addr.ip6 = src6->sin6_addr;
+ sdp_hdr->dst_addr.ip6 = dst6->sin6_addr;
+ sdp_hdr->port = src6->sin6_port;
+ break;
+ default:
+ cma_hdr = hdr;
+ cma_hdr->cma_version = CMA_VERSION;
+ cma_set_ip_ver(cma_hdr, 6);
+ cma_hdr->src_addr.ip6 = src6->sin6_addr;
+ cma_hdr->dst_addr.ip6 = dst6->sin6_addr;
+ cma_hdr->port = src6->sin6_port;
+ break;
+ }
}
return 0;
}
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index beb48e3..014edd6 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -156,6 +156,7 @@ struct neigh_table nd_tbl = {
.gc_thresh2 = 512,
.gc_thresh3 = 1024,
};
+EXPORT_SYMBOL(nd_tbl);
/* ND options */
struct ndisc_options {
@@ -585,6 +586,7 @@ void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh,
&icmp6h, solicit,
!ipv6_addr_any(saddr) ? ND_OPT_SOURCE_LL_ADDR : 0);
}
+EXPORT_SYMBOL(ndisc_send_ns);
void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr,
const struct in6_addr *daddr)
More information about the general
mailing list