[openib-general] [RFC] IB address translation using ARP

Sean Hefty sean.hefty at intel.com
Wed Sep 28 18:26:36 PDT 2005


Here's a first attempt at an API / implementation (compile-tested only) for
an address translation module for IB using ARP.  The code should check the
ARP cache for information, but is missing the actual ARP processing.  (We
should be able to pull that from ib_at.)  The API is similar to the route
portion of ib_at, but corrects issues with canceling requests.  Only the
destination IP address is required as input.

The intent is that the CMA will use this service to locate the
proper RDMA device GUID and port to use in establishing a connection.
Hopefully, this makes it clearer how I envision address translation with
respect to the CMA.

Signed-off-by: Sean Hefty <sean.hefty at intel.com>


/*
 * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
 * Copyright (c) 2005 Intel Corporation.  All rights reserved.
 *
 * This Software is licensed under one of the following licenses:
 *
 * 1) under the terms of the "Common Public License 1.0" a copy of which is
 *    available from the Open Source Initiative, see
 *    http://www.opensource.org/licenses/cpl.php.
 *
 * 2) under the terms of the "The BSD License" a copy of which is
 *    available from the Open Source Initiative, see
 *    http://www.opensource.org/licenses/bsd-license.php.
 *
 * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
 *    copy of which is available from the Open Source Initiative, see
 *    http://www.opensource.org/licenses/gpl-license.php.
 *
 * Licensee has the right to choose one of the above licenses.
 *
 * Redistributions of source code must retain the above copyright
 * notice and one of the license notices.
 *
 * Redistributions in binary form must reproduce both the above copyright
 * notice, one of the license notices in the documentation
 * and/or other materials provided with the distribution.
 *
 */

#if !defined(IB_ADDR_H)
#define IB_ADDR_H

#include <linux/socket.h>
#include <rdma/ib_verbs.h>

/*
 * Address pair for one translation request.
 *
 * The resolver reads the IPv4 addresses stored in src_addr/dst_addr and, on
 * success, fills in sgid and dgid.  When the source IP is zero the resolver
 * also writes a selected local address back into src_addr.
 */
struct ib_addr {
	/* Local address; may be left zero to let the resolver choose one. */
	struct sockaddr src_addr;
	/* Remote address to translate. */
	struct sockaddr dst_addr;
	/* Source GID, taken from the outgoing device's hardware address. */
	union ib_gid	sgid;
	/* Destination GID, taken from the neighbour's hardware address. */
	union ib_gid	dgid;
};

struct ib_addr_svc;

/*
 * Completion callback for a translation request.
 *
 * @svc:    service the request was issued on
 * @status: result recorded for the request (0, or a negative errno such as
 *          -ENODATA or -ECANCELED)
 * @addr:   the same ib_addr pointer that was passed to ib_addr_resolve()
 */
typedef void (*ib_addr_handler)(struct ib_addr_svc *svc, int status,
				struct ib_addr *addr);

/*
 * Public handle for an address translation service.  Both fields are set
 * from the arguments given to ib_addr_create_svc().
 */
struct ib_addr_svc {
	/* Opaque caller data; stored but not interpreted by the service. */
	void		*context;
	/* Callback invoked when a request on this service completes. */
	ib_addr_handler	 handler;
};

/*
 * Allocate a service bound to @context and @handler.  Returns the new handle,
 * or ERR_PTR(-ENOMEM) if allocation fails.
 */
struct ib_addr_svc* ib_addr_create_svc(void *context, ib_addr_handler handler);

/*
 * Release a service.  Sleeps until every request queued on @svc has been
 * completed through the handler, then frees the service.
 */
void ib_addr_destroy_svc(struct ib_addr_svc *svc);

/*
 * Start translating @addr on @svc.  Returns 0 when the request was queued (the
 * result is then delivered through the service's handler), or a negative
 * errno when the request could not be started.  @addr must remain valid until
 * the handler has been called, since only the pointer is stored.
 */
int ib_addr_resolve(struct ib_addr_svc *svc, struct ib_addr *addr,
		    int timeout_ms);

/*
 * Cancel the pending request identified by the (@svc, @addr) pair.  A matching
 * request is marked -ECANCELED and scheduled for prompt completion through the
 * handler; if no request matches, nothing happens.
 */
void ib_addr_cancel(struct ib_addr_svc *svc, struct ib_addr *addr);

#endif /* IB_ADDR_H */




/*
 * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
 * Copyright (c) 2005 Intel Corporation.  All rights reserved.
 *
 * This Software is licensed under one of the following licenses:
 *
 * 1) under the terms of the "Common Public License 1.0" a copy of which is
 *    available from the Open Source Initiative, see
 *    http://www.opensource.org/licenses/cpl.php.
 *
 * 2) under the terms of the "The BSD License" a copy of which is
 *    available from the Open Source Initiative, see
 *    http://www.opensource.org/licenses/bsd-license.php.
 *
 * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
 *    copy of which is available from the Open Source Initiative, see
 *    http://www.opensource.org/licenses/gpl-license.php.
 *
 * Licensee has the right to choose one of the above licenses.
 *
 * Redistributions of source code must retain the above copyright
 * notice and one of the license notices.
 *
 * Redistributions in binary form must reproduce both the above copyright
 * notice, one of the license notices in the documentation
 * and/or other materials provided with the distribution.
 */
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/workqueue.h>
#include <net/arp.h>
#include <net/neighbour.h>
#include <net/route.h>
#include <rdma/ib_addr.h>

/* Module metadata. */
MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("IB Address Translation");
MODULE_LICENSE("Dual BSD/GPL");

/*
 * Private wrapper around the public service handle.  The public pointer is
 * converted back with container_of().
 */
struct addr_svc {
	/* Public handle returned to the caller. */
	struct ib_addr_svc svc;
	/* Woken when the last request reference is dropped. */
	wait_queue_head_t wait;
	/* One reference for the creator plus one per queued request. */
	atomic_t refcount;
};

/* One outstanding translation request. */
struct addr_req {
	/* Links the request into req_list, or a local list while completing. */
	struct list_head list;
	/* Owning service; its handler is called on completion. */
	struct addr_svc *add_svc;
	/* Caller-owned address structure handed back to the handler. */
	struct ib_addr *addr;
	/* Expiry time in jiffies. */
	unsigned long timeout;
	/* Result reported to the handler. */
	int status;
};

/* Work handler that completes expired requests; defined below. */
static void process_req(void *data);

/* Protects req_list. */
static DEFINE_SPINLOCK(lock);
/* Pending requests, kept ordered by expiry time (earliest first). */
static LIST_HEAD(req_list);
/* Delayed work item that runs process_req() on wq. */
static DECLARE_WORK(work, process_req, NULL);
/* Single-threaded work queue created at module init. */
static struct workqueue_struct *wq;
/* Expiry (in jiffies) most recently passed to set_timeout(). */
static unsigned long timeout;

/*
 * Allocate an address translation service and bind it to the caller's
 * context and completion handler.
 *
 * Returns the embedded public handle on success, or ERR_PTR(-ENOMEM) when
 * the allocation fails.  The service starts with a reference count of one,
 * which is the reference ib_addr_destroy_svc() drops.
 */
struct ib_addr_svc* ib_addr_create_svc(void *context, ib_addr_handler handler)
{
	struct addr_svc *priv = kmalloc(sizeof *priv, GFP_KERNEL);

	if (priv) {
		atomic_set(&priv->refcount, 1);
		init_waitqueue_head(&priv->wait);
		priv->svc.handler = handler;
		priv->svc.context = context;
		return &priv->svc;
	}

	return ERR_PTR(-ENOMEM);
}
EXPORT_SYMBOL(ib_addr_create_svc);

/*
 * Tear down a service created by ib_addr_create_svc().
 *
 * Drops the creation reference and then sleeps until the reference count
 * reaches zero, i.e. until every request queued on the service has been
 * completed, before freeing the service.
 */
void ib_addr_destroy_svc(struct ib_addr_svc *svc)
{
	struct addr_svc *priv;

	priv = container_of(svc, struct addr_svc, svc);

	atomic_dec(&priv->refcount);
	wait_event(priv->wait, atomic_read(&priv->refcount) == 0);

	kfree(priv);
}
EXPORT_SYMBOL(ib_addr_destroy_svc);

/*
 * (Re)arm the delayed work so that process_req() runs at @time (an absolute
 * jiffies value).  Any previously scheduled run is cancelled first.  A target
 * that is already due is scheduled one jiffy from now.
 */
static void set_timeout(unsigned long time)
{
	long delta;

	timeout = time;
	cancel_delayed_work(&work);

	/* Signed difference so a wrapped or past deadline shows up as <= 0. */
	delta = (long)(time - jiffies);
	queue_delayed_work(wq, &work, delta > 0 ? (unsigned long)delta : 1);
}

static void process_req(void *data)
{
	struct addr_req *req, *temp_req;
	struct list_head done_list;
	unsigned long flags;

	INIT_LIST_HEAD(&done_list);

	spin_lock_irqsave(&lock, flags);
	list_for_each_entry_safe(req, temp_req, &req_list, list) {
		if (time_after(req->timeout, jiffies)) {
			set_timeout(req->timeout);
			break;
		}

		list_del(&req->list);
		list_add_tail(&req->list, &done_list);
	}
	spin_unlock_irqrestore(&lock, flags);

	list_for_each_entry_safe(req, temp_req, &done_list, list) {
		list_del(&req->list);
		req->add_svc->svc.handler(&req->add_svc->svc, req->status,
					  req->addr);

		if (atomic_dec_and_test(&req->add_svc->refcount))
			wake_up(&req->add_svc->wait);
		kfree(req);
	}
}

static void queue_req(struct addr_req *req)
{
	struct addr_req *temp_req;

	list_for_each_entry_reverse(temp_req, &req_list, list) {
		if (time_after(req->timeout, temp_req->timeout))
			break;
	}

	atomic_inc(&req->add_svc->refcount);
	list_add(&req->list, &temp_req->list);

	if (req_list.next == &req->list)
		set_timeout(req->timeout);
}

static int addr_resolve(struct ib_addr *addr)
{
	struct sockaddr_in *src_addr;
	u32 src_ip, dst_ip;
	struct flowi flow;
	struct rtable *rt_tbl;
	struct neighbour *neigh;
	int ret;

	src_addr = (struct sockaddr_in *) &addr->src_addr;
	src_ip = src_addr->sin_addr.s_addr;
	dst_ip = ((struct sockaddr_in *) &addr->dst_addr)->sin_addr.s_addr;
	
	memset(&flow, 0, sizeof flow);
	flow.nl_u.ip4_u.daddr = dst_ip;
	flow.nl_u.ip4_u.saddr = src_ip;
	ret = ip_route_output_key(&rt_tbl, &flow);
	if (ret)
		return ret;

	neigh = neigh_lookup(&arp_tbl, &dst_ip, rt_tbl->u.dst.dev);
	ip_rt_put(rt_tbl);
	if (!neigh)
		return -ENODATA;

	if (!src_ip) {
		src_ip = inet_select_addr(neigh->dev, dst_ip, 0);
		if (!src_ip)
			return -EADDRNOTAVAIL;

		src_addr->sin_family = addr->dst_addr.sa_family;
		src_addr->sin_addr.s_addr = src_ip;
	}

	addr->sgid = *(union ib_gid *) (neigh->dev->dev_addr + 4);
	addr->dgid = *(union ib_gid *) (neigh->ha + 4);
	neigh_release(neigh);
	return 0;
}

/*
 * Start an address translation for @addr on @svc.
 *
 * A lookup against the route table and ARP cache is attempted immediately.
 * If it succeeds, the request is queued with a timeout of "now" so the
 * handler is invoked promptly from the work queue instead of waiting out the
 * caller's timeout.  If the cache has no entry (-ENODATA), the request is
 * queued to expire @timeout_ms milliseconds from now.
 *
 * Returns 0 if the request was queued (the result is reported through the
 * service's handler), -ENOMEM if the request could not be allocated, or the
 * lookup error for any other failure, in which case the handler is not
 * called.
 */
int ib_addr_resolve(struct ib_addr_svc *svc, struct ib_addr *addr,
		    int timeout_ms)
{
	struct addr_svc *add_svc = container_of(svc, struct addr_svc, svc);
	struct addr_req *req;
	unsigned long flags;
	int ret;

	req = kmalloc(sizeof *req, GFP_KERNEL);
	if (!req)
		return -ENOMEM;

	req->addr = addr;
	req->add_svc = add_svc;
	req->status = addr_resolve(addr);

	switch (req->status) {
	case 0:
		/* Already resolved from the cache: complete right away. */
		req->timeout = jiffies;
		break;
	case -ENODATA:
		/* TODO: initiate ARP request */
		req->timeout = msecs_to_jiffies(timeout_ms) + jiffies;
		break;
	default:
		ret = req->status;
		goto err;
	}

	spin_lock_irqsave(&lock, flags);
	queue_req(req);
	spin_unlock_irqrestore(&lock, flags);
	return 0;
err:
	kfree(req);
	return ret;
}
EXPORT_SYMBOL(ib_addr_resolve);

/*
 * Cancel the pending request that was issued on @svc for @addr.
 *
 * The first matching request is marked -ECANCELED, given a timeout of "now",
 * moved to the front of the pending list, and the work is re-armed so the
 * handler is called promptly.  If no request matches, nothing is changed.
 */
void ib_addr_cancel(struct ib_addr_svc *svc, struct ib_addr *addr)
{
	struct addr_req *cur;
	unsigned long flags;

	spin_lock_irqsave(&lock, flags);
	list_for_each_entry(cur, &req_list, list) {
		if (cur->addr != addr || &cur->add_svc->svc != svc)
			continue;

		cur->status = -ECANCELED;
		cur->timeout = jiffies;
		list_move(&cur->list, &req_list);
		set_timeout(cur->timeout);
		/* The list was modified; stop iterating. */
		break;
	}
	spin_unlock_irqrestore(&lock, flags);
}
EXPORT_SYMBOL(ib_addr_cancel);

/*
 * Module entry point: create the single-threaded work queue used to complete
 * requests.  Returns 0 on success or -ENOMEM if the queue cannot be created.
 */
static int addr_init(void)
{
	wq = create_singlethread_workqueue("ib_addr");
	return wq ? 0 : -ENOMEM;
}

/* Module exit point: destroy the work queue created by addr_init(). */
static void addr_cleanup(void)
{
	destroy_workqueue(wq);
}

/* Register the module load and unload hooks. */
module_init(addr_init);
module_exit(addr_cleanup);






More information about the general mailing list