[openib-general] [PATCH] Start moving to a native IPoIB driver

Roland Dreier roland at topspin.com
Wed Oct 6 13:32:56 PDT 2004


I've just committed this patch.  It removes the fake ethernet layer
and starts turning IPoIB into a native driver (with addr_len 20 and
type ARPHRD_INFINIBAND).  The driver is working pretty well with these
changes, although multicast is not working at all and there are lots
of leaks and races that I still need to fix up.

I'm still not sure that I'm doing everything the right way but I think
I've made a lot of progress towards something I wouldn't be
embarrassed to post to the netdev list.  This approach seems to
simplify things quite a bit (diffstat shows a net deletion of 1300
lines from a driver that was < 5000 lines to start with) and
performance seems a bit better as well.

Surprisingly, tcpdump and ethereal still work; tcpdump warns:

    tcpdump: WARNING: arptype 32 not supported by libpcap - falling back to cooked socket

but still works fine.  The ifconfig and arp commands can't cope with
the longer network address, but the ip command handles it fine:

    # ip link show dev ib0
    6: ib0: <BROADCAST,MULTICAST,UP> mtu 2044 qdisc pfifo_fast qlen 128
        link/[32] 00:00:04:04:fe:80:00:00:00:00:00:00:00:02:c9:01:07:8c:e4:61 brd 00:ff:ff:ff:ff:12:40:1b:ff:ff:00:00:00:00:00:00:ff:ff:ff:ff

    # ip neigh show dev ib0
    12.0.0.1 lladdr 00:00:04:04:fe:80:00:00:00:00:00:00:00:02:c9:01:07:fc:c7:11 nud reachable

ip2pr (and indirectly sdp) are broken by these changes, but Libor has
said he will work on fixing this up.

For now I'm going to move on to integrating the new MAD layer into my
tree and come back to IPoIB in a few days.

 - Roland

Index: infiniband/ulp/Kconfig
===================================================================
--- infiniband/ulp/Kconfig	(revision 915)
+++ infiniband/ulp/Kconfig	(working copy)
@@ -32,7 +32,7 @@
 
 config INFINIBAND_SDP
 	tristate "Sockets Direct Protocol"
-	depends on INFINIBAND && INFINIBAND_IPOIB
+	depends on BROKEN && INFINIBAND && INFINIBAND_IPOIB
 	select INFINIBAND_CM
 	---help---
 	  Support for Sockets Direct Protocol (SDP).  This provides
Index: infiniband/ulp/ipoib/ipoib_verbs.c
===================================================================
--- infiniband/ulp/ipoib/ipoib_verbs.c	(revision 915)
+++ infiniband/ulp/ipoib/ipoib_verbs.c	(working copy)
@@ -122,6 +122,12 @@
 	}
 	priv->local_qpn = priv->qp->qp_num;
 
+	ipoib_dbg(priv, "Local QPN: %06x\n", priv->local_qpn);
+
+	priv->dev->dev_addr[1] = (priv->local_qpn >> 16) & 0xff;
+	priv->dev->dev_addr[2] = (priv->local_qpn >>  8) & 0xff;
+	priv->dev->dev_addr[3] = (priv->local_qpn      ) & 0xff;
+
 	qp_attr.qp_state = IB_QPS_INIT;
 	qp_attr.qkey = 0;
 	qp_attr.port_num = priv->port;
Index: infiniband/ulp/ipoib/ipoib_arp.c
===================================================================
--- infiniband/ulp/ipoib/ipoib_arp.c	(revision 915)
+++ infiniband/ulp/ipoib/ipoib_arp.c	(working copy)
@@ -1,1057 +0,0 @@
-/*
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available at
- * <http://www.fsf.org/copyleft/gpl.html>, or the OpenIB.org BSD
- * license, available in the LICENSE.TXT file accompanying this
- * software.  These details are also available at
- * <http://openib.org/license.html>.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * Copyright (c) 2004 Topspin Communications.  All rights reserved.
- *
- * $Id$
- */
-
-#include <linux/slab.h>
-#include <linux/if_arp.h>
-#include <linux/module.h>
-
-#include "ipoib.h"
-
-#include "ts_ib_sa_client.h"
-
-enum {
-	IPOIB_ADDRESS_HASH_BITS = IPOIB_ADDRESS_HASH_BYTES * 8,
-};
-
-struct ipoib_sarp_cache {
-	struct list_head table[256];
-};
-
-struct ipoib_sarp {
-	struct list_head cache_list;
-
-	atomic_t refcnt;
-
-	uint8_t hash[IPOIB_ADDRESS_HASH_BYTES];
-
-	union ib_gid gid;
-	u32        qpn;
-	u16        lid;
-	tTS_IB_SL  sl;
-	struct ib_ah *address_handle;
-	tTS_IB_CLIENT_QUERY_TID tid;
-
-	unsigned long created;
-	unsigned long last_verify;
-	unsigned long last_directed_query;
-	unsigned long first_directed_reply;
-
-	unsigned char require_verify:1;
-	unsigned char directed_query:1;
-	unsigned char directed_reply:4;
-
-	unsigned char logcount;
-
-	struct sk_buff_head pkt_queue;
-	struct work_struct  path_record_work;
-
-	struct net_device *dev;
-};
-
-struct ipoib_sarp_iter {
-	struct net_device *dev;
-	uint8_t hash;
-	struct list_head *cur;
-};
-
-struct ipoib_arp_payload {
-	uint8_t src_hw_addr[IPOIB_HW_ADDR_LEN];
-	uint8_t src_ip_addr[4];
-	uint8_t dst_hw_addr[IPOIB_HW_ADDR_LEN];
-	uint8_t dst_ip_addr[4];
-};
-
-static void _ipoib_sarp_path_lookup(void *_entry);
-
-/* =============================================================== */
-/*.._ipoib_sarp_hash -- hash GID/QPN to 6 bytes                    */
-static void _ipoib_sarp_hash(union ib_gid *gid, u32 qpn, uint8_t *hash)
-{
-	/* We use the FNV hash (http://www.isthe.com/chongo/tech/comp/fnv/) */
-#define TS_FNV_64_PRIME 0x100000001b3ULL
-#define TS_FNV_64_INIT  0xcbf29ce484222325ULL
-
-	int i;
-	uint64_t h = TS_FNV_64_INIT;
-
-	/* make qpn big-endian so we know where digits are */
-	qpn = cpu_to_be32(qpn);
-
-	for (i = 0; i < sizeof (union ib_gid) + 3; ++i) {
-		h *= TS_FNV_64_PRIME;
-		h ^= (i < sizeof(tTS_IB_GID)
-		      ? gid->raw[i]
-		      : ((uint8_t *)&qpn)[i - sizeof (union ib_gid) + 1]);
-	}
-
-	/* xor fold down to 6 bytes and make big-endian */
-	h = cpu_to_be64((h >> IPOIB_ADDRESS_HASH_BITS)
-			^ (h & ((1ULL << IPOIB_ADDRESS_HASH_BITS) - 1)));
-
-	memcpy(hash, ((uint8_t *)&h) + 2, IPOIB_ADDRESS_HASH_BYTES);
-}
-
-/* =============================================================== */
-/*..ipoib_sarp_get -- increment reference count for ARP entry      */
-void ipoib_sarp_get(struct ipoib_sarp *entry)
-{
-	atomic_inc(&entry->refcnt);
-}
-
-/* =============================================================== */
-/*.._ipoib_sarp_alloc -- allocate shadow ARP entry                 */
-static struct ipoib_sarp *_ipoib_sarp_alloc(struct net_device *dev)
-{
-	struct ipoib_sarp *entry;
-
-	entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
-	if (!entry)
-		return NULL;
-
-	atomic_set(&entry->refcnt, 2);	/* The calling function needs to put */
-
-	entry->dev = dev;
-
-	entry->require_verify = 0;
-	entry->directed_query = 1;
-	entry->directed_reply = 0;
-
-	entry->created = jiffies;
-	entry->last_verify = jiffies;
-	entry->last_directed_query = jiffies;
-	entry->first_directed_reply = jiffies;
-
-	entry->logcount = 0;
-
-	INIT_LIST_HEAD(&entry->cache_list);
-
-	skb_queue_head_init(&entry->pkt_queue);
-	INIT_WORK(&entry->path_record_work,
-		  _ipoib_sarp_path_lookup, entry);
-
-	entry->address_handle = NULL;
-
-	/* Will force a trigger on the first packet we need to send */
-	entry->tid = TS_IB_CLIENT_QUERY_TID_INVALID;
-
-	return entry;
-}
-
-/* =============================================================== */
-/*..ipoib_sarp_put -- decrement reference count for ARP entry      */
-void ipoib_sarp_put(struct ipoib_sarp *entry)
-{
-	struct net_device *dev = entry->dev;
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
-
-	if (atomic_dec_and_test(&entry->refcnt)) {
-		ipoib_dbg(priv, "deleting ARP shadow cache entry "
-			  "%02x:%02x:%02x:%02x:%02x:%02x\n",
-			  entry->hash[0], entry->hash[1], entry->hash[2],
-			  entry->hash[3], entry->hash[4], entry->hash[5]);
-
-		if (entry->address_handle != NULL) {
-			int ret = ib_destroy_ah(entry->address_handle);
-			if (ret < 0)
-				ipoib_warn(priv, "ib_destroy_ah failed (ret = %d)\n",
-					   ret);
-		}
-
-		while (!skb_queue_empty(&entry->pkt_queue)) {
-			struct sk_buff *skb = skb_dequeue(&entry->pkt_queue);
-
-			skb->dev = dev;
-			dev_kfree_skb_any(skb);
-		}
-
-		kfree(entry);
-	}
-}
-
-/* =============================================================== */
-/*..__ipoib_sarp_find -- find ARP entry (unlocked)                 */
-static struct ipoib_sarp *__ipoib_sarp_find(struct net_device *dev,
-					    const uint8_t *hash)
-{
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
-	struct ipoib_sarp *entry;
-
-	list_for_each_entry(entry, &priv->sarp_cache->table[hash[0]],
-			    cache_list) {
-		ipoib_dbg_data(priv, "matching %02x:%02x:%02x:%02x:%02x:%02x\n",
-			       entry->hash[0], entry->hash[1], entry->hash[2],
-			       entry->hash[3], entry->hash[4], entry->hash[5]);
-
-		if (!memcmp(hash, entry->hash, IPOIB_ADDRESS_HASH_BYTES)) {
-			ipoib_sarp_get(entry);
-			return entry;
-		}
-	}
-
-	return NULL;
-}
-
-/* =============================================================== */
-/*.._ipoib_sarp_find -- find ARP entry                             */
-static struct ipoib_sarp *_ipoib_sarp_find(struct net_device *dev,
-					   const uint8_t *hash)
-{
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
-	struct ipoib_sarp *entry;
-	unsigned long flags;
-
-	spin_lock_irqsave(&priv->lock, flags);
-	entry = __ipoib_sarp_find(dev, hash);
-	spin_unlock_irqrestore(&priv->lock, flags);
-
-	return entry;
-}
-
-/* =============================================================== */
-/*..ipoib_sarp_iter_init -- create new ARP iterator                */
-struct ipoib_sarp_iter *ipoib_sarp_iter_init(struct net_device *dev)
-{
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
-	struct ipoib_sarp_iter *iter;
-
-	iter = kmalloc(sizeof(*iter), GFP_KERNEL);
-	if (!iter)
-		return NULL;
-
-	iter->dev = dev;
-	iter->hash = 0;
-	iter->cur = priv->sarp_cache->table[0].next;
-
-	while (iter->cur == &priv->sarp_cache->table[iter->hash]) {
-		++iter->hash;
-		if (iter->hash == 0) {
-			/* ARP table is empty */
-			kfree(iter);
-			return NULL;
-		}
-		iter->cur = priv->sarp_cache->table[iter->hash].next;
-	}
-
-	return iter;
-}
-
-/* =============================================================== */
-/*..ipoib_sarp_iter_free -- free ARP iterator                      */
-void ipoib_sarp_iter_free(struct ipoib_sarp_iter *iter)
-{
-	kfree(iter);
-}
-
-/* =============================================================== */
-/*..ipoib_sarp_iter_next -- incr. iter. -- return non-zero at end  */
-int ipoib_sarp_iter_next(struct ipoib_sarp_iter *iter)
-{
-	struct ipoib_dev_priv *priv = netdev_priv(iter->dev);
-
-	while (1) {
-		iter->cur = iter->cur->next;
-
-		if (iter->cur == &priv->sarp_cache->table[iter->hash]) {
-			++iter->hash;
-			if (!iter->hash)
-				return 1;
-
-			iter->cur = &priv->sarp_cache->table[iter->hash];
-		} else
-			return 0;
-	}
-}
-
-/* =============================================================== */
-/*..ipoib_sarp_iter_read -- get data pointed to by ARP iterator    */
-void ipoib_sarp_iter_read(struct ipoib_sarp_iter *iter, uint8_t *hash,
-			  union ib_gid *gid, u32 *qpn,
-			  unsigned long *created, unsigned long *last_verify,
-			  unsigned int *queuelen, unsigned int *complete)
-{
-	struct ipoib_sarp *entry;
-
-	entry = list_entry(iter->cur, struct ipoib_sarp, cache_list);
-
-	memcpy(hash, entry->hash, IPOIB_ADDRESS_HASH_BYTES);
-	*gid = entry->gid;
-	*qpn = entry->qpn;
-	*created = entry->created;
-	*last_verify = entry->last_verify;
-	*queuelen = skb_queue_len(&entry->pkt_queue);
-	*complete = entry->address_handle != NULL;
-}
-
-/* =============================================================== */
-/*..ipoib_sarp_add -- add ARP entry                                */
-struct ipoib_sarp *ipoib_sarp_add(struct net_device *dev, union ib_gid *gid,
-				  u32 qpn)
-{
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
-	uint8_t hash[IPOIB_ADDRESS_HASH_BYTES];
-	struct ipoib_sarp *entry;
-	unsigned long flags;
-
-	_ipoib_sarp_hash(gid, qpn, hash);
-
-	entry = _ipoib_sarp_find(dev, hash);
-	if (entry) {
-		if (entry->qpn != qpn ||
-		    memcmp(entry->gid.raw, gid->raw, sizeof (union ib_gid))) {
-			ipoib_warn(priv, "hash collision\n");
-			ipoib_sarp_put(entry);	/* for _find() */
-			return NULL;
-		} else
-			return entry;
-	}
-
-	entry = _ipoib_sarp_alloc(dev);
-	if (!entry) {
-		ipoib_warn(priv, "out of memory for ARP entry\n");
-		return NULL;
-	}
-
-	memcpy(entry->hash, hash, sizeof(entry->hash));
-	entry->gid = *gid;
-	entry->qpn = qpn;
-
-	entry->require_verify = 1;
-
-	spin_lock_irqsave(&priv->lock, flags);
-	list_add_tail(&entry->cache_list, &priv->sarp_cache->table[hash[0]]);
-	spin_unlock_irqrestore(&priv->lock, flags);
-
-	return entry;
-}
-
-/* =============================================================== */
-/*..ipoib_sarp_local_add -- add ARP hash for local node            */
-struct ipoib_sarp *ipoib_sarp_local_add(struct net_device *dev,
-					union ib_gid *gid, u32 qpn)
-{
-	_ipoib_sarp_hash(gid, qpn, dev->dev_addr);
-	return ipoib_sarp_add(dev, gid, qpn);
-}
-
-/* =============================================================== */
-/*..ipoib_sarp_delete -- delete shadow ARP cache entry             */
-int ipoib_sarp_delete(struct net_device *dev, const uint8_t *hash)
-{
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
-	struct ipoib_sarp *entry;
-	unsigned long flags;
-
-	spin_lock_irqsave(&priv->lock, flags);
-
-	entry = __ipoib_sarp_find(dev, hash);
-	if (!entry) {
-		spin_unlock_irqrestore(&priv->lock, flags);
-
-		return 0;
-	}
-
-	list_del_init(&entry->cache_list);
-
-	spin_unlock_irqrestore(&priv->lock, flags);
-
-	ipoib_sarp_put(entry);	/* for _find() */
-	ipoib_sarp_put(entry);	/* for original reference */
-
-	return 1;
-}
-
-/* =============================================================== */
-/*.._ipoib_sarp_path_record_completion -- path record comp func    */
-static int _ipoib_sarp_path_record_completion(tTS_IB_CLIENT_QUERY_TID tid,
-					      int status,
-					      struct ib_path_record *path,
-					      int remaining, void *entry_ptr)
-{
-	struct ipoib_sarp *entry = entry_ptr;
-	struct net_device *dev = entry->dev;
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
-
-	ipoib_dbg(priv, "path record lookup done, status %d\n", status);
-
-	entry->tid = TS_IB_CLIENT_QUERY_TID_INVALID;
-
-	if (!status) {
-		struct ib_ah_attr av = {
-			.dlid 	       = path->dlid,
-			.sl 	       = path->sl,
-			.src_path_bits = 0,
-			.static_rate   = 0,
-			.ah_flags      = 0,
-			.port_num      = priv->port
-		};
-
-		entry->address_handle = ib_create_ah(priv->pd, &av);
-		if (IS_ERR(entry->address_handle)) {
-			ipoib_warn(priv, "ib_create_ah failed\n");
-		} else {
-			ipoib_dbg(priv, "created address handle %p for LID 0x%04x, SL %d\n",
-				  entry->address_handle, path->dlid, path->sl);
-
-			entry->lid = path->dlid;
-			entry->sl = path->sl;
-
-			/* actually send any queued packets */
-			while (!skb_queue_empty(&entry->pkt_queue)) {
-				struct sk_buff *skb =
-				    skb_dequeue(&entry->pkt_queue);
-
-				skb->dev = dev;
-
-				if (dev_queue_xmit(skb))
-					ipoib_warn(priv, "dev_queue_xmit failed "
-						   "to requeue packet\n");
-			}
-		}
-	} else {
-		if (status != -ETIMEDOUT && entry->logcount < 20) {
-			ipoib_warn(priv, "tsIbPathRecordRequest completion failed "
-				   "for %02x:%02x:%02x:%02x:%02x:%02x, status = %d\n",
-				   entry->hash[0], entry->hash[1],
-				   entry->hash[2], entry->hash[3],
-				   entry->hash[4], entry->hash[5], status);
-			entry->logcount++;
-		}
-
-		/* Flush out any queued packets */
-		while (!skb_queue_empty(&entry->pkt_queue)) {
-			struct sk_buff *skb = skb_dequeue(&entry->pkt_queue);
-
-			skb->dev = dev;
-			dev_kfree_skb_any(skb);
-		}
-	}
-
-	ipoib_sarp_put(entry);	/* for _get() in original call */
-
-	/* nonzero return means no more callbacks (we have our path) */
-	return 1;
-}
-
-/* =============================================================== */
-/*.._ipoib_sarp_path_lookup - start path lookup                    */
-static void _ipoib_sarp_path_lookup(void *entry_ptr)
-{
-	struct ipoib_sarp *entry = entry_ptr;
-	struct net_device *dev = entry->dev;
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
-	tTS_IB_CLIENT_QUERY_TID tid;
-
-	ipoib_sarp_get(entry);
-	if (tsIbPathRecordRequest(priv->ca, priv->port,
-				  priv->local_gid.raw,
-				  entry->gid.raw,
-				  priv->pkey, 0, HZ, 3600 * HZ,	/* XXX cache jiffies */
-				  _ipoib_sarp_path_record_completion,
-				  entry, &tid)) {
-		ipoib_warn(priv, "tsIbPathRecordRequest failed\n");
-		ipoib_sarp_put(entry);	/* for _get() */
-	} else {
-		ipoib_dbg(priv, "no address vector, starting path record lookup\n");
-		entry->tid = tid;
-	}
-}
-
-/* =============================================================== */
-/*.._ipoib_sarp_lookup -- return address and qpn for entry         */
-static int _ipoib_sarp_lookup(struct ipoib_sarp *entry)
-{
-	struct net_device *dev = entry->dev;
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
-
-	/* If DEBUG is undefined, priv won't be used */
-	(void) priv;
-
-	if (entry->address_handle != NULL)
-		return 0;
-
-	/*
-	 * Found an entry, but without an address handle.
-	 * Check to see if we have a path record lookup executing and if not,
-	 * start one up.
-	 */
-
-	if (entry->tid == TS_IB_CLIENT_QUERY_TID_INVALID)
-		schedule_work(&entry->path_record_work);
-	else
-		ipoib_dbg(priv,  "no address vector, but path record lookup already started\n");
-
-	return -EAGAIN;
-}
-
-/* =============================================================== */
-/*..ipoib_sarp_lookup -- lookup a hash in shadow ARP cache         */
-int ipoib_sarp_lookup(struct net_device *dev, uint8_t *hash,
-		      struct ipoib_sarp **entry)
-{
-	struct ipoib_sarp *tentry;
-
-	tentry = _ipoib_sarp_find(dev, hash);
-	if (!tentry)
-		return -ENOENT;
-
-	*entry = tentry;
-
-	return _ipoib_sarp_lookup(tentry);
-}
-
-/* =============================================================== */
-/*.._ipoib_sarp_tx_callback -- put reference to entry after TX     */
-static void _ipoib_sarp_tx_callback(void *ptr)
-{
-	ipoib_sarp_put((struct ipoib_sarp *)ptr); /* for _get() in orig call */
-}
-
-/* =============================================================== */
-/*..ipoib_sarp_send -- send packet to dest                         */
-int ipoib_sarp_send(struct net_device *dev, struct ipoib_sarp *entry,
-		    struct sk_buff *skb)
-{
-	return ipoib_dev_send(dev, skb, _ipoib_sarp_tx_callback,
-			      entry, entry->address_handle, entry->qpn);
-}
-
-/* =============================================================== */
-/*..ipoib_sarp_queue_packet -- queue packet during path rec lookup */
-int ipoib_sarp_queue_packet(struct ipoib_sarp *entry, struct sk_buff *skb)
-{
-	skb_queue_tail(&entry->pkt_queue, skb);
-
-	return 0;
-}
-
-/* =============================================================== */
-/*..ipoib_sarp_rewrite_receive -- rewrite ARP packet for Linux     */
-int ipoib_sarp_rewrite_receive(struct net_device *dev, struct sk_buff *skb)
-{
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
-	struct arphdr *arp;
-	struct ipoib_arp_payload *payload;
-	struct arphdr *new_arp;
-	struct ethhdr *header;
-	uint8_t *new_payload;
-	struct sk_buff *new_skb;
-	struct ipoib_sarp *entry;
-	uint8_t hash[IPOIB_ADDRESS_HASH_BYTES];
-	int ret = 0;
-
-	/* If DEBUG is undefined, priv won't be used */
-	(void) priv;
-
-	arp = (struct arphdr *)skb->data;
-	payload = (struct ipoib_arp_payload *)skb_pull(skb, sizeof(*arp));
-
-	ipoib_dbg(priv, "ARP receive: hwtype=0x%04x proto=0x%04x hwlen=%d prlen=%d op=0x%04x "
-		  "sip=%d.%d.%d.%d dip=%d.%d.%d.%d\n",
-		 ntohs(arp->ar_hrd),
-		 ntohs(arp->ar_pro),
-		 arp->ar_hln,
-		 arp->ar_pln,
-		 ntohs(arp->ar_op),
-		 payload->src_ip_addr[0], payload->src_ip_addr[1],
-		 payload->src_ip_addr[2], payload->src_ip_addr[3],
-		 payload->dst_ip_addr[0], payload->dst_ip_addr[1],
-		 payload->dst_ip_addr[2], payload->dst_ip_addr[3]);
-
-	new_skb = dev_alloc_skb(dev->hard_header_len
-				+ sizeof(*new_arp)
-				+ 2 * (IPOIB_ADDRESS_HASH_BYTES + 4));
-	if (!new_skb) {
-		ret = -ENOMEM;
-		goto out;
-	}
-
-	new_skb->mac.raw = new_skb->data;
-	header = (struct ethhdr *)new_skb->mac.raw;
-	skb_reserve(new_skb, dev->hard_header_len);
-
-	new_arp = (struct arphdr *)skb_put(new_skb, sizeof(*new_arp));
-	new_payload = (uint8_t *)skb_put(new_skb,
-					 2 * (IPOIB_ADDRESS_HASH_BYTES + 4));
-
-	header->h_proto = htons(ETH_P_ARP);
-
-	new_skb->dev = dev;
-	new_skb->pkt_type = PACKET_HOST;
-	new_skb->protocol = htons(ETH_P_ARP);
-
-	/* copy ARP header */
-	*new_arp = *arp;
-	new_arp->ar_hrd = htons(ARPHRD_ETHER);
-	new_arp->ar_hln = IPOIB_ADDRESS_HASH_BYTES;
-
-	/* copy IP addresses */
-	memcpy(new_payload + IPOIB_ADDRESS_HASH_BYTES,
-	       payload->src_ip_addr, 4);
-	memcpy(new_payload + 2 * IPOIB_ADDRESS_HASH_BYTES + 4,
-	       payload->dst_ip_addr, 4);
-
-	/* rewrite IPoIB hw address to hashes */
-	if (be32_to_cpu(*(uint32_t *)payload->src_hw_addr) & 0xffffff) {
-		_ipoib_sarp_hash((union ib_gid *) (payload->src_hw_addr + 4),
-				 be32_to_cpu(*(uint32_t *)payload->src_hw_addr) & 0xffffff, hash);
-
-		/* add shadow ARP entries if necessary */
-		if (ARPOP_REPLY == ntohs(arp->ar_op)) {
-			entry = _ipoib_sarp_find(dev, hash);
-			if (entry) {
-				if (entry->directed_query &&
-				    time_before(jiffies,
-						entry->last_directed_query +
-						HZ)) {
-					/* Directed query, everything's good */
-					entry->last_verify = jiffies;
-					entry->directed_query = 0;
-				} else {
-					/*
-					 * If we receive another ARP packet in that's not directed and
-					 * we already have a path record outstanding, don't drop it yet
-					 */
-					if (entry->tid ==
-					    TS_IB_CLIENT_QUERY_TID_INVALID) {
-						/* Delete old one and create a new one */
-						ipoib_dbg(priv, "LID change inferred on query for "
-							  "%02x:%02x:%02x:%02x:%02x:%02x\n",
-							  hash[0], hash[1], hash[2],
-							  hash[3], hash[4], hash[5]);
-
-						ipoib_sarp_delete(dev, hash);
-						ipoib_sarp_put(entry); /* for _find() */
-						entry = NULL;
-					} else
-						ipoib_dbg(priv, "lookup in progress, skipping destroying entry "
-							  "%02x:%02x:%02x:%02x:%02x:%02x\n",
-							  hash[0], hash[1], hash[2],
-							  hash[3], hash[4], hash[5]);
-				}
-			}
-		} else
-			entry = NULL;
-
-		/* Small optimization, if we already found it once, don't search again */
-		if (!entry)
-			entry = ipoib_sarp_add(dev,
-					       (union ib_gid *) (payload->src_hw_addr + 4),
-					       be32_to_cpu(*(uint32_t *)
-							   payload->src_hw_addr) &
-							   0xffffff);
-
-		if (ARPOP_REQUEST == ntohs(arp->ar_op)) {
-			if (entry && !entry->directed_reply)
-				/* Record when this window started */
-				entry->first_directed_reply = jiffies;
-		}
-
-		if (entry)
-			ipoib_sarp_put(entry);	/* for _find() */
-	} else
-		memset(hash, 0, sizeof(hash));
-
-	memcpy(new_payload, hash, IPOIB_ADDRESS_HASH_BYTES);
-	memcpy(header->h_source, hash, sizeof(header->h_source));
-
-	if (be32_to_cpu(*(uint32_t *)payload->dst_hw_addr) & 0xffffff) {
-		_ipoib_sarp_hash((union ib_gid *) (payload->dst_hw_addr + 4),
-				be32_to_cpu(*(uint32_t *)payload->dst_hw_addr) & 0xffffff, hash);
-
-		entry = ipoib_sarp_add(dev,
-				       (union ib_gid *) (payload->dst_hw_addr + 4),
-				       be32_to_cpu(*(uint32_t *)payload->dst_hw_addr) &
-				       0xffffff);
-		if (entry)
-			ipoib_sarp_put(entry);	/* for _add() */
-
-		memcpy(new_payload + IPOIB_ADDRESS_HASH_BYTES + 4,
-		       hash, IPOIB_ADDRESS_HASH_BYTES);
-		memcpy(header->h_dest, hash, sizeof(header->h_dest));
-	} else {
-		memset(new_payload + IPOIB_ADDRESS_HASH_BYTES + 4,
-		       0, IPOIB_ADDRESS_HASH_BYTES);
-		memset(header->h_dest, 0xff, sizeof(header->h_dest));
-	}
-
-	netif_rx_ni(new_skb);
-
-out:
-	dev_kfree_skb_any(skb);
-	return ret;
-}
-
-/* =============================================================== */
-/*..ipoib_sarp_rewrite_send -- rewrite and send ARP packet         */
-int ipoib_sarp_rewrite_send(struct net_device *dev, struct sk_buff *skb)
-{
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
-	unsigned char broadcast_mac_addr[] =
-	    { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
-	struct sk_buff *new_skb;
-	struct arphdr *arp = (struct arphdr *)(skb->data + ETH_HLEN);
-	uint8_t *payload = ((uint8_t *)arp) + sizeof(*arp);
-	struct arphdr *new_arp;
-	struct ipoib_arp_payload *new_payload;
-	struct ipoib_sarp *dentry = NULL, *entry;
-	struct ipoib_mcast *dmcast = NULL;
-	int ret;
-
-	ipoib_dbg(priv, "ARP send: hwtype=0x%04x proto=0x%04x hwlen=%d prlen=%d op=0x%04x "
-		  "sip=%d.%d.%d.%d dip=%d.%d.%d.%d\n",
-		  ntohs(arp->ar_hrd),
-		  ntohs(arp->ar_pro),
-		  arp->ar_hln,
-		  arp->ar_pln,
-		  ntohs(arp->ar_op),
-		  payload[arp->ar_hln], payload[arp->ar_hln + 1],
-		  payload[arp->ar_hln + 2], payload[arp->ar_hln + 3],
-		  payload[2 * arp->ar_hln + 4], payload[2 * arp->ar_hln + 5],
-		  payload[2 * arp->ar_hln + 6], payload[2 * arp->ar_hln + 7]);
-
-	if (memcmp(broadcast_mac_addr, skb->data, ETH_ALEN) == 0) {
-		/* Broadcast gets handled differently */
-		ret = ipoib_mcast_lookup(dev, &priv->bcast_gid, &dmcast);
-
-		/* mcast is only valid if we get a return code of 0 or -EAGAIN */
-		switch (ret) {
-		case 0:
-			break;
-		case -EAGAIN:
-			ipoib_mcast_queue_packet(dmcast, skb);
-			ipoib_mcast_put(dmcast);	/* for _lookup() */
-			return 0;
-		default:
-			ipoib_warn(priv, "dropping ARP packet with unknown dest "
-				   "%02x:%02x:%02x:%02x:%02x:%02x\n",
-				   skb->data[0], skb->data[1],
-				   skb->data[2], skb->data[3],
-				   skb->data[4], skb->data[5]);
-			return 1;
-		}
-	} else {
-		dentry = _ipoib_sarp_find(dev, skb->data);
-		if (!dentry) {
-			ipoib_warn(priv, "dropping ARP packet with unknown dest "
-				   "%02x:%02x:%02x:%02x:%02x:%02x\n",
-				   skb->data[0], skb->data[1],
-				   skb->data[2], skb->data[3],
-				   skb->data[4], skb->data[5]);
-			return 1;
-		}
-
-		/* Make sure we catch any LID changes */
-
-		/* Update the entry to mark that we last sent a directed ARP query */
-		if (dentry->require_verify
-		    && dentry->address_handle != NULL) {
-			if (ARPOP_REQUEST == ntohs(arp->ar_op)) {
-				dentry->directed_query = 1;
-				dentry->last_directed_query = jiffies;
-			}
-
-			/*
-			 * Catch a LID change on the remote end. If we reply to 3 or more
-			 * ARP queries without a reply, then ditch the entry we have and
-			 * requery
-			 */
-			if (ARPOP_REPLY == ntohs(arp->ar_op)) {
-				dentry->directed_reply++;
-
-				if (!time_before(jiffies,
-				     dentry->first_directed_reply + 4 * HZ)) {
-					/* We're outside of the time window, so restart the counter */
-					dentry->directed_reply = 0;
-				} else if (dentry->directed_reply > 3) {
-					if (dentry->tid ==
-					    TS_IB_CLIENT_QUERY_TID_INVALID) {
-						/* Delete old one and create a new one */
-						ipoib_dbg(priv, "LID change inferred on reply for "
-							 "%02x:%02x:%02x:%02x:%02x:%02x\n",
-							 dentry->hash[0], dentry->hash[1],
-							 dentry->hash[2], dentry->hash[3],
-							 dentry->hash[4], dentry->hash[5]);
-
-						ipoib_sarp_delete(dev,
-								  dentry->hash);
-						entry = ipoib_sarp_add(dev,
-								       &dentry->gid,
-								       dentry->qpn);
-						if (NULL == entry) {
-							ipoib_dbg(priv, "could not allocate new entry for "
-								  "%02x:%02x:%02x:%02x:%02x:%02x\n",
-								 dentry->hash[0], dentry->hash[1],
-								 dentry->hash[2], dentry->hash[3],
-								 dentry->hash[4], dentry->hash[5]);
-							ipoib_sarp_put(dentry); /* for _find() */
-							return 1;
-						}
-
-						ipoib_sarp_put(dentry);	/* for _find() */
-
-						dentry = entry;
-					} else
-						ipoib_dbg(priv, "lookup in progress, skipping destroying entry "
-							  "%02x:%02x:%02x:%02x:%02x:%02x\n",
-							 dentry->hash[0], dentry->hash[1],
-							 dentry->hash[2], dentry->hash[3],
-							 dentry->hash[4], dentry->hash[5]);
-				}
-			}
-		}
-
-		ret = _ipoib_sarp_lookup(dentry);
-		if (ret == -EAGAIN) {
-			ipoib_sarp_queue_packet(dentry, skb);
-			ipoib_sarp_put(dentry);		/* for _find() */
-			return 0;
-		}
-	}
-
-	new_skb = dev_alloc_skb(dev->hard_header_len
-				+ sizeof(*new_arp) + sizeof(*new_payload));
-	if (!new_skb) {
-		if (dentry)
-			ipoib_sarp_put(dentry);		/* for _find() */
-		if (dmcast)
-			ipoib_mcast_put(dmcast);	/* for _lookup() */
-
-		return 1;
-	}
-	skb_reserve(new_skb, dev->hard_header_len);
-
-	new_arp = (struct arphdr *)skb_put(new_skb, sizeof(*new_arp));
-	new_payload = (struct ipoib_arp_payload *)skb_put(new_skb,
-							  sizeof(*new_payload));
-
-	/* build what we need for the header */
-	{
-		uint16_t *t;
-
-		/* ethertype */
-		t = (uint16_t *)skb_push(new_skb, 2);
-		*t = htons(ETH_P_ARP);
-
-		/* leave space so send funct can skip ethernet addrs */
-		skb_push(new_skb, IPOIB_ADDRESS_HASH_BYTES * 2);
-	}
-
-	/* copy ARP header */
-	*new_arp = *arp;
-	new_arp->ar_hrd = htons(ARPHRD_INFINIBAND);
-	new_arp->ar_hln = IPOIB_HW_ADDR_LEN;
-
-	/* copy IP addresses */
-	memcpy(&new_payload->src_ip_addr, payload + arp->ar_hln, 4);
-	memcpy(&new_payload->dst_ip_addr, payload + 2 * arp->ar_hln + 4, 4);
-
-	/* rewrite hash to IPoIB hw address */
-	entry = _ipoib_sarp_find(dev, payload);
-	if (!entry) {
-		ipoib_warn(priv, "can't find hw address for hash "
-			   "%02x:%02x:%02x:%02x:%02x:%02x\n",
-			   payload[0], payload[1], payload[2],
-			   payload[3], payload[4], payload[5]);
-		memset(new_payload->src_hw_addr, 0, IPOIB_HW_ADDR_LEN);
-	} else {
-		*((uint32_t *)new_payload->src_hw_addr) =
-		    cpu_to_be32(entry->qpn);
-		memcpy(&new_payload->src_hw_addr[4], entry->gid.raw,
-		       sizeof (union ib_gid));
-		ipoib_sarp_put(entry);	/* for _find() */
-	}
-
-	if (memcmp(broadcast_mac_addr, payload + IPOIB_ADDRESS_HASH_BYTES + 4,
-		   ETH_ALEN) == 0) {
-		*((uint32_t *)new_payload->dst_hw_addr) =
-		    cpu_to_be32(IB_MULTICAST_QPN);
-		memcpy(&new_payload->dst_hw_addr[4], priv->bcast_gid.raw,
-		       sizeof (union ib_gid));
-	} else {
-		entry = _ipoib_sarp_find(dev, payload +
-					 IPOIB_ADDRESS_HASH_BYTES + 4);
-		if (!entry)
-			memset(new_payload->dst_hw_addr, 0, IPOIB_HW_ADDR_LEN);
-		else {
-			*((uint32_t *)new_payload->dst_hw_addr) =
-			    cpu_to_be32(entry->qpn);
-			memcpy(&new_payload->dst_hw_addr[4], entry->gid.raw,
-			       sizeof (union ib_gid));
-			ipoib_sarp_put(entry);	/* for _find() */
-		}
-	}
-
-	dev_kfree_skb_any(skb);
-
-	if (dmcast)
-		ipoib_mcast_send(dev, dmcast, new_skb);
-	else
-		ipoib_sarp_send(dev, dentry, new_skb);
-
-	return 0;
-}
-
-/* =============================================================== */
-/*..ipoib_sarp_dev_init -- initialize ARP cache                    */
-int ipoib_sarp_dev_init(struct net_device *dev)
-{
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
-	int i;
-
-	priv->sarp_cache = kmalloc(sizeof(*priv->sarp_cache), GFP_KERNEL);
-	if (!priv->sarp_cache)
-		return -ENOMEM;
-
-	for (i = 0; i < 256; ++i)
-		INIT_LIST_HEAD(&priv->sarp_cache->table[i]);
-
-	return 0;
-}
-
-/* =============================================================== */
-/*..ipoib_sarp_dev_flush -- flush ARP cache                        */
-void ipoib_sarp_dev_flush(struct net_device *dev)
-{
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
-	struct ipoib_sarp *entry, *tentry;
-	LIST_HEAD(delete_list);
-	unsigned long flags;
-	int i;
-
-	ipoib_dbg(priv, "flushing shadow ARP cache\n");
-
-	/*
-	 * We move to delete_list first because putting the reference could
-	 * eventually end up blocking and we're in a spinlock
-	 */
-
-	/*
-	 * Instead of destroying the address vector, we destroy the entire
-	 * entry, but we create a new empty entry before that. This way we
-	 * don't have any races with freeing a used address vector.
-	 */
-	spin_lock_irqsave(&priv->lock, flags);
-	for (i = 0; i < 256; ++i) {
-		list_for_each_entry_safe(entry, tentry,
-					 &priv->sarp_cache->table[i],
-					 cache_list) {
-			struct ipoib_sarp *nentry;
-
-			/*
-			 * Allocation failure isn't fatal, just drop the entry.
-			 * If it's important, a new one will be generated later
-			 * automatically.
-			 */
-			nentry = _ipoib_sarp_alloc(entry->dev);
-			if (nentry) {
-				memcpy(nentry->hash, entry->hash,
-				       sizeof(nentry->hash));
-				nentry->gid = entry->gid;
-
-				nentry->require_verify = entry->require_verify;
-				nentry->qpn = entry->qpn;
-
-				/* Add it before the current entry */
-				list_add_tail(&nentry->cache_list,
-					      &entry->cache_list);
-
-				ipoib_sarp_put(nentry);	/* for _alloc() */
-			}
-
-			list_del(&entry->cache_list);
-			list_add_tail(&entry->cache_list, &delete_list);
-		}
-	}
-	spin_unlock_irqrestore(&priv->lock, flags);
-
-	list_for_each_entry_safe(entry, tentry, &delete_list, cache_list) {
-		list_del_init(&entry->cache_list);
-		ipoib_sarp_put(entry);	/* for original reference */
-	}
-}
-
-/* =============================================================== */
-/*..ipoib_sarp_dev_destroy -- destroy ARP cache                    */
-static void ipoib_sarp_dev_destroy(struct net_device *dev)
-{
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
-	struct ipoib_sarp *entry, *tentry;
-	LIST_HEAD(delete_list);
-	unsigned long flags;
-	int i;
-
-	/*
-	 * We move to delete_list first because putting the reference could
-	 * eventually end up blocking and we're in a spinlock
-	 */
-	spin_lock_irqsave(&priv->lock, flags);
-	for (i = 0; i < 256; ++i) {
-		list_for_each_entry_safe(entry, tentry,
-					 &priv->sarp_cache->table[i],
-					 cache_list) {
-			list_del(&entry->cache_list);
-			list_add_tail(&entry->cache_list, &delete_list);
-		}
-	}
-	spin_unlock_irqrestore(&priv->lock, flags);
-
-	list_for_each_entry_safe(entry, tentry, &delete_list, cache_list) {
-		list_del_init(&entry->cache_list);
-		ipoib_sarp_put(entry);	/* for original reference */
-	}
-}
-
-/* =============================================================== */
-/*..ipoib_sarp_dev_cleanup -- clean up ARP cache                   */
-void ipoib_sarp_dev_cleanup(struct net_device *dev)
-{
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
-
-	ipoib_sarp_dev_destroy(dev);
-	kfree(priv->sarp_cache);
-}
-
-/* =============================================================== */
-/*..ipoib_get_gid -- find a hash in shadow ARP cache               */
-int ipoib_get_gid(struct net_device *dev, uint8_t *hash, tTS_IB_GID gid)
-{
-	struct ipoib_sarp *entry = _ipoib_sarp_find(dev, hash);
-
-	if (!entry)
-		return -EINVAL;
-
-	memcpy(gid, entry->gid.raw, sizeof (union ib_gid));
-
-	ipoib_sarp_put(entry);	/* for _find() */
-
-	return 0;
-}
-EXPORT_SYMBOL(ipoib_get_gid);
-
-/*
- * Local Variables:
- * c-file-style: "linux"
- * indent-tabs-mode: t
- * End:
- */
-
Index: infiniband/ulp/ipoib/ipoib_main.c
===================================================================
--- infiniband/ulp/ipoib/ipoib_main.c	(revision 915)
+++ infiniband/ulp/ipoib/ipoib_main.c	(working copy)
@@ -49,8 +49,7 @@
 #endif
 
 module_param(debug_level, int, 0644);
-MODULE_PARM_DESC(debug_level,
-		 "Enable debug tracing if > 0" DATA_PATH_DEBUG_HELP);
+MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0" DATA_PATH_DEBUG_HELP);
 
 int mcast_debug_level;
 
@@ -62,8 +61,10 @@
 DECLARE_MUTEX(ipoib_device_mutex);
 LIST_HEAD(ipoib_device_list);
 
-static const uint8_t broadcast_mac_addr[] = {
-	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+static const u8 ipv4_bcast_addr[] = {
+	0x00, 0xff, 0xff, 0xff,
+	0xff, 0x12, 0x40, 0x1b,	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,	0xff, 0xff, 0xff, 0xff
 };
 
 struct workqueue_struct *ipoib_workqueue;
@@ -145,7 +146,7 @@
 }
 EXPORT_SYMBOL(ipoib_device_handle);
 
-int ipoib_dev_open(struct net_device *dev)
+int ipoib_open(struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 
@@ -184,7 +185,7 @@
 	return 0;
 }
 
-static int _ipoib_dev_stop(struct net_device *dev)
+static int ipoib_stop(struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 
@@ -217,7 +218,7 @@
 	return 0;
 }
 
-static int _ipoib_dev_change_mtu(struct net_device *dev, int new_mtu)
+static int ipoib_change_mtu(struct net_device *dev, int new_mtu)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 
@@ -231,240 +232,364 @@
 	return 0;
 }
 
-static int _ipoib_dev_set_config(struct net_device *dev, struct ifmap *map)
+static int path_rec_completion(tTS_IB_CLIENT_QUERY_TID tid,
+			       int status,
+			       struct ib_path_record *pathrec,
+			       int remaining, void *path_ptr)
 {
-	return -EOPNOTSUPP;
+	struct ipoib_path *path = path_ptr;
+	struct ipoib_dev_priv *priv = netdev_priv(path->dev);
+	struct sk_buff *skb;
+	struct ib_ah *ah;
+
+	if (status)
+		goto err;
+
+	{
+		struct ib_ah_attr av = {
+			.dlid 	       = pathrec->dlid,
+			.sl 	       = pathrec->sl,
+			.src_path_bits = 0,
+			.static_rate   = 0,
+			.ah_flags      = 0,
+			.port_num      = priv->port
+		};
+
+		ah = ib_create_ah(priv->pd, &av);
+	}
+
+	if (IS_ERR(ah))
+		goto err;
+
+	path->ah = ah;
+
+	ipoib_dbg(priv, "created address handle %p for LID 0x%04x, SL %d\n",
+		  ah, pathrec->dlid, pathrec->sl);
+
+	while ((skb = __skb_dequeue(&path->queue))) {
+		skb->dev = path->dev;
+		if (dev_queue_xmit(skb))
+			ipoib_warn(priv, "dev_queue_xmit failed "
+				   "to requeue packet\n");
+	}
+
+	return 1;
+
+err:
+	while ((skb = __skb_dequeue(&path->queue)))
+		dev_kfree_skb(skb);
+	
+	if (path->neighbour)
+		IPOIB_PATH(path->neighbour) = NULL;
+
+	kfree(path);
+
+	return 1;
 }
 
-static int _ipoib_dev_xmit(struct sk_buff *skb, struct net_device *dev)
+static int path_rec_start(struct sk_buff *skb, struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
-	uint16_t ethertype;
-	int ret;
+	struct ipoib_path *path = kmalloc(sizeof *path, GFP_ATOMIC);
+	tTS_IB_CLIENT_QUERY_TID tid;
 
-	ethertype = ntohs(((struct ethhdr *)skb->data)->h_proto);
+	if (!path)
+		goto err;
 
-	ipoib_dbg_data(priv, "packet to transmit, length=%d ethertype=0x%04x\n",
-		       skb->len, ethertype);
+	path->ah  = NULL;
+	path->qpn = be32_to_cpup((u32 *) skb->dst->neighbour->ha);
+	path->dev = dev;
+	skb_queue_head_init(&path->queue);
+	__skb_queue_tail(&path->queue, skb);
+	path->neighbour = NULL;
 
-	if (!netif_carrier_ok(dev)) {
-		ipoib_dbg(priv, "dropping packet since fabric is not up\n");
+	/*
+	 * XXX there's a race here if path record completion runs
+	 * before we get to finish up.  Add a lock to path struct?
+	 */
+	if (tsIbPathRecordRequest(priv->ca, priv->port,
+				  priv->local_gid.raw,
+				  skb->dst->neighbour->ha + 4,
+				  priv->pkey, 0, HZ, 0,
+				  path_rec_completion,
+				  path, &tid)) {
+		ipoib_warn(priv, "tsIbPathRecordRequest failed\n");
+		goto err;
+	}
 
-		dev->trans_start = jiffies;
-		++priv->stats.tx_packets;
-		priv->stats.tx_bytes += skb->len;
-		dev_kfree_skb_any(skb);
-		return 0;
+	path->neighbour = skb->dst->neighbour;
+	IPOIB_PATH(skb->dst->neighbour) = path;
+	return 0;
+
+err:
+	kfree(path);
+	++priv->stats.tx_dropped;
+	dev_kfree_skb_any(skb);
+
+	return 0;
+}
+
+static int unicast_arp_completion(tTS_IB_CLIENT_QUERY_TID tid,
+				  int status,
+				  struct ib_path_record *pathrec,
+				  int remaining, void *skb_ptr)
+{
+	struct sk_buff *skb = skb_ptr;
+	struct ipoib_dev_priv *priv = netdev_priv(skb->dev);
+	struct ib_ah *ah;
+
+	if (status)
+		goto err;
+
+	{
+		struct ib_ah_attr av = {
+			.dlid 	       = pathrec->dlid,
+			.sl 	       = pathrec->sl,
+			.src_path_bits = 0,
+			.static_rate   = 0,
+			.ah_flags      = 0,
+			.port_num      = priv->port
+		};
+
+		ah = ib_create_ah(priv->pd, &av);
 	}
 
-	switch (ethertype) {
-	case ETH_P_ARP:
-		if (ipoib_sarp_rewrite_send(dev, skb)) {
-			++priv->stats.tx_dropped;
-			dev_kfree_skb_any(skb);
-		}
-		return 0;
+	if (IS_ERR(ah))
+		goto err;
 
-	case ETH_P_IP:
-		if (skb->data[0] == 0x01 && skb->data[1] == 0x00
-		    && skb->data[2] == 0x5e
-		    && (skb->data[3] & 0x80) == 0x00) {
-			/* Multicast MAC addr */
-			struct ipoib_mcast *mcast = NULL;
-			union ib_gid mgid;
-			struct iphdr *iph =
-			    (struct iphdr *)(skb->data + ETH_HLEN);
-			u32 multiaddr = ntohl(iph->daddr);
+	*(struct ib_ah **) skb->cb = ah;
 
-			mgid = ipoib_broadcast_mgid;
+	if (dev_queue_xmit(skb))
+		ipoib_warn(priv, "dev_queue_xmit failed "
+			   "to requeue ARP packet\n");
 
-			/* Add in the P_Key */
-			mgid.raw[4] = (priv->pkey >> 8) & 0xff;
-			mgid.raw[5] = priv->pkey & 0xff;
+	return 1;
 
-			/* Fixup the group mapping */
-			mgid.raw[12] = (multiaddr >> 24) & 0x0f;
-			mgid.raw[13] = (multiaddr >> 16) & 0xff;
-			mgid.raw[14] = (multiaddr >> 8) & 0xff;
-			mgid.raw[15] = multiaddr & 0xff;
+err:
+	dev_kfree_skb(skb);
 
-			ret = ipoib_mcast_lookup(dev, &mgid, &mcast);
-			switch (ret) {
-			case 0:
-				return ipoib_mcast_send(dev, mcast, skb);
-			case -EAGAIN:
-				ipoib_mcast_queue_packet(mcast, skb);
-				ipoib_mcast_put(mcast);
-				return 0;
-			}
-		} else
-		    if (memcmp(broadcast_mac_addr, skb->data, ETH_ALEN) == 0) {
-			struct ipoib_mcast *mcast = NULL;
+	return 1;
+}
 
-			ret = ipoib_mcast_lookup(dev, &priv->bcast_gid, &mcast);
-			switch (ret) {
-			case 0:
-				return ipoib_mcast_send(dev, mcast, skb);
-			case -EAGAIN:
-				ipoib_mcast_queue_packet(mcast, skb);
-				ipoib_mcast_put(mcast);
-				return 0;
-			}
-		} else {
-			struct ipoib_sarp *entry = NULL;
+static void unicast_arp_finish(struct sk_buff *skb)
+{
+	struct ib_ah *ah = *(struct ib_ah **) skb->cb;
 
-			ret = ipoib_sarp_lookup(dev, skb->data, &entry);
-			switch (ret) {
-			case 0:
-				return ipoib_sarp_send(dev, entry, skb);
-			case -EAGAIN:
-				ipoib_sarp_queue_packet(entry, skb);
-				ipoib_sarp_put(entry);
-				return 0;
-			}
-		}
+	if (ah)
+		ib_destroy_ah(ah);
+}
 
-		switch (ret) {
-		case 0:
-		case -EAGAIN:
-			/* Shouldn't get here anyway */
-			break;
-		case -ENOENT:
-			ipoib_warn(priv, "dropping packet with unknown dest "
-				   "%02x:%02x:%02x:%02x:%02x:%02x\n",
-				   skb->data[0], skb->data[1],
-				   skb->data[2], skb->data[3],
-				   skb->data[4], skb->data[5]);
+/*
+ * For unicast packets with no skb->dst->neighbour (unicast ARPs are
+ * the main example), we fire off a path record query for each packet.
+ * This is pretty bad for scalability (since this is going to hammer
+ * the SM on a big fabric) but it's the best I can think of for now.
+ *
+ * Also we might have a problem if a path changes, because ARPs will
+ * still go through (since we'll get the new path from the SM for
+ * these queries) so we'll never update the neighbour.
+ */
+static int unicast_arp_start(struct sk_buff *skb, struct net_device *dev,
+			     struct ipoib_pseudoheader *phdr)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct sk_buff *tmp_skb;
+	tTS_IB_CLIENT_QUERY_TID tid;
+
+	if (skb->destructor) {
+		tmp_skb = skb;
+		skb = skb_clone(tmp_skb, GFP_ATOMIC);
+		dev_kfree_skb_any(tmp_skb);
+		if (!skb) {
 			++priv->stats.tx_dropped;
-			dev_kfree_skb_any(skb);
 			return 0;
-		default:
-			ipoib_warn(priv, "sending to %02x:%02x:%02x:%02x:%02x:%02x "
-				   "failed (ret = %d)\n",
-				   skb->data[0], skb->data[1],
-				   skb->data[2], skb->data[3],
-				   skb->data[4], skb->data[5], ret);
-			++priv->stats.tx_dropped;
-			dev_kfree_skb_any(skb);
-			return 0;
 		}
-		return 0;
+	}
 
-	case ETH_P_IPV6:
-		ipoib_dbg(priv, "dropping IPv6 packet\n");
-		++priv->stats.tx_dropped;
-		dev_kfree_skb_any(skb);
-		return 0;
+	skb->dev        = dev;
+	skb->destructor = unicast_arp_finish;
+	memset(skb->cb, 0, sizeof skb->cb);
 
-	default:
-		ipoib_warn(priv, "dropping packet with unknown ethertype 0x%04x\n",
-			   ethertype);
+	/*
+	 * XXX We need to keep a record of the skb and TID somewhere
+	 * so that we can cancel the request if the device goes down
+	 * before it finishes.
+	 */
+	if (tsIbPathRecordRequest(priv->ca, priv->port,
+				  priv->local_gid.raw,
+				  phdr->hwaddr + 4,
+				  priv->pkey, 0, HZ, 0,
+				  unicast_arp_completion,
+				  skb, &tid)) {
+		ipoib_warn(priv, "tsIbPathRecordRequest failed\n");
 		++priv->stats.tx_dropped;
 		dev_kfree_skb_any(skb);
-		return 0;
 	}
 
 	return 0;
 }
 
-struct net_device_stats *_ipoib_dev_get_stats(struct net_device *dev)
+static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_path *path;
 
-	return &priv->stats;
-}
+	if (skb->dst && skb->dst->neighbour) {
+		if (unlikely(!IPOIB_PATH(skb->dst->neighbour)))
+			return path_rec_start(skb, dev);
 
-static void _ipoib_dev_timeout(struct net_device *dev)
-{
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
+		path = IPOIB_PATH(skb->dst->neighbour);
 
-	if (priv->tx_free && !test_bit(IPOIB_FLAG_TIMEOUT, &priv->flags)) {
-		char ring[IPOIB_TX_RING_SIZE + 1];
-		int i;
+		if (likely(path->ah)) {
+			ipoib_send(dev, skb, path->ah, path->qpn);
+			return 0;
+		}
 
-		for (i = 0; i < IPOIB_TX_RING_SIZE; ++i)
-			ring[i] = priv->tx_ring[i].skb ? 'X' : '.';
+		if (skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE)
+			__skb_queue_tail(&path->queue, skb);
+		else
+			goto err;
+	} else {
+		struct ipoib_pseudoheader *phdr =
+			(struct ipoib_pseudoheader *) skb->data;
+		skb_pull(skb, sizeof *phdr);
 
-		ring[i] = 0;
+		if (phdr->hwaddr[4] == 0xff) {
+			/* multicast/broadcast GID */
+			if (!memcmp(phdr->hwaddr, dev->broadcast, IPOIB_HW_ADDR_LEN))
+				ipoib_mcast_send(dev, priv->broadcast, skb);
+			else {
+				ipoib_dbg(priv, "Dropping (no %s): type %04x, QPN %06x "
+					  IPOIB_GID_FMT "\n",
+					  skb->dst ? "neigh" : "dst",
+					  be16_to_cpup((u16 *) skb->data),
+					  be32_to_cpup((u32 *) phdr->hwaddr),
+					  phdr->hwaddr[ 4], phdr->hwaddr[ 5],
+					  phdr->hwaddr[ 6], phdr->hwaddr[ 7],
+					  phdr->hwaddr[ 8], phdr->hwaddr[ 9],
+					  phdr->hwaddr[10], phdr->hwaddr[11],
+					  phdr->hwaddr[12], phdr->hwaddr[13],
+					  phdr->hwaddr[14], phdr->hwaddr[15],
+					  phdr->hwaddr[16], phdr->hwaddr[17],
+					  phdr->hwaddr[18], phdr->hwaddr[19]);
+				goto err;
+			}
+		} else {
+			/* unicast GID -- most likely a unicast ARP */
 
-		ipoib_warn(priv, "transmit timeout: latency %ld, "
-			   "tx_free %d, tx_ring [%s]\n",
-			   jiffies - dev->trans_start, priv->tx_free, ring);
+			/*
+			 * If destructor is unicast_arp_finish, we've
+			 * already been through the path lookup and
+			 * now we can just send the packet.
+			 */
+			if (skb->destructor == unicast_arp_finish) {
+				ipoib_send(dev, skb, *(struct ib_ah **) skb->cb,
+					   be32_to_cpup((u32 *) phdr->hwaddr));
+				return 0;
+			}
 
-		set_bit(IPOIB_FLAG_TIMEOUT, &priv->flags);
-	} else
-		ipoib_dbg(priv, "transmit timeout: latency %ld\n",
-			  jiffies - dev->trans_start);
+			if (be16_to_cpup((u16 *) skb->data) != ETH_P_ARP)
+				ipoib_warn(priv, "Unicast, no %s: type %04x, QPN %06x "
+					   IPOIB_GID_FMT "\n",
+					   skb->dst ? "neigh" : "dst",
+					   be16_to_cpup((u16 *) skb->data),
+					   be32_to_cpup((u32 *) phdr->hwaddr),
+					   phdr->hwaddr[ 4], phdr->hwaddr[ 5],
+					   phdr->hwaddr[ 6], phdr->hwaddr[ 7],
+					   phdr->hwaddr[ 8], phdr->hwaddr[ 9],
+					   phdr->hwaddr[10], phdr->hwaddr[11],
+					   phdr->hwaddr[12], phdr->hwaddr[13],
+					   phdr->hwaddr[14], phdr->hwaddr[15],
+					   phdr->hwaddr[16], phdr->hwaddr[17],
+					   phdr->hwaddr[18], phdr->hwaddr[19]);
+
+			/* restore the pseudoheader for the requeued pass */
+			skb_push(skb, sizeof *phdr);
+			return unicast_arp_start(skb, dev, phdr);
+		}
+	}
+
+	return 0;
+
+err:
+	++priv->stats.tx_dropped;
+	dev_kfree_skb_any(skb);
+
+	return 0;
 }
 
-/*
- * Setup the packet to look like ethernet here, we'll fix it later when
- * we actually send it to look like an IPoIB packet
- */
-static int _ipoib_dev_hard_header(struct sk_buff *skb,
-				  struct net_device *dev,
-				  unsigned short type,
-				  void *daddr, void *saddr, unsigned len)
+struct net_device_stats *ipoib_get_stats(struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
-	struct ethhdr *header = (struct ethhdr *)skb_push(skb, ETH_HLEN);
 
-	/* If DEBUG is undefined, priv won't be used */
-	(void) priv;
+	return &priv->stats;
+}
 
-	ipoib_dbg_data(priv, "building header, ethertype=0x%04x\n", type);
+static void ipoib_timeout(struct net_device *dev)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
 
-	if (daddr)
-		memcpy(header->h_dest, daddr, IPOIB_ADDRESS_HASH_BYTES);
+	ipoib_warn(priv, "transmit timeout: latency %ld\n",
+		   jiffies - dev->trans_start);
+	/* XXX reset QP, etc. */
+}
 
-	if (saddr)
-		memcpy(header->h_source, saddr, IPOIB_ADDRESS_HASH_BYTES);
-	else
-		memcpy(header->h_source, dev->dev_addr,
-		       IPOIB_ADDRESS_HASH_BYTES);
+static int ipoib_hard_header(struct sk_buff *skb,
+			     struct net_device *dev,
+			     unsigned short type,
+			     void *daddr, void *saddr, unsigned len)
+{
+	struct ipoib_header *header;
 
-	header->h_proto = htons(type);
+	header = (struct ipoib_header *) skb_push(skb, sizeof *header);
 
+	header->proto = htons(type);
+	header->reserved = 0;
+
+	/*
+	 * If we don't have a neighbour structure, stuff the
+	 * destination address onto the front of the skb so we can
+	 * figure out where to send the packet later.
+	 */
+	if (!skb->dst || !skb->dst->neighbour) {
+		struct ipoib_pseudoheader *phdr =
+			(struct ipoib_pseudoheader *) skb_push(skb, sizeof *phdr);
+		memcpy(phdr->hwaddr, daddr, IPOIB_HW_ADDR_LEN);
+	}
+
 	return 0;
 }
 
-static void _ipoib_dev_set_mcast_list(struct net_device *dev)
+static void ipoib_set_mcast_list(struct net_device *dev)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
-
-	schedule_work(&priv->restart_task);
+	/* XXX Join multicast groups */
 }
 
 int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 
-	if (ipoib_sarp_dev_init(dev))
-		goto out;
-
 	/* Allocate RX/TX "rings" to hold queued skbs */
 
-	priv->rx_ring =
-		kmalloc(IPOIB_RX_RING_SIZE * sizeof (struct ipoib_rx_buf),
-			GFP_KERNEL);
+	priv->rx_ring =	kmalloc(IPOIB_RX_RING_SIZE * sizeof (struct ipoib_buf),
+				GFP_KERNEL);
 	if (!priv->rx_ring) {
 		printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n",
 		       ca->name, IPOIB_RX_RING_SIZE);
-		goto out_arp_cleanup;
+		goto out;
 	}
 	memset(priv->rx_ring, 0,
-	       IPOIB_RX_RING_SIZE * sizeof (struct ipoib_rx_buf));
+	       IPOIB_RX_RING_SIZE * sizeof (struct ipoib_buf));
 
-	priv->tx_ring =
-	    kmalloc(IPOIB_TX_RING_SIZE * sizeof(struct ipoib_tx_buf),
-		    GFP_KERNEL);
+	priv->tx_ring = kmalloc(IPOIB_TX_RING_SIZE * sizeof (struct ipoib_buf),
+				GFP_KERNEL);
 	if (!priv->tx_ring) {
 		printk(KERN_WARNING "%s: failed to allocate TX ring (%d entries)\n",
 		       ca->name, IPOIB_TX_RING_SIZE);
 		goto out_rx_ring_cleanup;
 	}
 	memset(priv->tx_ring, 0,
-	       IPOIB_TX_RING_SIZE * sizeof(struct ipoib_tx_buf));
+	       IPOIB_TX_RING_SIZE * sizeof(struct ipoib_buf));
 
 	/* set up the rest of our private data */
 
@@ -482,9 +607,6 @@
 out_rx_ring_cleanup:
 	kfree(priv->rx_ring);
 
-out_arp_cleanup:
-	ipoib_sarp_dev_cleanup(dev);
-
 out:
 	return -ENOMEM;
 }
@@ -507,7 +629,6 @@
 
 	ipoib_proc_dev_cleanup(dev);
 	ipoib_ib_dev_cleanup(dev);
-	ipoib_sarp_dev_cleanup(dev);
 
 	if (priv->rx_ring) {
 		for (i = 0; i < IPOIB_RX_RING_SIZE; ++i)
@@ -532,15 +653,14 @@
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 
-	dev->open 		 = ipoib_dev_open;
-	dev->stop 		 = _ipoib_dev_stop;
-	dev->change_mtu 	 = _ipoib_dev_change_mtu;
-	dev->set_config 	 = _ipoib_dev_set_config;
-	dev->hard_start_xmit 	 = _ipoib_dev_xmit;
-	dev->get_stats 		 = _ipoib_dev_get_stats;
-	dev->tx_timeout 	 = _ipoib_dev_timeout;
-	dev->hard_header 	 = _ipoib_dev_hard_header;
-	dev->set_multicast_list  = _ipoib_dev_set_mcast_list;
+	dev->open 		 = ipoib_open;
+	dev->stop 		 = ipoib_stop;
+	dev->change_mtu 	 = ipoib_change_mtu;
+	dev->hard_start_xmit 	 = ipoib_start_xmit;
+	dev->get_stats 		 = ipoib_get_stats;
+	dev->tx_timeout 	 = ipoib_timeout;
+	dev->hard_header 	 = ipoib_hard_header;
+	dev->set_multicast_list  = ipoib_set_mcast_list;
 	dev->watchdog_timeo 	 = HZ;
 
 	dev->rebuild_header 	 = NULL;
@@ -548,17 +668,21 @@
 	dev->header_cache_update = NULL;
 
 	dev->flags              |= IFF_BROADCAST | IFF_MULTICAST;
-
-	dev->hard_header_len 	 = ETH_HLEN;
-	dev->addr_len 		 = IPOIB_ADDRESS_HASH_BYTES;
-	dev->type 		 = ARPHRD_ETHER;
+	
+	/*
+	 * We add in IPOIB_HW_ADDR_LEN to allow for the destination
+	 * address "pseudoheader" for skbs without neighbour struct.
+	 */
+	dev->hard_header_len 	 = IPOIB_ENCAP_LEN + IPOIB_HW_ADDR_LEN;
+	dev->addr_len 		 = IPOIB_HW_ADDR_LEN;
+	dev->type 		 = ARPHRD_INFINIBAND;
 	dev->tx_queue_len 	 = IPOIB_TX_RING_SIZE * 2;
 
 	/* MTU will be reset when mcast join happens */
 	dev->mtu 		 = IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN;
 	priv->mcast_mtu 	 = priv->admin_mtu = dev->mtu;
 
-	memset(dev->broadcast, 0xff, dev->addr_len);
+	memcpy(dev->broadcast, ipv4_bcast_addr, IPOIB_HW_ADDR_LEN);
 
 	netif_carrier_off(dev);
 
@@ -610,6 +734,9 @@
 		goto alloc_mem_failed;
 	}
 
+	priv->dev->broadcast[8] = priv->pkey >> 8;
+	priv->dev->broadcast[9] = priv->pkey & 0xff;
+
 	result = ipoib_dev_init(priv->dev, hca, port);
 	if (result < 0) {
 		printk(KERN_WARNING "%s: failed to initialize port %d (ret = %d)\n",
Index: infiniband/ulp/ipoib/ipoib.h
===================================================================
--- infiniband/ulp/ipoib/ipoib.h	(revision 915)
+++ infiniband/ulp/ipoib/ipoib.h	(working copy)
@@ -44,30 +44,27 @@
 
 /* constants */
 
-#define ARPHRD_INFINIBAND 32
+enum {
+	IPOIB_PACKET_SIZE         = 2048,
+	IPOIB_BUF_SIZE 		  = IPOIB_PACKET_SIZE + IB_GRH_BYTES,
 
-#define IPOIB_PACKET_SIZE	2048
+	IPOIB_ENCAP_LEN 	  = 4,
+	IPOIB_HW_ADDR_LEN 	  = 20,
 
-enum {
 	IPOIB_RX_RING_SIZE 	  = 128,
 	IPOIB_TX_RING_SIZE 	  = 64,
 
 	IPOIB_NUM_WC 		  = 4,
 
-	IPOIB_BUF_SIZE 		  = IPOIB_PACKET_SIZE + IB_GRH_BYTES,
+	IPOIB_MAX_PATH_REC_QUEUE  = 3,
 
-	IPOIB_ADDRESS_HASH_BYTES  = ETH_ALEN,
-	IPOIB_ENCAP_LEN 	  = 4,
-	IPOIB_HW_ADDR_LEN 	  = 20,
-
 	IPOIB_FLAG_TX_FULL 	  = 0,
-	IPOIB_FLAG_TIMEOUT 	  = 1,
-	IPOIB_FLAG_OPER_UP 	  = 2,
-	IPOIB_FLAG_ADMIN_UP 	  = 3,
-	IPOIB_PKEY_ASSIGNED 	  = 4,
-	IPOIB_PKEY_STOP 	  = 5,
-	IPOIB_FLAG_SUBINTERFACE   = 6,
-	IPOIB_MCAST_STOP 	  = 7,
+	IPOIB_FLAG_OPER_UP 	  = 1,
+	IPOIB_FLAG_ADMIN_UP 	  = 2,
+	IPOIB_PKEY_ASSIGNED 	  = 3,
+	IPOIB_PKEY_STOP 	  = 4,
+	IPOIB_FLAG_SUBINTERFACE   = 5,
+	IPOIB_MCAST_STOP 	  = 6,
 
 	IPOIB_MAX_BACKOFF_SECONDS = 16,
 
@@ -79,23 +76,22 @@
 
 /* structs */
 
-typedef void (*ipoib_tx_callback_t)(void *);
+struct ipoib_header {
+	u16 proto;
+	u16 reserved;
+};
 
-struct ipoib_sarp;
+struct ipoib_pseudoheader {
+	u8  hwaddr[IPOIB_HW_ADDR_LEN];
+};
+
 struct ipoib_mcast;
 
-struct ipoib_tx_buf {
+struct ipoib_buf {
 	struct sk_buff *skb;
-	ipoib_tx_callback_t callback;
-	void *ptr;
 	DECLARE_PCI_UNMAP_ADDR(mapping)
 };
 
-struct ipoib_rx_buf {
-	struct sk_buff *skb;
-	DECLARE_PCI_UNMAP_ADDR(mapping)
-};
-
 struct ipoib_dev_priv {
 	spinlock_t lock;
 
@@ -132,22 +128,17 @@
 	u16          local_lid;
 	u32          local_qpn;
 
-	union ib_gid bcast_gid;
-
 	unsigned int admin_mtu;
 	unsigned int mcast_mtu;
 
-	struct ipoib_rx_buf *rx_ring;
+	struct ipoib_buf *rx_ring;
 
-	struct ipoib_tx_buf *tx_ring;
+	struct ipoib_buf *tx_ring;
 	int tx_head;
 	int tx_free;
 
 	struct ib_wc ibwc[IPOIB_NUM_WC];
 
-	struct ipoib_sarp_cache *sarp_cache;
-
-	struct proc_dir_entry *arp_proc_entry;
 	struct proc_dir_entry *mcast_proc_entry;
 
 	struct ib_event_handler event_handler;
@@ -158,21 +149,29 @@
 	struct list_head child_intfs;
 };
 
+struct ipoib_path {
+	struct ib_ah       *ah;
+	u32                 qpn;
+	struct sk_buff_head queue;
+
+	struct net_device  *dev;
+	struct neighbour   *neighbour;
+};
+
+#define IPOIB_PATH(neigh) (*(struct ipoib_path **) ((neigh)->ha + 24))
+
 extern struct workqueue_struct *ipoib_workqueue;
 
 /* list of IPoIB network devices */
 extern struct semaphore ipoib_device_mutex;
 extern struct list_head ipoib_device_list;
 
-extern union ib_gid ipoib_broadcast_mgid;
-
 /* functions */
 
 void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr);
 
-int ipoib_dev_send(struct net_device *dev, struct sk_buff *skb,
-		   ipoib_tx_callback_t callback,
-		   void *ptr, struct ib_ah *address, u32 qpn);
+void ipoib_send(struct net_device *dev, struct sk_buff *skb,
+		struct ib_ah *address, u32 qpn);
 
 struct ipoib_dev_priv *ipoib_intf_alloc(const char *format);
 
@@ -188,33 +187,6 @@
 int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
 void ipoib_dev_cleanup(struct net_device *dev);
 
-void ipoib_sarp_get(struct ipoib_sarp *entry);
-void ipoib_sarp_put(struct ipoib_sarp *entry);
-struct ipoib_sarp *ipoib_sarp_add(struct net_device *dev, union ib_gid *gid,
-				  u32 qpn);
-struct ipoib_sarp *ipoib_sarp_local_add(struct net_device *dev, union ib_gid *gid,
-					u32 qpn);
-int ipoib_sarp_delete(struct net_device *dev, const uint8_t *hash);
-int ipoib_sarp_lookup(struct net_device *dev, uint8_t *hash,
-		      struct ipoib_sarp **entry);
-int ipoib_sarp_queue_packet(struct ipoib_sarp *entry, struct sk_buff *skb);
-int ipoib_sarp_send(struct net_device *dev, struct ipoib_sarp *entry,
-		    struct sk_buff *skb);
-int ipoib_sarp_rewrite_receive(struct net_device *dev, struct sk_buff *skb);
-int ipoib_sarp_rewrite_send(struct net_device *dev, struct sk_buff *skb);
-int ipoib_sarp_dev_init(struct net_device *dev);
-void ipoib_sarp_dev_flush(struct net_device *dev);
-void ipoib_sarp_dev_cleanup(struct net_device *dev);
-
-struct ipoib_sarp_iter *ipoib_sarp_iter_init(struct net_device *dev);
-void ipoib_sarp_iter_free(struct ipoib_sarp_iter *iter);
-int ipoib_sarp_iter_next(struct ipoib_sarp_iter *iter);
-void ipoib_sarp_iter_read(struct ipoib_sarp_iter *iter, uint8_t *hash,
-			  union ib_gid *gid, u32 *qpn,
-			  unsigned long *created,
-			  unsigned long *last_verify,
-			  unsigned int *queuelen, unsigned int *complete);
-
 int ipoib_proc_dev_init(struct net_device *dev);
 void ipoib_proc_dev_cleanup(struct net_device *dev);
 
@@ -223,8 +195,8 @@
 int ipoib_mcast_lookup(struct net_device *dev, union ib_gid *mgid,
 		       struct ipoib_mcast **mcast);
 int ipoib_mcast_queue_packet(struct ipoib_mcast *mcast, struct sk_buff *skb);
-int ipoib_mcast_send(struct net_device *dev, struct ipoib_mcast *mcast,
-		     struct sk_buff *skb);
+void ipoib_mcast_send(struct net_device *dev, struct ipoib_mcast *mcast,
+		      struct sk_buff *skb);
 
 void ipoib_mcast_restart_task(void *dev_ptr);
 int ipoib_mcast_start_thread(struct net_device *dev);
Index: infiniband/ulp/ipoib/ipoib_ib.c
===================================================================
--- infiniband/ulp/ipoib/ipoib_ib.c	(revision 915)
+++ infiniband/ulp/ipoib/ipoib_ib.c	(working copy)
@@ -113,7 +113,7 @@
 
 		if (entry->opcode == IB_WC_SEND) {
 			if (work_request_id < IPOIB_TX_RING_SIZE) {
-				struct ipoib_tx_buf *tx_req;
+				struct ipoib_buf *tx_req;
 
 				tx_req = &priv->tx_ring[work_request_id];
 
@@ -145,42 +145,18 @@
 
 			if (entry->slid != priv->local_lid ||
 			    entry->src_qp != priv->local_qpn) {
-				struct ethhdr *header;
+				skb->protocol = ((struct ipoib_header *) skb->data)->proto;
 
-				skb->protocol = *(uint16_t *)skb->data;
-
-				/* pull the IPoIB header and add an ethernet header */
 				skb_pull(skb, IPOIB_ENCAP_LEN);
 
-				header = (struct ethhdr *)skb_push(skb,
-								   ETH_HLEN);
-
-				/*
-				 * We could figure out the MAC address from
-				 * the IPoIB header and matching, but it's
-				 * probably too much effort for what it's worth
-				 */
-				memset(header->h_dest, 0,
-				       sizeof(header->h_dest));
-				memset(header->h_source, 0,
-				       sizeof(header->h_source));
-				header->h_proto = skb->protocol;
-
-				skb->mac.raw = skb->data;
-				skb_pull(skb, ETH_HLEN);
-
 				dev->last_rx = jiffies;
 				++priv->stats.rx_packets;
 				priv->stats.rx_bytes += skb->len;
 
-				if (skb->protocol == htons(ETH_P_ARP)) {
-					if (ipoib_sarp_rewrite_receive(dev, skb))
-						ipoib_warn(priv, "ipoib_sarp_rewrite_receive failed\n");
-				} else {
-					skb->dev = dev;
-					skb->pkt_type = PACKET_HOST;
-					netif_rx_ni(skb);
-				}
+				skb->dev = dev;
+				/* XXX get correct PACKET_ type here */
+				skb->pkt_type = PACKET_HOST;
+				netif_rx_ni(skb);
 			} else {
 				ipoib_dbg_data(priv, "dropping loopback packet\n");
 				dev_kfree_skb_any(skb);
@@ -198,7 +174,7 @@
 
 	case IB_WC_SEND:
 	{
-		struct ipoib_tx_buf *tx_req;
+		struct ipoib_buf *tx_req;
 		unsigned long flags;
 
 		if (work_request_id >= IPOIB_TX_RING_SIZE) {
@@ -216,18 +192,12 @@
 				 tx_req->skb->len,
 				 PCI_DMA_TODEVICE);
 
-		clear_bit(IPOIB_FLAG_TIMEOUT, &priv->flags);
-
 		++priv->stats.tx_packets;
 		priv->stats.tx_bytes += tx_req->skb->len;
 
 		dev_kfree_skb_any(tx_req->skb);
 		tx_req->skb = NULL;
 
-		tx_req->callback(tx_req->ptr);
-		tx_req->callback = NULL;
-		tx_req->ptr = NULL;
-
 		spin_lock_irqsave(&priv->lock, flags);
 		++priv->tx_free;
 		if (priv->tx_free > IPOIB_TX_RING_SIZE / 2)
@@ -260,10 +230,10 @@
 	} while (n == IPOIB_NUM_WC);
 }
 
-static int _ipoib_ib_send(struct ipoib_dev_priv *priv,
-			  u64 work_request_id,
-			  struct ib_ah *address, u32 qpn,
-			  dma_addr_t addr, int len)
+static inline int post_send(struct ipoib_dev_priv *priv,
+			    u64 work_request_id,
+			    struct ib_ah *address, u32 qpn,
+			    dma_addr_t addr, int len)
 {
 	struct ib_sge list = {
 		.addr    = addr,
@@ -290,13 +260,12 @@
 }
 
 /* =============================================================== */
-/*..ipoib_dev_send -- schedule an IB send work request             */
-int ipoib_dev_send(struct net_device *dev, struct sk_buff *skb,
-		   ipoib_tx_callback_t callback, void *ptr,
-		   struct ib_ah *address, u32 qpn)
+/*..ipoib_send -- schedule an IB send work request                 */
+void ipoib_send(struct net_device *dev, struct sk_buff *skb,
+		struct ib_ah *address, u32 qpn)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
-	struct ipoib_tx_buf *tx_req;
+	struct ipoib_buf *tx_req;
 	dma_addr_t addr;
 
 	if (skb->len > dev->mtu + IPOIB_HW_ADDR_LEN) {
@@ -304,56 +273,39 @@
 			   skb->len, dev->mtu + IPOIB_HW_ADDR_LEN);
 		++priv->stats.tx_dropped;
 		++priv->stats.tx_errors;
-
-		goto err;
+		dev_kfree_skb_any(skb);
+		return;
 	}
 
 	if (!(skb = skb_unshare(skb, GFP_ATOMIC))) {
 		ipoib_warn(priv, "failed to unshare sk_buff. Dropping\n");
 		++priv->stats.tx_dropped;
 		++priv->stats.tx_errors;
-
-		goto out;
+		return;
 	}
 
 	ipoib_dbg_data(priv, "sending packet, length=%d address=%p qpn=0x%06x\n",
 		       skb->len, address, qpn);
 
-	/* make the skb look like an IPoIB packet again */
-	{
-		struct ethhdr *header = (struct ethhdr *)skb->data;
-		uint16_t *reserved, *ether_type;
-
-		skb_pull(skb, ETH_HLEN);
-		reserved = (uint16_t *)skb_push(skb, 2);
-		ether_type = (uint16_t *)skb_push(skb, 2);
-
-		*ether_type = header->h_proto;
-		*reserved = 0;
-	}
-
 	/*
-	 * We put the skb into the tx_ring _before_ we call _ipoib_ib_send()
+	 * We put the skb into the tx_ring _before_ we call post_send()
 	 * because it's entirely possible that the completion handler will
-	 * run before we execute anything after the _ipoib_ib_send().  That
+	 * run before we execute anything after the post_send().  That
 	 * means we have to make sure everything is properly recorded and
-	 * our state is consistent before we call _ipoib_ib_send().
+	 * our state is consistent before we call post_send().
 	 */
 	tx_req = &priv->tx_ring[priv->tx_head];
 	tx_req->skb = skb;
-	tx_req->callback = callback;
-	tx_req->ptr = ptr;
 	addr = pci_map_single(priv->ca->dma_device,
 			      skb->data, skb->len,
 			      PCI_DMA_TODEVICE);
 	pci_unmap_addr_set(tx_req, mapping, addr);
 
-	if (_ipoib_ib_send(priv, priv->tx_head, address, qpn, addr, skb->len)) {
-		ipoib_warn(priv, "_ipoib_ib_send failed\n");
+	if (post_send(priv, priv->tx_head, address, qpn, addr, skb->len)) {
+		ipoib_warn(priv, "post_send failed\n");
 		++priv->stats.tx_errors;
 		tx_req->skb = NULL;
-		tx_req->callback = NULL;
-		tx_req->ptr = NULL;
+		dev_kfree_skb_any(skb);
 	} else {
 		unsigned long flags;
 
@@ -368,17 +320,7 @@
 			netif_stop_queue(dev);
 		}
 		spin_unlock_irqrestore(&priv->lock, flags);
-
-		return 0;
 	}
-
-err:
-	dev_kfree_skb_any(skb);
-
-out:
-	callback(ptr);
-
-	return 0;
 }
 
 int ipoib_ib_dev_open(struct net_device *dev)
@@ -451,11 +393,7 @@
 
 	/* Delete broadcast and local addresses since they will be recreated */
 	ipoib_mcast_dev_down(dev);
-	ipoib_sarp_delete(dev, dev->dev_addr);
 
-	/* Invalidate all address vectors */
-	ipoib_sarp_dev_flush(dev);
-
 	return 0;
 }
 
@@ -546,7 +484,6 @@
 
 	/* Delete the broadcast address and the local address */
 	ipoib_mcast_dev_down(dev);
-	ipoib_sarp_delete(dev, dev->dev_addr);
 
 	ipoib_transport_dev_cleanup(dev);
 }
@@ -560,7 +497,7 @@
  * Bug #2507. This implementation will probably be removed when the P_Key
  * change async notification is available.
  */
-int ipoib_dev_open(struct net_device *dev);
+int ipoib_open(struct net_device *dev);
 
 /* =================================================================== */
 /*.. ipoib_pkey_dev_check_presence - Check for the interface P_Key presence */
@@ -585,7 +522,7 @@
 	ipoib_pkey_dev_check_presence(dev);
 
 	if (test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags))
-		ipoib_dev_open(dev);
+		ipoib_open(dev);
 	else {
 		down(&pkey_sem);
 		if (!test_bit(IPOIB_PKEY_STOP, &priv->flags))
Index: infiniband/ulp/ipoib/ipoib_vlan.c
===================================================================
--- infiniband/ulp/ipoib/ipoib_vlan.c	(revision 915)
+++ infiniband/ulp/ipoib/ipoib_vlan.c	(working copy)
@@ -75,6 +75,9 @@
 
 	priv->pkey = pkey;
 
+	priv->dev->broadcast[8] = pkey >> 8;
+	priv->dev->broadcast[9] = pkey & 0xff;
+
 	result = ipoib_dev_init(priv->dev, ppriv->ca, ppriv->port);
 	if (result < 0) {
 		ipoib_warn(ppriv, "failed to initialize subinterface: "
Index: infiniband/ulp/ipoib/ipoib_proc.c
===================================================================
--- infiniband/ulp/ipoib/ipoib_proc.c	(revision 915)
+++ infiniband/ulp/ipoib/ipoib_proc.c	(working copy)
@@ -33,263 +33,6 @@
 #include "ts_kernel_services.h"
 
 /*
- * ARP proc file stuff
- */
-
-static const char ipoib_arp_proc_entry_name[] = "ipoib_arp_%s";
-/*
- * we have a static variable to hold the device pointer between when
- * the /proc file is opened and the seq_file start function is
- * called.  (This is a kludge to get around the fact that we don't get
- * to pass user data to the seq_file start function)
- */
-static DECLARE_MUTEX(proc_arp_mutex);
-static struct net_device *proc_arp_device;
-
-/* =============================================================== */
-/*.._ipoib_sarp_seq_start -- seq file handling                     */
-static void *_ipoib_sarp_seq_start(struct seq_file *file, loff_t *pos)
-{
-	struct ipoib_sarp_iter *iter = ipoib_sarp_iter_init(proc_arp_device);
-	loff_t n = *pos;
-
-	while (n--) {
-		if (ipoib_sarp_iter_next(iter)) {
-			ipoib_sarp_iter_free(iter);
-			return NULL;
-		}
-	}
-
-	return iter;
-}
-
-/* =============================================================== */
-/*.._ipoib_sarp_seq_next -- seq file handling                      */
-static void *_ipoib_sarp_seq_next(struct seq_file *file, void *iter_ptr,
-				  loff_t *pos)
-{
-	struct ipoib_sarp_iter *iter = iter_ptr;
-
-	(*pos)++;
-
-	if (ipoib_sarp_iter_next(iter)) {
-		ipoib_sarp_iter_free(iter);
-		return NULL;
-	}
-
-	return iter;
-}
-
-/* =============================================================== */
-/*.._ipoib_sarp_seq_stop -- seq file handling                      */
-static void _ipoib_sarp_seq_stop(struct seq_file *file, void *iter_ptr)
-{
-	/* nothing for now */
-}
-
-/* =============================================================== */
-/*.._ipoib_sarp_seq_show -- seq file handling                      */
-static int _ipoib_sarp_seq_show(struct seq_file *file, void *iter_ptr)
-{
-	struct ipoib_sarp_iter *iter = iter_ptr;
-	uint8_t hash[IPOIB_ADDRESS_HASH_BYTES];
-	char gid_buf[sizeof("ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff")];
-	union ib_gid gid;
-	u32 qpn;
-	int i, n;
-	unsigned long created, last_verify;
-	unsigned int queuelen, complete;
-
-	if (iter) {
-		ipoib_sarp_iter_read(iter, hash, &gid, &qpn, &created,
-				     &last_verify, &queuelen, &complete);
-
-		for (i = 0; i < IPOIB_ADDRESS_HASH_BYTES; ++i) {
-			seq_printf(file, "%02x", hash[i]);
-			if (i < IPOIB_ADDRESS_HASH_BYTES - 1)
-				seq_putc(file, ':');
-			else
-				seq_printf(file, "  ");
-		}
-
-		for (n = 0, i = 0; i < sizeof gid / 2; ++i) {
-			n += sprintf(gid_buf + n, "%x",
-				     be16_to_cpu(((u16 *)gid.raw)[i]));
-			if (i < sizeof gid / 2 - 1)
-				gid_buf[n++] = ':';
-		}
-	}
-
-	seq_printf(file, "GID: %*s", -(1 + (int) sizeof(gid_buf)), gid_buf);
-	seq_printf(file, "QP#: 0x%06x", qpn);
-
-	seq_printf(file,
-		   " created: %10ld last_verify: %10ld queuelen: %4d complete: %d\n",
-		   created, last_verify, queuelen, complete);
-
-	return 0;
-}
-
-static struct seq_operations ipoib_sarp_seq_operations = {
-	.start = _ipoib_sarp_seq_start,
-	.next = _ipoib_sarp_seq_next,
-	.stop = _ipoib_sarp_seq_stop,
-	.show = _ipoib_sarp_seq_show,
-};
-
-/* =============================================================== */
-/*.._ipoib_sarp_proc_open -- proc file handling                    */
-static int _ipoib_sarp_proc_open(struct inode *inode, struct file *file)
-{
-	struct proc_dir_entry *pde = PDE(inode);
-
-	if (down_interruptible(&proc_arp_mutex))
-		return -ERESTARTSYS;
-
-	proc_arp_device = pde->data;
-
-	return seq_open(file, &ipoib_sarp_seq_operations);
-}
-
-/*
-  _ipoib_ascii_to_gid is adapted from BSD's inet_pton6, which was
-  originally written by Paul Vixie
-*/
-
-/* =============================================================== */
-/*.._ipoib_ascii_to_gid -- read GID from string                     */
-static int _ipoib_ascii_to_gid(const char *src, union ib_gid *dst)
-{
-	static const char xdigits[] = "0123456789abcdef";
-	unsigned char *tp, *endp, *colonp;
-	const char *curtok;
-	int ch, saw_xdigit;
-	unsigned int val;
-
-	memset((tp = (char *) dst), 0, sizeof (union ib_gid));
-	endp = tp + sizeof (union ib_gid);
-	colonp = NULL;
-
-	/* Leading :: requires some special handling. */
-	if (*src == ':' && *++src != ':')
-		return 0;
-
-	curtok = src;
-	saw_xdigit = 0;
-	val = 0;
-
-	while ((ch = *src++) != '\0') {
-		const char *pch;
-
-		pch = strchr(xdigits, tolower(ch));
-
-		if (pch) {
-			val <<= 4;
-			val |= (pch - xdigits);
-			if (val > 0xffff)
-				return 0;
-
-			saw_xdigit = 1;
-			continue;
-		}
-
-		if (ch == ':') {
-			curtok = src;
-
-			if (!saw_xdigit) {
-				if (colonp)
-					return 0;
-
-				colonp = tp;
-				continue;
-			} else if (*src == '\0')
-				return 0;
-
-			if (tp + 2 > endp)
-				return 0;
-
-			*tp++ = (u_char) (val >> 8) & 0xff;
-			*tp++ = (u_char) val & 0xff;
-			saw_xdigit = 0;
-			val = 0;
-			continue;
-		}
-
-		return 0;
-	}
-
-	if (saw_xdigit) {
-		if (tp + 2 > endp)
-			return 0;
-
-		*tp++ = (u_char) (val >> 8) & 0xff;
-		*tp++ = (u_char) val & 0xff;
-	}
-
-	if (colonp) {
-		memmove(endp - (tp - colonp), colonp, tp - colonp);
-		memset(colonp, 0, tp - colonp);
-		tp = endp;
-	}
-
-	if (tp != endp)
-		return 0;
-
-	return 1;
-}
-
-/* =============================================================== */
-/*.._ipoib_sarp_proc_write -- proc file handling                   */
-static ssize_t _ipoib_sarp_proc_write(struct file *file, const char *buffer,
-				      size_t count, loff_t *pos)
-{
-	struct ipoib_sarp *entry;
-	char kernel_buf[256];
-	char gid_buf[sizeof("ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff")];
-	union ib_gid gid;
-	u32 qpn;
-
-	count = min(count, sizeof(kernel_buf));
-
-	if (copy_from_user(kernel_buf, buffer, count))
-		return -EFAULT;
-
-	kernel_buf[count - 1] = '\0';
-
-	if (sscanf(kernel_buf, "%39s %i", gid_buf, &qpn) != 2)
-		return -EINVAL;
-
-	if (!_ipoib_ascii_to_gid(gid_buf, &gid))
-		return -EINVAL;
-
-	if (qpn > 0xffffff)
-		return -EINVAL;
-
-	entry = ipoib_sarp_add(proc_arp_device, &gid, qpn);
-	if (entry)
-		ipoib_sarp_put(entry);
-
-	return count;
-}
-
-/* =============================================================== */
-/*.._ipoib_sarp_proc_release -- proc file handling                 */
-static int _ipoib_sarp_proc_release(struct inode *inode, struct file *file)
-{
-	up(&proc_arp_mutex);
-
-	return seq_release(inode, file);
-}
-
-static struct file_operations ipoib_sarp_proc_device_operations = {
-	.open = _ipoib_sarp_proc_open,
-	.read = seq_read,
-	.write = _ipoib_sarp_proc_write,
-	.llseek = seq_lseek,
-	.release = _ipoib_sarp_proc_release,
-};
-
-/*
  * Multicast proc stuff
  */
 
@@ -419,28 +162,14 @@
 int ipoib_proc_dev_init(struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
-	char name[sizeof(ipoib_arp_proc_entry_name) + sizeof (dev->name)];
+	char name[sizeof(ipoib_mcast_proc_entry_name) + sizeof (dev->name)];
 
-	snprintf(name, sizeof(name) - 1, ipoib_arp_proc_entry_name, dev->name);
-	priv->arp_proc_entry = create_proc_entry(name,
-						 S_IRUGO | S_IWUGO,
-						 tsKernelProcDirGet());
-
-	if (!priv->arp_proc_entry) {
-		ipoib_warn(priv, "Can't create %s in /proc\n", name);
-		return -ENOMEM;
-	}
-
-	priv->arp_proc_entry->proc_fops = &ipoib_sarp_proc_device_operations;
-	priv->arp_proc_entry->data = dev;
-
 	snprintf(name, sizeof(name) - 1, ipoib_mcast_proc_entry_name, dev->name);
 	priv->mcast_proc_entry = create_proc_entry(name,
 						   S_IRUGO,
 						   tsKernelProcDirGet());
 	if (!priv->mcast_proc_entry) {
 		ipoib_warn(priv, "Can't create %s in /proc\n", name);
-		/* FIXME: Delete ARP proc entry */
 		return -ENOMEM;
 	}
 
@@ -455,14 +184,8 @@
 void ipoib_proc_dev_cleanup(struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
-	char name[sizeof(ipoib_arp_proc_entry_name) + sizeof(dev->name)];
+	char name[sizeof(ipoib_mcast_proc_entry_name) + sizeof(dev->name)];
 
-	if (priv->arp_proc_entry) {
-		snprintf(name, sizeof(name) - 1, ipoib_arp_proc_entry_name,
-			 dev->name);
-		remove_proc_entry(name, tsKernelProcDirGet());
-	}
-
 	if (priv->mcast_proc_entry) {
 		snprintf(name, sizeof(name) - 1, ipoib_mcast_proc_entry_name,
 			 dev->name);
Index: infiniband/ulp/ipoib/ipoib_multicast.c
===================================================================
--- infiniband/ulp/ipoib/ipoib_multicast.c	(revision 915)
+++ infiniband/ulp/ipoib/ipoib_multicast.c	(working copy)
@@ -62,11 +62,6 @@
 	struct rb_node *rb_node;
 };
 
-union ib_gid ipoib_broadcast_mgid = {
-	.raw = { 0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00,
-		 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff }
-};
-
 /* =============================================================== */
 /*..ipoib_mcast_get - get reference to multicast group             */
 static inline void ipoib_mcast_get(struct ipoib_mcast *mcast)
@@ -212,7 +207,7 @@
 	}
 
 	/* Set the cached Q_Key before we attach if it's the broadcast group */
-	if (!memcmp(mcast->mgid.raw, priv->bcast_gid.raw, sizeof (union ib_gid)))
+	if (!memcmp(mcast->mgid.raw, priv->dev->broadcast + 4, sizeof (union ib_gid)))
 		priv->qkey = priv->broadcast->mcast_member.qkey;
 
 	ret = ipoib_mcast_attach(dev, mcast->mcast_member.mlid, &mcast->mgid);
@@ -452,7 +447,6 @@
 {
 	struct net_device *dev = dev_ptr;
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
-	struct ipoib_sarp *entry;
 	unsigned long flags;
 
 	down(&mcast_mutex);
@@ -474,12 +468,9 @@
 			return;
 		}
 
-		priv->bcast_gid = ipoib_broadcast_mgid;
-		priv->bcast_gid.raw[4] = (priv->pkey >> 8) & 0xff;
-		priv->bcast_gid.raw[5] = priv->pkey & 0xff;
+		memcpy(priv->broadcast->mgid.raw, priv->dev->broadcast + 4,
+		       sizeof (union ib_gid));
 
-		priv->broadcast->mgid = priv->bcast_gid;
-
 		spin_lock_irqsave(&priv->lock, flags);
 		__ipoib_mcast_add(dev, priv->broadcast);
 		spin_unlock_irqrestore(&priv->lock, flags);
@@ -524,16 +515,14 @@
 
 	if (ib_query_gid(priv->ca, priv->port, 0, &priv->local_gid))
 		ipoib_warn(priv, "ib_gid_entry_get() failed\n");
+	else
+		memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid));
 
 	priv->mcast_mtu = ib_mtu_enum_to_int(priv->broadcast->mcast_member.mtu)
 		- IPOIB_ENCAP_LEN;
 
 	dev->mtu = min(priv->mcast_mtu, priv->admin_mtu);
 
-	entry = ipoib_sarp_local_add(dev, &priv->local_gid, priv->local_qpn);
-	if (entry)
-		ipoib_sarp_put(entry);
-
 	ipoib_dbg_mcast(priv, "successfully joined all multicast groups\n");
 
 	netif_carrier_on(dev);
@@ -682,19 +671,11 @@
 }
 
 /* =============================================================== */
-/*..ipoib_mcast_tx_callback -- put reference to group after TX    */
-static void ipoib_mcast_tx_callback(void *ptr)
-{
-	ipoib_mcast_put((struct ipoib_mcast *)ptr);
-}
-
-/* =============================================================== */
 /*..ipoib_mcast_send -- send skb to multicast group                */
-int ipoib_mcast_send(struct net_device *dev, struct ipoib_mcast *mcast,
-		     struct sk_buff *skb)
+void ipoib_mcast_send(struct net_device *dev, struct ipoib_mcast *mcast,
+		      struct sk_buff *skb)
 {
-	return ipoib_dev_send(dev, skb, ipoib_mcast_tx_callback, mcast,
-			      mcast->address_handle, IB_MULTICAST_QPN);
+	ipoib_send(dev, skb, mcast->address_handle, IB_MULTICAST_QPN);
 }
 
 /* =============================================================== */
@@ -830,7 +811,7 @@
 		u32 multiaddr = ntohl(im->multiaddr);
 		union ib_gid mgid;
 
-		mgid = ipoib_broadcast_mgid;
+		memcpy(mgid.raw, dev->broadcast + 4, sizeof mgid);
 
 		/* Add in the P_Key */
 		mgid.raw[4] = (priv->pkey >> 8) & 0xff;
@@ -843,8 +824,7 @@
 		mgid.raw[15] = multiaddr & 0xff;
 
 		mcast = __ipoib_mcast_find(dev, &mgid);
-		if (!mcast
-		    || test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
+		if (!mcast || test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
 			struct ipoib_mcast *nmcast;
 
 			/* Not found or send-only group, let's add a new entry */
Index: infiniband/ulp/ipoib/Makefile
===================================================================
--- infiniband/ulp/ipoib/Makefile	(revision 915)
+++ infiniband/ulp/ipoib/Makefile	(working copy)
@@ -2,13 +2,15 @@
     -Idrivers/infiniband/include \
     -D_NO_DATA_PATH_TRACE
 
-obj-$(CONFIG_INFINIBAND_IPOIB) += ib_ipoib.o ib_ip2pr.o
+obj-$(CONFIG_INFINIBAND_IPOIB) += ib_ipoib.o
 
+# ip2pr is broken by the move to a native IPoIB driver; disabled until it is fixed
+# obj-$(CONFIG_INFINIBAND_IPOIB) += ib_ip2pr.o
+
 ib_ipoib-objs := \
     ipoib_main.o \
     ipoib_ib.o \
     ipoib_multicast.o \
-    ipoib_arp.o \
     ipoib_proc.o \
     ipoib_verbs.o \
     ipoib_vlan.o



More information about the general mailing list