[ewg] [PATCH v2] IB/ipoib: Add Receive Core Affinity (RCA) support

Eli Cohen eli at mellanox.co.il
Thu Jul 17 06:14:34 PDT 2008


The patch adds RCA support for datagram mode. It creates a number of
additional receive rings, one per HCA completion vector (normally one
per CPU core), and counts on the hardware to distribute received
packets across them based on their IP addresses and port numbers;
packets that are neither TCP nor UDP are routed to the regular UD QP.
The receive-related data structures are rearranged so that packets
arriving on the regular UD QP and on any of the RCA rings are handled
by the same code. The receive flow uses NAPI to provide a context for
handling packets from several rings simultaneously.
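
For illustration, the per-packet ring selection the hardware performs
is conceptually similar to the sketch below. This is not driver code:
the real hash function and its inputs are defined by the HCA, and the
function name and hash used here are invented for the example.

	#include <linux/jhash.h>

	/*
	 * Conceptual sketch only: the HCA makes this steering decision
	 * in hardware.  Packets that are neither TCP nor UDP bypass the
	 * hash and are delivered to the default (regular UD) QP.
	 */
	static int rca_select_ring(u32 saddr, u32 daddr,
				   u16 sport, u16 dport, int nrings)
	{
		u32 hash = jhash_3words(saddr, daddr,
					((u32) sport << 16) | dport, 0);

		return hash % nrings;	/* index into ring_arr[] */
	}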

Signed-off-by: Eli Cohen <eli at mellanox.co.il>
---

Changes from v1:
- Fix CM mode (the previous patch broke it).
- Remove a debug print.

 drivers/infiniband/ulp/ipoib/ipoib.h         |   35 ++-
 drivers/infiniband/ulp/ipoib/ipoib_cm.c      |    8 +
 drivers/infiniband/ulp/ipoib/ipoib_ethtool.c |   63 ++++-
 drivers/infiniband/ulp/ipoib/ipoib_ib.c      |  202 +++++++++++---
 drivers/infiniband/ulp/ipoib/ipoib_main.c    |   89 +++++-
 drivers/infiniband/ulp/ipoib/ipoib_verbs.c   |  378 +++++++++++++++++++++++++-
 6 files changed, 689 insertions(+), 86 deletions(-)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index b0ffc9a..bb6534d 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -105,6 +105,8 @@ enum {
 
 	MAX_SEND_CQE		  = 16,
 	IPOIB_CM_COPYBREAK	  = 256,
+
+	MAX_RCA_QPS		  = 8,
 };
 
 #define	IPOIB_OP_RECV   (1ul << 31)
@@ -267,6 +269,26 @@ struct ipoib_lro {
 	struct net_lro_desc lro_desc[IPOIB_MAX_LRO_DESCRIPTORS];
 };
 
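+/*
+ * Per-ring receive state.  Each RCA ring has its own QP, CQ, NAPI
+ * context, receive buffer ring, LRO manager and RX counters, so the
+ * rings can be polled independently of one another.
+ */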
+struct rca_ring_ctx {
+	struct ib_qp	       *qp;
+	struct ib_cq	       *cq;
+	struct ib_wc		ibwc[IPOIB_NUM_WC];
+	struct napi_struct	napi;
+	struct ipoib_rx_buf    *rx_ring;
+	struct ib_recv_wr	rx_wr;
+	struct ib_sge		rx_sge[IPOIB_UD_RX_SG];
+	struct ipoib_lro	lro;
+	unsigned long		rx_packets;
+	unsigned long		rx_dropped;
+	unsigned long		rx_bytes;
+};
+
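+/*
+ * rca_qp is the QP whose number is advertised in the hardware
+ * address; the HCA spreads packets arriving for it across the
+ * nrings ring QPs in ring_arr[].
+ */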
+struct ipoib_rca {
+	struct ib_qp	       *rca_qp;
+	int			nrings;
+	struct rca_ring_ctx	ring_arr[0];
+};
+
 /*
  * Device private locking: tx_lock protects members used in TX fast
  * path (and we use LLTX so upper layers don't do extra locking).
@@ -317,7 +339,7 @@ struct ipoib_dev_priv {
 	unsigned int mcast_mtu;
 	unsigned int max_ib_mtu;
 
-	struct ipoib_rx_buf *rx_ring;
+	struct rca_ring_ctx  rx_ring;
 
 	spinlock_t	     tx_lock;
 	struct ipoib_tx_buf *tx_ring;
@@ -328,11 +350,6 @@ struct ipoib_dev_priv {
 	unsigned	     tx_outstanding;
 	struct ib_wc	     send_wc[MAX_SEND_CQE];
 
-	struct ib_recv_wr    rx_wr;
-	struct ib_sge	     rx_sge[IPOIB_UD_RX_SG];
-
-	struct ib_wc ibwc[IPOIB_NUM_WC];
-
 	struct list_head dead_ahs;
 
 	struct ib_event_handler event_handler;
@@ -354,7 +371,7 @@ struct ipoib_dev_priv {
 	struct ipoib_ethtool_st ethtool;
 	struct timer_list poll_timer;
 
-	struct ipoib_lro lro;
+	struct ipoib_rca       *rca;
 };
 
 struct ipoib_ah {
@@ -427,6 +444,9 @@ extern struct workqueue_struct *ipoib_workqueue;
 int ipoib_poll(struct napi_struct *napi, int budget);
 void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr);
 void ipoib_send_comp_handler(struct ib_cq *cq, void *dev_ptr);
+void ipoib_rca_rx_handler(struct ib_cq *cq, void *ptr);
+int rca_ring_poll(struct napi_struct *napi, int budget);
+void ipoib_rca_qps_to_err(struct net_device *dev);
 
 struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
 				 struct ib_pd *pd, struct ib_ah_attr *attr);
@@ -495,6 +515,7 @@ int ipoib_mcast_attach(struct net_device *dev, u16 mlid,
 int ipoib_init_qp(struct net_device *dev);
 int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca);
 void ipoib_transport_dev_cleanup(struct net_device *dev);
+int ipoib_rca_init(struct net_device *dev);
 
 void ipoib_event(struct ib_event_handler *handler,
 		 struct ib_event *record);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 0f2d304..b3223d8 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -1441,6 +1441,9 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr,
 			   "will cause multicast packet drops\n");
 
 		rtnl_lock();
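+		/*
+		 * Connected mode receives on the regular UD QP, so
+		 * advertise its QPN in the hardware address again.
+		 */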
+		dev->dev_addr[1] = (priv->qp->qp_num >> 16) & 0xff;
+		dev->dev_addr[2] = (priv->qp->qp_num >>  8) & 0xff;
+		dev->dev_addr[3] = priv->qp->qp_num & 0xff;
 		dev->features &= ~(NETIF_F_IP_CSUM | NETIF_F_SG | NETIF_F_TSO);
 		rtnl_unlock();
 		priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM;
@@ -1453,6 +1456,11 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr,
 		clear_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
 
 		rtnl_lock();
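+		/*
+		 * Back in datagram mode, advertise the RCA QP's number
+		 * so peers send to the demultiplexing QP.
+		 */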
+		if (priv->rca) {
+			dev->dev_addr[1] = (priv->rca->rca_qp->qp_num >> 16) & 0xff;
+			dev->dev_addr[2] = (priv->rca->rca_qp->qp_num >>  8) & 0xff;
+			dev->dev_addr[3] = priv->rca->rca_qp->qp_num & 0xff;
+		}
 		if (test_bit(IPOIB_FLAG_CSUM, &priv->flags)) {
 			dev->features |= NETIF_F_IP_CSUM | NETIF_F_SG;
 			if (priv->hca_caps & IB_DEVICE_UD_TSO)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
index 66af5c1..2c64825 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
@@ -60,6 +60,8 @@ static int ipoib_set_coalesce(struct net_device *dev,
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	int ret;
+	int i;
+	struct rca_ring_ctx *ring;
 
 	/*
 	 * Since IPoIB uses a single CQ for both rx and tx, we assume
@@ -78,6 +80,19 @@ static int ipoib_set_coalesce(struct net_device *dev,
 		return ret;
 	}
 
+	if (priv->rca) {
+		for (i = 0; i < priv->rca->nrings; ++i) {
+			ring = &priv->rca->ring_arr[i];
+			ret = ib_modify_cq(ring->cq, coal->rx_max_coalesced_frames,
+					   coal->rx_coalesce_usecs);
+			if (ret && ret != -ENOSYS) {
+				ipoib_warn(priv, "failed modifying CQ for ring %d (%d)\n", i, ret);
+				return ret;
+			}
+
+		}
+	}
+
 	coal->tx_coalesce_usecs       = coal->rx_coalesce_usecs;
 	coal->tx_max_coalesced_frames = coal->rx_max_coalesced_frames;
 	priv->ethtool.coalesce_usecs       = coal->rx_coalesce_usecs;
@@ -110,21 +125,57 @@ static int ipoib_get_sset_count(struct net_device *dev, int sset)
 	}
 }
 
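+/*
+ * LRO statistics are kept per ring; report the sum over the main RX
+ * ring and all RCA rings.
+ */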
+static u64 get_ring_aggr(struct net_device *dev)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	u64 result;
+	int i;
+
+	result = priv->rx_ring.lro.lro_mgr.stats.aggregated;
+
+	if (!priv->rca)
+		return result;
+
+	for (i = 0; i < priv->rca->nrings; ++i)
+		result += priv->rca->ring_arr[i].lro.lro_mgr.stats.aggregated;
+
+	return result;
+}
+
+static u64 get_ring_flushed(struct net_device *dev)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	u64 result;
+	int i;
+
+	result = priv->rx_ring.lro.lro_mgr.stats.flushed;
+
+	if (!priv->rca)
+		return result;
+
+	for (i = 0; i < priv->rca->nrings; ++i)
+		result += priv->rca->ring_arr[i].lro.lro_mgr.stats.flushed;
+
+	return result;
+}
+
 static void ipoib_get_ethtool_stats(struct net_device *dev,
 				struct ethtool_stats *stats, uint64_t *data)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	int index = 0;
+	u64 aggr = get_ring_aggr(dev);
+	u64 flushed = get_ring_flushed(dev);
 
 	/* Get LRO statistics */
-	data[index++] = priv->lro.lro_mgr.stats.aggregated;
-	data[index++] = priv->lro.lro_mgr.stats.flushed;
-	if (priv->lro.lro_mgr.stats.flushed)
-		data[index++] = priv->lro.lro_mgr.stats.aggregated /
-				priv->lro.lro_mgr.stats.flushed;
+	data[index++] = aggr;
+	data[index++] = flushed;
+	if (flushed)
+		data[index++] = aggr / flushed;
 	else
 		data[index++] = 0;
-	data[index++] = priv->lro.lro_mgr.stats.no_desc;
+
+	data[index++] = priv->rx_ring.lro.lro_mgr.stats.no_desc;
 }
 
 static const struct ethtool_ops ipoib_ethtool_ops = {
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 66cafa2..e8f686c 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -125,29 +125,32 @@ static void ipoib_ud_skb_put_frags(struct ipoib_dev_priv *priv,
 
 }
 
-static int ipoib_ib_post_receive(struct net_device *dev, int id)
+static int ipoib_ib_post_receive(struct net_device *dev,
+				 struct rca_ring_ctx *ring, int id)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct ib_recv_wr *bad_wr;
 	int ret;
 
-	priv->rx_wr.wr_id   = id | IPOIB_OP_RECV;
-	priv->rx_sge[0].addr = priv->rx_ring[id].mapping[0];
-	priv->rx_sge[1].addr = priv->rx_ring[id].mapping[1];
+	ring->rx_wr.wr_id   = id | IPOIB_OP_RECV;
+	ring->rx_sge[0].addr = ring->rx_ring[id].mapping[0];
+	ring->rx_sge[1].addr = ring->rx_ring[id].mapping[1];
 
 
-	ret = ib_post_recv(priv->qp, &priv->rx_wr, &bad_wr);
+	ret = ib_post_recv(ring->qp, &ring->rx_wr, &bad_wr);
 	if (unlikely(ret)) {
-		ipoib_warn(priv, "receive failed for buf %d (%d)\n", id, ret);
-		ipoib_ud_dma_unmap_rx(priv, priv->rx_ring[id].mapping);
-		dev_kfree_skb_any(priv->rx_ring[id].skb);
-		priv->rx_ring[id].skb = NULL;
+		ipoib_warn(priv, "%s failed for buf %d (%d)\n", __func__, id, ret);
+		ipoib_ud_dma_unmap_rx(priv, ring->rx_ring[id].mapping);
+		dev_kfree_skb_any(ring->rx_ring[id].skb);
+		ring->rx_ring[id].skb = NULL;
 	}
 
 	return ret;
 }
 
-static struct sk_buff *ipoib_alloc_rx_skb(struct net_device *dev, int id)
+static struct sk_buff *ipoib_alloc_rx_skb(struct net_device *dev,
+					  struct ipoib_rx_buf *rx_ring,
+					  int id)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct sk_buff *skb;
@@ -170,7 +173,7 @@ static struct sk_buff *ipoib_alloc_rx_skb(struct net_device *dev, int id)
 	 */
 	skb_reserve(skb, 4);
 
-	mapping = priv->rx_ring[id].mapping;
+	mapping = rx_ring[id].mapping;
 	mapping[0] = ib_dma_map_single(priv->ca, skb->data, buf_size,
 				       DMA_FROM_DEVICE);
 	if (unlikely(ib_dma_mapping_error(priv->ca, mapping[0])))
@@ -188,7 +191,7 @@ static struct sk_buff *ipoib_alloc_rx_skb(struct net_device *dev, int id)
 			goto partial_error;
 	}
 
-	priv->rx_ring[id].skb = skb;
+	rx_ring[id].skb = skb;
 	return skb;
 
 partial_error:
@@ -204,11 +207,31 @@ static int ipoib_ib_post_receives(struct net_device *dev)
 	int i;
 
 	for (i = 0; i < ipoib_recvq_size; ++i) {
-		if (!ipoib_alloc_rx_skb(dev, i)) {
+		if (!ipoib_alloc_rx_skb(dev, priv->rx_ring.rx_ring, i)) {
 			ipoib_warn(priv, "failed to allocate receive buffer %d\n", i);
 			return -ENOMEM;
 		}
-		if (ipoib_ib_post_receive(dev, i)) {
+		if (ipoib_ib_post_receive(dev, &priv->rx_ring, i)) {
+			ipoib_warn(priv, "%s failed for buf %d\n", __func__, i);
+			return -EIO;
+		}
+	}
+
+	return 0;
+}
+
+static int ipoib_post_ring_receives(struct net_device *dev,
+				    struct rca_ring_ctx *ring)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	int i;
+
+	for (i = 0; i < ipoib_recvq_size; ++i) {
+		if (!ipoib_alloc_rx_skb(dev, ring->rx_ring, i)) {
+			ipoib_warn(priv, "failed to allocate receive buffer %d\n", i);
+			return -ENOMEM;
+		}
+		if (ipoib_ib_post_receive(dev, ring, i)) {
 			ipoib_warn(priv, "ipoib_ib_post_receive failed for buf %d\n", i);
 			return -EIO;
 		}
@@ -217,7 +240,24 @@ static int ipoib_ib_post_receives(struct net_device *dev)
 	return 0;
 }
 
-static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
+static int ipoib_rca_post_receives(struct net_device *dev)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	int err;
+	int i;
+
+	for (i = 0; i < priv->rca->nrings; ++i) {
+		err = ipoib_post_ring_receives(dev, &priv->rca->ring_arr[i]);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static void ipoib_ib_handle_rx_wc(struct net_device *dev,
+				  struct rca_ring_ctx *ring,
+				  struct ib_wc *wc)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	unsigned int wr_id = wc->wr_id & ~IPOIB_OP_RECV;
@@ -233,16 +273,16 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
 		return;
 	}
 
-	skb  = priv->rx_ring[wr_id].skb;
+	skb  = ring->rx_ring[wr_id].skb;
 
 	if (unlikely(wc->status != IB_WC_SUCCESS)) {
 		if (wc->status != IB_WC_WR_FLUSH_ERR)
 			ipoib_warn(priv, "failed recv event "
 				   "(status=%d, wrid=%d vend_err %x)\n",
 				   wc->status, wr_id, wc->vendor_err);
-		ipoib_ud_dma_unmap_rx(priv, priv->rx_ring[wr_id].mapping);
+		ipoib_ud_dma_unmap_rx(priv, ring->rx_ring[wr_id].mapping);
 		dev_kfree_skb_any(skb);
-		priv->rx_ring[wr_id].skb = NULL;
+		ring->rx_ring[wr_id].skb = NULL;
 		return;
 	}
 
@@ -253,15 +293,15 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
 	if (wc->slid == priv->local_lid && wc->src_qp == priv->qp->qp_num)
 		goto repost;
 
-	memcpy(mapping, priv->rx_ring[wr_id].mapping,
+	memcpy(mapping, ring->rx_ring[wr_id].mapping,
 	       IPOIB_UD_RX_SG * sizeof *mapping);
 
 	/*
 	 * If we can't allocate a new RX buffer, dump
 	 * this packet and reuse the old buffer.
 	 */
-	if (unlikely(!ipoib_alloc_rx_skb(dev, wr_id))) {
-		++dev->stats.rx_dropped;
+	if (unlikely(!ipoib_alloc_rx_skb(dev, ring->rx_ring, wr_id))) {
+		++ring->rx_dropped;
 		goto repost;
 	}
 
@@ -278,8 +318,8 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
 	skb_pull(skb, IPOIB_ENCAP_LEN);
 
 	dev->last_rx = jiffies;
-	++dev->stats.rx_packets;
-	dev->stats.rx_bytes += skb->len;
+	++ring->rx_packets;
+	ring->rx_bytes += skb->len;
 
 	skb->dev = dev;
 	/* XXX get correct PACKET_ type here */
@@ -289,12 +329,12 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
 
 	if (dev->features & NETIF_F_LRO)
-		lro_receive_skb(&priv->lro.lro_mgr, skb, NULL);
+		lro_receive_skb(&ring->lro.lro_mgr, skb, NULL);
 	else
 		netif_receive_skb(skb);
 
 repost:
-	if (unlikely(ipoib_ib_post_receive(dev, wr_id)))
+	if (unlikely(ipoib_ib_post_receive(dev, ring, wr_id)))
 		ipoib_warn(priv, "ipoib_ib_post_receive failed "
 			   "for buf %d\n", wr_id);
 }
@@ -408,10 +448,10 @@ static int poll_tx(struct ipoib_dev_priv *priv)
 	return n == MAX_SEND_CQE;
 }
 
-int ipoib_poll(struct napi_struct *napi, int budget)
+static int rx_poll(struct napi_struct *napi, int budget, struct rca_ring_ctx *ring)
 {
-	struct ipoib_dev_priv *priv = container_of(napi, struct ipoib_dev_priv, napi);
-	struct net_device *dev = priv->dev;
+	struct net_device *dev = napi->dev;
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	int done;
 	int t;
 	int n, i;
@@ -423,17 +463,17 @@ poll_more:
 		int max = (budget - done);
 
 		t = min(IPOIB_NUM_WC, max);
-		n = ib_poll_cq(priv->recv_cq, t, priv->ibwc);
+		n = ib_poll_cq(ring->cq, t, ring->ibwc);
 
 		for (i = 0; i < n; i++) {
-			struct ib_wc *wc = priv->ibwc + i;
+			struct ib_wc *wc = ring->ibwc + i;
 
 			if (wc->wr_id & IPOIB_OP_RECV) {
 				++done;
 				if (wc->wr_id & IPOIB_OP_CM)
 					ipoib_cm_handle_rx_wc(dev, wc);
 				else
-					ipoib_ib_handle_rx_wc(dev, wc);
+					ipoib_ib_handle_rx_wc(dev, ring, wc);
 			} else
 				ipoib_cm_handle_tx_wc(priv->dev, wc);
 		}
@@ -444,10 +484,10 @@ poll_more:
 
 	if (done < budget) {
 		if (dev->features & NETIF_F_LRO)
-			lro_flush_all(&priv->lro.lro_mgr);
+			lro_flush_all(&ring->lro.lro_mgr);
 
 		netif_rx_complete(dev, napi);
-		if (unlikely(ib_req_notify_cq(priv->recv_cq,
+		if (unlikely(ib_req_notify_cq(ring->cq,
 					      IB_CQ_NEXT_COMP |
 					      IB_CQ_REPORT_MISSED_EVENTS)) &&
 		    netif_rx_reschedule(dev, napi))
@@ -457,6 +497,28 @@ poll_more:
 	return done;
 }
 
+int rca_ring_poll(struct napi_struct *napi, int budget)
+{
+	struct rca_ring_ctx *ring = container_of(napi, struct rca_ring_ctx, napi);
+
+	return rx_poll(napi, budget, ring);
+}
+
+int ipoib_poll(struct napi_struct *napi, int budget)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(napi->dev);
+
+	return rx_poll(napi, budget, &priv->rx_ring);
+}
+
+void ipoib_rca_rx_handler(struct ib_cq *cq, void *ptr)
+{
+	struct rca_ring_ctx *ring = ptr;
+	struct net_device *dev = ring->napi.dev;
+
+	netif_rx_schedule(dev, &ring->napi);
+}
+
 void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr)
 {
 	struct net_device *dev = dev_ptr;
@@ -677,6 +739,14 @@ int ipoib_ib_dev_open(struct net_device *dev)
 		return -1;
 	}
 
+	if (priv->rca) {
+		ret = ipoib_rca_init(dev);
+		if (!ret)
+			ipoib_rca_post_receives(dev);
+		else
+			ipoib_warn(priv, "ipoib_rca_init returned %d\n", ret);
+	}
+
 	clear_bit(IPOIB_STOP_REAPER, &priv->flags);
 	queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task,
 			   round_jiffies_relative(HZ));
@@ -748,12 +818,22 @@ static int recvs_pending(struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	int pending = 0;
-	int i;
+	int i, j;
 
 	for (i = 0; i < ipoib_recvq_size; ++i)
-		if (priv->rx_ring[i].skb)
+		if (priv->rx_ring.rx_ring[i].skb)
 			++pending;
 
+	if (!priv->rca)
+		goto out;
+
+	for (j = 0; j < priv->rca->nrings; ++j) {
+		for (i = 0; i < ipoib_recvq_size; ++i)
+			if (priv->rca->ring_arr[j].rx_ring[i].skb)
+				++pending;
+	}
+
+out:
 	return pending;
 }
 
@@ -762,23 +842,23 @@ void ipoib_drain_cq(struct net_device *dev)
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	int i, n;
 	do {
-		n = ib_poll_cq(priv->recv_cq, IPOIB_NUM_WC, priv->ibwc);
+		n = ib_poll_cq(priv->recv_cq, IPOIB_NUM_WC, priv->rx_ring.ibwc);
 		for (i = 0; i < n; ++i) {
 			/*
 			 * Convert any successful completions to flush
 			 * errors to avoid passing packets up the
 			 * stack after bringing the device down.
 			 */
-			if (priv->ibwc[i].status == IB_WC_SUCCESS)
-				priv->ibwc[i].status = IB_WC_WR_FLUSH_ERR;
+			if (priv->rx_ring.ibwc[i].status == IB_WC_SUCCESS)
+				priv->rx_ring.ibwc[i].status = IB_WC_WR_FLUSH_ERR;
 
-			if (priv->ibwc[i].wr_id & IPOIB_OP_RECV) {
-				if (priv->ibwc[i].wr_id & IPOIB_OP_CM)
-					ipoib_cm_handle_rx_wc(dev, priv->ibwc + i);
+			if (priv->rx_ring.ibwc[i].wr_id & IPOIB_OP_RECV) {
+				if (priv->rx_ring.ibwc[i].wr_id & IPOIB_OP_CM)
+					ipoib_cm_handle_rx_wc(dev, priv->rx_ring.ibwc + i);
 				else
-					ipoib_ib_handle_rx_wc(dev, priv->ibwc + i);
+					ipoib_ib_handle_rx_wc(dev, &priv->rx_ring, priv->rx_ring.ibwc + i);
 			} else
-				ipoib_cm_handle_tx_wc(dev, priv->ibwc + i);
+				ipoib_cm_handle_tx_wc(dev, priv->rx_ring.ibwc + i);
 		}
 	} while (n == IPOIB_NUM_WC);
 
@@ -786,6 +866,34 @@ void ipoib_drain_cq(struct net_device *dev)
 		; /* nothing */
 }
 
+void ipoib_drain_rca_rx_wrs(struct net_device *dev)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct rca_ring_ctx *ring;
+	int i, n, k;
+
+	if (!priv->rca)
+		return;
+
+	for (k = 0; k < priv->rca->nrings; ++k) {
+		ring = &priv->rca->ring_arr[k];
+		do {
+			n = ib_poll_cq(ring->cq, IPOIB_NUM_WC, ring->ibwc);
+			for (i = 0; i < n; ++i) {
+				/*
+				 * Convert any successful completions to flush
+				 * errors to avoid passing packets up the
+				 * stack after bringing the device down.
+				 */
+				if (ring->ibwc[i].status == IB_WC_SUCCESS)
+					ring->ibwc[i].status = IB_WC_WR_FLUSH_ERR;
+
+				ipoib_ib_handle_rx_wc(dev, ring, ring->ibwc + i);
+			}
+		} while (n == IPOIB_NUM_WC);
+	}
+}
+
 int ipoib_ib_dev_stop(struct net_device *dev, int flush)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -806,6 +914,8 @@ int ipoib_ib_dev_stop(struct net_device *dev, int flush)
 	if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
 		ipoib_warn(priv, "Failed to modify QP to ERROR state\n");
 
+	ipoib_rca_qps_to_err(dev);
+
 	/* Wait for all sends and receives to complete */
 	begin = jiffies;
 
@@ -830,11 +940,11 @@ int ipoib_ib_dev_stop(struct net_device *dev, int flush)
 			for (i = 0; i < ipoib_recvq_size; ++i) {
 				struct ipoib_rx_buf *rx_req;
 
-				rx_req = &priv->rx_ring[i];
+				rx_req = &priv->rx_ring.rx_ring[i];
 				if (!rx_req->skb)
 					continue;
 				ipoib_ud_dma_unmap_rx(priv,
-						      priv->rx_ring[i].mapping);
+						      priv->rx_ring.rx_ring[i].mapping);
 				dev_kfree_skb_any(rx_req->skb);
 				rx_req->skb = NULL;
 			}
@@ -843,6 +953,8 @@ int ipoib_ib_dev_stop(struct net_device *dev, int flush)
 		}
 
 		ipoib_drain_cq(dev);
+		ipoib_drain_rca_rx_wrs(dev);
 
 		msleep(1);
 	}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 8be9ea0..796cf86 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -100,6 +100,31 @@ static struct ib_client ipoib_client = {
 	.remove = ipoib_remove_one
 };
 
+
+static void enable_rca_napi(struct net_device *dev)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	int i;
+
+	if (!priv->rca)
+		return;
+
+	for (i = 0; i < priv->rca->nrings; ++i)
+		napi_enable(&priv->rca->ring_arr[i].napi);
+}
+
+static void disable_rca_napi(struct net_device *dev)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	int i;
+
+	if (!priv->rca)
+		return;
+
+	for (i = 0; i < priv->rca->nrings; ++i)
+		napi_disable(&priv->rca->ring_arr[i].napi);
+}
+
 int ipoib_open(struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -107,6 +132,7 @@ int ipoib_open(struct net_device *dev)
 	ipoib_dbg(priv, "bringing up interface\n");
 
 	napi_enable(&priv->napi);
+	enable_rca_napi(dev);
 	set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
 
 	if (ipoib_pkey_dev_delay_open(dev))
@@ -114,12 +140,14 @@ int ipoib_open(struct net_device *dev)
 
 	if (ipoib_ib_dev_open(dev)) {
 		napi_disable(&priv->napi);
+		disable_rca_napi(dev);
 		return -EINVAL;
 	}
 
 	if (ipoib_ib_dev_up(dev)) {
 		ipoib_ib_dev_stop(dev, 1);
 		napi_disable(&priv->napi);
+		disable_rca_napi(dev);
 		return -EINVAL;
 	}
 
@@ -153,6 +181,7 @@ static int ipoib_stop(struct net_device *dev)
 
 	clear_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
 	napi_disable(&priv->napi);
+	disable_rca_napi(dev);
 
 	netif_stop_queue(dev);
 
@@ -921,9 +950,9 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 
 	/* Allocate RX/TX "rings" to hold queued skbs */
-	priv->rx_ring =	kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring,
+	priv->rx_ring.rx_ring =	kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring.rx_ring,
 				GFP_KERNEL);
-	if (!priv->rx_ring) {
+	if (!priv->rx_ring.rx_ring) {
 		printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n",
 		       ca->name, ipoib_recvq_size);
 		goto out;
@@ -948,7 +977,7 @@ out_tx_ring_cleanup:
 	vfree(priv->tx_ring);
 
 out_rx_ring_cleanup:
-	kfree(priv->rx_ring);
+	kfree(priv->rx_ring.rx_ring);
 
 out:
 	return -ENOMEM;
@@ -969,10 +998,10 @@ void ipoib_dev_cleanup(struct net_device *dev)
 
 	ipoib_ib_dev_cleanup(dev);
 
-	kfree(priv->rx_ring);
+	kfree(priv->rx_ring.rx_ring);
 	vfree(priv->tx_ring);
 
-	priv->rx_ring = NULL;
+	priv->rx_ring.rx_ring = NULL;
 	priv->tx_ring = NULL;
 }
 
@@ -1017,15 +1046,33 @@ static int get_skb_hdr(struct sk_buff *skb, void **iphdr,
 	return 0;
 }
 
-static void ipoib_lro_setup(struct ipoib_dev_priv *priv)
+static void ipoib_lro_setup(struct ipoib_lro *lro, struct net_device *dev)
+{
+	lro->lro_mgr.max_aggr	 = lro_max_aggr;
+	lro->lro_mgr.max_desc	 = IPOIB_MAX_LRO_DESCRIPTORS;
+	lro->lro_mgr.lro_arr	 = lro->lro_desc;
+	lro->lro_mgr.get_skb_header = get_skb_hdr;
+	lro->lro_mgr.features	 = LRO_F_NAPI;
+	lro->lro_mgr.dev		 = dev;
+	lro->lro_mgr.ip_summed_aggr = CHECKSUM_UNNECESSARY;
+}
+
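+/*
+ * RX counters are kept per ring; fold them into dev->stats whenever
+ * the stack asks for them.
+ */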
+struct net_device_stats *ipoib_get_stats(struct net_device *dev)
 {
-	priv->lro.lro_mgr.max_aggr	 = lro_max_aggr;
-	priv->lro.lro_mgr.max_desc	 = IPOIB_MAX_LRO_DESCRIPTORS;
-	priv->lro.lro_mgr.lro_arr	 = priv->lro.lro_desc;
-	priv->lro.lro_mgr.get_skb_header = get_skb_hdr;
-	priv->lro.lro_mgr.features	 = LRO_F_NAPI;
-	priv->lro.lro_mgr.dev		 = priv->dev;
-	priv->lro.lro_mgr.ip_summed_aggr = CHECKSUM_UNNECESSARY;
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	int i;
+
+	dev->stats.rx_packets = priv->rx_ring.rx_packets;
+	dev->stats.rx_dropped = priv->rx_ring.rx_dropped;
+	dev->stats.rx_bytes = priv->rx_ring.rx_bytes;
+	if (priv->rca) {
+		for (i = 0; i < priv->rca->nrings; ++i) {
+			dev->stats.rx_packets += priv->rca->ring_arr[i].rx_packets;
+			dev->stats.rx_dropped += priv->rca->ring_arr[i].rx_dropped;
+			dev->stats.rx_bytes += priv->rca->ring_arr[i].rx_bytes;
+		}
+	}
+	return &dev->stats;
 }
 
 static void ipoib_setup(struct net_device *dev)
@@ -1040,6 +1087,7 @@ static void ipoib_setup(struct net_device *dev)
 	dev->header_ops		 = &ipoib_header_ops;
 	dev->set_multicast_list	 = ipoib_set_mcast_list;
 	dev->neigh_setup	 = ipoib_neigh_setup_dev;
+	dev->get_stats		 = ipoib_get_stats;
 
 	ipoib_set_ethtool_ops(dev);
 
@@ -1067,7 +1115,7 @@ static void ipoib_setup(struct net_device *dev)
 
 	priv->dev = dev;
 
-	ipoib_lro_setup(priv);
+	ipoib_lro_setup(&priv->rx_ring.lro, dev);
 
 	spin_lock_init(&priv->lock);
 	spin_lock_init(&priv->tx_lock);
@@ -1190,6 +1238,16 @@ int ipoib_add_pkey_attr(struct net_device *dev)
 	return device_create_file(&dev->dev, &dev_attr_pkey);
 }
 
+static void ipoib_register_rings(struct ipoib_dev_priv *priv)
+{
+	int i;
+
+	for (i = 0; i < priv->rca->nrings; ++i) {
+		netif_napi_add(priv->dev, &priv->rca->ring_arr[i].napi, rca_ring_poll, 100);
+		ipoib_lro_setup(&priv->rca->ring_arr[i].lro, priv->dev);
+	}
+}
+
 static struct net_device *ipoib_add_port(const char *format,
 					 struct ib_device *hca, u8 port)
 {
@@ -1273,6 +1331,9 @@ static struct net_device *ipoib_add_port(const char *format,
 		goto device_init_failed;
 	}
 
+	if (priv->rca)
+		ipoib_register_rings(priv);
+
 	INIT_IB_EVENT_HANDLER(&priv->event_handler,
 			      priv->ca, ipoib_event);
 	result = ib_register_event_handler(&priv->event_handler);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index 6832511..6506e23 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -33,6 +33,35 @@
 
 #include "ipoib.h"
 
+static int rca;
+module_param(rca, bool, 0444);
+MODULE_PARM_DESC(rca, "Enable RCA (Receive Core Affinity)");
+
+static int set_rca_qkey(struct ipoib_dev_priv *priv)
+{
+	struct ib_qp_attr *qp_attr;
+	int ret = -ENOMEM;
+	int i;
+
+	if (!priv->rca)
+		return 0;
+
+	qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL);
+	if (!qp_attr)
+		return -ENOMEM;
+
+	qp_attr->qkey = priv->qkey;
+	for (i = 0; i < priv->rca->nrings; ++i) {
+		ret = ib_modify_qp(priv->rca->ring_arr[i].qp, qp_attr, IB_QP_QKEY);
+		if (ret)
+			goto out;
+	}
+
+out:
+	kfree(qp_attr);
+	return ret;
+}
+
 int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid, int set_qkey)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -60,6 +89,12 @@ int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid, int
 			ipoib_warn(priv, "failed to modify QP, ret = %d\n", ret);
 			goto out;
 		}
+
+		ret = set_rca_qkey(priv);
+		if (ret) {
+			ipoib_warn(priv, "failed to modify RCA QPs qkey, ret = %d\n", ret);
+			goto out;
+		}
 	}
 
 	/* attach QP to multicast group */
@@ -126,6 +161,308 @@ out_fail:
 	return ret;
 }
 
+static void qp2reset(struct ipoib_dev_priv *priv, struct ib_qp *qp)
+{
+	struct ib_qp_attr qp_attr;
+
+	qp_attr.qp_state = IB_QPS_RESET;
+	if (ib_modify_qp(qp, &qp_attr, IB_QP_STATE))
+		ipoib_warn(priv, "Failed to modify QP to RESET state\n");
+}
+
+static void qp2err(struct ipoib_dev_priv *priv, struct ib_qp *qp)
+{
+	struct ib_qp_attr qp_attr;
+
+	qp_attr.qp_state = IB_QPS_ERR;
+	if (ib_modify_qp(qp, &qp_attr, IB_QP_STATE))
+		ipoib_warn(priv, "Failed to modify QP to ERROR state\n");
+}
+
+static int modify_to_rts(struct ipoib_dev_priv *priv, struct ib_qp *qp)
+{
+	int ret;
+	struct ib_qp_attr qp_attr;
+	int attr_mask;
+
+	qp_attr.qp_state = IB_QPS_INIT;
+	qp_attr.qkey = 0;
+	qp_attr.port_num = priv->port;
+	qp_attr.pkey_index = priv->pkey_index;
+	attr_mask =
+	    IB_QP_QKEY |
+	    IB_QP_PORT |
+	    IB_QP_PKEY_INDEX |
+	    IB_QP_STATE;
+	ret = ib_modify_qp(qp, &qp_attr, attr_mask);
+	if (ret) {
+		ipoib_warn(priv, "failed to modify QP to init, ret = %d\n", ret);
+		goto out_fail;
+	}
+
+	qp_attr.qp_state = IB_QPS_RTR;
+	/* Can't set this in a INIT->RTR transition */
+	attr_mask &= ~IB_QP_PORT;
+	ret = ib_modify_qp(qp, &qp_attr, attr_mask);
+	if (ret) {
+		ipoib_warn(priv, "failed to modify QP to RTR, ret = %d\n", ret);
+		goto out_fail;
+	}
+
+	qp_attr.qp_state = IB_QPS_RTS;
+	qp_attr.sq_psn = 0;
+	attr_mask |= IB_QP_SQ_PSN;
+	attr_mask &= ~IB_QP_PKEY_INDEX;
+	ret = ib_modify_qp(qp, &qp_attr, attr_mask);
+	if (ret) {
+		ipoib_warn(priv, "failed to modify QP to RTS, ret = %d\n", ret);
+		goto out_fail;
+	}
+
+	return 0;
+
+out_fail:
+	qp2reset(priv, qp);
+	return ret;
+}
+
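+/*
+ * The RCA QP is only brought as far as RTR; it is programmed with
+ * the QPN range of the ring QPs and with the default QPN that
+ * receives non-TCP/UDP traffic.
+ */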
+static int prepare_rca_qp(struct ipoib_dev_priv *priv, struct ib_qp *qp)
+{
+	int ret;
+	struct ib_qp_attr qp_attr;
+	int attr_mask;
+
+	qp_attr.qp_state = IB_QPS_INIT;
+	qp_attr.qkey = 0;
+	qp_attr.port_num = priv->port;
+	qp_attr.pkey_index = priv->pkey_index;
+	qp_attr.rca.base_qpn = priv->rca->ring_arr[0].qp->qp_num;
+	qp_attr.rca.num_qpn = priv->rca->nrings;
+	qp_attr.rca.default_qpn = priv->qp->qp_num;
+
+	attr_mask =
+	    IB_QP_QKEY |
+	    IB_QP_PORT |
+	    IB_QP_PKEY_INDEX |
+	    IB_QP_STATE |
+	    IB_QP_RCA;
+	ret = ib_modify_qp(qp, &qp_attr, attr_mask);
+	if (ret) {
+		ipoib_warn(priv, "failed to modify QP to init, ret = %d\n", ret);
+		goto out_fail;
+	}
+
+	qp_attr.qp_state = IB_QPS_RTR;
+	/* Can't set this in a INIT->RTR transition */
+	attr_mask &= ~(IB_QP_PORT | IB_QP_RCA);
+	ret = ib_modify_qp(qp, &qp_attr, attr_mask);
+	if (ret) {
+		ipoib_warn(priv, "failed to modify QP to RTR, ret = %d\n", ret);
+		goto out_fail;
+	}
+
+	return 0;
+
+out_fail:
+	qp2reset(priv, qp);
+	return ret;
+}
+
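+/*
+ * Bring all ring QPs to RTS and program the RCA QP, resetting any
+ * already-initialized ring QPs on failure.
+ */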
+int ipoib_rca_init(struct net_device *dev)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	int err;
+	int i;
+
+	for (i = 0; i < priv->rca->nrings; ++i) {
+		err = modify_to_rts(priv, priv->rca->ring_arr[i].qp);
+		if (err) {
+			ipoib_warn(priv, "ring[%d] failed modify to RTS\n", i);
+			goto exit_err;
+		}
+	}
+
+	err = prepare_rca_qp(priv, priv->rca->rca_qp);
+	if (err)
+		goto exit_err;
+
+	return 0;
+
+exit_err:
+	for (--i; i >= 0; --i)
+		qp2reset(priv, priv->rca->ring_arr[i].qp);
+
+	return err;
+}
+
+void ipoib_rca_qps_to_err(struct net_device *dev)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	int i;
+
+	if (!priv->rca)
+		return;
+
+	for (i = 0; i < priv->rca->nrings; ++i)
+		qp2err(priv, priv->rca->ring_arr[i].qp);
+}
+
+static void destroy_rca_resources(struct ipoib_dev_priv *priv)
+{
+	int i;
+	int err;
+
+	for (i = 0; i < priv->rca->nrings; ++i) {
+		if (priv->rca->ring_arr[i].qp) {
+			err = ib_destroy_qp(priv->rca->ring_arr[i].qp);
+			if (err)
+				printk(KERN_WARNING "%s: failed to destroy qp[%d], err = %d\n",
+				       __func__, i, err);
+		}
+
+		if (priv->rca->ring_arr[i].cq) {
+			err = ib_destroy_cq(priv->rca->ring_arr[i].cq);
+			if (err)
+				printk(KERN_WARNING "%s: failed to destroy cq[%d], err = %d\n",
+				       __func__, i, err);
+		}
+
+		kfree(priv->rca->ring_arr[i].rx_ring);
+	}
+
+	if (priv->rca->rca_qp)
+		if (ib_destroy_qp(priv->rca->rca_qp))
+			printk(KERN_WARNING "failed to destroy rca qp\n");
+
+	kfree(priv->rca);
+	priv->rca = NULL;
+}
+
+static void qpi_rx_event_handler(struct ib_event *event, void *ctx)
+{
+	struct ipoib_dev_priv *priv = ctx;
+
+	ipoib_warn(priv, "got event %d on RCA QP. QPN = 0x%x\n", event->event,
+		   event->element.qp->qp_num);
+}
+
+static void rca_qp_rx_event_handler(struct ib_event *event, void *ctx)
+{
+	struct ipoib_dev_priv *priv = ctx;
+
+	ipoib_warn(priv, "RCA QP  got event %d. QPN = 0x%x\n", event->event,
+		   event->element.qp->qp_num);
+}
+
+static void init_ring(struct ipoib_dev_priv *priv, struct rca_ring_ctx *ring)
+{
+	ring->rx_sge[0].lkey = priv->mr->lkey;
+	if (ipoib_ud_need_sg(priv->max_ib_mtu)) {
+		ring->rx_sge[0].length = IPOIB_UD_HEAD_SIZE;
+		ring->rx_sge[1].length = PAGE_SIZE;
+		ring->rx_sge[1].lkey = priv->mr->lkey;
+		ring->rx_wr.num_sge = IPOIB_UD_RX_SG;
+	} else {
+		ring->rx_sge[0].length = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu);
+		ring->rx_wr.num_sge = 1;
+	}
+	ring->rx_wr.next = NULL;
+	ring->rx_wr.sg_list = ring->rx_sge;
+}
+
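+/*
+ * Create one ring per completion vector.  The ring QPs are allocated
+ * as a single aligned, contiguous QPN range with ib_create_qp_range()
+ * so the hardware can index them from rca.base_qpn.
+ */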
+static int create_rca_resources(struct ipoib_dev_priv *priv, struct ib_device *ca)
+{
+	struct ib_qp_init_attr *attr;
+	int err;
+	int i;
+	int num_comp_vectors = ca->num_comp_vectors;
+	struct ib_qp **qp_arr;
+
+	priv->rca = kzalloc(sizeof *priv->rca + num_comp_vectors *
+			    sizeof *priv->rca->ring_arr, GFP_KERNEL);
+	if (!priv->rca)
+		return -ENOMEM;
+
+	attr = kzalloc(num_comp_vectors * sizeof *attr, GFP_KERNEL);
+	if (!attr) {
+		err = -ENOMEM;
+		goto exit_err;
+	}
+	qp_arr = kmalloc(num_comp_vectors * sizeof *qp_arr, GFP_KERNEL);
+	if (!qp_arr) {
+		kfree(attr);
+		err = -ENOMEM;
+		goto exit_err;
+	}
+
+	for (i = 0; i < num_comp_vectors; ++i) {
+		priv->rca->ring_arr[i].rx_ring = kzalloc(sizeof(struct ipoib_rx_buf) *
+							 ipoib_recvq_size, GFP_KERNEL);
+		if (!priv->rca->ring_arr[i].rx_ring) {
+			err = -ENOMEM;
+			goto exit_free;
+		}
+
+		init_ring(priv, &priv->rca->ring_arr[i]);
+
+		priv->rca->ring_arr[i].cq = ib_create_cq(ca, ipoib_rca_rx_handler, NULL,
+							 &priv->rca->ring_arr[i], ipoib_recvq_size, i);
+		if (IS_ERR(priv->rca->ring_arr[i].cq)) {
+			err = PTR_ERR(priv->rca->ring_arr[i].cq);
+			priv->rca->ring_arr[i].cq = NULL;
+			goto exit_free;
+		}
+
+		err = ib_req_notify_cq(priv->rca->ring_arr[i].cq, IB_CQ_NEXT_COMP);
+		if (err)
+			goto exit_free;
+
+		attr[i].event_handler = qpi_rx_event_handler;
+		attr[i].qp_context = priv;
+		attr[i].sq_sig_type = IB_SIGNAL_ALL_WR;
+		attr[i].qp_type = IB_QPT_UD;
+		attr[i].cap.max_recv_wr  = ipoib_recvq_size;
+		attr[i].cap.max_recv_sge = IPOIB_UD_RX_SG;
+		attr[i].recv_cq = attr[i].send_cq = priv->rca->ring_arr[i].cq;
+	}
+
+	err = ib_create_qp_range(priv->pd, attr, num_comp_vectors,
+				 1 << ilog2(num_comp_vectors), qp_arr);
+	if (err)
+		goto exit_free;
+
+	for (i = 0; i < num_comp_vectors; ++i)
+		priv->rca->ring_arr[i].qp = qp_arr[i];
+
+	memset(attr, 0, sizeof *attr);
+	attr[0].event_handler = rca_qp_rx_event_handler;
+	attr[0].qp_context = priv;
+	attr[0].qp_type = IB_QPT_UD;
+	attr[0].recv_cq = attr[0].send_cq = priv->recv_cq;
+	attr[0].create_flags = IB_QP_CREATE_IPOIB_RCA;
+	priv->rca->rca_qp = ib_create_qp(priv->pd, attr);
+	if (IS_ERR(priv->rca->rca_qp)) {
+		err = PTR_ERR(priv->rca->rca_qp);
+		priv->rca->rca_qp = NULL;
+		goto exit_free;
+	}
+
+	kfree(qp_arr);
+	kfree(attr);
+	priv->rca->nrings = num_comp_vectors;
+
+	return 0;
+
+exit_free:
+	kfree(qp_arr);
+	kfree(attr);
+
+exit_err:
+	destroy_rca_resources(priv);
+
+	return err;
+}
+
 int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -139,7 +476,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
 		.sq_sig_type = IB_SIGNAL_ALL_WR,
 		.qp_type     = IB_QPT_UD
 	};
-
+	u32	hw_qpn;
 	int ret, size;
 	int i;
 
@@ -199,10 +536,6 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
 		goto out_free_send_cq;
 	}
 
-	priv->dev->dev_addr[1] = (priv->qp->qp_num >> 16) & 0xff;
-	priv->dev->dev_addr[2] = (priv->qp->qp_num >>  8) & 0xff;
-	priv->dev->dev_addr[3] = (priv->qp->qp_num      ) & 0xff;
-
 	for (i = 0; i < MAX_SKB_FRAGS + 1; ++i)
 		priv->tx_sge[i].lkey = priv->mr->lkey;
 
@@ -210,18 +543,32 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
 	priv->tx_wr.sg_list	= priv->tx_sge;
 	priv->tx_wr.send_flags	= IB_SEND_SIGNALED;
 
-	priv->rx_sge[0].lkey = priv->mr->lkey;
+	priv->rx_ring.rx_sge[0].lkey = priv->mr->lkey;
 	if (ipoib_ud_need_sg(priv->max_ib_mtu)) {
-		priv->rx_sge[0].length = IPOIB_UD_HEAD_SIZE;
-		priv->rx_sge[1].length = PAGE_SIZE;
-		priv->rx_sge[1].lkey = priv->mr->lkey;
-		priv->rx_wr.num_sge = IPOIB_UD_RX_SG;
+		priv->rx_ring.rx_sge[0].length = IPOIB_UD_HEAD_SIZE;
+		priv->rx_ring.rx_sge[1].length = PAGE_SIZE;
+		priv->rx_ring.rx_sge[1].lkey = priv->mr->lkey;
+		priv->rx_ring.rx_wr.num_sge = IPOIB_UD_RX_SG;
 	} else {
-		priv->rx_sge[0].length = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu);
-		priv->rx_wr.num_sge = 1;
+		priv->rx_ring.rx_sge[0].length = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu);
+		priv->rx_ring.rx_wr.num_sge = 1;
+	}
+	priv->rx_ring.rx_wr.next = NULL;
+	priv->rx_ring.rx_wr.sg_list = priv->rx_ring.rx_sge;
+	priv->rx_ring.qp = priv->qp;
+	priv->rx_ring.cq = priv->recv_cq;
+
+	hw_qpn = priv->qp->qp_num;
+	if (rca && priv->hca_caps & IB_DEVICE_IPOIB_RCA) {
+		if (create_rca_resources(priv, ca))
+			printk(KERN_WARNING "not using RCA for device %s\n", ca->name);
+		else
+			hw_qpn = priv->rca->rca_qp->qp_num;
 	}
-	priv->rx_wr.next = NULL;
-	priv->rx_wr.sg_list = priv->rx_sge;
+
+	priv->dev->dev_addr[1] = (hw_qpn >> 16) & 0xff;
+	priv->dev->dev_addr[2] = (hw_qpn >>  8) & 0xff;
+	priv->dev->dev_addr[3] = hw_qpn & 0xff;
 
 	return 0;
 
@@ -244,6 +591,9 @@ void ipoib_transport_dev_cleanup(struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 
+	if (priv->rca)
+		destroy_rca_resources(priv);
+
 	if (priv->qp) {
 		if (ib_destroy_qp(priv->qp))
 			ipoib_warn(priv, "ib_qp_destroy failed\n");
-- 
1.5.6