[ofa-general] [PATCH 3/5] IB/ipoib: post to SRQ every n buffers

Eli Cohen eli at dev.mellanox.co.il
Fri Feb 1 02:25:06 PST 2008


IB/ipoib: post to SRQ every n buffers

To reduce the overhead of posting receive buffers to the SRQ, chain the
receive WRs together and post the whole chain once every 16 received
buffers.

Signed-off-by: Eli Cohen <eli at mellanox.co.il>
---
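
A note for reviewers, not part of the commit message: receive WRs for the
SRQ ring are linked into a chain as completions come in, and the chain is
handed to ib_post_srq_recv() in one call, either once CM_POST_SRQ_COUNT
entries have accumulated since the last post or unconditionally when the
pi argument is set (as during the initial ring fill). Below is a minimal
user-space sketch of just the batching logic; RING_SIZE, BATCH,
struct rx_wr and fake_post_srq() are illustrative stand-ins, not kernel
APIs, and this models the idea rather than the patch itself:

	#include <stdio.h>

	#define RING_SIZE 64	/* power of two, like ipoib_recvq_size */
	#define BATCH	  16	/* mirrors CM_POST_SRQ_COUNT */

	struct rx_wr {
		int id;
		struct rx_wr *next;
	};

	static struct rx_wr ring[RING_SIZE];
	static struct rx_wr *head = &ring[0];

	/* stand-in for ib_post_srq_recv(): just count the chain */
	static void fake_post_srq(struct rx_wr *chain)
	{
		int n = 0;

		for (; chain; chain = chain->next)
			++n;
		printf("posted %d buffers in one call\n", n);
	}

	static void post_receive(int id, int pi)
	{
		struct rx_wr *cur  = &ring[id];
		struct rx_wr *prev = &ring[(id - 1) & (RING_SIZE - 1)];

		prev->next = cur;	/* append to the pending chain */
		cur->id = id;
		cur->next = NULL;

		/* ring distance from head decides when to flush */
		if (pi || (((unsigned long)(cur - head) &
			    (RING_SIZE - 1)) >= BATCH)) {
			fake_post_srq(head);
			head = &ring[(id + 1) & (RING_SIZE - 1)];
		}
	}

	int main(void)
	{
		int i;

		for (i = 0; i < 2 * RING_SIZE; ++i)
			post_receive(i & (RING_SIZE - 1), 0);
		return 0;
	}

Running the sketch shows that with the >= comparison each flush actually
carries BATCH + 1 buffers, since the chain spans head through cur
inclusive when the test fires.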


Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib.h
===================================================================
--- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib.h	2008-01-28 21:50:46.000000000 +0200
+++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib.h	2008-01-28 21:52:55.745918000 +0200
@@ -96,6 +96,7 @@ enum {
 	IPOIB_MCAST_FLAG_ATTACHED = 3,
 
 	MAX_SEND_CQE              = 16,
+	CM_POST_SRQ_COUNT         = 16,
 };
 
 #define	IPOIB_OP_RECV   (1ul << 31)
@@ -283,6 +284,11 @@ struct ipoib_cm_rx_buf {
 	u64 mapping[IPOIB_CM_RX_SG];
 };
 
+struct ipoib_cm_rx_wr {
+	struct ib_recv_wr	wr;
+	struct ib_sge		rx_sge[IPOIB_CM_RX_SG];
+};
+
 struct ipoib_cm_dev_priv {
 	struct ib_srq  	       *srq;
 	struct ipoib_cm_rx_buf *srq_ring;
@@ -301,10 +307,10 @@ struct ipoib_cm_dev_priv {
 	struct list_head        start_list;
 	struct list_head        reap_list;
 	struct ib_wc            ibwc[IPOIB_NUM_WC];
-	struct ib_sge           rx_sge[IPOIB_CM_RX_SG];
-	struct ib_recv_wr       rx_wr;
 	int			max_cm_mtu;
 	int			num_frags;
+	struct ipoib_cm_rx_wr  *head;
+	struct ipoib_cm_rx_wr  *rx_wr_arr;
 };
 
 struct ipoib_ethtool_st {
Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_cm.c
===================================================================
--- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_cm.c	2008-01-28 21:50:45.000000000 +0200
+++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_cm.c	2008-01-28 21:51:45.846437000 +0200
@@ -81,24 +81,42 @@ static void ipoib_cm_dma_unmap_rx(struct
 		ib_dma_unmap_single(priv->ca, mapping[i + 1], PAGE_SIZE, DMA_FROM_DEVICE);
 }
 
-static int ipoib_cm_post_receive(struct net_device *dev, int id)
+static int ipoib_cm_post_receive(struct net_device *dev, int id, int pi)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct ib_recv_wr *bad_wr;
-	int i, ret;
+	int i, ret = 0;
+	struct ipoib_cm_rx_wr *cur;
+	struct ipoib_cm_rx_wr *prev;
+	int post;
+
+	ipoib_dbg_data(priv, "posting to id=%d, pi=%d\n", id, pi);
+	cur = &priv->cm.rx_wr_arr[id];
+	prev = &priv->cm.rx_wr_arr[(id - 1) & (ipoib_recvq_size - 1)];
+
+	prev->wr.next = &cur->wr;
+	cur->wr.wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV;
+	cur->wr.next = NULL;
 
-	priv->cm.rx_wr.wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV;
-
 	for (i = 0; i < priv->cm.num_frags; ++i)
-		priv->cm.rx_sge[i].addr = priv->cm.srq_ring[id].mapping[i];
+		cur->rx_sge[i].addr = priv->cm.srq_ring[id].mapping[i];
+
+	post = pi || (((unsigned long)(cur - priv->cm.head) & (ipoib_recvq_size - 1))
+	       >= CM_POST_SRQ_COUNT);
+	if (post) {
+		ret = ib_post_srq_recv(priv->cm.srq, &priv->cm.head->wr, &bad_wr);
+		if (unlikely(ret)) {
+			ipoib_warn(priv, "post srq failed for buf %d (%d)\n", id, ret);
+			while (bad_wr) {
+				id = bad_wr->wr_id & ~(IPOIB_OP_CM | IPOIB_OP_RECV);
+				ipoib_cm_dma_unmap_rx(priv, priv->cm.num_frags - 1,
+						      priv->cm.srq_ring[id].mapping);
+				dev_kfree_skb_any(priv->cm.srq_ring[id].skb);
+				priv->cm.srq_ring[id].skb = NULL;
+				bad_wr = bad_wr->next;
+			}
+		} else
+			priv->cm.head = &priv->cm.rx_wr_arr[(id + 1) & (ipoib_recvq_size - 1)];
-
-	ret = ib_post_srq_recv(priv->cm.srq, &priv->cm.rx_wr, &bad_wr);
-	if (unlikely(ret)) {
-		ipoib_warn(priv, "post srq failed for buf %d (%d)\n", id, ret);
-		ipoib_cm_dma_unmap_rx(priv, priv->cm.num_frags - 1,
-				      priv->cm.srq_ring[id].mapping);
-		dev_kfree_skb_any(priv->cm.srq_ring[id].skb);
-		priv->cm.srq_ring[id].skb = NULL;
 	}
 
 	return ret;
@@ -483,7 +505,7 @@ void ipoib_cm_handle_rx_wc(struct net_de
 	netif_receive_skb(skb);
 
 repost:
-	if (unlikely(ipoib_cm_post_receive(dev, wr_id)))
+	if (unlikely(ipoib_cm_post_receive(dev, wr_id, 0)))
 		ipoib_warn(priv, "ipoib_cm_post_receive failed "
 			   "for buf %d\n", wr_id);
 }
@@ -1277,7 +1299,7 @@ int ipoib_cm_dev_init(struct net_device 
 			.max_wr  = ipoib_recvq_size,
 		}
 	};
-	int ret, i;
+	int ret, i, j;
 	struct ib_device_attr attr;
 
 	INIT_LIST_HEAD(&priv->cm.passive_ids);
@@ -1307,8 +1329,14 @@ int ipoib_cm_dev_init(struct net_device 
 
 	srq_init_attr.attr.max_sge = attr.max_srq_sge;
 
+	priv->cm.rx_wr_arr = kzalloc(ipoib_recvq_size * sizeof priv->cm.rx_wr_arr[0],
+				     GFP_KERNEL);
+	if (!priv->cm.rx_wr_arr)
+		return -ENOMEM;
+
 	priv->cm.srq = ib_create_srq(priv->pd, &srq_init_attr);
 	if (IS_ERR(priv->cm.srq)) {
+		kfree(priv->cm.rx_wr_arr);
 		ret = PTR_ERR(priv->cm.srq);
 		priv->cm.srq = NULL;
 		return ret;
@@ -1328,15 +1356,19 @@ int ipoib_cm_dev_init(struct net_device 
 		return -ENOMEM;
 	}
 
-	for (i = 0; i < priv->cm.num_frags; ++i)
-		priv->cm.rx_sge[i].lkey	= priv->mr->lkey;
+	for (j = 0; j < ipoib_recvq_size; ++j) {
+		for (i = 0; i < priv->cm.num_frags; ++i)
+			priv->cm.rx_wr_arr[j].rx_sge[i].lkey = priv->mr->lkey;
+
+		priv->cm.rx_wr_arr[j].rx_sge[0].length = IPOIB_CM_HEAD_SIZE;
+		for (i = 1; i < priv->cm.num_frags; ++i)
+			priv->cm.rx_wr_arr[j].rx_sge[i].length = PAGE_SIZE;
+
+		priv->cm.rx_wr_arr[j].wr.sg_list = priv->cm.rx_wr_arr[j].rx_sge;
+		priv->cm.rx_wr_arr[j].wr.num_sge = priv->cm.num_frags;
+	}
 
-	priv->cm.rx_sge[0].length = IPOIB_CM_HEAD_SIZE;
-	for (i = 1; i < priv->cm.num_frags; ++i)
-		priv->cm.rx_sge[i].length = PAGE_SIZE;
-	priv->cm.rx_wr.next = NULL;
-	priv->cm.rx_wr.sg_list = priv->cm.rx_sge;
-	priv->cm.rx_wr.num_sge = priv->cm.num_frags;
+	priv->cm.head = &priv->cm.rx_wr_arr[0];
 
 	for (i = 0; i < ipoib_recvq_size; ++i) {
 		if (!ipoib_cm_alloc_rx_skb(dev, i, priv->cm.num_frags - 1,
@@ -1345,7 +1377,7 @@ int ipoib_cm_dev_init(struct net_device 
 			ipoib_cm_dev_cleanup(dev);
 			return -ENOMEM;
 		}
-		if (ipoib_cm_post_receive(dev, i)) {
+		if (ipoib_cm_post_receive(dev, i, 1)) {
 			ipoib_warn(priv, "ipoib_ib_post_receive failed for buf %d\n", i);
 			ipoib_cm_dev_cleanup(dev);
 			return -EIO;
@@ -1375,11 +1407,12 @@ void ipoib_cm_dev_cleanup(struct net_dev
 		return;
 	for (i = 0; i < ipoib_recvq_size; ++i)
 		if (priv->cm.srq_ring[i].skb) {
-			ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1,
+			ipoib_cm_dma_unmap_rx(priv, priv->cm.num_frags - 1,
 					      priv->cm.srq_ring[i].mapping);
 			dev_kfree_skb_any(priv->cm.srq_ring[i].skb);
 			priv->cm.srq_ring[i].skb = NULL;
 		}
 	kfree(priv->cm.srq_ring);
+	kfree(priv->cm.rx_wr_arr);
 	priv->cm.srq_ring = NULL;
 }
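
A second note on the error path, again commentary rather than patch text:
when ib_post_srq_recv() fails partway through a chained list, it points
bad_wr at the first work request it could not post, and nothing linked
after that WR was posted either; the new while (bad_wr) loop in
ipoib_cm_post_receive() relies on this to unmap and free exactly the
buffers that never reached the SRQ. A tiny user-space model of that
unwinding pattern, with invented names (struct wr, pretend_post):

	#include <stdio.h>

	struct wr { int id; struct wr *next; };

	/* pretend post: accept the first 'ok' WRs, report the rest */
	static int pretend_post(struct wr *chain, int ok, struct wr **bad)
	{
		while (ok-- && chain)
			chain = chain->next;
		*bad = chain;
		return chain ? -1 : 0;
	}

	int main(void)
	{
		struct wr w[4] = {
			{ 0, &w[1] }, { 1, &w[2] }, { 2, &w[3] }, { 3, NULL }
		};
		struct wr *bad;

		if (pretend_post(&w[0], 2, &bad))
			for (; bad; bad = bad->next)	/* only unposted bufs */
				printf("reclaim buffer %d\n", bad->id);
		return 0;
	}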




