[ofa-general] [PATCH] [RFC] IPOIB/CM Enable SRQ support on HCAs with less than 16 SG entries

Pradeep Satyanarayana pradeeps at linux.vnet.ibm.com
Thu Dec 20 15:53:41 PST 2007


Some HCAs like ehca2 support fewer than 16 SG entries. Currently IPoIB/CM
implicitly assumes all HCAs will support 16 SG entries of 4K pages for 64K 
MTUs. This patch removes that restriction.

This patch continues to use order-0 allocations and enables connected mode
on such HCAs, with a correspondingly smaller MTU. HCAs that can support
16 SG entries are left untouched.

This patch addresses bug# 728:
https://bugs.openfabrics.org/show_bug.cgi?id=728

While working on this patch I discovered that mthca reports an incorrect
value of max_srq_sge. I had also reported this issue several weeks ago.
I worked around it by using a hard-coded value of 16 for max_srq_sge
(mthca only). More on that in a follow-up mail.

Signed-off-by: Pradeep Satyanarayana <pradeeps at linux.vnet.ibm.com>
---

--- a/drivers/infiniband/ulp/ipoib/ipoib.h	2007-11-03 11:37:02.000000000 -0700
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h	2007-12-20 13:17:43.000000000 -0800
@@ -466,6 +466,7 @@ void ipoib_drain_cq(struct net_device *d
 #define IPOIB_CM_SUPPORTED(ha)   (ha[0] & (IPOIB_FLAGS_RC))
 
 extern int ipoib_max_conn_qp;
+extern int max_cm_mtu;
 
 static inline int ipoib_cm_admin_enabled(struct net_device *dev)
 {
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c	2007-11-21 07:46:35.000000000 -0800
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c	2007-12-20 14:47:13.000000000 -0800
@@ -74,6 +74,9 @@ static struct ib_send_wr ipoib_cm_rx_dra
 	.opcode = IB_WR_SEND,
 };
 
+static int num_of_frags;
+int max_cm_mtu;
+
 static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
 			       struct ib_cm_event *event);
 
@@ -96,13 +99,13 @@ static int ipoib_cm_post_receive_srq(str
 
 	priv->cm.rx_wr.wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV;
 
-	for (i = 0; i < IPOIB_CM_RX_SG; ++i)
+	for (i = 0; i < num_of_frags; ++i)
 		priv->cm.rx_sge[i].addr = priv->cm.srq_ring[id].mapping[i];
 
 	ret = ib_post_srq_recv(priv->cm.srq, &priv->cm.rx_wr, &bad_wr);
 	if (unlikely(ret)) {
 		ipoib_warn(priv, "post srq failed for buf %d (%d)\n", id, ret);
-		ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1,
+		ipoib_cm_dma_unmap_rx(priv, num_of_frags - 1,
 				      priv->cm.srq_ring[id].mapping);
 		dev_kfree_skb_any(priv->cm.srq_ring[id].skb);
 		priv->cm.srq_ring[id].skb = NULL;
@@ -623,6 +626,7 @@ repost:
 			--p->recv_count;
 			ipoib_warn(priv, "ipoib_cm_post_receive_nonsrq failed "
 				   "for buf %d\n", wr_id);
+		kfree(mapping); /*** Check if this needed ***/
 		}
 	}
 }
@@ -1399,16 +1403,17 @@ int ipoib_cm_add_mode_attr(struct net_de
 	return device_create_file(&dev->dev, &dev_attr_mode);
 }
 
-static void ipoib_cm_create_srq(struct net_device *dev)
+static void ipoib_cm_create_srq(struct net_device *dev, int max_sge)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct ib_srq_init_attr srq_init_attr = {
 		.attr = {
 			.max_wr  = ipoib_recvq_size,
-			.max_sge = IPOIB_CM_RX_SG
 		}
 	};
 
+	srq_init_attr.attr.max_sge = max_sge;
+
 	priv->cm.srq = ib_create_srq(priv->pd, &srq_init_attr);
 	if (IS_ERR(priv->cm.srq)) {
 		if (PTR_ERR(priv->cm.srq) != -ENOSYS)
@@ -1418,6 +1423,7 @@ static void ipoib_cm_create_srq(struct n
 		return;
 	}
 
+
 	priv->cm.srq_ring = kzalloc(ipoib_recvq_size * sizeof *priv->cm.srq_ring,
 				    GFP_KERNEL);
 	if (!priv->cm.srq_ring) {
@@ -1431,7 +1437,9 @@ static void ipoib_cm_create_srq(struct n
 int ipoib_cm_dev_init(struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
-	int i;
+	int i, ret;
+	struct ib_srq_attr srq_attr;
+	struct ib_device_attr attr;
 
 	INIT_LIST_HEAD(&priv->cm.passive_ids);
 	INIT_LIST_HEAD(&priv->cm.reap_list);
@@ -1448,22 +1456,46 @@ int ipoib_cm_dev_init(struct net_device 
 
 	skb_queue_head_init(&priv->cm.skb_queue);
 
-	for (i = 0; i < IPOIB_CM_RX_SG; ++i)
+	ret = ib_query_device(priv->ca, &attr);
+	if (ret) {
+		printk(KERN_WARNING "ib_query_device() failed with %d\n", ret);
+		return ret;
+	}
+
+	ipoib_dbg(priv, "max_srq_sge=%d\n", attr.max_srq_sge);
+
+	ipoib_cm_create_srq(dev, attr.max_srq_sge);
+
+	if (ipoib_cm_has_srq(dev)) {
+		ret = ib_query_srq(priv->cm.srq, &srq_attr);
+		if (ret) {
+			printk(KERN_WARNING "ib_query_srq() failed with %d\n", ret);
+			return -EINVAL;
+		}
+		/* pad similar to IPOIB_CM_MTU */
+		max_cm_mtu = srq_attr.max_sge * PAGE_SIZE - 0x10;
+		num_of_frags = srq_attr.max_sge;
+		ipoib_dbg(priv, "max_cm_mtu = 0x%x, num_of_frags=%d\n",
+			  max_cm_mtu, num_of_frags);
+	} else {
+		max_cm_mtu = IPOIB_CM_MTU;
+		num_of_frags  = IPOIB_CM_RX_SG;
+	}
+
+	for (i = 0; i < num_of_frags; ++i)
 		priv->cm.rx_sge[i].lkey	= priv->mr->lkey;
 
 	priv->cm.rx_sge[0].length = IPOIB_CM_HEAD_SIZE;
-	for (i = 1; i < IPOIB_CM_RX_SG; ++i)
+	for (i = 1; i < num_of_frags; ++i)
 		priv->cm.rx_sge[i].length = PAGE_SIZE;
 	priv->cm.rx_wr.next = NULL;
 	priv->cm.rx_wr.sg_list = priv->cm.rx_sge;
-	priv->cm.rx_wr.num_sge = IPOIB_CM_RX_SG;
-
-	ipoib_cm_create_srq(dev);
+	priv->cm.rx_wr.num_sge = num_of_frags;
 
 	if (ipoib_cm_has_srq(dev)) {
 		for (i = 0; i < ipoib_recvq_size; ++i) {
 			if (!ipoib_cm_alloc_rx_skb(dev, priv->cm.srq_ring, i,
-						   IPOIB_CM_RX_SG - 1,
+						   num_of_frags - 1,
 						   priv->cm.srq_ring[i].mapping)) {
 				ipoib_warn(priv, "failed to allocate "
 					   "receive buffer %d\n", i);
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c	2007-12-19 14:02:15.000000000 -0800
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c	2007-12-20 13:17:43.000000000 -0800
@@ -182,12 +182,15 @@ static int ipoib_change_mtu(struct net_d
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 
 	/* dev->mtu > 2K ==> connected mode */
-	if (ipoib_cm_admin_enabled(dev) && new_mtu <= IPOIB_CM_MTU) {
-		if (new_mtu > priv->mcast_mtu)
-			ipoib_warn(priv, "mtu > %d will cause multicast packet drops.\n",
+	if (ipoib_cm_admin_enabled(dev)) {
+		if (new_mtu <= max_cm_mtu) {
+			if (new_mtu > priv->mcast_mtu)
+				ipoib_warn(priv, "mtu > %d will cause multicast packet drops.\n",
 				   priv->mcast_mtu);
-		dev->mtu = new_mtu;
-		return 0;
+			dev->mtu = new_mtu;
+			return 0;
+		} else
+			return -EINVAL;
 	}
 
 	if (new_mtu > IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN) {





More information about the general mailing list