[ewg] [PATCH] IPoIB/CM: Enable SRQ support for HCAs with less than 16 S/G entries (in OFED 1.3)
Pradeep Satyanarayana
pradeeps at linux.vnet.ibm.com
Thu Jan 17 14:53:51 PST 2008
Some HCAs, such as ehca2, support fewer than 16 SG entries. IPoIB/CM currently
assumes implicitly that every HCA supports 16 SG entries of 4K pages, which is
what a 64K MTU requires. This patch removes that assumption.
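(For reference: 16 SG entries x 4K pages = 64K per receive buffer; the existing
IPOIB_CM_MTU definition (0x10000 - 0x10) reserves 0x10 of those bytes for
header-alignment padding, giving a 65520-byte connected-mode MTU.)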
The patch continues to use order-0 allocations and enables connected mode on
such HCAs, at correspondingly smaller MTUs. HCAs that do support 16 SG entries
are left untouched.
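For example, on an HCA reporting max_srq_sge = 4 (an illustrative value, not a
claim about any particular device), the patch below sets num_frags to 4 and
computes max_cm_mtu = 4 * 4096 - 0x10 = 16368 bytes, so connected mode remains
usable at a reduced MTU instead of being unavailable.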
A version of this patch has been in Roland's for-2.6.25 git tree for a couple
of weeks. Here is a backported version of that patch for OFED 1.3. Please
consider it for inclusion in OFED 1.3.
This patch addresses bug #728:
https://bugs.openfabrics.org/show_bug.cgi?id=728
Signed-off-by: Pradeep Satyanarayana <pradeeps at linux.vnet.ibm.com>
---
--- a/drivers/infiniband/ulp/ipoib/ipoib.h	2008-01-10 13:13:12.000000000 -0500
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h	2008-01-10 18:07:11.000000000 -0500
@@ -234,6 +234,8 @@ struct ipoib_cm_dev_priv {
 	struct ib_wc		ibwc[IPOIB_NUM_WC];
 	struct ib_sge		rx_sge[IPOIB_CM_RX_SG];
 	struct ib_recv_wr	rx_wr;
+	int			max_cm_mtu;
+	int			num_frags;
 };

 /*
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c	2008-01-10 13:13:12.000000000 -0500
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c	2008-01-17 15:53:30.000000000 -0500
@@ -89,13 +89,13 @@ static int ipoib_cm_post_receive(struct
 	priv->cm.rx_wr.wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV;

-	for (i = 0; i < IPOIB_CM_RX_SG; ++i)
+	for (i = 0; i < priv->cm.num_frags; ++i)
 		priv->cm.rx_sge[i].addr = priv->cm.srq_ring[id].mapping[i];

 	ret = ib_post_srq_recv(priv->cm.srq, &priv->cm.rx_wr, &bad_wr);
 	if (unlikely(ret)) {
 		ipoib_warn(priv, "post srq failed for buf %d (%d)\n", id, ret);
-		ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1,
+		ipoib_cm_dma_unmap_rx(priv, priv->cm.num_frags - 1,
 				      priv->cm.srq_ring[id].mapping);
 		dev_kfree_skb_any(priv->cm.srq_ring[id].skb);
 		priv->cm.srq_ring[id].skb = NULL;
 	}
@@ -1261,10 +1261,10 @@ int ipoib_cm_dev_init(struct net_device
 	struct ib_srq_init_attr srq_init_attr = {
 		.attr = {
 			.max_wr  = ipoib_recvq_size,
-			.max_sge = IPOIB_CM_RX_SG
 		}
 	};
 	int ret, i;
+	struct ib_device_attr attr;

 	INIT_LIST_HEAD(&priv->cm.passive_ids);
 	INIT_LIST_HEAD(&priv->cm.reap_list);
@@ -1281,6 +1281,18 @@ int ipoib_cm_dev_init(struct net_device
 	skb_queue_head_init(&priv->cm.skb_queue);

+	ret = ib_query_device(priv->ca, &attr);
+	if (ret) {
+		printk(KERN_WARNING "ib_query_device() failed with %d\n", ret);
+		return ret;
+	}
+
+	ipoib_dbg(priv, "max_srq_sge=%d\n", attr.max_srq_sge);
+
+	attr.max_srq_sge = min(IPOIB_CM_RX_SG, attr.max_srq_sge);
+
+	srq_init_attr.attr.max_sge = attr.max_srq_sge;
+
 	priv->cm.srq = ib_create_srq(priv->pd, &srq_init_attr);
 	if (IS_ERR(priv->cm.srq)) {
 		ret = PTR_ERR(priv->cm.srq);
 		priv->cm.srq = NULL;
@@ -1288,6 +1300,11 @@ int ipoib_cm_dev_init(struct net_device
 		return ret;
 	}

+	priv->cm.max_cm_mtu = attr.max_srq_sge * PAGE_SIZE - 0x10;
+	priv->cm.num_frags  = attr.max_srq_sge;
+	ipoib_dbg(priv, "max_cm_mtu = 0x%x, num_frags=%d\n",
+		  priv->cm.max_cm_mtu, priv->cm.num_frags);
+
 	priv->cm.srq_ring = kzalloc(ipoib_recvq_size * sizeof *priv->cm.srq_ring,
 				    GFP_KERNEL);
 	if (!priv->cm.srq_ring) {
@@ -1297,18 +1314,18 @@ int ipoib_cm_dev_init(struct net_device
 		return -ENOMEM;
 	}

-	for (i = 0; i < IPOIB_CM_RX_SG; ++i)
+	for (i = 0; i < priv->cm.num_frags; ++i)
 		priv->cm.rx_sge[i].lkey = priv->mr->lkey;

 	priv->cm.rx_sge[0].length = IPOIB_CM_HEAD_SIZE;
-	for (i = 1; i < IPOIB_CM_RX_SG; ++i)
+	for (i = 1; i < priv->cm.num_frags; ++i)
 		priv->cm.rx_sge[i].length = PAGE_SIZE;

 	priv->cm.rx_wr.next = NULL;
 	priv->cm.rx_wr.sg_list = priv->cm.rx_sge;
-	priv->cm.rx_wr.num_sge = IPOIB_CM_RX_SG;
+	priv->cm.rx_wr.num_sge = priv->cm.num_frags;

 	for (i = 0; i < ipoib_recvq_size; ++i) {
-		if (!ipoib_cm_alloc_rx_skb(dev, i, IPOIB_CM_RX_SG - 1,
+		if (!ipoib_cm_alloc_rx_skb(dev, i, priv->cm.num_frags - 1,
 					   priv->cm.srq_ring[i].mapping)) {
 			ipoib_warn(priv, "failed to allocate receive buffer %d\n", i);
 			ipoib_cm_dev_cleanup(dev);
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c	2008-01-10 13:13:12.000000000 -0500
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c	2008-01-17 17:15:27.000000000 -0500
@@ -182,7 +182,10 @@ static int ipoib_change_mtu(struct net_d
 	struct ipoib_dev_priv *priv = netdev_priv(dev);

 	/* dev->mtu > 2K ==> connected mode */
-	if (ipoib_cm_admin_enabled(dev) && new_mtu <= IPOIB_CM_MTU) {
+	if (ipoib_cm_admin_enabled(dev)) {
+		if (new_mtu > priv->cm.max_cm_mtu)
+			return -EINVAL;
+
 		if (new_mtu > priv->mcast_mtu)
 			ipoib_warn(priv, "mtu > %d will cause multicast packet drops.\n",
 				   priv->mcast_mtu);
@@ -190,9 +193,8 @@ static int ipoib_change_mtu(struct net_d
 		return 0;
 	}

-	if (new_mtu > IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN) {
+	if (new_mtu > IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN)
 		return -EINVAL;
-	}

 	priv->admin_mtu = new_mtu;
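
For readers skimming the diff, the negotiation it adds to ipoib_cm_dev_init()
boils down to the following condensed sketch. The helper name is hypothetical
and error paths are trimmed; it only restates logic already present in the
hunks above.

/*
 * Sketch only: cap the SRQ S/G list at what the HCA reports and derive
 * the connected-mode MTU from that many order-0 pages.  Assumes the
 * usual ipoib.h / rdma/ib_verbs.h context of the patch above.
 */
static int ipoib_cm_negotiate_srq(struct ipoib_dev_priv *priv,
				  struct ib_srq_init_attr *srq_init_attr)
{
	struct ib_device_attr attr;
	int ret;

	/* Ask the HCA how many S/G entries an SRQ WQE may carry. */
	ret = ib_query_device(priv->ca, &attr);
	if (ret)
		return ret;

	/* Never request more fragments than the hardware supports. */
	attr.max_srq_sge = min(IPOIB_CM_RX_SG, attr.max_srq_sge);
	srq_init_attr->attr.max_sge = attr.max_srq_sge;

	/*
	 * Largest CM payload that fits in that many order-0 pages,
	 * minus the 0x10 bytes of header-alignment padding.
	 */
	priv->cm.max_cm_mtu = attr.max_srq_sge * PAGE_SIZE - 0x10;
	priv->cm.num_frags  = attr.max_srq_sge;

	return 0;
}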