[ofa-general] IPOIB CM (NOSRQ)[PATCH V3] patch for review
Pradeep Satyanarayana
pradeep at us.ibm.com
Fri Apr 27 17:51:14 PDT 2007
Here is a third version of the IPOIB_CM_NOSRQ patch for review. This patch
will benefit adapters that do not support shared receive queues.
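The split between the two paths hinges on a device-capability check. Roughly (a
condensed sketch of what ipoib_cm_dev_init() does in this patch, not the literal
hunk; error handling trimmed):

	struct ib_device_attr attr;
	int ret;

	ret = ib_query_device(priv->ca, &attr);
	if (ret)
		return ret;

	if (attr.max_srq) {
		/* HCA supports SRQ: keep the single SRQ and shared srq_ring */
		ret = create_srq(dev, priv);
		if (ret)
			return ret;
		priv->cm.rx_index_ring = NULL;
	} else {
		/* No SRQ: per-connection rx_rings, tracked via rx_index_ring[] */
		priv->cm.srq = NULL;
		priv->cm.srq_ring = NULL;
		priv->cm.rx_index_ring = kzalloc(NOSRQ_INDEX_RING_SIZE *
						 sizeof *priv->cm.rx_index_ring,
						 GFP_KERNEL);
	}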
This patch incorporates the following review comments from v2:
- There should be no line wrap issues now
- Code restructured to separate the SRQ and non-SRQ paths in several places
This patch has been tested with linux-2.6.21-rc5 and rc7 (derived from
Roland's for-2.6.22 git tree on 04/25/2007) with Topspin and IBM HCAs on
ppc64 machines. I have run netperf between two IBM HCAs, as well as
between an IBM and a Topspin HCA.
Note 1: I have retained the code to avoid IB_WC_RETRY_EXC_ERR while
performing interoperability tests. As discussed on this mailing list,
that may be a CM bug, or something the various HCAs need to address.
Hence I would like to separate that issue from this patch. At a future
point, when the issue is resolved, I can provide another patch to change
the retry_count values back to 0 if need be.
Note 2: "Modify Port" patch submitted by Joachim Fenkes is needed for the
ehca driver to work on the IBM HCAs.
Have not tested with this patch as yet.
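Since all RC QPs in the non-SRQ case still feed a single CQ, the patch packs
both the connection and the buffer slot into each 64-bit wr_id: the upper 32
bits carry the slot within that connection's rx_ring, the low 10 bits
(NOSRQ_INDEX_MASK) carry the index into rx_index_ring[], and bit 29
(IPOIB_CM_OP_NOSRQ) lets ipoib_ib_handle_wc() route the completion. The
helpers below only illustrate that encoding; the patch itself open-codes the
shifts and masks inline:

	/* Illustrative only -- these helpers are not part of the patch. */
	#define IPOIB_CM_OP_NOSRQ	(1ul << 29)
	#define NOSRQ_INDEX_RING_SIZE	1024
	#define NOSRQ_INDEX_MASK	0x00000000000003ff

	static inline u64 nosrq_wr_id(u32 slot, u32 index)
	{
		/* wr_id as posted to the RC QP and seen in the completion */
		return (u64) slot << 32 | index | IPOIB_CM_OP_NOSRQ;
	}

	static inline u32 nosrq_wr_id_slot(u64 wr_id)
	{
		return wr_id >> 32;	/* position in rx_ptr->rx_ring[] */
	}

	static inline u32 nosrq_wr_id_index(u64 wr_id)
	{
		return (wr_id & ~IPOIB_CM_OP_NOSRQ) & NOSRQ_INDEX_MASK;
	}

With NOSRQ_INDEX_RING_SIZE at 1024 the index fits in the low 10 bits, which is
why the two constants must change together (the "go hand in hand" comment in
the ipoib.h hunk).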
Signed-off-by: Pradeep Satyanarayana <pradeep at us.ibm.com>
---
--- a/linux-2.6.21-rc7/drivers/infiniband/ulp/ipoib/ipoib.h 2007-04-24 18:10:17.000000000 -0700
+++ b//linux-2.6.21-rc7/drivers/infiniband/ulp/ipoib/ipoib.h 2007-04-25 10:11:34.000000000 -0700
@@ -99,6 +99,12 @@ enum {
#define IPOIB_OP_RECV (1ul << 31)
#ifdef CONFIG_INFINIBAND_IPOIB_CM
#define IPOIB_CM_OP_SRQ (1ul << 30)
+#define IPOIB_CM_OP_NOSRQ (1ul << 29)
+
+/* These two go hand in hand */
+#define NOSRQ_INDEX_RING_SIZE 1024
+#define NOSRQ_INDEX_MASK 0x00000000000003ff
+
#else
#define IPOIB_CM_OP_SRQ (0)
#endif
@@ -136,9 +142,11 @@ struct ipoib_cm_data {
struct ipoib_cm_rx {
struct ib_cm_id *id;
struct ib_qp *qp;
+ struct ipoib_cm_rx_buf *rx_ring;
struct list_head list;
struct net_device *dev;
unsigned long jiffies;
+ u32 index;
};
struct ipoib_cm_tx {
@@ -177,6 +185,7 @@ struct ipoib_cm_dev_priv {
struct ib_wc ibwc[IPOIB_NUM_WC];
struct ib_sge rx_sge[IPOIB_CM_RX_SG];
struct ib_recv_wr rx_wr;
+ struct ipoib_cm_rx **rx_index_ring;
};
/*
--- a/linux-2.6.21-rc7/drivers/infiniband/ulp/ipoib/ipoib_cm.c 2007-04-24 18:10:17.000000000 -0700
+++ b//linux-2.6.21-rc7/drivers/infiniband/ulp/ipoib/ipoib_cm.c 2007-04-27 14:03:40.000000000 -0700
@@ -76,7 +76,7 @@ static void ipoib_cm_dma_unmap_rx(struct
ib_dma_unmap_single(priv->ca, mapping[i + 1], PAGE_SIZE,
DMA_FROM_DEVICE);
}
-static int ipoib_cm_post_receive(struct net_device *dev, int id)
+static int post_receive_srq(struct net_device *dev, u64 id)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ib_recv_wr *bad_wr;
@@ -85,13 +85,14 @@ static int ipoib_cm_post_receive(struct
priv->cm.rx_wr.wr_id = id | IPOIB_CM_OP_SRQ;
for (i = 0; i < IPOIB_CM_RX_SG; ++i)
- priv->cm.rx_sge[i].addr = priv->cm.srq_ring[id].mapping[i];
+ priv->cm.rx_sge[i].addr =
+ priv->cm.srq_ring[id].mapping[i];
ret = ib_post_srq_recv(priv->cm.srq, &priv->cm.rx_wr, &bad_wr);
if (unlikely(ret)) {
ipoib_warn(priv, "post srq failed for buf %d (%d)\n", id,
ret);
ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1,
- priv->cm.srq_ring[id].mapping);
+ priv->cm.srq_ring[id].mapping);
dev_kfree_skb_any(priv->cm.srq_ring[id].skb);
priv->cm.srq_ring[id].skb = NULL;
}
@@ -99,12 +100,69 @@ static int ipoib_cm_post_receive(struct
return ret;
}
-static struct sk_buff *ipoib_cm_alloc_rx_skb(struct net_device *dev, int id, int frags,
+static int post_receive_nosrq(struct net_device *dev, u64 id)
+{
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ib_recv_wr *bad_wr;
+ int i, ret;
+ u32 index;
+ u64 wr_id;
+ struct ipoib_cm_rx *rx_ptr;
+ unsigned long flags;
+
+ index = id & NOSRQ_INDEX_MASK ;
+ wr_id = id >> 32;
+
+ /* There is a slender chance of a race between the stale_task
+ * running after a period of inactivity and the receipt of
+ * a packet being processed at about the same instant.
+ * Hence the lock */
+
+ spin_lock_irqsave(&priv->lock, flags);
+ rx_ptr = priv->cm.rx_index_ring[index];
+ spin_unlock_irqrestore(&priv->lock, flags);
+
+ priv->cm.rx_wr.wr_id = wr_id << 32 | index | IPOIB_CM_OP_NOSRQ;
+
+ for (i = 0; i < IPOIB_CM_RX_SG; ++i)
+ priv->cm.rx_sge[i].addr = rx_ptr->rx_ring[wr_id].mapping[i];
+
+ ret = ib_post_recv(rx_ptr->qp, &priv->cm.rx_wr, &bad_wr);
+ if (unlikely(ret)) {
+ ipoib_warn(priv, "post recv failed for buf %d (%d)\n",
+ wr_id, ret);
+ ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1,
+ rx_ptr->rx_ring[wr_id].mapping);
+ dev_kfree_skb_any(rx_ptr->rx_ring[wr_id].skb);
+ rx_ptr->rx_ring[wr_id].skb = NULL;
+ }
+
+ return ret;
+}
+
+static int ipoib_cm_post_receive(struct net_device *dev, u64 id)
+{
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+ int ret;
+
+ if (priv->cm.srq)
+ ret = post_receive_srq(dev, id);
+ else
+ ret = post_receive_nosrq(dev, id);
+
+ return ret;
+}
+
+static struct sk_buff *ipoib_cm_alloc_rx_skb(struct net_device *dev, u64 id,
+ int frags,
u64 mapping[IPOIB_CM_RX_SG])
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct sk_buff *skb;
int i;
+ struct ipoib_cm_rx *rx_ptr;
+ u32 index, wr_id;
+ unsigned long flags;
skb = dev_alloc_skb(IPOIB_CM_HEAD_SIZE + 12);
if (unlikely(!skb))
@@ -123,7 +181,7 @@ static struct sk_buff *ipoib_cm_alloc_rx
return NULL;
}
- for (i = 0; i < frags; i++) {
+ for (i = 0; i < frags; i++) {
struct page *page = alloc_page(GFP_ATOMIC);
if (!page)
@@ -136,7 +194,17 @@ static struct sk_buff *ipoib_cm_alloc_rx
goto partial_error;
}
- priv->cm.srq_ring[id].skb = skb;
+ if (priv->cm.srq)
+ priv->cm.srq_ring[id].skb = skb;
+ else {
+ index = id & NOSRQ_INDEX_MASK ;
+ wr_id = id >> 32;
+ spin_lock_irqsave(&priv->lock, flags);
+ rx_ptr = priv->cm.rx_index_ring[index];
+ spin_unlock_irqrestore(&priv->lock, flags);
+
+ rx_ptr->rx_ring[wr_id].skb = skb;
+ }
return skb;
partial_error:
@@ -157,13 +225,20 @@ static struct ib_qp *ipoib_cm_create_rx_
struct ib_qp_init_attr attr = {
.send_cq = priv->cq, /* does not matter, we never send anything */
.recv_cq = priv->cq,
- .srq = priv->cm.srq,
.cap.max_send_wr = 1, /* FIXME: 0 Seems not to work */
+ .cap.max_recv_wr = ipoib_recvq_size + 1,
.cap.max_send_sge = 1, /* FIXME: 0 Seems not to work */
+ .cap.max_recv_sge = IPOIB_CM_RX_SG, /* Is this correct? */
.sq_sig_type = IB_SIGNAL_ALL_WR,
.qp_type = IB_QPT_RC,
.qp_context = p,
};
+
+ if (priv->cm.srq)
+ attr.srq = priv->cm.srq;
+ else
+ attr.srq = NULL;
+
return ib_create_qp(priv->pd, &attr);
}
@@ -198,6 +273,7 @@ static int ipoib_cm_modify_rx_qp(struct
ipoib_warn(priv, "failed to modify QP to RTR: %d\n", ret);
return ret;
}
+
return 0;
}
@@ -217,12 +293,87 @@ static int ipoib_cm_send_rep(struct net_
rep.flow_control = 0;
rep.rnr_retry_count = req->rnr_retry_count;
rep.target_ack_delay = 20; /* FIXME */
- rep.srq = 1;
rep.qp_num = qp->qp_num;
rep.starting_psn = psn;
+
+ if (priv->cm.srq)
+ rep.srq = 1;
+ else
+ rep.srq = 0;
return ib_send_cm_rep(cm_id, &rep);
}
+int allocate_and_post_rbuf_nosrq(struct ib_cm_id *cm_id, struct ipoib_cm_rx *p, unsigned psn)
+{
+ struct net_device *dev = cm_id->context;
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+ int ret;
+ u32 qp_num, index;
+ u64 i;
+
+ qp_num = p->qp->qp_num;
+ /* Allocate space for the rx_ring here */
+ p->rx_ring = kzalloc(ipoib_recvq_size * sizeof *p->rx_ring,
+ GFP_KERNEL);
+ if (p->rx_ring == NULL)
+ return -ENOMEM;
+
+ cm_id->context = p;
+ p->jiffies = jiffies;
+ spin_lock_irq(&priv->lock);
+ list_add(&p->list, &priv->cm.passive_ids);
+
+ /* Find an empty rx_index_ring[] entry */
+ for (index = 0; index < NOSRQ_INDEX_RING_SIZE; index++)
+ if (priv->cm.rx_index_ring[index] == NULL)
+ break;
+
+ if ( index == NOSRQ_INDEX_RING_SIZE) {
+ spin_unlock_irq(&priv->lock);
+ printk(KERN_WARNING "NOSRQ supports a max of %d RC "
+ "QPs. That limit has now been reached\n",
+ NOSRQ_INDEX_RING_SIZE);
+ return -EINVAL;
+ }
+
+ /* Store the pointer to retrieve it later using the index */
+ priv->cm.rx_index_ring[index] = p;
+ spin_unlock_irq(&priv->lock);
+ p->index = index;
+
+ ret = ipoib_cm_modify_rx_qp(dev, cm_id, p->qp, psn);
+ if (ret) {
+ ipoib_warn(priv, "ipoib_cm_modify_rx_qp() failed %d\n",
ret);
+ goto err_modify_nosrq;
+ }
+
+ for (i = 0; i < ipoib_recvq_size; ++i) {
+ if (!ipoib_cm_alloc_rx_skb(dev, i << 32 | index,
+ IPOIB_CM_RX_SG - 1,
+ p->rx_ring[i].mapping)) {
+ ipoib_warn(priv, "failed to allocate receive "
+ "buffer %d\n", i);
+ ipoib_cm_dev_cleanup(dev);
+ /* Free rx_ring previously allocated */
+ kfree(p->rx_ring);
+ return -ENOMEM;
+ }
+
+ /* Can we call the nosrq version? */
+ if (ipoib_cm_post_receive(dev, i << 32 | index)) {
+ ipoib_warn(priv, "ipoib_ib_post_receive "
+ "failed for buf %d\n", i);
+ ipoib_cm_dev_cleanup(dev);
+ return -EIO;
+ }
+ } /* end for */
+
+ return 0;
+
+err_modify_nosrq:
+ return ret;
+}
+
static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
{
struct net_device *dev = cm_id->context;
@@ -243,10 +394,17 @@ static int ipoib_cm_req_handler(struct i
goto err_qp;
}
- psn = random32() & 0xffffff;
- ret = ipoib_cm_modify_rx_qp(dev, cm_id, p->qp, psn);
- if (ret)
- goto err_modify;
+ if (priv->cm.srq == NULL) { /* NOSRQ */
+ psn = random32() & 0xffffff;
+ if (ret = allocate_and_post_rbuf_nosrq(cm_id, p, psn))
+ goto err_modify;
+ } else { /* SRQ */
+ p->rx_ring = NULL; /* This is used only by NOSRQ */
+ psn = random32() & 0xffffff;
+ ret = ipoib_cm_modify_rx_qp(dev, cm_id, p->qp, psn);
+ if (ret)
+ goto err_modify;
+ }
ret = ipoib_cm_send_rep(dev, cm_id, p->qp, &event->param.req_rcvd, psn);
if (ret) {
@@ -254,11 +412,13 @@ static int ipoib_cm_req_handler(struct i
goto err_rep;
}
- cm_id->context = p;
- p->jiffies = jiffies;
- spin_lock_irq(&priv->lock);
- list_add(&p->list, &priv->cm.passive_ids);
- spin_unlock_irq(&priv->lock);
+ if (priv->cm.srq) {
+ cm_id->context = p;
+ p->jiffies = jiffies;
+ spin_lock_irq(&priv->lock);
+ list_add(&p->list, &priv->cm.passive_ids);
+ spin_unlock_irq(&priv->lock);
+ }
queue_delayed_work(ipoib_workqueue,
&priv->cm.stale_task, IPOIB_CM_RX_DELAY);
return 0;
@@ -339,23 +499,40 @@ static void skb_put_frags(struct sk_buff
}
}
-void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
+static void timer_check(struct ipoib_dev_priv *priv, struct ipoib_cm_rx *p)
+{
+ unsigned long flags;
+
+ if (time_after_eq(jiffies, p->jiffies + IPOIB_CM_RX_UPDATE_TIME)) {
+ spin_lock_irqsave(&priv->lock, flags);
+ p->jiffies = jiffies;
+ /* Move this entry to list head, but do
+ * not re-add it if it has been removed. */
+ if (!list_empty(&p->list))
+ list_move(&p->list, &priv->cm.passive_ids);
+ spin_unlock_irqrestore(&priv->lock, flags);
+ queue_delayed_work(ipoib_workqueue,
+ &priv->cm.stale_task, IPOIB_CM_RX_DELAY);
+ }
+}
+static int handle_rx_wc_srq(struct net_device *dev, struct ib_wc *wc)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
- unsigned int wr_id = wc->wr_id & ~IPOIB_CM_OP_SRQ;
struct sk_buff *skb, *newskb;
+ u64 mapping[IPOIB_CM_RX_SG], wr_id;
struct ipoib_cm_rx *p;
unsigned long flags;
- u64 mapping[IPOIB_CM_RX_SG];
- int frags;
+ int frags, ret;
+
+ wr_id = wc->wr_id & ~IPOIB_CM_OP_SRQ;
ipoib_dbg_data(priv, "cm recv completion: id %d, status: %d\n",
wr_id, wc->status);
if (unlikely(wr_id >= ipoib_recvq_size)) {
- ipoib_warn(priv, "cm recv completion event with wrid %d (>
%d)\n",
- wr_id, ipoib_recvq_size);
- return;
+ ipoib_warn(priv, "cm recv completion event with wrid %d "
+ "(> %d)\n", wr_id, ipoib_recvq_size);
+ return 1;
}
skb = priv->cm.srq_ring[wr_id].skb;
@@ -365,22 +542,12 @@ void ipoib_cm_handle_rx_wc(struct net_de
"(status=%d, wrid=%d vend_err %x)\n",
wc->status, wr_id, wc->vendor_err);
++priv->stats.rx_dropped;
- goto repost;
+ goto repost_srq;
}
if (!likely(wr_id & IPOIB_CM_RX_UPDATE_MASK)) {
p = wc->qp->qp_context;
- if (time_after_eq(jiffies, p->jiffies + IPOIB_CM_RX_UPDATE_TIME)) {
- spin_lock_irqsave(&priv->lock, flags);
- p->jiffies = jiffies;
- /* Move this entry to list head, but do
- * not re-add it if it has been removed. */
- if (!list_empty(&p->list))
- list_move(&p->list, &priv->cm.passive_ids);
- spin_unlock_irqrestore(&priv->lock, flags);
- queue_delayed_work(ipoib_workqueue,
- &priv->cm.stale_task, IPOIB_CM_RX_DELAY);
- }
+ timer_check(priv, p);
}
frags = PAGE_ALIGN(wc->byte_len - min(wc->byte_len,
@@ -388,22 +555,119 @@ void ipoib_cm_handle_rx_wc(struct net_de
newskb = ipoib_cm_alloc_rx_skb(dev, wr_id, frags, mapping);
if (unlikely(!newskb)) {
- /*
- * If we can't allocate a new RX buffer, dump
- * this packet and reuse the old buffer.
- */
- ipoib_dbg(priv, "failed to allocate receive buffer %d\n", wr_id);
+ /*
+ * If we can't allocate a new RX buffer, dump
+ * this packet and reuse the old buffer.
+ */
+ ipoib_dbg(priv, "failed to allocate receive buffer %d\n", wr_id);
+ ++priv->stats.rx_dropped;
+ goto repost_srq;
+ }
+
+ ipoib_cm_dma_unmap_rx(priv, frags,
+ priv->cm.srq_ring[wr_id].mapping);
+ memcpy(priv->cm.srq_ring[wr_id].mapping, mapping,
+ (frags + 1) * sizeof *mapping);
+ ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n",
+ wc->byte_len, wc->slid);
+
+ skb_put_frags(skb, IPOIB_CM_HEAD_SIZE, wc->byte_len, newskb);
+
+ skb->protocol = ((struct ipoib_header *) skb->data)->proto;
+ skb->mac.raw = skb->data;
+ skb_pull(skb, IPOIB_ENCAP_LEN);
+
+ dev->last_rx = jiffies;
+ ++priv->stats.rx_packets;
+ priv->stats.rx_bytes += skb->len;
+
+ skb->dev = dev;
+ /* XXX get correct PACKET_ type here */
+ skb->pkt_type = PACKET_HOST;
+
+ netif_rx_ni(skb);
+
+repost_srq:
+ ret = ipoib_cm_post_receive(dev, wr_id);
+
+ if (unlikely(ret)) {
+ ipoib_warn(priv, "ipoib_cm_post_receive failed for buf
%d\n",
+ wr_id);
+ return 1;
+ }
+
+ return 0;
+
+}
+
+static int handle_rx_wc_nosrq(struct net_device *dev, struct ib_wc *wc)
+{
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct sk_buff *skb, *newskb;
+ u64 mapping[IPOIB_CM_RX_SG], wr_id;
+ u32 index;
+ struct ipoib_cm_rx *p, *rx_ptr;
+ unsigned long flags;
+ int frags, ret;
+
+
+ wr_id = wc->wr_id >> 32;
+
+ ipoib_dbg_data(priv, "cm recv completion: id %d, status: %d\n",
+ wr_id, wc->status);
+
+ if (unlikely(wr_id >= ipoib_recvq_size)) {
+ ipoib_warn(priv, "cm recv completion event with wrid %d "
+ "(> %d)\n", wr_id, ipoib_recvq_size);
+ return 1;
+ }
+
+ index = (wc->wr_id & ~IPOIB_CM_OP_NOSRQ) & NOSRQ_INDEX_MASK ;
+ spin_lock_irqsave(&priv->lock, flags);
+ rx_ptr = priv->cm.rx_index_ring[index];
+ spin_unlock_irqrestore(&priv->lock, flags);
+
+ skb = rx_ptr->rx_ring[wr_id].skb;
+
+ if (unlikely(wc->status != IB_WC_SUCCESS)) {
+ ipoib_dbg(priv, "cm recv error "
+ "(status=%d, wrid=%d vend_err %x)\n",
+ wc->status, wr_id, wc->vendor_err);
++priv->stats.rx_dropped;
- goto repost;
+ goto repost_nosrq;
}
- ipoib_cm_dma_unmap_rx(priv, frags, priv->cm.srq_ring[wr_id].mapping);
- memcpy(priv->cm.srq_ring[wr_id].mapping, mapping, (frags + 1) * sizeof *mapping);
+ if (!likely(wr_id & IPOIB_CM_RX_UPDATE_MASK)) {
+ /* There are no guarantees that wc->qp is not NULL for HCAs
+ * that do not support SRQ. */
+ p = rx_ptr;
+ timer_check(priv, p);
+ }
+
+ frags = PAGE_ALIGN(wc->byte_len - min(wc->byte_len,
+ (unsigned)IPOIB_CM_HEAD_SIZE)) / PAGE_SIZE;
+
+ newskb = ipoib_cm_alloc_rx_skb(dev, wr_id << 32 | index, frags,
+ mapping);
+ if (unlikely(!newskb)) {
+ /*
+ * If we can't allocate a new RX buffer, dump
+ * this packet and reuse the old buffer.
+ */
+ ipoib_dbg(priv, "failed to allocate receive buffer %d\n", wr_id);
+ ++priv->stats.rx_dropped;
+ goto repost_nosrq;
+ }
+
+ ipoib_cm_dma_unmap_rx(priv, frags,
+ rx_ptr->rx_ring[wr_id].mapping);
+ memcpy(rx_ptr->rx_ring[wr_id].mapping, mapping,
+ (frags + 1) * sizeof *mapping);
ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n",
wc->byte_len, wc->slid);
- skb_put_frags(skb, IPOIB_CM_HEAD_SIZE, wc->byte_len, newskb);
+ skb_put_frags(skb, IPOIB_CM_HEAD_SIZE, wc->byte_len, newskb);
skb->protocol = ((struct ipoib_header *) skb->data)->proto;
skb->mac.raw = skb->data;
@@ -416,12 +680,34 @@ void ipoib_cm_handle_rx_wc(struct net_de
skb->dev = dev;
/* XXX get correct PACKET_ type here */
skb->pkt_type = PACKET_HOST;
+
netif_rx_ni(skb);
-repost:
- if (unlikely(ipoib_cm_post_receive(dev, wr_id)))
- ipoib_warn(priv, "ipoib_cm_post_receive failed "
- "for buf %d\n", wr_id);
+repost_nosrq:
+ ret = ipoib_cm_post_receive(dev, wr_id << 32 | index);
+
+ if (unlikely(ret)) {
+ ipoib_warn(priv, "ipoib_cm_post_receive failed for buf
%d\n",
+ wr_id);
+ return 1;
+ }
+
+ return 0;
+}
+
+void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
+{
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+ int ret;
+
+
+ if (priv->cm.srq)
+ ret = handle_rx_wc_srq(dev, wc);
+ else
+ ret = handle_rx_wc_nosrq(dev, wc);
+
+ if (unlikely(ret))
+ ipoib_warn(priv, "Error processing rx wc\n");
}
static inline int post_send(struct ipoib_dev_priv *priv,
@@ -606,6 +892,22 @@ int ipoib_cm_dev_open(struct net_device
return 0;
}
+static void free_resources_nosrq(struct ipoib_dev_priv *priv, struct ipoib_cm_rx *p)
+{
+ int i;
+
+ for(i = 0; i < ipoib_recvq_size; ++i)
+ if(p->rx_ring[i].skb) {
+ ipoib_cm_dma_unmap_rx(priv,
+ IPOIB_CM_RX_SG - 1,
+ p->rx_ring[i].mapping);
+ dev_kfree_skb_any(p->rx_ring[i].skb);
+ p->rx_ring[i].skb = NULL;
+ }
+ kfree(p->rx_ring);
+}
+
+
void ipoib_cm_dev_stop(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -618,6 +920,8 @@ void ipoib_cm_dev_stop(struct net_device
spin_lock_irq(&priv->lock);
while (!list_empty(&priv->cm.passive_ids)) {
p = list_entry(priv->cm.passive_ids.next, typeof(*p), list);
+ if (priv->cm.srq == NULL)
+ free_resources_nosrq(priv, p);
list_del_init(&p->list);
spin_unlock_irq(&priv->lock);
ib_destroy_cm_id(p->id);
@@ -703,9 +1007,14 @@ static struct ib_qp *ipoib_cm_create_tx_
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ib_qp_init_attr attr = {};
attr.recv_cq = priv->cq;
- attr.srq = priv->cm.srq;
+ if (priv->cm.srq)
+ attr.srq = priv->cm.srq;
+ else
+ attr.srq = NULL;
attr.cap.max_send_wr = ipoib_sendq_size;
+ attr.cap.max_recv_wr = 1; /* Not in MST code */
attr.cap.max_send_sge = 1;
+ attr.cap.max_recv_sge = 1; /* Not in MST code */
attr.sq_sig_type = IB_SIGNAL_ALL_WR;
attr.qp_type = IB_QPT_RC;
attr.send_cq = cq;
@@ -742,10 +1051,13 @@ static int ipoib_cm_send_req(struct net_
req.responder_resources = 4;
req.remote_cm_response_timeout = 20;
req.local_cm_response_timeout = 20;
- req.retry_count = 0; /* RFC draft warns against retries */
- req.rnr_retry_count = 0; /* RFC draft warns against retries */
+ req.retry_count = 6; /* RFC draft warns against retries */
+ req.rnr_retry_count = 6;/* RFC draft warns against retries */
req.max_cm_retries = 15;
- req.srq = 1;
+ if (priv->cm.srq)
+ req.srq = 1;
+ else
+ req.srq = 0;
return ib_send_cm_req(id, &req);
}
@@ -1089,6 +1401,10 @@ static void ipoib_cm_stale_task(struct w
p = list_entry(priv->cm.passive_ids.prev, typeof(*p), list);
if (time_before_eq(jiffies, p->jiffies + IPOIB_CM_RX_TIMEOUT))
break;
+ if (priv->cm.srq == NULL) { /* NOSRQ */
+ free_resources_nosrq(priv, p);
+ priv->cm.rx_index_ring[p->index] = NULL;
+ }
list_del_init(&p->list);
spin_unlock_irq(&priv->lock);
ib_destroy_cm_id(p->id);
@@ -1143,16 +1459,40 @@ int ipoib_cm_add_mode_attr(struct net_de
return device_create_file(&dev->dev, &dev_attr_mode);
}
+static int create_srq(struct net_device *dev, struct ipoib_dev_priv *priv)
+{
+ struct ib_srq_init_attr srq_init_attr;
+ int ret;
+
+ srq_init_attr.attr.max_wr = ipoib_recvq_size;
+ srq_init_attr.attr.max_sge = IPOIB_CM_RX_SG;
+
+ priv->cm.srq = ib_create_srq(priv->pd, &srq_init_attr);
+ if (IS_ERR(priv->cm.srq)) {
+ ret = PTR_ERR(priv->cm.srq);
+ priv->cm.srq = NULL;
+ return ret;
+ }
+
+ priv->cm.srq_ring = kzalloc(ipoib_recvq_size *
+ sizeof *priv->cm.srq_ring,
+ GFP_KERNEL);
+ if (!priv->cm.srq_ring) {
+ printk(KERN_WARNING "%s: failed to allocate CM ring "
+ "(%d entries)\n",
+ priv->ca->name, ipoib_recvq_size);
+ ipoib_cm_dev_cleanup(dev);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
int ipoib_cm_dev_init(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
- struct ib_srq_init_attr srq_init_attr = {
- .attr = {
- .max_wr = ipoib_recvq_size,
- .max_sge = IPOIB_CM_RX_SG
- }
- };
- int ret, i;
+ int ret, i, supports_srq;
+ struct ib_device_attr attr;
INIT_LIST_HEAD(&priv->cm.passive_ids);
INIT_LIST_HEAD(&priv->cm.reap_list);
@@ -1164,21 +1504,26 @@ int ipoib_cm_dev_init(struct net_device
skb_queue_head_init(&priv->cm.skb_queue);
- priv->cm.srq = ib_create_srq(priv->pd, &srq_init_attr);
- if (IS_ERR(priv->cm.srq)) {
- ret = PTR_ERR(priv->cm.srq);
- priv->cm.srq = NULL;
+ if (ret = ib_query_device(priv->ca, &attr))
return ret;
+ if (attr.max_srq)
+ supports_srq = 1; /* This device supports SRQ */
+ else {
+ supports_srq = 0;
}
- priv->cm.srq_ring = kzalloc(ipoib_recvq_size * sizeof *priv->cm.srq_ring,
- GFP_KERNEL);
- if (!priv->cm.srq_ring) {
- printk(KERN_WARNING "%s: failed to allocate CM ring (%d entries)\n",
- priv->ca->name, ipoib_recvq_size);
- ipoib_cm_dev_cleanup(dev);
- return -ENOMEM;
- }
+ if (supports_srq) {
+ if (ret = create_srq(dev, priv))
+ return ret;
+
+ priv->cm.rx_index_ring = NULL; /* Not needed for SRQ */
+ } else {
+ priv->cm.srq = NULL;
+ priv->cm.srq_ring = NULL;
+ priv->cm.rx_index_ring = kzalloc(NOSRQ_INDEX_RING_SIZE *
+ sizeof *priv->cm.rx_index_ring,
+ GFP_KERNEL);
+ }
for (i = 0; i < IPOIB_CM_RX_SG; ++i)
priv->cm.rx_sge[i].lkey = priv->mr->lkey;
@@ -1190,19 +1535,25 @@ int ipoib_cm_dev_init(struct net_device
priv->cm.rx_wr.sg_list = priv->cm.rx_sge;
priv->cm.rx_wr.num_sge = IPOIB_CM_RX_SG;
- for (i = 0; i < ipoib_recvq_size; ++i) {
- if (!ipoib_cm_alloc_rx_skb(dev, i, IPOIB_CM_RX_SG - 1,
+ /* One can post receive buffers even before the RX QP is created
+ * only in the SRQ case. Therefore for NOSRQ we skip the rest of init
+ * and do that in ipoib_cm_req_handler() */
+
+ if (priv->cm.srq) {
+ for (i = 0; i < ipoib_recvq_size; ++i) {
+ if (!ipoib_cm_alloc_rx_skb(dev, i, IPOIB_CM_RX_SG - 1,
priv->cm.srq_ring[i].mapping)) {
- ipoib_warn(priv, "failed to allocate receive buffer %d\n", i);
- ipoib_cm_dev_cleanup(dev);
- return -ENOMEM;
- }
- if (ipoib_cm_post_receive(dev, i)) {
- ipoib_warn(priv, "ipoib_ib_post_receive failed for buf %d\n", i);
- ipoib_cm_dev_cleanup(dev);
- return -EIO;
+ ipoib_warn(priv, "failed to allocate receive buffer %d\n", i);
+ ipoib_cm_dev_cleanup(dev);
+ return -ENOMEM;
+ }
+ if (ipoib_cm_post_receive(dev, i)) {
+ ipoib_warn(priv, "ipoib_ib_post_receive failed for buf %d\n", i);
+ ipoib_cm_dev_cleanup(dev);
+ return -EIO;
+ ipoib_cm_dev_cleanup(dev);
+ return -EIO;
+ }
}
- }
+ } /* if SRQ */
priv->dev->dev_addr[0] = IPOIB_FLAGS_RC;
return 0;
--- a/linux-2.6.21-rc7/drivers/infiniband/ulp/ipoib/ipoib_ib.c 2007-04-24 18:10:17.000000000 -0700
+++ b//linux-2.6.21-rc7/drivers/infiniband/ulp/ipoib/ipoib_ib.c 2007-04-25 10:11:34.000000000 -0700
@@ -282,7 +282,7 @@ static void ipoib_ib_handle_tx_wc(struct
static void ipoib_ib_handle_wc(struct net_device *dev, struct ib_wc *wc)
{
- if (wc->wr_id & IPOIB_CM_OP_SRQ)
+ if ((wc->wr_id & IPOIB_CM_OP_SRQ) || (wc->wr_id & IPOIB_CM_OP_NOSRQ))
ipoib_cm_handle_rx_wc(dev, wc);
else if (wc->wr_id & IPOIB_OP_RECV)
ipoib_ib_handle_rx_wc(dev, wc);
Pradeep
pradeep at us.ibm.com