[ofa-general] IPOIB CM (NOSRQ)[PATCH V2] patch for review
Pradeep Satyanarayana
pradeep at us.ibm.com
Wed Apr 18 17:56:44 PDT 2007
Here is a second version of the IPOIB_CM_NOSRQ patch for review. This
patch benefits adapters that do not support shared receive queues (SRQs).

This patch incorporates the previous review comments:
- #ifdefs removed; a single binary drives HCAs that do and do not support SRQs
- avoids linear traversal through a list of QPs (connections are looked up
  through an index ring; see the sketch below)
- extraneous code removed
- compile-time selection removed
- no HTML version as part of this patch
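For reference, here is a minimal sketch (not part of the patch) of how the
NOSRQ path packs and unpacks the 64-bit wr_id so a completion maps back to
its connection in O(1). The defines mirror the ones added to ipoib.h below;
the helper names are hypothetical and only illustrate the scheme:

/* Sketch only: high 32 bits = slot within the per-connection rx_ring,
 * low bits = rx_index_ring slot, bit 29 = NOSRQ completion marker. */
#include <stdint.h>

#define IPOIB_CM_OP_NOSRQ     (1ul << 29)
#define NOSRQ_INDEX_RING_SIZE 1024
#define NOSRQ_INDEX_MASK      0x00000000000003ff

static inline uint64_t nosrq_pack_wr_id(uint32_t buf_slot, uint32_t index)
{
	return ((uint64_t)buf_slot << 32) | index | IPOIB_CM_OP_NOSRQ;
}

static inline uint32_t nosrq_index(uint64_t wr_id)
{
	return wr_id & NOSRQ_INDEX_MASK;	/* -> priv->cm.rx_index_ring[] */
}

static inline uint32_t nosrq_buf_slot(uint64_t wr_id)
{
	return (uint32_t)(wr_id >> 32);		/* -> rx_ptr->rx_ring[] */
}

The REQ handler stores the new ipoib_cm_rx pointer in a free rx_index_ring
slot, so the receive completion path does a single array lookup under
priv->lock instead of walking the passive_ids list.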
This patch has been tested with linux-2.6.21-rc5 and -rc7 with Topspin and
IBM HCAs on ppc64 machines. I have run netperf between two IBM HCAs, between
two Topspin HCAs, and between an IBM and a Topspin HCA.
Note 1: I made an interesting discovery when running netperf between a
Topspin and an IBM HCA: I started to see the IB_WC_RETRY_EXC_ERR error upon
send completion. This may have been due to the difference in processing
speeds of the two HCAs. It was rectified by setting retry_count to a
non-zero value in ipoib_cm_send_req(), in spite of the comment
/* RFC draft warns against retries */.
Can someone point me to where this warning appears in the RFC? I would like
to understand the reasoning.
Note 2: The IB_WC_RETRY_EXC_ERR error is not seen when the two HCAs are of
the same type.
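For reference, the retry fix amounts to giving the CM REQ non-zero retry
counts; a minimal sketch using the kernel's struct ib_cm_req_param (the
helper name is hypothetical; the values match the ipoib_cm_send_req() hunk
below):

#include <rdma/ib_cm.h>

/* Sketch: the retry-related REQ parameters the patch now sets non-zero. */
static void nosrq_fill_req_retries(struct ib_cm_req_param *req)
{
	req->retry_count     = 6;	/* was 0: "RFC draft warns against retries" */
	req->rnr_retry_count = 6;	/* was 0 */
	req->max_cm_retries  = 15;	/* unchanged */
}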
Note 3: Another small patch (not included here) is needed in the ehca
driver for this to work on IBM HCAs.
Signed-off-by: Pradeep Satyanarayana <pradeep at us.ibm.com>
---
--- linux-2.6.21-rc5.orig/drivers/infiniband/ulp/ipoib/ipoib.h 2007-04-02 17:44:58.000000000 -0700
+++ linux-2.6.21-rc5/drivers/infiniband/ulp/ipoib/ipoib.h 2007-04-03 10:59:54.000000000 -0700
@@ -99,6 +99,12 @@ enum {
#define IPOIB_OP_RECV (1ul << 31)
#ifdef CONFIG_INFINIBAND_IPOIB_CM
#define IPOIB_CM_OP_SRQ (1ul << 30)
+#define IPOIB_CM_OP_NOSRQ (1ul << 29)
+
+/* These two go hand in hand */
+#define NOSRQ_INDEX_RING_SIZE 1024
+#define NOSRQ_INDEX_MASK 0x00000000000003ff
+
#else
#define IPOIB_CM_OP_SRQ (0)
#endif
@@ -136,9 +142,11 @@ struct ipoib_cm_data {
struct ipoib_cm_rx {
struct ib_cm_id *id;
struct ib_qp *qp;
+ struct ipoib_cm_rx_buf *rx_ring;
struct list_head list;
struct net_device *dev;
unsigned long jiffies;
+ u32 index;
};
struct ipoib_cm_tx {
@@ -177,6 +185,7 @@ struct ipoib_cm_dev_priv {
struct ib_wc ibwc[IPOIB_NUM_WC];
struct ib_sge rx_sge[IPOIB_CM_RX_SG];
struct ib_recv_wr rx_wr;
+ struct ipoib_cm_rx **rx_index_ring;
};
/*
--- linux-2.6.21-rc5.orig/drivers/infiniband/ulp/ipoib/ipoib_cm.c 2007-04-02 17:44:58.000000000 -0700
+++ linux-2.6.21-rc5/drivers/infiniband/ulp/ipoib/ipoib_cm.c 2007-04-18 16:23:12.000000000 -0700
@@ -76,35 +76,73 @@ static void ipoib_cm_dma_unmap_rx(struct
ib_dma_unmap_single(priv->ca, mapping[i + 1], PAGE_SIZE, DMA_FROM_DEVICE);
}
-static int ipoib_cm_post_receive(struct net_device *dev, int id)
+static int ipoib_cm_post_receive(struct net_device *dev, u64 id)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ib_recv_wr *bad_wr;
int i, ret;
+ u32 index;
+ u64 wr_id;
+ struct ipoib_cm_rx *rx_ptr;
+ unsigned long flags;
- priv->cm.rx_wr.wr_id = id | IPOIB_CM_OP_SRQ;
+ if (priv->cm.srq) {
+ priv->cm.rx_wr.wr_id = id | IPOIB_CM_OP_SRQ; /* Check id val */
- for (i = 0; i < IPOIB_CM_RX_SG; ++i)
- priv->cm.rx_sge[i].addr = priv->cm.srq_ring[id].mapping[i];
+ for (i = 0; i < IPOIB_CM_RX_SG; ++i)
+ priv->cm.rx_sge[i].addr =
+ priv->cm.srq_ring[id].mapping[i];
+
+ ret = ib_post_srq_recv(priv->cm.srq, &priv->cm.rx_wr, &bad_wr);
+ if (unlikely(ret)) {
+ ipoib_warn(priv, "post srq failed for buf %d (%d)\n", id, ret);
+ ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1,
+ priv->cm.srq_ring[id].mapping);
+ dev_kfree_skb_any(priv->cm.srq_ring[id].skb);
+ priv->cm.srq_ring[id].skb = NULL;
+ }
+ } else {
+ index = id & NOSRQ_INDEX_MASK ;
+ wr_id = id >> 32;
- ret = ib_post_srq_recv(priv->cm.srq, &priv->cm.rx_wr, &bad_wr);
- if (unlikely(ret)) {
- ipoib_warn(priv, "post srq failed for buf %d (%d)\n", id, ret);
- ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1,
- priv->cm.srq_ring[id].mapping);
- dev_kfree_skb_any(priv->cm.srq_ring[id].skb);
- priv->cm.srq_ring[id].skb = NULL;
- }
+ /* There is a slender chance of a race between the stale_task
+ * running after a period of inactivity and the receipt of
+ * a packet being processed at about the same instant.
+ * Hence the lock */
+
+ spin_lock_irqsave(&priv->lock, flags);
+ rx_ptr = priv->cm.rx_index_ring[index];
+ spin_unlock_irqrestore(&priv->lock, flags);
+
+ priv->cm.rx_wr.wr_id = wr_id << 32 | index | IPOIB_CM_OP_NOSRQ;
+
+ for (i = 0; i < IPOIB_CM_RX_SG; ++i)
+ priv->cm.rx_sge[i].addr = rx_ptr->rx_ring[wr_id].mapping[i];
+
+ ret = ib_post_recv(rx_ptr->qp, &priv->cm.rx_wr, &bad_wr);
+ if (unlikely(ret)) {
+ ipoib_warn(priv, "post recv failed for buf %d (%d)\n",
+ wr_id, ret);
+ ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1,
+ rx_ptr->rx_ring[wr_id].mapping);
+ dev_kfree_skb_any(rx_ptr->rx_ring[wr_id].skb);
+ rx_ptr->rx_ring[wr_id].skb = NULL;
+ }
+ } /* else NO SRQ */
return ret;
}
-static struct sk_buff *ipoib_cm_alloc_rx_skb(struct net_device *dev, int id, int frags,
+static struct sk_buff *ipoib_cm_alloc_rx_skb(struct net_device *dev, u64 id,
+ int frags,
u64 mapping[IPOIB_CM_RX_SG])
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct sk_buff *skb;
int i;
+ struct ipoib_cm_rx *rx_ptr;
+ u32 index, wr_id;
+ unsigned long flags;
skb = dev_alloc_skb(IPOIB_CM_HEAD_SIZE + 12);
if (unlikely(!skb))
@@ -123,7 +161,7 @@ static struct sk_buff *ipoib_cm_alloc_rx
return NULL;
}
- for (i = 0; i < frags; i++) {
+ for (i = 0; i < frags; i++) {
struct page *page = alloc_page(GFP_ATOMIC);
if (!page)
@@ -136,7 +174,17 @@ static struct sk_buff *ipoib_cm_alloc_rx
goto partial_error;
}
- priv->cm.srq_ring[id].skb = skb;
+ if (priv->cm.srq)
+ priv->cm.srq_ring[id].skb = skb;
+ else {
+ index = id & NOSRQ_INDEX_MASK ;
+ wr_id = id >> 32;
+ spin_lock_irqsave(&priv->lock, flags);
+ rx_ptr = priv->cm.rx_index_ring[index];
+ spin_unlock_irqrestore(&priv->lock, flags);
+
+ rx_ptr->rx_ring[wr_id].skb = skb;
+ }
return skb;
partial_error:
@@ -157,13 +205,20 @@ static struct ib_qp *ipoib_cm_create_rx_
struct ib_qp_init_attr attr = {
.send_cq = priv->cq, /* does not matter, we never send anything */
.recv_cq = priv->cq,
- .srq = priv->cm.srq,
.cap.max_send_wr = 1, /* FIXME: 0 Seems not to work */
+ .cap.max_recv_wr = ipoib_recvq_size + 1,
.cap.max_send_sge = 1, /* FIXME: 0 Seems not to work */
+ .cap.max_recv_sge = IPOIB_CM_RX_SG, /* Is this correct? */
.sq_sig_type = IB_SIGNAL_ALL_WR,
.qp_type = IB_QPT_RC,
.qp_context = p,
};
+
+ if (priv->cm.srq)
+ attr.srq = priv->cm.srq;
+ else
+ attr.srq = NULL;
+
return ib_create_qp(priv->pd, &attr);
}
@@ -217,9 +272,13 @@ static int ipoib_cm_send_rep(struct net_
rep.flow_control = 0;
rep.rnr_retry_count = req->rnr_retry_count;
rep.target_ack_delay = 20; /* FIXME */
- rep.srq = 1;
rep.qp_num = qp->qp_num;
rep.starting_psn = psn;
+
+ if (priv->cm.srq)
+ rep.srq = 1;
+ else
+ rep.srq = 0;
return ib_send_cm_rep(cm_id, &rep);
}
@@ -231,6 +290,8 @@ static int ipoib_cm_req_handler(struct i
unsigned long flags;
unsigned psn;
int ret;
+ u32 qp_num, index;
+ u64 i;
ipoib_dbg(priv, "REQ arrived\n");
p = kzalloc(sizeof *p, GFP_KERNEL);
@@ -244,10 +305,69 @@ static int ipoib_cm_req_handler(struct i
goto err_qp;
}
- psn = random32() & 0xffffff;
- ret = ipoib_cm_modify_rx_qp(dev, cm_id, p->qp, psn);
- if (ret)
- goto err_modify;
+ if (priv->cm.srq == NULL) { /* NOSRQ */
+ qp_num = p->qp->qp_num;
+ /* Allocate space for the rx_ring here */
+ p->rx_ring = kzalloc(ipoib_recvq_size * sizeof *p->rx_ring,
+ GFP_KERNEL);
+ if (p->rx_ring == NULL)
+ return -ENOMEM;
+
+ cm_id->context = p;
+ p->jiffies = jiffies;
+ spin_lock_irqsave(&priv->lock, flags);
+ list_add(&p->list, &priv->cm.passive_ids);
+
+ /* Find an empty rx_index_ring[] entry */
+ for (index = 0; index < NOSRQ_INDEX_RING_SIZE; index++)
+ if (priv->cm.rx_index_ring[index] == NULL)
+ break;
+
+ if ( index == NOSRQ_INDEX_RING_SIZE) {
+ spin_unlock_irqrestore(&priv->lock, flags);
+ printk(KERN_WARNING "NOSRQ supports a max of %d RC "
+ "QPs. That limit has now been reached\n",
+ NOSRQ_INDEX_RING_SIZE);
+ return -EINVAL;
+ }
+
+ /* Store the pointer to retrieve it later using the index */
+ priv->cm.rx_index_ring[index] = p;
+ spin_unlock_irqrestore(&priv->lock, flags);
+ p->index = index;
+
+ psn = random32() & 0xffffff;
+ ret = ipoib_cm_modify_rx_qp(dev, cm_id, p->qp, psn);
+ if (ret) {
+ ipoib_warn(priv, "ipoib_cm_modify_rx_qp() failed %d\n",
+ ret);
+ goto err_modify;
+ }
+
+ for (i = 0; i < ipoib_recvq_size; ++i) {
+ if (!ipoib_cm_alloc_rx_skb(dev, i << 32 | index,
+ IPOIB_CM_RX_SG - 1,
+ p->rx_ring[i].mapping)) {
+ ipoib_warn(priv, "failed to allocate receive "
+ "buffer %d\n", i);
+ ipoib_cm_dev_cleanup(dev);
+ return -ENOMEM;
+ }
+
+ if (ipoib_cm_post_receive(dev, i << 32 | index)) {
+ ipoib_warn(priv, "ipoib_ib_post_receive "
+ "failed for buf %d\n", i);
+ ipoib_cm_dev_cleanup(dev);
+ return -EIO;
+ }
+ }
+ } else { /* SRQ */
+ p->rx_ring = NULL; /* This is used only by NOSRQ */
+ psn = random32() & 0xffffff;
+ ret = ipoib_cm_modify_rx_qp(dev, cm_id, p->qp, psn);
+ if (ret)
+ goto err_modify;
+ }
ret = ipoib_cm_send_rep(dev, cm_id, p->qp, &event->param.req_rcvd, psn);
if (ret) {
@@ -255,13 +375,15 @@ static int ipoib_cm_req_handler(struct i
goto err_rep;
}
- cm_id->context = p;
- p->jiffies = jiffies;
- spin_lock_irqsave(&priv->lock, flags);
- list_add(&p->list, &priv->cm.passive_ids);
- spin_unlock_irqrestore(&priv->lock, flags);
+ if (priv->cm.srq) {
+ cm_id->context = p;
+ p->jiffies = jiffies;
+ spin_lock_irqsave(&priv->lock, flags);
+ list_add(&p->list, &priv->cm.passive_ids);
+ spin_unlock_irqrestore(&priv->lock, flags);
+ }
queue_delayed_work(ipoib_workqueue,
- &priv->cm.stale_task, IPOIB_CM_RX_DELAY);
+ &priv->cm.stale_task, IPOIB_CM_RX_DELAY);
return 0;
err_rep:
@@ -344,12 +466,19 @@ static void skb_put_frags(struct sk_buff
void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
- unsigned int wr_id = wc->wr_id & ~IPOIB_CM_OP_SRQ;
struct sk_buff *skb, *newskb;
struct ipoib_cm_rx *p;
unsigned long flags;
- u64 mapping[IPOIB_CM_RX_SG];
+ u64 mapping[IPOIB_CM_RX_SG], wr_id;
+ u32 index;
int frags;
+ struct ipoib_cm_rx *rx_ptr;
+
+
+ if (priv->cm.srq)
+ wr_id = wc->wr_id & ~IPOIB_CM_OP_SRQ;
+ else
+ wr_id = wc->wr_id >> 32;
ipoib_dbg_data(priv, "cm recv completion: id %d, op %d, status: %d\n",
wr_id, wc->opcode, wc->status);
@@ -360,7 +489,16 @@ void ipoib_cm_handle_rx_wc(struct net_de
return;
}
- skb = priv->cm.srq_ring[wr_id].skb;
+ if(priv->cm.srq)
+ skb = priv->cm.srq_ring[wr_id].skb;
+ else {
+ index = (wc->wr_id & ~IPOIB_CM_OP_NOSRQ) & NOSRQ_INDEX_MASK ;
+ spin_lock_irqsave(&priv->lock, flags);
+ rx_ptr = priv->cm.rx_index_ring[index];
+ spin_unlock_irqrestore(&priv->lock, flags);
+
+ skb = rx_ptr->rx_ring[wr_id].skb;
+ } /* NOSRQ */
if (unlikely(wc->status != IB_WC_SUCCESS)) {
ipoib_dbg(priv, "cm recv error "
@@ -371,7 +509,13 @@ void ipoib_cm_handle_rx_wc(struct net_de
}
if (!likely(wr_id & IPOIB_CM_RX_UPDATE_MASK)) {
- p = wc->qp->qp_context;
+ if(priv->cm.srq == NULL)
+ /* There are no guarantees that wc->qp is not NULL for HCAs
+ * that do not support SRQ. */
+ p = rx_ptr;
+ else
+ p = wc->qp->qp_context;
+
if (time_after_eq(jiffies, p->jiffies + IPOIB_CM_RX_UPDATE_TIME)) {
spin_lock_irqsave(&priv->lock, flags);
p->jiffies = jiffies;
@@ -388,7 +532,11 @@ void ipoib_cm_handle_rx_wc(struct net_de
frags = PAGE_ALIGN(wc->byte_len - min(wc->byte_len,
(unsigned)IPOIB_CM_HEAD_SIZE)) / PAGE_SIZE;
- newskb = ipoib_cm_alloc_rx_skb(dev, wr_id, frags, mapping);
+ if (priv->cm.srq)
+ newskb = ipoib_cm_alloc_rx_skb(dev, wr_id, frags, mapping);
+ else
+ newskb = ipoib_cm_alloc_rx_skb(dev, wr_id << 32 | index, frags,
+ mapping);
if (unlikely(!newskb)) {
/*
* If we can't allocate a new RX buffer, dump
@@ -399,13 +547,22 @@ void ipoib_cm_handle_rx_wc(struct net_de
goto repost;
}
- ipoib_cm_dma_unmap_rx(priv, frags, priv->cm.srq_ring[wr_id].mapping);
- memcpy(priv->cm.srq_ring[wr_id].mapping, mapping, (frags + 1) * sizeof *mapping);
+ if (priv->cm.srq) {
+ ipoib_cm_dma_unmap_rx(priv, frags,
+ priv->cm.srq_ring[wr_id].mapping);
+ memcpy(priv->cm.srq_ring[wr_id].mapping, mapping,
+ (frags + 1) * sizeof *mapping);
+ } else {
+ ipoib_cm_dma_unmap_rx(priv, frags,
+ rx_ptr->rx_ring[wr_id].mapping);
+ memcpy(rx_ptr->rx_ring[wr_id].mapping, mapping,
+ (frags + 1) * sizeof *mapping);
+ }
+ }
ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n",
wc->byte_len, wc->slid);
- skb_put_frags(skb, IPOIB_CM_HEAD_SIZE, wc->byte_len, newskb);
+ skb_put_frags(skb, IPOIB_CM_HEAD_SIZE, wc->byte_len, newskb);
skb->protocol = ((struct ipoib_header *) skb->data)->proto;
skb->mac.raw = skb->data;
@@ -418,12 +575,19 @@ void ipoib_cm_handle_rx_wc(struct net_de
skb->dev = dev;
/* XXX get correct PACKET_ type here */
skb->pkt_type = PACKET_HOST;
+
netif_rx_ni(skb);
repost:
- if (unlikely(ipoib_cm_post_receive(dev, wr_id)))
- ipoib_warn(priv, "ipoib_cm_post_receive failed "
- "for buf %d\n", wr_id);
+ if (priv->cm.srq) {
+ if (unlikely(ipoib_cm_post_receive(dev, wr_id)))
+ ipoib_warn(priv, "ipoib_cm_post_receive failed "
+ "for buf %d\n", wr_id);
+ } else {
+ if (unlikely(ipoib_cm_post_receive(dev, wr_id << 32 | index)))
+ ipoib_warn(priv, "ipoib_cm_post_receive failed "
+ "for buf %d\n", wr_id);
+ }
}
static inline int post_send(struct ipoib_dev_priv *priv,
@@ -432,6 +596,9 @@ static inline int post_send(struct ipoib
u64 addr, int len)
{
struct ib_send_wr *bad_wr;
+ struct ib_qp_attr qp_attr;
+ struct ib_qp_init_attr qp_init_attr;
+ int ret, qp_attr_mask;
priv->tx_sge.addr = addr;
priv->tx_sge.length = len;
@@ -613,6 +780,7 @@ void ipoib_cm_dev_stop(struct net_device
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ipoib_cm_rx *p;
unsigned long flags;
+ int i;
if (!IPOIB_CM_SUPPORTED(dev->dev_addr))
return;
@@ -621,6 +789,17 @@ void ipoib_cm_dev_stop(struct net_device
spin_lock_irqsave(&priv->lock, flags);
while (!list_empty(&priv->cm.passive_ids)) {
p = list_entry(priv->cm.passive_ids.next, typeof(*p), list);
+ if (priv->cm.srq == NULL) {
+ for(i = 0; i < ipoib_recvq_size; ++i)
+ if(p->rx_ring[i].skb) {
+ ipoib_cm_dma_unmap_rx(priv,
+ IPOIB_CM_RX_SG - 1,
+ p->rx_ring[i].mapping);
+ dev_kfree_skb_any(p->rx_ring[i].skb);
+ p->rx_ring[i].skb = NULL;
+ }
+ kfree(p->rx_ring);
+ }
list_del_init(&p->list);
spin_unlock_irqrestore(&priv->lock, flags);
ib_destroy_cm_id(p->id);
@@ -707,9 +886,14 @@ static struct ib_qp *ipoib_cm_create_tx_
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ib_qp_init_attr attr = {};
attr.recv_cq = priv->cq;
- attr.srq = priv->cm.srq;
+ if (priv->cm.srq)
+ attr.srq = priv->cm.srq;
+ else
+ attr.srq = NULL;
attr.cap.max_send_wr = ipoib_sendq_size;
+ attr.cap.max_recv_wr = 1; /* Not in MST code */
attr.cap.max_send_sge = 1;
+ attr.cap.max_recv_sge = 1; /* Not in MST code */
attr.sq_sig_type = IB_SIGNAL_ALL_WR;
attr.qp_type = IB_QPT_RC;
attr.send_cq = cq;
@@ -746,10 +930,13 @@ static int ipoib_cm_send_req(struct net_
req.responder_resources = 4;
req.remote_cm_response_timeout = 20;
req.local_cm_response_timeout = 20;
- req.retry_count = 0; /* RFC draft warns against retries */
- req.rnr_retry_count = 0; /* RFC draft warns against retries */
+ req.retry_count = 6; /* RFC draft warns against retries */
+ req.rnr_retry_count = 6; /* RFC draft warns against retries */
req.max_cm_retries = 15;
- req.srq = 1;
+ if (priv->cm.srq)
+ req.srq = 1;
+ else
+ req.srq = 0;
return ib_send_cm_req(id, &req);
}
@@ -1089,6 +1276,7 @@ static void ipoib_cm_stale_task(struct w
cm.stale_task.work);
struct ipoib_cm_rx *p;
unsigned long flags;
+ int i;
spin_lock_irqsave(&priv->lock, flags);
while (!list_empty(&priv->cm.passive_ids)) {
@@ -1097,6 +1285,19 @@ static void ipoib_cm_stale_task(struct w
p = list_entry(priv->cm.passive_ids.prev, typeof(*p), list);
if (time_before_eq(jiffies, p->jiffies + IPOIB_CM_RX_TIMEOUT))
break;
+ if (priv->cm.srq == NULL) { /* NOSRQ */
+ for(i = 0; i < ipoib_recvq_size; ++i)
+ if(p->rx_ring[i].skb) {
+ ipoib_cm_dma_unmap_rx(priv,
+ IPOIB_CM_RX_SG - 1,
+ p->rx_ring[i].mapping);
+ dev_kfree_skb_any(p->rx_ring[i].skb);
+ p->rx_ring[i].skb = NULL;
+ }
+ /* Free the rx_ring */
+ kfree(p->rx_ring);
+ priv->cm.rx_index_ring[p->index] = NULL;
+ }
list_del_init(&p->list);
spin_unlock_irqrestore(&priv->lock, flags);
ib_destroy_cm_id(p->id);
@@ -1154,13 +1355,9 @@ int ipoib_cm_add_mode_attr(struct net_de
int ipoib_cm_dev_init(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
- struct ib_srq_init_attr srq_init_attr = {
- .attr = {
- .max_wr = ipoib_recvq_size,
- .max_sge = IPOIB_CM_RX_SG
- }
- };
- int ret, i;
+ struct ib_srq_init_attr srq_init_attr;
+ int ret, i, supports_srq;
+ struct ib_device_attr attr;
INIT_LIST_HEAD(&priv->cm.passive_ids);
INIT_LIST_HEAD(&priv->cm.reap_list);
@@ -1172,21 +1369,43 @@ int ipoib_cm_dev_init(struct net_device
skb_queue_head_init(&priv->cm.skb_queue);
- priv->cm.srq = ib_create_srq(priv->pd, &srq_init_attr);
- if (IS_ERR(priv->cm.srq)) {
- ret = PTR_ERR(priv->cm.srq);
- priv->cm.srq = NULL;
+ if (ret = ib_query_device(priv->ca, &attr))
return ret;
+ if (attr.max_srq)
+ supports_srq = 1; /* This device supports SRQ */
+ else {
+ supports_srq = 0;
}
- priv->cm.srq_ring = kzalloc(ipoib_recvq_size * sizeof *priv->cm.srq_ring,
- GFP_KERNEL);
- if (!priv->cm.srq_ring) {
- printk(KERN_WARNING "%s: failed to allocate CM ring (%d entries)\n",
- priv->ca->name, ipoib_recvq_size);
- ipoib_cm_dev_cleanup(dev);
- return -ENOMEM;
- }
+ if (supports_srq) {
+ srq_init_attr.attr.max_wr = ipoib_recvq_size;
+ srq_init_attr.attr.max_sge = IPOIB_CM_RX_SG;
+
+ priv->cm.srq = ib_create_srq(priv->pd, &srq_init_attr);
+ if (IS_ERR(priv->cm.srq)) {
+ ret = PTR_ERR(priv->cm.srq);
+ priv->cm.srq = NULL;
+ return ret;
+ }
+
+ priv->cm.srq_ring = kzalloc(ipoib_recvq_size *
+ sizeof *priv->cm.srq_ring,
+ GFP_KERNEL);
+ if (!priv->cm.srq_ring) {
+ printk(KERN_WARNING "%s: failed to allocate CM ring "
+ "(%d entries)\n",
+ priv->ca->name, ipoib_recvq_size);
+ ipoib_cm_dev_cleanup(dev);
+ return -ENOMEM;
+ }
+ priv->cm.rx_index_ring = NULL; /* Not needed for SRQ */
+ } else {
+ priv->cm.srq = NULL;
+ priv->cm.srq_ring = NULL;
+ priv->cm.rx_index_ring = kzalloc(NOSRQ_INDEX_RING_SIZE *
+ sizeof *priv->cm.rx_index_ring,
+ GFP_KERNEL);
+ }
for (i = 0; i < IPOIB_CM_RX_SG; ++i)
priv->cm.rx_sge[i].lkey = priv->mr->lkey;
@@ -1198,19 +1417,25 @@ int ipoib_cm_dev_init(struct net_device
priv->cm.rx_wr.sg_list = priv->cm.rx_sge;
priv->cm.rx_wr.num_sge = IPOIB_CM_RX_SG;
- for (i = 0; i < ipoib_recvq_size; ++i) {
- if (!ipoib_cm_alloc_rx_skb(dev, i, IPOIB_CM_RX_SG - 1,
+ /* One can post receive buffers even before the RX QP is created
+ * only in the SRQ case. Therefore for NOSRQ we skip the rest of init
+ * and do that in ipoib_cm_req_handler() */
+
+ if (priv->cm.srq) {
+ for (i = 0; i < ipoib_recvq_size; ++i) {
+ if (!ipoib_cm_alloc_rx_skb(dev, i, IPOIB_CM_RX_SG - 1,
priv->cm.srq_ring[i].mapping)) {
- ipoib_warn(priv, "failed
to allocate receive buffer %d\n", i);
- ipoib_cm_dev_cleanup(dev);
- return -ENOMEM;
- }
- if (ipoib_cm_post_receive(dev, i)) {
- ipoib_warn(priv, "ipoib_ib_post_receive failed for buf %d\n", i);
- ipoib_cm_dev_cleanup(dev);
- return -EIO;
+ ipoib_warn(priv, "failed to allocate receive buffer %d\n", i);
+ ipoib_cm_dev_cleanup(dev);
+ return -ENOMEM;
+ }
+ if (ipoib_cm_post_receive(dev, i)) {
+ ipoib_warn(priv, "ipoib_ib_post_receive failed for buf %d\n", i);
+ ipoib_cm_dev_cleanup(dev);
+ return -EIO;
+ }
}
- }
+ } /* if supports SRQ */
priv->dev->dev_addr[0] = IPOIB_FLAGS_RC;
return 0;
--- linux-2.6.21-rc5.orig/drivers/infiniband/ulp/ipoib/ipoib_ib.c 2007-04-02 17:44:58.000000000 -0700
+++ linux-2.6.21-rc5/drivers/infiniband/ulp/ipoib/ipoib_ib.c 2007-04-03 11:00:26.000000000 -0700
@@ -282,7 +282,7 @@ static void ipoib_ib_handle_tx_wc(struct
static void ipoib_ib_handle_wc(struct net_device *dev, struct ib_wc *wc)
{
- if (wc->wr_id & IPOIB_CM_OP_SRQ)
+ if ((wc->wr_id & IPOIB_CM_OP_SRQ) || (wc->wr_id & IPOIB_CM_OP_NOSRQ))
ipoib_cm_handle_rx_wc(dev, wc);
else if (wc->wr_id & IPOIB_OP_RECV)
ipoib_ib_handle_rx_wc(dev, wc);
Pradeep
pradeep at us.ibm.com