Michael,<br>
<br>
Thanks..<br>
<br>
Roland,<br>
<br>
Once you generate a kernel patch, I can test out both user and kernel mthca since I have the tests<br>
ready..<br>
<br>
-Viswa<br>
<br><br><div><span class="gmail_quote">On 9/13/05, <b class="gmail_sendername">Michael S. Tsirkin</b> <<a href="mailto:mst@mellanox.co.il">mst@mellanox.co.il</a>> wrote:</span><blockquote class="gmail_quote" style="border-left: 1px solid rgb(204, 204, 204); margin: 0pt 0pt 0pt 0.8ex; padding-left: 1ex;">
Quoting r. Roland Dreier <<a href="mailto:rolandd@cisco.com">rolandd@cisco.com</a>>:<br>> Subject: strange mem-free bug (was: [openib-general] completion Q overflow error/panic)<br>><br>> While looking at Viswa's example, I've found what seems to be a
<br>> problem using lots of QPs on mem-free HCAs.<br><br>Hi, Roland!<br>This seems to be a bug in libmthca. Patch below.<br><br>We probably need a similar fix for kernel mthca - let me know if<br>you plan to work on that, otherwise I'll look into it tomorrow.
<br>And it's probably something we want fixed for 2.6.14, right?<br>Let me know.<br><br>With regard to the test code that you posted - I also have some small<br>comments. If you plan to use it in the future, you can stick it
<br>in svn somewhere and I'll send patches.<br><br>---<br><br>Fix posting of the first work request for memfree hardware.<br>Simplify code for tavor mode hardware.<br><br>Signed-off-by: Michael S. Tsirkin <<a href="mailto:mst@mellanox.co.il">
mst@mellanox.co.il</a>><br><br>Index: userspace/libmthca/src/qp.c<br>===================================================================<br>--- userspace.orig/libmthca/src/qp.c 2005-09-13 17:17:58.000000000 +0300<br>
+++ userspace/libmthca/src/qp.c 2005-09-13 17:26:23.000000000 +0300<br>@@ -259,15 +259,13 @@ int mthca_tavor_post_send(struct ibv_qp<br> goto
out;<br> }<br><br>- if (prev_wqe) {<br>-
((struct mthca_next_seg *) prev_wqe)->nda_op =<br>-
htonl(((ind << qp->sq.wqe_shift) +<br>- qp->send_wqe_offset)
|<br>-
mthca_opcode[wr->opcode]);<br>+
((struct mthca_next_seg *) prev_wqe)->nda_op =<br>+
htonl(((ind << qp->sq.wqe_shift) +<br>+ qp->send_wqe_offset)
|<br>+
mthca_opcode[wr->opcode]);<br><br>-
((struct mthca_next_seg *) prev_wqe)->ee_nds =<br>-
htonl((size0 ? 0 : MTHCA_NEXT_DBD) | size);<br>- }<br>+
((struct mthca_next_seg *) prev_wqe)->ee_nds =<br>+
htonl((size0 ? 0 : MTHCA_NEXT_DBD) | size);<br><br> if (!size0) {<br> size0
= size;<br>@@ -353,12 +351,10 @@ int mthca_tavor_post_recv(struct ibv_qp<br><br> qp->wrid[ind] = wr->wr_id;<br><br>- if (prev_wqe) {<br>-
((struct mthca_next_seg *) prev_wqe)->nda_op =<br>-
htonl((ind << qp->rq.wqe_shift) | 1);<br>-
((struct mthca_next_seg *) prev_wqe)->ee_nds =<br>-
htonl(MTHCA_NEXT_DBD | size);<br>- }<br>+
((struct mthca_next_seg *) prev_wqe)->nda_op =<br>+
htonl((ind << qp->rq.wqe_shift) | 1);<br>+
((struct mthca_next_seg *) prev_wqe)->ee_nds =<br>+
htonl(MTHCA_NEXT_DBD | size);<br><br> if (!size0)<br> size0
= size;<br>@@ -562,15 +558,13 @@ int mthca_arbel_post_send(struct ibv_qp<br> goto
out;<br> }<br><br>- if (prev_wqe) {<br>-
((struct mthca_next_seg *) prev_wqe)->nda_op =<br>-
htonl(((ind << qp->sq.wqe_shift) +<br>- qp->send_wqe_offset)
|<br>-
mthca_opcode[wr->opcode]);<br>-
mb();<br>-
((struct mthca_next_seg *) prev_wqe)->ee_nds =<br>-
htonl(MTHCA_NEXT_DBD | size);<br>- }<br>+
((struct mthca_next_seg *) prev_wqe)->nda_op =<br>+
htonl(((ind << qp->sq.wqe_shift) +<br>+ qp->send_wqe_offset)
|<br>+
mthca_opcode[wr->opcode]);<br>+ mb();<br>+
((struct mthca_next_seg *) prev_wqe)->ee_nds =<br>+
htonl(MTHCA_NEXT_DBD | size);<br><br> if (!size0) {<br> size0
= size;<br>@@ -767,6 +761,8 @@ int mthca_alloc_qp_buf(struct ibv_pd *pd<br> }<br> }<br><br>+ qp->sq.last = get_send_wqe(qp, qp->sq.max - 1);<br>+ qp->rq.last = get_recv_wqe(qp, qp->
sq.max - 1);<br> return 0;<br> }<br><br>Index: userspace/libmthca/src/srq.c<br>===================================================================<br>--- userspace.orig/libmthca/src/srq.c 2005-09-13 17:25:41.000000000
+0300<br>+++ userspace/libmthca/src/srq.c 2005-09-13 17:25:51.000000000 +0300<br>@@ -142,13 +142,11 @@ int mthca_tavor_post_srq_recv(struct ibv<br> ((struct
mthca_data_seg *) wqe)->addr = 0;<br> }<br><br>- if (prev_wqe) {<br>-
((struct mthca_next_seg *) prev_wqe)->nda_op =<br>-
htonl((ind << srq->wqe_shift) | 1);<br>-
mb();<br>-
((struct mthca_next_seg *) prev_wqe)->ee_nds =<br>-
htonl(MTHCA_NEXT_DBD);<br>- }<br>+
((struct mthca_next_seg *) prev_wqe)->nda_op =<br>+
htonl((ind << srq->wqe_shift) | 1);<br>+ mb();<br>+
((struct mthca_next_seg *) prev_wqe)->ee_nds =<br>+
htonl(MTHCA_NEXT_DBD);<br><br> srq->wrid[ind] =
wr->wr_id;<br> srq->first_free = next_ind;<br>@@ -294,6 +292,7 @@ int mthca_alloc_srq_buf(struct ibv_pd *p<br><br> srq->first_free = 0;<br> srq->last_free = srq->max - 1;<br>+ srq->last = get_wqe(srq, srq->max - 1);
<br><br> return 0;<br> }<br>Index: userspace/libmthca/src/verbs.c<br>===================================================================<br>--- userspace.orig/libmthca/src/verbs.c 2005-08-23 14:03:12.000000000 +0300
<br>+++ userspace/libmthca/src/verbs.c 2005-09-13 17:25:14.000000000 +0300<br>@@ -306,7 +306,6 @@ struct ibv_srq *mthca_create_srq(struct<br><br> srq->max
= align_queue_size(pd->context, attr->attr.max_wr, 1);<br> srq->max_gs = attr->attr.max_sge;<br>- srq->last = NULL;<br> srq->counter = 0;<br><br> if (mthca_alloc_srq_buf(pd, &attr->attr, srq))
<br>@@ -413,14 +412,12 @@ struct ibv_qp *mthca_create_qp(struct ib<br> qp->sq.last_comp = qp->sq.max - 1;<br> qp->sq.head = 0;<br> qp->sq.tail = 0;<br>- qp->sq.last = NULL;
<br><br> qp->rq.max
= align_queue_size(pd->context, attr->cap.max_recv_wr, 0);<br> qp->rq.next_ind = 0;<br> qp->rq.last_comp = qp->rq.max - 1;<br> qp->rq.head = 0;<br> qp->rq.tail = 0;
<br>- qp->rq.last = NULL;<br><br> if (mthca_alloc_qp_buf(pd, &attr->cap, qp))<br> goto err;<br><br><br>--<br>MST<br></blockquote></div><br>