Michael,<br>
<br>
Thanks..<br>
<br>
Roland,<br>
<br>
Once you generate a kernel patch, I can test out both user and kernel mthca since I have the tests<br>
ready..<br>
<br>
-Viswa<br>
<br><br><div><span class="gmail_quote">On 9/13/05, <b class="gmail_sendername">Michael S. Tsirkin</b> &lt;<a href="mailto:mst@mellanox.co.il">mst@mellanox.co.il</a>&gt; wrote:</span><blockquote class="gmail_quote" style="border-left: 1px solid rgb(204, 204, 204); margin: 0pt 0pt 0pt 0.8ex; padding-left: 1ex;">
Quoting r. Roland Dreier <<a href="mailto:rolandd@cisco.com">rolandd@cisco.com</a>>:<br>> Subject: strange mem-free bug (was: [openib-general] completion Q overflow error/panic)<br>><br>> While looking at Viswa's example, I've found what seems to be a
<br>> problem using lots of QPs on mem-free HCAs.<br><br>Hi, Roland!<br>This seems to be a bug in libmthca. Patch below.<br><br>We probably need a similar fix for kernel mthca - let me know if<br>you plan to work on that, otherwise I'll look into it tomorrow.
<br>And it's probably something we want fixed for 2.6.14, right?<br>Let me know.<br><br>With regard to the test code that you posted - I also have some small<br>comments. If you plan to use it in the future, you can stick it
<br>in svn somewhere and I'll send patches.<br><br>---<br><br>Fix posting of the first work request for memfree hardware.<br>Simplify code for tavor mode hardware.<br><br>Signed-off-by: Michael S. Tsirkin <<a href="mailto:mst@mellanox.co.il">
mst@mellanox.co.il</a>><br><br>Index: userspace/libmthca/src/qp.c<br>===================================================================<br>--- userspace.orig/libmthca/src/qp.c    2005-09-13 17:17:58.000000000 +0300<br>
+++ userspace/libmthca/src/qp.c 2005-09-13 17:26:23.000000000 +0300<br>@@ -259,15 +259,13 @@ int mthca_tavor_post_send(struct ibv_qp<br>                        goto
out;<br>                }<br><br>-               if (prev_wqe) {<br>-                      
((struct mthca_next_seg *) prev_wqe)->nda_op =<br>-                              
htonl(((ind << qp->sq.wqe_shift) +<br>-                                      qp->send_wqe_offset)
|<br>-                                    
mthca_opcode[wr->opcode]);<br>+              
((struct mthca_next_seg *) prev_wqe)->nda_op =<br>+                      
htonl(((ind << qp->sq.wqe_shift) +<br>+                              qp->send_wqe_offset)
|<br>+                            
mthca_opcode[wr->opcode]);<br><br>-                      
((struct mthca_next_seg *) prev_wqe)->ee_nds =<br>-                              
htonl((size0 ? 0 : MTHCA_NEXT_DBD) | size);<br>-               }<br>+              
((struct mthca_next_seg *) prev_wqe)->ee_nds =<br>+                      
htonl((size0 ? 0 : MTHCA_NEXT_DBD) | size);<br><br>                if (!size0) {<br>                        size0
= size;<br>@@ -353,12 +351,10 @@ int mthca_tavor_post_recv(struct ibv_qp<br><br>                qp->wrid[ind] = wr->wr_id;<br><br>-               if (prev_wqe) {<br>-                      
((struct mthca_next_seg *) prev_wqe)->nda_op =<br>-                              
htonl((ind << qp->rq.wqe_shift) | 1);<br>-                      
((struct mthca_next_seg *) prev_wqe)->ee_nds =<br>-                              
htonl(MTHCA_NEXT_DBD | size);<br>-               }<br>+              
((struct mthca_next_seg *) prev_wqe)->nda_op =<br>+                      
htonl((ind << qp->rq.wqe_shift) | 1);<br>+              
((struct mthca_next_seg *) prev_wqe)->ee_nds =<br>+                      
htonl(MTHCA_NEXT_DBD | size);<br><br>                if (!size0)<br>                        size0
= size;<br>@@ -562,15 +558,13 @@ int mthca_arbel_post_send(struct ibv_qp<br>                        goto
out;<br>                }<br><br>-               if (prev_wqe) {<br>-                      
((struct mthca_next_seg *) prev_wqe)->nda_op =<br>-                              
htonl(((ind << qp->sq.wqe_shift) +<br>-                                      qp->send_wqe_offset)
|<br>-                                    
mthca_opcode[wr->opcode]);<br>-                      
mb();<br>-                      
((struct mthca_next_seg *) prev_wqe)->ee_nds =<br>-                              
htonl(MTHCA_NEXT_DBD | size);<br>-               }<br>+              
((struct mthca_next_seg *) prev_wqe)->nda_op =<br>+                      
htonl(((ind << qp->sq.wqe_shift) +<br>+                              qp->send_wqe_offset)
|<br>+                            
mthca_opcode[wr->opcode]);<br>+               mb();<br>+              
((struct mthca_next_seg *) prev_wqe)->ee_nds =<br>+                      
htonl(MTHCA_NEXT_DBD | size);<br><br>                if (!size0) {<br>                        size0
= size;<br>@@ -767,6 +761,8 @@ int mthca_alloc_qp_buf(struct ibv_pd *pd<br>                }<br>        }<br><br>+       qp->sq.last = get_send_wqe(qp, qp->sq.max - 1);<br>+       qp->rq.last = get_recv_wqe(qp, qp->
sq.max - 1);<br>        return 0;<br> }<br><br>Index: userspace/libmthca/src/srq.c<br>===================================================================<br>--- userspace.orig/libmthca/src/srq.c   2005-09-13 17:25:41.000000000
 +0300<br>+++ userspace/libmthca/src/srq.c        2005-09-13 17:25:51.000000000 +0300<br>@@ -142,13 +142,11 @@ int mthca_tavor_post_srq_recv(struct ibv<br>                        ((struct
mthca_data_seg *) wqe)->addr = 0;<br>                }<br><br>-               if (prev_wqe) {<br>-                      
((struct mthca_next_seg *) prev_wqe)->nda_op =<br>-                              
htonl((ind << srq->wqe_shift) | 1);<br>-                      
mb();<br>-                      
((struct mthca_next_seg *) prev_wqe)->ee_nds =<br>-                              
htonl(MTHCA_NEXT_DBD);<br>-               }<br>+              
((struct mthca_next_seg *) prev_wqe)->nda_op =<br>+                      
htonl((ind << srq->wqe_shift) | 1);<br>+               mb();<br>+              
((struct mthca_next_seg *) prev_wqe)->ee_nds =<br>+                      
htonl(MTHCA_NEXT_DBD);<br><br>                srq->wrid[ind]  =
wr->wr_id;<br>                srq->first_free = next_ind;<br>@@ -294,6 +292,7 @@ int mthca_alloc_srq_buf(struct ibv_pd *p<br><br>        srq->first_free = 0;<br>        srq->last_free  = srq->max - 1;<br>+       srq->last = get_wqe(srq, srq->max - 1);
<br><br>        return 0;<br> }<br>Index: userspace/libmthca/src/verbs.c<br>===================================================================<br>--- userspace.orig/libmthca/src/verbs.c 2005-08-23 14:03:12.000000000 +0300
<br>+++ userspace/libmthca/src/verbs.c      2005-09-13 17:25:14.000000000 +0300<br>@@ -306,7 +306,6 @@ struct ibv_srq *mthca_create_srq(struct<br><br>        srq->max    
= align_queue_size(pd->context, attr->attr.max_wr, 1);<br>        srq->max_gs  = attr->attr.max_sge;<br>-       srq->last    = NULL;<br>        srq->counter = 0;<br><br>        if (mthca_alloc_srq_buf(pd, &attr->attr, srq))
<br>@@ -413,14 +412,12 @@ struct ibv_qp *mthca_create_qp(struct ib<br>        qp->sq.last_comp = qp->sq.max - 1;<br>        qp->sq.head      = 0;<br>        qp->sq.tail      = 0;<br>-       qp->sq.last      = NULL;
<br><br>        qp->rq.max      
= align_queue_size(pd->context, attr->cap.max_recv_wr, 0);<br>        qp->rq.next_ind  = 0;<br>        qp->rq.last_comp = qp->rq.max - 1;<br>        qp->rq.head      = 0;<br>        qp->rq.tail      = 0;
<br>-       qp->rq.last      = NULL;<br><br>        if (mthca_alloc_qp_buf(pd, &attr->cap, qp))<br>                goto err;<br><br><br>--<br>MST<br></blockquote></div><br>