[ofa-general] IPoIB post_send failed

Roland Dreier rdreier at cisco.com
Wed Jul 29 20:27:17 PDT 2009


Also, if it's reproducible for you, it would be interesting to apply the
patch below (which logs why the send is failing), rerun, and send the
kernel log once you get the message about sends failing.

Thanks,
  Roland

 drivers/infiniband/hw/mlx4/qp.c |   11 +++++++++--
 1 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index c4a0264..6b475ae 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -1471,12 +1471,17 @@ static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr,
 	 * This is a temporary limitation and will be removed in
 	 * a forthcoming FW release:
 	 */
-	if (unlikely(halign > 64))
+	if (unlikely(halign > 64)) {
+		printk(KERN_ERR "%s: halign %d (hlen %d)\n", __func__, halign, wr->wr.ud.hlen);
 		return -EINVAL;
+	}
 
 	if (unlikely(!(qp->flags & MLX4_IB_QP_LSO) &&
-		     wr->num_sge > qp->sq.max_gs - (halign >> 4)))
+		     wr->num_sge > qp->sq.max_gs - (halign >> 4))) {
+		printk(KERN_ERR "%s: num_sge %d, max_gs %d, halign %d\n",
+		       __func__, wr->num_sge, qp->sq.max_gs, halign);
 		return -EINVAL;
+	}
 
 	memcpy(wqe->header, wr->wr.ud.header, wr->wr.ud.hlen);
 
@@ -1528,12 +1533,14 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 		lso_wqe = &dummy;
 
 		if (mlx4_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
+			printk(KERN_ERR "%s: wq overflow\n", __func__);
 			err = -ENOMEM;
 			*bad_wr = wr;
 			goto out;
 		}
 
 		if (unlikely(wr->num_sge > qp->sq.max_gs)) {
+			printk(KERN_ERR "%s: num_sge %d, max_gs %d\n", __func__, wr->num_sge, qp->sq.max_gs);
 			err = -EINVAL;
 			*bad_wr = wr;
 			goto out;



More information about the general mailing list