[ofa-general] [RFC] mlx4: Add some form of error-path debug output

Jack Morgenstein jackm at dev.mellanox.co.il
Sat Nov 17 22:35:37 PST 2007


Roland,

I quickly dashed off an example of how I can rapidly implement
some form of debug printouts to assist in supporting the driver.

The philosophy I used below is to focus more on errors which would be
returned as -EINVAL -- to provide a bit more detail.

Also, I wanted the facility to be on-off switchable at run-time
(i.e., module-parameter controlled).

Finally, although I think that it would be nice to have component-based
trace capability, (i.e., treat the debug level parameter as a bitmask),
I think we can do without this for now (since we are only implementing
error path printouts).

Let me know what you think.

- Jack

(P.S., I've not yet compiled this, so it may have bugs).
=============================================================
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index d8287d9..970e181 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -43,7 +43,7 @@
 #include "mlx4_ib.h"
 #include "user.h"
 
-#define DRV_NAME	"mlx4_ib"
+#define DRV_NAME	MLX4_IB_DRV_NAME
 #define DRV_VERSION	"0.01"
 #define DRV_RELDATE	"May 1, 2006"
 
@@ -52,6 +52,14 @@ MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver");
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_VERSION(DRV_VERSION);
 
+#ifdef CONFIG_MLX4_DEBUG
+
+int mlx4_ib_debug_level = 0;
+module_param_named(debug_level, mlx4_ib_debug_level, int, 0644);
+MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0");
+
+#endif /* CONFIG_MLX4_DEBUG */
+
 static const char mlx4_ib_version[] __devinitdata =
 	DRV_NAME ": Mellanox ConnectX InfiniBand driver v"
 	DRV_VERSION " (" DRV_RELDATE ")\n";
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 2869765..3664f86 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -43,6 +43,25 @@
 #include <linux/mlx4/device.h>
 #include <linux/mlx4/doorbell.h>
 
+
+#define MLX4_IB_DRV_NAME	"mlx4_ib"
+
+#ifdef CONFIG_MLX4_DEBUG
+extern int mlx4_ib_debug_level;
+
+#define mlx4_ib_dbg(format, arg...) 		\
+	do {					\
+		if (mlx4_ib_debug_level) 	\
+			printk(KERN_DEBUG MLX4_IB_DRV_NAME ", %s: " format "\n",\
+			__func__, ## arg);	\
+	} while (0)
+
+#else /* CONFIG_MLX4_DEBUG */
+
+#define mlx4_ib_dbg(format, arg...) do {} while (0)
+
+#endif /* CONFIG_MLX4_DEBUG */
+
 enum {
 	MLX4_IB_DB_PER_PAGE	= PAGE_SIZE / 4
 };
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 8cba9c5..fe48b13 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -213,19 +213,25 @@ static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
 {
 	/* Sanity check RQ size before proceeding */
 	if (cap->max_recv_wr  > dev->dev->caps.max_wqes  ||
-	    cap->max_recv_sge > dev->dev->caps.max_rq_sg)
+	    cap->max_recv_sge > dev->dev->caps.max_rq_sg) {
+		mlx4_ib_dbg("Requested RQ size (sge or wr) too large");
 		return -EINVAL;
+	}
 
 	if (has_srq) {
 		/* QPs attached to an SRQ should have no RQ */
-		if (cap->max_recv_wr)
+		if (cap->max_recv_wr) {
+			mlx4_ib_dbg("non-zero RQ size for QP using SRQ");
 			return -EINVAL;
+		}
 
 		qp->rq.wqe_cnt = qp->rq.max_gs = 0;
 	} else {
 		/* HW requires >= 1 RQ entry with >= 1 gather entry */
-		if (is_user && (!cap->max_recv_wr || !cap->max_recv_sge))
+		if (is_user && (!cap->max_recv_wr || !cap->max_recv_sge)) {
+			mlx4_ib_dbg("user QP RQ has 0 wr's or 0 sge's");
 			return -EINVAL;
+		}
 
 		qp->rq.wqe_cnt	 = roundup_pow_of_two(max(1U, cap->max_recv_wr));
 		qp->rq.max_gs	 = roundup_pow_of_two(max(1U, cap->max_recv_sge));
@@ -245,16 +251,20 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
 	if (cap->max_send_wr	 > dev->dev->caps.max_wqes  ||
 	    cap->max_send_sge	 > dev->dev->caps.max_sq_sg ||
 	    cap->max_inline_data + send_wqe_overhead(type) +
-	    sizeof (struct mlx4_wqe_inline_seg) > dev->dev->caps.max_sq_desc_sz)
+	    sizeof (struct mlx4_wqe_inline_seg) > dev->dev->caps.max_sq_desc_sz) {
+		mlx4_ib_dbg("Requested SQ resources exceed device maxima");
 		return -EINVAL;
+	}
 
 	/*
 	 * For MLX transport we need 2 extra S/G entries:
 	 * one for the header and one for the checksum at the end
 	 */
 	if ((type == IB_QPT_SMI || type == IB_QPT_GSI) &&
-	    cap->max_send_sge + 2 > dev->dev->caps.max_sq_sg)
+	    cap->max_send_sge + 2 > dev->dev->caps.max_sq_sg) {
+		mlx4_ib_dbg("No space for SQP hdr/csum sge's");
 		return -EINVAL;
+	}
 
 	qp->sq.wqe_shift = ilog2(roundup_pow_of_two(max(cap->max_send_sge *
 							sizeof (struct mlx4_wqe_data_seg),
@@ -297,8 +307,10 @@ static int set_user_sq_size(struct mlx4_ib_dev *dev,
 	if ((1 << ucmd->log_sq_bb_count) > dev->dev->caps.max_wqes ||
 	    ucmd->log_sq_stride >
 		ilog2(roundup_pow_of_two(dev->dev->caps.max_sq_desc_sz)) ||
-	    ucmd->log_sq_stride < MLX4_IB_MIN_SQ_STRIDE)
+	    ucmd->log_sq_stride < MLX4_IB_MIN_SQ_STRIDE) {
+		mlx4_ib_dbg("Requested max wqes or wqe stride exceeds max");
 		return -EINVAL;
+	}
 
 	qp->sq.wqe_cnt   = 1 << ucmd->log_sq_bb_count;
 	qp->sq.wqe_shift = ucmd->log_sq_stride;
@@ -350,23 +362,30 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 				       qp->buf_size, 0);
 		if (IS_ERR(qp->umem)) {
 			err = PTR_ERR(qp->umem);
+			mlx4_ib_dbg("ib_umem_get error (%d)", err);
 			goto err;
 		}
 
 		err = mlx4_mtt_init(dev->dev, ib_umem_page_count(qp->umem),
 				    ilog2(qp->umem->page_size), &qp->mtt);
-		if (err)
+		if (err) {
+			mlx4_ib_dbg("mlx4_mtt_init error (%d)", err);
 			goto err_buf;
+		}
 
 		err = mlx4_ib_umem_write_mtt(dev, &qp->mtt, qp->umem);
-		if (err)
+		if (err) {
+			mlx4_ib_dbg("mlx4_ib_umem_write_mtt error (%d)", err);
 			goto err_mtt;
+		}
 
 		if (!init_attr->srq) {
 			err = mlx4_ib_db_map_user(to_mucontext(pd->uobject->context),
 						  ucmd.db_addr, &qp->db);
-			if (err)
+			if (err) {
+				mlx4_ib_dbg("mlx4_ib_db_map_user error (%d)", err);
 				goto err_mtt;
+			}
 		}
 	} else {
 		qp->sq_no_prefetch = 0;
@@ -390,12 +409,16 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 
 		err = mlx4_mtt_init(dev->dev, qp->buf.npages, qp->buf.page_shift,
 				    &qp->mtt);
-		if (err)
+		if (err) {
+			mlx4_ib_dbg("kernel qp mlx4_mtt_init error (%d)", err);
 			goto err_buf;
+		}
 
 		err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf);
-		if (err)
+		if (err) {
+			mlx4_ib_dbg("mlx4_buf_write_mtt error (%d)", err);
 			goto err_mtt;
+		}
 
 		qp->sq.wrid  = kmalloc(qp->sq.wqe_cnt * sizeof (u64), GFP_KERNEL);
 		qp->rq.wrid  = kmalloc(qp->rq.wqe_cnt * sizeof (u64), GFP_KERNEL);




More information about the general mailing list