[ofa-general] [RFC] mlx4: Add some form of error-path debug output
Jack Morgenstein
jackm at dev.mellanox.co.il
Sat Nov 17 22:35:37 PST 2007
Roland,
I quickly put together an example of how we could add
debug printouts to assist in supporting the driver.
The approach I used below is to focus mainly on error paths that return
-EINVAL, so that the log gives a bit more detail about which check failed.
Also, I wanted the facility to be on-off switchable at run-time
(i.e., module-parameter controlled).
Finally, although it would be nice to have component-based
trace capability (i.e., treat the debug level parameter as a bitmask),
I think we can do without this for now, since we are only implementing
error-path printouts.
Let me know what you think.
- Jack
(P.S., I've not yet compiled this, so it may have bugs).
=============================================================
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index d8287d9..970e181 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -43,7 +43,7 @@
#include "mlx4_ib.h"
#include "user.h"
-#define DRV_NAME "mlx4_ib"
+#define DRV_NAME MLX4_IB_DRV_NAME
#define DRV_VERSION "0.01"
#define DRV_RELDATE "May 1, 2006"
@@ -52,6 +52,14 @@ MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRV_VERSION);
+#ifdef CONFIG_MLX4_DEBUG
+
+int mlx4_ib_debug_level = 0;
+module_param_named(debug_level, mlx4_ib_debug_level, int, 0644);
+MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0");
+
+#endif /* CONFIG_MLX4_DEBUG */
+
static const char mlx4_ib_version[] __devinitdata =
DRV_NAME ": Mellanox ConnectX InfiniBand driver v"
DRV_VERSION " (" DRV_RELDATE ")\n";
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 2869765..3664f86 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -43,6 +43,25 @@
#include <linux/mlx4/device.h>
#include <linux/mlx4/doorbell.h>
+
+#define MLX4_IB_DRV_NAME "mlx4_ib"
+
+#ifdef CONFIG_MLX4_DEBUG
+extern int mlx4_ib_debug_level;
+
+#define mlx4_ib_dbg(format, arg...) \
+ do { \
+ if (mlx4_ib_debug_level) \
+ printk(KERN_DEBUG MLX4_IB_DRV_NAME ", %s: " format "\n",\
+ __func__, ## arg); \
+ } while (0)
+
+#else /* CONFIG_MLX4_DEBUG */
+
+#define mlx4_ib_dbg(format, arg...) do {} while (0)
+
+#endif /* CONFIG_MLX4_DEBUG */
+
enum {
MLX4_IB_DB_PER_PAGE = PAGE_SIZE / 4
};
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 8cba9c5..fe48b13 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -213,19 +213,25 @@ static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
{
/* Sanity check RQ size before proceeding */
if (cap->max_recv_wr > dev->dev->caps.max_wqes ||
- cap->max_recv_sge > dev->dev->caps.max_rq_sg)
+ cap->max_recv_sge > dev->dev->caps.max_rq_sg) {
+ mlx4_ib_dbg("Requested RQ size (sge or wr) too large");
return -EINVAL;
+ }
if (has_srq) {
/* QPs attached to an SRQ should have no RQ */
- if (cap->max_recv_wr)
+ if (cap->max_recv_wr) {
+ mlx4_ib_dbg("non-zero RQ size for QP using SRQ");
return -EINVAL;
+ }
qp->rq.wqe_cnt = qp->rq.max_gs = 0;
} else {
/* HW requires >= 1 RQ entry with >= 1 gather entry */
- if (is_user && (!cap->max_recv_wr || !cap->max_recv_sge))
+ if (is_user && (!cap->max_recv_wr || !cap->max_recv_sge)) {
+ mlx4_ib_dbg("user QP RQ has 0 wr's or 0 sge's");
return -EINVAL;
+ }
qp->rq.wqe_cnt = roundup_pow_of_two(max(1U, cap->max_recv_wr));
qp->rq.max_gs = roundup_pow_of_two(max(1U, cap->max_recv_sge));
@@ -245,16 +251,20 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
if (cap->max_send_wr > dev->dev->caps.max_wqes ||
cap->max_send_sge > dev->dev->caps.max_sq_sg ||
cap->max_inline_data + send_wqe_overhead(type) +
- sizeof (struct mlx4_wqe_inline_seg) > dev->dev->caps.max_sq_desc_sz)
+ sizeof (struct mlx4_wqe_inline_seg) > dev->dev->caps.max_sq_desc_sz) {
+ mlx4_ib_dbg("Requested SQ resources exceed device maxima");
return -EINVAL;
+ }
/*
* For MLX transport we need 2 extra S/G entries:
* one for the header and one for the checksum at the end
*/
if ((type == IB_QPT_SMI || type == IB_QPT_GSI) &&
- cap->max_send_sge + 2 > dev->dev->caps.max_sq_sg)
+ cap->max_send_sge + 2 > dev->dev->caps.max_sq_sg) {
+ mlx4_ib_dbg("No space for SQP hdr/csum sge's");
return -EINVAL;
+ }
qp->sq.wqe_shift = ilog2(roundup_pow_of_two(max(cap->max_send_sge *
sizeof (struct mlx4_wqe_data_seg),
@@ -297,8 +307,10 @@ static int set_user_sq_size(struct mlx4_ib_dev *dev,
if ((1 << ucmd->log_sq_bb_count) > dev->dev->caps.max_wqes ||
ucmd->log_sq_stride >
ilog2(roundup_pow_of_two(dev->dev->caps.max_sq_desc_sz)) ||
- ucmd->log_sq_stride < MLX4_IB_MIN_SQ_STRIDE)
+ ucmd->log_sq_stride < MLX4_IB_MIN_SQ_STRIDE) {
+ mlx4_ib_dbg("Requested max wqes or wqe stride exceeds max");
return -EINVAL;
+ }
qp->sq.wqe_cnt = 1 << ucmd->log_sq_bb_count;
qp->sq.wqe_shift = ucmd->log_sq_stride;
@@ -350,23 +362,30 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
qp->buf_size, 0);
if (IS_ERR(qp->umem)) {
err = PTR_ERR(qp->umem);
+ mlx4_ib_dbg("ib_umem_get error (%d)", err);
goto err;
}
err = mlx4_mtt_init(dev->dev, ib_umem_page_count(qp->umem),
ilog2(qp->umem->page_size), &qp->mtt);
- if (err)
+ if (err) {
+ mlx4_ib_dbg("mlx4_mtt_init error (%d)", err);
goto err_buf;
+ }
err = mlx4_ib_umem_write_mtt(dev, &qp->mtt, qp->umem);
- if (err)
+ if (err) {
+ mlx4_ib_dbg("mlx4_ib_umem_write_mtt error (%d)", err);
goto err_mtt;
+ }
if (!init_attr->srq) {
err = mlx4_ib_db_map_user(to_mucontext(pd->uobject->context),
ucmd.db_addr, &qp->db);
- if (err)
+ if (err) {
+ mlx4_ib_dbg("mlx4_ib_db_map_user error (%d)", err);
goto err_mtt;
+ }
}
} else {
qp->sq_no_prefetch = 0;
@@ -390,12 +409,16 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
err = mlx4_mtt_init(dev->dev, qp->buf.npages, qp->buf.page_shift,
&qp->mtt);
- if (err)
+ if (err) {
+ mlx4_ib_dbg("kernel qp mlx4_mtt_init error (%d)", err);
goto err_buf;
+ }
err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf);
- if (err)
+ if (err) {
+ mlx4_ib_dbg("mlx4_buf_write_mtt error (%d)", err);
goto err_mtt;
+ }
qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof (u64), GFP_KERNEL);
qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof (u64), GFP_KERNEL);
More information about the general
mailing list