[ofa-general] [PATCH v2] mlx4: make firmware diagnostic counters available via sysfs

Jack Morgenstein jackm at dev.mellanox.co.il
Wed Jun 18 02:32:35 PDT 2008


mlx4: make firmware diagnostic counters available via sysfs.

version 2:
1. Extensive cleanup based upon Roland's comments.
2. Eliminated use of class_device.
3. Eliminated intermediate macros and functions for diag clear.

Developed by: Gabi Liron of Mellanox.

Signed-off-by: Jack Morgenstein <jackm at dev.mellanox.co.il>

---
Roland,

I cleaned up the patch along the lines you indicated.
I also eliminated all use of class_device.

I left in the "in_mod" parameter (renamed "op_mod") in macro
DEVICE_DIAG_RPRT_ATTR, in case we later implement further
diagnostic counter groups.

I also kept the array arguments of mlx4_query_diag_counters(),
so that the API is ready if we decide to implement query functions that
retrieve an entire vector of counters at once (the FW command already
returns all the counters of a given diagnostic group in one call).

Finally, I eliminated the CLASS_DEVICE_DIAG_CLR_RPRT_ATTR macro, because it
adds nothing at this point -- I just declare the "clear_diag" attribute directly
and define its store function directly (a "clear" resets an entire counter group).
If we add more counter groups, I will add more "clear" functions.

Index: ofa_1_4_dev_kernel/drivers/net/mlx4/fw.c
===================================================================
--- ofa_1_4_dev_kernel.orig/drivers/net/mlx4/fw.c	2008-06-18 10:31:50.000000000 +0300
+++ ofa_1_4_dev_kernel/drivers/net/mlx4/fw.c	2008-06-18 12:00:34.000000000 +0300
@@ -860,3 +860,37 @@ int mlx4_NOP(struct mlx4_dev *dev)
 	/* Input modifier of 0x1f means "finish as soon as possible." */
 	return mlx4_cmd(dev, 0, 0x1f, 0, MLX4_CMD_NOP, 100);
 }
+
+/*
+ * Issue MLX4_CMD_DIAG_RPRT with @op_modifier and copy the u32 at each
+ * in_offset[i] of the output mailbox to counter_out[i].  Returns 0 on
+ * success, else an error (-EINVAL when an offset is out of range).
+ */
+int mlx4_query_diag_counters(struct mlx4_dev *dev, int array_length,
+			     u8 op_modifier, u32 in_offset[], u32 counter_out[])
+{
+	struct mlx4_cmd_mailbox *mailbox;
+	u32 *outbox;
+	int ret;
+	int i;
+
+	mailbox = mlx4_alloc_cmd_mailbox(dev);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+	outbox = mailbox->buf;
+
+	ret = mlx4_cmd_box(dev, 0, mailbox->dma, 0, op_modifier,
+			   MLX4_CMD_DIAG_RPRT, MLX4_CMD_TIME_CLASS_A);
+	for (i = 0; !ret && i < array_length; i++) {
+		/* the last in-bounds u32 starts sizeof(u32) before the end */
+		if (in_offset[i] > MLX4_MAILBOX_SIZE - sizeof(u32)) {
+			ret = -EINVAL;
+			break;
+		}
+		MLX4_GET(counter_out[i], outbox, in_offset[i]);
+	}
+	mlx4_free_cmd_mailbox(dev, mailbox);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(mlx4_query_diag_counters);
+
Index: ofa_1_4_dev_kernel/include/linux/mlx4/device.h
===================================================================
--- ofa_1_4_dev_kernel.orig/include/linux/mlx4/device.h	2008-06-18 10:31:50.000000000 +0300
+++ ofa_1_4_dev_kernel/include/linux/mlx4/device.h	2008-06-18 10:46:02.000000000 +0300
@@ -417,5 +417,7 @@ void mlx4_fmr_unmap(struct mlx4_dev *dev
 		    u32 *lkey, u32 *rkey);
 int mlx4_fmr_free(struct mlx4_dev *dev, struct mlx4_fmr *fmr);
 int mlx4_SYNC_TPT(struct mlx4_dev *dev);
+int mlx4_query_diag_counters(struct mlx4_dev *dev, int array_length,
+			     u8 op_modifier, u32 in_offset[], u32 counter_out[]);
 
 #endif /* MLX4_DEVICE_H */
Index: ofa_1_4_dev_kernel/drivers/infiniband/hw/mlx4/main.c
===================================================================
--- ofa_1_4_dev_kernel.orig/drivers/infiniband/hw/mlx4/main.c	2008-06-18 10:31:50.000000000 +0300
+++ ofa_1_4_dev_kernel/drivers/infiniband/hw/mlx4/main.c	2008-06-18 12:08:23.000000000 +0300
@@ -614,6 +614,136 @@ static struct device_attribute *mlx4_cla
 	&dev_attr_board_id
 };
 
+/*
+ * Define show_rprt_<_name>(), which reports the counter at _offset of
+ * diag group _op_mod, plus a read-only dev_attr_<_name> bound to it.
+ */
+#define DEVICE_DIAG_RPRT_ATTR(_name, _offset, _op_mod)			\
+static ssize_t show_rprt_##_name(struct device *dev,			\
+				 struct device_attribute *attr, char *buf) \
+{									\
+	return show_diag_rprt(dev, buf, _offset, _op_mod);		\
+}									\
+static DEVICE_ATTR(_name, S_IRUGO, show_rprt_##_name, NULL)
+
+#define MLX4_DIAG_RPRT_CLEAR_DIAGS 3 /* op_modifier for clear_diag_counters() */
+
+/* Fetch the diag counter at @offset (group @op_modifier) and print it. */
+static ssize_t show_diag_rprt(struct device *device, char *buf,
+			      u32 offset, u8 op_modifier)
+{
+	struct mlx4_ib_dev *dev = container_of(device, struct mlx4_ib_dev,
+					       ib_dev.dev);
+	u32 diag_counter = 0;
+	int ret;
+
+	ret = mlx4_query_diag_counters(dev->dev, 1, op_modifier,
+				       &offset, &diag_counter);
+	if (ret)
+		return ret;
+
+	return sprintf(buf, "%u\n", diag_counter);	/* u32: print unsigned */
+}
+
+/* Store hook for clear_diag: any write issues the CLEAR_DIAGS op_modifier. */
+static ssize_t clear_diag_counters(struct device *device,
+				   struct device_attribute *attr,
+				   const char *buf, size_t length)
+{
+	struct mlx4_ib_dev *dev = container_of(device, struct mlx4_ib_dev,
+					       ib_dev.dev);
+	int ret;	/* signed: holds a negative error code on failure */
+
+	ret = mlx4_query_diag_counters(dev->dev, 0, MLX4_DIAG_RPRT_CLEAR_DIAGS,
+				       NULL, NULL);
+	if (ret)
+		return ret;
+	return length;	/* report the whole write as consumed */
+}
+
+DEVICE_DIAG_RPRT_ATTR(rq_num_lle, 0x00, 2);	/* (name, offset, op_mod) */
+DEVICE_DIAG_RPRT_ATTR(sq_num_lle, 0x04, 2);
+DEVICE_DIAG_RPRT_ATTR(rq_num_lqpoe, 0x08, 2);
+DEVICE_DIAG_RPRT_ATTR(sq_num_lqpoe, 0x0C, 2);
+DEVICE_DIAG_RPRT_ATTR(rq_num_leeoe, 0x10, 2);
+DEVICE_DIAG_RPRT_ATTR(sq_num_leeoe, 0x14, 2);
+DEVICE_DIAG_RPRT_ATTR(rq_num_lpe, 0x18, 2);
+DEVICE_DIAG_RPRT_ATTR(sq_num_lpe, 0x1C, 2);
+DEVICE_DIAG_RPRT_ATTR(rq_num_wrfe, 0x20, 2);
+DEVICE_DIAG_RPRT_ATTR(sq_num_wrfe, 0x24, 2);
+DEVICE_DIAG_RPRT_ATTR(sq_num_mwbe, 0x2C, 2);
+DEVICE_DIAG_RPRT_ATTR(sq_num_bre, 0x34, 2);
+DEVICE_DIAG_RPRT_ATTR(rq_num_lae, 0x38, 2);
+DEVICE_DIAG_RPRT_ATTR(sq_num_rire, 0x44, 2);
+DEVICE_DIAG_RPRT_ATTR(rq_num_rire, 0x48, 2);
+DEVICE_DIAG_RPRT_ATTR(sq_num_rae, 0x4C, 2);
+DEVICE_DIAG_RPRT_ATTR(rq_num_rae, 0x50, 2);
+DEVICE_DIAG_RPRT_ATTR(sq_num_roe, 0x54, 2);
+DEVICE_DIAG_RPRT_ATTR(sq_num_tree, 0x5C, 2);
+DEVICE_DIAG_RPRT_ATTR(sq_num_rree, 0x64, 2);
+DEVICE_DIAG_RPRT_ATTR(rq_num_rnr, 0x68, 2);
+DEVICE_DIAG_RPRT_ATTR(sq_num_rnr, 0x6C, 2);
+DEVICE_DIAG_RPRT_ATTR(sq_num_rabrte, 0x7C, 2);
+DEVICE_DIAG_RPRT_ATTR(sq_num_ieecne, 0x84, 2);
+DEVICE_DIAG_RPRT_ATTR(sq_num_ieecse, 0x8C, 2);
+DEVICE_DIAG_RPRT_ATTR(rq_num_oos, 0x100, 2);
+DEVICE_DIAG_RPRT_ATTR(sq_num_oos, 0x104, 2);
+DEVICE_DIAG_RPRT_ATTR(rq_num_mce, 0x108, 2);
+DEVICE_DIAG_RPRT_ATTR(rq_num_rsync, 0x110, 2);
+DEVICE_DIAG_RPRT_ATTR(sq_num_rsync, 0x114, 2);
+DEVICE_DIAG_RPRT_ATTR(rq_num_udsdprd, 0x118, 2);
+DEVICE_DIAG_RPRT_ATTR(rq_num_ucsdprd, 0x120, 2);
+DEVICE_DIAG_RPRT_ATTR(num_cqovf, 0x1A0, 2);
+DEVICE_DIAG_RPRT_ATTR(num_eqovf, 0x1A4, 2);
+DEVICE_DIAG_RPRT_ATTR(num_baddb, 0x1A8, 2);
+
+/* Write-only and owner-only: a write clears the whole counter group. */
+static DEVICE_ATTR(clear_diag, S_IWUSR, NULL, clear_diag_counters);
+static struct attribute *diag_rprt_attrs[] = {	/* used by diag_counters_group */
+	&dev_attr_rq_num_lle.attr,
+	&dev_attr_sq_num_lle.attr,
+	&dev_attr_rq_num_lqpoe.attr,
+	&dev_attr_sq_num_lqpoe.attr,
+	&dev_attr_rq_num_leeoe.attr,
+	&dev_attr_sq_num_leeoe.attr,
+	&dev_attr_rq_num_lpe.attr,
+	&dev_attr_sq_num_lpe.attr,
+	&dev_attr_rq_num_wrfe.attr,
+	&dev_attr_sq_num_wrfe.attr,
+	&dev_attr_sq_num_mwbe.attr,
+	&dev_attr_sq_num_bre.attr,
+	&dev_attr_rq_num_lae.attr,
+	&dev_attr_sq_num_rire.attr,
+	&dev_attr_rq_num_rire.attr,
+	&dev_attr_sq_num_rae.attr,
+	&dev_attr_rq_num_rae.attr,
+	&dev_attr_sq_num_roe.attr,
+	&dev_attr_sq_num_tree.attr,
+	&dev_attr_sq_num_rree.attr,
+	&dev_attr_rq_num_rnr.attr,
+	&dev_attr_sq_num_rnr.attr,
+	&dev_attr_sq_num_rabrte.attr,
+	&dev_attr_sq_num_ieecne.attr,
+	&dev_attr_sq_num_ieecse.attr,
+	&dev_attr_rq_num_oos.attr,
+	&dev_attr_sq_num_oos.attr,
+	&dev_attr_rq_num_mce.attr,
+	&dev_attr_rq_num_rsync.attr,
+	&dev_attr_sq_num_rsync.attr,
+	&dev_attr_rq_num_udsdprd.attr,
+	&dev_attr_rq_num_ucsdprd.attr,
+	&dev_attr_num_cqovf.attr,
+	&dev_attr_num_eqovf.attr,
+	&dev_attr_num_baddb.attr,
+	&dev_attr_clear_diag.attr,	/* the only entry with a store hook */
+	NULL	/* end of list */
+};
+
+static struct attribute_group diag_counters_group = {
+	.name	= "diag_counters",	/* group created in mlx4_ib_add() */
+	.attrs	= diag_rprt_attrs,
+};
+
 static void *mlx4_ib_add(struct mlx4_dev *dev)
 {
 	static int mlx4_ib_version_printed;
@@ -757,6 +887,9 @@ static void *mlx4_ib_add(struct mlx4_dev
 			goto err_reg;
 	}
 
+	if(sysfs_create_group(&ibdev->ib_dev.dev.kobj, &diag_counters_group))
+		goto err_reg;
+
 	return ibdev;
 
 err_reg:
@@ -782,6 +915,8 @@ static void mlx4_ib_remove(struct mlx4_d
 	struct mlx4_ib_dev *ibdev = ibdev_ptr;
 	int p;
 
+	sysfs_remove_group(&ibdev->ib_dev.dev.kobj, &diag_counters_group);
+
 	for (p = 1; p <= dev->caps.num_ports; ++p)
 		mlx4_CLOSE_PORT(dev, p);
 



More information about the general mailing list