[ofa-general] FW: [PATCH] mstvpd (resend)

Mike Heinz michael.heinz at qlogic.com
Thu Jan 22 06:25:59 PST 2009


I sent this a week ago and never got any kind of response. This is a patch (included both inline and as an attachment) for mstvpd. Should it be directed somewhere else?

--
Michael Heinz
Principal Engineer, Qlogic Corporation
King of Prussia, Pennsylvania
From: Mike Heinz
Sent: Wednesday, January 14, 2009 1:42 PM
To: 'general at lists.openfabrics.org'
Subject: [PATCH] mstvpd (resend)

We've repeatedly run into a problem where mstvpd can hang on certain HCA models and on HCAs that have failed. This is an issue for us because mstvpd is one of the tools we use to automatically capture information about a system that's experiencing problems.

I previously opened PR 1440 on the problem, but it doesn't appear to have been investigated.

For this reason, I'm proposing the attached patch. It adds a configurable timeout and terminates the attempt to read the VPD area if it fails to retrieve data before the timeout expires. The default is 30 seconds. It uses a stupid busy-loop to check for the timeout because that's what the existing code does.

Other changes were made to support this: I changed how command-line options are processed (they are now parsed with getopt) and extended the usage() function.

--- vpd.c.orig    2009-01-08 16:56:12.000000000 -0500
+++ vpd.c   2009-01-08 17:44:01.000000000 -0500
@@ -44,6 +44,13 @@
 #include <stdlib.h>
 #include <errno.h>
 #include <libgen.h>
+#include <sys/times.h>
+
+/* pread is non-blocking, so we loop until we find data.  Unfortunately,
+ * we can loop forever if the HCA is crashed or if the wrong device is
+ * specified as an argument. So, we set time outs.
+ */
+static clock_t ticks_per_sec, start_t, curr_t, timeout_t = 30;

 struct vpd_cap {
      unsigned char id;
@@ -168,7 +175,13 @@
      if (ret != sizeof addr_flag)
            return ret;

+     start_t = times(NULL);
      while((addr_flag[1] & VPD_FLAG) != VPD_FLAG_READ_READY) {
+           curr_t = times(NULL);
+           if ((curr_t - start_t) / ticks_per_sec > timeout_t) {
+                 return -EIO;
+           }
+
            ret = pread(device, addr_flag, sizeof addr_flag,
                       vpd_cap_offset + VPD_ADDR_OFFSET);
            if (ret != sizeof addr_flag)
@@ -437,24 +450,34 @@
            rc = 1;
            goto usage;
      }
-     if (argc == 3) {
-           if (!strcmp("-m", argv[1])) {
-                 argv++;
-                 argc--;
-                 m = 1;
-           } else if (!strcmp("-n", argv[1])) {
-                 argv++;
-                 argc--;
-                 n = 1;
-           } else {
-                 rc = 2;
-                 goto usage;
+
+     ticks_per_sec = sysconf(_SC_CLK_TCK);
+
+     do
+     {
+           i=getopt(argc, argv, "mnt:");
+           if (i<0) {
+                 break;
            }
-     }

-     name = argv[1];
-     argv++;
-     argc--;
+           switch (i) {
+                 case 'm':
+                       m=1;
+                       break;
+                 case 'n':
+                       n=1;
+                       break;
+                 case 't':
+                       timeout_t = strtol(optarg, NULL, 0);
+                       break;
+                 default:
+                       goto usage;
+           }
+     } while (1 == 1);
+
+     name = argv[optind];
+     argc -= optind;
+     argv += optind;

      if (!strcmp("-", name)) {
            if (fread(d, VPD_MAX_SIZE, 1, stdin) != 1)
@@ -486,6 +509,14 @@
      return 0;

 usage:
-     fprintf(stderr, "Usage: %s [-m|-n] <file|-> [-- keyword ...]\n", argv[0]);
+     fprintf(stderr, "Usage: %s [-m|-n] [-t ##] <file> [-- keyword ...]\n", argv[0]);
+     fprintf(stderr, "-m\tDump raw VPD data to stdout.\n");
+     fprintf(stderr, "-n\tDo not validate check sum.\n");
+     fprintf(stderr, "-t ##\tTime out after ## seconds. (Default is 30.)\n\n");
+     fprintf(stderr, "file\tThe PCI id number of the HCA (for example, \"2:00.0\"),\n");
+     fprintf(stderr, "\tthe device name (such as \"mlx4_0\")\n");
+     fprintf(stderr, "\tthe absolute path to the device (\"/sys/class/infiniband/mlx4_0/device\")\n");
+     fprintf(stderr, "\tor '-' to read VPD data from the standard input.\n\n");
+     fprintf(stderr, "keyword(s): Only display the requested information. (ID, PN, EC, SN, etc...)\n");
      return rc;
 }


--
Michael Heinz
Principal Engineer, Qlogic Corporation
King of Prussia, Pennsylvania
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openfabrics.org/pipermail/general/attachments/20090122/4eeacafd/attachment.html>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: mstvpd.patch
Type: application/octet-stream
Size: 2501 bytes
Desc: mstvpd.patch
URL: <http://lists.openfabrics.org/pipermail/general/attachments/20090122/4eeacafd/attachment.obj>


More information about the general mailing list