[openib-general] cycles_to_units is incorrect in rdma_lat, rdma_bw.

Grant Grundler iod00d at hp.com
Fri Jun 10 14:59:52 PDT 2005


On Thu, Jun 09, 2005 at 05:33:52PM -0700, Grant Grundler wrote:
> I think adding such a calibration to get_cpu_mhz()
> so we can warn if /proc/cpuinfo data doesn't agree with
> gettimeofday() and get_cycles().

The appended patch prints the MHz reported in cpuinfo and
the MHz calculated from get_cycles()/gettimeofday().
(Sorry - this is a complete diff including previous
changes that haven't been committed yet.)

Conclusion: Maybe add another parameter to rdma_bw to switch
the source of the CPU MHz.
rdma_bw should continue using cpuinfo to report BW by default.

The problem is that gettimeofday() will also measure any other
activity that occurs while the test is running. So it is
more likely to under-report bandwidth.
Theoretically, rdma_bw can discard extreme get_cycles()
readings caused by noise on the test machine.
(We don't do that today.)


My r2600 says:
/proc/cpuinfo reports  1500 Mhz
Calculated speed is    1492 Mhz  (130150841056 cycles / 871849 usec)

This is derived from 199.<mumble> MHz FSB speed * (15:2 ratio) == 1492.5 MHz.
The "Calculated speed" is really 1492.81 MHz.  That's < 0.5% error.

I'm told only HP 1- and 2-way boxes exhibit this "feature" - other boxes
calculate the actual ITC speed just like the patch does and report
that in /proc/cpuinfo. I just checked my rx4640 (also ZX1 chipset)
and it seems to report the proper speed:
cpu MHz    : 1299.762145
itc MHz    : 1299.762145

While closer, rdma_bw still doesn't seem to agree:
/proc/cpuinfo reports 1299.76 Mhz
Calculated speed is    1295 Mhz  (118700953436 cycles / 916265 usec)

On this machine, I'm going to trust that the firmware is more accurate
than my user-space test.

hth,
grant


Index: rdma_bw.c
===================================================================
--- rdma_bw.c	(revision 2572)
+++ rdma_bw.c	(working copy)
@@ -61,10 +61,6 @@
 
 static int page_size;
 
-struct report_options {
-	int cycles;   /* report delta's in cycles, not microsec's */
-};
-
 struct pingpong_context {
 	struct ibv_context *context;
 	struct ibv_pd      *pd;
@@ -422,19 +418,14 @@
 	printf("  -t, --tx-depth=<dep>   size of tx queue (default 100)\n");
 	printf("  -n, --iters=<iters>    number of exchanges (at least 2, default 1000)\n");
 	printf("  -b, --bidirectional    measure bidirectional bandwidth (default unidirectional)\n");
-	printf("  -C, --report-cycles    report times in cpu cycle units (default seconds)\n");
-	printf("  -H, --report-histogram print out all results (default print summary only)\n");
-	printf("  -U, --report-unsorted  (implies -H) print out unsorted results (default sorted)\n");
 }
 
-static void print_report(struct report_options * options,
-			 unsigned int iters, double size, int duplex,
+static void print_report(unsigned int iters, int size, int duplex,
 			 cycles_t *tposted, cycles_t *tcompleted)
 {
 	double cycles_to_units;
-	double tsize; /* Transferred size, in megabytes */
+	unsigned long tsize;	/* Transferred size, in megabytes */
 	int i, j;
-	const char* units;
 	int opt_posted = 0, opt_completed = 0;
 	cycles_t opt_delta;
 	cycles_t t;
@@ -453,19 +444,21 @@
 			}
 		}
 
-	if (options->cycles) {
-		cycles_to_units = 1;
-		units = "cycles";
-	} else {
-		cycles_to_units = get_cpu_mhz() * 1000000;
-		units = "sec";
-	}
+	cycles_to_units = get_cpu_mhz() * 1000000;
 
 	tsize = duplex ? 2 : 1;
-	tsize = tsize * size / 0x100000;
+	tsize = tsize * size / 1024;
 
-	printf("Bandwidth peak (#%d to #%d): %g MByte/%s\n", opt_posted, opt_completed, tsize * cycles_to_units / opt_delta, units);
-	printf("Bandwidth average: %g MByte/%s\n", tsize * iters * cycles_to_units / (tcompleted[iters - 1] - tposted[0]), units);
+	printf("Bandwidth peak (#%d to #%d): %g MB/sec\n",
+			 opt_posted, opt_completed,
+			 tsize * cycles_to_units / opt_delta / 1024);
+	printf("Bandwidth average: %g MB/sec\n",
+			 tsize * iters * cycles_to_units / (tcompleted[iters - 1] - tposted[0]) / 1024);
+
+	printf("Service Demand peak (#%d to #%d): %ld cycles/KB\n",
+			 opt_posted, opt_completed, opt_delta/tsize);
+	printf("Service Demand Avg  : %ld cycles/KB\n",
+			 (tcompleted[iters - 1] - tposted[0])/(tsize * iters));
 }
 
 
@@ -478,16 +471,18 @@
 	struct pingpong_dest    *rem_dest;
 	char                    *ib_devname = NULL;
 	char                    *servername = NULL;
+        struct timeval           tod_start, tod_end;
+        cycles_t		 tsc_start, tsc_end;
 	int                      port = 18515;
 	int                      ib_port = 1;
-	int                      size = 1;
+	int                      size = 4 * 1024;
 	int                      tx_depth = 100;
 	int                      iters = 1000;
 	int                      scnt, ccnt;
 	int			 sockfd;
 	int                      duplex = 0;
 	struct ibv_qp		*qp;
-	struct report_options    report = {};
+	unsigned long long 	tsc_total, tod_total;
 
 	cycles_t	*tposted;
 	cycles_t	*tcompleted;
@@ -504,11 +499,10 @@
 			{ .name = "iters",          .has_arg = 1, .val = 'n' },
 			{ .name = "tx-depth",       .has_arg = 1, .val = 't' },
 			{ .name = "bidirectional",  .has_arg = 0, .val = 'b' },
-			{ .name = "report-cycles",  .has_arg = 0, .val = 'C' },
 			{ 0 }
 		};
 
-		c = getopt_long(argc, argv, "p:d:i:s:n:t:bC", long_options, NULL);
+		c = getopt_long(argc, argv, "p:d:i:s:n:t:b", long_options, NULL);
 		if (c == -1)
 			break;
 
@@ -565,10 +559,6 @@
 			duplex = 1;
 			break;
 
-		case 'C':
-			report.cycles = 1;
-			break;
-
 		default:
 			usage(argv[0]);
 			return 1;
@@ -701,10 +691,15 @@
 	}
 
 	/* Done with setup. Start the test. */
-
+        if (gettimeofday(&tod_start, NULL)) {
+                perror("gettimeofday");
+                return 1;
+        }
+	tsc_start = get_cycles();
+	
 	while (scnt < iters || ccnt < iters) {
 
-		while (scnt < iters && scnt - ccnt < tx_depth) {
+		while ((scnt < iters) && (scnt - ccnt < tx_depth)) {
 			struct ibv_send_wr *bad_wr;
 			tposted[scnt] = get_cycles();
 
@@ -742,6 +737,12 @@
 		}
 	}
 
+        if (gettimeofday(&tod_end, NULL)) {
+                perror("gettimeofday");
+                return 1;
+        }
+	tsc_end = get_cycles();
+
 	if (servername) {
 		rem_dest = pp_client_exch_dest(sockfd, &my_dest);
 	} else {
@@ -751,8 +752,22 @@
 	write(sockfd, "done", sizeof "done");
 	close(sockfd);
 
-	print_report(&report, iters, size, duplex, tposted, tcompleted);
+	/* sum total cycles into tsc_start */
+	tsc_total = tposted[0] - tsc_start;
+	for (ccnt = 0; ccnt < iters; ccnt++)
+		tsc_total += tcompleted[ccnt] - tposted[ccnt];
+ 	tsc_total += tsc_end - tcompleted[ccnt-1];
 
+	tod_total =  (unsigned long long) (tod_end.tv_sec - tod_start.tv_sec) * 1000000 +
+				(tod_end.tv_usec - tod_start.tv_usec);
+
+	printf("/proc/cpuinfo reports %5g Mhz\n", get_cpu_mhz());
+
+	printf("Calculated speed is   %5Ld Mhz",  tsc_total / tod_total / 100);
+	printf("  (%Ld cycles / %Ld usec)\n", tsc_total, tod_total);
+
+	print_report(iters, size, duplex, tposted, tcompleted);
+
 	free(tposted);
 	free(tcompleted);
 	return 0;



More information about the general mailing list