[openib-general] cycles_to_units is incorrect in rdma_lat, rdma_bw.

Grant Grundler iod00d at hp.com
Fri Jun 10 21:27:27 PDT 2005


On Fri, Jun 10, 2005 at 02:59:52PM -0700, Grant Grundler wrote:
> My r2600 says:
> /proc/cpuinfo reports  1500 Mhz
> Calculated speed is    1492 Mhz  (130150841056 cycles / 871849 usec)

Sorry, I wasn't paying attention...the 1492 number is wrong and
I finally figured out why.

> This is derived from 199.<mumble> MHz FSB speed * (15:2 ratio) == 1492.5 Mhz.
> The "Calculated speed" is really 1492.81 Mhz.  That's < 0.5% error.

"<mumble>" is probably 199.9 or so - very close to spec (200Mhz).
The error is well below %0.1.

> I just checked my rx4640 (also ZX1 chipset)
> and it seems to report the proper speed:
> cpu MHz    : 1299.762145
> itc MHz    : 1299.762145

In this case, the FSB is 199.96 Mhz.

> On this machine, I'm going to trust the firmware is more accurate
> than my user space test.

Definitely.

> +	printf("Calculated speed is   %5Ld Mhz",  tsc_total / tod_total / 100);

Sorry - I should have figured out why I needed the "/ 100".
It directly corresponds to tx_depth (default 100).
And it was a big, fat, hairy clue something wasn't right.
I just didn't work out the math at the time though it bothered
me a bit becuase it didn't make sense.

The posted/completed get_cycles() measurements overlap.
Ergo with big "-n 10000" I was getting a number that was 100x too big. 
My second clue was that smaller -n would have "bigger" errors.

Patch below sums the get_cycle measurements correctly (I think)
and results look like:
/proc/cpuinfo reports 1500.00 Mhz
Calculated speed is   1500.02 Mhz  (642797211 cycles / 428525 usec)


or with different parameters:

/proc/cpuinfo reports 1500.00 Mhz
Calculated speed is   1499.99 Mhz  (129930481 cycles / 86621 usec)

(129930481/86621 is 1499.98823...)

Patch below adds "-g" (gettimeofday) and "-M=<CPU Mhz>" parameters
in case someone's firmware is less accurate than HP's.

And to keep Bernhard happy, fixes usage() output to match 16KB default. :^)

apologies,
grant


Index: rdma_bw.c
===================================================================
--- rdma_bw.c	(revision 2572)
+++ rdma_bw.c	(working copy)
@@ -58,13 +58,10 @@
 #include "get_clock.h"
 
 #define PINGPONG_RDMA_WRID	3
+#define MILLION	1000000
 
 static int page_size;
 
-struct report_options {
-	int cycles;   /* report delta's in cycles, not microsec's */
-};
-
 struct pingpong_context {
 	struct ibv_context *context;
 	struct ibv_pd      *pd;
@@ -418,23 +415,20 @@ static void usage(const char *argv0)
 	printf("  -p, --port=<port>      listen on/connect to port <port> (default 18515)\n");
 	printf("  -d, --ib-dev=<dev>     use IB device <dev> (default first device found)\n");
 	printf("  -i, --ib-port=<port>   use port <port> of IB device (default 1)\n");
-	printf("  -s, --size=<size>      size of message to exchange (default 1)\n");
+	printf("  -s, --size=<size>      size of message to exchange (default 16KB)\n");
 	printf("  -t, --tx-depth=<dep>   size of tx queue (default 100)\n");
 	printf("  -n, --iters=<iters>    number of exchanges (at least 2, default 1000)\n");
 	printf("  -b, --bidirectional    measure bidirectional bandwidth (default unidirectional)\n");
-	printf("  -C, --report-cycles    report times in cpu cycle units (default seconds)\n");
-	printf("  -H, --report-histogram print out all results (default print summary only)\n");
-	printf("  -U, --report-unsorted  (implies -H) print out unsorted results (default sorted)\n");
+	printf("  -g, --gettimeofday     Use gettimeofday() to calibrate Mhz (default /proc/cpuinfo)\n");
+	printf("  -M, --mhz=<CPU Mhz>    Use <CPU Mhz> for Mhz\n");
 }
 
-static void print_report(struct report_options * options,
-			 unsigned int iters, double size, int duplex,
+static void print_report(double cycles_to_units, unsigned int iters,
+			 int size, int duplex,
 			 cycles_t *tposted, cycles_t *tcompleted)
 {
-	double cycles_to_units;
-	double tsize; /* Transferred size, in megabytes */
+	unsigned long tsize;	/* Transferred size, in megabytes */
 	int i, j;
-	const char* units;
 	int opt_posted = 0, opt_completed = 0;
 	cycles_t opt_delta;
 	cycles_t t;
@@ -453,19 +447,20 @@ static void print_report(struct report_o
 			}
 		}
 
-	if (options->cycles) {
-		cycles_to_units = 1;
-		units = "cycles";
-	} else {
-		cycles_to_units = get_cpu_mhz() * 1000000;
-		units = "sec";
-	}
 
 	tsize = duplex ? 2 : 1;
-	tsize = tsize * size / 0x100000;
+	tsize = tsize * size / 1024;	/* convert to KB */
 
-	printf("Bandwidth peak (#%d to #%d): %g MByte/%s\n", opt_posted, opt_completed, tsize * cycles_to_units / opt_delta, units);
-	printf("Bandwidth average: %g MByte/%s\n", tsize * iters * cycles_to_units / (tcompleted[iters - 1] - tposted[0]), units);
+	printf("Bandwidth peak (#%d to #%d): %'.2f MB/sec\n",
+			 opt_posted, opt_completed,
+			 tsize * cycles_to_units / opt_delta / 1024);
+	printf("Bandwidth average: %'.2f MB/sec\n",
+			 tsize * iters * cycles_to_units / (tcompleted[iters - 1] - tposted[0]) / 1024);
+
+	printf("Service Demand peak (#%d to #%d): %ld cycles/KB\n",
+			 opt_posted, opt_completed, opt_delta/tsize);
+	printf("Service Demand Avg  : %ld cycles/KB\n",
+			 (tcompleted[iters - 1] - tposted[0])/(tsize * iters));
 }
 
 
@@ -478,16 +473,21 @@ int main(int argc, char *argv[])
 	struct pingpong_dest    *rem_dest;
 	char                    *ib_devname = NULL;
 	char                    *servername = NULL;
+        struct timeval           tod_start, tod_end;
+        cycles_t		 tsc_start, tsc_end;
 	int                      port = 18515;
 	int                      ib_port = 1;
-	int                      size = 1;
+	int                      size = 16 * 1024;
 	int                      tx_depth = 100;
 	int                      iters = 1000;
 	int                      scnt, ccnt;
 	int			 sockfd;
 	int                      duplex = 0;
+	int                      use_gettod = 0;
 	struct ibv_qp		*qp;
-	struct report_options    report = {};
+	unsigned long long 	tsc_total, tod_total;
+	double			cycles_to_units = get_cpu_mhz() * MILLION;
+	double			mhz = 0;
 
 	cycles_t	*tposted;
 	cycles_t	*tcompleted;
@@ -503,12 +503,13 @@ int main(int argc, char *argv[])
 			{ .name = "size",           .has_arg = 1, .val = 's' },
 			{ .name = "iters",          .has_arg = 1, .val = 'n' },
 			{ .name = "tx-depth",       .has_arg = 1, .val = 't' },
+			{ .name = "mhz",            .has_arg = 1, .val = 'M' },
 			{ .name = "bidirectional",  .has_arg = 0, .val = 'b' },
-			{ .name = "report-cycles",  .has_arg = 0, .val = 'C' },
+			{ .name = "gettimeofday",   .has_arg = 0, .val = 'g' },
 			{ 0 }
 		};
 
-		c = getopt_long(argc, argv, "p:d:i:s:n:t:bC", long_options, NULL);
+		c = getopt_long(argc, argv, "p:d:i:s:n:t:M:bg", long_options, NULL);
 		if (c == -1)
 			break;
 
@@ -565,8 +566,16 @@ int main(int argc, char *argv[])
 			duplex = 1;
 			break;
 
-		case 'C':
-			report.cycles = 1;
+		case 'g':
+			use_gettod = 1;  /* i.e. don't use /proc/cpuinfo */
+			break;
+
+		case 'M':
+			mhz = strtod(optarg, NULL);
+			if (mhz < 1) {
+				usage(argv[0]);
+				return 1;
+			}
 			break;
 
 		default:
@@ -701,10 +710,15 @@ int main(int argc, char *argv[])
 	}
 
 	/* Done with setup. Start the test. */
-
+        if (gettimeofday(&tod_start, NULL)) {
+                perror("gettimeofday");
+                return 1;
+        }
+	tsc_start = get_cycles();
+	
 	while (scnt < iters || ccnt < iters) {
 
-		while (scnt < iters && scnt - ccnt < tx_depth) {
+		while ((scnt < iters) && (scnt - ccnt < tx_depth)) {
 			struct ibv_send_wr *bad_wr;
 			tposted[scnt] = get_cycles();
 
@@ -742,6 +756,12 @@ int main(int argc, char *argv[])
 		}
 	}
 
+        if (gettimeofday(&tod_end, NULL)) {
+                perror("gettimeofday");
+                return 1;
+        }
+	tsc_end = get_cycles();
+
 	if (servername) {
 		rem_dest = pp_client_exch_dest(sockfd, &my_dest);
 	} else {
@@ -751,7 +771,38 @@ int main(int argc, char *argv[])
 	write(sockfd, "done", sizeof "done");
 	close(sockfd);
 
-	print_report(&report, iters, size, duplex, tposted, tcompleted);
+	/* sum total cycles into tsc_start */
+	tsc_total = tposted[0] - tsc_start;	/* prelude */
+	for (ccnt = 0; ccnt < iters-1; ccnt++)	/* time in loop sending */
+		tsc_total += tposted[ccnt+1] - tposted[ccnt];
+
+	/* time until last send completed */
+ 	tsc_total += tcompleted[ccnt] - tposted[ccnt];
+ 	tsc_total += tsc_end - tcompleted[ccnt];	/* epilogue */
+
+	tod_total =  (unsigned long long) (tod_end.tv_sec - tod_start.tv_sec) *
+				 MILLION +
+				(tod_end.tv_usec - tod_start.tv_usec);
+
+
+	printf("/proc/cpuinfo reports %'.2f Mhz\n", cycles_to_units/MILLION);
+	printf("Calculated speed is   %'.2f Mhz",  (double) tsc_total / tod_total);
+	printf("  (%Ld cycles / %Ld usec)\n", tsc_total, tod_total);
+
+	if (mhz) {
+		printf("User specified        %7g Mhz",  mhz);
+		cycles_to_units = mhz * MILLION;
+	} else if (use_gettod) {
+		if (tod_total < 10000)
+			printf(	"WARNING: Test was too short (%1.08g sec) "
+					" not using gettimeofday()\n"
+				"         Use -b, more (-n) or bigger (-s) samples.\n"
+				,  (double) tod_total / MILLION);
+		else 
+			cycles_to_units = (double) tsc_total / tod_total * MILLION;
+	}
+
+	print_report(cycles_to_units, iters, size, duplex, tposted, tcompleted);
 
 	free(tposted);
 	free(tcompleted);



More information about the general mailing list