[openib-general] cycles_to_units is incorrect in rdma_lat, rdma_bw.
Grant Grundler
iod00d at hp.com
Fri Jun 10 14:59:52 PDT 2005
On Thu, Jun 09, 2005 at 05:33:52PM -0700, Grant Grundler wrote:
> I think adding such a calibration to get_cpu_mhz()
> so we can warn if /proc/cpuinfo data doesn't agree with
> gettimeofday() and get_cycles().
The appended patch prints the MHz reported in cpuinfo and
the MHz calculated from get_cycles()/gettimeofday().
(Sorry - this is a complete diff including previous
changes that haven't been committed yet.)
Conclusion: Maybe add another parameter to rdma_bw to switch
the source of the CPU MHz.
rdma_bw should continue using cpuinfo to report BW by default.
The problem is that gettimeofday() will also measure any other
activity that occurs while the test is running. So it is
more likely to under-report bandwidth.
Theoretically, rdma_bw can discard extreme get_cycles()
readings caused by noise on the test machine.
(We don't do that today.)
My r2600 says:
/proc/cpuinfo reports 1500 Mhz
Calculated speed is 1492 Mhz (130150841056 cycles / 871849 usec)
This is derived from 199.<mumble> MHz FSB speed * (15:2 ratio) == 1492.5 Mhz.
The "Calculated speed" is really 1492.81 Mhz. That's < 0.5% error.
I'm told only HP 1 and 2-way boxes exhibit this "feature" - other boxes
calculate the actual ITC speed just like the patch does and report
that in /proc/cpuinfo. I just checked my rx4640 (also ZX1 chipset)
and it seems to report the proper speed:
cpu MHz : 1299.762145
itc MHz : 1299.762145
While closer, rdma_bw still doesn't seem to agree:
/proc/cpuinfo reports 1299.76 Mhz
Calculated speed is 1295 Mhz (118700953436 cycles / 916265 usec)
On this machine, I'm going to trust that the firmware is more accurate
than my user-space test.
hth,
grant
Index: rdma_bw.c
===================================================================
--- rdma_bw.c (revision 2572)
+++ rdma_bw.c (working copy)
@@ -61,10 +61,6 @@
static int page_size;
-struct report_options {
- int cycles; /* report delta's in cycles, not microsec's */
-};
-
struct pingpong_context {
struct ibv_context *context;
struct ibv_pd *pd;
@@ -422,19 +418,14 @@
printf(" -t, --tx-depth=<dep> size of tx queue (default 100)\n");
printf(" -n, --iters=<iters> number of exchanges (at least 2, default 1000)\n");
printf(" -b, --bidirectional measure bidirectional bandwidth (default unidirectional)\n");
- printf(" -C, --report-cycles report times in cpu cycle units (default seconds)\n");
- printf(" -H, --report-histogram print out all results (default print summary only)\n");
- printf(" -U, --report-unsorted (implies -H) print out unsorted results (default sorted)\n");
}
-static void print_report(struct report_options * options,
- unsigned int iters, double size, int duplex,
+static void print_report(unsigned int iters, int size, int duplex,
cycles_t *tposted, cycles_t *tcompleted)
{
double cycles_to_units;
- double tsize; /* Transferred size, in megabytes */
+ unsigned long tsize; /* Transferred size, in megabytes */
int i, j;
- const char* units;
int opt_posted = 0, opt_completed = 0;
cycles_t opt_delta;
cycles_t t;
@@ -453,19 +444,21 @@
}
}
- if (options->cycles) {
- cycles_to_units = 1;
- units = "cycles";
- } else {
- cycles_to_units = get_cpu_mhz() * 1000000;
- units = "sec";
- }
+ cycles_to_units = get_cpu_mhz() * 1000000;
tsize = duplex ? 2 : 1;
- tsize = tsize * size / 0x100000;
+ tsize = tsize * size / 1024;
- printf("Bandwidth peak (#%d to #%d): %g MByte/%s\n", opt_posted, opt_completed, tsize * cycles_to_units / opt_delta, units);
- printf("Bandwidth average: %g MByte/%s\n", tsize * iters * cycles_to_units / (tcompleted[iters - 1] - tposted[0]), units);
+ printf("Bandwidth peak (#%d to #%d): %g MB/sec\n",
+ opt_posted, opt_completed,
+ tsize * cycles_to_units / opt_delta / 1024);
+ printf("Bandwidth average: %g MB/sec\n",
+ tsize * iters * cycles_to_units / (tcompleted[iters - 1] - tposted[0]) / 1024);
+
+ printf("Service Demand peak (#%d to #%d): %ld cycles/KB\n",
+ opt_posted, opt_completed, opt_delta/tsize);
+ printf("Service Demand Avg : %ld cycles/KB\n",
+ (tcompleted[iters - 1] - tposted[0])/(tsize * iters));
}
@@ -478,16 +471,18 @@
struct pingpong_dest *rem_dest;
char *ib_devname = NULL;
char *servername = NULL;
+ struct timeval tod_start, tod_end;
+ cycles_t tsc_start, tsc_end;
int port = 18515;
int ib_port = 1;
- int size = 1;
+ int size = 4 * 1024;
int tx_depth = 100;
int iters = 1000;
int scnt, ccnt;
int sockfd;
int duplex = 0;
struct ibv_qp *qp;
- struct report_options report = {};
+ unsigned long long tsc_total, tod_total;
cycles_t *tposted;
cycles_t *tcompleted;
@@ -504,11 +499,10 @@
{ .name = "iters", .has_arg = 1, .val = 'n' },
{ .name = "tx-depth", .has_arg = 1, .val = 't' },
{ .name = "bidirectional", .has_arg = 0, .val = 'b' },
- { .name = "report-cycles", .has_arg = 0, .val = 'C' },
{ 0 }
};
- c = getopt_long(argc, argv, "p:d:i:s:n:t:bC", long_options, NULL);
+ c = getopt_long(argc, argv, "p:d:i:s:n:t:b", long_options, NULL);
if (c == -1)
break;
@@ -565,10 +559,6 @@
duplex = 1;
break;
- case 'C':
- report.cycles = 1;
- break;
-
default:
usage(argv[0]);
return 1;
@@ -701,10 +691,15 @@
}
/* Done with setup. Start the test. */
-
+ if (gettimeofday(&tod_start, NULL)) {
+ perror("gettimeofday");
+ return 1;
+ }
+ tsc_start = get_cycles();
+
while (scnt < iters || ccnt < iters) {
- while (scnt < iters && scnt - ccnt < tx_depth) {
+ while ((scnt < iters) && (scnt - ccnt < tx_depth)) {
struct ibv_send_wr *bad_wr;
tposted[scnt] = get_cycles();
@@ -742,6 +737,12 @@
}
}
+ if (gettimeofday(&tod_end, NULL)) {
+ perror("gettimeofday");
+ return 1;
+ }
+ tsc_end = get_cycles();
+
if (servername) {
rem_dest = pp_client_exch_dest(sockfd, &my_dest);
} else {
@@ -751,8 +752,22 @@
write(sockfd, "done", sizeof "done");
close(sockfd);
- print_report(&report, iters, size, duplex, tposted, tcompleted);
+ /* sum total cycles into tsc_start */
+ tsc_total = tposted[0] - tsc_start;
+ for (ccnt = 0; ccnt < iters; ccnt++)
+ tsc_total += tcompleted[ccnt] - tposted[ccnt];
+ tsc_total += tsc_end - tcompleted[ccnt-1];
+ tod_total = (unsigned long long) (tod_end.tv_sec - tod_start.tv_sec) * 1000000 +
+ (tod_end.tv_usec - tod_start.tv_usec);
+
+ printf("/proc/cpuinfo reports %5g Mhz\n", get_cpu_mhz());
+
+ printf("Calculated speed is %5Ld Mhz", tsc_total / tod_total / 100);
+ printf(" (%Ld cycles / %Ld usec)\n", tsc_total, tod_total);
+
+ print_report(iters, size, duplex, tposted, tcompleted);
+
free(tposted);
free(tcompleted);
return 0;
More information about the general
mailing list