[openib-general] cycles_to_units is incorrect in rdma_lat, rdma_bw.
Grant Grundler
iod00d at hp.com
Fri Jun 10 21:27:27 PDT 2005
On Fri, Jun 10, 2005 at 02:59:52PM -0700, Grant Grundler wrote:
> My r2600 says:
> /proc/cpuinfo reports 1500 Mhz
> Calculated speed is 1492 Mhz (130150841056 cycles / 871849 usec)
Sorry, I wasn't paying attention...the 1492 number is wrong and
I finally figured out why.
> This is derived from 199.<mumble> MHz FSB speed * (15:2 ratio) == 1492.5 Mhz.
> The "Calculated speed" is really 1492.81 Mhz. That's < 0.5% error.
"<mumble>" is probably 199.9 or so - very close to spec (200Mhz).
The error is well below %0.1.
> I just checked my rx4640 (also ZX1 chipset)
> and it seems to report the proper speed:
> cpu MHz : 1299.762145
> itc MHz : 1299.762145
In this case, the FSB is 199.96 Mhz.
> On this machine, I'm going to trust the firmware is more accurate
> than my user space test.
Definitely.
> + printf("Calculated speed is %5Ld Mhz", tsc_total / tod_total / 100);
Sorry - I should have figured out why I needed the "/ 100".
It directly corresponds to tx_depth (default 100).
And it was a big, fat, hairy clue something wasn't right.
I just didn't work out the math at the time though it bothered
me a bit becuase it didn't make sense.
The posted/completed get_cycles() measurements overlap.
Ergo with big "-n 10000" I was getting a number that was 100x too big.
My second clue was that smaller -n would have "bigger" errors.
Patch below sums the get_cycle measurements correctly (I think)
and results look like:
/proc/cpuinfo reports 1500.00 Mhz
Calculated speed is 1500.02 Mhz (642797211 cycles / 428525 usec)
or with different parameters:
/proc/cpuinfo reports 1500.00 Mhz
Calculated speed is 1499.99 Mhz (129930481 cycles / 86621 usec)
(129930481/86621 is 1499.98823...)
Patch below adds "-g" (gettimeofday) and "-M=<CPU Mhz>" parameters
in case someone's firmware is less accurate than HP's.
And to keep Bernhard happy, fixes usage() output to match 16KB default. :^)
apologies,
grant
Index: rdma_bw.c
===================================================================
--- rdma_bw.c (revision 2572)
+++ rdma_bw.c (working copy)
@@ -58,13 +58,10 @@
#include "get_clock.h"
#define PINGPONG_RDMA_WRID 3
+#define MILLION 1000000
static int page_size;
-struct report_options {
- int cycles; /* report delta's in cycles, not microsec's */
-};
-
struct pingpong_context {
struct ibv_context *context;
struct ibv_pd *pd;
@@ -418,23 +415,20 @@ static void usage(const char *argv0)
printf(" -p, --port=<port> listen on/connect to port <port> (default 18515)\n");
printf(" -d, --ib-dev=<dev> use IB device <dev> (default first device found)\n");
printf(" -i, --ib-port=<port> use port <port> of IB device (default 1)\n");
- printf(" -s, --size=<size> size of message to exchange (default 1)\n");
+ printf(" -s, --size=<size> size of message to exchange (default 16KB)\n");
printf(" -t, --tx-depth=<dep> size of tx queue (default 100)\n");
printf(" -n, --iters=<iters> number of exchanges (at least 2, default 1000)\n");
printf(" -b, --bidirectional measure bidirectional bandwidth (default unidirectional)\n");
- printf(" -C, --report-cycles report times in cpu cycle units (default seconds)\n");
- printf(" -H, --report-histogram print out all results (default print summary only)\n");
- printf(" -U, --report-unsorted (implies -H) print out unsorted results (default sorted)\n");
+ printf(" -g, --gettimeofday Use gettimeofday() to calibrate Mhz (default /proc/cpuinfo)\n");
+ printf(" -M, --mhz=<CPU Mhz> Use <CPU Mhz> for Mhz\n");
}
-static void print_report(struct report_options * options,
- unsigned int iters, double size, int duplex,
+static void print_report(double cycles_to_units, unsigned int iters,
+ int size, int duplex,
cycles_t *tposted, cycles_t *tcompleted)
{
- double cycles_to_units;
- double tsize; /* Transferred size, in megabytes */
+ unsigned long tsize; /* Transferred size, in megabytes */
int i, j;
- const char* units;
int opt_posted = 0, opt_completed = 0;
cycles_t opt_delta;
cycles_t t;
@@ -453,19 +447,20 @@ static void print_report(struct report_o
}
}
- if (options->cycles) {
- cycles_to_units = 1;
- units = "cycles";
- } else {
- cycles_to_units = get_cpu_mhz() * 1000000;
- units = "sec";
- }
tsize = duplex ? 2 : 1;
- tsize = tsize * size / 0x100000;
+ tsize = tsize * size / 1024; /* convert to KB */
- printf("Bandwidth peak (#%d to #%d): %g MByte/%s\n", opt_posted, opt_completed, tsize * cycles_to_units / opt_delta, units);
- printf("Bandwidth average: %g MByte/%s\n", tsize * iters * cycles_to_units / (tcompleted[iters - 1] - tposted[0]), units);
+ printf("Bandwidth peak (#%d to #%d): %'.2f MB/sec\n",
+ opt_posted, opt_completed,
+ tsize * cycles_to_units / opt_delta / 1024);
+ printf("Bandwidth average: %'.2f MB/sec\n",
+ tsize * iters * cycles_to_units / (tcompleted[iters - 1] - tposted[0]) / 1024);
+
+ printf("Service Demand peak (#%d to #%d): %ld cycles/KB\n",
+ opt_posted, opt_completed, opt_delta/tsize);
+ printf("Service Demand Avg : %ld cycles/KB\n",
+ (tcompleted[iters - 1] - tposted[0])/(tsize * iters));
}
@@ -478,16 +473,21 @@ int main(int argc, char *argv[])
struct pingpong_dest *rem_dest;
char *ib_devname = NULL;
char *servername = NULL;
+ struct timeval tod_start, tod_end;
+ cycles_t tsc_start, tsc_end;
int port = 18515;
int ib_port = 1;
- int size = 1;
+ int size = 16 * 1024;
int tx_depth = 100;
int iters = 1000;
int scnt, ccnt;
int sockfd;
int duplex = 0;
+ int use_gettod = 0;
struct ibv_qp *qp;
- struct report_options report = {};
+ unsigned long long tsc_total, tod_total;
+ double cycles_to_units = get_cpu_mhz() * MILLION;
+ double mhz = 0;
cycles_t *tposted;
cycles_t *tcompleted;
@@ -503,12 +503,13 @@ int main(int argc, char *argv[])
{ .name = "size", .has_arg = 1, .val = 's' },
{ .name = "iters", .has_arg = 1, .val = 'n' },
{ .name = "tx-depth", .has_arg = 1, .val = 't' },
+ { .name = "mhz", .has_arg = 1, .val = 'M' },
{ .name = "bidirectional", .has_arg = 0, .val = 'b' },
- { .name = "report-cycles", .has_arg = 0, .val = 'C' },
+ { .name = "gettimeofday", .has_arg = 0, .val = 'g' },
{ 0 }
};
- c = getopt_long(argc, argv, "p:d:i:s:n:t:bC", long_options, NULL);
+ c = getopt_long(argc, argv, "p:d:i:s:n:t:M:bg", long_options, NULL);
if (c == -1)
break;
@@ -565,8 +566,16 @@ int main(int argc, char *argv[])
duplex = 1;
break;
- case 'C':
- report.cycles = 1;
+ case 'g':
+ use_gettod = 1; /* i.e. don't use /proc/cpuinfo */
+ break;
+
+ case 'M':
+ mhz = strtod(optarg, NULL);
+ if (mhz < 1) {
+ usage(argv[0]);
+ return 1;
+ }
break;
default:
@@ -701,10 +710,15 @@ int main(int argc, char *argv[])
}
/* Done with setup. Start the test. */
-
+ if (gettimeofday(&tod_start, NULL)) {
+ perror("gettimeofday");
+ return 1;
+ }
+ tsc_start = get_cycles();
+
while (scnt < iters || ccnt < iters) {
- while (scnt < iters && scnt - ccnt < tx_depth) {
+ while ((scnt < iters) && (scnt - ccnt < tx_depth)) {
struct ibv_send_wr *bad_wr;
tposted[scnt] = get_cycles();
@@ -742,6 +756,12 @@ int main(int argc, char *argv[])
}
}
+ if (gettimeofday(&tod_end, NULL)) {
+ perror("gettimeofday");
+ return 1;
+ }
+ tsc_end = get_cycles();
+
if (servername) {
rem_dest = pp_client_exch_dest(sockfd, &my_dest);
} else {
@@ -751,7 +771,38 @@ int main(int argc, char *argv[])
write(sockfd, "done", sizeof "done");
close(sockfd);
- print_report(&report, iters, size, duplex, tposted, tcompleted);
+ /* sum total cycles into tsc_start */
+ tsc_total = tposted[0] - tsc_start; /* prelude */
+ for (ccnt = 0; ccnt < iters-1; ccnt++) /* time in loop sending */
+ tsc_total += tposted[ccnt+1] - tposted[ccnt];
+
+ /* time until last send completed */
+ tsc_total += tcompleted[ccnt] - tposted[ccnt];
+ tsc_total += tsc_end - tcompleted[ccnt]; /* epilogue */
+
+ tod_total = (unsigned long long) (tod_end.tv_sec - tod_start.tv_sec) *
+ MILLION +
+ (tod_end.tv_usec - tod_start.tv_usec);
+
+
+ printf("/proc/cpuinfo reports %'.2f Mhz\n", cycles_to_units/MILLION);
+ printf("Calculated speed is %'.2f Mhz", (double) tsc_total / tod_total);
+ printf(" (%Ld cycles / %Ld usec)\n", tsc_total, tod_total);
+
+ if (mhz) {
+ printf("User specified %7g Mhz", mhz);
+ cycles_to_units = mhz * MILLION;
+ } else if (use_gettod) {
+ if (tod_total < 10000)
+ printf( "WARNING: Test was too short (%1.08g sec) "
+ " not using gettimeofday()\n"
+ " Use -b, more (-n) or bigger (-s) samples.\n"
+ , (double) tod_total / MILLION);
+ else
+ cycles_to_units = (double) tsc_total / tod_total * MILLION;
+ }
+
+ print_report(cycles_to_units, iters, size, duplex, tposted, tcompleted);
free(tposted);
free(tcompleted);
More information about the general
mailing list