[ofw] [Patch][Tools] Change Peak BW calculation when the difference between post and completion is abnormal

Alex Naslednikov xalex at mellanox.co.il
Sun May 23 00:06:31 PDT 2010


The function used for time measurement, QueryPerformanceCounter(), does not always return proper results.
According to MS (http://msdn.microsoft.com/en-us/library/ms644904%28VS.85%29.aspx)
1. It can return 0
2. By default, thread affinity is not set, so the call can be serviced by any of the CPUs and different CPUs can return different results
3. As a consequence, a time difference between post and completion can be infinitely close to 0.
4. This does not affect the BW but may affect the Peak BW on some computers

signed-off by: Alexander Naslednikov (xalex at mellanox.co.il)
Index: D:/windows/MLNX_VPI_trunk/tools/perftests/user/send_bw/send_bw.c
===================================================================
--- D:/windows/MLNX_VPI_trunk/tools/perftests/user/send_bw/send_bw.c (revision 5893)
+++ D:/windows/MLNX_VPI_trunk/tools/perftests/user/send_bw/send_bw.c (revision 5894)
@@ -571,13 +571,17 @@
  cycles_t   t;


+ tsize = duplex ? 2 : 1;
+ tsize = tsize * size;
+
  opt_delta = tcompleted[opt_posted] - tposted[opt_completed];
-
+#define MAX_AVAILABLE_BW 40000000
  /* Find the peak bandwidth */
  for (i = 0; i < (int)iters; ++i)
   for (j = i; j < (int)iters; ++j) {
    t = (tcompleted[j] - tposted[i]) / (j - i + 1);
-   if (t < opt_delta) {
+   if (t < opt_delta && t > (tsize / MAX_AVAILABLE_BW)) {
+    // Avoid the situation when opt_delta is infinitely close to 0
     opt_delta  = t;
     opt_posted = i;
     opt_completed = j;
@@ -586,8 +590,6 @@

  cycles_to_units = get_cpu_mhz();

- tsize = duplex ? 2 : 1;
- tsize = tsize * size;
  printf("%7d        %d            %7.2f               %7.2f \n",
         size,iters,tsize * cycles_to_units / opt_delta / 0x100000,
   (uint64_t)tsize * iters * cycles_to_units /(tcompleted[iters - 1] - tposted[0]) / 0x100000);
Index: D:/windows/MLNX_VPI_trunk/tools/perftests/user/write_bw/write_bw.c
===================================================================
--- D:/windows/MLNX_VPI_trunk/tools/perftests/user/write_bw/write_bw.c (revision 5893)
+++ D:/windows/MLNX_VPI_trunk/tools/perftests/user/write_bw/write_bw.c (revision 5894)
@@ -501,13 +501,17 @@
  cycles_t   t;


+ tsize = duplex ? 2 : 1;
+ tsize = tsize * size;
+
  opt_delta = tcompleted[opt_posted] - tposted[opt_completed];
-
+#define MAX_AVAILABLE_BW 40000000
  /* Find the peak bandwidth */
  for (i = 0; i < iters * user_param->numofqps; ++i)
   for (j = i; j < iters * user_param->numofqps; ++j) {
    t = (tcompleted[j] - tposted[i]) / (j - i + 1);
-   if (t < opt_delta) {
+   if (t < opt_delta && t > (tsize / MAX_AVAILABLE_BW)) {
+    // Avoid the situation when opt_delta is infinitely close to 0
     opt_delta  = t;
     opt_posted = i;
     opt_completed = j;
@@ -517,8 +521,6 @@

  cycles_to_units = get_cpu_mhz();

- tsize = duplex ? 2 : 1;
- tsize = tsize * size;
  printf("%7d        %d            %7.2f               %7.2f\n",
   size,iters,tsize * cycles_to_units / opt_delta / 0x100000,
   (uint64_t)tsize * iters * user_param->numofqps * cycles_to_units /(tcompleted[(iters* user_param->numofqps) - 1] - tposted[0]) / 0x100000);
Index: D:/windows/MLNX_VPI_trunk/tools/perftests/user/read_bw/read_bw.c
===================================================================
--- D:/windows/MLNX_VPI_trunk/tools/perftests/user/read_bw/read_bw.c (revision 5893)
+++ D:/windows/MLNX_VPI_trunk/tools/perftests/user/read_bw/read_bw.c (revision 5894)
@@ -467,13 +467,17 @@
  cycles_t   t;


+ tsize = duplex ? 2 : 1;
+ tsize = tsize * size;
+
  opt_delta = tcompleted[opt_posted] - tposted[opt_completed];
-
+#define MAX_AVAILABLE_BW 40000000
  /* Find the peak bandwidth */
  for (i = 0; i < iters; ++i)
   for (j = i; j < iters; ++j) {
    t = (tcompleted[j] - tposted[i]) / (j - i + 1);
-   if (t < opt_delta) {
+   if (t < opt_delta && t > (tsize / MAX_AVAILABLE_BW)) {
+    // Avoid the situation when opt_delta is infinitely close to 0
     opt_delta  = t;
     opt_posted = i;
     opt_completed = j;
@@ -482,8 +486,6 @@

  cycles_to_units = get_cpu_mhz() ;

- tsize = duplex ? 2 : 1;
- tsize = tsize * size;
  printf("%7d        %d            %7.2f               %7.2f\n",
         size,iters,tsize * cycles_to_units / opt_delta / 0x100000,
         (uint64_t)tsize * iters * cycles_to_units /(tcompleted[iters - 1] - tposted[0]) / 0x100000);
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20100523/5cc8ab8b/attachment.html>


More information about the ofw mailing list