Add math benchmark latency test

author Wilco Dijkstra <wdijkstr@arm.com>

Thu, 17 Aug 2017 15:27:20 +0000 (16:27 +0100)

committer Wilco Dijkstra <wdijkstr@arm.com>

Thu, 17 Aug 2017 15:27:20 +0000 (16:27 +0100)
author Wilco Dijkstra <wdijkstr@arm.com>
Thu, 17 Aug 2017 15:27:20 +0000 (16:27 +0100)
committer Wilco Dijkstra <wdijkstr@arm.com>
Thu, 17 Aug 2017 15:27:20 +0000 (16:27 +0100)
diff --git a/ChangeLog b/ChangeLog

index f11f6e11c7e15d6f0fa8173491cac5f4cd9cb729..08207b1c4e00b2b9a202a8f2692b2b90301c71c4 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+2017-08-17  Wilco Dijkstra  <wdijkstr@arm.com>
+
+       * benchtests/bench-skeleton.c (main): Add support for
+       latency benchmarking.
+       * benchtests/scripts/bench.py: Add support for latency benchmarking.
+
  2017-08-17  H.J. Lu  <hongjiu.lu@intel.com>
  
         * Makeconfig (+link-pie-before-libc): Add CRT-* hook to override
diff --git a/benchtests/bench-skeleton.c b/benchtests/bench-skeleton.c

index 3c6dad705594ac0a53edcb4e09686252c13127cf..955b2e1d2125775c6ffc10a004184a9564496e46 100644 (file)
--- a/benchtests/bench-skeleton.c
+++ b/benchtests/bench-skeleton.c
@@ -71,8 +71,10 @@ main (int argc, char **argv)
        bool is_bench = strncmp (VARIANT (v), "workload-", 9) == 0;
        double d_total_i = 0;
        timing_t total = 0, max = 0, min = 0x7fffffffffffffff;
+      timing_t throughput = 0, latency = 0;
        int64_t c = 0;
        uint64_t cur;
+      BENCH_VARS;
        while (1)
         {
           if (is_bench)
@@ -86,7 +88,16 @@ main (int argc, char **argv)
                   BENCH_FUNC (v, i);
               TIMING_NOW (end);
               TIMING_DIFF (cur, start, end);
-             TIMING_ACCUM (total, cur);
+             TIMING_ACCUM (throughput, cur);
+
+             TIMING_NOW (start);
+             for (k = 0; k < iters; k++)
+               for (i = 0; i < NUM_SAMPLES (v); i++)
+                 BENCH_FUNC_LAT (v, i);
+             TIMING_NOW (end);
+             TIMING_DIFF (cur, start, end);
+             TIMING_ACCUM (latency, cur);
+
               d_total_i += iters * NUM_SAMPLES (v);
             }
           else
@@ -131,12 +142,20 @@ main (int argc, char **argv)
        /* Begin variant.  */
        json_attr_object_begin (&json_ctx, VARIANT (v));
  
-      json_attr_double (&json_ctx, "duration", d_total_s);
-      json_attr_double (&json_ctx, "iterations", d_total_i);
        if (is_bench)
-       json_attr_double (&json_ctx, "throughput", d_total_s / d_total_i);
+       {
+         json_attr_double (&json_ctx, "reciprocal-throughput",
+                           throughput / d_total_i);
+         json_attr_double (&json_ctx, "latency", latency / d_total_i);
+         json_attr_double (&json_ctx, "max-throughput",
+                           d_total_i / throughput * 1000000000.0);
+         json_attr_double (&json_ctx, "min-throughput",
+                           d_total_i / latency * 1000000000.0);
+       }
        else
         {
+         json_attr_double (&json_ctx, "duration", d_total_s);
+         json_attr_double (&json_ctx, "iterations", d_total_i);
           json_attr_double (&json_ctx, "max", max / d_iters);
           json_attr_double (&json_ctx, "min", min / d_iters);
           json_attr_double (&json_ctx, "mean", d_total_s / d_total_i);
diff --git a/benchtests/scripts/bench.py b/benchtests/scripts/bench.py

index 8c1c9eeb2bc67a16cb8a8e010fd2b8a2ef8ab6df..33dc7b9e9f315e72a81982561ea39b9eb788a696 100755 (executable)
--- a/benchtests/scripts/bench.py
+++ b/benchtests/scripts/bench.py
@@ -45,7 +45,7 @@ DEFINES_TEMPLATE = '''
  # variant is represented by the _VARIANT structure.  The ARGS structure
  # represents a single set of arguments.
  STRUCT_TEMPLATE = '''
-#define CALL_BENCH_FUNC(v, i) %(func)s (%(func_args)s)
+#define CALL_BENCH_FUNC(v, i, x) %(func)s (x %(func_args)s)
  
  struct args
  {
@@ -84,7 +84,9 @@ EPILOGUE = '''
  #define RESULT(__v, __i) (variants[(__v)].in[(__i)].timing)
  #define RESULT_ACCUM(r, v, i, old, new) \\
          ((RESULT ((v), (i))) = (RESULT ((v), (i)) * (old) + (r)) / ((new) + 1))
-#define BENCH_FUNC(i, j) ({%(getret)s CALL_BENCH_FUNC (i, j);})
+#define BENCH_FUNC(i, j) ({%(getret)s CALL_BENCH_FUNC (i, j, );})
+#define BENCH_FUNC_LAT(i, j) ({%(getret)s CALL_BENCH_FUNC (i, j, %(latarg)s);})
+#define BENCH_VARS %(defvar)s
  #define FUNCNAME "%(func)s"
  #include "bench-skeleton.c"'''
  
@@ -122,17 +124,23 @@ def gen_source(func, directives, all_vals):
      # If we have a return value from the function, make sure it is
      # assigned to prevent the compiler from optimizing out the
      # call.
+    getret = ''
+    latarg = ''
+    defvar = ''
+
      if directives['ret']:
          print('static %s volatile ret;' % directives['ret'])
-        getret = 'ret = '
-    else:
-        getret = ''
+        print('static %s zero __attribute__((used)) = 0;' % directives['ret'])
+        getret = 'ret = func_res = '
+        # Note this may not work if argument and result type are incompatible.
+        latarg = 'func_res * zero +'
+        defvar = '%s func_res = 0;' % directives['ret']
  
      # Test initialization.
      if directives['init']:
          print('#define BENCH_INIT %s' % directives['init'])
  
-    print(EPILOGUE % {'getret': getret, 'func': func})
+    print(EPILOGUE % {'getret': getret, 'func': func, 'latarg': latarg, 'defvar': defvar })
  
  
  def _print_arg_data(func, directives, all_vals):
author	Wilco Dijkstra <wdijkstr@arm.com>
	Thu, 17 Aug 2017 15:27:20 +0000 (16:27 +0100)
committer	Wilco Dijkstra <wdijkstr@arm.com>
	Thu, 17 Aug 2017 15:27:20 +0000 (16:27 +0100)
ChangeLog		patch | blob | blame | history
benchtests/bench-skeleton.c		patch | blob | blame | history
benchtests/scripts/bench.py		patch | blob | blame | history