Improve math benchmark infrastructure

author Wilco Dijkstra <wdijkstr@arm.com>

Tue, 20 Jun 2017 15:26:26 +0000 (16:26 +0100)

committer Wilco Dijkstra <wdijkstr@arm.com>

Tue, 20 Jun 2017 15:26:26 +0000 (16:26 +0100)
author Wilco Dijkstra <wdijkstr@arm.com>
Tue, 20 Jun 2017 15:26:26 +0000 (16:26 +0100)
committer Wilco Dijkstra <wdijkstr@arm.com>
Tue, 20 Jun 2017 15:26:26 +0000 (16:26 +0100)
diff --git a/ChangeLog b/ChangeLog

index 2d81375287f4731e5b5ab1f1f47f37007be61ac3..80bb1f847e704dd20fe921b88cc4b3fc30a53a23 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+2017-06-20  Wilco Dijkstra  <wdijkstr@arm.com>
+
+       * benchtests/README: Describe workload feature.
+       * benchtests/bench-skeleton.c (main): Add support for
+       benchmarking traces from workloads.
+
  2017-06-20  Zack Weinberg  <zackw@panix.com>
  
         * string/string.h (__mempcpy_inline): Delete.
diff --git a/benchtests/README b/benchtests/README

index 2c5f38113593ea7da90895266c8fd523fa21c5a1..b015acfd5385d616a4d3992ec0518f5c50da653e 100644 (file)
--- a/benchtests/README
+++ b/benchtests/README
@@ -102,6 +102,12 @@ the same file by using the `name' directive that looks something like this:
  See the pow-inputs file for an example of what such a partitioned input file
  would look like.
  
+It is also possible to measure throughput of a (partial) trace extracted from
+a real workload.  In this case the whole trace is iterated over multiple times
+rather than repeating every input multiple times.  Name the inputs as follows:
+
+  ##name: workload-<name>
+
  Benchmark Sets:
  ==============
  
diff --git a/benchtests/bench-skeleton.c b/benchtests/bench-skeleton.c

index 09eb78df1bce2d9f5e410e3e82821eb9b271e70d..3c6dad705594ac0a53edcb4e09686252c13127cf 100644 (file)
--- a/benchtests/bench-skeleton.c
+++ b/benchtests/bench-skeleton.c
@@ -68,34 +68,50 @@ main (int argc, char **argv)
        clock_gettime (CLOCK_MONOTONIC_RAW, &runtime);
        runtime.tv_sec += DURATION;
  
+      bool is_bench = strncmp (VARIANT (v), "workload-", 9) == 0;
        double d_total_i = 0;
        timing_t total = 0, max = 0, min = 0x7fffffffffffffff;
        int64_t c = 0;
+      uint64_t cur;
        while (1)
         {
-         for (i = 0; i < NUM_SAMPLES (v); i++)
+         if (is_bench)
             {
-             uint64_t cur;
+             /* Benchmark a real trace of calls - all samples are iterated
+                over once before repeating.  This models actual use more
+                accurately than repeating the same sample many times.  */
               TIMING_NOW (start);
               for (k = 0; k < iters; k++)
-               BENCH_FUNC (v, i);
+               for (i = 0; i < NUM_SAMPLES (v); i++)
+                 BENCH_FUNC (v, i);
               TIMING_NOW (end);
-
               TIMING_DIFF (cur, start, end);
+             TIMING_ACCUM (total, cur);
+             d_total_i += iters * NUM_SAMPLES (v);
+           }
+         else
+           for (i = 0; i < NUM_SAMPLES (v); i++)
+             {
+               TIMING_NOW (start);
+               for (k = 0; k < iters; k++)
+                 BENCH_FUNC (v, i);
+               TIMING_NOW (end);
  
-             if (cur > max)
-               max = cur;
+               TIMING_DIFF (cur, start, end);
  
-             if (cur < min)
-               min = cur;
+               if (cur > max)
+                 max = cur;
  
-             TIMING_ACCUM (total, cur);
-             /* Accumulate timings for the value.  In the end we will divide
-                by the total iterations.  */
-             RESULT_ACCUM (cur, v, i, c * iters, (c + 1) * iters);
+               if (cur < min)
+                 min = cur;
  
-             d_total_i += iters;
-           }
+               TIMING_ACCUM (total, cur);
+               /* Accumulate timings for the value.  In the end we will divide
+                  by the total iterations.  */
+               RESULT_ACCUM (cur, v, i, c * iters, (c + 1) * iters);
+
+               d_total_i += iters;
+             }
           c++;
           struct timespec curtime;
  
@@ -117,11 +133,16 @@ main (int argc, char **argv)
  
        json_attr_double (&json_ctx, "duration", d_total_s);
        json_attr_double (&json_ctx, "iterations", d_total_i);
-      json_attr_double (&json_ctx, "max", max / d_iters);
-      json_attr_double (&json_ctx, "min", min / d_iters);
-      json_attr_double (&json_ctx, "mean", d_total_s / d_total_i);
+      if (is_bench)
+       json_attr_double (&json_ctx, "throughput", d_total_s / d_total_i);
+      else
+       {
+         json_attr_double (&json_ctx, "max", max / d_iters);
+         json_attr_double (&json_ctx, "min", min / d_iters);
+         json_attr_double (&json_ctx, "mean", d_total_s / d_total_i);
+       }
  
-      if (detailed)
+      if (detailed && !is_bench)
         {
           json_array_begin (&json_ctx, "timings");
author	Wilco Dijkstra <wdijkstr@arm.com>
	Tue, 20 Jun 2017 15:26:26 +0000 (16:26 +0100)
committer	Wilco Dijkstra <wdijkstr@arm.com>
	Tue, 20 Jun 2017 15:26:26 +0000 (16:26 +0100)
ChangeLog		patch \| blob \| blame \| history
benchtests/README		patch \| blob \| blame \| history
benchtests/bench-skeleton.c		patch \| blob \| blame \| history