]> git.ipfire.org Git - thirdparty/glibc.git/blob - benchtests/scripts/plot_strings.py
Update copyright dates with scripts/update-copyrights.
[thirdparty/glibc.git] / benchtests / scripts / plot_strings.py
1 #!/usr/bin/python3
2 # Plot GNU C Library string microbenchmark output.
3 # Copyright (C) 2019-2020 Free Software Foundation, Inc.
4 # This file is part of the GNU C Library.
5 #
6 # The GNU C Library is free software; you can redistribute it and/or
7 # modify it under the terms of the GNU Lesser General Public
8 # License as published by the Free Software Foundation; either
9 # version 2.1 of the License, or (at your option) any later version.
10 #
11 # The GNU C Library is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # Lesser General Public License for more details.
15 #
16 # You should have received a copy of the GNU Lesser General Public
17 # License along with the GNU C Library; if not, see
18 # <https://www.gnu.org/licenses/>.
19 """Plot string microbenchmark results.
20
21 Given a benchmark results file in JSON format and a benchmark schema file,
22 plot the benchmark timings in one of the available representations.
23
24 Separate figure is generated and saved to a file for each 'results' array
25 found in the benchmark results file. Output filenames and plot titles
26 are derived from the metadata found in the benchmark results file.
27 """
28 import argparse
29 from collections import defaultdict
30 import json
31 import matplotlib as mpl
32 import numpy as np
33 import os
34
35 try:
36 import jsonschema as validator
37 except ImportError:
38 print("Could not find jsonschema module.")
39 raise
40
41 # Use pre-selected markers for plotting lines to improve readability
42 markers = [".", "x", "^", "+", "*", "v", "1", ">", "s"]
43
44 # Benchmark variants for which the x-axis scale should be logarithmic
45 log_variants = {"powers of 2"}
46
47
def gmean(numbers):
    """Compute geometric mean.

    Args:
        numbers: 2-D list of numbers
    Return:
        numpy array with geometric means of numbers along each column
    """
    # The np.complex alias was deprecated in NumPy 1.20 and removed in
    # 1.24; use the explicit 128-bit complex type.  Complex arithmetic
    # keeps the principal root well-defined even if an entry were
    # negative (timings are normally positive).
    a = np.array(numbers, dtype=np.complex128)
    means = a.prod(0) ** (1.0 / len(a))
    # Discard the (zero, for positive inputs) imaginary part.
    return np.real(means)
59
60
def relativeDifference(x, x_reference):
    """Compute per-element relative difference between each row of
    a matrix and an array of reference values.

    Args:
        x: numpy matrix of shape (n, m)
        x_reference: numpy array of size m
    Return:
        relative difference between rows of x and x_reference (in %)
    """
    # (x - reference) / reference, expressed as a percentage.
    return (np.subtract(x, x_reference) * 100.0) / x_reference
73
74
def plotTime(timings, routine, bench_variant, title, outpath):
    """Plot absolute timing values.

    Args:
        timings: timings to plot
        routine: benchmarked string routine name
        bench_variant: top-level benchmark variant name
        title: figure title (generated so far)
        outpath: output file path (generated so far)
    Return:
        y: y-axis values to plot
        title_final: final figure title
        outpath_final: file output file path
    """
    plt.figure()

    # Hide the absolute y-axis values unless explicitly requested.
    if not args.values:
        plt.axes().yaxis.set_major_formatter(plt.NullFormatter())

    plt.ylabel("timing")

    title_final = "%s %s benchmark timings\n%s" % (routine, bench_variant,
                                                   title)
    filename = "%s_%s_%s%s" % (routine, args.plot, bench_variant, outpath)
    outpath_final = os.path.join(args.outdir, filename)

    return timings, title_final, outpath_final
102
103
def plotRelative(timings, all_timings, routine, ifuncs, bench_variant,
                 title, outpath):
    """Plot timing values relative to a chosen ifunc

    Args:
        timings: timings to plot
        all_timings: all collected timings
        routine: benchmarked string routine name
        ifuncs: names of ifuncs tested
        bench_variant: top-level benchmark variant name
        title: figure title (generated so far)
        outpath: output file path (generated so far)
    Return:
        y: y-axis values to plot
        title_final: final figure title
        outpath_final: file output file path
    """
    # The baseline ifunc defaults to the first one tested; a user-chosen
    # baseline is normalized the same way ifunc names are elsewhere.
    baseline = args.baseline.replace("__", "") if args.baseline else ifuncs[0]
    base_timings = all_timings[ifuncs.index(baseline)]

    # Express every timing as a percentage difference from the baseline.
    y = relativeDifference(timings, base_timings)

    plt.figure()
    # Shade the "noise" band around zero and draw the zero line itself.
    plt.axhspan(-args.threshold, args.threshold, color="lightgray", alpha=0.3)
    plt.axhline(0, color="k", linestyle="--", linewidth=0.4)
    plt.ylabel("relative timing (in %)")

    title_final = "Timing comparison against %s\nfor %s benchmark, %s" % (
                  baseline, bench_variant, title)
    filename = "%s_%s_%s%s" % (baseline, args.plot, bench_variant, outpath)
    outpath_final = os.path.join(args.outdir, filename)

    return y, title_final, outpath_final
142
143
def plotMax(timings, routine, bench_variant, title, outpath):
    """Plot results as percentage of the maximum ifunc performance.

    The optimal ifunc is computed on a per-parameter-value basis.
    Performance is computed as 1/timing.

    Args:
        timings: timings to plot
        routine: benchmarked string routine name
        bench_variant: top-level benchmark variant name
        title: figure title (generated so far)
        outpath: output file path (generated so far)
    Return:
        y: y-axis values to plot
        title_final: final figure title
        outpath_final: file output file path
    """
    # Performance is the inverse of timing; rescale so that the best
    # ifunc for each parameter value sits at exactly 100%.
    perf = np.reciprocal(timings)
    best = np.max(perf, axis=0)
    y = 100.0 + relativeDifference(perf, best)

    plt.figure()
    # Reference line marking the per-parameter optimum.
    plt.axhline(100.0, color="k", linestyle="--", linewidth=0.4)
    plt.ylabel("1/timing relative to max (in %)")

    title_final = "Performance comparison against max for %s\n%s " \
                  "benchmark, %s" % (routine, bench_variant, title)
    filename = "%s_%s_%s%s" % (routine, args.plot, bench_variant, outpath)
    outpath_final = os.path.join(args.outdir, filename)

    return y, title_final, outpath_final
174
175
def plotThroughput(timings, params, routine, bench_variant, title, outpath):
    """Plot throughput.

    Throughput is computed as the varied parameter value over timing.

    Args:
        timings: timings to plot
        params: varied parameter values
        routine: benchmarked string routine name
        bench_variant: top-level benchmark variant name
        title: figure title (generated so far)
        outpath: output file path (generated so far)
    Return:
        y: y-axis values to plot
        title_final: final figure title
        outpath_final: file output file path
    """
    # Throughput: parameter value (e.g. length) processed per time unit.
    y = np.divide(params, timings)

    plt.figure()

    # Hide the absolute y-axis values unless explicitly requested.
    if not args.values:
        plt.axes().yaxis.set_major_formatter(plt.NullFormatter())

    plt.ylabel("%s / timing" % args.key)

    title_final = "%s %s benchmark throughput results\n%s" % (
                  routine, bench_variant, title)
    filename = "%s_%s_%s%s" % (routine, args.plot, bench_variant, outpath)
    outpath_final = os.path.join(args.outdir, filename)

    return y, title_final, outpath_final
205
206
def finishPlot(x, y, title, outpath, x_scale, plotted_ifuncs):
    """Finish generating current Figure.

    Args:
        x: x-axis values
        y: y-axis values
        title: figure title
        outpath: output file path
        x_scale: x-axis scale
        plotted_ifuncs: names of ifuncs to plot
    """
    plt.xlabel(args.key)
    plt.xscale(x_scale)
    plt.title(title)
    plt.grid(color="k", linestyle=args.grid, linewidth=0.5, alpha=0.5)

    # One line per ifunc, cycling through the preset markers so that
    # overlapping lines remain distinguishable.
    for idx, ifunc_name in enumerate(plotted_ifuncs):
        plt.plot(x, y[idx], marker=markers[idx % len(markers)],
                 label=ifunc_name)

    plt.legend(loc="best", fontsize="small")
    plt.savefig("%s_%s.%s" % (outpath, x_scale, args.extension),
                format=args.extension, dpi=args.resolution)

    # Optionally show the interactive window before releasing the figure.
    if args.display:
        plt.show()

    plt.close()
236
237
def plotRecursive(json_iter, routine, ifuncs, bench_variant, title, outpath,
                  x_scale):
    """Plot benchmark timings.

    Args:
        json_iter: reference to json object
        routine: benchmarked string routine name
        ifuncs: names of ifuncs tested
        bench_variant: top-level benchmark variant name
        title: figure's title (generated so far)
        outpath: output file path (generated so far)
        x_scale: x-axis scale
    """

    # RECURSIVE CASE: descend into each benchmark variant, extending the
    # title and filename with the variant name and switching to a log
    # x-axis for variants known to use logarithmically spaced parameters.
    if "variants" in json_iter:
        for variant in json_iter["variants"]:
            variant_name = variant["name"]
            plotRecursive(variant, routine, ifuncs, bench_variant,
                          "%s%s, " % (title, variant_name),
                          "%s_%s" % (outpath,
                                     variant_name.replace(" ", "_")),
                          "log" if variant_name in log_variants else x_scale)
        return

    # BASE CASE: a 'results' array was reached.  Group the timing
    # vectors by the varied parameter value.
    domain = []
    timings = defaultdict(list)

    for result in json_iter["results"]:
        param = result[args.key]
        domain.append(param)
        timings[param].append(result["timings"])

    # np.unique also sorts the parameter values.
    domain = np.unique(np.array(domain))

    # Reduce multiple timing runs per parameter value to their geometric
    # mean, then transpose so each row holds one ifunc's averages.
    averages = np.array([gmean(timings[param])
                         for param in domain]).transpose()

    # Restrict plotting to the requested ifuncs; the string default
    # ("all") keeps every ifunc.
    if isinstance(args.ifuncs, str):
        plotted_ifuncs = ifuncs
    else:
        plotted_ifuncs = [name.replace("__", "") for name in args.ifuncs]

    plotted_indices = [ifuncs.index(name) for name in plotted_ifuncs]
    plotted_vals = averages[plotted_indices, :]

    # Plot-type specific stage.
    if args.plot == "time":
        codomain, title, outpath = plotTime(plotted_vals, routine,
                                            bench_variant, title, outpath)
    elif args.plot == "rel":
        codomain, title, outpath = plotRelative(plotted_vals, averages,
                                                routine, ifuncs,
                                                bench_variant, title, outpath)
    elif args.plot == "max":
        codomain, title, outpath = plotMax(plotted_vals, routine,
                                           bench_variant, title, outpath)
    elif args.plot == "thru":
        codomain, title, outpath = plotThroughput(plotted_vals, domain,
                                                  routine, bench_variant,
                                                  title, outpath)

    # Stage shared between all plot types.
    finishPlot(domain, codomain, title, outpath, x_scale, plotted_ifuncs)
310
311
def main(args):
    """Program Entry Point.

    Args:
        args: command line arguments (excluding program name)
    """

    # Without interactive display, use the non-GUI Agg backend.  The
    # backend must be chosen before pyplot is imported.
    if not args.display:
        mpl.use("Agg")

    # Import pyplot here (after backend selection) and expose it
    # globally so the plotting helpers can use it.
    global plt
    import matplotlib.pyplot as plt

    with open(args.schema, "r") as f:
        schema = json.load(f)

    for filename in args.bench:
        with open(filename, "r") as f:
            bench = json.load(f)

        # Reject malformed results files up front.
        validator.validate(bench, schema)

        for function in bench["functions"]:
            attrs = bench["functions"][function]
            bench_variant = attrs["bench-variant"]
            # Strip leading double underscores from ifunc names.
            ifuncs = [name.replace("__", "") for name in attrs["ifuncs"]]

            plotRecursive(attrs, function, ifuncs, bench_variant,
                          "", "", args.logarithmic)
346
347
348 """ main() """
349 if __name__ == "__main__":
350
351 parser = argparse.ArgumentParser(description=
352 "Plot string microbenchmark results",
353 formatter_class=argparse.ArgumentDefaultsHelpFormatter)
354
355 # Required parameter
356 parser.add_argument("bench", nargs="+",
357 help="benchmark results file(s) in json format")
358
359 # Optional parameters
360 parser.add_argument("-b", "--baseline", type=str,
361 help="baseline ifunc for 'rel' plot")
362 parser.add_argument("-d", "--display", action="store_true",
363 help="display figures")
364 parser.add_argument("-e", "--extension", type=str, default="png",
365 choices=["png", "pdf", "svg"],
366 help="output file(s) extension")
367 parser.add_argument("-g", "--grid", action="store_const", default="",
368 const="-", help="show grid lines")
369 parser.add_argument("-i", "--ifuncs", nargs="+", default="all",
370 help="ifuncs to plot")
371 parser.add_argument("-k", "--key", type=str, default="length",
372 help="key to access the varied parameter")
373 parser.add_argument("-l", "--logarithmic", action="store_const",
374 default="linear", const="log",
375 help="use logarithmic x-axis scale")
376 parser.add_argument("-o", "--outdir", type=str, default=os.getcwd(),
377 help="output directory")
378 parser.add_argument("-p", "--plot", type=str, default="time",
379 choices=["time", "rel", "max", "thru"],
380 help="plot absolute timings, relative timings, " \
381 "performance relative to max, or throughput")
382 parser.add_argument("-r", "--resolution", type=int, default=100,
383 help="dpi resolution for the generated figures")
384 parser.add_argument("-s", "--schema", type=str,
385 default=os.path.join(os.path.dirname(
386 os.path.realpath(__file__)),
387 "benchout_strings.schema.json"),
388 help="schema file to validate the results file.")
389 parser.add_argument("-t", "--threshold", type=int, default=5,
390 help="threshold to mark in 'rel' graph (in %%)")
391 parser.add_argument("-v", "--values", action="store_true",
392 help="show actual values")
393
394 args = parser.parse_args()
395 main(args)