]> git.ipfire.org Git - thirdparty/glibc.git/blame - benchtests/scripts/compare_bench.py
Update copyright dates with scripts/update-copyrights.
[thirdparty/glibc.git] / benchtests / scripts / compare_bench.py
CommitLineData
#!/usr/bin/python
# Copyright (C) 2015-2019 Free Software Foundation, Inc.
# This file is part of the GNU C Library.
#
# The GNU C Library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# The GNU C Library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with the GNU C Library; if not, see
# <http://www.gnu.org/licenses/>.
"""Compare two benchmark results

Given two benchmark result files and a threshold, this script compares the
benchmark results and flags differences in performance beyond a given
threshold.
"""
24import sys
25import os
26import pylab
27import import_bench as bench
1cf4ae7f 28import argparse
0cd28286
SP
29
def do_compare(func, var, tl1, tl2, par, threshold):
    """Report one aggregate statistic if it moved beyond the threshold

    Looks up the aggregate named by par in both timing dictionaries and
    prints a line to standard output when the change, expressed as a
    percentage of the first value, is greater than threshold.  The line is
    tagged '+++' when the first value is larger than the second and '---'
    otherwise.

    Args:
        func: Function name
        var: Function variant name
        tl1: The first timings list (indexed by aggregate name)
        tl2: The second timings list (indexed by aggregate name)
        par: The aggregate to compare
        threshold: The percentage difference that must be exceeded before a
                   line is printed.
    """
    stat = str(par)

    # Both runs must carry the statistic; otherwise warn and give up.
    try:
        before = tl1[stat]
        after = tl2[stat]
    except KeyError:
        sys.stderr.write('%s(%s)[%s]: stat does not exist\n' % (func, var, par))
        return

    # A zero baseline has no percentage change to report, so stay quiet.
    try:
        change = abs(after - before) * 100 / before
    except ZeroDivisionError:
        return

    if not change > threshold:
        return

    marker = '+++' if before > after else '---'
    print('%s %s(%s)[%s]: (%.2lf%%) from %g to %g' %
          (marker, func, var, par, change, before, after))
0cd28286
SP
62
63
def compare_runs(pts1, pts2, threshold, stats):
    """Compare two benchmark runs

    For every function variant found in the first run, compare the requested
    aggregate statistics and then, when both runs carry detailed timings,
    compare those timings pairwise.  Differences beyond the threshold are
    printed to standard output.

    Args:
        pts1: Timing data from first machine
        pts2: Timing data from second machine
        threshold: The percentage difference beyond which a change is
                   reported.
        stats: Whitespace separated names of the aggregate statistics to
               compare.
    """
    stat_names = stats.split()
    funcs2 = pts2['functions']

    for func, variants in pts1['functions'].items():
        for var, tl1 in variants.items():
            # The two runs are expected to contain the same functions and
            # variants.  When the second run lacks one, warn and move on
            # rather than aborting the whole comparison.
            try:
                tl2 = funcs2[func][var]
            except KeyError:
                sys.stderr.write('%s(%s): missing from second benchmark\n' %
                                 (func, var))
                continue

            # Compare the consolidated numbers
            for stat in stat_names:
                do_compare(func, var, tl1, tl2, stat, threshold)

            # Skip over to the next variant or function if there is no
            # detailed timing info for the function variant.
            if 'timings' not in tl1 or 'timings' not in tl2:
                continue

            timings1 = tl1['timings']
            timings2 = tl2['timings']

            # If two lists do not have the same length then it is likely that
            # the performance characteristics of the function have changed.
            # XXX: It is also likely that there was some measurement that
            # strayed outside the usual range.  Such outliers should not
            # happen on an idle machine with identical hardware and
            # configuration, but ideal environments are hard to come by.
            if len(timings1) != len(timings2):
                print('* %s(%s): Timing characteristics changed' %
                      (func, var))
                print('\tBefore: [%s]' %
                      ', '.join([str(x) for x in timings1]))
                print('\tAfter: [%s]' %
                      ', '.join([str(x) for x in timings2]))
                continue

            # Report the pairs whose difference crosses the threshold.
            for t1, t2 in zip(timings1, timings2):
                # A zero baseline has no percentage change; skip it quietly,
                # as do_compare does for the aggregate statistics.
                if t1 == 0:
                    continue

                # 100.0 keeps the percentage fractional even where '/'
                # truncates integer operands (Python 2).
                d = abs(t2 - t1) * 100.0 / t1
                if d > threshold:
                    ind = '-' if t2 > t1 else '+'
                    print("%s %s(%s): (%.2lf%%) from %g to %g" %
                          (ind, func, var, d, t1, t2))
121
122
def plot_graphs(bench1, bench2):
    """Plot graphs for functions

    Make scatter plots for the functions and their variants.  Each variant
    that has detailed timings in both runs is saved to a PNG file named after
    the function and variant, with the first run drawn in red and the second
    in green.  Variants without usable timings in either run are skipped
    with a message on standard error.

    Args:
        bench1: Set of points from the first machine
        bench2: Set of points from the second machine.
    """
    for func, variants in bench1['functions'].items():
        for var in variants:
            # No point trying to print a graph if there are no detailed
            # timings in either run; an empty list would also make the
            # marker size computation below divide by zero.
            timings1 = _detailed_timings(bench1, func, var)
            timings2 = _detailed_timings(bench2, func, var)
            if timings1 is None or timings2 is None:
                sys.stderr.write('Skipping graph for %s(%s)\n' % (func, var))
                continue

            pylab.clf()
            pylab.ylabel('Time (cycles)')

            # First set of points in red, second set in green.
            for timings, color in ((timings1, 'r'), (timings2, 'g')):
                length = len(timings)
                X = [float(x) for x in range(length)]
                # Marker size shrinks as the number of points grows; 100.0
                # avoids integer truncation where '/' floors (Python 2).
                lines = pylab.scatter(X, timings, 1.5 + 100.0 / length)
                pylab.setp(lines, 'color', color)

            if var:
                filename = "%s-%s.png" % (func, var)
            else:
                filename = "%s.png" % func
            # Newline-terminated so successive progress messages do not run
            # together on standard error.
            sys.stderr.write('Writing out %s\n' % filename)
            pylab.savefig(filename)


def _detailed_timings(bench_data, func, var):
    """Return the non-empty 'timings' sequence of a variant, else None."""
    timings = bench_data['functions'].get(func, {}).get(var, {}).get('timings')
    if timings is None or len(timings) == 0:
        return None
    return timings
163
def main(bench1, bench2, schema, threshold, stats):
    """Load two benchmark outputs, plot them and report their differences

    Both inputs are read with bench.parse_bench using the given schema.  The
    graphs are plotted from the parsed data first; bench.compress_timings is
    then applied to each run before the runs are compared.

    Args:
        bench1: First benchmark output, as passed to bench.parse_bench
        bench2: Second benchmark output, as passed to bench.parse_bench
        schema: Schema handed to bench.parse_bench for both inputs
        threshold: Percentage difference forwarded to compare_runs
        stats: Statistic names forwarded to compare_runs
    """
    first, second = [bench.parse_bench(source, schema)
                     for source in (bench1, bench2)]

    plot_graphs(first, second)

    for run in (first, second):
        bench.compress_timings(run)

    compare_runs(first, second, threshold, stats)
0cd28286
SP
174
175
if __name__ == '__main__':
    # By default the schema is looked up next to this script.
    script_dir = os.path.dirname(os.path.realpath(__file__))
    default_schema = os.path.join(script_dir, 'benchout.schema.json')

    parser = argparse.ArgumentParser(
        description='Take two benchmark and compare their timings.')

    # Required parameters
    parser.add_argument('bench1', help='First bench to compare')
    parser.add_argument('bench2', help='Second bench to compare')

    # Optional parameters
    parser.add_argument(
        '--schema',
        default=default_schema,
        help='JSON file to validate source/dest files (default: %(default)s)')
    # NOTE(review): the help text says "equal or higher", but do_compare and
    # compare_runs report only differences strictly above the threshold.
    parser.add_argument(
        '--threshold',
        default=10.0,
        type=float,
        help='Only print those with equal or higher threshold '
             '(default: %(default)s)')
    parser.add_argument(
        '--stats',
        default='min mean',
        type=str,
        help='Only consider values from the statistics specified as a space '
             'separated list (default: %(default)s)')

    args = parser.parse_args()

    main(args.bench1, args.bench2, args.schema, args.threshold, args.stats)