]>
Commit | Line | Data |
---|---|---|
0cd28286 | 1 | #!/usr/bin/python |
04277e02 | 2 | # Copyright (C) 2015-2019 Free Software Foundation, Inc. |
0cd28286 SP |
3 | # This file is part of the GNU C Library. |
4 | # | |
5 | # The GNU C Library is free software; you can redistribute it and/or | |
6 | # modify it under the terms of the GNU Lesser General Public | |
7 | # License as published by the Free Software Foundation; either | |
8 | # version 2.1 of the License, or (at your option) any later version. | |
9 | # | |
10 | # The GNU C Library is distributed in the hope that it will be useful, | |
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | # Lesser General Public License for more details. | |
14 | # | |
15 | # You should have received a copy of the GNU Lesser General Public | |
16 | # License along with the GNU C Library; if not, see | |
17 | # <http://www.gnu.org/licenses/>. | |
18 | """Compare two benchmark results | |
19 | ||
20 | Given two benchmark result files and a threshold, this script compares the | |
21 | benchmark results and flags differences in performance beyond a given | |
22 | threshold. | |
23 | """ | |
24 | import sys | |
25 | import os | |
26 | import pylab | |
27 | import import_bench as bench | |
1cf4ae7f | 28 | import argparse |
0cd28286 SP |
29 | |
def do_compare(func, var, tl1, tl2, par, threshold):
    """Compare one of the aggregate measurements

    Helper function to compare one of the aggregate measurements of a function
    variant.  The relative change from the first value to the second is
    computed as a percentage of the first value; when it is strictly greater
    than the threshold a line is printed to stdout.  The line is prefixed with
    '+++' when the first value is larger than the second and with '---'
    otherwise.

    Diagnostics go to stderr, and nothing is printed to stdout, when the
    statistic is missing from either input or when the first value is zero
    (no relative change can be computed against a zero baseline).

    Args:
        func: Function name
        var: Function variant name
        tl1: Mapping from statistic name to value for the first run
        tl2: Mapping from statistic name to value for the second run
        par: The aggregate to measure
        threshold: The threshold for differences, in percent, beyond which the
                   script should print a warning.
    """
    key = str(par)

    # Only the lookups can raise KeyError, so keep the try body minimal.
    try:
        v1 = tl1[key]
        v2 = tl2[key]
    except KeyError:
        sys.stderr.write('%s(%s)[%s]: stat does not exist\n' % (func, var, par))
        return

    # Report a zero baseline instead of silently dropping the statistic.
    if v1 == 0:
        sys.stderr.write('%s(%s)[%s]: baseline is zero, cannot compare\n'
                         % (func, var, par))
        return

    # 100.0 forces floating point division even for integer inputs.
    d = abs(v2 - v1) * 100.0 / v1

    if d > threshold:
        ind = '+++' if v1 > v2 else '---'
        print('%s %s(%s)[%s]: (%.2lf%%) from %g to %g' %
              (ind, func, var, par, d, v1, v2))
0cd28286 SP |
62 | |
63 | ||
def compare_runs(pts1, pts2, threshold, stats):
    """Compare two benchmark runs

    For every function variant in the first run, compare the requested
    aggregate statistics and then, when both runs carry detailed timings of
    equal length, compare the timings pairwise.  Differences strictly greater
    than the threshold are printed to stdout.

    Args:
        pts1: Timing data from first machine
        pts2: Timing data from second machine
        threshold: The threshold for differences, in percent, beyond which a
                   difference is reported.
        stats: Whitespace separated list of aggregate statistic names to
               compare with do_compare.
    """
    stat_names = stats.split()
    funcs2 = pts2['functions']

    # XXX Only functions and variants present in the first benchmark are
    # visited.  Variants missing from the second benchmark are reported on
    # stderr and skipped; variants that exist only in the second benchmark
    # are ignored.
    for func, variants in pts1['functions'].items():
        for var, tl1 in variants.items():
            try:
                tl2 = funcs2[func][var]
            except KeyError:
                sys.stderr.write('%s(%s): missing from second benchmark\n'
                                 % (func, var))
                continue

            # Compare the consolidated numbers
            for stat in stat_names:
                do_compare(func, var, tl1, tl2, stat, threshold)

            # Skip over to the next variant or function if there is no detailed
            # timing info for the function variant.
            if 'timings' not in tl1 or 'timings' not in tl2:
                continue

            before = tl1['timings']
            after = tl2['timings']

            # If two lists do not have the same length then it is likely that
            # the performance characteristics of the function have changed.
            # XXX: It is also likely that there was some measurement that
            # strayed outside the usual range.  Such outliers should not
            # happen on an idle machine with identical hardware and
            # configuration, but ideal environments are hard to come by.
            if len(before) != len(after):
                print('* %s(%s): Timing characteristics changed' %
                      (func, var))
                print('\tBefore: [%s]' %
                      ', '.join([str(x) for x in before]))
                print('\tAfter: [%s]' %
                      ', '.join([str(x) for x in after]))
                continue

            # Report the pairs whose difference crosses the threshold we have
            # set.
            zero_baselines = 0
            for t1, t2 in zip(before, after):
                # A relative change against a zero baseline is undefined;
                # count it instead of dividing by zero.
                if t1 == 0:
                    zero_baselines += 1
                    continue

                # 100.0 forces floating point division for integer inputs.
                d = abs(t2 - t1) * 100.0 / t1
                if d > threshold:
                    ind = '-' if t2 > t1 else '+'
                    print("%s %s(%s): (%.2lf%%) from %g to %g" %
                          (ind, func, var, d, t1, t2))

            if zero_baselines:
                sys.stderr.write('%s(%s): skipped %d timing(s) with a zero '
                                 'baseline\n' % (func, var, zero_baselines))
121 | ||
122 | ||
def _has_points(timings):
    """Return True when a timings series exists and holds at least one sample."""
    return timings is not None and len(timings) > 0


def _scatter_timings(timings, color):
    """Scatter-plot one timings series against its sample index in one color."""
    count = len(timings)
    xs = [float(i) for i in range(count)]
    # The third argument of pylab.scatter is the marker size; it shrinks
    # towards 1.5 as the number of samples grows.
    points = pylab.scatter(xs, timings, 1.5 + 100.0 / count)
    pylab.setp(points, 'color', color)


def plot_graphs(bench1, bench2):
    """Plot graphs for functions

    Make scatter plots for the functions and their variants.  The first
    benchmark is drawn in red and the second in green, and each plot is saved
    as '<func>-<var>.png', or '<func>.png' when the variant name is empty.
    A variant is skipped, with a note on stderr, when either benchmark has no
    detailed timings for it.

    Args:
        bench1: Set of points from the first machine
        bench2: Set of points from the second machine.
    """
    funcs2 = bench2['functions']

    for func, variants in bench1['functions'].items():
        for var, data1 in variants.items():
            timings1 = data1.get('timings')
            timings2 = funcs2.get(func, {}).get(var, {}).get('timings')

            # No point trying to print a graph if there are no detailed
            # timings on either side; an empty series would also make the
            # marker size computation divide by zero.
            if not (_has_points(timings1) and _has_points(timings2)):
                sys.stderr.write('Skipping graph for %s(%s)\n' % (func, var))
                continue

            pylab.clf()
            pylab.ylabel('Time (cycles)')

            # First set of points
            _scatter_timings(timings1, 'r')

            # Second set of points
            _scatter_timings(timings2, 'g')

            if var:
                filename = "%s-%s.png" % (func, var)
            else:
                filename = "%s.png" % func
            # Terminate the message so successive diagnostics do not run
            # together on one line.
            sys.stderr.write('Writing out %s\n' % filename)
            pylab.savefig(filename)
163 | ||
def main(bench1, bench2, schema, threshold, stats):
    """Parse two benchmark outputs, plot them and report their differences.

    Args:
        bench1: First benchmark input, handed to bench.parse_bench
        bench2: Second benchmark input, handed to bench.parse_bench
        schema: Schema argument handed to bench.parse_bench for both inputs
        threshold: Threshold forwarded to compare_runs
        stats: Statistics selection forwarded to compare_runs
    """
    first_run, second_run = [bench.parse_bench(source, schema)
                             for source in (bench1, bench2)]

    # The graphs are drawn before compress_timings is applied to the runs.
    plot_graphs(first_run, second_run)

    for run in (first_run, second_run):
        bench.compress_timings(run)

    compare_runs(first_run, second_run, threshold, stats)
0cd28286 SP |
174 | |
175 | ||
# Script entry point: parse the command line and run the comparison.
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Take two benchmark and compare their timings.')

    # Required parameters
    parser.add_argument('bench1', help='First bench to compare')
    parser.add_argument('bench2', help='Second bench to compare')

    # Optional parameters
    # The default schema is looked up next to this script.
    parser.add_argument('--schema',
                        default=os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                             'benchout.schema.json'),
                        help='JSON file to validate source/dest files (default: %(default)s)')
    # The comparison code reports a difference only when it is strictly
    # greater than the threshold, so the help text says exactly that.
    parser.add_argument('--threshold', default=10.0, type=float,
                        help='Only print differences, in percent, strictly above '
                             'this threshold (default: %(default)s)')
    parser.add_argument('--stats', default='min mean', type=str,
                        help='Only consider values from the statistics specified as '
                             'a space separated list (default: %(default)s)')

    args = parser.parse_args()

    main(args.bench1, args.bench2, args.schema, args.threshold, args.stats)