]>
Commit | Line | Data |
---|---|---|
0994b9b6 | 1 | #!/usr/bin/python |
2b778ceb | 2 | # Copyright (C) 2015-2021 Free Software Foundation, Inc. |
0994b9b6 SP |
3 | # This file is part of the GNU C Library. |
4 | # | |
5 | # The GNU C Library is free software; you can redistribute it and/or | |
6 | # modify it under the terms of the GNU Lesser General Public | |
7 | # License as published by the Free Software Foundation; either | |
8 | # version 2.1 of the License, or (at your option) any later version. | |
9 | # | |
10 | # The GNU C Library is distributed in the hope that it will be useful, | |
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | # Lesser General Public License for more details. | |
14 | # | |
15 | # You should have received a copy of the GNU Lesser General Public | |
16 | # License along with the GNU C Library; if not, see | |
5a82c748 | 17 | # <https://www.gnu.org/licenses/>. |
0994b9b6 SP |
18 | """Functions to import benchmark data and process it""" |
19 | ||
20 | import json | |
21 | try: | |
22 | import jsonschema as validator | |
23 | except ImportError: | |
24 | print('Could not find jsonschema module.') | |
25 | raise | |
26 | ||
27 | ||
0cd28286 SP |
28 | def mean(lst): |
29 | """Compute and return mean of numbers in a list | |
30 | ||
31 | The numpy average function has horrible performance, so implement our | |
32 | own mean function. | |
33 | ||
34 | Args: | |
35 | lst: The list of numbers to average. | |
36 | Return: | |
37 | The mean of members in the list. | |
38 | """ | |
39 | return sum(lst) / len(lst) | |
40 | ||
41 | ||
42 | def split_list(bench, func, var): | |
43 | """ Split the list into a smaller set of more distinct points | |
44 | ||
45 | Group together points such that the difference between the smallest | |
46 | point and the mean is less than 1/3rd of the mean. This means that | |
47 | the mean is at most 1.5x the smallest member of that group. | |
48 | ||
49 | mean - xmin < mean / 3 | |
50 | i.e. 2 * mean / 3 < xmin | |
51 | i.e. mean < 3 * xmin / 2 | |
52 | ||
53 | For an evenly distributed group, the largest member will be less than | |
54 | twice the smallest member of the group. | |
55 | Derivation: | |
56 | ||
57 | An evenly distributed series would be xmin, xmin + d, xmin + 2d... | |
58 | ||
59 | mean = (2 * n * xmin + n * (n - 1) * d) / 2 * n | |
60 | and max element is xmin + (n - 1) * d | |
61 | ||
62 | Now, mean < 3 * xmin / 2 | |
63 | ||
64 | 3 * xmin > 2 * mean | |
65 | 3 * xmin > (2 * n * xmin + n * (n - 1) * d) / n | |
66 | 3 * n * xmin > 2 * n * xmin + n * (n - 1) * d | |
67 | n * xmin > n * (n - 1) * d | |
68 | xmin > (n - 1) * d | |
69 | 2 * xmin > xmin + (n-1) * d | |
70 | 2 * xmin > xmax | |
71 | ||
72 | Hence, proved. | |
73 | ||
74 | Similarly, it is trivial to prove that for a similar aggregation by using | |
75 | the maximum element, the maximum element in the group must be at most 4/3 | |
76 | times the mean. | |
77 | ||
78 | Args: | |
79 | bench: The benchmark object | |
80 | func: The function name | |
81 | var: The function variant name | |
82 | """ | |
83 | means = [] | |
84 | lst = bench['functions'][func][var]['timings'] | |
85 | last = len(lst) - 1 | |
86 | while lst: | |
87 | for i in range(last + 1): | |
88 | avg = mean(lst[i:]) | |
89 | if avg > 0.75 * lst[last]: | |
90 | means.insert(0, avg) | |
91 | lst = lst[:i] | |
92 | last = i - 1 | |
93 | break | |
94 | bench['functions'][func][var]['timings'] = means | |
95 | ||
96 | ||
97 | def do_for_all_timings(bench, callback): | |
98 | """Call a function for all timing objects for each function and its | |
99 | variants. | |
100 | ||
101 | Args: | |
102 | bench: The benchmark object | |
103 | callback: The callback function | |
104 | """ | |
105 | for func in bench['functions'].keys(): | |
106 | for k in bench['functions'][func].keys(): | |
107 | if 'timings' not in bench['functions'][func][k].keys(): | |
108 | continue | |
109 | ||
110 | callback(bench, func, k) | |
111 | ||
112 | ||
113 | def compress_timings(points): | |
114 | """Club points with close enough values into a single mean value | |
115 | ||
116 | See split_list for details on how the clubbing is done. | |
117 | ||
118 | Args: | |
119 | points: The set of points. | |
120 | """ | |
121 | do_for_all_timings(points, split_list) | |
122 | ||
123 | ||
0994b9b6 SP |
124 | def parse_bench(filename, schema_filename): |
125 | """Parse the input file | |
126 | ||
127 | Parse and validate the json file containing the benchmark outputs. Return | |
128 | the resulting object. | |
129 | Args: | |
130 | filename: Name of the benchmark output file. | |
131 | Return: | |
132 | The bench dictionary. | |
133 | """ | |
134 | with open(schema_filename, 'r') as schemafile: | |
135 | schema = json.load(schemafile) | |
136 | with open(filename, 'r') as benchfile: | |
137 | bench = json.load(benchfile) | |
138 | validator.validate(bench, schema) | |
139 | do_for_all_timings(bench, lambda b, f, v: | |
140 | b['functions'][f][v]['timings'].sort()) | |
141 | return bench |