]>
Commit | Line | Data |
---|---|---|
bff0e529 LC |
1 | #!/usr/bin/python |
2 | ||
3 | # Script to statistically compare two sets of log files with -ftime-report | |
4 | # output embedded within them. | |
5 | ||
6 | # Contributed by Lawrence Crowl <crowl@google.com> | |
7 | # | |
8 | # Copyright (C) 2012 Free Software Foundation, Inc. | |
9 | # | |
10 | # This file is part of GCC. | |
11 | # | |
12 | # GCC is free software; you can redistribute it and/or modify | |
13 | # it under the terms of the GNU General Public License as published by | |
14 | # the Free Software Foundation; either version 3, or (at your option) | |
15 | # any later version. | |
16 | # | |
17 | # GCC is distributed in the hope that it will be useful, | |
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
20 | # GNU General Public License for more details. | |
21 | # | |
22 | # You should have received a copy of the GNU General Public License | |
23 | # along with GCC; see the file COPYING. If not, write to | |
24 | # the Free Software Foundation, 51 Franklin Street, Fifth Floor, | |
25 | # Boston, MA 02110-1301, USA. | |
26 | ||
27 | ||
28 | """ Compare two sets of compile-time performance numbers. | |
29 | ||
30 | The intent of this script is to compare compile-time performance of two | |
31 | different versions of the compiler. Each version of the compiler must be | |
32 | run at least three times with the -ftime-report option. Each log file | |
33 | represents a data point, or trial. The set of trials for each compiler | |
34 | version constitutes a sample. The ouput of the script is a description | |
35 | of the statistically significant difference between the two version of | |
36 | the compiler. | |
37 | ||
38 | The parameters to the script are: | |
39 | ||
40 | Two file patterns that each match a set of log files. You will probably | |
41 | need to quote the patterns before passing them to the script. | |
42 | ||
43 | Each pattern corresponds to a version of the compiler. | |
44 | ||
45 | A regular expression that finds interesting lines in the log files. | |
46 | If you want to match the beginning of the line, you will need to add | |
47 | the ^ operator. The filtering uses Python regular expression syntax. | |
48 | ||
49 | The default is "TOTAL". | |
50 | ||
51 | All of the interesting lines in a single log file are summed to produce | |
52 | a single trial (data point). | |
53 | ||
54 | A desired statistical confidence within the range 60% to 99.9%. Due to | |
55 | the implementation, this confidence will be rounded down to one of 60%, | |
56 | 70%, 80%, 90%, 95%, 98%, 99%, 99.5%, 99.8%, and 99.9%. | |
57 | ||
58 | The default is 95. | |
59 | ||
60 | If the computed confidence is lower than desired, the script will | |
61 | estimate the number of trials needed to meet the desired confidence. | |
62 | This estimate is not very good, as the variance tends to change as | |
63 | you increase the number of trials. | |
64 | ||
65 | The most common use of the script is total compile-time comparison between | |
66 | logfiles stored in different directories. | |
67 | ||
68 | compare_two_ftime_report_sets "Log1/*perf" "Log2/*perf" | |
69 | ||
70 | One can also look at parsing time, but expecting a lower confidence. | |
71 | ||
72 | compare_two_ftime_report_sets "Log1/*perf" "Log2/*perf" "^phase parsing" 75 | |
73 | ||
74 | """ | |
75 | ||
76 | ||
77 | import os | |
78 | import sys | |
79 | import fnmatch | |
80 | import glob | |
81 | import re | |
82 | import math | |
83 | ||
84 | ||
85 | ####################################################################### Utility | |
86 | ||
87 | ||
88 | def divide(dividend, divisor): | |
89 | """ Return the quotient, avoiding division by zero. | |
90 | """ | |
91 | if divisor == 0: | |
92 | return sys.float_info.max | |
93 | else: | |
94 | return dividend / divisor | |
95 | ||
96 | ||
97 | ################################################################# File and Line | |
98 | ||
99 | ||
100 | # Should you repurpose this script, this code might help. | |
101 | # | |
102 | #def find_files(topdir, filepat): | |
103 | # """ Find a set of file names, under a given directory, | |
104 | # matching a Unix shell file pattern. | |
105 | # Returns an iterator over the file names. | |
106 | # """ | |
107 | # for path, dirlist, filelist in os.walk(topdir): | |
108 | # for name in fnmatch.filter(filelist, filepat): | |
109 | # yield os.path.join(path, name) | |
110 | ||
111 | ||
112 | def match_files(fileglob): | |
113 | """ Find a set of file names matching a Unix shell glob pattern. | |
114 | Returns an iterator over the file names. | |
115 | """ | |
116 | return glob.iglob(os.path.expanduser(fileglob)) | |
117 | ||
118 | ||
119 | def lines_in_file(filename): | |
120 | """ Return an iterator over lines in the named file. """ | |
121 | filedesc = open(filename, "r") | |
122 | for line in filedesc: | |
123 | yield line | |
124 | filedesc.close() | |
125 | ||
126 | ||
127 | def lines_containing_pattern(pattern, lines): | |
128 | """ Find lines by a Python regular-expression. | |
129 | Returns an iterator over lines containing the expression. | |
130 | """ | |
131 | parser = re.compile(pattern) | |
132 | for line in lines: | |
133 | if parser.search(line): | |
134 | yield line | |
135 | ||
136 | ||
137 | ############################################################# Number Formatting | |
138 | ||
139 | ||
140 | def strip_redundant_digits(numrep): | |
141 | if numrep.find(".") == -1: | |
142 | return numrep | |
143 | return numrep.rstrip("0").rstrip(".") | |
144 | ||
145 | ||
146 | def text_number(number): | |
147 | return strip_redundant_digits("%g" % number) | |
148 | ||
149 | ||
150 | def round_significant(digits, number): | |
151 | if number == 0: | |
152 | return 0 | |
153 | magnitude = abs(number) | |
154 | significance = math.floor(math.log10(magnitude)) | |
155 | least_position = int(significance - digits + 1) | |
156 | return round(number, -least_position) | |
157 | ||
158 | ||
159 | def text_significant(digits, number): | |
160 | return text_number(round_significant(digits, number)) | |
161 | ||
162 | ||
163 | def text_percent(number): | |
164 | return text_significant(3, number*100) + "%" | |
165 | ||
166 | ||
167 | ################################################################ T-Distribution | |
168 | ||
169 | ||
170 | # This section of code provides functions for using Student's t-distribution. | |
171 | ||
172 | ||
173 | # The functions are implemented using table lookup | |
174 | # to facilitate implementation of inverse functions. | |
175 | ||
176 | ||
177 | # The table is comprised of row 0 listing the alpha values, | |
178 | # column 0 listing the degree-of-freedom values, | |
179 | # and the other entries listing the corresponding t-distribution values. | |
180 | ||
181 | t_dist_table = [ | |
182 | [ 0, 0.200, 0.150, 0.100, 0.050, 0.025, 0.010, 0.005, .0025, 0.001, .0005], | |
183 | [ 1, 1.376, 1.963, 3.078, 6.314, 12.71, 31.82, 63.66, 127.3, 318.3, 636.6], | |
184 | [ 2, 1.061, 1.386, 1.886, 2.920, 4.303, 6.965, 9.925, 14.09, 22.33, 31.60], | |
185 | [ 3, 0.978, 1.250, 1.638, 2.353, 3.182, 4.541, 5.841, 7.453, 10.21, 12.92], | |
186 | [ 4, 0.941, 1.190, 1.533, 2.132, 2.776, 3.747, 4.604, 5.598, 7.173, 8.610], | |
187 | [ 5, 0.920, 1.156, 1.476, 2.015, 2.571, 3.365, 4.032, 4.773, 5.894, 6.869], | |
188 | [ 6, 0.906, 1.134, 1.440, 1.943, 2.447, 3.143, 3.707, 4.317, 5.208, 5.959], | |
189 | [ 7, 0.896, 1.119, 1.415, 1.895, 2.365, 2.998, 3.499, 4.029, 4.785, 5.408], | |
190 | [ 8, 0.889, 1.108, 1.397, 1.860, 2.306, 2.896, 3.355, 3.833, 4.501, 5.041], | |
191 | [ 9, 0.883, 1.100, 1.383, 1.833, 2.262, 2.821, 3.250, 3.690, 4.297, 4.781], | |
192 | [ 10, 0.879, 1.093, 1.372, 1.812, 2.228, 2.764, 3.169, 3.581, 4.144, 4.587], | |
193 | [ 11, 0.876, 1.088, 1.363, 1.796, 2.201, 2.718, 3.106, 3.497, 4.025, 4.437], | |
194 | [ 12, 0.873, 1.083, 1.356, 1.782, 2.179, 2.681, 3.055, 3.428, 3.930, 4.318], | |
195 | [ 13, 0.870, 1.079, 1.350, 1.771, 2.160, 2.650, 3.012, 3.372, 3.852, 4.221], | |
196 | [ 14, 0.868, 1.076, 1.345, 1.761, 2.145, 2.624, 2.977, 3.326, 3.787, 4.140], | |
197 | [ 15, 0.866, 1.074, 1.341, 1.753, 2.131, 2.602, 2.947, 3.286, 3.733, 4.073], | |
198 | [ 16, 0.865, 1.071, 1.337, 1.746, 2.120, 2.583, 2.921, 3.252, 3.686, 4.015], | |
199 | [ 17, 0.863, 1.069, 1.333, 1.740, 2.110, 2.567, 2.898, 3.222, 3.646, 3.965], | |
200 | [ 18, 0.862, 1.067, 1.330, 1.734, 2.101, 2.552, 2.878, 3.197, 3.610, 3.922], | |
201 | [ 19, 0.861, 1.066, 1.328, 1.729, 2.093, 2.539, 2.861, 3.174, 3.579, 3.883], | |
202 | [ 20, 0.860, 1.064, 1.325, 1.725, 2.086, 2.528, 2.845, 3.153, 3.552, 3.850], | |
203 | [ 21, 0.859, 1.063, 1.323, 1.721, 2.080, 2.518, 2.831, 3.135, 3.527, 3.819], | |
204 | [ 22, 0.858, 1.061, 1.321, 1.717, 2.074, 2.508, 2.819, 3.119, 3.505, 3.792], | |
205 | [ 23, 0.858, 1.060, 1.319, 1.714, 2.069, 2.500, 2.807, 3.104, 3.485, 3.768], | |
206 | [ 24, 0.857, 1.059, 1.318, 1.711, 2.064, 2.492, 2.797, 3.091, 3.467, 3.745], | |
207 | [ 25, 0.856, 1.058, 1.316, 1.708, 2.060, 2.485, 2.787, 3.078, 3.450, 3.725], | |
208 | [ 26, 0.856, 1.058, 1.315, 1.706, 2.056, 2.479, 2.779, 3.067, 3.435, 3.707], | |
209 | [ 27, 0.855, 1.057, 1.314, 1.703, 2.052, 2.473, 2.771, 3.057, 3.421, 3.689], | |
210 | [ 28, 0.855, 1.056, 1.313, 1.701, 2.048, 2.467, 2.763, 3.047, 3.408, 3.674], | |
211 | [ 29, 0.854, 1.055, 1.311, 1.699, 2.045, 2.462, 2.756, 3.038, 3.396, 3.660], | |
212 | [ 30, 0.854, 1.055, 1.310, 1.697, 2.042, 2.457, 2.750, 3.030, 3.385, 3.646], | |
213 | [ 31, 0.853, 1.054, 1.309, 1.696, 2.040, 2.453, 2.744, 3.022, 3.375, 3.633], | |
214 | [ 32, 0.853, 1.054, 1.309, 1.694, 2.037, 2.449, 2.738, 3.015, 3.365, 3.622], | |
215 | [ 33, 0.853, 1.053, 1.308, 1.692, 2.035, 2.445, 2.733, 3.008, 3.356, 3.611], | |
216 | [ 34, 0.852, 1.052, 1.307, 1.691, 2.032, 2.441, 2.728, 3.002, 3.348, 3.601], | |
217 | [ 35, 0.852, 1.052, 1.306, 1.690, 2.030, 2.438, 2.724, 2.996, 3.340, 3.591], | |
218 | [ 36, 0.852, 1.052, 1.306, 1.688, 2.028, 2.434, 2.719, 2.990, 3.333, 3.582], | |
219 | [ 37, 0.851, 1.051, 1.305, 1.687, 2.026, 2.431, 2.715, 2.985, 3.326, 3.574], | |
220 | [ 38, 0.851, 1.051, 1.304, 1.686, 2.024, 2.429, 2.712, 2.980, 3.319, 3.566], | |
221 | [ 39, 0.851, 1.050, 1.304, 1.685, 2.023, 2.426, 2.708, 2.976, 3.313, 3.558], | |
222 | [ 40, 0.851, 1.050, 1.303, 1.684, 2.021, 2.423, 2.704, 2.971, 3.307, 3.551], | |
223 | [ 50, 0.849, 1.047, 1.299, 1.676, 2.009, 2.403, 2.678, 2.937, 3.261, 3.496], | |
224 | [ 60, 0.848, 1.045, 1.296, 1.671, 2.000, 2.390, 2.660, 2.915, 3.232, 3.460], | |
225 | [ 80, 0.846, 1.043, 1.292, 1.664, 1.990, 2.374, 2.639, 2.887, 3.195, 3.416], | |
226 | [100, 0.845, 1.042, 1.290, 1.660, 1.984, 2.364, 2.626, 2.871, 3.174, 3.390], | |
227 | [150, 0.844, 1.040, 1.287, 1.655, 1.976, 2.351, 2.609, 2.849, 3.145, 3.357] ] | |
228 | ||
229 | ||
230 | # The functions use the following parameter name conventions: | |
231 | # alpha - the alpha parameter | |
232 | # degree - the degree-of-freedom parameter | |
233 | # value - the t-distribution value for some alpha and degree | |
234 | # deviations - a confidence interval radius, | |
235 | # expressed as a multiple of the standard deviation of the sample | |
236 | # ax - the alpha parameter index | |
237 | # dx - the degree-of-freedom parameter index | |
238 | ||
239 | # The interface to this section of code is the last three functions, | |
240 | # find_t_dist_value, find_t_dist_alpha, and find_t_dist_degree. | |
241 | ||
242 | ||
243 | def t_dist_alpha_at_index(ax): | |
244 | if ax == 0: | |
245 | return .25 # effectively no confidence | |
246 | else: | |
247 | return t_dist_table[0][ax] | |
248 | ||
249 | ||
250 | def t_dist_degree_at_index(dx): | |
251 | return t_dist_table[dx][0] | |
252 | ||
253 | ||
254 | def t_dist_value_at_index(ax, dx): | |
255 | return t_dist_table[dx][ax] | |
256 | ||
257 | ||
258 | def t_dist_index_of_degree(degree): | |
259 | limit = len(t_dist_table) - 1 | |
260 | dx = 0 | |
261 | while dx < limit and t_dist_degree_at_index(dx+1) <= degree: | |
262 | dx += 1 | |
263 | return dx | |
264 | ||
265 | ||
266 | def t_dist_index_of_alpha(alpha): | |
267 | limit = len(t_dist_table[0]) - 1 | |
268 | ax = 0 | |
269 | while ax < limit and t_dist_alpha_at_index(ax+1) >= alpha: | |
270 | ax += 1 | |
271 | return ax | |
272 | ||
273 | ||
274 | def t_dist_index_of_value(dx, value): | |
275 | limit = len(t_dist_table[dx]) - 1 | |
276 | ax = 0 | |
277 | while ax < limit and t_dist_value_at_index(ax+1, dx) < value: | |
278 | ax += 1 | |
279 | return ax | |
280 | ||
281 | ||
282 | def t_dist_value_within_deviations(dx, ax, deviations): | |
283 | degree = t_dist_degree_at_index(dx) | |
284 | count = degree + 1 | |
285 | root = math.sqrt(count) | |
286 | value = t_dist_value_at_index(ax, dx) | |
287 | nominal = value / root | |
288 | comparison = nominal <= deviations | |
289 | return comparison | |
290 | ||
291 | ||
292 | def t_dist_index_of_degree_for_deviations(ax, deviations): | |
293 | limit = len(t_dist_table) - 1 | |
294 | dx = 1 | |
295 | while dx < limit and not t_dist_value_within_deviations(dx, ax, deviations): | |
296 | dx += 1 | |
297 | return dx | |
298 | ||
299 | ||
300 | def find_t_dist_value(alpha, degree): | |
301 | """ Return the t-distribution value. | |
302 | The parameters are alpha and degree of freedom. | |
303 | """ | |
304 | dx = t_dist_index_of_degree(degree) | |
305 | ax = t_dist_index_of_alpha(alpha) | |
306 | return t_dist_value_at_index(ax, dx) | |
307 | ||
308 | ||
309 | def find_t_dist_alpha(value, degree): | |
310 | """ Return the alpha. | |
311 | The parameters are the t-distribution value for a given degree of freedom. | |
312 | """ | |
313 | dx = t_dist_index_of_degree(degree) | |
314 | ax = t_dist_index_of_value(dx, value) | |
315 | return t_dist_alpha_at_index(ax) | |
316 | ||
317 | ||
318 | def find_t_dist_degree(alpha, deviations): | |
319 | """ Return the degree-of-freedom. | |
320 | The parameters are the desired alpha and the number of standard deviations | |
321 | away from the mean that the degree should handle. | |
322 | """ | |
323 | ax = t_dist_index_of_alpha(alpha) | |
324 | dx = t_dist_index_of_degree_for_deviations(ax, deviations) | |
325 | return t_dist_degree_at_index(dx) | |
326 | ||
327 | ||
328 | ############################################################## Core Statistical | |
329 | ||
330 | ||
331 | # This section provides the core statistical classes and functions. | |
332 | ||
333 | ||
334 | class Accumulator: | |
335 | ||
336 | """ An accumulator for statistical information using arithmetic mean. """ | |
337 | ||
338 | def __init__(self): | |
339 | self.count = 0 | |
340 | self.mean = 0 | |
341 | self.sumsqdiff = 0 | |
342 | ||
343 | def insert(self, value): | |
344 | self.count += 1 | |
345 | diff = value - self.mean | |
346 | self.mean += diff / self.count | |
347 | self.sumsqdiff += (self.count - 1) * diff * diff / self.count | |
348 | ||
349 | ||
350 | def fill_accumulator_from_values(values): | |
351 | accumulator = Accumulator() | |
352 | for value in values: | |
353 | accumulator.insert(value) | |
354 | return accumulator | |
355 | ||
356 | ||
357 | def alpha_from_confidence(confidence): | |
358 | scrubbed = min(99.99, max(confidence, 60)) | |
359 | return (100.0 - scrubbed) / 200.0 | |
360 | ||
361 | ||
362 | def confidence_from_alpha(alpha): | |
363 | return 100 - 200 * alpha | |
364 | ||
365 | ||
366 | class Sample: | |
367 | ||
368 | """ A description of a sample using an arithmetic mean. """ | |
369 | ||
370 | def __init__(self, accumulator, alpha): | |
371 | if accumulator.count < 3: | |
372 | sys.exit("Samples must contain three trials.") | |
373 | self.count = accumulator.count | |
374 | self.mean = accumulator.mean | |
375 | variance = accumulator.sumsqdiff / (self.count - 1) | |
376 | self.deviation = math.sqrt(variance) | |
377 | self.error = self.deviation / math.sqrt(self.count) | |
378 | self.alpha = alpha | |
379 | self.radius = find_t_dist_value(alpha, self.count - 1) * self.error | |
380 | ||
381 | def alpha_for_radius(self, radius): | |
382 | return find_t_dist_alpha(divide(radius, self.error), self.count) | |
383 | ||
384 | def degree_for_radius(self, radius): | |
385 | return find_t_dist_degree(self.alpha, divide(radius, self.deviation)) | |
386 | ||
387 | def __str__(self): | |
388 | text = "trial count is " + text_number(self.count) | |
389 | text += ", mean is " + text_number(self.mean) | |
390 | text += " (" + text_number(confidence_from_alpha(self.alpha)) +"%" | |
391 | text += " confidence in " + text_number(self.mean - self.radius) | |
392 | text += " to " + text_number(self.mean + self.radius) + ")" | |
393 | text += ",\nstd.deviation is " + text_number(self.deviation) | |
394 | text += ", std.error is " + text_number(self.error) | |
395 | return text | |
396 | ||
397 | ||
398 | def sample_from_values(values, alpha): | |
399 | accumulator = fill_accumulator_from_values(values) | |
400 | return Sample(accumulator, alpha) | |
401 | ||
402 | ||
403 | class Comparison: | |
404 | ||
405 | """ A comparison of two samples using arithmetic means. """ | |
406 | ||
407 | def __init__(self, first, second, alpha): | |
408 | if first.mean > second.mean: | |
409 | self.upper = first | |
410 | self.lower = second | |
411 | self.larger = "first" | |
412 | else: | |
413 | self.upper = second | |
414 | self.lower = first | |
415 | self.larger = "second" | |
416 | self.a_wanted = alpha | |
417 | radius = self.upper.mean - self.lower.mean | |
418 | rising = self.lower.alpha_for_radius(radius) | |
419 | falling = self.upper.alpha_for_radius(radius) | |
420 | self.a_actual = max(rising, falling) | |
421 | rising = self.lower.degree_for_radius(radius) | |
422 | falling = self.upper.degree_for_radius(radius) | |
423 | self.count = max(rising, falling) + 1 | |
424 | ||
425 | def __str__(self): | |
426 | message = "The " + self.larger + " sample appears to be " | |
427 | change = divide(self.upper.mean, self.lower.mean) - 1 | |
428 | message += text_percent(change) + " larger,\n" | |
429 | confidence = confidence_from_alpha(self.a_actual) | |
430 | if confidence >= 60: | |
431 | message += "with " + text_number(confidence) + "% confidence" | |
432 | message += " of being larger." | |
433 | else: | |
434 | message += "but with no confidence of actually being larger." | |
435 | if self.a_actual > self.a_wanted: | |
436 | confidence = confidence_from_alpha(self.a_wanted) | |
437 | message += "\nTo reach " + text_number(confidence) + "% confidence," | |
438 | if self.count < 100: | |
439 | message += " you need roughly " + text_number(self.count) + " trials,\n" | |
440 | message += "assuming the standard deviation is stable, which is iffy." | |
441 | else: | |
442 | message += "\nyou need to reduce the larger deviation" | |
443 | message += " or increase the number of trials." | |
444 | return message | |
445 | ||
446 | ||
447 | ############################################################ Single Value Files | |
448 | ||
449 | ||
450 | # This section provides functions to compare two raw data files, | |
451 | # each containing a whole sample consisting of single number per line. | |
452 | ||
453 | ||
454 | # Should you repurpose this script, this code might help. | |
455 | # | |
456 | #def values_from_data_file(filename): | |
457 | # for line in lines_in_file(filename): | |
458 | # yield float(line) | |
459 | ||
460 | ||
461 | # Should you repurpose this script, this code might help. | |
462 | # | |
463 | #def sample_from_data_file(filename, alpha): | |
464 | # confidence = confidence_from_alpha(alpha) | |
465 | # text = "\nArithmetic sample for data file\n\"" + filename + "\"" | |
466 | # text += " with desired confidence " + text_number(confidence) + " is " | |
467 | # print text | |
468 | # values = values_from_data_file(filename) | |
469 | # sample = sample_from_values(values, alpha) | |
470 | # print sample | |
471 | # return sample | |
472 | ||
473 | ||
474 | # Should you repurpose this script, this code might help. | |
475 | # | |
476 | #def compare_two_data_files(filename1, filename2, confidence): | |
477 | # alpha = alpha_from_confidence(confidence) | |
478 | # sample1 = sample_from_data_file(filename1, alpha) | |
479 | # sample2 = sample_from_data_file(filename2, alpha) | |
480 | ||
481 | # print Comparison(sample1, sample2, alpha) | |
482 | ||
483 | ||
484 | # Should you repurpose this script, this code might help. | |
485 | # | |
486 | #def command_two_data_files(): | |
487 | # argc = len(sys.argv) | |
488 | # if argc < 2 or 4 < argc: | |
489 | # message = "usage: " + sys.argv[0] | |
490 | # message += " file-name file-name [confidence]" | |
491 | # print message | |
492 | # else: | |
493 | # filename1 = sys.argv[1] | |
494 | # filename2 = sys.argv[2] | |
495 | # if len(sys.argv) >= 4: | |
496 | # confidence = int(sys.argv[3]) | |
497 | # else: | |
498 | # confidence = 95 | |
499 | # compare_two_data_files(filename1, filename2, confidence) | |
500 | ||
501 | ||
502 | ############################################### -ftime-report TimeVar Log Files | |
503 | ||
504 | ||
505 | # This section provides functions to compare two sets of -ftime-report log | |
506 | # files. Each set is a sample, where each data point is derived from the | |
507 | # sum of values in a single log file. | |
508 | ||
509 | ||
510 | label = r"^ *([^:]*[^: ]) *:" | |
511 | number = r" *([0-9.]*) *" | |
512 | percent = r"\( *[0-9]*\%\)" | |
513 | numpct = number + percent | |
514 | total_format = label + number + number + number + number + " kB\n" | |
515 | total_parser = re.compile(total_format) | |
516 | tmvar_format = label + numpct + " usr" + numpct + " sys" | |
517 | tmvar_format += numpct + " wall" + number + " kB " + percent + " ggc\n" | |
518 | tmvar_parser = re.compile(tmvar_format) | |
519 | replace = r"\2\t\3\t\4\t\5\t\1" | |
520 | ||
521 | ||
522 | def split_time_report(lines, pattern): | |
523 | if pattern == "TOTAL": | |
524 | parser = total_parser | |
525 | else: | |
526 | parser = tmvar_parser | |
527 | for line in lines: | |
528 | modified = parser.sub(replace, line) | |
529 | if modified != line: | |
530 | yield re.split("\t", modified) | |
531 | ||
532 | ||
533 | def extract_cpu_time(tvtuples): | |
534 | for tuple in tvtuples: | |
535 | yield float(tuple[0]) + float(tuple[1]) | |
536 | ||
537 | ||
538 | def sum_values(values): | |
539 | sum = 0 | |
540 | for value in values: | |
541 | sum += value | |
542 | return sum | |
543 | ||
544 | ||
545 | def extract_time_for_timevar_log(filename, pattern): | |
546 | lines = lines_in_file(filename) | |
547 | tmvars = lines_containing_pattern(pattern, lines) | |
548 | tuples = split_time_report(tmvars, pattern) | |
549 | times = extract_cpu_time(tuples) | |
550 | return sum_values(times) | |
551 | ||
552 | ||
553 | def extract_times_for_timevar_logs(filelist, pattern): | |
554 | for filename in filelist: | |
555 | yield extract_time_for_timevar_log(filename, pattern) | |
556 | ||
557 | ||
558 | def sample_from_timevar_logs(fileglob, pattern, alpha): | |
559 | confidence = confidence_from_alpha(alpha) | |
560 | text = "\nArithmetic sample for timevar log files\n\"" + fileglob + "\"" | |
561 | text += "\nand selecting lines containing \"" + pattern + "\"" | |
562 | text += " with desired confidence " + text_number(confidence) + " is " | |
563 | print text | |
564 | filelist = match_files(fileglob) | |
565 | values = extract_times_for_timevar_logs(filelist, pattern) | |
566 | sample = sample_from_values(values, alpha) | |
567 | print sample | |
568 | return sample | |
569 | ||
570 | ||
571 | def compare_two_timevar_logs(fileglob1, fileglob2, pattern, confidence): | |
572 | alpha = alpha_from_confidence(confidence) | |
573 | sample1 = sample_from_timevar_logs(fileglob1, pattern, alpha) | |
574 | sample2 = sample_from_timevar_logs(fileglob2, pattern, alpha) | |
575 | ||
576 | print Comparison(sample1, sample2, alpha) | |
577 | ||
578 | ||
579 | def command_two_timevar_logs(): | |
580 | argc = len(sys.argv) | |
581 | if argc < 3 or 5 < argc: | |
582 | message = "usage: " + sys.argv[0] | |
583 | message += " file-pattern file-pattern [line-pattern [confidence]]" | |
584 | print message | |
585 | else: | |
586 | filepat1 = sys.argv[1] | |
587 | filepat2 = sys.argv[2] | |
588 | if len(sys.argv) >= 5: | |
589 | confidence = int(sys.argv[4]) | |
590 | else: | |
591 | confidence = 95 | |
592 | if len(sys.argv) >= 4: | |
593 | linepat = sys.argv[3] | |
594 | else: | |
595 | linepat = "TOTAL" | |
596 | compare_two_timevar_logs(filepat1, filepat2, linepat, confidence) | |
597 | ||
598 | ||
599 | ########################################################################## Main | |
600 | ||
601 | ||
602 | # This section is the main code, implementing the command. | |
603 | ||
604 | ||
605 | command_two_timevar_logs() |