#!/usr/bin/env python3
#
# Examine build performance test results
#
# Copyright (c) 2017, Intel Corporation.
#
# SPDX-License-Identifier: GPL-2.0-only
#

import argparse
import json
import logging
import os
import re
import sys
from collections import namedtuple, OrderedDict
from operator import attrgetter
from xml.etree import ElementTree as ET

# Import oe libs
scripts_path = os.path.dirname(os.path.realpath(__file__))
sys.path.append(os.path.join(scripts_path, 'lib'))
import scriptpath
from build_perf import print_table
from build_perf.report import (metadata_xml_to_json, results_xml_to_json,
                               aggregate_data, aggregate_metadata, measurement_stats,
                               AggregateTestData)
from build_perf import html
from buildstats import BuildStats, diff_buildstats, BSVerDiff

scriptpath.add_oe_lib_path()

from oeqa.utils.git import GitRepo, GitError
import oeqa.utils.gitarchive as gitarchive


# Setup logging
logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
log = logging.getLogger('oe-build-perf-report')

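# Example invocation (hypothetical repository path; the options are the ones
# defined in parse_args() below):
#   oe-build-perf-report --repo ~/build-perf-results --branch master \
#       --machine qemux86 --html > report.html
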
def list_test_revs(repo, tag_name, verbosity, **kwargs):
    """Get list of all tested revisions"""
    valid_kwargs = dict([(k, v) for k, v in kwargs.items() if v is not None])

    fields, revs = gitarchive.get_test_runs(log, repo, tag_name, **valid_kwargs)
    ignore_fields = ['tag_number']
    if verbosity < 2:
        extra_fields = ['COMMITS', 'TEST RUNS']
        ignore_fields.extend(['commit_number', 'commit'])
    else:
        extra_fields = ['TEST RUNS']

    print_fields = [i for i, f in enumerate(fields) if f not in ignore_fields]

    # Build the header row
    rows = [[fields[i].upper() for i in print_fields] + extra_fields]

    prev = [''] * len(print_fields)
    prev_commit = None
    commit_cnt = 0
    commit_field = fields.index('commit')
    for rev in revs:
        # Only use fields that we want to print
        cols = [rev[i] for i in print_fields]

        if cols != prev:
            commit_cnt = 1
            test_run_cnt = 1
            new_row = [''] * (len(print_fields) + len(extra_fields))

            # Leave columns that are identical to the previous row blank,
            # copying only the tail that differs
            for i in print_fields:
                if cols[i] != prev[i]:
                    break
            new_row[i:-len(extra_fields)] = cols[i:]
            rows.append(new_row)
        else:
            if rev[commit_field] != prev_commit:
                commit_cnt += 1
            test_run_cnt += 1

        if verbosity < 2:
            new_row[-2] = commit_cnt
        new_row[-1] = test_run_cnt
        prev = cols
        prev_commit = rev[commit_field]

    print_table(rows)

def is_xml_format(repo, commit):
    """Check if the commit contains xml (or json) data"""
    if repo.rev_parse(commit + ':results.xml'):
        log.debug("Detected report in xml format in %s", commit)
        return True
    else:
        log.debug("No xml report in %s, assuming json formatted results", commit)
        return False

def read_results(repo, tags, xml=True):
    """Read result files from repo"""

    def parse_xml_stream(data):
        """Parse multiple concatenated XML objects"""
        objs = []
        xml_d = ""
        for line in data.splitlines():
            if xml_d and line.startswith('<?xml version='):
                objs.append(ET.fromstring(xml_d))
                xml_d = line
            else:
                xml_d += line
        objs.append(ET.fromstring(xml_d))
        return objs

    def parse_json_stream(data):
        """Parse multiple concatenated JSON objects"""
        objs = []
        json_d = ""
        for line in data.splitlines():
            if line == '}{':
                json_d += '}'
                objs.append(json.loads(json_d, object_pairs_hook=OrderedDict))
                json_d = '{'
            else:
                json_d += line
        objs.append(json.loads(json_d, object_pairs_hook=OrderedDict))
        return objs

    num_revs = len(tags)

    # Optimize by reading all data with one git command
    log.debug("Loading raw result data from %d tags, %s...", num_revs, tags[0])
    if xml:
        git_objs = [tag + ':metadata.xml' for tag in tags] + [tag + ':results.xml' for tag in tags]
        data = parse_xml_stream(repo.run_cmd(['show'] + git_objs + ['--']))
        return ([metadata_xml_to_json(e) for e in data[0:num_revs]],
                [results_xml_to_json(e) for e in data[num_revs:]])
    else:
        git_objs = [tag + ':metadata.json' for tag in tags] + [tag + ':results.json' for tag in tags]
        data = parse_json_stream(repo.run_cmd(['show'] + git_objs + ['--']))
        return data[0:num_revs], data[num_revs:]


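# A sketch of the data flow above, with hypothetical tag names: a single
#   repo.run_cmd(['show', 'tagA:results.json', 'tagB:results.json', '--'])
# prints the blobs back to back, and parse_json_stream() splits the combined
# stream into one object per blob at each '}{' boundary line.
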
def get_data_item(data, key):
    """Nested getitem lookup"""
    for k in key.split('.'):
        data = data[k]
    return data

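# For example, with metadata = {'layers': {'meta': {'commit_count': 1234}}}
# (hypothetical values), get_data_item(metadata, 'layers.meta.commit_count')
# returns 1234, while a missing key raises KeyError.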

def metadata_diff(metadata_l, metadata_r):
    """Prepare a metadata diff for printing"""
    keys = [('Hostname', 'hostname', 'hostname'),
            ('Branch', 'branch', 'layers.meta.branch'),
            ('Commit number', 'commit_num', 'layers.meta.commit_count'),
            ('Commit', 'commit', 'layers.meta.commit'),
            ('Number of test runs', 'testrun_count', 'testrun_count')
            ]

    def _metadata_diff(key):
        """Diff metadata from two test reports"""
        try:
            val1 = get_data_item(metadata_l, key)
        except KeyError:
            val1 = '(N/A)'
        try:
            val2 = get_data_item(metadata_r, key)
        except KeyError:
            val2 = '(N/A)'
        return val1, val2

    metadata = OrderedDict()
    for title, key, key_json in keys:
        value_l, value_r = _metadata_diff(key_json)
        metadata[key] = {'title': title,
                         'value_old': value_l,
                         'value': value_r}
    return metadata

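# The returned mapping is keyed by the short names above, e.g. (hypothetical
# values):
#   meta_diff['branch'] == {'title': 'Branch',
#                           'value_old': 'master',
#                           'value': 'master-next'}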

def print_diff_report(metadata_l, data_l, metadata_r, data_r):
    """Print differences between two data sets"""

    # First, print general metadata
    print("\nTEST METADATA:\n==============")
    meta_diff = metadata_diff(metadata_l, metadata_r)
    rows = []
    row_fmt = ['{:{wid}} ', '{:<{wid}} ', '{:<{wid}}']
    rows = [['', 'CURRENT COMMIT', 'COMPARING WITH']]
    for key, val in meta_diff.items():
        # Shorten commit hashes
        if key == 'commit':
            rows.append([val['title'] + ':', val['value'][:20], val['value_old'][:20]])
        else:
            rows.append([val['title'] + ':', val['value'], val['value_old']])
    print_table(rows, row_fmt)

    # Print test results
    print("\nTEST RESULTS:\n=============")

    tests = list(data_l['tests'].keys())
    # Append tests that are only present in 'right' set
    tests += [t for t in list(data_r['tests'].keys()) if t not in tests]

    # Prepare data to be printed
    rows = []
    row_fmt = ['{:8}', '{:{wid}}', '{:{wid}}', ' {:>{wid}}', ' {:{wid}} ', '{:{wid}}',
               ' {:>{wid}}', ' {:>{wid}}']
    num_cols = len(row_fmt)
    for test in tests:
        test_l = data_l['tests'][test] if test in data_l['tests'] else None
        test_r = data_r['tests'][test] if test in data_r['tests'] else None
        pref = ' '
        if test_l is None:
            pref = '+'
        elif test_r is None:
            pref = '-'
        descr = test_l['description'] if test_l else test_r['description']
        heading = "{} {}: {}".format(pref, test, descr)

        rows.append([heading])

        # Generate the list of measurements
        meas_l = test_l['measurements'] if test_l else {}
        meas_r = test_r['measurements'] if test_r else {}
        measurements = list(meas_l.keys())
        measurements += [m for m in list(meas_r.keys()) if m not in measurements]

        for meas in measurements:
            m_pref = ' '
            if meas in meas_l:
                stats_l = measurement_stats(meas_l[meas], 'l.')
            else:
                stats_l = measurement_stats(None, 'l.')
                m_pref = '+'
            if meas in meas_r:
                stats_r = measurement_stats(meas_r[meas], 'r.')
            else:
                stats_r = measurement_stats(None, 'r.')
                m_pref = '-'
            stats = stats_l.copy()
            stats.update(stats_r)

            absdiff = stats['val_cls'](stats['r.mean'] - stats['l.mean'])
            reldiff = "{:+.1f} %".format(absdiff * 100 / stats['l.mean'])
            if stats['r.mean'] > stats['l.mean']:
                absdiff = '+' + str(absdiff)
            else:
                absdiff = str(absdiff)
            rows.append(['', m_pref, stats['name'] + ' ' + stats['quantity'],
                         str(stats['l.mean']), '->', str(stats['r.mean']),
                         absdiff, reldiff])
        rows.append([''] * num_cols)

    print_table(rows, row_fmt)

    print()


class BSSummary(object):
    def __init__(self, bs1, bs2):
        self.tasks = {'count': bs2.num_tasks,
                      'change': '{:+d}'.format(bs2.num_tasks - bs1.num_tasks)}
        self.top_consumer = None
        self.top_decrease = None
        self.top_increase = None
        self.ver_diff = OrderedDict()

        tasks_diff = diff_buildstats(bs1, bs2, 'cputime')

        # Get top consumers of resources
        tasks_diff = sorted(tasks_diff, key=attrgetter('value2'))
        self.top_consumer = tasks_diff[-5:]

        # Get biggest increase and decrease in resource usage
        tasks_diff = sorted(tasks_diff, key=attrgetter('absdiff'))
        self.top_decrease = tasks_diff[0:5]
        self.top_increase = tasks_diff[-5:]

        # Compare recipe versions and prepare data for display
        ver_diff = BSVerDiff(bs1, bs2)
        if ver_diff:
            if ver_diff.new:
                self.ver_diff['New recipes'] = [(n, r.evr) for n, r in ver_diff.new.items()]
            if ver_diff.dropped:
                self.ver_diff['Dropped recipes'] = [(n, r.evr) for n, r in ver_diff.dropped.items()]
            if ver_diff.echanged:
                self.ver_diff['Epoch changed'] = [(n, "{} &rarr; {}".format(r.left.evr, r.right.evr)) for n, r in ver_diff.echanged.items()]
            if ver_diff.vchanged:
                self.ver_diff['Version changed'] = [(n, "{} &rarr; {}".format(r.left.version, r.right.version)) for n, r in ver_diff.vchanged.items()]
            if ver_diff.rchanged:
                self.ver_diff['Revision changed'] = [(n, "{} &rarr; {}".format(r.left.evr, r.right.evr)) for n, r in ver_diff.rchanged.items()]

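# Minimal usage sketch (bs_old and bs_new stand for hypothetical BuildStats
# objects of the two revisions being compared):
#   summary = BSSummary(bs_old, bs_new)
#   summary.tasks['change']   # e.g. '+3' tasks compared to bs_old
#   summary.top_increase      # the five tasks whose cputime grew the most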

def print_html_report(data, id_comp, buildstats):
    """Print report in html format"""
    # Handle metadata
    metadata = metadata_diff(data[id_comp].metadata, data[-1].metadata)

    # Generate list of tests
    tests = []
    for test in data[-1].results['tests'].keys():
        test_r = data[-1].results['tests'][test]
        new_test = {'name': test_r['name'],
                    'description': test_r['description'],
                    'status': test_r['status'],
                    'measurements': [],
                    'err_type': test_r.get('err_type'),
                    }
        # Limit length of err output shown
        if 'message' in test_r:
            lines = test_r['message'].splitlines()
            if len(lines) > 20:
                new_test['message'] = '...\n' + '\n'.join(lines[-20:])
            else:
                new_test['message'] = test_r['message']

        # Generate the list of measurements
        for meas in test_r['measurements'].keys():
            meas_r = test_r['measurements'][meas]
            meas_type = 'time' if meas_r['type'] == 'sysres' else 'size'
            new_meas = {'name': meas_r['name'],
                        'legend': meas_r['legend'],
                        'description': meas_r['name'] + ' ' + meas_type,
                        }
            samples = []

            # Run through all revisions in our data
            for meta, test_data in data:
                if (not test in test_data['tests'] or
                        not meas in test_data['tests'][test]['measurements']):
                    samples.append(measurement_stats(None))
                    continue
                test_i = test_data['tests'][test]
                meas_i = test_i['measurements'][meas]
                commit_num = get_data_item(meta, 'layers.meta.commit_count')
                samples.append(measurement_stats(meas_i))
                samples[-1]['commit_num'] = commit_num

            absdiff = samples[-1]['val_cls'](samples[-1]['mean'] - samples[id_comp]['mean'])
            reldiff = absdiff * 100 / samples[id_comp]['mean']
            new_meas['absdiff'] = absdiff
            new_meas['absdiff_str'] = str(absdiff) if absdiff < 0 else '+' + str(absdiff)
            new_meas['reldiff'] = reldiff
            new_meas['reldiff_str'] = "{:+.1f} %".format(reldiff)
            new_meas['samples'] = samples
            new_meas['value'] = samples[-1]
            new_meas['value_type'] = samples[-1]['val_cls']

            # Compare buildstats
            bs_key = test + '.' + meas
            rev = str(metadata['commit_num']['value'])
            comp_rev = str(metadata['commit_num']['value_old'])
            if (buildstats and rev in buildstats and bs_key in buildstats[rev] and
                    comp_rev in buildstats and bs_key in buildstats[comp_rev]):
                new_meas['buildstats'] = BSSummary(buildstats[comp_rev][bs_key],
                                                   buildstats[rev][bs_key])

            new_test['measurements'].append(new_meas)
        tests.append(new_test)

    # Chart options
    chart_opts = {'haxis': {'min': get_data_item(data[0][0], 'layers.meta.commit_count'),
                            'max': get_data_item(data[-1][0], 'layers.meta.commit_count')}
                  }

    print(html.template.render(title="Build Perf Test Report",
                               metadata=metadata, test_data=tests,
                               chart_opts=chart_opts))


def get_buildstats(repo, notes_ref, notes_ref2, revs, outdir=None):
    """Get the buildstats from git notes"""
    full_ref = 'refs/notes/' + notes_ref
    if not repo.rev_parse(full_ref):
        log.error("No buildstats found, please try running "
                  "'git fetch origin %s:%s' to fetch them from the remote",
                  full_ref, full_ref)
        return

    missing = False
    buildstats = {}
    log.info("Parsing buildstats from 'refs/notes/%s'", notes_ref)
    for rev in revs:
        buildstats[rev.commit_number] = {}
        log.debug('Dumping buildstats for %s (%s)', rev.commit_number,
                  rev.commit)
        for tag in rev.tags:
            log.debug(' %s', tag)
            try:
                try:
                    bs_all = json.loads(repo.run_cmd(['notes', '--ref', notes_ref, 'show', tag + '^0']))
                except GitError:
                    if notes_ref2:
                        bs_all = json.loads(repo.run_cmd(['notes', '--ref', notes_ref2, 'show', tag + '^0']))
                    else:
                        raise
            except GitError:
                log.warning("Buildstats not found for %s", tag)
                bs_all = {}
                missing = True

            for measurement, bs in bs_all.items():
                # Write out onto disk
                if outdir:
                    tag_base, run_id = tag.rsplit('/', 1)
                    tag_base = tag_base.replace('/', '_')
                    bs_dir = os.path.join(outdir, measurement, tag_base)
                    if not os.path.exists(bs_dir):
                        os.makedirs(bs_dir)
                    with open(os.path.join(bs_dir, run_id + '.json'), 'w') as f:
                        json.dump(bs, f, indent=2)

                # Read buildstats into a dict
                _bs = BuildStats.from_json(bs)
                if measurement not in buildstats[rev.commit_number]:
                    buildstats[rev.commit_number][measurement] = _bs
                else:
                    buildstats[rev.commit_number][measurement].aggregate(_bs)

    if missing:
        log.info("Buildstats were missing for some test runs, please "
                 "run 'git fetch origin %s:%s' and try again",
                 full_ref, full_ref)

    return buildstats

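# The returned dict is indexed by commit number and then by the measurement
# name stored in the notes, i.e. roughly:
#   buildstats[<commit_number>]['<test>.<measurement>'] -> aggregated BuildStats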

def auto_args(repo, args):
    """Guess arguments, if not defined by the user"""
    # Get the latest commit in the repo
    log.debug("Guessing arguments from the latest commit")
    msg = repo.run_cmd(['log', '-1', '--branches', '--remotes', '--format=%b'])
    for line in msg.splitlines():
        split = line.split(':', 1)
        if len(split) != 2:
            continue

        key = split[0]
        val = split[1].strip()
        if key == 'hostname' and not args.hostname:
            log.debug("Using hostname %s", val)
            args.hostname = val
        elif key == 'branch' and not args.branch:
            log.debug("Using branch %s", val)
            args.branch = val

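# auto_args() relies on 'key: value' lines in the body of the latest commit
# message, e.g. (hypothetical values):
#   hostname: perf-worker1
#   branch: master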

def parse_args(argv):
    """Parse command line arguments"""
    description = """
Examine build performance test results from a Git repository"""
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description=description)

    parser.add_argument('--debug', '-d', action='store_true',
                        help="Verbose logging")
    parser.add_argument('--repo', '-r', required=True,
                        help="Results repository (local git clone)")
    parser.add_argument('--list', '-l', action='count',
                        help="List available test runs")
    parser.add_argument('--html', action='store_true',
                        help="Generate report in html format")
    group = parser.add_argument_group('Tag and revision')
    group.add_argument('--tag-name', '-t',
                       default='{hostname}/{branch}/{machine}/{commit_number}-g{commit}/{tag_number}',
                       help="Tag name (pattern) for finding results")
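    # With the default pattern, a stored result tag looks like this
    # (hypothetical values):
    #   myhost/master/qemux86/1234-g1a2b3c4/0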
    group.add_argument('--hostname', '-H')
    group.add_argument('--branch', '-B', default='master', help="Branch to find commit in")
    group.add_argument('--branch2', help="Branch to find comparison revisions in")
    group.add_argument('--machine', default='qemux86')
    group.add_argument('--history-length', default=25, type=int,
                       help="Number of tested revisions to plot in html report")
    group.add_argument('--commit',
                       help="Revision to search for")
    group.add_argument('--commit-number',
                       help="Revision number to search for, redundant if "
                            "--commit is specified")
    group.add_argument('--commit2',
                       help="Revision to compare with")
    group.add_argument('--commit-number2',
                       help="Revision number to compare with, redundant if "
                            "--commit2 is specified")
    parser.add_argument('--dump-buildstats', nargs='?', const='.',
                        help="Dump buildstats of the tests")

    return parser.parse_args(argv)


def main(argv=None):
    """Script entry point"""
    args = parse_args(argv)
    if args.debug:
        log.setLevel(logging.DEBUG)

    repo = GitRepo(args.repo)

    if args.list:
        list_test_revs(repo, args.tag_name, args.list, hostname=args.hostname)
        return 0

    # Determine the hostname to use
    if not args.hostname:
        auto_args(repo, args)

    revs = gitarchive.get_test_revs(log, repo, args.tag_name, hostname=args.hostname,
                                    branch=args.branch, machine=args.machine)
    if args.branch2 and args.branch2 != args.branch:
        revs2 = gitarchive.get_test_revs(log, repo, args.tag_name, hostname=args.hostname,
                                         branch=args.branch2, machine=args.machine)
        if not len(revs2):
            log.error("No revisions found to compare against")
            return 1
        if not len(revs):
            log.error("No revision to report on found")
            return 1
    else:
        if len(revs) < 2:
            log.error("Only %d tested revisions found, unable to generate report" % len(revs))
            return 1

    # Pick revisions
    if args.commit:
        if args.commit_number:
            log.warning("Ignoring --commit-number as --commit was specified")
        index1 = gitarchive.rev_find(revs, 'commit', args.commit)
    elif args.commit_number:
        index1 = gitarchive.rev_find(revs, 'commit_number', args.commit_number)
    else:
        index1 = len(revs) - 1

    if args.branch2 and args.branch2 != args.branch:
        revs2.append(revs[index1])
        index1 = len(revs2) - 1
        revs = revs2

    if args.commit2:
        if args.commit_number2:
            log.warning("Ignoring --commit-number2 as --commit2 was specified")
        index2 = gitarchive.rev_find(revs, 'commit', args.commit2)
    elif args.commit_number2:
        index2 = gitarchive.rev_find(revs, 'commit_number', args.commit_number2)
    else:
        if index1 > 0:
            index2 = index1 - 1
            # Find the closest matching commit number for comparison.
            # In the future we could check that the commit is a common
            # ancestor and continue back if not, but this is good enough
            # for now
            while index2 > 0 and revs[index2].commit_number > revs[index1].commit_number:
                index2 = index2 - 1
        else:
            log.error("Unable to determine the other commit, use "
                      "--commit2 or --commit-number2 to specify it")
            return 1

    index_l = min(index1, index2)
    index_r = max(index1, index2)

    rev_l = revs[index_l]
    rev_r = revs[index_r]
    log.debug("Using 'left' revision %s (%s), %s test runs:\n %s",
              rev_l.commit_number, rev_l.commit, len(rev_l.tags),
              '\n '.join(rev_l.tags))
    log.debug("Using 'right' revision %s (%s), %s test runs:\n %s",
              rev_r.commit_number, rev_r.commit, len(rev_r.tags),
              '\n '.join(rev_r.tags))

    # Check report format used in the repo (assume all reports in the same fmt)
    xml = is_xml_format(repo, revs[index_r].tags[-1])

    if args.html:
        index_0 = max(0, min(index_l, index_r - args.history_length))
        rev_range = range(index_0, index_r + 1)
    else:
        # We do not need a range of commits for the text report (no graphs)
        index_0 = index_l
        rev_range = (index_l, index_r)

    # Read raw data
    log.debug("Reading %d revisions, starting from %s (%s)",
              len(rev_range), revs[index_0].commit_number, revs[index_0].commit)
    raw_data = [read_results(repo, revs[i].tags, xml) for i in rev_range]

    data = []
    for raw_m, raw_d in raw_data:
        data.append(AggregateTestData(aggregate_metadata(raw_m),
                                      aggregate_data(raw_d)))

    # Read buildstats only when needed
    buildstats = None
    if args.dump_buildstats or args.html:
        outdir = 'oe-build-perf-buildstats' if args.dump_buildstats else None
        notes_ref = 'buildstats/{}/{}/{}'.format(args.hostname, args.branch, args.machine)
        notes_ref2 = None
        if args.branch2:
            notes_ref = 'buildstats/{}/{}/{}'.format(args.hostname, args.branch2, args.machine)
            notes_ref2 = 'buildstats/{}/{}/{}'.format(args.hostname, args.branch, args.machine)
        buildstats = get_buildstats(repo, notes_ref, notes_ref2, [rev_l, rev_r], outdir)

    # Print report
    if not args.html:
        print_diff_report(data[0].metadata, data[0].results,
                          data[1].metadata, data[1].results)
    else:
        # Re-map the 'left' list index into the data table, where index_0 maps to 0
        print_html_report(data, index_l - index_0, buildstats)

    return 0

if __name__ == "__main__":
    sys.exit(main())