# Examine build performance test results
#
# Copyright (c) 2017, Intel Corporation.
#
# SPDX-License-Identifier: GPL-2.0-only
#
import argparse
import json
import logging
import os
import sys
from collections import namedtuple, OrderedDict
from operator import attrgetter
from xml.etree import ElementTree as ET

# Make the oe-core 'scripts/lib' modules importable before pulling them in
scripts_path = os.path.dirname(os.path.realpath(__file__))
sys.path.append(os.path.join(scripts_path, 'lib'))
import scriptpath
from build_perf import print_table
from build_perf.report import (metadata_xml_to_json, results_xml_to_json,
                               aggregate_data, aggregate_metadata, measurement_stats,
                               AggregateTestData)
from build_perf import html
from buildstats import BuildStats, diff_buildstats, BSVerDiff

scriptpath.add_oe_lib_path()

from oeqa.utils.git import GitRepo, GitError
import oeqa.utils.gitarchive as gitarchive


# Module-wide logging setup; --debug in main() raises the level to DEBUG
logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
log = logging.getLogger('oe-build-perf-report')
def list_test_revs(repo, tag_name, verbosity, **kwargs):
    """Get list of all tested revisions.

    Prints a table of test runs found in the results repository. Extra
    keyword args (e.g. hostname) are passed through to
    gitarchive.get_test_runs() as filters; None values are dropped.
    """
    # Drop filters the user did not specify
    valid_kwargs = dict([(k, v) for k, v in kwargs.items() if v is not None])

    fields, revs = gitarchive.get_test_runs(log, repo, tag_name, **valid_kwargs)
    ignore_fields = ['tag_number']
    if verbosity < 2:
        # Terse listing: collapse individual commits into per-row counts
        extra_fields = ['COMMITS', 'TEST RUNS']
        ignore_fields.extend(['commit_number', 'commit'])
    else:
        extra_fields = ['TEST RUNS']

    # Indices of the fields that will actually be printed
    print_fields = [i for i, f in enumerate(fields) if f not in ignore_fields]

    # Header row
    rows = [[fields[i].upper() for i in print_fields] + extra_fields]

    prev = [''] * len(print_fields)
    prev_commit = None
    commit_cnt = 0
    commit_field = fields.index('commit')
    for rev in revs:
        # Only use fields that we want to print
        cols = [rev[i] for i in print_fields]

        if cols != prev:
            commit_cnt = 1
            test_run_cnt = 1
            new_row = [''] * (len(print_fields) + len(extra_fields))

            # Find the first field that differs from the previous row and
            # only print from that field onwards (blank out the repeats)
            for i in print_fields:
                if cols[i] != prev[i]:
                    break
            new_row[i:-len(extra_fields)] = cols[i:]
            rows.append(new_row)
        else:
            if rev[commit_field] != prev_commit:
                commit_cnt += 1
            test_run_cnt += 1

        # Keep the counter columns of the current row up to date
        new_row[-2] = commit_cnt
        new_row[-1] = test_run_cnt

        prev = cols
        prev_commit = rev[commit_field]

    print_table(rows)
def is_xml_format(repo, commit):
    """Check if the commit contains xml (or json) data.

    Returns True when a 'results.xml' blob exists in the given commit,
    False otherwise (json format is then assumed).
    """
    if repo.rev_parse(commit + ':results.xml'):
        log.debug("Detected report in xml format in %s", commit)
        return True

    log.debug("No xml report in %s, assuming json formatted results", commit)
    return False
def read_results(repo, tags, xml=True):
    """Read result files from repo.

    Reads metadata and results blobs for all given tags with a single
    'git show' invocation and returns (metadata_list, results_list),
    one entry per tag.
    """

    def parse_xml_stream(data):
        """Parse multiple concatenated XML objects"""
        objs = []
        xml_d = ""
        for line in data.splitlines():
            # A new XML declaration marks the start of the next document
            if xml_d and line.startswith('<?xml version='):
                objs.append(ET.fromstring(xml_d))
                xml_d = ""
            xml_d += line
        objs.append(ET.fromstring(xml_d))
        return objs

    def parse_json_stream(data):
        """Parse multiple concatenated JSON objects"""
        objs = []
        json_d = ""
        for line in data.splitlines():
            # NOTE(review): assumes concatenated objects meet on a '}{'
            # line when git show concatenates the blobs — confirm against
            # the json report layout
            if line == '}{':
                json_d += '}'
                objs.append(json.loads(json_d, object_pairs_hook=OrderedDict))
                json_d = '{'
            else:
                json_d += line
        objs.append(json.loads(json_d, object_pairs_hook=OrderedDict))
        return objs

    num_revs = len(tags)

    # Optimize by reading all data with one git command
    log.debug("Loading raw result data from %d tags, %s...", num_revs, tags[0])
    if xml:
        git_objs = [tag + ':metadata.xml' for tag in tags] + [tag + ':results.xml' for tag in tags]
        data = parse_xml_stream(repo.run_cmd(['show'] + git_objs + ['--']))
        return ([metadata_xml_to_json(e) for e in data[0:num_revs]],
                [results_xml_to_json(e) for e in data[num_revs:]])

    git_objs = [tag + ':metadata.json' for tag in tags] + [tag + ':results.json' for tag in tags]
    data = parse_json_stream(repo.run_cmd(['show'] + git_objs + ['--']))
    return data[0:num_revs], data[num_revs:]
def get_data_item(data, key):
    """Nested getitem lookup.

    Resolves a dotted key like 'layers.meta.commit' against nested
    mappings. Raises KeyError if any component is missing.
    """
    for k in key.split('.'):
        data = data[k]
    return data
def metadata_diff(metadata_l, metadata_r):
    """Prepare a metadata diff for printing.

    Returns an OrderedDict keyed by short field name; each value holds
    'title', 'value_old' (left report) and 'value' (right report).
    Missing fields are rendered as '(N/A)'.
    """
    # (title, short key, dotted path into the metadata dict)
    keys = [('Hostname', 'hostname', 'hostname'),
            ('Branch', 'branch', 'layers.meta.branch'),
            ('Commit number', 'commit_num', 'layers.meta.commit_count'),
            ('Commit', 'commit', 'layers.meta.commit'),
            ('Number of test runs', 'testrun_count', 'testrun_count')
           ]

    def _metadata_diff(key):
        """Diff metadata from two test reports"""
        try:
            val1 = get_data_item(metadata_l, key)
        except KeyError:
            val1 = '(N/A)'
        try:
            val2 = get_data_item(metadata_r, key)
        except KeyError:
            val2 = '(N/A)'
        return val1, val2

    metadata = OrderedDict()
    for title, key, key_json in keys:
        value_l, value_r = _metadata_diff(key_json)
        metadata[key] = {'title': title,
                         'value_old': value_l,
                         'value': value_r}
    return metadata
def print_diff_report(metadata_l, data_l, metadata_r, data_r):
    """Print differences between two data sets.

    Emits a plain-text report to stdout: first the metadata of both test
    runs side by side, then per-test measurement means with absolute and
    relative differences.
    """
    # First, print general metadata
    print("\nTEST METADATA:\n==============")
    meta_diff = metadata_diff(metadata_l, metadata_r)

    row_fmt = ['{:{wid}} ', '{:<{wid}} ', '{:<{wid}}']
    rows = [['', 'CURRENT COMMIT', 'COMPARING WITH']]
    for key, val in meta_diff.items():
        # Shorten commit hashes
        if key == 'commit':
            rows.append([val['title'] + ':', val['value'][:20], val['value_old'][:20]])
        else:
            rows.append([val['title'] + ':', val['value'], val['value_old']])
    print_table(rows, row_fmt)

    # Print test results
    print("\nTEST RESULTS:\n=============")

    tests = list(data_l['tests'].keys())
    # Append tests that are only present in 'right' set
    tests += [t for t in list(data_r['tests'].keys()) if t not in tests]

    # Prepare data to be printed
    rows = []
    row_fmt = ['{:8}', '{:{wid}}', '{:{wid}}', ' {:>{wid}}', ' {:{wid}} ', '{:{wid}}',
               ' {:>{wid}}', ' {:>{wid}}']
    num_cols = len(row_fmt)
    for test in tests:
        test_l = data_l['tests'][test] if test in data_l['tests'] else None
        test_r = data_r['tests'][test] if test in data_r['tests'] else None
        # '+' marks a test only in the right set, '-' only in the left
        pref = ' '
        if test_l is None:
            pref = '+'
        elif test_r is None:
            pref = '-'
        descr = test_l['description'] if test_l else test_r['description']
        heading = "{} {}: {}".format(pref, test, descr)

        rows.append([heading])

        # Generate the list of measurements
        meas_l = test_l['measurements'] if test_l else {}
        meas_r = test_r['measurements'] if test_r else {}
        measurements = list(meas_l.keys())
        measurements += [m for m in list(meas_r.keys()) if m not in measurements]

        for meas in measurements:
            m_pref = ' '
            if meas in meas_l:
                stats_l = measurement_stats(meas_l[meas], 'l.')
            else:
                m_pref = '+'
                stats_l = measurement_stats(None, 'l.')
            if meas in meas_r:
                stats_r = measurement_stats(meas_r[meas], 'r.')
            else:
                m_pref = '-'
                stats_r = measurement_stats(None, 'r.')
            # Merge left and right stats into one dict (prefixed keys)
            stats = stats_l.copy()
            stats.update(stats_r)

            absdiff = stats['val_cls'](stats['r.mean'] - stats['l.mean'])
            reldiff = "{:+.1f} %".format(absdiff * 100 / stats['l.mean'])
            if stats['r.mean'] > stats['l.mean']:
                absdiff = '+' + str(absdiff)
            else:
                absdiff = str(absdiff)
            rows.append(['', m_pref, stats['name'] + ' ' + stats['quantity'],
                         str(stats['l.mean']), '->', str(stats['r.mean']),
                         absdiff, reldiff])
        # Blank separator row between tests
        rows.append([''] * num_cols)

    print_table(rows, row_fmt)
class BSSummary(object):
    """Summary of the differences between two buildstats snapshots.

    Collects task-count change, top cputime consumers, biggest
    increases/decreases, and recipe version changes between bs1 (old)
    and bs2 (new).
    """
    def __init__(self, bs1, bs2):
        self.tasks = {'count': bs2.num_tasks,
                      'change': '{:+d}'.format(bs2.num_tasks - bs1.num_tasks)}
        self.top_consumer = None
        self.top_decrease = None
        self.top_increase = None
        self.ver_diff = OrderedDict()

        tasks_diff = diff_buildstats(bs1, bs2, 'cputime')

        # Get top consumers of resources
        tasks_diff = sorted(tasks_diff, key=attrgetter('value2'))
        self.top_consumer = tasks_diff[-5:]

        # Get biggest increase and decrease in resource usage
        tasks_diff = sorted(tasks_diff, key=attrgetter('absdiff'))
        self.top_decrease = tasks_diff[0:5]
        self.top_increase = tasks_diff[-5:]

        # Compare recipe versions and prepare data for display
        ver_diff = BSVerDiff(bs1, bs2)
        if ver_diff:
            if ver_diff.new:
                self.ver_diff['New recipes'] = [(n, r.evr) for n, r in ver_diff.new.items()]
            if ver_diff.dropped:
                self.ver_diff['Dropped recipes'] = [(n, r.evr) for n, r in ver_diff.dropped.items()]
            if ver_diff.echanged:
                self.ver_diff['Epoch changed'] = [(n, "{} → {}".format(r.left.evr, r.right.evr)) for n, r in ver_diff.echanged.items()]
            if ver_diff.vchanged:
                self.ver_diff['Version changed'] = [(n, "{} → {}".format(r.left.version, r.right.version)) for n, r in ver_diff.vchanged.items()]
            if ver_diff.rchanged:
                self.ver_diff['Revision changed'] = [(n, "{} → {}".format(r.left.evr, r.right.evr)) for n, r in ver_diff.rchanged.items()]
def print_html_report(data, id_comp, buildstats):
    """Print report in html format.

    data is a list of AggregateTestData tuples ordered oldest-to-newest;
    the last entry is the revision being reported, data[id_comp] the one
    compared against. buildstats maps commit number -> {test.meas: BuildStats}
    (may be None).
    """
    # Compare metadata of the 'comparison' revision against the newest one
    metadata = metadata_diff(data[id_comp].metadata, data[-1].metadata)

    # Generate list of tests
    tests = []
    for test in data[-1].results['tests'].keys():
        test_r = data[-1].results['tests'][test]
        new_test = {'name': test_r['name'],
                    'description': test_r['description'],
                    'status': test_r['status'],
                    'measurements': [],
                    'err_type': test_r.get('err_type'),
                    }
        # Limit length of err output shown
        if 'message' in test_r:
            lines = test_r['message'].splitlines()
            if len(lines) > 20:
                new_test['message'] = '...\n' + '\n'.join(lines[-20:])
            else:
                new_test['message'] = test_r['message']

        # Generate the list of measurements
        for meas in test_r['measurements'].keys():
            meas_r = test_r['measurements'][meas]
            meas_type = 'time' if meas_r['type'] == 'sysres' else 'size'
            new_meas = {'name': meas_r['name'],
                        'legend': meas_r['legend'],
                        'description': meas_r['name'] + ' ' + meas_type,
                        }
            samples = []

            # Run through all revisions in our data
            for meta, test_data in data:
                if (not test in test_data['tests'] or
                        not meas in test_data['tests'][test]['measurements']):
                    # Placeholder sample keeps chart x-axis aligned
                    samples.append(measurement_stats(None))
                    continue
                test_i = test_data['tests'][test]
                meas_i = test_i['measurements'][meas]
                commit_num = get_data_item(meta, 'layers.meta.commit_count')
                samples.append(measurement_stats(meas_i))
                samples[-1]['commit_num'] = commit_num

            absdiff = samples[-1]['val_cls'](samples[-1]['mean'] - samples[id_comp]['mean'])
            reldiff = absdiff * 100 / samples[id_comp]['mean']
            new_meas['absdiff'] = absdiff
            new_meas['absdiff_str'] = str(absdiff) if absdiff < 0 else '+' + str(absdiff)
            new_meas['reldiff'] = reldiff
            new_meas['reldiff_str'] = "{:+.1f} %".format(reldiff)
            new_meas['samples'] = samples
            new_meas['value'] = samples[-1]
            new_meas['value_type'] = samples[-1]['val_cls']

            # Attach buildstats summary when we have data for both revisions
            bs_key = test + '.' + meas
            rev = str(metadata['commit_num']['value'])
            comp_rev = str(metadata['commit_num']['value_old'])
            if (buildstats and rev in buildstats and bs_key in buildstats[rev] and
                    comp_rev in buildstats and bs_key in buildstats[comp_rev]):
                new_meas['buildstats'] = BSSummary(buildstats[comp_rev][bs_key],
                                                   buildstats[rev][bs_key])

            new_test['measurements'].append(new_meas)
        tests.append(new_test)

    # Chart x-axis range covers all revisions present in 'data'
    chart_opts = {'haxis': {'min': get_data_item(data[0][0], 'layers.meta.commit_count'),
                            'max': get_data_item(data[-1][0], 'layers.meta.commit_count')}
                  }

    print(html.template.render(title="Build Perf Test Report",
                               metadata=metadata, test_data=tests,
                               chart_opts=chart_opts))
def get_buildstats(repo, notes_ref, notes_ref2, revs, outdir=None):
    """Get the buildstats from git notes.

    Reads per-tag buildstats json from the given notes ref (falling back
    to notes_ref2 if set), optionally dumps them under outdir, and
    returns {commit_number: {measurement: BuildStats}} with multiple runs
    of the same measurement aggregated. Returns None if the notes ref is
    missing entirely.
    """
    full_ref = 'refs/notes/' + notes_ref
    if not repo.rev_parse(full_ref):
        log.error("No buildstats found, please try running "
                  "'git fetch origin %s:%s' to fetch them from the remote",
                  full_ref, full_ref)
        return None

    missing = False
    buildstats = {}
    log.info("Parsing buildstats from 'refs/notes/%s'", notes_ref)
    for rev in revs:
        buildstats[rev.commit_number] = {}
        log.debug('Dumping buildstats for %s (%s)', rev.commit_number,
                  rev.commit)
        for tag in rev.tags:
            log.debug('    %s', tag)
            try:
                try:
                    bs_all = json.loads(repo.run_cmd(['notes', '--ref', notes_ref, 'show', tag + '^0']))
                except GitError:
                    # Fall back to the secondary notes ref, if given
                    if notes_ref2:
                        bs_all = json.loads(repo.run_cmd(['notes', '--ref', notes_ref2, 'show', tag + '^0']))
                    else:
                        raise
            except GitError:
                log.warning("Buildstats not found for %s", tag)
                bs_all = {}
                missing = True

            for measurement, bs in bs_all.items():
                # Write out onto disk
                if outdir:
                    tag_base, run_id = tag.rsplit('/', 1)
                    tag_base = tag_base.replace('/', '_')
                    bs_dir = os.path.join(outdir, measurement, tag_base)
                    if not os.path.exists(bs_dir):
                        os.makedirs(bs_dir)
                    with open(os.path.join(bs_dir, run_id + '.json'), 'w') as f:
                        json.dump(bs, f, indent=2)

                # Read buildstats into a dict, aggregating repeated runs
                _bs = BuildStats.from_json(bs)
                if measurement not in buildstats[rev.commit_number]:
                    buildstats[rev.commit_number][measurement] = _bs
                else:
                    buildstats[rev.commit_number][measurement].aggregate(_bs)

    if missing:
        log.info("Buildstats were missing for some test runs, please "
                 "run 'git fetch origin %s:%s' and try again",
                 full_ref, full_ref)

    return buildstats
def auto_args(repo, args):
    """Guess arguments, if not defined by the user.

    Parses 'key: value' lines from the body of the latest commit in the
    results repo and fills in args.hostname / args.branch when unset.
    Mutates args in place.
    """
    # Get the latest commit in the repo
    log.debug("Guessing arguments from the latest commit")
    msg = repo.run_cmd(['log', '-1', '--branches', '--remotes', '--format=%b'])
    for line in msg.splitlines():
        split = line.split(':', 1)
        if len(split) != 2:
            continue
        key = split[0]
        val = split[1].strip()
        if key == 'hostname' and not args.hostname:
            log.debug("Using hostname %s", val)
            args.hostname = val
        elif key == 'branch' and not args.branch:
            log.debug("Using branch %s", val)
            args.branch = val
def parse_args(argv):
    """Parse command line arguments.

    Returns the populated argparse.Namespace; exits with usage on error.
    """
    description = """
Examine build performance test results from a Git repository"""
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description=description)

    parser.add_argument('--debug', '-d', action='store_true',
                        help="Verbose logging")
    parser.add_argument('--repo', '-r', required=True,
                        help="Results repository (local git clone)")
    parser.add_argument('--list', '-l', action='count',
                        help="List available test runs")
    parser.add_argument('--html', action='store_true',
                        help="Generate report in html format")
    group = parser.add_argument_group('Tag and revision')
    group.add_argument('--tag-name', '-t',
                       default='{hostname}/{branch}/{machine}/{commit_number}-g{commit}/{tag_number}',
                       help="Tag name (pattern) for finding results")
    group.add_argument('--hostname', '-H')
    group.add_argument('--branch', '-B', default='master', help="Branch to find commit in")
    group.add_argument('--branch2', help="Branch to find comparision revisions in")
    group.add_argument('--machine', default='qemux86')
    group.add_argument('--history-length', default=25, type=int,
                       help="Number of tested revisions to plot in html report")
    group.add_argument('--commit',
                       help="Revision to search for")
    group.add_argument('--commit-number',
                       help="Revision number to search for, redundant if "
                            "--commit is specified")
    group.add_argument('--commit2',
                       help="Revision to compare with")
    group.add_argument('--commit-number2',
                       help="Revision number to compare with, redundant if "
                            "--commit2 is specified")
    parser.add_argument('--dump-buildstats', nargs='?', const='.',
                        help="Dump buildstats of the tests")

    return parser.parse_args(argv)
def main(argv=None):
    """Script entry point"""
    args = parse_args(argv)
    if args.debug:
        log.setLevel(logging.DEBUG)

    repo = GitRepo(args.repo)

    if args.list:
        list_test_revs(repo, args.tag_name, args.list, hostname=args.hostname)
        return 0

    # Determine hostname which to use
    if not args.hostname:
        auto_args(repo, args)

    revs = gitarchive.get_test_revs(log, repo, args.tag_name, hostname=args.hostname,
                                    branch=args.branch, machine=args.machine)
    if args.branch2 and args.branch2 != args.branch:
        revs2 = gitarchive.get_test_revs(log, repo, args.tag_name, hostname=args.hostname,
                                         branch=args.branch2, machine=args.machine)
        if not len(revs2):
            log.error("No revisions found to compare against")
            return 1
        if not len(revs):
            log.error("No revision to report on found")
            return 1
    else:
        if len(revs) < 2:
            log.error("Only %d tester revisions found, unable to generate report" % len(revs))
            return 1

    # Pick revisions
    if args.commit:
        if args.commit_number:
            log.warning("Ignoring --commit-number as --commit was specified")
        index1 = gitarchive.rev_find(revs, 'commit', args.commit)
    elif args.commit_number:
        index1 = gitarchive.rev_find(revs, 'commit_number', args.commit_number)
    else:
        index1 = len(revs) - 1

    if args.branch2 and args.branch2 != args.branch:
        # Append the rev to report on to the comparison branch's rev list
        revs2.append(revs[index1])
        index1 = len(revs2) - 1
        revs = revs2

    if args.commit2:
        if args.commit_number2:
            log.warning("Ignoring --commit-number2 as --commit2 was specified")
        index2 = gitarchive.rev_find(revs, 'commit', args.commit2)
    elif args.commit_number2:
        index2 = gitarchive.rev_find(revs, 'commit_number', args.commit_number2)
    else:
        if index1 > 0:
            # Find the closest matching commit number for comparision
            # In future we could check the commit is a common ancestor and
            # continue back if not but this good enough for now
            index2 = index1 - 1
            while index2 > 0 and revs[index2].commit_number > revs[index1].commit_number:
                index2 -= 1
        else:
            log.error("Unable to determine the other commit, use "
                      "--commit2 or --commit-number2 to specify it")
            return 1

    index_l = min(index1, index2)
    index_r = max(index1, index2)

    rev_l = revs[index_l]
    rev_r = revs[index_r]
    log.debug("Using 'left' revision %s (%s), %s test runs:\n    %s",
              rev_l.commit_number, rev_l.commit, len(rev_l.tags),
              '\n    '.join(rev_l.tags))
    log.debug("Using 'right' revision %s (%s), %s test runs:\n    %s",
              rev_r.commit_number, rev_r.commit, len(rev_r.tags),
              '\n    '.join(rev_r.tags))

    # Check report format used in the repo (assume all reports in the same fmt)
    xml = is_xml_format(repo, revs[index_r].tags[-1])

    if args.html:
        # Include some history in the html report for the charts
        index_0 = max(0, min(index_l, index_r - args.history_length))
        rev_range = range(index_0, index_r + 1)
    else:
        # We do not need range of commits for text report (no graphs)
        index_0 = index_l
        rev_range = (index_l, index_r)

    # Read raw data
    log.debug("Reading %d revisions, starting from %s (%s)",
              len(rev_range), revs[index_0].commit_number, revs[index_0].commit)
    raw_data = [read_results(repo, revs[i].tags, xml) for i in rev_range]

    data = []
    for raw_m, raw_d in raw_data:
        data.append(AggregateTestData(aggregate_metadata(raw_m),
                                      aggregate_data(raw_d)))

    # Read buildstats only when needed
    buildstats = None
    if args.dump_buildstats or args.html:
        outdir = 'oe-build-perf-buildstats' if args.dump_buildstats else None
        notes_ref = 'buildstats/{}/{}/{}'.format(args.hostname, args.branch, args.machine)
        notes_ref2 = None
        if args.branch2:
            notes_ref = 'buildstats/{}/{}/{}'.format(args.hostname, args.branch2, args.machine)
            notes_ref2 = 'buildstats/{}/{}/{}'.format(args.hostname, args.branch, args.machine)
        buildstats = get_buildstats(repo, notes_ref, notes_ref2, [rev_l, rev_r], outdir)

    # Print report
    if not args.html:
        print_diff_report(data[0].metadata, data[0].results,
                          data[1].metadata, data[1].results)
    else:
        # Re-map 'left' list index to the data table where index_0 maps to 0
        print_html_report(data, index_l - index_0, buildstats)

    return 0
if __name__ == "__main__":
    # Propagate main()'s return code as the process exit status
    sys.exit(main())