[thirdparty/glibc.git] / benchtests / scripts / compare_bench.py

#!/usr/bin/python
# Copyright (C) 2015-2019 Free Software Foundation, Inc.
# This file is part of the GNU C Library.
#
# The GNU C Library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# The GNU C Library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with the GNU C Library; if not, see
# <http://www.gnu.org/licenses/>.
"""Compare two benchmark results

Given two benchmark result files and a threshold, this script compares the
benchmark results and flags differences in performance beyond a given
threshold.
"""
import sys
import os
import pylab
import import_bench as bench
import argparse

def do_compare(func, var, tl1, tl2, par, threshold):
    """Report one aggregate statistic if it moved beyond the threshold

    Looks up the aggregate named by PAR in both timing dictionaries and
    prints a line on stdout when the relative change, expressed as a
    percentage of the first value, is greater than THRESHOLD.

    Args:
        func: Function name
        var: Function variant name
        tl1: The first timings dictionary (the baseline)
        tl2: The second timings dictionary
        par: Name of the aggregate to compare
        threshold: Percentage difference beyond which a line is printed
    """
    stat = str(par)
    try:
        before = tl1[stat]
        after = tl2[stat]
        change = abs(after - before) * 100 / before
    except KeyError:
        # One of the runs does not carry this statistic at all.
        sys.stderr.write('%s(%s)[%s]: stat does not exist\n' % (func, var, par))
        return
    except ZeroDivisionError:
        # A zero baseline has no meaningful percentage change.
        return

    # Written as a negated '>' so that the comparison behaves exactly like
    # a plain 'if change > threshold' for every kind of value.
    if not change > threshold:
        return

    # '+++' marks a value that went down, '---' one that did not.
    marker = '+++' if before > after else '---'
    print('%s %s(%s)[%s]: (%.2lf%%) from %g to %g' %
            (marker, func, var, par, change, before, after))


def compare_runs(pts1, pts2, threshold, stats):
    """Compare two benchmark runs

    For every function variant in the first run, compare the requested
    aggregate statistics and, when both runs carry detailed timings, each
    pair of timings.  Differences beyond THRESHOLD are printed on stdout.

    Args:
        pts1: Timing data from first machine
        pts2: Timing data from second machine
        threshold: Percentage difference beyond which a result is reported
        stats: Space separated list of aggregate names to compare
    """
    stat_names = stats.split()
    functions2 = pts2['functions']

    # Only functions and variants present in the first run are visited.
    # Anything missing from the second run is reported on stderr and
    # skipped instead of aborting the whole comparison with a KeyError.
    for func, variants in pts1['functions'].items():
        for var, tl1 in variants.items():
            try:
                tl2 = functions2[func][var]
            except KeyError:
                sys.stderr.write('%s(%s): missing from second benchmark\n' %
                        (func, var))
                continue

            # Compare the consolidated numbers
            for stat in stat_names:
                do_compare(func, var, tl1, tl2, stat, threshold)

            # Skip over to the next variant or function if there is no detailed
            # timing info for the function variant.
            if 'timings' not in tl1 or 'timings' not in tl2:
                continue

            timings1 = tl1['timings']
            timings2 = tl2['timings']

            # If two lists do not have the same length then it is likely that
            # the performance characteristics of the function have changed.
            # XXX: It is also likely that there was some measurement that
            # strayed outside the usual range.  Such outliers should not
            # happen on an idle machine with identical hardware and
            # configuration, but ideal environments are hard to come by.
            if len(timings1) != len(timings2):
                print('* %s(%s): Timing characteristics changed' %
                        (func, var))
                print('\tBefore: [%s]' %
                        ', '.join([str(x) for x in timings1]))
                print('\tAfter: [%s]' %
                        ', '.join([str(x) for x in timings2]))
                continue

            # Print the pairs whose difference crosses the threshold we have
            # set.
            for t1, t2 in zip(timings1, timings2):
                try:
                    d = abs(t2 - t1) * 100 / t1
                except ZeroDivisionError:
                    # A zero baseline has no meaningful percentage change;
                    # ignore it just like do_compare does for aggregates.
                    continue

                if d > threshold:
                    ind = '-' if t2 > t1 else '+'
                    print("%s %s(%s): (%.2lf%%) from %g to %g" %
                            (ind, func, var, d, t1, t2))


def _scatter_timings(timings, color):
    """Add one run's timings to the current figure in the given color."""
    length = len(timings)
    if length == 0:
        # Nothing to draw, and the marker size below would divide by zero.
        return

    X = [float(x) for x in range(length)]
    # The marker size shrinks as the number of points grows.
    lines = pylab.scatter(X, timings, 1.5 + 100 / length)
    pylab.setp(lines, 'color', color)


def plot_graphs(bench1, bench2):
    """Plot graphs for functions

    Make scatter plots for the functions and their variants.  Points from
    the first run are drawn in red and points from the second run in green.
    Each plot is saved as <func>-<var>.png, or <func>.png when the variant
    name is empty.  Variants without detailed timings in either run are
    skipped with a message on stderr.

    Args:
        bench1: Set of points from the first machine
        bench2: Set of points from the second machine.
    """
    functions2 = bench2['functions']

    for func, variants in bench1['functions'].items():
        for var, data1 in variants.items():
            # The second run may lack this function or variant entirely.
            data2 = functions2.get(func, {}).get(var, {})

            # No point trying to print a graph if there are no detailed
            # timings in one of the runs.
            if 'timings' not in data1 or 'timings' not in data2:
                sys.stderr.write('Skipping graph for %s(%s)\n' % (func, var))
                continue

            pylab.clf()
            pylab.ylabel('Time (cycles)')

            _scatter_timings(data1['timings'], 'r')
            _scatter_timings(data2['timings'], 'g')

            if var:
                filename = "%s-%s.png" % (func, var)
            else:
                filename = "%s.png" % func
            sys.stderr.write('Writing out %s\n' % filename)
            pylab.savefig(filename)

def main(bench1, bench2, schema, threshold, stats):
    """Parse two benchmark outputs, plot them and compare them

    Args:
        bench1: Path of the first benchmark output file
        bench2: Path of the second benchmark output file
        schema: Path of the JSON schema handed to bench.parse_bench
        threshold: Percentage difference beyond which a result is reported
        stats: Space separated list of aggregate names to compare
    """
    runs = [bench.parse_bench(path, schema) for path in (bench1, bench2)]

    # Graphs are drawn before bench.compress_timings is applied to the runs.
    plot_graphs(runs[0], runs[1])

    for run in runs:
        bench.compress_timings(run)

    compare_runs(runs[0], runs[1], threshold, stats)


if __name__ == '__main__':
    # The default schema file is looked up next to this script.
    script_dir = os.path.dirname(os.path.realpath(__file__))
    default_schema = os.path.join(script_dir, 'benchout.schema.json')

    parser = argparse.ArgumentParser(
        description='Take two benchmark and compare their timings.')

    # Positional arguments: the two benchmark outputs to compare.
    parser.add_argument('bench1', help='First bench to compare')
    parser.add_argument('bench2', help='Second bench to compare')

    # Options, each with a default shown in the help text.
    parser.add_argument(
        '--schema', default=default_schema,
        help='JSON file to validate source/dest files (default: %(default)s)')
    parser.add_argument(
        '--threshold', type=float, default=10.0,
        help='Only print those with equal or higher threshold '
             '(default: %(default)s)')
    parser.add_argument(
        '--stats', type=str, default='min mean',
        help='Only consider values from the statistics specified as a space '
             'separated list (default: %(default)s)')

    opts = parser.parse_args()

    main(opts.bench1, opts.bench2, opts.schema, opts.threshold, opts.stats)
Commit	Line	Data
0cd28286	1	#!/usr/bin/python
04277e02	2	# Copyright (C) 2015-2019 Free Software Foundation, Inc.
0cd28286 SP	3	# This file is part of the GNU C Library.
	4	#
	5	# The GNU C Library is free software; you can redistribute it and/or
	6	# modify it under the terms of the GNU Lesser General Public
	7	# License as published by the Free Software Foundation; either
	8	# version 2.1 of the License, or (at your option) any later version.
	9	#
	10	# The GNU C Library is distributed in the hope that it will be useful,
	11	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	12	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	13	# Lesser General Public License for more details.
	14	#
	15	# You should have received a copy of the GNU Lesser General Public
	16	# License along with the GNU C Library; if not, see
	17	# <http://www.gnu.org/licenses/>.
	18	"""Compare two benchmark results
	19
	20	Given two benchmark result files and a threshold, this script compares the
	21	benchmark results and flags differences in performance beyond a given
	22	threshold.
	23	"""
	24	import sys
	25	import os
	26	import pylab
	27	import import_bench as bench
1cf4ae7f	28	import argparse
0cd28286 SP	29
	30	def do_compare(func, var, tl1, tl2, par, threshold):
	31	"""Compare one of the aggregate measurements
	32
	33	Helper function to compare one of the aggregate measurements of a function
	34	variant.
	35
	36	Args:
	37	func: Function name
	38	var: Function variant name
	39	tl1: The first timings list
	40	tl2: The second timings list
	41	par: The aggregate to measure
	42	threshold: The threshold for differences, beyond which the script should
	43	print a warning.
	44	"""
1990185f LS	45	try:
	46	v1 = tl1[str(par)]
	47	v2 = tl2[str(par)]
	48	d = abs(v2 - v1) * 100 / v1
	49	except KeyError:
de099757	50	sys.stderr.write('%s(%s)[%s]: stat does not exist\n' % (func, var, par))
1990185f LS	51	return
	52	except ZeroDivisionError:
	53	return
	54
0cd28286	55	if d > threshold:
1990185f	56	if v1 > v2:
0cd28286 SP	57	ind = '+++'
	58	else:
	59	ind = '---'
	60	print('%s %s(%s)[%s]: (%.2lf%%) from %g to %g' %
1990185f	61	(ind, func, var, par, d, v1, v2))
0cd28286 SP	62
0cd28286 SP	63
1990185f	64	def compare_runs(pts1, pts2, threshold, stats):
0cd28286 SP	65	"""Compare two benchmark runs
	66
	67	Args:
	68	pts1: Timing data from first machine
	69	pts2: Timing data from second machine
	70	"""
	71
	72	# XXX We assume that the two benchmarks have identical functions and
	73	# variants. We cannot compare two benchmarks that may have different
	74	# functions or variants. Maybe that is something for the future.
	75	for func in pts1['functions'].keys():
	76	for var in pts1['functions'][func].keys():
	77	tl1 = pts1['functions'][func][var]
	78	tl2 = pts2['functions'][func][var]
	79
	80	# Compare the consolidated numbers
	81	# do_compare(func, var, tl1, tl2, 'max', threshold)
1990185f LS	82	for stat in stats.split():
1990185f LS	83	do_compare(func, var, tl1, tl2, stat, threshold)
0cd28286 SP	84
	85	# Skip over to the next variant or function if there is no detailed
	86	# timing info for the function variant.
	87	if 'timings' not in pts1['functions'][func][var].keys() or \
	88	'timings' not in pts2['functions'][func][var].keys():
de099757	89	continue
0cd28286 SP	90
	91	# If two lists do not have the same length then it is likely that
	92	# the performance characteristics of the function have changed.
	93	# XXX: It is also likely that there was some measurement that
	94	# strayed outside the usual range. Such ouiers should not
	95	# happen on an idle machine with identical hardware and
	96	# configuration, but ideal environments are hard to come by.
	97	if len(tl1['timings']) != len(tl2['timings']):
	98	print('* %s(%s): Timing characteristics changed' %
	99	(func, var))
	100	print('\tBefore: [%s]' %
	101	', '.join([str(x) for x in tl1['timings']]))
	102	print('\tAfter: [%s]' %
	103	', '.join([str(x) for x in tl2['timings']]))
	104	continue
	105
	106	# Collect numbers whose differences cross the threshold we have
	107	# set.
	108	issues = [(x, y) for x, y in zip(tl1['timings'], tl2['timings']) \
	109	if abs(y - x) * 100 / x > threshold]
	110
	111	# Now print them.
	112	for t1, t2 in issues:
	113	d = abs(t2 - t1) * 100 / t1
	114	if t2 > t1:
	115	ind = '-'
	116	else:
	117	ind = '+'
	118
	119	print("%s %s(%s): (%.2lf%%) from %g to %g" %
	120	(ind, func, var, d, t1, t2))
	121
	122
	123	def plot_graphs(bench1, bench2):
	124	"""Plot graphs for functions
	125
	126	Make scatter plots for the functions and their variants.
	127
	128	Args:
	129	bench1: Set of points from the first machine
	130	bench2: Set of points from the second machine.
	131	"""
	132	for func in bench1['functions'].keys():
	133	for var in bench1['functions'][func].keys():
	134	# No point trying to print a graph if there are no detailed
	135	# timings.
	136	if u'timings' not in bench1['functions'][func][var].keys():
de099757	137	sys.stderr.write('Skipping graph for %s(%s)\n' % (func, var))
0cd28286 SP	138	continue
	139
	140	pylab.clf()
	141	pylab.ylabel('Time (cycles)')
	142
	143	# First set of points
	144	length = len(bench1['functions'][func][var]['timings'])
	145	X = [float(x) for x in range(length)]
	146	lines = pylab.scatter(X, bench1['functions'][func][var]['timings'],
	147	1.5 + 100 / length)
	148	pylab.setp(lines, 'color', 'r')
	149
	150	# Second set of points
	151	length = len(bench2['functions'][func][var]['timings'])
	152	X = [float(x) for x in range(length)]
	153	lines = pylab.scatter(X, bench2['functions'][func][var]['timings'],
	154	1.5 + 100 / length)
	155	pylab.setp(lines, 'color', 'g')
	156
	157	if var:
	158	filename = "%s-%s.png" % (func, var)
	159	else:
	160	filename = "%s.png" % func
de099757	161	sys.stderr.write('Writing out %s' % filename)
0cd28286 SP	162	pylab.savefig(filename)
0cd28286 SP	163
1990185f	164	def main(bench1, bench2, schema, threshold, stats):
1cf4ae7f LS	165	bench1 = bench.parse_bench(bench1, schema)
1cf4ae7f LS	166	bench2 = bench.parse_bench(bench2, schema)
0cd28286 SP	167
	168	plot_graphs(bench1, bench2)
	169
	170	bench.compress_timings(bench1)
	171	bench.compress_timings(bench2)
	172
1990185f	173	compare_runs(bench1, bench2, threshold, stats)
0cd28286 SP	174
	175
	176	if __name__ == '__main__':
1cf4ae7f LS	177	parser = argparse.ArgumentParser(description='Take two benchmark and compare their timings.')
	178
	179	# Required parameters
	180	parser.add_argument('bench1', help='First bench to compare')
	181	parser.add_argument('bench2', help='Second bench to compare')
	182
	183	# Optional parameters
	184	parser.add_argument('--schema',
	185	default=os.path.join(os.path.dirname(os.path.realpath(__file__)),'benchout.schema.json'),
	186	help='JSON file to validate source/dest files (default: %(default)s)')
c892ae04	187	parser.add_argument('--threshold', default=10.0, type=float, help='Only print those with equal or higher threshold (default: %(default)s)')
1990185f	188	parser.add_argument('--stats', default='min mean', type=str, help='Only consider values from the statistics specified as a space separated list (default: %(default)s)')
1cf4ae7f LS	189
	190	args = parser.parse_args()
	191
1990185f	192	main(args.bench1, args.bench2, args.schema, args.threshold, args.stats)