[thirdparty/glibc.git] / benchtests / scripts / import_bench.py

#!/usr/bin/python
# Copyright (C) 2015-2021 Free Software Foundation, Inc.
# This file is part of the GNU C Library.
#
# The GNU C Library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# The GNU C Library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with the GNU C Library; if not, see
# <https://www.gnu.org/licenses/>.
"""Functions to import benchmark data and process it"""

import json
try:
    import jsonschema as validator
except ImportError:
    print('Could not find jsonschema module.')
    raise


def mean(lst):
    """Compute and return mean of numbers in a list

    The numpy average function has horrible performance, so implement our
    own mean function.

    Args:
        lst: The list of numbers to average.
    Return:
        The mean of members in the list.
    """
    return sum(lst) / len(lst)


def split_list(bench, func, var):
    """ Split the list into a smaller set of more distinct points

    Group together points such that the difference between the smallest
    point and the mean is less than 1/3rd of the mean.  This means that
    the mean is at most 1.5x the smallest member of that group.

    mean - xmin < mean / 3
    i.e. 2 * mean / 3 < xmin
    i.e. mean < 3 * xmin / 2

    For an evenly distributed group, the largest member will be less than
    twice the smallest member of the group.
    Derivation:

    An evenly distributed series would be xmin, xmin + d, xmin + 2d...

    mean = (2 * n * xmin + n * (n - 1) * d) / 2 * n
    and max element is xmin + (n - 1) * d

    Now, mean < 3 * xmin / 2

    3 * xmin > 2 * mean
    3 * xmin > (2 * n * xmin + n * (n - 1) * d) / n
    3 * n * xmin > 2 * n * xmin + n * (n - 1) * d
    n * xmin > n * (n - 1) * d
    xmin > (n - 1) * d
    2 * xmin > xmin + (n-1) * d
    2 * xmin > xmax

    Hence, proved.

    Similarly, it is trivial to prove that for a similar aggregation by using
    the maximum element, the maximum element in the group must be at most 4/3
    times the mean.

    Args:
        bench: The benchmark object
        func: The function name
        var: The function variant name
    """
    means = []
    lst = bench['functions'][func][var]['timings']
    last = len(lst) - 1
    while lst:
        for i in range(last + 1):
            avg = mean(lst[i:])
            if avg > 0.75 * lst[last]:
                means.insert(0, avg)
                lst = lst[:i]
                last = i - 1
                break
    bench['functions'][func][var]['timings'] = means


def do_for_all_timings(bench, callback):
    """Call a function for all timing objects for each function and its
    variants.

    Args:
        bench: The benchmark object
        callback: The callback function
    """
    for func in bench['functions'].keys():
        for k in bench['functions'][func].keys():
            if 'timings' not in bench['functions'][func][k].keys():
                continue

            callback(bench, func, k)


def compress_timings(points):
    """Club points with close enough values into a single mean value

    See split_list for details on how the clubbing is done.

    Args:
        points: The set of points.
    """
    do_for_all_timings(points, split_list)


def parse_bench(filename, schema_filename):
    """Parse the input file

    Parse and validate the json file containing the benchmark outputs.  Return
    the resulting object.
    Args:
        filename: Name of the benchmark output file.
    Return:
        The bench dictionary.
    """
    with open(schema_filename, 'r') as schemafile:
        schema = json.load(schemafile)
        with open(filename, 'r') as benchfile:
            bench = json.load(benchfile)
            validator.validate(bench, schema)
            do_for_all_timings(bench, lambda b, f, v:
                    b['functions'][f][v]['timings'].sort())
            return bench
Commit	Line	Data
0994b9b6	1	#!/usr/bin/python
2b778ceb	2	# Copyright (C) 2015-2021 Free Software Foundation, Inc.
0994b9b6 SP	3	# This file is part of the GNU C Library.
	4	#
	5	# The GNU C Library is free software; you can redistribute it and/or
	6	# modify it under the terms of the GNU Lesser General Public
	7	# License as published by the Free Software Foundation; either
	8	# version 2.1 of the License, or (at your option) any later version.
	9	#
	10	# The GNU C Library is distributed in the hope that it will be useful,
	11	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	12	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	13	# Lesser General Public License for more details.
	14	#
	15	# You should have received a copy of the GNU Lesser General Public
	16	# License along with the GNU C Library; if not, see
5a82c748	17	# <https://www.gnu.org/licenses/>.
0994b9b6 SP	18	"""Functions to import benchmark data and process it"""
	19
	20	import json
	21	try:
	22	import jsonschema as validator
	23	except ImportError:
	24	print('Could not find jsonschema module.')
	25	raise
	26
	27
0cd28286 SP	28	def mean(lst):
	29	"""Compute and return mean of numbers in a list
	30
	31	The numpy average function has horrible performance, so implement our
	32	own mean function.
	33
	34	Args:
	35	lst: The list of numbers to average.
	36	Return:
	37	The mean of members in the list.
	38	"""
	39	return sum(lst) / len(lst)
	40
	41
	42	def split_list(bench, func, var):
	43	""" Split the list into a smaller set of more distinct points
	44
	45	Group together points such that the difference between the smallest
	46	point and the mean is less than 1/3rd of the mean. This means that
	47	the mean is at most 1.5x the smallest member of that group.
	48
	49	mean - xmin < mean / 3
	50	i.e. 2 * mean / 3 < xmin
	51	i.e. mean < 3 * xmin / 2
	52
	53	For an evenly distributed group, the largest member will be less than
	54	twice the smallest member of the group.
	55	Derivation:
	56
	57	An evenly distributed series would be xmin, xmin + d, xmin + 2d...
	58
	59	mean = (2 * n * xmin + n * (n - 1) * d) / 2 * n
	60	and max element is xmin + (n - 1) * d
	61
	62	Now, mean < 3 * xmin / 2
	63
	64	3 * xmin > 2 * mean
	65	3 * xmin > (2 * n * xmin + n * (n - 1) * d) / n
	66	3 * n * xmin > 2 * n * xmin + n * (n - 1) * d
	67	n * xmin > n * (n - 1) * d
	68	xmin > (n - 1) * d
	69	2 * xmin > xmin + (n-1) * d
	70	2 * xmin > xmax
	71
	72	Hence, proved.
	73
	74	Similarly, it is trivial to prove that for a similar aggregation by using
	75	the maximum element, the maximum element in the group must be at most 4/3
	76	times the mean.
	77
	78	Args:
	79	bench: The benchmark object
	80	func: The function name
	81	var: The function variant name
	82	"""
	83	means = []
	84	lst = bench['functions'][func][var]['timings']
	85	last = len(lst) - 1
	86	while lst:
	87	for i in range(last + 1):
	88	avg = mean(lst[i:])
	89	if avg > 0.75 * lst[last]:
	90	means.insert(0, avg)
	91	lst = lst[:i]
92	last = i - 1
93	break
94	bench['functions'][func][var]['timings'] = means
95
96
97	def do_for_all_timings(bench, callback):
98	"""Call a function for all timing objects for each function and its
99	variants.
100
101	Args:
102	bench: The benchmark object
103	callback: The callback function
104	"""
105	for func in bench['functions'].keys():
106	for k in bench['functions'][func].keys():
107	if 'timings' not in bench['functions'][func][k].keys():
108	continue
109
110	callback(bench, func, k)
111
112
113	def compress_timings(points):
114	"""Club points with close enough values into a single mean value
115
116	See split_list for details on how the clubbing is done.
117
118	Args:
119	points: The set of points.
120	"""
121	do_for_all_timings(points, split_list)
122
123
0994b9b6 SP	124	def parse_bench(filename, schema_filename):
	125	"""Parse the input file
	126
	127	Parse and validate the json file containing the benchmark outputs. Return
	128	the resulting object.
	129	Args:
	130	filename: Name of the benchmark output file.
	131	Return:
	132	The bench dictionary.
	133	"""
	134	with open(schema_filename, 'r') as schemafile:
	135	schema = json.load(schemafile)
	136	with open(filename, 'r') as benchfile:
	137	bench = json.load(benchfile)
	138	validator.validate(bench, schema)
	139	do_for_all_timings(bench, lambda b, f, v:
	140	b['functions'][f][v]['timings'].sort())
	141	return bench