From 8a75eecbad1da39fc29fa112bae4d9d547404656 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Fri, 24 Mar 2023 23:50:49 +1100 Subject: [PATCH] Rewrite `cg_diff` in Python. For all the same reasons I rewrote `cg_annotate` in Python. The commit also moves the Python "build" steps into `auxprogs/pybuild.sh`, for easy sharing. Finally, it very slightly tweaks the whitespace in the output of `cg_annotate`. --- auxprogs/pybuild.sh | 88 +++ cachegrind/Makefile.am | 34 +- cachegrind/cg_annotate.in | 67 +- cachegrind/cg_diff.in | 657 +++++++++--------- cachegrind/tests/Makefile.am | 1 + cachegrind/tests/ann-diff1.post.exp | 3 +- cachegrind/tests/ann-diff1.vgtest | 2 +- .../tests/ann-diff2-aux/ann-diff2-basic.rs | 10 + cachegrind/tests/ann-diff2.post.exp | 46 ++ cachegrind/tests/ann-diff2.stderr.exp | 17 + cachegrind/tests/ann-diff2.vgtest | 6 + cachegrind/tests/ann-diff2a.cgout | 9 + cachegrind/tests/ann-diff2b.cgout | 15 + 13 files changed, 561 insertions(+), 394 deletions(-) create mode 100755 auxprogs/pybuild.sh create mode 100644 cachegrind/tests/ann-diff2-aux/ann-diff2-basic.rs create mode 100644 cachegrind/tests/ann-diff2.post.exp create mode 100644 cachegrind/tests/ann-diff2.stderr.exp create mode 100644 cachegrind/tests/ann-diff2.vgtest create mode 100644 cachegrind/tests/ann-diff2a.cgout create mode 100644 cachegrind/tests/ann-diff2b.cgout diff --git a/auxprogs/pybuild.sh b/auxprogs/pybuild.sh new file mode 100755 index 0000000000..432a768c51 --- /dev/null +++ b/auxprogs/pybuild.sh @@ -0,0 +1,88 @@ +#! /bin/sh + +# "Build" a given Python file `foo`: format it, type-check it, lint it, and +# generate the final file from the `foo.in` file. +# +# The following Python tools are used by this script. +# +# - Formatters: +# - `black`, for general formatting. This avoids the need for style checkers +# like `flake8`. Note that `black` allows a max line length of 88, which is +# a mild but common PEP-8 violation. +# - `isort`, for import sorting. 
+#
+# - Type-checkers:
+#   - `mypy`. This is the most commonly used Python type checker.
+#   - `pyright`. This is another good type checker.
+#   - Sometimes they give different results. Both should be kept happy.
+#
+# - Linters:
+#   - `ruff`. Sometimes useful, and very fast to run.
+#   - `pylint`. Sometimes annoying, sometimes useful. The `pylintrc`
+#     modifies/disables the more annoying lints.
+#   - Sometimes they give different results. Both should be kept happy.
+#
+# The following tools are relevant, but not run by this script.
+#
+# - Profilers:
+#   - `cProfile` + `snakeviz`: Typically run with
+#     `python3 -m cProfile -o cg.prof cg_annotate $INPUT && snakeviz cg.prof`.
+#   - `scalene`. Typically run with `scalene ./cg_annotate $INPUT`.
+#
+# - Packager:
+#   - `cp` is used for distribution. This is possible because this program is a
+#     single file and only uses the Python Standard Library. This avoids the
+#     need for any of the million different Python package management tools.
+#
+# All of the above tools can be installed with `pip3 install $NAME`, except
+# `cProfile` which is built into Python.
+
+set -e
+
+# Currently targeting Python 3.9 (released in October 2020) and up. The tools
+# use two different syntaxes for specifying the version number.
+ver=3.9
+pyver=py39
+
+infile=$1
+outfile=$2
+if [ -z "$outfile" ] ; then
+    echo "usage: $0 INFILE OUTFILE" >&2; exit 1
+fi
+
+echo "== black =="
+black "$infile"
+echo
+
+echo "== isort =="
+isort "$infile"
+echo
+
+echo "== mypy =="
+mypy --strict "$infile" --python-version "$ver"
+echo
+
+# Strict mode for pyright is enabled by a `pyright: strict` comment inside each
+# Python file.
+#
+# Note: `pyright` refuses to check any file without a `.py` extension, hence
+# the copying to a temp file with a `.py` extension.
+echo "== pyright =="
+tmpfile=$(mktemp --tmpdir "$infile.XXX.py")
+trap 'rm -f "$tmpfile"' EXIT  # Remove the copy even if pyright fails under `set -e`.
+cp "$infile" "$tmpfile"
+pyright --pythonversion "$ver" "$tmpfile"
+rm -f "$tmpfile"
+echo
+
+echo "== ruff =="
+ruff check --target-version "$pyver" "$infile"
+echo
+
+echo "== pylint =="
+pylint --py-version "$ver" "$infile"
+
+echo "== config.status =="
+make "$outfile"
+echo
+
diff --git a/cachegrind/Makefile.am b/cachegrind/Makefile.am index 8ea99ca529..34717ae541 100644 --- a/cachegrind/Makefile.am +++ b/cachegrind/Makefile.am @@ -93,28 +93,12 @@ endif # Miscellaneous #---------------------------------------------------------------------------- -# Run the formatters, type checkers, and linters on `cg_annotate.in`, then -# generate `cg_annotate`. -# -# Note: `pyright` refuses to check any file without a `.py` extension, hence -# the copying to `/tmp/tmp.py`. -ann: - @echo "== black ==" - @black cg_annotate.in - @echo - @echo "== isort ==" - @isort cg_annotate.in - @echo - @echo "== mypy ==" - @mypy --strict cg_annotate.in - @echo - @echo "== pyright ==" - @cp cg_annotate.in /tmp/tmp.py && pyright /tmp/tmp.py && rm /tmp/tmp.py - @echo - @echo "== ruff ==" - @ruff cg_annotate.in - @echo - @echo "== pylint ==" - @pylint cg_annotate.in - @echo "== config.status ==" - $(MAKE) cg_annotate +# "Build" `cg_annotate`. The `+` avoids warnings about the jobserver. +pyann: + +../auxprogs/pybuild.sh cg_annotate.in cg_annotate + +# "Build" `cg_diff`. The `+` avoids warnings about the jobserver. +pydiff: + +../auxprogs/pybuild.sh cg_diff.in cg_diff + +.PHONY: pyann pydiff diff --git a/cachegrind/cg_annotate.in b/cachegrind/cg_annotate.in index 43dce8f10e..240e069ccb 100755 --- a/cachegrind/cg_annotate.in +++ b/cachegrind/cg_annotate.in @@ -30,44 +30,9 @@ This script reads Cachegrind output files and produces human-readable reports. """ -# Use `make ann` to "build" this script every time it is changed. 
This runs the -# formatters, type-checkers, and linters on `cg_annotate.in` and then generates -# `cg_annotate`. -# -# Python versions: Currently this script targets Python 3.9 and later versions. -# Consequences of this: -# - No use of `TypeAlias` for explicit type aliases, which requires 3.10. -# -# The following Python tools are used. All can be installed with `pip3 install -# $NAME`, except `cProfile` which is built into Python. -# -# - Formatters: -# - `black`, for general formatting. This avoids the need for style checkers -# like `flake8`. Note that `black` allows a max line length of 88, which is -# a mild but common PEP-8 violation. -# - `isort`, for import sorting. -# -# - Type-checkers: -# - `mypy --strict`. This is the most commonly used Python type checker. -# - `pyright`. This is another good type checker. The `pyright: strict` -# comment above forces strict checking. -# - Sometimes one type-checker will complain about something the other does -# not. The goal is to keep both type checkers happy. -# -# - Linters: -# - `ruff`. Sometimes useful, and very fast to run. -# - `pylint`. Sometimes annoying, sometimes useful. The `pylintrc` -# modifies/disables the more annoying lints. -# -# - Profilers: -# - `cProfile` + `snakeviz`: Typically run with -# `python3 -m cProfile -o cg.prof cg_annotate $INPUT && snakeviz cg.prof`. -# - `scalene`. Typically run with `scalene ./cg_annotate $INPUT`. -# -# - Packager: -# - `cp` is used for distribution. This is possible because this program is a -# single file and only uses the Python Standard Library. This avoids the -# needs for any of the million different Python package management tools. +# Use `make pyann` to "build" this script with `auxprogs/pybuild.sh` every time +# it is changed. This runs the formatters, type-checkers, and linters on +# `cg_annotate.in` and then generates `cg_annotate`. 
from __future__ import annotations @@ -140,7 +105,7 @@ class Args(Namespace): help=f"(deprecated) same as --no-{name}", ) - p = ArgumentParser(description="Process Cachegrind output files.") + p = ArgumentParser(description="Process a Cachegrind output file.") p.add_argument("--version", action="version", version="%(prog)s-@VERSION@") @@ -317,11 +282,9 @@ class Cc: Flfn = NewType("Flfn", tuple[str, str]) # Per-function CCs. -# Note: not using `TypeAlias`. See "Python versions" comment above. DictFlfnCc = DefaultDict[Flfn, Cc] # Per-line CCs, organised by filename and line number. -# Note: not using `TypeAlias`. See "Python versions" comment above. DictLineCc = DefaultDict[int, Cc] DictFlDictLineCc = DefaultDict[str, DictLineCc] @@ -376,7 +339,7 @@ def read_cgout_file() -> tuple[str, str, Events, DictFlfnCc, DictFlDictLineCc, C curr_fl = "" curr_flfn = Flfn(("", "")) - # Three different places where we accumulate CC data. + # Different places where we accumulate CC data. dict_flfn_cc: DictFlfnCc = defaultdict(events.mk_empty_cc) dict_fl_dict_line_cc: DictFlDictLineCc = defaultdict(mk_empty_dict_line_cc) summary_cc = None @@ -479,8 +442,8 @@ class CcPrinter: min_cc.counts[i] = count # Find maximum width for each column. - self.count_widths = [0] * len(events.events) - self.perc_widths = [0] * len(events.events) + self.count_widths = [0] * events.num_events + self.perc_widths = [0] * events.num_events for i, event in enumerate(events.events): # Get count and perc widths of the min and max CCs. (min_count, min_perc) = self.count_and_perc(min_cc, i) @@ -757,16 +720,16 @@ def print_annotated_src_file( else: break - # If there was info on lines past the end of the file, warn. - if line_nums: - for line_num in line_nums: - printer.print_cc(dict_line_cc[line_num], f"") - annotated_ccs.line_nums_known_cc += dict_line_cc[line_num] + # If there was info on lines past the end of the file, warn. 
+ if line_nums: + for line_num in line_nums: + printer.print_cc(dict_line_cc[line_num], f"") + annotated_ccs.line_nums_known_cc += dict_line_cc[line_num] - print() - warn_bogus_lines(src_file.name) + print() + warn_bogus_lines(src_file.name) - print() + print() # This (partially) consumes `dict_fl_dict_line_cc`. diff --git a/cachegrind/cg_diff.in b/cachegrind/cg_diff.in index 462308b49e..bae0c7abe4 100755 --- a/cachegrind/cg_diff.in +++ b/cachegrind/cg_diff.in @@ -1,13 +1,14 @@ -#! @PERL@ +#! /usr/bin/env python3 +# pyright: strict -##--------------------------------------------------------------------## -##--- Cachegrind's differencer. cg_diff.in ---## -##--------------------------------------------------------------------## +# -------------------------------------------------------------------- +# --- Cachegrind's differencer. cg_diff.in --- +# -------------------------------------------------------------------- # This file is part of Cachegrind, a Valgrind tool for cache # profiling programs. # -# Copyright (C) 2002-2017 Nicholas Nethercote +# Copyright (C) 2002-2023 Nicholas Nethercote # njn@valgrind.org # # This program is free software; you can redistribute it and/or @@ -25,312 +26,340 @@ # # The GNU General Public License is contained in the file COPYING. -#---------------------------------------------------------------------------- -# This is a very cut-down and modified version of cg_annotate. -#---------------------------------------------------------------------------- - -use warnings; -use strict; - -#---------------------------------------------------------------------------- -# Global variables -#---------------------------------------------------------------------------- - -# Version number -my $version = "@VERSION@"; - -# Usage message. -my $usage = < - - options for the user, with defaults in [ ], are: - -h --help show this message - -v --version show version - --mod-filename= a Perl search-and-replace expression that is applied - to filenames, eg. 
--mod-filename='s/prog[0-9]/projN/' - --mod-funcname= like --mod-filename, but applied to function names - - cg_diff is Copyright (C) 2002-2017 Nicholas Nethercote. - and licensed under the GNU General Public License, version 2. - Bug reports, feedback, admiration, abuse, etc, to: njn\@valgrind.org. - -END -; - -# --mod-filename expression -my $mod_filename = undef; - -# --mod-funcname expression -my $mod_funcname = undef; - -#----------------------------------------------------------------------------- -# Argument and option handling -#----------------------------------------------------------------------------- -sub process_cmd_line() -{ - my ($file1, $file2) = (undef, undef); - - for my $arg (@ARGV) { - - if ($arg =~ /^-/) { - # --version - if ($arg =~ /^-v$|^--version$/) { - die("cg_diff-$version\n"); - - } elsif ($arg =~ /^--mod-filename=(.*)/) { - $mod_filename = $1; - - } elsif ($arg =~ /^--mod-funcname=(.*)/) { - $mod_funcname = $1; - - } else { # -h and --help fall under this case - die($usage); - } - - } elsif (not defined($file1)) { - $file1 = $arg; - - } elsif (not defined($file2)) { - $file2 = $arg; - - } else { - die($usage); - } - } - - # Must have specified two input files. - if (not defined $file1 or not defined $file2) { - die($usage); - } - - return ($file1, $file2); -} - -#----------------------------------------------------------------------------- -# Reading of input file -#----------------------------------------------------------------------------- -sub max ($$) -{ - my ($x, $y) = @_; - return ($x > $y ? $x : $y); -} - -# Add the two arrays; any '.' entries are ignored. Two tricky things: -# 1. If $a2->[$i] is undefined, it defaults to 0 which is what we want; we turn -# off warnings to allow this. This makes things about 10% faster than -# checking for definedness ourselves. -# 2. We don't add an undefined count or a ".", even though it's value is 0, -# because we don't want to make an $a2->[$i] that is undef become 0 -# unnecessarily. 
-sub add_array_a_to_b ($$) -{ - my ($a, $b) = @_; - - my $n = max(scalar @$a, scalar @$b); - $^W = 0; - foreach my $i (0 .. $n-1) { - $b->[$i] += $a->[$i] if (defined $a->[$i] && "." ne $a->[$i]); - } - $^W = 1; -} - -sub sub_array_b_from_a ($$) -{ - my ($a, $b) = @_; - - my $n = max(scalar @$a, scalar @$b); - $^W = 0; - foreach my $i (0 .. $n-1) { - $a->[$i] -= $b->[$i]; # XXX: doesn't handle '.' entries - } - $^W = 1; -} - -# Add each event count to the CC array. '.' counts become undef, as do -# missing entries (implicitly). -sub line_to_CC ($$) -{ - my ($line, $numEvents) = @_; - - my @CC = (split /\s+/, $line); - (@CC <= $numEvents) or die("Line $.: too many event counts\n"); - return \@CC; -} - -sub read_input_file($) -{ - my ($input_file) = @_; - - open(INPUTFILE, "< $input_file") - || die "Cannot open $input_file for reading\n"; - - # Read "desc:" lines. - my $desc; - my $line; - while ($line = ) { - if ($line =~ s/desc:\s+//) { - $desc .= $line; - } else { - last; - } - } - - # Read "cmd:" line (Nb: will already be in $line from "desc:" loop above). - ($line =~ s/^cmd:\s+//) or die("Line $.: missing command line\n"); - my $cmd = $line; - chomp($cmd); # Remove newline - - # Read "events:" line. We make a temporary hash in which the Nth event's - # value is N, which is useful for handling --show/--sort options below. - $line = ; - (defined $line && $line =~ s/^events:\s+//) - or die("Line $.: missing events line\n"); - my @events = split(/\s+/, $line); - my $numEvents = scalar @events; - - my $currFileName; - my $currFileFuncName; - - my %CCs; # hash("$filename###$funcname" => CC array) - my $currCC = undef; # CC array - - my $summaryCC; - - # Read body of input file. - while () { - # Skip comments and empty lines. 
- next if /^\s*$/ || /^\#/; - - if (s/^(-?\d+)\s+//) { - my $CC = line_to_CC($_, $numEvents); - defined($currCC) || die; - add_array_a_to_b($CC, $currCC); - - } elsif (s/^fn=(.*)$//) { - defined($currFileName) || die; - my $tmpFuncName = $1; - if (defined $mod_funcname) { - eval "\$tmpFuncName =~ $mod_funcname"; - } - $currFileFuncName = "$currFileName###$tmpFuncName"; - $currCC = $CCs{$currFileFuncName}; - if (not defined $currCC) { - $currCC = []; - $CCs{$currFileFuncName} = $currCC; - } - - } elsif (s/^fl=(.*)$//) { - $currFileName = $1; - if (defined $mod_filename) { - eval "\$currFileName =~ $mod_filename"; - } - # Assume that a "fn=" line is followed by a "fl=" line. - $currFileFuncName = undef; - - } elsif (s/^summary:\s+//) { - $summaryCC = line_to_CC($_, $numEvents); - (scalar(@$summaryCC) == @events) - or die("Line $.: summary event and total event mismatch\n"); - - } else { - warn("WARNING: line $. malformed, ignoring\n"); - } - } - - # Check if summary line was present - if (not defined $summaryCC) { - die("missing final summary line, aborting\n"); - } - - close(INPUTFILE); - - return ($cmd, \@events, \%CCs, $summaryCC); -} - -#---------------------------------------------------------------------------- -# "main()" -#---------------------------------------------------------------------------- -# Commands seen in the files. Need not match. -my $cmd1; -my $cmd2; - -# Events seen in the files. They must match. -my $events1; -my $events2; - -# Individual CCs, organised by filename/funcname/line_num. -# hashref("$filename###$funcname", CC array) -my $CCs1; -my $CCs2; - -# Total counts for summary (an arrayref). 
-my $summaryCC1; -my $summaryCC2; - -#---------------------------------------------------------------------------- -# Read the input files -#---------------------------------------------------------------------------- -my ($file1, $file2) = process_cmd_line(); -($cmd1, $events1, $CCs1, $summaryCC1) = read_input_file($file1); -($cmd2, $events2, $CCs2, $summaryCC2) = read_input_file($file2); - -#---------------------------------------------------------------------------- -# Check the events match -#---------------------------------------------------------------------------- -my $n = max(scalar @$events1, scalar @$events2); -$^W = 0; # turn off warnings, because we might hit undefs -foreach my $i (0 .. $n-1) { - ($events1->[$i] eq $events2->[$i]) || die "events don't match, aborting\n"; -} -$^W = 1; - -#---------------------------------------------------------------------------- -# Do the subtraction: CCs2 -= CCs1 -#---------------------------------------------------------------------------- -while (my ($filefuncname, $CC1) = each(%$CCs1)) { - my $CC2 = $CCs2->{$filefuncname}; - if (not defined $CC2) { - $CC2 = []; - sub_array_b_from_a($CC2, $CC1); # CC2 -= CC1 - $CCs2->{$filefuncname} = $CC2; - } else { - sub_array_b_from_a($CC2, $CC1); # CC2 -= CC1 - } -} -sub_array_b_from_a($summaryCC2, $summaryCC1); - -#---------------------------------------------------------------------------- -# Print the result, in CCs2 -#---------------------------------------------------------------------------- -print("desc: Files compared: $file1; $file2\n"); -print("cmd: $cmd1; $cmd2\n"); -print("events: "); -for my $e (@$events1) { - print(" $e"); -} -print("\n"); - -while (my ($filefuncname, $CC) = each(%$CCs2)) { - - my @x = split(/###/, $filefuncname); - (scalar @x == 2) || die; - - print("fl=$x[0]\n"); - print("fn=$x[1]\n"); - - print("0"); - foreach my $n (@$CC) { - print(" $n"); - } - print("\n"); -} - -print("summary:"); -foreach my $n (@$summaryCC2) { - print(" $n"); -} 
-print("\n"); - -##--------------------------------------------------------------------## -##--- end ---## -##--------------------------------------------------------------------## +""" +This script diffs Cachegrind output files. +""" + +# Use `make pydiff` to "build" this script every time it is changed. This runs +# the formatters, type-checkers, and linters on `cg_diff.in` and then generates +# `cg_diff`. +# +# This is a cut-down version of `cg_annotate.in`. + +from __future__ import annotations + +import re +import sys +from argparse import ArgumentParser, Namespace +from collections import defaultdict +from typing import Callable, DefaultDict, NewType, NoReturn + +SearchAndReplace = Callable[[str], str] + + +class Args(Namespace): + """ + A typed wrapper for parsed args. + + None of these fields are modified after arg parsing finishes. + """ + + mod_filename: SearchAndReplace + mod_funcname: SearchAndReplace + cgout_filename1: str + cgout_filename2: str + + @staticmethod + def parse() -> Args: + # We support Perl-style `s/old/new/flags` search-and-replace + # expressions, because that's how this option was implemented in the + # old Perl version of `cg_diff`. This requires conversion from + # `s/old/new/` style to `re.sub`. The conversion isn't a perfect + # emulation of Perl regexps (e.g. Python uses `\1` rather than `$1` for + # using captures in the `new` part), but it should be close enough. The + # only supported flags are `g` (global) and `i` (ignore case). + def search_and_replace(regex: str | None) -> SearchAndReplace: + if regex is None: + return lambda s: s + + # Extract the parts of a `s/old/new/tail` regex. `(? None: + self.events = text.split() + self.num_events = len(self.events) + + def mk_cc(self, text: str) -> Cc: + """Raises a `ValueError` exception on syntax error.""" + # This is slightly faster than a list comprehension. 
+        counts = list(map(int, text.split()))
+
+        if len(counts) == self.num_events:
+            pass
+        elif len(counts) < self.num_events:
+            # Add zeroes at the end for any missing numbers.
+            counts.extend([0] * (self.num_events - len(counts)))
+        else:
+            raise ValueError
+
+        return Cc(counts)
+
+    def mk_empty_cc(self) -> Cc:
+        # This is much faster than a list comprehension.
+        return Cc([0] * self.num_events)
+
+
+class Cc:
+    """
+    This is a dumb container for counts.
+
+    It doesn't know anything about events, i.e. what each count means. It can
+    do basic operations like `__iadd__` and `__eq__`, and anything more must be
+    done elsewhere. `Events.mk_cc` and `Events.mk_empty_cc` are used for
+    construction.
+    """
+
+    # Always the same length as `Events.events`.
+    counts: list[int]
+
+    def __init__(self, counts: list[int]) -> None:
+        self.counts = counts
+
+    def __repr__(self) -> str:
+        return str(self.counts)
+
+    def __eq__(self, other: object) -> bool:
+        if not isinstance(other, Cc):
+            return NotImplemented
+        return self.counts == other.counts
+
+    def __iadd__(self, other: Cc) -> Cc:
+        for i, other_count in enumerate(other.counts):
+            self.counts[i] += other_count
+        return self
+
+    def __isub__(self, other: Cc) -> Cc:
+        for i, other_count in enumerate(other.counts):
+            self.counts[i] -= other_count
+        return self
+
+
+# A paired filename and function name.
+Flfn = NewType("Flfn", tuple[str, str])
+
+# Per-function CCs.
+DictFlfnCc = DefaultDict[Flfn, Cc]
+
+
+def die(msg: str) -> NoReturn:
+    print("cg_diff: error:", msg, file=sys.stderr)
+    sys.exit(1)
+
+
+def read_cgout_file(cgout_filename: str) -> tuple[str, Events, DictFlfnCc, Cc]:
+    # The file format is described in Cachegrind's manual.
+    try:
+        cgout_file = open(cgout_filename, "r", encoding="utf-8")
+    except OSError as err:
+        die(f"{err}")
+
+    with cgout_file:
+        cgout_line_num = 0
+
+        def parse_die(msg: str) -> NoReturn:
+            die(f"{cgout_file.name}:{cgout_line_num}: {msg}")
+
+        def readline() -> str:
+            nonlocal cgout_line_num
+            cgout_line_num += 1
+            return cgout_file.readline()
+
+        # Read "desc:" lines.
+        while line := readline():
+            if m := re.match(r"desc:\s+(.*)", line):
+                # The "desc:" lines are unused.
+                pass
+            else:
+                break
+
+        # Read "cmd:" line. (`line` is already set from the "desc:" loop.)
+        if m := re.match(r"cmd:\s+(.*)", line):
+            cmd = m.group(1)
+        else:
+            parse_die("missing a `cmd:` line")
+
+        # Read "events:" line.
+        line = readline()
+        if m := re.match(r"events:\s+(.*)", line):
+            events = Events(m.group(1))
+        else:
+            parse_die("missing an `events:` line")
+
+        curr_fl = ""
+        curr_flfn = Flfn(("", ""))
+
+        # Different places where we accumulate CC data.
+        dict_flfn_cc: DictFlfnCc = defaultdict(events.mk_empty_cc)
+        summary_cc = None
+
+        # Compile the one hot regex.
+        count_pat = re.compile(r"(\d+)\s+(.*)")
+
+        # Line matching is done in order of pattern frequency, for speed.
+        while True:
+            line = readline()
+
+            if m := count_pat.match(line):
+                # The line_num isn't used.
+                try:
+                    cc = events.mk_cc(m.group(2))
+                except ValueError:
+                    parse_die("malformed or too many event counts")
+
+                # Record this CC at the function level.
+                flfn_cc = dict_flfn_cc[curr_flfn]
+                flfn_cc += cc
+
+            elif line.startswith("fn="):
+                curr_flfn = Flfn((curr_fl, args.mod_funcname(line[3:-1])))
+
+            elif line.startswith("fl="):
+                # A longstanding bug: the use of `--mod-filename` makes it
+                # likely that some files won't be found when annotating. This
+                # doesn't matter much, because we use line number 0 for all
+                # diffs anyway. It just means we get "This file was unreadable"
+                # for modified filenames rather than a single "" CC.
+                curr_fl = args.mod_filename(line[3:-1])
+                # A `fn=` line should follow, overwriting the "???".
+                curr_flfn = Flfn((curr_fl, "???"))
+
+            elif m := re.match(r"summary:\s+(.*)", line):
+                try:
+                    summary_cc = events.mk_cc(m.group(1))
+                except ValueError:
+                    parse_die("too many event counts")
+
+            elif line == "":
+                break  # EOF
+
+            elif line == "\n" or line.startswith("#"):
+                # Skip empty lines and comment lines.
+                pass
+
+            else:
+                parse_die(f"malformed line: {line[:-1]}")
+
+    # Check if summary line was present.
+    if not summary_cc:
+        parse_die("missing `summary:` line, aborting")
+
+    # Check summary is correct.
+    total_cc = events.mk_empty_cc()
+    for flfn_cc in dict_flfn_cc.values():
+        total_cc += flfn_cc
+    if summary_cc != total_cc:
+        msg = (
+            "`summary:` line doesn't match computed total\n"
+            f"- summary: {summary_cc}\n"
+            f"- total: {total_cc}"
+        )
+        parse_die(msg)
+
+    return (cmd, events, dict_flfn_cc, summary_cc)
+
+
+def main() -> None:
+    filename1 = args.cgout_filename1[0]
+    filename2 = args.cgout_filename2[0]
+
+    (cmd1, events1, dict_flfn_cc1, summary_cc1) = read_cgout_file(filename1)
+    (cmd2, events2, dict_flfn_cc2, summary_cc2) = read_cgout_file(filename2)
+
+    if events1.events != events2.events:
+        die("events don't match")
+
+    # Subtract file 1's CCs from file 2's CCs, at the Flfn level.
+    for flfn, flfn_cc1 in dict_flfn_cc1.items():
+        flfn_cc2 = dict_flfn_cc2[flfn]
+        flfn_cc2 -= flfn_cc1
+    summary_cc2 -= summary_cc1
+
+    print(f"desc: Files compared: {filename1}; {filename2}")
+    print(f"cmd: {cmd1}; {cmd2}")
+    print("events:", *events1.events, sep=" ")
+
+    # Sort so the output is deterministic.
+    def key(flfn_and_cc: tuple[Flfn, Cc]) -> Flfn:
+        return flfn_and_cc[0]
+
+    for flfn, flfn_cc2 in sorted(dict_flfn_cc2.items(), key=key):
+        # Use `0` for the line number because we don't try to give line-level
+        # CCs, due to the possibility of code changes causing line numbers to
+        # move around.
+ print(f"fl={flfn[0]}") + print(f"fn={flfn[1]}") + print("0", *flfn_cc2.counts, sep=" ") + + print("summary:", *summary_cc2.counts, sep=" ") + + +if __name__ == "__main__": + main() diff --git a/cachegrind/tests/Makefile.am b/cachegrind/tests/Makefile.am index 33baeeea69..16ac524b35 100644 --- a/cachegrind/tests/Makefile.am +++ b/cachegrind/tests/Makefile.am @@ -14,6 +14,7 @@ dist_noinst_SCRIPTS = filter_stderr filter_cachesim_discards # They just serve as input for cg_annotate in `ann1a` and `ann1b`. EXTRA_DIST = \ ann-diff1.post.exp ann-diff1.stderr.exp ann-diff1.vgtest \ + ann-diff2a.cgout ann-diff2b.cgout \ ann1a.post.exp ann1a.stderr.exp ann1a.vgtest ann1.cgout \ ann1b.post.exp ann1b.stderr.exp ann1b.vgtest ann1b.cgout \ ann2.post.exp ann2.stderr.exp ann2.vgtest ann2.cgout \ diff --git a/cachegrind/tests/ann-diff1.post.exp b/cachegrind/tests/ann-diff1.post.exp index 2d7d61ac70..f8d901b0a8 100644 --- a/cachegrind/tests/ann-diff1.post.exp +++ b/cachegrind/tests/ann-diff1.post.exp @@ -24,7 +24,7 @@ Ir I1mr ILmr Dr D1mr DLmr Dw D1mw DLmw -------------------------------------------------------------------------------- Ir I1mr ILmr Dr D1mr DLmr Dw D1mw DLmw file:function -5,000,000 (100.0%) 0 0 -2,000,000 (100.0%) 0 0 0 0 0 a.c:main +5,000,000 (100.0%) 0 0 -2,000,000 (100.0%) 0 0 0 0 0 a.c:MAIN -------------------------------------------------------------------------------- -- Auto-annotated source file: a.c @@ -33,7 +33,6 @@ Ir I1mr ILmr Dr D1mr DLmr Dw D1mw DLmw 5,000,000 (100.0%) 0 0 -2,000,000 (100.0%) 0 0 0 0 0 - -------------------------------------------------------------------------------- -- Annotation summary -------------------------------------------------------------------------------- diff --git a/cachegrind/tests/ann-diff1.vgtest b/cachegrind/tests/ann-diff1.vgtest index ce3e216c66..b737b713e3 100644 --- a/cachegrind/tests/ann-diff1.vgtest +++ b/cachegrind/tests/ann-diff1.vgtest @@ -2,5 +2,5 @@ # the post-processing of the `ann{1,1b}.cgout` test 
files. prog: ../../tests/true vgopts: --cachegrind-out-file=cachegrind.out -post: perl ../../cachegrind/cg_diff ann1.cgout ann1b.cgout > ann-diff1.cgout && perl ../../cachegrind/cg_annotate ann-diff1.cgout +post: python ../../cachegrind/cg_diff --mod-funcname="s/main/MAIN/" ann1.cgout ann1b.cgout > ann-diff1.cgout && python ../../cachegrind/cg_annotate ann-diff1.cgout cleanup: rm ann-diff1.cgout diff --git a/cachegrind/tests/ann-diff2-aux/ann-diff2-basic.rs b/cachegrind/tests/ann-diff2-aux/ann-diff2-basic.rs new file mode 100644 index 0000000000..c9e9e05f44 --- /dev/null +++ b/cachegrind/tests/ann-diff2-aux/ann-diff2-basic.rs @@ -0,0 +1,10 @@ +one +two +three +four +five +six +seven +eight +nine +ten diff --git a/cachegrind/tests/ann-diff2.post.exp b/cachegrind/tests/ann-diff2.post.exp new file mode 100644 index 0000000000..742ff3841c --- /dev/null +++ b/cachegrind/tests/ann-diff2.post.exp @@ -0,0 +1,46 @@ +-------------------------------------------------------------------------------- +-- Cachegrind profile +-------------------------------------------------------------------------------- +Files compared: ann-diff2a.cgout; ann-diff2b.cgout +Command: cmd1; cmd2 +Data file: ann-diff2c.cgout +Events recorded: One Two +Events shown: One Two +Event sort order: One Two +Threshold: 0.1 +Include dirs: +User annotated: +Auto-annotation: on + +-------------------------------------------------------------------------------- +-- Summary +-------------------------------------------------------------------------------- +One Two + +2,100 (100.0%) 1,900 (100.0%) PROGRAM TOTALS + +-------------------------------------------------------------------------------- +-- Function summary +-------------------------------------------------------------------------------- +One Two file:function + +1,000 (47.6%) 1,000 (52.6%) aux/ann-diff2-basic.rs:groffN +1,000 (47.6%) 1,000 (52.6%) aux/ann-diff2-basic.rs:fN_ffN_fooN_F4_g5 + 100 (4.8%) -100 (-5.3%) aux/ann-diff2-basic.rs:basic1 + 
+-------------------------------------------------------------------------------- +-- Auto-annotated source file: aux/ann-diff2-basic.rs +-------------------------------------------------------------------------------- +This file was unreadable + +-------------------------------------------------------------------------------- +-- Annotation summary +-------------------------------------------------------------------------------- +One Two + + 0 0 annotated: files known & above threshold & readable, line numbers known + 0 0 annotated: files known & above threshold & readable, line numbers unknown +2,100 (100.0%) 1,900 (100.0%) unannotated: files known & above threshold & unreadable + 0 0 unannotated: files known & below threshold + 0 0 unannotated: files unknown + diff --git a/cachegrind/tests/ann-diff2.stderr.exp b/cachegrind/tests/ann-diff2.stderr.exp new file mode 100644 index 0000000000..e8084c12c3 --- /dev/null +++ b/cachegrind/tests/ann-diff2.stderr.exp @@ -0,0 +1,17 @@ + + +I refs: +I1 misses: +LLi misses: +I1 miss rate: +LLi miss rate: + +D refs: +D1 misses: +LLd misses: +D1 miss rate: +LLd miss rate: + +LL refs: +LL misses: +LL miss rate: diff --git a/cachegrind/tests/ann-diff2.vgtest b/cachegrind/tests/ann-diff2.vgtest new file mode 100644 index 0000000000..101cac07d1 --- /dev/null +++ b/cachegrind/tests/ann-diff2.vgtest @@ -0,0 +1,6 @@ +# The `prog` doesn't matter because we don't use its output. Instead we test +# the post-processing of the `ann-diff2{a,b}.cgout` test files. 
+prog: ../../tests/true +vgopts: --cachegrind-out-file=cachegrind.out +post: python ../../cachegrind/cg_diff --mod-filename="s/.*aux\//aux\//i" --mod-funcname="s/(f[a-z]*)[0-9]/\1N/g" ann-diff2a.cgout ann-diff2b.cgout > ann-diff2c.cgout && python ../../cachegrind/cg_annotate ann-diff2c.cgout +cleanup: rm ann-diff2c.cgout diff --git a/cachegrind/tests/ann-diff2a.cgout b/cachegrind/tests/ann-diff2a.cgout new file mode 100644 index 0000000000..bb82b75f6c --- /dev/null +++ b/cachegrind/tests/ann-diff2a.cgout @@ -0,0 +1,9 @@ +desc: Description for ann-diff2a.cgout +cmd: cmd1 +events: One Two + +fl=ann2-diff-AUX/ann-diff2-basic.rs +fn=basic1 +1 1000 1000 + +summary: 1000 1000 diff --git a/cachegrind/tests/ann-diff2b.cgout b/cachegrind/tests/ann-diff2b.cgout new file mode 100644 index 0000000000..9fb733e708 --- /dev/null +++ b/cachegrind/tests/ann-diff2b.cgout @@ -0,0 +1,15 @@ +desc: Description for ann-diff2b.cgout +cmd: cmd2 +events: One Two + +fl=ann2-diff-Aux/ann-diff2-basic.rs +fn=basic1 +1 1100 900 + +fn=f1_ff2_foo3_F4_g5 +3 1000 1000 + +fn=groff5 +5 1000 1000 + +summary: 3100 2900 -- 2.47.2