From 8a75eecbad1da39fc29fa112bae4d9d547404656 Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote <n.nethercote@gmail.com>
Date: Fri, 24 Mar 2023 23:50:49 +1100
Subject: [PATCH] Rewrite `cg_diff` in Python.

For all the same reasons I rewrote `cg_annotate` in Python.

The commit also moves the Python "build" steps into
`auxprogs/pybuild.sh`, for easy sharing.

Finally, it very slightly tweaks the whitespace in the output of
`cg_annotate`.
---
 auxprogs/pybuild.sh                           |  88 +++
 cachegrind/Makefile.am                        |  34 +-
 cachegrind/cg_annotate.in                     |  67 +-
 cachegrind/cg_diff.in                         | 657 +++++++++---------
 cachegrind/tests/Makefile.am                  |   1 +
 cachegrind/tests/ann-diff1.post.exp           |   3 +-
 cachegrind/tests/ann-diff1.vgtest             |   2 +-
 .../tests/ann-diff2-aux/ann-diff2-basic.rs    |  10 +
 cachegrind/tests/ann-diff2.post.exp           |  46 ++
 cachegrind/tests/ann-diff2.stderr.exp         |  17 +
 cachegrind/tests/ann-diff2.vgtest             |   6 +
 cachegrind/tests/ann-diff2a.cgout             |   9 +
 cachegrind/tests/ann-diff2b.cgout             |  15 +
 13 files changed, 561 insertions(+), 394 deletions(-)
 create mode 100755 auxprogs/pybuild.sh
 create mode 100644 cachegrind/tests/ann-diff2-aux/ann-diff2-basic.rs
 create mode 100644 cachegrind/tests/ann-diff2.post.exp
 create mode 100644 cachegrind/tests/ann-diff2.stderr.exp
 create mode 100644 cachegrind/tests/ann-diff2.vgtest
 create mode 100644 cachegrind/tests/ann-diff2a.cgout
 create mode 100644 cachegrind/tests/ann-diff2b.cgout

diff --git a/auxprogs/pybuild.sh b/auxprogs/pybuild.sh
new file mode 100755
index 0000000000..432a768c51
--- /dev/null
+++ b/auxprogs/pybuild.sh
@@ -0,0 +1,88 @@
+#! /bin/sh
+
+# "Build" a given Python file `foo`: format it, type-check it, lint it, and
+# generate the final file from the `foo.in` file.
+#
+# The following Python tools are used by this script.
+#
+# - Formatters:
+#   - `black`, for general formatting. This avoids the need for style checkers
+#     like `flake8`. Note that `black` allows a max line length of 88, which is
+#     a mild but common PEP-8 violation.
+#   - `isort`, for import sorting.
+#
+# - Type-checkers:
+#   - `mypy`. This is the most commonly used Python type checker.
+#   - `pyright`. This is another good type checker.
+#   - Sometimes they give different results. Both should be kept happy.
+#
+# - Linters:
+#   - `ruff`. Sometimes useful, and very fast to run.
+#   - `pylint`. Sometimes annoying, sometimes useful. The `pylintrc`
+#     modifies/disables the more annoying lints.
+#   - Sometimes they give different results. Both should be kept happy.
+#
+# The following tools are relevant, but not run by this script.
+#
+# - Profilers:
+#   - `cProfile` + `snakeviz`: Typically run with
+#     `python3 -m cProfile -o cg.prof cg_annotate $INPUT && snakeviz cg.prof`.
+#   - `scalene`. Typically run with `scalene ./cg_annotate $INPUT`.
+#
+# - Packager:
+#   - `cp` is used for distribution. This is possible because this program is a
+#     single file and only uses the Python Standard Library. This avoids the
+#     need for any of the million different Python package management tools.
+#
+# All of the above tools can be installed with `pip3 install $NAME`, except
+# `cProfile` which is built into Python.
+
+set -e
+
+# Currently targeting Python 3.9 (released in October 2020) and up. The tools
+# use two different syntaxes for specifying the version number.
+ver=3.9
+pyver=py39
+
+infile=$1
+outfile=$2
+if [ -z "$outfile" ] ; then  # both the input and the output file are required
+    echo "usage: $0 INFILE OUTFILE" >&2; exit 1
+fi
+
+echo "== black =="
+black $infile
+echo
+
+echo "== isort =="
+isort $infile
+echo
+
+echo "== mypy =="
+mypy --strict $infile --python-version $ver
+echo
+
+# Strict mode for pyright is enabled by a `pyright: strict` comment inside each
+# Python file.
+#
+# Note: `pyright` refuses to check any file without a `.py` extension, hence
+# the copying to a temp file with a `.py` extension.
+echo "== pyright =="
+tmpfile=`mktemp --tmpdir $infile.XXX.py`
+trap 'rm -f "$tmpfile"' EXIT  # `set -e` exits on a pyright failure; still clean up
+cp $infile $tmpfile
+pyright --pythonversion $ver $tmpfile
+rm $tmpfile
+echo
+
+echo "== ruff =="
+ruff check --target-version $pyver $infile
+echo
+
+echo "== pylint =="
+pylint --py-version $ver $infile
+
+echo "== config.status =="
+make $outfile
+echo
+
diff --git a/cachegrind/Makefile.am b/cachegrind/Makefile.am
index 8ea99ca529..34717ae541 100644
--- a/cachegrind/Makefile.am
+++ b/cachegrind/Makefile.am
@@ -93,28 +93,12 @@ endif
 # Miscellaneous
 #----------------------------------------------------------------------------
 
-# Run the formatters, type checkers, and linters on `cg_annotate.in`, then
-# generate `cg_annotate`.
-#
-# Note: `pyright` refuses to check any file without a `.py` extension, hence
-# the copying to `/tmp/tmp.py`.
-ann:
-	@echo "== black =="
-	@black cg_annotate.in
-	@echo
-	@echo "== isort =="
-	@isort cg_annotate.in
-	@echo
-	@echo "== mypy =="
-	@mypy --strict cg_annotate.in
-	@echo
-	@echo "== pyright =="
-	@cp cg_annotate.in /tmp/tmp.py && pyright /tmp/tmp.py && rm /tmp/tmp.py
-	@echo
-	@echo "== ruff =="
-	@ruff cg_annotate.in
-	@echo
-	@echo "== pylint =="
-	@pylint cg_annotate.in
-	@echo "== config.status =="
-	$(MAKE) cg_annotate
+# "Build" `cg_annotate`. The `+` avoids warnings about the jobserver.
+pyann:
+	+../auxprogs/pybuild.sh cg_annotate.in cg_annotate
+
+# "Build" `cg_diff`. The `+` avoids warnings about the jobserver.
+pydiff:
+	+../auxprogs/pybuild.sh cg_diff.in cg_diff
+
+.PHONY: pyann pydiff
diff --git a/cachegrind/cg_annotate.in b/cachegrind/cg_annotate.in
index 43dce8f10e..240e069ccb 100755
--- a/cachegrind/cg_annotate.in
+++ b/cachegrind/cg_annotate.in
@@ -30,44 +30,9 @@
 This script reads Cachegrind output files and produces human-readable reports.
 """
 
-# Use `make ann` to "build" this script every time it is changed. This runs the
-# formatters, type-checkers, and linters on `cg_annotate.in` and then generates
-# `cg_annotate`.
-#
-# Python versions: Currently this script targets Python 3.9 and later versions.
-# Consequences of this:
-# - No use of `TypeAlias` for explicit type aliases, which requires 3.10.
-#
-# The following Python tools are used. All can be installed with `pip3 install
-# $NAME`, except `cProfile` which is built into Python.
-#
-# - Formatters:
-#   - `black`, for general formatting. This avoids the need for style checkers
-#     like `flake8`. Note that `black` allows a max line length of 88, which is
-#     a mild but common PEP-8 violation.
-#   - `isort`, for import sorting.
-#
-# - Type-checkers:
-#   - `mypy --strict`. This is the most commonly used Python type checker.
-#   - `pyright`. This is another good type checker. The `pyright: strict`
-#     comment above forces strict checking.
-#   - Sometimes one type-checker will complain about something the other does
-#     not. The goal is to keep both type checkers happy.
-#
-# - Linters:
-#   - `ruff`. Sometimes useful, and very fast to run.
-#   - `pylint`. Sometimes annoying, sometimes useful. The `pylintrc`
-#     modifies/disables the more annoying lints.
-#
-# - Profilers:
-#   - `cProfile` + `snakeviz`: Typically run with
-#     `python3 -m cProfile -o cg.prof cg_annotate $INPUT && snakeviz cg.prof`.
-#   - `scalene`. Typically run with `scalene ./cg_annotate $INPUT`.
-#
-# - Packager:
-#   - `cp` is used for distribution. This is possible because this program is a
-#     single file and only uses the Python Standard Library. This avoids the
-#     needs for any of the million different Python package management tools.
+# Use `make pyann` to "build" this script with `auxprogs/pybuild.sh` every time
+# it is changed. This runs the formatters, type-checkers, and linters on
+# `cg_annotate.in` and then generates `cg_annotate`.
 
 
 from __future__ import annotations
@@ -140,7 +105,7 @@ class Args(Namespace):
                 help=f"(deprecated) same as --no-{name}",
             )
 
-        p = ArgumentParser(description="Process Cachegrind output files.")
+        p = ArgumentParser(description="Process a Cachegrind output file.")
 
         p.add_argument("--version", action="version", version="%(prog)s-@VERSION@")
 
@@ -317,11 +282,9 @@ class Cc:
 Flfn = NewType("Flfn", tuple[str, str])
 
 # Per-function CCs.
-# Note: not using `TypeAlias`. See "Python versions" comment above.
 DictFlfnCc = DefaultDict[Flfn, Cc]
 
 # Per-line CCs, organised by filename and line number.
-# Note: not using `TypeAlias`. See "Python versions" comment above.
 DictLineCc = DefaultDict[int, Cc]
 DictFlDictLineCc = DefaultDict[str, DictLineCc]
 
@@ -376,7 +339,7 @@ def read_cgout_file() -> tuple[str, str, Events, DictFlfnCc, DictFlDictLineCc, C
         curr_fl = ""
         curr_flfn = Flfn(("", ""))
 
-        # Three different places where we accumulate CC data.
+        # Different places where we accumulate CC data.
         dict_flfn_cc: DictFlfnCc = defaultdict(events.mk_empty_cc)
         dict_fl_dict_line_cc: DictFlDictLineCc = defaultdict(mk_empty_dict_line_cc)
         summary_cc = None
@@ -479,8 +442,8 @@ class CcPrinter:
                     min_cc.counts[i] = count
 
         # Find maximum width for each column.
-        self.count_widths = [0] * len(events.events)
-        self.perc_widths = [0] * len(events.events)
+        self.count_widths = [0] * events.num_events
+        self.perc_widths = [0] * events.num_events
         for i, event in enumerate(events.events):
             # Get count and perc widths of the min and max CCs.
             (min_count, min_perc) = self.count_and_perc(min_cc, i)
@@ -757,16 +720,16 @@ def print_annotated_src_file(
             else:
                 break
 
-    # If there was info on lines past the end of the file, warn.
-    if line_nums:
-        for line_num in line_nums:
-            printer.print_cc(dict_line_cc[line_num], f"<bogus line {line_num}>")
-            annotated_ccs.line_nums_known_cc += dict_line_cc[line_num]
+        # If there was info on lines past the end of the file, warn.
+        if line_nums:
+            for line_num in line_nums:
+                printer.print_cc(dict_line_cc[line_num], f"<bogus line {line_num}>")
+                annotated_ccs.line_nums_known_cc += dict_line_cc[line_num]
 
-        print()
-        warn_bogus_lines(src_file.name)
+            print()
+            warn_bogus_lines(src_file.name)
 
-    print()
+        print()
 
 
 # This (partially) consumes `dict_fl_dict_line_cc`.
diff --git a/cachegrind/cg_diff.in b/cachegrind/cg_diff.in
index 462308b49e..bae0c7abe4 100755
--- a/cachegrind/cg_diff.in
+++ b/cachegrind/cg_diff.in
@@ -1,13 +1,14 @@
-#! @PERL@
+#! /usr/bin/env python3
+# pyright: strict
 
-##--------------------------------------------------------------------##
-##--- Cachegrind's differencer.                         cg_diff.in ---##
-##--------------------------------------------------------------------##
+# --------------------------------------------------------------------
+# --- Cachegrind's differencer.                         cg_diff.in ---
+# --------------------------------------------------------------------
 
 #  This file is part of Cachegrind, a Valgrind tool for cache
 #  profiling programs.
 #
-#  Copyright (C) 2002-2017 Nicholas Nethercote
+#  Copyright (C) 2002-2023 Nicholas Nethercote
 #     njn@valgrind.org
 #
 #  This program is free software; you can redistribute it and/or
@@ -25,312 +26,340 @@
 #
 #  The GNU General Public License is contained in the file COPYING.
 
-#----------------------------------------------------------------------------
-# This is a very cut-down and modified version of cg_annotate.
-#----------------------------------------------------------------------------
-
-use warnings;
-use strict;
-
-#----------------------------------------------------------------------------
-# Global variables
-#----------------------------------------------------------------------------
-
-# Version number
-my $version = "@VERSION@";
-
-# Usage message.
-my $usage = <<END
-usage: cg_diff [options] <cachegrind-out-file1> <cachegrind-out-file2>
-
-  options for the user, with defaults in [ ], are:
-    -h --help             show this message
-    -v --version          show version
-    --mod-filename=<expr> a Perl search-and-replace expression that is applied
-                          to filenames, eg. --mod-filename='s/prog[0-9]/projN/'
-    --mod-funcname=<expr> like --mod-filename, but applied to function names
-
-  cg_diff is Copyright (C) 2002-2017 Nicholas Nethercote.
-  and licensed under the GNU General Public License, version 2.
-  Bug reports, feedback, admiration, abuse, etc, to: njn\@valgrind.org.
-                                                
-END
-;
-
-# --mod-filename expression
-my $mod_filename = undef;
-
-# --mod-funcname expression
-my $mod_funcname = undef;
-
-#-----------------------------------------------------------------------------
-# Argument and option handling
-#-----------------------------------------------------------------------------
-sub process_cmd_line() 
-{
-    my ($file1, $file2) = (undef, undef);
-
-    for my $arg (@ARGV) { 
-
-        if ($arg =~ /^-/) {
-            # --version
-            if ($arg =~ /^-v$|^--version$/) {
-                die("cg_diff-$version\n");
-
-            } elsif ($arg =~ /^--mod-filename=(.*)/) {
-                $mod_filename = $1;
-
-            } elsif ($arg =~ /^--mod-funcname=(.*)/) {
-                $mod_funcname = $1;
-
-            } else {            # -h and --help fall under this case
-                die($usage);
-            }
-
-        } elsif (not defined($file1)) {
-            $file1 = $arg;
-
-        } elsif (not defined($file2)) {
-            $file2 = $arg;
-
-        } else {
-            die($usage);
-        }
-    }
-
-    # Must have specified two input files.
-    if (not defined $file1 or not defined $file2) {
-        die($usage);
-    }
-
-    return ($file1, $file2);
-}
-
-#-----------------------------------------------------------------------------
-# Reading of input file
-#-----------------------------------------------------------------------------
-sub max ($$) 
-{
-    my ($x, $y) = @_;
-    return ($x > $y ? $x : $y);
-}
-
-# Add the two arrays;  any '.' entries are ignored.  Two tricky things:
-# 1. If $a2->[$i] is undefined, it defaults to 0 which is what we want; we turn
-#    off warnings to allow this.  This makes things about 10% faster than
-#    checking for definedness ourselves.
-# 2. We don't add an undefined count or a ".", even though it's value is 0,
-#    because we don't want to make an $a2->[$i] that is undef become 0
-#    unnecessarily.
-sub add_array_a_to_b ($$) 
-{
-    my ($a, $b) = @_;
-
-    my $n = max(scalar @$a, scalar @$b);
-    $^W = 0;
-    foreach my $i (0 .. $n-1) {
-        $b->[$i] += $a->[$i] if (defined $a->[$i] && "." ne $a->[$i]);
-    }
-    $^W = 1;
-}
-
-sub sub_array_b_from_a ($$) 
-{
-    my ($a, $b) = @_;
-
-    my $n = max(scalar @$a, scalar @$b);
-    $^W = 0;
-    foreach my $i (0 .. $n-1) {
-        $a->[$i] -= $b->[$i];       # XXX: doesn't handle '.' entries
-    }
-    $^W = 1;
-}
-
-# Add each event count to the CC array.  '.' counts become undef, as do
-# missing entries (implicitly).
-sub line_to_CC ($$)
-{
-    my ($line, $numEvents) = @_;
-
-    my @CC = (split /\s+/, $line);
-    (@CC <= $numEvents) or die("Line $.: too many event counts\n");
-    return \@CC;
-}
-
-sub read_input_file($) 
-{
-    my ($input_file) = @_;
-
-    open(INPUTFILE, "< $input_file") 
-         || die "Cannot open $input_file for reading\n";
-
-    # Read "desc:" lines.
-    my $desc;
-    my $line;
-    while ($line = <INPUTFILE>) {
-        if ($line =~ s/desc:\s+//) {
-            $desc .= $line;
-        } else {
-            last;
-        }
-    }
-
-    # Read "cmd:" line (Nb: will already be in $line from "desc:" loop above).
-    ($line =~ s/^cmd:\s+//) or die("Line $.: missing command line\n");
-    my $cmd = $line;
-    chomp($cmd);    # Remove newline
-
-    # Read "events:" line.  We make a temporary hash in which the Nth event's
-    # value is N, which is useful for handling --show/--sort options below.
-    $line = <INPUTFILE>;
-    (defined $line && $line =~ s/^events:\s+//) 
-        or die("Line $.: missing events line\n");
-    my @events = split(/\s+/, $line);
-    my $numEvents = scalar @events;
-
-    my $currFileName;
-    my $currFileFuncName;
-
-    my %CCs;                    # hash("$filename###$funcname" => CC array)
-    my $currCC = undef;         # CC array
-
-    my $summaryCC;
-
-    # Read body of input file.
-    while (<INPUTFILE>) {
-        # Skip comments and empty lines.
-        next if /^\s*$/ || /^\#/;
-
-        if (s/^(-?\d+)\s+//) {
-            my $CC = line_to_CC($_, $numEvents);
-            defined($currCC) || die;
-            add_array_a_to_b($CC, $currCC);
-
-        } elsif (s/^fn=(.*)$//) {
-            defined($currFileName) || die;
-            my $tmpFuncName = $1;
-            if (defined $mod_funcname) {
-                eval "\$tmpFuncName =~ $mod_funcname";
-            }
-            $currFileFuncName = "$currFileName###$tmpFuncName";
-            $currCC = $CCs{$currFileFuncName};
-            if (not defined $currCC) {
-                $currCC = [];
-                $CCs{$currFileFuncName} = $currCC;
-            }
-
-        } elsif (s/^fl=(.*)$//) {
-            $currFileName = $1;
-            if (defined $mod_filename) {
-                eval "\$currFileName =~ $mod_filename";
-            }
-            # Assume that a "fn=" line is followed by a "fl=" line.
-            $currFileFuncName = undef;  
-
-        } elsif (s/^summary:\s+//) {
-            $summaryCC = line_to_CC($_, $numEvents);
-            (scalar(@$summaryCC) == @events) 
-                or die("Line $.: summary event and total event mismatch\n");
-
-        } else {
-            warn("WARNING: line $. malformed, ignoring\n");
-        }
-    }
-
-    # Check if summary line was present
-    if (not defined $summaryCC) {
-        die("missing final summary line, aborting\n");
-    }
-
-    close(INPUTFILE);
-
-    return ($cmd, \@events, \%CCs, $summaryCC);
-}
-
-#----------------------------------------------------------------------------
-# "main()"
-#----------------------------------------------------------------------------
-# Commands seen in the files.  Need not match.
-my $cmd1;
-my $cmd2;
-
-# Events seen in the files.  They must match.
-my $events1;
-my $events2;
-
-# Individual CCs, organised by filename/funcname/line_num.
-# hashref("$filename###$funcname", CC array)
-my $CCs1;
-my $CCs2;
-
-# Total counts for summary (an arrayref).
-my $summaryCC1;
-my $summaryCC2;
-
-#----------------------------------------------------------------------------
-# Read the input files
-#----------------------------------------------------------------------------
-my ($file1, $file2) = process_cmd_line();
-($cmd1, $events1, $CCs1, $summaryCC1) = read_input_file($file1);
-($cmd2, $events2, $CCs2, $summaryCC2) = read_input_file($file2);
-
-#----------------------------------------------------------------------------
-# Check the events match
-#----------------------------------------------------------------------------
-my $n = max(scalar @$events1, scalar @$events2);
-$^W = 0;    # turn off warnings, because we might hit undefs
-foreach my $i (0 .. $n-1) {
-    ($events1->[$i] eq $events2->[$i]) || die "events don't match, aborting\n";
-}
-$^W = 1;
-
-#----------------------------------------------------------------------------
-# Do the subtraction: CCs2 -= CCs1
-#----------------------------------------------------------------------------
-while (my ($filefuncname, $CC1) = each(%$CCs1)) {
-    my $CC2 = $CCs2->{$filefuncname};
-    if (not defined $CC2) {
-        $CC2 = [];
-        sub_array_b_from_a($CC2, $CC1);     # CC2 -= CC1
-        $CCs2->{$filefuncname} = $CC2;
-    } else {
-        sub_array_b_from_a($CC2, $CC1);     # CC2 -= CC1
-    }
-}
-sub_array_b_from_a($summaryCC2, $summaryCC1);
-
-#----------------------------------------------------------------------------
-# Print the result, in CCs2
-#----------------------------------------------------------------------------
-print("desc: Files compared:   $file1; $file2\n");
-print("cmd:  $cmd1; $cmd2\n");
-print("events: ");
-for my $e (@$events1) {
-    print(" $e");
-}
-print("\n");
-
-while (my ($filefuncname, $CC) = each(%$CCs2)) {
-
-    my @x = split(/###/, $filefuncname);
-    (scalar @x == 2) || die;
-
-    print("fl=$x[0]\n");
-    print("fn=$x[1]\n");
-
-    print("0");
-    foreach my $n (@$CC) {
-        print(" $n");
-    }
-    print("\n");
-}
-
-print("summary:");
-foreach my $n (@$summaryCC2) {
-    print(" $n");
-}
-print("\n");
-
-##--------------------------------------------------------------------##
-##--- end                                                          ---##
-##--------------------------------------------------------------------##
+"""
+This script diffs Cachegrind output files.
+"""
+
+# Use `make pydiff` to "build" this script every time it is changed. This runs
+# the formatters, type-checkers, and linters on `cg_diff.in` and then generates
+# `cg_diff`.
+#
+# This is a cut-down version of `cg_annotate.in`.
+
+from __future__ import annotations
+
+import re
+import sys
+from argparse import ArgumentParser, Namespace
+from collections import defaultdict
+from typing import Callable, DefaultDict, NewType, NoReturn
+
+SearchAndReplace = Callable[[str], str]
+
+
+class Args(Namespace):
+    """
+    A typed wrapper for parsed args.
+
+    None of these fields are modified after arg parsing finishes.
+    """
+
+    mod_filename: SearchAndReplace
+    mod_funcname: SearchAndReplace
+    cgout_filename1: str
+    cgout_filename2: str
+
+    @staticmethod
+    def parse() -> Args:
+        # We support Perl-style `s/old/new/flags` search-and-replace
+        # expressions, because that's how this option was implemented in the
+        # old Perl version of `cg_diff`. This requires conversion from
+        # `s/old/new/` style to `re.sub`. The conversion isn't a perfect
+        # emulation of Perl regexps (e.g. Python uses `\1` rather than `$1` for
+        # using captures in the `new` part), but it should be close enough. The
+        # only supported flags are `g` (global) and `i` (ignore case).
+        def search_and_replace(regex: str | None) -> SearchAndReplace:
+            if regex is None:
+                return lambda s: s
+
+            # Extract the parts of a `s/old/new/tail` regex. `(?<!\\)/` is an
+            # example of negative lookbehind. It means "match a forward slash
+            # unless preceded by a backslash".
+            m = re.match(r"s/(.*)(?<!\\)/(.*)(?<!\\)/(g|i|gi|ig|)$", regex)
+            if m is None:
+                raise ValueError
+
+            # Forward slashes must be escaped in an `s/old/new/` expression,
+            # but we then must unescape them before using them with `re.sub`
+            pat = m.group(1).replace(r"\/", r"/")
+            repl = m.group(2).replace(r"\/", r"/")
+            tail = m.group(3)
+
+            if "g" in tail:
+                count = 0  # unlimited
+            else:
+                count = 1
+
+            if "i" in tail:
+                flags = re.IGNORECASE
+            else:
+                flags = re.RegexFlag(0)
+
+            return lambda s: re.sub(re.compile(pat, flags=flags), repl, s, count=count)
+
+        p = ArgumentParser(description="Diff two Cachegrind output files.")
+
+        p.add_argument("--version", action="version", version="%(prog)s-@VERSION@")
+
+        p.add_argument(
+            "--mod-filename",
+            type=search_and_replace,
+            metavar="REGEX",
+            default=search_and_replace(None),
+            help="a search-and-replace regex applied to filenames, e.g. "
+            "`s/prog[0-9]/progN/`",
+        )
+        p.add_argument(
+            "--mod-funcname",
+            type=search_and_replace,
+            metavar="REGEX",
+            default=search_and_replace(None),
+            help="like --mod-filename, but for function names",
+        )
+
+        p.add_argument(
+            "cgout_filename1",
+            nargs=1,
+            metavar="cachegrind-out-file1",
+            help="file produced by Cachegrind",
+        )
+        p.add_argument(
+            "cgout_filename2",
+            nargs=1,
+            metavar="cachegrind-out-file2",
+            help="file produced by Cachegrind",
+        )
+
+        return p.parse_args(namespace=Args())
+
+
+# Args are stored in a global for easy access.
+args = Args.parse()
+
+# One instance of this class is constructed per cgout file, from the `events:`
+# line in that file.
+class Events:
+    # The event names.
+    events: list[str]
+
+    def __init__(self, text: str) -> None:
+        self.events = text.split()
+        self.num_events = len(self.events)
+
+    def mk_cc(self, text: str) -> Cc:
+        """Raises a `ValueError` exception on syntax error."""
+        # This is slightly faster than a list comprehension.
+        counts = list(map(int, text.split()))
+
+        if len(counts) == self.num_events:
+            pass
+        elif len(counts) < self.num_events:
+            # Add zeroes at the end for any missing numbers.
+            counts.extend([0] * (self.num_events - len(counts)))
+        else:
+            raise ValueError
+
+        return Cc(counts)
+
+    def mk_empty_cc(self) -> Cc:
+        # This is much faster than a list comprehension.
+        return Cc([0] * self.num_events)
+
+
+class Cc:
+    """
+    This is a dumb container for counts.
+
+    It doesn't know anything about events, i.e. what each count means. It can
+    do basic operations like `__iadd__` and `__eq__`, and anything more must be
+    done elsewhere. `Events.mk_cc` and `Events.mk_empty_cc` are used for
+    construction.
+    """
+
+    # Always the same length as `Events.events`.
+    counts: list[int]
+
+    def __init__(self, counts: list[int]) -> None:
+        self.counts = counts
+
+    def __repr__(self) -> str:
+        return str(self.counts)
+
+    def __eq__(self, other: object) -> bool:
+        if not isinstance(other, Cc):
+            return NotImplemented
+        return self.counts == other.counts
+
+    def __iadd__(self, other: Cc) -> Cc:
+        for i, other_count in enumerate(other.counts):
+            self.counts[i] += other_count
+        return self
+
+    def __isub__(self, other: Cc) -> Cc:
+        for i, other_count in enumerate(other.counts):
+            self.counts[i] -= other_count
+        return self
+
+
+# A paired filename and function name.
+Flfn = NewType("Flfn", tuple[str, str])
+
+# Per-function CCs.
+DictFlfnCc = DefaultDict[Flfn, Cc]
+
+
+def die(msg: str) -> NoReturn:
+    print("cg_diff: error:", msg, file=sys.stderr)
+    sys.exit(1)
+
+
+def read_cgout_file(cgout_filename: str) -> tuple[str, Events, DictFlfnCc, Cc]:
+    # The file format is described in Cachegrind's manual.
+    try:
+        cgout_file = open(cgout_filename, "r", encoding="utf-8")
+    except OSError as err:
+        die(f"{err}")
+
+    with cgout_file:
+        cgout_line_num = 0
+
+        def parse_die(msg: str) -> NoReturn:
+            die(f"{cgout_file.name}:{cgout_line_num}: {msg}")
+
+        def readline() -> str:
+            nonlocal cgout_line_num
+            cgout_line_num += 1
+            return cgout_file.readline()
+
+        # Read "desc:" lines.
+        while line := readline():
+            if m := re.match(r"desc:\s+(.*)", line):
+                # The "desc:" lines are unused.
+                pass
+            else:
+                break
+
+        # Read "cmd:" line. (`line` is already set from the "desc:" loop.)
+        if m := re.match(r"cmd:\s+(.*)", line):
+            cmd = m.group(1)
+        else:
+            parse_die("missing a `command:` line")
+
+        # Read "events:" line.
+        line = readline()
+        if m := re.match(r"events:\s+(.*)", line):
+            events = Events(m.group(1))
+        else:
+            parse_die("missing an `events:` line")
+
+        curr_fl = ""
+        curr_flfn = Flfn(("", ""))
+
+        # Different places where we accumulate CC data.
+        dict_flfn_cc: DictFlfnCc = defaultdict(events.mk_empty_cc)
+        summary_cc = None
+
+        # Compile the one hot regex.
+        count_pat = re.compile(r"(\d+)\s+(.*)")
+
+        # Line matching is done in order of pattern frequency, for speed.
+        while True:
+            line = readline()
+
+            if m := count_pat.match(line):
+                # The line_num isn't used.
+                try:
+                    cc = events.mk_cc(m.group(2))
+                except ValueError:
+                    parse_die("malformed or too many event counts")
+
+                # Record this CC at the function level.
+                flfn_cc = dict_flfn_cc[curr_flfn]
+                flfn_cc += cc
+
+            elif line.startswith("fn="):
+                curr_flfn = Flfn((curr_fl, args.mod_funcname(line[3:-1])))
+
+            elif line.startswith("fl="):
+                # A longstanding bug: the use of `--mod-filename` makes it
+                # likely that some files won't be found when annotating. This
+                # doesn't matter much, because we use line number 0 for all
+                # diffs anyway. It just means we get "This file was unreadable"
+                # for modified filenames rather than a single "<unknown (line
+                # 0)>" CC.
+                curr_fl = args.mod_filename(line[3:-1])
+                # A `fn=` line should follow, overwriting the "???".
+                curr_flfn = Flfn((curr_fl, "???"))
+
+            elif m := re.match(r"summary:\s+(.*)", line):
+                try:
+                    summary_cc = events.mk_cc(m.group(1))
+                except ValueError:
+                    parse_die("too many event counts")
+
+            elif line == "":
+                break  # EOF
+
+            elif line == "\n" or line.startswith("#"):
+                # Skip empty lines and comment lines.
+                pass
+
+            else:
+                parse_die(f"malformed line: {line[:-1]}")
+
+    # Check if summary line was present.
+    if not summary_cc:
+        parse_die("missing `summary:` line, aborting")
+
+    # Check summary is correct.
+    total_cc = events.mk_empty_cc()
+    for flfn_cc in dict_flfn_cc.values():
+        total_cc += flfn_cc
+    if summary_cc != total_cc:
+        msg = (
+            "`summary:` line doesn't match computed total\n"
+            f"- summary: {summary_cc}\n"
+            f"- total:   {total_cc}"
+        )
+        parse_die(msg)
+
+    return (cmd, events, dict_flfn_cc, summary_cc)
+
+
+def main() -> None:
+    """Read both cgout files and print file 2 minus file 1 in cgout format."""
+    filename1 = args.cgout_filename1[0]
+    filename2 = args.cgout_filename2[0]
+    (cmd1, events1, dict_flfn_cc1, summary_cc1) = read_cgout_file(filename1)
+    (cmd2, events2, dict_flfn_cc2, summary_cc2) = read_cgout_file(filename2)
+
+    # Compare event names, not just counts; mismatched columns can't be diffed.
+    if events1.events != events2.events:
+        die("events don't match")
+
+    # Subtract file 1's CCs from file 2's CCs, at the Flfn level.
+    for flfn, flfn_cc1 in dict_flfn_cc1.items():
+        flfn_cc2 = dict_flfn_cc2[flfn]
+        flfn_cc2 -= flfn_cc1
+    summary_cc2 -= summary_cc1
+
+    print(f"desc: Files compared:   {filename1}; {filename2}")
+    print(f"cmd: {cmd1}; {cmd2}")
+    print("events:", *events1.events, sep=" ")
+
+    # Sort so the output is deterministic.
+    def key(flfn_and_cc: tuple[Flfn, Cc]) -> Flfn:
+        return flfn_and_cc[0]
+
+    for flfn, flfn_cc2 in sorted(dict_flfn_cc2.items(), key=key):
+        # The line number is always `0`: line-level CCs aren't attempted because
+        # code changes can cause line numbers to move around.
+        print(f"fl={flfn[0]}")
+        print(f"fn={flfn[1]}")
+        print("0", *flfn_cc2.counts, sep=" ")
+
+    print("summary:", *summary_cc2.counts, sep=" ")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/cachegrind/tests/Makefile.am b/cachegrind/tests/Makefile.am
index 33baeeea69..16ac524b35 100644
--- a/cachegrind/tests/Makefile.am
+++ b/cachegrind/tests/Makefile.am
@@ -14,6 +14,7 @@ dist_noinst_SCRIPTS = filter_stderr filter_cachesim_discards
 # They just serve as input for cg_annotate in `ann1a` and `ann1b`.
 EXTRA_DIST = \
 	ann-diff1.post.exp ann-diff1.stderr.exp ann-diff1.vgtest \
+	ann-diff2.post.exp ann-diff2.stderr.exp ann-diff2.vgtest ann-diff2a.cgout ann-diff2b.cgout \
 	ann1a.post.exp ann1a.stderr.exp ann1a.vgtest ann1.cgout \
 	ann1b.post.exp ann1b.stderr.exp ann1b.vgtest ann1b.cgout \
 	ann2.post.exp ann2.stderr.exp ann2.vgtest ann2.cgout \
diff --git a/cachegrind/tests/ann-diff1.post.exp b/cachegrind/tests/ann-diff1.post.exp
index 2d7d61ac70..f8d901b0a8 100644
--- a/cachegrind/tests/ann-diff1.post.exp
+++ b/cachegrind/tests/ann-diff1.post.exp
@@ -24,7 +24,7 @@ Ir                 I1mr ILmr Dr                  D1mr DLmr Dw D1mw DLmw
 --------------------------------------------------------------------------------
 Ir                 I1mr ILmr Dr                  D1mr DLmr Dw D1mw DLmw  file:function
 
-5,000,000 (100.0%)    0    0 -2,000,000 (100.0%)    0    0  0    0    0  a.c:main
+5,000,000 (100.0%)    0    0 -2,000,000 (100.0%)    0    0  0    0    0  a.c:MAIN
 
 --------------------------------------------------------------------------------
 -- Auto-annotated source file: a.c
@@ -33,7 +33,6 @@ Ir                 I1mr ILmr Dr                  D1mr DLmr Dw D1mw DLmw
 
 5,000,000 (100.0%)    0    0 -2,000,000 (100.0%)    0    0  0    0    0  <unknown (line 0)>
 
-
 --------------------------------------------------------------------------------
 -- Annotation summary
 --------------------------------------------------------------------------------
diff --git a/cachegrind/tests/ann-diff1.vgtest b/cachegrind/tests/ann-diff1.vgtest
index ce3e216c66..b737b713e3 100644
--- a/cachegrind/tests/ann-diff1.vgtest
+++ b/cachegrind/tests/ann-diff1.vgtest
@@ -2,5 +2,5 @@
 # the post-processing of the `ann{1,1b}.cgout` test files.
 prog: ../../tests/true
 vgopts: --cachegrind-out-file=cachegrind.out
-post: perl ../../cachegrind/cg_diff ann1.cgout ann1b.cgout > ann-diff1.cgout && perl ../../cachegrind/cg_annotate ann-diff1.cgout
+post: python ../../cachegrind/cg_diff --mod-funcname="s/main/MAIN/" ann1.cgout ann1b.cgout > ann-diff1.cgout && python ../../cachegrind/cg_annotate ann-diff1.cgout
 cleanup: rm ann-diff1.cgout
diff --git a/cachegrind/tests/ann-diff2-aux/ann-diff2-basic.rs b/cachegrind/tests/ann-diff2-aux/ann-diff2-basic.rs
new file mode 100644
index 0000000000..c9e9e05f44
--- /dev/null
+++ b/cachegrind/tests/ann-diff2-aux/ann-diff2-basic.rs
@@ -0,0 +1,10 @@
+one
+two
+three
+four
+five
+six
+seven
+eight
+nine
+ten
diff --git a/cachegrind/tests/ann-diff2.post.exp b/cachegrind/tests/ann-diff2.post.exp
new file mode 100644
index 0000000000..742ff3841c
--- /dev/null
+++ b/cachegrind/tests/ann-diff2.post.exp
@@ -0,0 +1,46 @@
+--------------------------------------------------------------------------------
+-- Cachegrind profile
+--------------------------------------------------------------------------------
+Files compared:   ann-diff2a.cgout; ann-diff2b.cgout
+Command:          cmd1; cmd2
+Data file:        ann-diff2c.cgout
+Events recorded:  One Two
+Events shown:     One Two
+Event sort order: One Two
+Threshold:        0.1
+Include dirs:     
+User annotated:   
+Auto-annotation:  on
+
+--------------------------------------------------------------------------------
+-- Summary
+--------------------------------------------------------------------------------
+One            Two            
+
+2,100 (100.0%) 1,900 (100.0%)  PROGRAM TOTALS
+
+--------------------------------------------------------------------------------
+-- Function summary
+--------------------------------------------------------------------------------
+One           Two            file:function
+
+1,000 (47.6%) 1,000 (52.6%)  aux/ann-diff2-basic.rs:groffN
+1,000 (47.6%) 1,000 (52.6%)  aux/ann-diff2-basic.rs:fN_ffN_fooN_F4_g5
+  100  (4.8%)  -100 (-5.3%)  aux/ann-diff2-basic.rs:basic1
+
+--------------------------------------------------------------------------------
+-- Auto-annotated source file: aux/ann-diff2-basic.rs
+--------------------------------------------------------------------------------
+This file was unreadable
+
+--------------------------------------------------------------------------------
+-- Annotation summary
+--------------------------------------------------------------------------------
+One            Two            
+
+    0              0             annotated: files known & above threshold & readable, line numbers known
+    0              0             annotated: files known & above threshold & readable, line numbers unknown
+2,100 (100.0%) 1,900 (100.0%)  unannotated: files known & above threshold & unreadable 
+    0              0           unannotated: files known & below threshold
+    0              0           unannotated: files unknown
+
diff --git a/cachegrind/tests/ann-diff2.stderr.exp b/cachegrind/tests/ann-diff2.stderr.exp
new file mode 100644
index 0000000000..e8084c12c3
--- /dev/null
+++ b/cachegrind/tests/ann-diff2.stderr.exp
@@ -0,0 +1,17 @@
+
+
+I   refs:
+I1  misses:
+LLi misses:
+I1  miss rate:
+LLi miss rate:
+
+D   refs:
+D1  misses:
+LLd misses:
+D1  miss rate:
+LLd miss rate:
+
+LL refs:
+LL misses:
+LL miss rate:
diff --git a/cachegrind/tests/ann-diff2.vgtest b/cachegrind/tests/ann-diff2.vgtest
new file mode 100644
index 0000000000..101cac07d1
--- /dev/null
+++ b/cachegrind/tests/ann-diff2.vgtest
@@ -0,0 +1,6 @@
+# The `prog` doesn't matter because we don't use its output. Instead we test
+# the post-processing of the `ann-diff2{a,b}.cgout` test files.
+prog: ../../tests/true
+vgopts: --cachegrind-out-file=cachegrind.out
+post: python ../../cachegrind/cg_diff --mod-filename="s/.*aux\//aux\//i" --mod-funcname="s/(f[a-z]*)[0-9]/\1N/g" ann-diff2a.cgout ann-diff2b.cgout > ann-diff2c.cgout && python ../../cachegrind/cg_annotate ann-diff2c.cgout
+cleanup: rm ann-diff2c.cgout
diff --git a/cachegrind/tests/ann-diff2a.cgout b/cachegrind/tests/ann-diff2a.cgout
new file mode 100644
index 0000000000..bb82b75f6c
--- /dev/null
+++ b/cachegrind/tests/ann-diff2a.cgout
@@ -0,0 +1,9 @@
+desc: Description for ann-diff2a.cgout
+cmd: cmd1
+events: One Two
+
+fl=ann2-diff-AUX/ann-diff2-basic.rs
+fn=basic1
+1 1000 1000
+
+summary: 1000 1000
diff --git a/cachegrind/tests/ann-diff2b.cgout b/cachegrind/tests/ann-diff2b.cgout
new file mode 100644
index 0000000000..9fb733e708
--- /dev/null
+++ b/cachegrind/tests/ann-diff2b.cgout
@@ -0,0 +1,15 @@
+desc: Description for ann-diff2b.cgout
+cmd: cmd2
+events: One Two
+
+fl=ann2-diff-Aux/ann-diff2-basic.rs
+fn=basic1
+1 1100 900
+
+fn=f1_ff2_foo3_F4_g5
+3 1000 1000
+
+fn=groff5
+5 1000 1000
+
+summary: 3100 2900
-- 
2.47.2