-#! @PERL@
+#! /usr/bin/python3
+# pyright: strict
-##--------------------------------------------------------------------##
-##--- Cachegrind's annotator. cg_annotate.in ---##
-##--------------------------------------------------------------------##
+# --------------------------------------------------------------------
+# --- Cachegrind's annotator. cg_annotate.in ---
+# --------------------------------------------------------------------
-# This file is part of Cachegrind, a Valgrind tool for cache
-# profiling programs.
+# This file is part of Cachegrind, a Valgrind tool for cache
+# profiling programs.
#
-# Copyright (C) 2002-2017 Nicholas Nethercote
-# njn@valgrind.org
+# Copyright (C) 2002-2023 Nicholas Nethercote
+# njn@valgrind.org
#
-# This program is free software; you can redistribute it and/or
-# modify it under the terms of the GNU General Public License as
-# published by the Free Software Foundation; either version 2 of the
-# License, or (at your option) any later version.
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 2 of the
+# License, or (at your option) any later version.
#
-# This program is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, see <http://www.gnu.org/licenses/>.
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, see <http://www.gnu.org/licenses/>.
#
-# The GNU General Public License is contained in the file COPYING.
-
-#----------------------------------------------------------------------------
-# The file format is simple, basically printing the cost centre for every
-# source line, grouped by files and functions. The details are in
-# Cachegrind's manual.
-
-#----------------------------------------------------------------------------
-# Performance improvements record, using cachegrind.out for cacheprof, doing no
-# source annotation (irrelevant ones removed):
-# user time
-# 1. turned off warnings in add_hash_a_to_b() 3.81 --> 3.48s
-# [now add_array_a_to_b()]
-# 6. make line_to_CC() return a ref instead of a hash 3.01 --> 2.77s
+# The GNU General Public License is contained in the file COPYING.
+
+"""
+This script reads Cachegrind output files and produces human-readable reports.
+"""
+
+# Use `make ann` to "build" this script every time it is changed. This runs the
+# formatters, type-checkers, and linters on `cg_annotate.in` and then generates
+# `cg_annotate`.
#
-#10. changed file format to avoid file/fn name repetition 2.40s
-# (not sure why higher; maybe due to new '.' entries?)
-#11. changed file format to drop unnecessary end-line "."s 2.36s
-# (shrunk file by about 37%)
-#12. switched from hash CCs to array CCs 1.61s
-#13. only adding b[i] to a[i] if b[i] defined (was doing it if
-# either a[i] or b[i] was defined, but if b[i] was undefined
-# it just added 0) 1.48s
-#14. Stopped converting "." entries to undef and then back 1.16s
-#15. Using foreach $i (x..y) instead of for ($i = 0...) in
-# add_array_a_to_b() 1.11s
+# The following Python tools are used. All can be installed with `pip3 install
+# $NAME`, except `cProfile` which is built into Python.
#
-# Auto-annotating primes:
-#16. Finding count lengths by int((length-1)/3), not by
-# commifying (halves the number of commify calls) 1.68s --> 1.47s
-
-use warnings;
-use strict;
-
-#----------------------------------------------------------------------------
-# Overview: the running example in the comments is for:
-# - events = A,B,C,D
-# - --show=C,A,D
-# - --sort=D,C
-#----------------------------------------------------------------------------
-
-#----------------------------------------------------------------------------
-# Global variables, main data structures
-#----------------------------------------------------------------------------
-# CCs are arrays, the counts corresponding to @events, with 'undef'
-# representing '.'. This makes things fast (faster than using hashes for CCs)
-# but we have to use @sort_order and @show_order below to handle the --sort and
-# --show options, which is a bit tricky.
-#----------------------------------------------------------------------------
-
-# Total counts for summary (an array reference).
-my $summary_CC;
-
-# Totals for each function, for overall summary.
-# hash(filename:fn_name => CC array)
-my %fn_totals;
-
-# Individual CCs, organised by filename and line_num for easy annotation.
-# hash(filename => hash(line_num => CC array))
-my %allCCs;
-
-# Files chosen for annotation on the command line.
-# key = basename (trimmed of any directory), value = full filename
-my %user_ann_files;
-
-# Generic description string.
-my $desc = "";
-
-# Command line of profiled program.
-my $cmd;
-
-# Events in input file, eg. (A,B,C,D)
-my @events;
-
-# Events to show, from command line, eg. (C,A,D)
-my @show_events;
-
-# Map from @show_events indices to @events indices, eg. (2,0,3). Gives the
-# order in which we must traverse @events in order to show the @show_events,
-# eg. (@events[$show_order[1]], @events[$show_order[2]]...) = @show_events.
-# (Might help to think of it like a hash (0 => 2, 1 => 0, 2 => 3).)
-my @show_order;
-
-# Print out the function totals sorted by these events, eg. (D,C).
-my @sort_events;
-
-# Map from @sort_events indices to @events indices, eg. (3,2). Same idea as
-# for @show_order.
-my @sort_order;
-
-# Thresholds, one for each sort event (or default to 1 if no sort events
-# specified). We print out functions and do auto-annotations until we've
-# handled this proportion of all the events thresholded.
-my @thresholds;
-
-my $default_threshold = 0.1;
-
-my $single_threshold = $default_threshold;
-
-# If on, show a percentage for each non-zero count.
-my $show_percs = 1;
-
-# If on, automatically annotates all files that are involved in getting over
-# all the threshold counts.
-my $auto_annotate = 1;
-
-# Number of lines to show around each annotated line.
-my $context = 8;
-
-# Directories in which to look for annotation files.
-my @include_dirs = ("");
-
-# Input file name
-my $input_file = undef;
-
-# Version number
-my $version = "@VERSION@";
-
-# Usage message.
-my $usage = <<END
-usage: cg_annotate [options] cachegrind-out-file [source-files...]
-
- options for the user, with defaults in [ ], are:
- -h --help show this message
- --version show version
- --show=A,B,C only show figures for events A,B,C [all]
- --sort=A,B,C sort columns by events A,B,C [event column order]
- --threshold=<0--20> a function is shown if it accounts for more than x% of
- the counts of the primary sort event [$default_threshold]
- --show-percs=yes|no show a percentage for each non-zero count [yes]
- --auto=yes|no annotate all source files containing functions
- that helped reach the event count threshold [yes]
- --context=N print N lines of context before and after
- annotated lines [8]
- -I<d> --include=<d> add <d> to list of directories to search for
- source files
-
- cg_annotate is Copyright (C) 2002-2017 Nicholas Nethercote.
- and licensed under the GNU General Public License, version 2.
- Bug reports, feedback, admiration, abuse, etc, to: njn\@valgrind.org.
-
-END
-;
-
-# Used in various places of output.
-my $fancy = '-' x 80 . "\n";
-
-sub safe_div($$)
-{
- my ($x, $y) = @_;
- return ($y == 0 ? 0 : $x / $y);
-}
-
-#-----------------------------------------------------------------------------
-# Argument and option handling
-#-----------------------------------------------------------------------------
-sub process_cmd_line()
-{
- for my $arg (@ARGV) {
-
- # Option handling
- if ($arg =~ /^-/) {
-
- # --version
- if ($arg =~ /^--version$/) {
- die("cg_annotate-$version\n");
-
- # --show=A,B,C
- } elsif ($arg =~ /^--show=(.*)$/) {
- @show_events = split(/,/, $1);
-
- # --sort=A,B,C
- # Nb: You can specify thresholds individually, eg.
- # --sort=A:99,B:95,C:90. These will override any --threshold
- # argument.
- } elsif ($arg =~ /^--sort=(.*)$/) {
- @sort_events = split(/,/, $1);
- my $th_specified = 0;
- foreach my $i (0 .. scalar @sort_events - 1) {
- if ($sort_events[$i] =~ /.*:([\d\.]+)%?$/) {
- my $th = $1;
- ($th >= 0 && $th <= 100) or die($usage);
- $sort_events[$i] =~ s/:.*//;
- $thresholds[$i] = $th;
- $th_specified = 1;
- } else {
- $thresholds[$i] = 0;
- }
- }
- if (not $th_specified) {
- @thresholds = ();
- }
-
- # --threshold=X (tolerates a trailing '%')
- } elsif ($arg =~ /^--threshold=([\d\.]+)%?$/) {
- $single_threshold = $1;
- ($1 >= 0 && $1 <= 20) or die($usage);
-
- # --show-percs=yes|no
- } elsif ($arg =~ /^--show-percs=yes$/) {
- $show_percs = 1;
- } elsif ($arg =~ /^--show-percs=no$/) {
- $show_percs = 0;
-
- # --auto=yes|no
- } elsif ($arg =~ /^--auto=yes$/) {
- $auto_annotate = 1;
- } elsif ($arg =~ /^--auto=no$/) {
- $auto_annotate = 0;
-
- # --context=N
- } elsif ($arg =~ /^--context=([\d\.]+)$/) {
- $context = $1;
- if ($context < 0) {
- die($usage);
- }
-
- # We don't handle "-I name" -- there can be no space.
- } elsif ($arg =~ /^-I$/) {
- die("Sorry, no space is allowed after a -I flag\n");
-
- # --include=A,B,C. Allow -I=name for backwards compatibility.
- } elsif ($arg =~ /^(-I=|-I|--include=)(.*)$/) {
- my $inc = $2;
- $inc =~ s|/$||; # trim trailing '/'
- push(@include_dirs, "$inc/");
-
- } else { # -h and --help fall under this case
- die($usage);
- }
-
- # Argument handling -- annotation file checking and selection.
- # Stick filenames into a hash for quick 'n easy lookup throughout.
- } else {
- if (not defined $input_file) {
- # First non-option argument is the output file.
- $input_file = $arg;
- } else {
- # Subsequent non-option arguments are source files.
- my $readable = 0;
- foreach my $include_dir (@include_dirs) {
- if (-r $include_dir . $arg) {
- $readable = 1;
- }
- }
- $readable or die("File $arg not found in any of: @include_dirs\n");
- $user_ann_files{$arg} = 1;
- }
- }
- }
-
- # Must have chosen an input file
- if (not defined $input_file) {
- die($usage);
- }
-}
-
-#-----------------------------------------------------------------------------
-# Reading of input file
-#-----------------------------------------------------------------------------
-sub max ($$)
-{
- my ($x, $y) = @_;
- return ($x > $y ? $x : $y);
-}
-
-# Add the two arrays; any '.' entries are ignored. Two tricky things:
-# 1. If $a2->[$i] is undefined, it defaults to 0 which is what we want; we turn
-# off warnings to allow this. This makes things about 10% faster than
-# checking for definedness ourselves.
-# 2. We don't add an undefined count or a ".", even though it's value is 0,
-# because we don't want to make an $a2->[$i] that is undef become 0
-# unnecessarily.
-sub add_array_a_to_b ($$)
-{
- my ($a1, $a2) = @_;
-
- my $n = max(scalar @$a1, scalar @$a2);
- $^W = 0;
- foreach my $i (0 .. $n-1) {
- $a2->[$i] += $a1->[$i] if (defined $a1->[$i] && "." ne $a1->[$i]);
- }
- $^W = 1;
-}
-
-# Add each event count to the CC array. '.' counts become undef, as do
-# missing entries (implicitly).
-sub line_to_CC ($)
-{
- my @CC = (split /\s+/, $_[0]);
- (@CC <= @events) or die("Line $.: too many event counts\n");
- return \@CC;
-}
-
-sub read_input_file()
-{
- open(INPUTFILE, "< $input_file")
- || die "Cannot open $input_file for reading\n";
-
- # Read "desc:" lines.
- my $line;
- while ($line = <INPUTFILE>) {
- if ($line =~ s/desc:\s+//) {
- $desc .= $line;
- } else {
- last;
- }
- }
-
- # Read "cmd:" line (Nb: will already be in $line from "desc:" loop above).
- ($line =~ s/^cmd:\s+//) or die("Line $.: missing command line\n");
- $cmd = $line;
- chomp($cmd); # Remove newline
-
- # Read "events:" line. We make a temporary hash in which the Nth event's
- # value is N, which is useful for handling --show/--sort options below.
- $line = <INPUTFILE>;
- (defined $line && $line =~ s/^events:\s+//)
- or die("Line $.: missing events line\n");
- @events = split(/\s+/, $line);
- my %events;
- my $n = 0;
- foreach my $event (@events) {
- $events{$event} = $n;
- $n++
- }
-
- # If no --show arg give, default to showing all events in the file.
- # If --show option is used, check all specified events appeared in the
- # "events:" line. Then initialise @show_order.
- if (@show_events) {
- foreach my $show_event (@show_events) {
- (defined $events{$show_event}) or
- die("--show event `$show_event' did not appear in input\n");
- }
- } else {
- @show_events = @events;
- }
- foreach my $show_event (@show_events) {
- push(@show_order, $events{$show_event});
- }
-
- # Do as for --show, but if no --sort arg given, default to sorting by
- # column order (ie. first column event is primary sort key, 2nd column is
- # 2ndary key, etc).
- if (@sort_events) {
- foreach my $sort_event (@sort_events) {
- (defined $events{$sort_event}) or
- die("--sort event `$sort_event' did not appear in input\n");
- }
- } else {
- @sort_events = @events;
- }
- foreach my $sort_event (@sort_events) {
- push(@sort_order, $events{$sort_event});
- }
-
- # If multiple threshold args weren't given via --sort, stick in the single
- # threshold (either from --threshold if used, or the default otherwise) for
- # the primary sort event, and 0% for the rest.
- if (not @thresholds) {
- foreach my $e (@sort_order) {
- push(@thresholds, 100);
- }
- $thresholds[0] = $single_threshold;
- }
-
- my $currFileName;
- my $currFileFuncName;
-
- my $currFuncCC;
- my $currFileCCs = {}; # hash(line_num => CC)
-
- # Read body of input file.
- while (<INPUTFILE>) {
- # Skip comments and empty lines.
- next if /^\s*$/ || /^\#/;
-
- if (s/^(-?\d+)\s+//) {
- my $lineNum = $1;
- my $CC = line_to_CC($_);
- defined($currFuncCC) || die;
- add_array_a_to_b($CC, $currFuncCC);
-
- # If currFileName is selected, add CC to currFileName list. We look for
- # full filename matches; or, if auto-annotating, we have to
- # remember everything -- we won't know until the end what's needed.
- defined($currFileCCs) || die;
- if ($auto_annotate || defined $user_ann_files{$currFileName}) {
- my $currLineCC = $currFileCCs->{$lineNum};
- if (not defined $currLineCC) {
- $currLineCC = [];
- $currFileCCs->{$lineNum} = $currLineCC;
- }
- add_array_a_to_b($CC, $currLineCC);
- }
-
- } elsif (s/^fn=(.*)$//) {
- $currFileFuncName = "$currFileName:$1";
- $currFuncCC = $fn_totals{$currFileFuncName};
- if (not defined $currFuncCC) {
- $currFuncCC = [];
- $fn_totals{$currFileFuncName} = $currFuncCC;
- }
-
- } elsif (s/^fl=(.*)$//) {
- $currFileName = $1;
- $currFileCCs = $allCCs{$currFileName};
- if (not defined $currFileCCs) {
- $currFileCCs = {};
- $allCCs{$currFileName} = $currFileCCs;
- }
- # Assume that a "fn=" line is followed by a "fl=" line.
- $currFileFuncName = undef;
-
- } elsif (s/^summary:\s+//) {
- $summary_CC = line_to_CC($_);
- (scalar(@$summary_CC) == @events)
- or die("Line $.: summary event and total event mismatch\n");
-
- } else {
- warn("WARNING: line $. malformed, ignoring\n");
- }
- }
-
- # Check if summary line was present
- if (not defined $summary_CC) {
- die("missing final summary line, aborting\n");
- }
-
- close(INPUTFILE);
-}
-
-#-----------------------------------------------------------------------------
-# Print options used
-#-----------------------------------------------------------------------------
-sub print_options ()
-{
- print($fancy);
- print($desc);
- print("Command: $cmd\n");
- print("Data file: $input_file\n");
- print("Events recorded: @events\n");
- print("Events shown: @show_events\n");
- print("Event sort order: @sort_events\n");
- print("Thresholds: @thresholds\n");
-
- my @include_dirs2 = @include_dirs; # copy @include_dirs
- shift(@include_dirs2); # remove "" entry, which is always the first
- unshift(@include_dirs2, "") if (0 == @include_dirs2);
- my $include_dir = shift(@include_dirs2);
- print("Include dirs: $include_dir\n");
- foreach my $include_dir (@include_dirs2) {
- print(" $include_dir\n");
- }
-
- my @user_ann_files = keys %user_ann_files;
- unshift(@user_ann_files, "") if (0 == @user_ann_files);
- my $user_ann_file = shift(@user_ann_files);
- print("User annotated: $user_ann_file\n");
- foreach $user_ann_file (@user_ann_files) {
- print(" $user_ann_file\n");
- }
-
- my $is_on = ($auto_annotate ? "on" : "off");
- print("Auto-annotation: $is_on\n");
- print("\n");
-}
-
-#-----------------------------------------------------------------------------
-# Print summary and sorted function totals
-#-----------------------------------------------------------------------------
-sub mycmp ($$)
-{
- my ($c, $d) = @_;
-
- # Iterate through sort events (eg. 3,2); return result if two are different
- foreach my $i (@sort_order) {
- my ($x, $y);
- $x = $c->[$i];
- $y = $d->[$i];
- $x = -1 unless defined $x;
- $y = -1 unless defined $y;
-
- my $cmp = abs($y) <=> abs($x); # reverse sort of absolute size
- if (0 != $cmp) {
- return $cmp;
- }
- }
- # Exhausted events, equal
- return 0;
-}
-
-sub commify ($) {
- my ($val) = @_;
- 1 while ($val =~ s/^(-?\d+)(\d{3})/$1,$2/);
- return $val;
-}
-
-# Because the counts can get very big, and we don't want to waste screen space
-# and make lines too long, we compute exactly how wide each column needs to be
-# by finding the widest entry for each one.
-sub compute_CC_col_widths (@)
-{
- my @CCs = @_;
- my $CC_col_widths = [];
-
- # Initialise with minimum widths (from event names)
- foreach my $event (@events) {
- push(@$CC_col_widths, length($event));
- }
-
- # Find maximum width count for each column. @CC_col_width positions
- # correspond to @CC positions.
- foreach my $CC (@CCs) {
- foreach my $i (0 .. scalar(@$CC)-1) {
- if (defined $CC->[$i]) {
- # Find length, accounting for commas that will be added, and
- # possibly a percentage.
- my $length = length $CC->[$i];
- my $width = $length + int(($length - 1) / 3);
- if ($show_percs) {
- $width += 9; # e.g. " (12.34%)" is 9 chars
- }
- $CC_col_widths->[$i] = max($CC_col_widths->[$i], $width);
- }
- }
- }
- return $CC_col_widths;
-}
-
-# Print the CC with each column's size dictated by $CC_col_widths.
-sub print_CC ($$)
-{
- my ($CC, $CC_col_widths) = @_;
-
- foreach my $i (@show_order) {
- my $count = (defined $CC->[$i] ? commify($CC->[$i]) : ".");
-
- my $perc = "";
- if ($show_percs) {
- if (defined $CC->[$i] && $CC->[$i] != 0) {
- # Try our best to keep the number fitting into 5 chars. This
- # requires dropping a digit after the decimal place if it's
- # sufficiently negative (e.g. "-10.0") or positive (e.g.
- # "100.0"). Thanks to diffs it's possible to have even more
- # extreme values, like "-100.0" or "1000.0"; those rare case
- # will end up with slightly wrong indenting, oh well.
- $perc = safe_div($CC->[$i] * 100, $summary_CC->[$i]);
- $perc = (-9.995 < $perc && $perc < 99.995)
- ? sprintf(" (%5.2f%%)", $perc)
- : sprintf(" (%5.1f%%)", $perc);
- } else {
- # Don't show percentages for "." and "0" entries.
- $perc = " ";
- }
- }
-
- # $reps will be negative for the extreme values mentioned above. The
- # use of max() avoids a possible warning about a negative repeat count.
- my $text = $count . $perc;
- my $len = length($text);
- my $reps = $CC_col_widths->[$i] - length($text);
- my $space = ' ' x max($reps, 0);
- print("$space$text ");
- }
-}
-
-sub print_events ($)
-{
- my ($CC_col_widths) = @_;
-
- foreach my $i (@show_order) {
- my $event = $events[$i];
- my $event_width = length($event);
- my $col_width = $CC_col_widths->[$i];
- my $space = ' ' x ($col_width - $event_width);
- print("$event$space ");
- }
-}
-
-# Prints summary and function totals (with separate column widths, so that
-# function names aren't pushed over unnecessarily by huge summary figures).
-# Also returns a hash containing all the files that are involved in getting the
-# events count above the thresholds (ie. all the interesting ones).
-sub print_summary_and_fn_totals ()
-{
- my @fn_fullnames = keys %fn_totals;
-
- # Work out the size of each column for printing (summary and functions
- # separately).
- my $summary_CC_col_widths = compute_CC_col_widths($summary_CC);
- my $fn_CC_col_widths = compute_CC_col_widths(values %fn_totals);
-
- # Header and counts for summary
- print($fancy);
- print_events($summary_CC_col_widths);
- print("\n");
- print($fancy);
- print_CC($summary_CC, $summary_CC_col_widths);
- print(" PROGRAM TOTALS\n");
- print("\n");
-
- # Header for functions
- print($fancy);
- print_events($fn_CC_col_widths);
- print(" file:function\n");
- print($fancy);
-
- # Sort function names into order dictated by --sort option.
- @fn_fullnames = sort {
- mycmp($fn_totals{$a}, $fn_totals{$b})
- } @fn_fullnames;
-
-
- # Assertion
- (scalar @sort_order == scalar @thresholds) or
- die("sort_order length != thresholds length:\n",
- " @sort_order\n @thresholds\n");
-
- my $threshold_files = {};
- # @curr_totals has the same shape as @sort_order and @thresholds
- my @curr_totals = ();
- foreach my $e (@thresholds) {
- push(@curr_totals, 0);
- }
-
- # Print functions, stopping when the threshold has been reached.
- foreach my $fn_name (@fn_fullnames) {
-
- my $fn_CC = $fn_totals{$fn_name};
-
- # Stop when we've reached all the thresholds
- my $any_thresholds_exceeded = 0;
- foreach my $i (0 .. scalar @thresholds - 1) {
- my $prop = safe_div(abs($fn_CC->[$sort_order[$i]] * 100),
- abs($summary_CC->[$sort_order[$i]]));
- $any_thresholds_exceeded ||= ($prop >= $thresholds[$i]);
- }
- last if not $any_thresholds_exceeded;
-
- # Print function results
- print_CC($fn_CC, $fn_CC_col_widths);
- print(" $fn_name\n");
-
- # Update the threshold counts
- my $filename = $fn_name;
- $filename =~ s/:.+$//; # remove function name
- $threshold_files->{$filename} = 1;
- foreach my $i (0 .. scalar @sort_order - 1) {
- $curr_totals[$i] += $fn_CC->[$sort_order[$i]]
- if (defined $fn_CC->[$sort_order[$i]]);
- }
- }
- print("\n");
-
- return $threshold_files;
-}
-
-#-----------------------------------------------------------------------------
-# Annotate selected files
-#-----------------------------------------------------------------------------
-
-# Issue a warning that the source file is more recent than the input file.
-sub warning_on_src_more_recent_than_inputfile ($)
-{
- my $src_file = $_[0];
-
- my $warning = <<END
-@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
-@@ WARNING @@ WARNING @@ WARNING @@ WARNING @@ WARNING @@ WARNING @@ WARNING @@
-@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
-@ Source file '$src_file' is more recent than input file '$input_file'.
-@ Annotations may not be correct.
-@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+# - Formatters:
+# - `black`, for general formatting. This avoids the need for style checkers
+# like `flake8`. Note that `black` allows a max line length of 88, which is
+# a mild but common PEP-8 violation.
+# - `isort`, for import sorting.
+#
+# - Type-checkers:
+# - `mypy --strict`. This is the most commonly used Python type checker.
+# - `pyright`. This is another good type checker. The `pyright: strict`
+# comment above forces strict checking.
+# - Sometimes one type-checker will complain about something the other does
+# not. The goal is to keep both type checkers happy.
+#
+# - Linters:
+# - `ruff`. Sometimes useful, and very fast to run.
+# - `pylint`. Sometimes annoying, sometimes useful. The `pylintrc`
+# modifies/disables the more annoying lints.
+#
+# - Profilers:
+# - `cProfile` + `snakeviz`: Typically run with
+# `python3 -m cProfile -o cg.prof cg_annotate $INPUT && snakeviz cg.prof`.
+# - `scalene`. Typically run with `scalene ./cg_annotate $INPUT`.
+#
+# - Packager:
+# - `cp` is used for distribution. This is possible because this program is a
+# single file and only uses the Python Standard Library. This avoids the
+# need for any of the million different Python package management tools.
+
+
+from __future__ import annotations
+
+import os
+import re
+import sys
+from argparse import ArgumentParser, BooleanOptionalAction, Namespace
+from collections import defaultdict
+from typing import Callable, DefaultDict, NewType, NoReturn, TextIO, TypeAlias
+
+
+class Args(Namespace):
+ """
+ A typed wrapper for parsed args.
+
+ None of these fields are modified after arg parsing finishes.
+ """
+
+ show: list[str]
+ sort: list[str]
+ threshold: float # a percentage
+ show_percs: bool
+ auto: bool
+ context: int
+ include: list[str]
+ cgout_filename: list[str]
+ src_filenames: list[str]
+
+ @staticmethod
+ def parse() -> Args:
+ def comma_separated_list(values: str) -> list[str]:
+ return values.split(",")
+
+ def threshold(n: str) -> float:
+ f = float(n)
+ if 0 <= f <= 20:
+ return f
+ raise ValueError
+
+ def add_bool_argument(p: ArgumentParser, name: str, help: str) -> None:
+ """
+ Add a bool argument that defaults to true.
+
+ Supports these forms: `--foo`, `--no-foo`, `--foo=yes`, `--foo=no`.
+ The latter two were the forms supported by the old Perl version of
+ `cg_annotate`, and are now deprecated.
+ """
+ flag = "--" + name
+ dest = name.replace("-", "_")
+
+ # Note: the default value is always printed with `BooleanOptionalAction`,
+ # due to an argparse bug: https://github.com/python/cpython/issues/83137.
+ p.add_argument(
+ flag,
+ default=True,
+ action=BooleanOptionalAction,
+ help=help,
+ )
+ p.add_argument(
+ f"{flag}=yes",
+ dest=dest,
+ action="store_true",
+ help=f"(deprecated) same as --{name}",
+ )
+ p.add_argument(
+ f"{flag}=no",
+ dest=dest,
+ action="store_false",
+ help=f"(deprecated) same as --no-{name}",
+ )
+
+ p = ArgumentParser(description="Process Cachegrind output files.")
+
+ p.add_argument("--version", action="version", version="%(prog)s-@VERSION@")
+
+ p.add_argument(
+ "--show",
+ type=comma_separated_list,
+ metavar="A,B,C",
+ help="only show figures for events A,B,C (default: all events)",
+ )
+
+ p.add_argument(
+ "--sort",
+ type=comma_separated_list,
+ metavar="A,B,C",
+ help="sort functions by events A,B,C (default: event column order)",
+ )
+
+ p.add_argument(
+ "--threshold",
+ type=threshold,
+ default=0.1,
+ metavar="N:[0,20]",
+ help="only show functions with more than N%% of primary sort event "
+ "counts (default: %(default)s)",
+ )
+ add_bool_argument(
+ p,
+ "show-percs",
+ "show a percentage for each non-zero count",
+ )
+ add_bool_argument(
+ p,
+ "auto",
+ "annotate all source files containing functions that reached the "
+ "event count threshold",
+ )
+ p.add_argument(
+ "--context",
+ type=int,
+ default=8,
+ metavar="N",
+ help="print N lines of context before and after annotated lines "
+ "(default: %(default)s)",
+ )
+ p.add_argument(
+ "-I",
+ "--include",
+ action="append",
+ default=[],
+ metavar="D",
+ help="add D to the list of searched source file directories",
+ )
+ p.add_argument(
+ "cgout_filename",
+ nargs=1,
+ metavar="cachegrind-out-file",
+ help="file produced by Cachegrind",
+ )
+ p.add_argument(
+ "src_filenames",
+ nargs="*",
+ metavar="source-files",
+ help="source files to annotate (usually not needed due to --auto)",
+ )
+
+ return p.parse_args(namespace=Args())
+
+
+# Args are stored in a global for easy access.
+args = Args.parse()
+
+
+# A single instance of this class is constructed, from `args` and the `events:`
+# line in the cgout file.
+class Events:
+ # The event names.
+ events: list[str]
+
+ # The order in which we must traverse events for --show. Can be shorter
+ # than `events`.
+ show_events: list[str]
+
+ # Like `show_events`, but indices into `events`, rather than names.
+ show_indices: list[int]
+
+ # The order in which we must traverse events for --sort. Can be shorter
+ # than `events`.
+ sort_events: list[str]
+
+ # Like `sort_events`, but indices into `events`, rather than names.
+ sort_indices: list[int]
+
+ # Threshold percentages, one per sort event. Dictates when we stop printing
+ # functions. Positions correspond to positions in `sort_events`. Only
+ # `threshold_percs[0]` is actually used for thresholding, for historical
+ # reasons.
+ threshold_percs: list[float]
+
+ def __init__(self, text: str) -> None:
+ self.events = text.split()
+ self.num_events = len(self.events)
+
+ # A temporary dict mapping events to indices, [0, n-1].
+ event_indices = {event: n for n, event in enumerate(self.events)}
+
+ # If --show is given, check it is valid. If --show is not given,
+ # default to all events in the standard order.
+ if args.show:
+ for event in args.show:
+ if event not in event_indices:
+ die(f"--show event `{event}` did not appear in `events:` line")
+ self.show_events = args.show
+ else:
+ self.show_events = self.events
+
+ self.show_indices = [event_indices[event] for event in self.show_events]
+
+ # Likewise for --sort.
+ if args.sort:
+ for event in args.sort:
+ if event not in event_indices:
+ die(f"--sort event `{event}` did not appear in `events:` line")
+ self.sort_events = args.sort
+ else:
+ self.sort_events = self.events
+
+ self.sort_indices = [event_indices[event] for event in self.sort_events]
+
+ # The primary sort event gets the --threshold value, and all other sort
+ # events get 100% (i.e. ignored).
+ self.threshold_percs = [100] * len(self.sort_events)
+ self.threshold_percs[0] = args.threshold
+
+ def mk_cc(self, text: str) -> Cc:
+ # This is slightly faster than a list comprehension.
+ counts = list(map(int, text.split()))
+
+ if len(counts) == self.num_events:
+ pass
+ elif len(counts) < self.num_events:
+ # Add zeroes at the end for any missing numbers.
+ counts.extend([0] * (self.num_events - len(counts)))
+ else:
+ raise ValueError
+
+ return Cc(counts)
+
+ def mk_empty_cc(self) -> Cc:
+ # This is much faster than a list comprehension.
+ return Cc([0] * self.num_events)
+
+
+class Cc:
+ """
+ This is a dumb container for counts.
+
+ It doesn't know anything about events, i.e. what each count means. It can
+ do basic operations like `__iadd__` and `__eq__`, and anything more must be
+ done elsewhere. `Events.mk_cc` and `Events.mk_empty_cc` are used for
+ construction.
+ """
+
+ # Always the same length as `Events.events`.
+ counts: list[int]
+
+ def __init__(self, counts: list[int]) -> None:
+ self.counts = counts
+
+ def __repr__(self) -> str:
+ return str(self.counts)
+
+ def __eq__(self, other: object) -> bool:
+ if not isinstance(other, Cc):
+ return NotImplemented
+ return self.counts == other.counts
+
+ def __iadd__(self, other: Cc) -> Cc:
+ for i, other_count in enumerate(other.counts):
+ self.counts[i] += other_count
+ return self
+
+
+# A paired filename and function name.
+Flfn = NewType("Flfn", tuple[str, str])
+
+# Per-function CCs.
+DictFlfnCc: TypeAlias = DefaultDict[Flfn, Cc]
+
+# Per-line CCs, organised by filename and line number.
+DictLineCc: TypeAlias = DefaultDict[int, Cc]
+DictFlDictLineCc: TypeAlias = DefaultDict[str, DictLineCc]
+
+
+def die(msg: str) -> NoReturn:
+ print("cg_annotate: error:", msg, file=sys.stderr)
+ sys.exit(1)
+
+
+def read_cgout_file() -> tuple[str, str, Events, DictFlfnCc, DictFlDictLineCc, Cc]:
+    """
+    Parse the Cachegrind output file named by `args.cgout_filename[0]`.
+
+    The file format is described in Cachegrind's manual.
+
+    Returns a tuple of:
+    - the text of the `desc:` lines, each terminated by a newline;
+    - the command from the `cmd:` line;
+    - the `Events` built from the `events:` line;
+    - the per-function CCs, keyed by `Flfn`;
+    - the per-line CCs, keyed by filename and then by line number;
+    - the CC from the `summary:` line.
+
+    Every problem (file that cannot be opened, missing or malformed line, a
+    summary that differs from the per-function total) is reported through
+    `die`, which exits the program.
+    """
+    try:
+        cgout_file = open(args.cgout_filename[0], "r", encoding="utf-8")
+    except OSError as err:
+        die(f"{err}")
+
+    with cgout_file:
+        cgout_line_num = 0
+
+        def parse_die(msg: str) -> NoReturn:
+            # Like `die`, but prefixed with the current file position.
+            die(f"{cgout_file.name}:{cgout_line_num}: {msg}")
+
+        def readline() -> str:
+            nonlocal cgout_line_num
+            cgout_line_num += 1
+            return cgout_file.readline()
+
+        # Read "desc:" lines.
+        desc = ""
+        while line := readline():
+            if m := re.match(r"desc:\s+(.*)", line):
+                desc += m.group(1) + "\n"
+            else:
+                break
+
+        # Read "cmd:" line. (`line` is already set from the "desc:" loop.)
+        if m := re.match(r"cmd:\s+(.*)", line):
+            cmd = m.group(1)
+        else:
+            parse_die("missing a `cmd:` line")
+
+        # Read "events:" line.
+        line = readline()
+        if m := re.match(r"events:\s+(.*)", line):
+            events = Events(m.group(1))
+        else:
+            parse_die("missing an `events:` line")
+
+        def mk_empty_dict_line_cc() -> DictLineCc:
+            return defaultdict(events.mk_empty_cc)
+
+        curr_fl = ""
+        curr_flfn = Flfn(("", ""))
+
+        # Three different places where we accumulate CC data.
+        dict_flfn_cc: DictFlfnCc = defaultdict(events.mk_empty_cc)
+        dict_fl_dict_line_cc: DictFlDictLineCc = defaultdict(mk_empty_dict_line_cc)
+        summary_cc: Cc | None = None
+
+        # Compile the one hot regex.
+        count_pat = re.compile(r"(\d+)\s+(.*)")
+
+        # Line matching is done in order of pattern frequency, for speed.
+        while True:
+            line = readline()
+
+            if m := count_pat.match(line):
+                line_num = int(m.group(1))
+                try:
+                    cc = events.mk_cc(m.group(2))
+                except ValueError:
+                    parse_die("malformed or too many event counts")
+
+                # Record this CC at the function level.
+                flfn_cc = dict_flfn_cc[curr_flfn]
+                flfn_cc += cc
+
+                # Record this CC at the file/line level.
+                line_cc = dict_fl_dict_line_cc[curr_fl][line_num]
+                line_cc += cc
+
+            elif line.startswith("fn="):
+                # Strip only a real trailing newline, so that a final line
+                # without one doesn't lose its last character.
+                curr_flfn = Flfn((curr_fl, line[3:].removesuffix("\n")))
+
+            elif line.startswith("fl="):
+                curr_fl = line[3:].removesuffix("\n")
+                # A `fn=` line should follow, overwriting the "???".
+                curr_flfn = Flfn((curr_fl, "???"))
+
+            elif m := re.match(r"summary:\s+(.*)", line):
+                try:
+                    summary_cc = events.mk_cc(m.group(1))
+                except ValueError:
+                    # Same failure as for a count line, so same wording.
+                    parse_die("malformed or too many event counts")
+
+            elif line == "":
+                break  # EOF
+
+            elif line == "\n" or line.startswith("#"):
+                # Skip empty lines and comment lines.
+                pass
+
+            else:
+                malformed = line.removesuffix("\n")
+                parse_die(f"malformed line: {malformed}")
+
+        # Check if summary line was present.
+        if summary_cc is None:
+            parse_die("missing `summary:` line, aborting")
+
+        # Check summary is correct.
+        total_cc = events.mk_empty_cc()
+        for flfn_cc in dict_flfn_cc.values():
+            total_cc += flfn_cc
+        if summary_cc != total_cc:
+            msg = (
+                "`summary:` line doesn't match computed total\n"
+                f"- summary: {summary_cc}\n"
+                f"- total: {total_cc}"
+            )
+            parse_die(msg)
+
+    return (desc, cmd, events, dict_flfn_cc, dict_fl_dict_line_cc, summary_cc)
+
+
+def safe_perc(m: int, n: int) -> float:
+    """Return `m` as a percentage of `n`, or 0 when `n` is zero."""
+    if n == 0:
+        return 0
+    return m * 100 / n
+
+
+class CcPrinter:
+    """
+    Prints CCs in aligned columns, one column per shown event.
+
+    Column widths are computed once, in `__init__`, from the CCs that are
+    going to be printed. Percentages (relative to `summary_cc`) are added when
+    `args.show_percs` is set.
+    """
+
+    # The width of a percentage annotation such as " (12.34%)". Entries that
+    # are printed without a percentage are padded by this much instead, so
+    # that the counts in a column stay aligned.
+    PERC_WIDTH = len(" (12.34%)")
+
+    # Note: every `CcPrinter` gets the same `Events` object.
+    events: Events
+
+    # Note: every `CcPrinter` gets the same summary CC.
+    summary_cc: Cc
+
+    # The width of each event column. For simplicity, its length matches
+    # `events.events`, even though not all events are necessarily shown.
+    widths: list[int]
+
+    def __init__(self, events: Events, ccs: list[Cc], summary_cc: Cc) -> None:
+        """Compute the column widths needed to print every CC in `ccs`."""
+        self.events = events
+        self.summary_cc = summary_cc
+
+        # Find min and max value for each event. One of them will be the
+        # widest value.
+        min_cc = events.mk_empty_cc()
+        max_cc = events.mk_empty_cc()
+        for cc in ccs:
+            for i, _ in enumerate(events.events):
+                count = cc.counts[i]
+                if count > max_cc.counts[i]:
+                    max_cc.counts[i] = count
+                elif count < min_cc.counts[i]:
+                    min_cc.counts[i] = count
+
+        # Find maximum width for each column.
+        self.widths = [0] * len(events.events)
+        for i, event in enumerate(events.events):
+            # Get widest of the min and max, accounting for commas that will be
+            # added, and a possible percentage.
+            width = max(len(str(min_cc.counts[i])), len(str(max_cc.counts[i])))
+            width += (width - 1) // 3
+            if args.show_percs:
+                width += self.PERC_WIDTH
+
+            # Account for the event name, too.
+            self.widths[i] = max(width, len(event))
+
+    def print_events(self, suffix: str) -> None:
+        """Print the names of the shown events as column headers, then `suffix`."""
+        for i in self.events.show_indices:
+            # +1 is for the single space between columns.
+            print(f"{self.events.events[i]:{self.widths[i] + 1}}", end="")
+
+        print(suffix)
+
+    def print_count(self, i: int, text: str) -> None:
+        """Print `text` right-aligned in column `i`, followed by a space."""
+        print(f"{text:>{self.widths[i]}}", end=" ")
+
+    def print_cc(self, cc: Cc, suffix: str) -> None:
+        """Print the shown counts of `cc`, commified, followed by `suffix`."""
+        for i in self.events.show_indices:
+            nstr = f"{cc.counts[i]:,d}"  # commify
+            if not args.show_percs:
+                perc = ""
+            elif cc.counts[i] != 0:
+                # Try our best to keep the number fitting into 5 chars. This
+                # requires dropping a digit after the decimal place if it's
+                # sufficiently negative (e.g. "-10.0") or positive (e.g.
+                # "100.0"). Thanks to diffs it's possible to have even more
+                # extreme values, like "-100.0" or "1000.0"; those rare cases
+                # will end up with slightly wrong indenting, oh well.
+                p = safe_perc(cc.counts[i], self.summary_cc.counts[i])
+                normal = -9.995 < p < 99.995
+                perc = f" ({p:5.{2 if normal else 1}f}%)"
+            else:
+                # Don't show percentages for "0" entries, it's just clutter.
+                # Pad to the percentage width so the "0" lines up with the
+                # other counts in the column.
+                perc = " " * self.PERC_WIDTH
+
+            self.print_count(i, nstr + perc)
+
+        print("", suffix)
+
+    def print_missing_cc(self, suffix: str) -> None:
+        """Print a "." in every shown column, followed by `suffix`."""
+        if args.show_percs:
+            # Don't show percentages for "." entries, it's just clutter. Pad
+            # to the percentage width so the "." lines up with the counts.
+            text = "." + " " * self.PERC_WIDTH
+        else:
+            text = "."
+
+        for i in self.events.show_indices:
+            self.print_count(i, text)
+
+        print("", suffix)
+
+
+# Used in various places in the output.
+FANCY: str = "-" * 80
+
+
+def print_header(desc: str, cmd: str, events: Events) -> None:
+    """
+    Print the header section: the description, the command, the data file,
+    the event lists, the thresholds and the annotation options.
+
+    Every label is padded to the width of the longest one, so that all the
+    values start in the same column.
+    """
+    label_width = len("Event sort order:")
+
+    def label(text: str) -> str:
+        return f"{text:<{label_width}}"
+
+    def print_names(text: str, names: list[str]) -> None:
+        # The first name goes on the label's line; the rest are lined up
+        # underneath it, one per line.
+        if len(names) == 0:
+            print(label(text))
+        else:
+            print(label(text), names[0])
+            for name in names[1:]:
+                print(label(""), name)
+
+    print(FANCY)
+    print(desc, end="")
+    print(label("Command:"), cmd)
+    print(label("Data file:"), args.cgout_filename[0])
+    print(label("Events recorded:"), *events.events)
+    print(label("Events shown:"), *events.show_events)
+    print(label("Event sort order:"), *events.sort_events)
+    print(label("Thresholds:"), *events.threshold_percs)
+
+    print_names("Include dirs:", args.include)
+    print_names("User annotated:", args.src_filenames)
+
+    print(label("Auto-annotation:"), "on" if args.auto else "off")
+    print()
+
+
+def print_summary_cc(events: Events, summary_cc: Cc) -> None:
+    """Print the event names and the program totals, framed by rule lines."""
+    # The summary CC is both the only row printed and the percentage baseline.
+    totals_printer = CcPrinter(events, ccs=[summary_cc], summary_cc=summary_cc)
+
+    print(FANCY)
+    totals_printer.print_events("")
+    print(FANCY)
+
+    totals_printer.print_cc(summary_cc, "PROGRAM TOTALS")
+    print()
+
+
+def print_flfn_ccs(
+    events: Events, dict_flfn_cc: DictFlfnCc, summary_cc: Cc
+) -> set[str]:
+    """
+    Print the per-function CCs, largest first, for every function whose count
+    for the primary sort event meets the threshold.
+
+    Returns the set of filenames that contain at least one such function.
+    """
+    # Only the first threshold percentage is actually used.
+    threshold_index = events.sort_indices[0]
+
+    # Convert the threshold from a percentage to an event count.
+    threshold = (
+        events.threshold_percs[0] * abs(summary_cc.counts[threshold_index]) / 100
+    )
+
+    # Absolute values are compared, so negative counts are judged by magnitude.
+    def meets_threshold(flfn_and_cc: tuple[Flfn, Cc]) -> bool:
+        cc = flfn_and_cc[1]
+        return abs(cc.counts[threshold_index]) >= threshold
+
+    # Create a list with the counts in sort order, so that left-to-right list
+    # comparison does the right thing. Plus the `Flfn` at the end for
+    # deterministic output when all the event counts are identical in two CCs.
+    def key(flfn_and_cc: tuple[Flfn, Cc]) -> tuple[list[int], Flfn]:
+        cc = flfn_and_cc[1]
+        return ([abs(cc.counts[i]) for i in events.sort_indices], flfn_and_cc[0])
+
+    # Filter out functions for which the primary sort event count is below the
+    # threshold, and sort the remainder.
+    filtered_flfns_and_ccs = filter(meets_threshold, dict_flfn_cc.items())
+    sorted_flfns_and_ccs = sorted(filtered_flfns_and_ccs, key=key, reverse=True)
+    sorted_ccs = list(map(lambda flfn_and_cc: flfn_and_cc[1], sorted_flfns_and_ccs))
+
+    # Column widths are based only on the CCs that will actually be printed.
+    printer = CcPrinter(events, sorted_ccs, summary_cc)
+
+    print(FANCY)
+    printer.print_events(" file:function")
+    print(FANCY)
-END
-;
- print($warning);
-}
-
-# If there is information about lines not in the file, issue a warning
-# explaining possible causes.
-sub warning_on_nonexistent_lines ($$$)
-{
- my ($src_more_recent_than_inputfile, $src_file, $excess_line_nums) = @_;
- my $cause_and_solution;
-
- if ($src_more_recent_than_inputfile) {
- $cause_and_solution = <<END
-@@ cause: '$src_file' has changed since information was gathered.
-@@ If so, a warning will have already been issued about this.
-@@ solution: Recompile program and rerun under "valgrind --cachesim=yes" to
-@@ gather new information.
-END
- # We suppress warnings about .h files
- } elsif ($src_file =~ /\.h$/) {
- $cause_and_solution = <<END
-@@ cause: bug in the Valgrind's debug info reader that screws up with .h
-@@ files sometimes
-@@ solution: none, sorry
-END
- } else {
- $cause_and_solution = <<END
-@@ cause: not sure, sorry
-END
- }
-
- my $warning = <<END
+    # Print per-function counts.
+    for flfn, flfn_cc in sorted_flfns_and_ccs:
+        printer.print_cc(flfn_cc, f"{flfn[0]}:{flfn[1]}")
+
+    print()
+
+    # Files containing a function that met the threshold.
+    return set(flfn_and_cc[0][0] for flfn_and_cc in sorted_flfns_and_ccs)
+
+
+def mk_warning(msg: str) -> str:
+    # Wraps `msg` in a banner of "@" lines and returns the result. `msg` is
+    # inserted as-is directly above the closing banner line, so it must end
+    # with a newline.
+    return f"""\
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@@ WARNING @@ WARNING @@ WARNING @@ WARNING @@ WARNING @@ WARNING @@ WARNING @@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
-@@
-@@ Information recorded about lines past the end of '$src_file'.
-@@
-@@ Probable cause and solution:
-$cause_and_solution@@
+{msg}\
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
-END
-;
- print($warning);
-}
-
-sub annotate_ann_files($)
-{
- my ($threshold_files) = @_;
-
- my %all_ann_files;
- my @unfound_auto_annotate_files;
- my $printed_totals_CC = [];
-
- # If auto-annotating, add interesting files (but not "???")
- if ($auto_annotate) {
- delete $threshold_files->{"???"};
- %all_ann_files = (%user_ann_files, %$threshold_files)
- } else {
- %all_ann_files = %user_ann_files;
- }
-
- # Track if we did any annotations.
- my $did_annotations = 0;
-
- LOOP:
- foreach my $src_file (keys %all_ann_files) {
-
- my $opened_file = "";
- my $full_file_name = "";
- # Nb: include_dirs already includes "", so it works in the case
- # where the filename has the full path.
- foreach my $include_dir (@include_dirs) {
- my $try_name = $include_dir . $src_file;
- if (open(INPUTFILE, "< $try_name")) {
- $opened_file = $try_name;
- $full_file_name = ($include_dir eq ""
- ? $src_file
- : "$include_dir + $src_file");
- last;
- }
- }
-
- if (not $opened_file) {
- # Failed to open the file. If chosen on the command line, die.
- # If arose from auto-annotation, print a little message.
- if (defined $user_ann_files{$src_file}) {
- die("File $src_file not opened in any of: @include_dirs\n");
-
- } else {
- push(@unfound_auto_annotate_files, $src_file);
- }
-
- } else {
- # File header (distinguish between user- and auto-selected files).
- print("$fancy");
- my $ann_type =
- (defined $user_ann_files{$src_file} ? "User" : "Auto");
- print("-- $ann_type-annotated source: $full_file_name\n");
- print("$fancy");
-
- # Get file's CCs
- my $src_file_CCs = $allCCs{$src_file};
- if (!defined $src_file_CCs) {
- print(" No information has been collected for $src_file\n\n");
- next LOOP;
- }
-
- $did_annotations = 1;
-
- # Numeric, not lexicographic sort!
- my @line_nums = sort {$a <=> $b} keys %$src_file_CCs;
-
- # If $src_file more recent than cachegrind.out, issue warning
- my $src_more_recent_than_inputfile = 0;
- if ((stat $opened_file)[9] > (stat $input_file)[9]) {
- $src_more_recent_than_inputfile = 1;
- warning_on_src_more_recent_than_inputfile($src_file);
- }
-
- # Work out the size of each column for printing
- my $CC_col_widths = compute_CC_col_widths(values %$src_file_CCs);
-
- # Events header
- print_events($CC_col_widths);
- print("\n\n");
-
- # Shift out 0 if it's in the line numbers (from unknown entries,
- # likely due to bugs in Valgrind's stabs debug info reader)
- shift(@line_nums) if (0 == $line_nums[0]);
-
- # Finds interesting line ranges -- all lines with a CC, and all
- # lines within $context lines of a line with a CC.
- my $n = @line_nums;
- my @pairs;
- for (my $i = 0; $i < $n; $i++) {
- push(@pairs, $line_nums[$i] - $context); # lower marker
- while ($i < $n-1 &&
- $line_nums[$i] + 2*$context >= $line_nums[$i+1]) {
- $i++;
- }
- push(@pairs, $line_nums[$i] + $context); # upper marker
- }
-
- # Annotate chosen lines, tracking total counts of lines printed
- if (@pairs) {
- $pairs[0] = 1 if ($pairs[0] < 1);
- while (@pairs) {
- my $low = shift @pairs;
- my $high = shift @pairs;
- while ($. < $low-1) {
- my $tmp = <INPUTFILE>;
- last unless (defined $tmp); # hack to detect EOF
- }
- my $src_line;
- # Print line number, unless start of file
- print("-- line $low " . '-' x 40 . "\n") if ($low != 1);
- while (($. < $high) && ($src_line = <INPUTFILE>)) {
- if (defined $line_nums[0] && $. == $line_nums[0]) {
- print_CC($src_file_CCs->{$.}, $CC_col_widths);
- add_array_a_to_b($src_file_CCs->{$.},
- $printed_totals_CC);
- shift(@line_nums);
-
- } else {
- print_CC([], $CC_col_widths);
- }
-
- print(" $src_line");
- }
- # Print line number, unless EOF
- if ($src_line) {
- print("-- line $high " . '-' x 40 . "\n");
- } else {
- last;
- }
- }
- }
-
- # If there was info on lines past the end of the file...
- if (@line_nums) {
- foreach my $line_num (@line_nums) {
- print_CC($src_file_CCs->{$line_num}, $CC_col_widths);
- print(" <bogus line $line_num>\n");
- }
- print("\n");
- warning_on_nonexistent_lines($src_more_recent_than_inputfile,
- $src_file, \@line_nums);
- }
- print("\n");
-
- # Print summary of counts attributed to file but not to any
- # particular line (due to incomplete debug info).
- if ($src_file_CCs->{0}) {
- print_CC($src_file_CCs->{0}, $CC_col_widths);
- print(" <counts for unidentified lines in $src_file>\n\n");
- }
-
- close(INPUTFILE);
- }
- }
-
- # Print list of unfound auto-annotate selected files.
- if (@unfound_auto_annotate_files) {
- print("$fancy");
- print("The following files chosen for auto-annotation could not be found:\n");
- print($fancy);
- foreach my $f (sort @unfound_auto_annotate_files) {
- print(" $f\n");
- }
- print("\n");
- }
+"""
+
+def warn_src_file_is_newer(src_filename: str, cgout_filename: str) -> None:
+    """Print a banner warning that the source file is newer than the cgout file."""
+    warning_lines = [
+        f"@ Source file '{src_filename}' is more recent than input file '{cgout_filename}'.",
+        "@ Annotations may not be correct.",
+    ]
+    # `mk_warning` needs every line of the message to end with a newline.
+    body = "".join(warning_line + "\n" for warning_line in warning_lines)
+    print(mk_warning(body))
+
+
+def warn_bogus_lines(src_filename: str) -> None:
+    """Print a banner warning about counts recorded past the end of the file."""
+    body = f"@@ Information recorded about lines past the end of '{src_filename}'.\n"
+    banner = mk_warning(body)
+    # The banner already ends with a newline, so don't add another one.
+    print(banner, end="")
+
+
+def print_annotated_src_file(
+    events: Events,
+    dict_line_cc: DictLineCc,
+    ann_type: str,
+    src_file: TextIO,
+    annotated_cc: Cc,
+    summary_cc: Cc,
+) -> None:
+    """
+    Print the interesting parts of one source file, each line preceded by its
+    CC (or by "." entries when it has none).
+
+    - `dict_line_cc` holds the file's per-line CCs. The entry for line 0, if
+      present, is removed from it and printed separately at the end.
+    - `ann_type` is the label used in the heading ("User" or "Auto").
+    - `src_file` is the already-opened source file, read from its current
+      position.
+    - `annotated_cc` is incremented by the CC of every annotated line.
+    - `summary_cc` is the baseline for percentages.
+    """
+    print(FANCY)
+    print("-- ", ann_type, "-annotated source: ", src_file.name, sep="")
+    print(FANCY)
+
+    # Get file's CCs.
+    if not dict_line_cc:
+        print(f" No information has been collected for {src_file.name}")
+        print()
+        return
+
+    # If the source file is more recent than the cgout file, issue warning.
+    if os.stat(src_file.name).st_mtime_ns > os.stat(args.cgout_filename[0]).st_mtime_ns:
+        warn_src_file_is_newer(src_file.name, args.cgout_filename[0])
+
+    printer = CcPrinter(events, list(dict_line_cc.values()), summary_cc)
+
+    printer.print_events("")
+    print()
+
+    # Remove the CC for line 0 if it's present. It gets special treatment
+    # later.
+    line0_cc = dict_line_cc.pop(0, None)
+
+    # Find interesting line ranges: all lines with a CC, and all lines within
+    # `args.context` lines of a line with a CC.
+    line_nums = sorted(dict_line_cc.keys())
+    pairs: list[tuple[int, int]] = []
+    n = len(line_nums)
+    i = 0
+    context = args.context
+    while i < n:
+        lo = max(line_nums[i] - context, 1)  # `max` to prevent negatives
+        # Merge ranges whose context regions touch or overlap.
+        while i < n - 1 and line_nums[i] + 2 * context >= line_nums[i + 1]:
+            i += 1
+        hi = line_nums[i] + context
+        pairs.append((lo, hi))
+        i += 1
+
+    # Annotate chosen lines, tracking total annotated counts.
+    line_num = 0
+    # Index into `line_nums` of the next line that has a CC still to print.
+    # Using an index avoids repeatedly deleting from the front of the list.
+    next_cc_idx = 0
+    for lo, hi in pairs:
+        # Skip the uninteresting lines before this range.
+        while line_num < lo - 1:
+            tmp = src_file.readline()
+            line_num += 1
+            if not tmp:
+                break  # EOF
+
+        src_line = ""
+        # Print line number, unless start of file.
+        if lo != 1:
+            print("-- line", lo, "-" * 40)
+
+        while line_num < hi:
+            src_line = src_file.readline()
+            line_num += 1
+            if not src_line:
+                break
+
+            # Strip only a real trailing newline, so that a final line without
+            # one doesn't lose its last character.
+            src_text = src_line.removesuffix("\n")
+            if next_cc_idx < n and line_num == line_nums[next_cc_idx]:
+                printer.print_cc(dict_line_cc[line_num], src_text)
+                annotated_cc += dict_line_cc[line_num]
+                next_cc_idx += 1
+            else:
+                printer.print_missing_cc(src_text)
+
+        # Print line number, unless EOF.
+        if src_line:
+            print("-- line", hi, "-" * 40)
+        else:
+            break
+
+    # If there was info on lines past the end of the file, warn.
+    bogus_line_nums = line_nums[next_cc_idx:]
+    if bogus_line_nums:
+        for bogus_line_num in bogus_line_nums:
+            printer.print_cc(
+                dict_line_cc[bogus_line_num], f"<bogus line {bogus_line_num}>"
+            )
+
+        print()
+        warn_bogus_lines(src_file.name)
+
+    print()
+
+    # Print summary of counts attributed to the source file but not to any
+    # particular line (due to incomplete debug info).
+    if line0_cc:
+        suffix = f"<counts for unidentified lines in {src_file.name}>"
+        printer.print_cc(line0_cc, suffix)
+        print()
+
+
+def print_annotated_src_files(
+    events: Events,
+    threshold_src_filenames: set[str],
+    dict_fl_dict_line_cc: DictFlDictLineCc,
+    summary_cc: Cc,
+) -> tuple[list[str], Cc]:
+    """
+    Print an annotated listing for every file in `args.src_filenames` and,
+    when `args.auto` is set, for every file in `threshold_src_filenames`
+    other than "???".
+
+    Each file is tried first under its name as given and then under each
+    directory in `args.include`; the first candidate that opens is annotated.
+
+    Returns the names of the files that could not be annotated, and the sum of
+    the CCs of all annotated lines.
+
+    Note: when auto-annotating, "???" is removed from the caller's
+    `threshold_src_filenames` set.
+    """
+    unfound: list[str] = []
+    annotated_cc = events.mk_empty_cc()
+
+    # Pair each filename with the kind of annotation that selected it. If
+    # auto-annotating, add interesting files (excluding "???").
+    labelled = {(name, "User") for name in args.src_filenames}
+    if args.auto:
+        threshold_src_filenames.discard("???")
+        labelled |= {(name, "Auto") for name in threshold_src_filenames}
+
+    # The leading "" makes things work in the case where the filename has the
+    # full path.
+    search_dirs = ["", *args.include]
+
+    for src_filename, ann_type in sorted(labelled):
+        for search_dir in search_dirs:
+            if search_dir == "":
+                candidate = src_filename
+            else:
+                candidate = os.path.join(search_dir, src_filename)
+
+            try:
+                with open(candidate, "r", encoding="utf-8") as src_file:
+                    print_annotated_src_file(
+                        events,
+                        dict_fl_dict_line_cc[src_filename],
+                        ann_type,
+                        src_file,
+                        annotated_cc,
+                        summary_cc,
+                    )
+                # Annotated; no need to try the remaining directories.
+                break
+            except OSError:
+                continue
+        else:
+            # No candidate could be annotated.
+            unfound.append(src_filename)
+
+    return (unfound, annotated_cc)
+
+
+def print_unfound_auto_filenames(unfound_auto_filenames: list[str]) -> None:
+    """Print the sorted list of files that could not be found, if there are any."""
+    if not unfound_auto_filenames:
+        return
+
+    print(FANCY)
+    print("The following files chosen for auto-annotation could not be found:")
+    print(FANCY)
+    for unfound_filename in sorted(unfound_auto_filenames):
+        print(" ", unfound_filename)
+    print()
+
+
+def print_annotated_cc(events: Events, annotated_cc: Cc, summary_cc: Cc) -> None:
+    """
+    Print `annotated_cc`, labelled "events annotated", under a header of event
+    names. Nothing is printed unless `args.auto` or `args.src_filenames` is
+    set.
+    """
# If we did any annotating, show how many events were covered by annotated
# lines above.
- if ($did_annotations) {
- my $CC_col_widths = compute_CC_col_widths($printed_totals_CC);
- print($fancy);
- print_events($CC_col_widths);
- print("\n");
- print($fancy);
- print_CC($printed_totals_CC, $CC_col_widths);
- print(" events annotated\n\n");
- }
-}
-
-#----------------------------------------------------------------------------
-# "main()"
-#----------------------------------------------------------------------------
-process_cmd_line();
-read_input_file();
-print_options();
-my $threshold_files = print_summary_and_fn_totals();
-annotate_ann_files($threshold_files);
-
-##--------------------------------------------------------------------##
-##--- end cg_annotate.in ---##
-##--------------------------------------------------------------------##
+    if args.auto or args.src_filenames:
+        printer = CcPrinter(events, [annotated_cc], summary_cc)
+        print(FANCY)
+        printer.print_events("")
+        print(FANCY)
+        printer.print_cc(annotated_cc, "events annotated")
+        print()
+
+
+def main() -> None:
+    """Read the cgout file, then print each section of the output in turn."""
+    parsed = read_cgout_file()
+    desc, cmd, events, dict_flfn_cc, dict_fl_dict_line_cc, summary_cc = parsed
+
+    # Each of these calls prints a section of the output.
+    print_header(desc, cmd, events)
+    print_summary_cc(events, summary_cc)
+    threshold_src_filenames = print_flfn_ccs(events, dict_flfn_cc, summary_cc)
+
+    annotation_result = print_annotated_src_files(
+        events, threshold_src_filenames, dict_fl_dict_line_cc, summary_cc
+    )
+    unfound_auto_filenames, annotated_cc = annotation_result
+
+    print_unfound_auto_filenames(unfound_auto_filenames)
+    print_annotated_cc(events, annotated_cc, summary_cc)
+
+
+if __name__ == "__main__":
+    main()