update to handle more than 3 layers, and correctly edit the files

author Alan T. DeKok <aland@freeradius.org>

Thu, 21 Oct 2021 00:42:20 +0000 (20:42 -0400)

committer Alan T. DeKok <aland@freeradius.org>

Thu, 21 Oct 2021 01:07:43 +0000 (21:07 -0400)
author Alan T. DeKok <aland@freeradius.org>
Thu, 21 Oct 2021 00:42:20 +0000 (20:42 -0400)
committer Alan T. DeKok <aland@freeradius.org>
Thu, 21 Oct 2021 01:07:43 +0000 (21:07 -0400)
diff --git a/scripts/build/min-includes.pl b/scripts/build/min-includes.pl

index 3eeaae8ed8e61a68e4dd70f44d45b1b922113284..83ba09c47cb6c0ec75e7532393837f1232defe7a 100755 (executable)
--- a/scripts/build/min-includes.pl
+++ b/scripts/build/min-includes.pl
@@ -12,7 +12,7 @@
  #  Run as: ./min-includes.pl $(find . -name "*.c" -print)
  #              prints out duplicate includes from files.
  #
-#         ./min-includes.pl +n $(find . -name "*.c" -print)
+#         ./min-includes.pl -e $(find . -name "*.c" -print)
  #              removes the duplicate includes from each file.
  #              Remember to check that it still builds!
  #
@@ -21,14 +21,11 @@
  #
  ######################################################################
  #
-#  FIXME: We don't handle include files taken from the current
-#  directory...
-#
  #  FIXME: we should take -I <path> from the command line.
  #
  ######################################################################
  #
-#  Copyright (C) 2006 Alan DeKok <aland@freeradius.org>
+#  Copyright (C) 2021 Alan DeKok <aland@freeradius.org>
  #
  #  $Id$
  #
@@ -38,20 +35,34 @@ use strict;
  use warnings;
  use Data::Dumper;
  
+#
+#  @todo - use Getopt::Long, and allow -I for includes,
+#  which lets us search include directories.
+#
+our ($opt_d, $opt_e, $opt_i, $opt_x);
+use Getopt::Std;
+getopts('deix');
+my $debug = $opt_x;
+my $edit = $opt_e;
+my $dry_run = $opt_d;
+my $edit_includes = $opt_i;
+
  my %processed;
  
  my $any_dups = 0;
-my $debug = 0;
  
-my %checked;
  my %refs;
+my %incs;
+my %depth;
+my %lines;
+my %names;
  my %requested;
-my %include;
-my %maps;
-my %forward;
  my %reverse;
-my %duplicate;
  my %delete_line;
+my %worked;
+my %transitive;
+
+my @work;
  
  #
  #  Find the #include's for one file.
@@ -64,15 +75,21 @@ my %delete_line;
  #
  sub process {
      my $file = shift;
+    my $dir = $file;
  
-    return if ($processed{$file});
+    $dir =~ s,/[^/]+$,/,;
  
-    $processed{$file}++;
+    $file =~ s,//,/,g;                 # canonicalize it
+
+    $depth{$file} = 1;
  
      open(my $FILE, "<", $file) or die "Failed to open $file: $!\n";
  
      my $line = 0;
      while (<$FILE>) {
+       my $inc;
+       my $ref;
+
          $line++;
  
         #
@@ -80,205 +97,216 @@ sub process {
         #
          next if (!/^\s*\#\s*include\s+/);
  
+       #
+       #  Include a header from this directory.
+       #
          if (/^\s*\#\s*include\s+"(.+?)"/) {
-            $refs{$file}{$1} = $line;
+           $inc = $1;
+           $ref = "$dir$1";
  
-            # FIXME: local header files?
-            # src/foo/bar.c: #include "foo.h"
-            #   src/foo/foo.h do stuff..
-
-            $include{$1}++;
          } elsif (/^\s*\#\s*include\s+<(.+?)>/) {
+           $inc = $1;
+           $ref = $1;
+
+           if ($ref =~ /freeradius/) {
+               $ref = "src/$ref";
+           }
+
+        } else {
+           die "Unhandled include at $file line $line\n";
+       }
+
+       $ref =~ s,//,/,g;                       # canonicalize it
+
+       next if defined $lines{$file}{$inc};    # ignore if we include the same file twice
+
+       $lines{$file}{$inc} = $line;            # FILE includes INC at line number
+       $names{$file}{$inc} = $ref;             # FILE includes REF which maps to INCLUDE
+       $refs{$file}{$ref} = $line;             # we don't muck with this one
+       $incs{$file}{$ref} = $inc;              # we don't muck with this one
+
+       $reverse{$ref}{$file} = $line;          # include REF is included by FILE at LINE
+       $transitive{$file}{$ref} = 1;           # FILE points to REF directly
+
+       $depth{$ref} = 1 if ! defined $depth{$ref};
  
-            $refs{$file}{$1} = $line;
-            $include{$1}++;
-        }
+       next if ($ref !~ /^src/);               # only process REF if it's in our source tree
+
+       next if defined $worked{$ref};          # we've already processed the include file REF
+
+       push @work, $ref;                       # we need to process this include file REF
+
+       $worked{$ref}++;
+
+#      print "$file includes $ref via $inc at $line\n";
      }
  
      close $FILE;
  }
  
  #
-#  Where include files are located.
-#
-#  FIXME:
-#
-my @directories = ("src/lib", "src");
-my $do_it = 0;
-
-#
-#  Horrid.
+#  Be utterly lazy about argument parsing.
  #
  if ($ARGV[0] eq "+n") {
      shift;
-    $do_it = 1;
+    $edit = 1;
  }
  
  #
-#  Bootstrap the input C files.
+#  Read and process the input C files.
  #
  foreach my $file (@ARGV) {
      $requested{$file}++;
      process($file);
  }
  
+#
+#  Processing the C files resulted in a set of include files to
+#  process.  We need to read those in turn, in order to create a full
+#  mapping of which file includes what.
+#
+foreach my $file (@work) {
+    next if ! -e $file;
+
+    process($file);
+}
  
  #
-#  Process the include files referenced from the C files, to find out
-#  what they include.
+#  Get the correct depth for each file.
  #
-#  Note that we create a temporary array, rather than walking over
-#  %include, because the process() function adds entries to the
-#  %include hash.
+foreach my $file (keys %transitive) {
+    my $mydepth = 1;
+
+    foreach my $ref (keys %{$transitive{$file}}) {
+       $mydepth = $depth{$ref} + 1 if ($depth{$ref} >= $mydepth);
+    }
+
+    $depth{$file} = $mydepth;
+}
+
  #
-my @work = sort keys %include;
-foreach my $inc (@work) {
+#  We now process transitive references.  i.e. file FOO includes BAR,
+#  but BAR also includes BAZ, BAD, etc.  We hoist all of that
+#  information.
+#
+#  Use the "depth" hash, and start from 1 (file includes nothing
+#  else) to N (file is included -> included -> include N times).  This
+#  lets us hoist things gradually.
+#
+#  This loop is sort of O(N^3), but it does a lot of trimming as we
+#  process the various files.
+#
+foreach my $file (sort {$depth{$a} <=> $depth{$b}} keys %depth) {
+    next if ($depth{$file} == 1);
  
-    $checked{$inc}++;
+    #
+    #  Loop over includes for $file.  If the include is not as deep as
+    #  we are, then it MUST already have been processed, so we skip it.
+    #
+    foreach my $inc (keys %{$transitive{$file}}) {
+       next if ($depth{$inc} < $depth{$file});
  
-    foreach my $dir (@directories) {
-        my $file = $dir . "/" . $inc;
+       #
+       #  $file includes $inc, so loop over the things which are
+       #  included by $inc.
+       #
+       #  If $file already references the second-include file, then
+       #  don't do anything else.
+       #
+       #  Otherwise mark up $file as including the second-include file.
+       #
+       foreach my $inc2 (keys %{$transitive{$inc}}) {
+           next if (defined $transitive{$file}{$inc2});
+
+           $transitive{$file}{$inc2} = $transitive{$inc}{$inc2} + 1;
+       }
+  }
+}
  
-        # normalize path
-        $file =~ s:/.*?/\.\.::;
-        $file =~ s:/.*?/\.\.::;
-       $file =~ s,//,/,g;
+# Loop over each file we're checking
+foreach my $file (sort keys %refs) {
+    next if ! defined $requested{$file} && ! $edit_includes;
  
-        next if (! -e $file);
+    print $file, "\n" if ! $edit;
  
-       #  Skip files we've already processed.
-       next if $reverse{$file};
+    #  walk over the list of includes in this file
+    foreach my $ref (sort {$refs{$file}{$a} <=> $refs{$file}{$b}} keys %{$refs{$file}}) {
+       # @todo - sort includes in order of increasing depth, so that
+       # we have a canonical order!
  
-        process($file);
+       #
+       #  If we're not editing files, print out what we're going to
+       #  do.
+       #
+       if (!$edit) {
+           if ($delete_line{$file}{$refs{$file}{$ref}}) {
+               print "\t[", $refs{$file}{$ref}, "]\t!$incs{$file}{$ref} (from line $refs{$file}{$delete_line{$file}{$refs{$file}{$ref}}}, $delete_line{$file}{$refs{$file}{$ref}})\n";
  
-        $forward{$inc} = $file;
-        $reverse{$file} = $inc;
+           } else {
+               print "\t[", $refs{$file}{$ref}, "]\t$incs{$file}{$ref}\n";
+           }
+       }
  
-        # ignore system include files
-        next if ((scalar keys %{$refs{$file}}) == 0);
+       #  Loop over the includes used by that file, seeing if they're included here.
+       foreach my $inc (keys %{$transitive{$ref}}) {
  
-        #  Remember that X includes Y, and push Y onto the list
-        #  of files to scan.
-        foreach my $inc2 (sort keys %{$refs{$file}}) {
-            $maps{$inc}{$inc2} = 0;
+           # This file doesn't manually include the given reference
+           next if ! defined $refs{$file}{$inc};
  
-           #
-           #  Don't push the same file multiple times.
-           #
-           next if $checked{$inc2};
-           $checked{$inc2}++;
+           # If the other include is earlier than this one, then
+           # it's already been handled.  So we ignore it.
+           next if ($refs{$file}{$inc} <= $refs{$file}{$ref});
  
-            push @work, $inc2;
-        }
-    }
-}
+           $any_dups++;
  
-#
-#  Process all of the forward refs, so that we have a complete
-#  list of who's referencing who.
-#
-#  This doesn't find the shortest path from A to B, but it does
-#  find one path.
-#
-foreach my $inc (sort keys %maps) {
-    foreach my $inc2 (sort keys %{$maps{$inc}}) {
-        foreach my $inc3 (sort keys %{$maps{$inc2}}) {
-            # map is already there...
-            next if (defined $maps{$inc}{$inc3});
+           $delete_line{$file}{$refs{$file}{$inc}} = $ref;
  
-            $maps{$inc}{$inc3} = $maps{$inc2}{$inc3} + 1;
-        }
+           print "\t\talready includes $inc, duplicate at line $refs{$file}{$inc}\n" if $debug;
+       }
      }
  }
  
  #
-#  Walk through the files again, looking for includes that are
-#  unnecessary.  Note that we process header files, too.
+#  If we're not editing the files, exit with success only when there are no duplicates.
  #
-foreach my $file (sort keys %refs) {
-    # print out some debugging information.
-    if ($debug > 0) {
-        if (defined $reverse{$file}) {
-            print $file, "\t(", $reverse{$file}, ")\n";
-        } else {
-            print $file, "\n";
-        }
-    }
-
-    #  walk of the list of include's in this file
-    foreach my $ref (sort keys %{$refs{$file}}) {
-
-        #  walk over the include files we include, or included by
-        #  files that we include.
-        foreach my $inc2 (sort keys %{$maps{$ref}}) {
-            #
-            #  If we include X, and X includes Y, and we include
-            #  Y ourselves *after* X, it's a definite dupe.
-            #
-            #  Note that this is a *guaranteed* duplicate.
-            #
-            #  Sometimes order matters, so we can't always delete X if
-            #  we include Y after X, and Y includes X
-            #
-            if (defined $refs{$file}{$inc2} &&
-                ($refs{$file}{$inc2} > $refs{$file}{$ref})) {
-                $duplicate{$file}{$inc2} = $ref;
-
-                # mark the line to be deleted.
-                $delete_line{$file}{$refs{$file}{$inc2}}++;
-
-                $any_dups++;
-            }
-        }
-        print "\t", $ref, "\n" if ($debug > 0);
-    }
+if (!$edit) {
+    exit ($any_dups != 0);
  }
  
-if ($debug > 0) {
-    print "------------------------------------\n";
-}
+foreach my $file (keys %delete_line) {
+    my $OUTPUT;
  
-#
-#  Maybe just print out the dups so that a person can validate them.
-#
-if (!$do_it) {
-    foreach my $file (sort keys %duplicate) {
-       next if !$requested{$file};
+    print "$file\n" if $dry_run;
  
-        print $file, "\n";
+    open(my $FILE, "<", $file) or die "Failed to open $file: $!\n";
  
-        foreach my $inc (sort keys %{$duplicate{$file}}) {
-            print "\t[", $refs{$file}{$inc}, "] ", $inc, " (", $duplicate{$file}{$inc}, " at ", $refs{$file}{$duplicate{$file}{$inc}}, ")\n";
-        }
+    if (!$dry_run) {
+       open($OUTPUT, ">", "$file.tmp") or die "Failed to create $file.tmp: $!\n";
      }
-} else {
-    foreach my $file (sort keys %duplicate) {
-        open(my $FILE, "<", $file) or die "Failed to open $file: $!\n";
-        open(my $OUTPUT, ">", "$file.tmp") or die "Failed to create $file.tmp: $!\n";
-
-        my $line = 0;
-        while (<$FILE>) {
-            $line++;
  
-            # supposed to delete this line, don't print it to the output.
-            next if (defined $delete_line{$file}{$line});
+    my $line = 0;
+    while (<$FILE>) {
+       $line++;
  
-            print $OUTPUT $_;
-        }
+       if ($dry_run && defined $delete_line{$file}{$line}) {
+           print "\tdelete line $line already referenced in $delete_line{$file}{$line}\n";
+       }
  
-        close $OUTPUT;
-        close $FILE;
+       # supposed to delete this line, don't print it to the output.
+       next if (defined $delete_line{$file}{$line});
  
-        rename "$file.tmp", $file;
+       if (!$dry_run) {
+           print $OUTPUT $_;
+       }
      }
  
-}
+    if (!$dry_run) {
+       close $OUTPUT;
+    }
  
-#  If we succeeded in re-writing the files, it's OK.
-exit 0 if ($do_it);
+    close $FILE;
  
-#  If there are no duplicates, then we're OK.
-exit 0 if (!$any_dups);
+    rename "$file.tmp", $file;
+}
  
-#  Else there are duplicates, complain.
-exit 1
+exit 0;
author	Alan T. DeKok <aland@freeradius.org>
	Thu, 21 Oct 2021 00:42:20 +0000 (20:42 -0400)
committer	Alan T. DeKok <aland@freeradius.org>
	Thu, 21 Oct 2021 01:07:43 +0000 (21:07 -0400)