git.ipfire.org Git - thirdparty/curl.git/commitdiff
copyright.pl: parse and use .reuse/dep5 for skips
author Daniel Stenberg <daniel@haxx.se>
Tue, 14 Jun 2022 08:22:57 +0000 (10:22 +0200)
committer Daniel Stenberg <daniel@haxx.se>
Tue, 14 Jun 2022 12:07:12 +0000 (14:07 +0200)
Also scan skipped files to be able to find superfluous ignores, shown with -v.

Closes #9006

scripts/copyright.pl

index f98558bc31887921014d882bf8cce77b8a850348..89f6b787495894b4fab464785e0633d98f15771a 100755 (executable)
 # Usage: copyright.pl [file]
 #
 
-# regexes of files to not scan
-my @skiplist=(
-    '^tests\/data\/test(\d+)$', # test case data
-
-    # all uppercase file name, possibly with dot and dash. But do not exclude
-    # the man pages:
-    '(\/|^)[A-Z0-9_.-]+[^31]$',
-    '(\/|^)[A-Z0-9_-]+\.md$', # all uppercase file name with .md extension
-    '^tests/certs/.*', # generated certs
-    '^tests/stunnel.pem', # generated cert
-    '^tests/valgrind.supp', # valgrind suppressions
-    '^projects/Windows/.*.dsw$', # generated MSVC file
-    '^projects/Windows/.*.sln$', # generated MSVC file
-    '^projects/Windows/.*.tmpl$', # generated MSVC file
-    '^projects/Windows/.*.vcxproj.filters$', # generated MSVC file
-    '^m4/ax_compile_check_sizeof.m4$', # imported, leave be
-    '^.mailmap', # git control file
-    '\/readme',
-    "buildconf", # its nothing to copyright
-
-    # docs/ files we're okay with without copyright
-    'INSTALL.cmake',
-    'TheArtOfHttpScripting',
-    'page-footer',
-    'curl_multi_socket_all.3',
-    'curl_strnequal.3',
-    'symbols-in-versions',
-    'options-in-versions',
-
-    # macos-framework files
-    '^lib\/libcurl.plist.in',
-    '^lib\/libcurl.vers.in',
-
-    # vms files
-    '^packages\/vms\/build_vms.com',
-    '^packages\/vms\/curl_release_note_start.txt',
-    '^packages\/vms\/curlmsg.sdl',
-    '^packages\/vms\/macro32_exactcase.patch',
-
-    # XML junk
-    '^projects\/wolfssl_override.props',
-
-    # checksrc control files
-    '\.checksrc$',
+my %skips;
 
-    # an empty control file
-    "^zuul.d/playbooks/.zuul.ignore",
-
-    # markdown linkchecker config
-    "mlc_config.json",
+# exact file names to skip, each mapped to a label saying where the skip was declared
+my %skiplist = (
+    # REUSE-specific file
+    ".reuse/dep5" => "<built-in>",
 
-    # github template file
-    "^.github/ISSUE_TEMPLATE/bug_report.md",
+    # License texts
+    "LICENSES/BSD-3-Clause.txt" => "<built-in>",
+    "LICENSES/BSD-4-Clause-UC.txt" => "<built-in>",
+    "LICENSES/GPL-3.0-or-later.txt" => "<built-in>",
+    "LICENSES/ISC.txt" => "<built-in>",
+    "LICENSES/LicenseRef-OpenEvidence.txt" => "<built-in>",
+    "LICENSES/curl.txt" => "<built-in>",
+    "COPYING" => "<built-in>",
 
-    # License texts and REUSE-specific files
-    ".reuse/dep5",
-    "LICENSES/.*"
+    # imported, leave be
+    'm4/ax_compile_check_sizeof.m4' => "<built-in>",
 
+    # an empty control file
+    "zuul.d/playbooks/.zuul.ignore" => "<built-in>",
     );
 
 sub scanfile {
@@ -122,13 +85,18 @@ sub scanfile {
 }
 
 sub checkfile {
-    my ($file) = @_;
+    my ($file, $skipped, $pattern) = @_;
     my $fine = 0;
     @copyright=();
     $spdx = 0;
     my $found = scanfile($file);
 
     if($found < 1) {
+        if($skipped) {
+            # just move on
+            $skips{$pattern}++;
+            return 0;
+        }
         if(!$found) {
             print "$file:1: missing copyright range\n";
             return 2;
@@ -138,6 +106,11 @@ sub checkfile {
         return 1;
     }
     if(!$spdx) {
+        if($skipped) {
+            # move on
+            $skips{$pattern}++;
+            return 0;
+        }
         print "$file:1: missing SPDX-License-Identifier\n";
         return 2;
     }
@@ -162,14 +135,56 @@ sub checkfile {
        $copyright[0]{year} != $commityear) {
         printf "$file:%d: copyright year out of date, should be $commityear, " .
             "is $copyright[0]{year}\n",
-            $copyright[0]{line};
+            $copyright[0]{line} if(!$skipped || $verbose);
+        $skips{$pattern}++ if($skipped);
     }
     else {
         $fine = 1;
     }
+    if($skipped && $fine) {
+        print "$file:1: ignored superfluously by $pattern\n" if($verbose);
+        $superf{$pattern}++;
+    }
+
     return $fine;
 }
 
+sub dep5 {  # parse a dep5 file and register every file it lists in %skiplist
+    my ($file) = @_;  # path of the dep5 file to read
+    my @files;  # output of "git ls-files" gathered since the last "License:" line
+    my $copy;  # last "Copyright:" value seen; stored but not read again in this sub
+    open(F, "<$file") || die "can't open $file";
+    my $line = 0;  # 1-based number of the line currently being parsed
+    while(<F>) {
+        $line++;
+        if(/^Files: (.*)/i) {
+            push @files, `git ls-files $1`;  # NOTE(review): $1 is passed to the shell unquoted; assumes dep5 content is trusted - confirm
+        }
+        elsif(/^Copyright: (.*)/i) {
+            $copy = $1;
+        }
+        elsif(/^License: (.*)/i) {  # a "License:" line flushes the files gathered so far
+            my $license = $1;  # captured but not used below
+            for my $f (@files) {
+                chomp $f;  # drop the newline kept by the backtick output
+                if($f =~ /\.gitignore\z/) {
+                    # ignore .gitignore
+                }
+                else {
+                    if($skiplist{$f}) {
+                        print STDERR "$f already skipped at $skiplist{$f}\n";
+                    }
+                    $skiplist{$f} = "dep5:$line";  # label is the number of this "License:" line; overwrites any earlier entry
+                }
+            }
+            undef @files;  # start collecting afresh for the next group of "Files:" lines
+        }
+    }
+    close(F);
+}
+
+dep5(".reuse/dep5");
+
 my @all;
 my $verbose;
 if($ARGV[0] eq "-v") {
@@ -182,22 +197,26 @@ if($ARGV[0]) {
 else {
     @all = `git ls-files`;
 }
+
 for my $f (@all) {  # check every candidate file, skip-listed or not
     chomp $f;
     my $skipped = 0;
-    for my $skip (@skiplist) {
-        #print "$f matches $skip ?\n";
-        if($f =~ /$skip/) {
-            $skiplisted++;
-            $skipped = 1;
-            #print "$f: SKIPPED ($skip)\n";
-            last;
-        }
+    my $miss;  # NOTE(review): never used; the code below writes $mis instead - confirm
+    my $wro;  # set to 1 below when checkfile() returns a false value
+    my $pattern;  # handed to checkfile(), which uses it as the key for %skips and %superf
+    if($skiplist{$f}) {  # exact file-name lookup; replaces the removed regex scan
+        $pattern = $skip;  # NOTE(review): no $skip is declared in the new code shown here; $skiplist{$f} looks intended - confirm
+        $skiplisted++;
+        $skipped = 1;
     }
+
+    my $r = checkfile($f, $skipped, $pattern);  # skipped files are scanned too so superfluous skips can be reported
+    $mis=1 if($r == 2);  # NOTE(review): $mis is not the lexical $miss above and is not reset per file - confirm
+    $wro=1 if(!$r);
+
     if(!$skipped) {
-        my $r = checkfile($f);
-        $missing++ if($r == 2);
-        $wrong++ if(!$r);
+        $missing += $mis;  # only non-skipped files feed the counters that decide the exit status
+        $wrong += $wro;
     }
 }
 
@@ -205,6 +224,16 @@ if($verbose) {
     print STDERR "$missing files have no copyright\n" if($missing);
     print STDERR "$wrong files have wrong copyright year\n" if ($wrong);
     print STDERR "$skiplisted files are skipped\n" if ($skiplisted);
+
+    for my $s (@skiplist) {  # NOTE(review): this patch declares %skiplist (a hash); no @skiplist array is visible in the new code - confirm
+        if(!$skips{$s}) {  # no file skipped under $s ever failed a check
+            printf ("Never skipped pattern: %s\n", $s);
+        }
+        if($superf{$s}) {  # at least one file skipped under $s passed every check
+            printf ("%s was skipped superfluously %u times and legitimately %u times\n",
+                    $s, $superf{$s}, $skips{$s});
+        }
+    }
 }
 
 exit 1 if($missing || $wrong);