]> git.ipfire.org Git - thirdparty/squid.git/blame - scripts/boiler-mgr.pl
Source Format Enforcement (#763)
[thirdparty/squid.git] / scripts / boiler-mgr.pl
CommitLineData
a53382b4 1#!/usr/bin/perl -w
a151895d 2#
f70aedc4 3## Copyright (C) 1996-2021 The Squid Software Foundation and contributors
a151895d
AJ
4##
5## Squid software is distributed under GPLv2+ license and includes
6## contributions from numerous individuals and organizations.
7## Please see the COPYING and CONTRIBUTORS files for details.
8##
a53382b4 9
0910221a
TSSF
10# Adds or adjusts the source file boilerplate, such as a Copyright statement.
11# The boilerplate is meant to remain constant from one source file to another.
a53382b4 12#
47f28373 13# The old boilerplate is assumed to be the first /* comment */ in a source
0910221a 14# file, before the first #include statement other than #include "squid.h".
51a9ab24
AR
15# Common old boilerplates are removed, with copyright-related claims contained
16# in them logged on stdout for recording in CONTRIBUTORS or some such.
17# Copyright and (C) (but not AUTHOR-like) lines are left in sources except
18# when we have a permission to move them to CONTRIBUTORS.
0910221a 19#
51a9ab24
AR
20# The new boilerplate comment is placed at the very beginning of the file,
21# followed by old copyright lines, "inspired by" lines, and DEBUG section
22# comments (if any were found in the old boilerplate).
0910221a
TSSF
23#
24# The script tries hard to detect files with unusual old boilerplates. When
25# detected, the script warns about the problem and leaves the file "as is".
a53382b4
TSSF
26
27use strict;
28use warnings;
29
30die("usage: $0 <boilerplate-file> <source-file> ...\n") unless @ARGV >= 2;
31my ($BoilerName, @FileNames) = @ARGV;
32
33my $CorrectBoiler = `cat $BoilerName` or
47f28373 34 die("cannot load boilerplate from $BoilerName: $!, stopped");
853bf523
TSSF
35$CorrectBoiler = &trimL(&trimR($CorrectBoiler)) . "\n\n";
36
37# the first /* comment */
38my $reComment = qr{
47f28373 39 /\*.*?\*/
853bf523
TSSF
40}xs;
41
6869c265 42# Debugging section inside a boilerplate comment.
853bf523 43my $reDebug = qr{
47f28373 44 ^[\s*]*(DEBUG:.*?)$
853bf523
TSSF
45}mx;
46
6869c265
TSSF
47# Same as $reDebug, but does not match empty DEBUG: statements.
48my $reDebugFull = qr{
47f28373 49 ^[\s*]*(DEBUG:[^\S\n]*\S.*?)\s*$
6869c265
TSSF
50}mx;
51
853bf523
TSSF
52# Copyright-related claims inside a boilerplate comment
53my $reClaims = qr{
47f28373
FC
54 (
55 (?:
56 AUTHOR\b(?:.|\n)*?\*[/\s]*$| # all authors until an "empty" line
57 ORIGINAL\s+AUTHOR\b| # or not the latest author
58 COPYRIGHT\b(?!\sfile)| # or copyright (except "COPYRIGHT file")
59 Portions\scopyright| # or partial copyright
60 (?<!Squid.is.Copyrighted.)\(C\)\s| # or (C) (except "Squid is ...")
61 Based.upon.original.+code.by\s*\n| # or this common pearl
62 Modified\sby\s| # or this
63 BASED\sON:\s # or this
64 )
65 .*? # and the claim content itself
66 )$
853bf523
TSSF
67}xmi;
68
14133da1
TSSF
69# removes common claim prefixes to minimize claim noise
70my $reClaimPrefix = qr{
47f28373
FC
71 (?:ORIGINAL\s)?AUTHOR:?|
72 based\son\s|
73 based\supon\s|
74 Portions\s
51a9ab24
AR
75}xi;
76
77# We have persmission to move these frequent claims to CONTRIBUTORS.
51a9ab24 78my $reClaimsOkToMove = qr{
47f28373 79 Robert.Collins|<robertc\@squid-cache.org>|<rbtcollins\@hotmail.com>|
58dce27e 80
47f28373 81 Duane.Wessels|
58dce27e 82
47f28373 83 Francesco.Chemolli|<kinkie\@squid-cache.org>|<kinkie\@kame.usr.dsi.unimi.it>|
51a9ab24 84
47f28373
FC
85 Amos.Jeffries|<amosjeffries\@squid-cache.org>|<squid3\@treenet.co.nz>|
86 Treehouse.Networks.Ltd.|
87 GPL.version.2,..C.2007-2013|
51a9ab24 88
47f28373
FC
89 Henrik.Nordstrom|<henrik\@henriknordstrom.net>|
90 MARA.Systems.AB|
af4d1ea4 91
47f28373 92 Guido.Serassio|<serassio\@squid-cache.org>|<guido.serassio\@acmeconsulting.it>|
14133da1
TSSF
93}xi;
94
95# inspirations are not copyright claims but should be preserved
96my $reInspiration = qr/^[\s*]*(inspired by previous work.*?)$/mi;
97
98# The most common GPL text, with some address variations.
47f28373
FC
99my $strGpl =
100 "This program is free software; you can redistribute it and/or modify".
101 "([^*]|[*][^/])+". # not a /* comment */ closure
102 "Foundation, Inc., [^\\n]+MA\\s+[-\\d]+, USA\\.";
853bf523
TSSF
103my $reGpl = qr{$strGpl}s;
104
14133da1
TSSF
105# Two most common Squid (C) statements.
106my $strSqCopyStart1 =
47f28373 107 "SQUID Web Proxy Cache\\s+http://www.squid-cache.org/";
14133da1 108my $strSqCopyStart2 =
47f28373 109 "SQUID Internet Object Cache\\s+http://squid.nlanr.net/Squid/";
14133da1 110my $strSqCopyEnd =
47f28373
FC
111 "([^*]|[*][^/])+".
112 "numerous individuals".
113 "([^*]|[*][^/])+".
114 "file for full details.";
14133da1
TSSF
115my $reSquidCopy = qr{($strSqCopyStart1|$strSqCopyStart2)$strSqCopyEnd}s;
116
a53382b4
TSSF
117
118my $FileName; # for Warn()ings
853bf523 119my %ReportedClaims; # to minimize noise in claims reporting
4a4459d1 120$| = 1; # report claims ASAP (but on STDOUT)
a53382b4
TSSF
121
122# process each file in-place; do not touch files on known failures
123foreach my $fname (@FileNames) {
124
47f28373
FC
125 $FileName = $fname;
126 my $code = &readFile($fname) or next;
127 my $virginCode = $code;
128
129 &WarnQuiet("Correct boilerplate already present, skipping:", $code), next if
130 $code =~ /\Q$CorrectBoiler\E/s;
131
132 my $boiler;
133
134 if ($code =~ m/$reComment/) {
135 my $beforeComment = $`;
136 my $comment = $&;
137
138 # Is the matched comment a boilerplate?
139 if ($comment !~ m/\n/) {
140 # A single line comment is not a boilerplate.
141 } elsif ($beforeComment =~ m/^\s*\#\s*include\s+(?!"squid.h")/m) {
142 # A comment after include is not a boilerplate,
143 # but we make an exception for #include "squid.h" common in lib/
144 } elsif ($comment =~ m@^/\*\*\s@){
145 # A Doxygen comment is not a boilerplate.
146 } elsif ($comment =~ m/internal declarations|stub file|unit test/i) {
147 # These relatively common comments are not boilerplates.
148 } elsif (&digestable($comment)) {
149 # Something we can safely replace.
150 $boiler = $comment;
151 } else {
152 &Warn("Unrecognized boilerplate, skipping:", $comment);
153 next;
154 }
155 }
156
157 my $extras = ''; # DEBUG section, inspired by ..., etc.
158
159 if (defined $boiler) {
160 my $copyClaims = ''; # formatted Copyright claims extracted from sources
161 my $preserveClaims = 0; # whether to preserve them or not
162
163 if (my @rawClaims = ($boiler =~ m/$reClaims/g)) {
164 my @claims = map { &claimList($_) } @rawClaims;
165 my $count = 0;
166 foreach my $claim (@claims) {
167 $claim =~ s/\n+/ /gs; # streamline multiline claims
168 $claim =~ s@\*/?@ @g; # clean comment leftovers
169 $claim =~ s/$reClaimPrefix/ /g; # remove common prefixes
170 # this one is sucked in from the old standard boilerplate
171 $claim =~ s/by the Regents of the University of//;
172 $claim =~ s/\s\s+/ /gs; # clean excessive whitespace
173 $claim =~ s/^\s+|\s+$//gs; # remove excessive whitespace
174 next unless length $claim;
175
176 # preserve Copyright claims
177 if ($claim =~ m/Copyright|\(c\)/i) {
178 $copyClaims .= sprintf(" * %s\n", $claim);
179
180 # Ignore certain claims, assuming we have their permission.
181 my $c = $claim;
182 $c =~ s/^\s*(Copyright)?[:\s]*([(c)]+)?\s*([0-9,-]+)?\s*(by)?\s*//i; # prefix
183 $c =~ s/$reClaimsOkToMove/ /g;
184 $c =~ s/[,]//g; # markup leftovers
185
186 # But if one claim is preserved, all must be preserved.
187 $preserveClaims = 1 if $c =~ /\S/;
188 warn($c) if $c =~ /\S/;
189 }
190
191 next if exists $ReportedClaims{$claim};
192 print("$fname: INFO: Found new claim(s):\n") unless $count++;
193 print("Claim: $claim\n");
194 $ReportedClaims{$claim} = $fname;
195 }
196 }
197
198 if ($preserveClaims) {
199 die("Internal error: $copyClaims") unless length($copyClaims);
200 my $prefix = " * Portions of this code are copyrighted and released under GPLv2+ by:";
201 my $suffix = " * Please add new claims to the CONTRIBUTORS file instead.";
202 $extras .= sprintf("/*\n%s\n%s%s\n */\n\n",
203 $prefix, $copyClaims, $suffix);
204 }
205
206 if ($boiler =~ m/$reInspiration/) {
207 $extras .= sprintf("/* %s */\n\n", ucfirst($1));
208 }
209
210 if ($boiler =~ m/$reDebugFull/) {
211 $extras .= "/* $1 */\n\n";
212 }
213
214 $code =~ s/\s*$reComment\s*/\n\n/ or
215 die("internal error: failed to remove expected comment, stopped");
216 &digestable($&) or
217 die("internal error: unsafe comment removal, stopped");
218
219 } else { # no boilerplate found
220 #&Warn("Cannot find old boilerplate, adding new boilerplate.", $code);
221 }
222
223 # Some files have license declarations way down in the code so we may not
224 # find a boilerplate at all or find an "empty" boilerplate preceding them.
225 my $license =
226 "Copyright|".
227 "This program is free software|".
228 "Permission to use|".
229 "Redistribution and use";
230 if ($code =~ m@/\*.*?($license).*?\*/@is) {
231 # If we replaced what we thought is an old boiler, do not use $` for
232 # context because it is based on modified $code and will often mislead.
233 my $context = defined $boiler ? $& : ($` . $&);
234 &Warn("Suspected boilerplate in an unusual location, skipping:",
235 $context);
236 next;
237 }
238
239 $code = $CorrectBoiler . $extras . &trimL($code);
240 &writeFile($fname, $code) unless $code eq $virginCode;
241 undef $FileName;
a53382b4
TSSF
242}
243
244exit(0);
245
246sub readFile() {
47f28373
FC
247 my ($fname) = @_;
248
249 if (!-f $fname) {
250 &Warn("Skipping directory or a special file.");
251 return undef();
252 }
253
254 my $code = '';
255 open(IF, "<$fname") or die("cannot open $fname: $!, stopped");
256 while (<IF>) {
257 $code .= $_;
258 }
259 close(IF);
260
261 &Warn("empty file") unless length $code;
262 return $code;
a53382b4
TSSF
263}
264
00c328dd 265sub writeFile() {
47f28373
FC
266 my ($fname, $code) = @_;
267 open(OF, ">$fname") or die("cannot open $fname for writing: $!, stopped");
a53382b4 268
47f28373 269 print(OF $code) or die("cannot write to $fname: $!, stopped");
a53382b4 270
47f28373 271 close(OF) or die("cannot finish updating $fname: $!, stopped");
a53382b4
TSSF
272}
273
14133da1
TSSF
274# split multiclaim claims into an array of single claims
275sub claimList() {
47f28373 276 my $multiClaim = shift;
14133da1 277
47f28373
FC
278 $multiClaim =~ s/$reDebug//g; # may pretend to continue AUTHORs list
279 $multiClaim =~ s/$reInspiration//g; # does not affect (C) claims
14133da1 280
47f28373
FC
281 # remove \n that is not used to separate two claims
282 $multiClaim =~ s/(Based.upon.original.+code.by\s*)\n/$1 /g;
14133da1 283
47f28373
FC
284 return split(/\n/, $multiClaim);
285 # return grep { /\S/ } split($reClaimSplitter, $multiClaim);
14133da1
TSSF
286}
287
288# checks whether a comment contains nothing but the stuff we can either
289# safely remove, replace, or move (e.g., DEBUG sections and copyright claims)
290sub digestable() {
47f28373
FC
291 my $comment = shift;
292
293 # Remove common text to detect an otherwise empty boilerplate.
294 $comment =~ s/$reDebug//;
295 $comment =~ s/$reClaims//g;
296 $comment =~ s/^[\s*]*(Created on.*?)$//mig;
297 $comment =~ s/^[\s*]*(Windows support\s*)$//mig;
298 $comment =~ s/^[\s*]*(History added by .*)$//mig;
299 $comment =~ s/$reGpl//;
300 $comment =~ s/$reSquidCopy//;
301 $comment =~ s/$reInspiration//g;
302 $comment =~ s/\* Stubs for.*?$//m; # e.g., Stubs for calls to stuff defined in...
303 $comment =~ s/\$Id(:.*)?\$//g; # CVS tags
304 $comment =~ s/-{60,}//g; # decorations such as -----------...---------
305 $comment =~ s/\b\w+\.(h|c|cc|cci)\b//; # Next to last step: a file name.
306 $comment =~ s@[\s*/]@@sg; # Last step: whitespace and comment characters.
307 return !length($comment);
14133da1
TSSF
308}
309
a53382b4 310# removes all opening whitespace
00c328dd 311sub trimL() {
47f28373
FC
312 my ($code) = @_;
313 $code =~ s/^\n[\n\s]*//s;
314 return $code;
a53382b4
TSSF
315}
316
317# removes all trailing whitespace
00c328dd 318sub trimR() {
47f28373
FC
319 my ($code) = @_;
320 $code =~ s/\n[\n\s]*$//s;
321 return $code;
a53382b4
TSSF
322}
323
00c328dd 324sub Warn() {
47f28373
FC
325 my ($msg, $context) = @_;
326
327 if (defined $context) {
328 my $MaxLen = 1000;
329 $context =~ s/$reGpl/... [GPL] .../;
330 $context =~ s/$reSquidCopy/... [Standard Squid "numerous individuals" text] .../;
331 $context = substr($context, 0, $MaxLen);
332 $context = &trimR($context);
333 $context .= "\n\n";
334 } else {
335 $context = '';
336 }
337 $msg = sprintf("%s: WARNING: %s\n%s", $FileName, $msg, $context) if defined $FileName;
338 warn($msg);
a53382b4 339}
bc2321d2
AJ
340
341sub WarnQuiet() {
47f28373 342 my ($msg, $context) = @_;
bc2321d2 343
47f28373
FC
344 $msg = sprintf("%s: WARNING: %s\n", $FileName, $msg) if defined $FileName;
345 warn($msg);
bc2321d2 346}