]> git.ipfire.org Git - thirdparty/squid.git/blob - scripts/boiler-mgr.pl
Docs: Copyright updates for 2018 (#114)
[thirdparty/squid.git] / scripts / boiler-mgr.pl
1 #!/usr/bin/perl -w
2 #
3 ## Copyright (C) 1996-2018 The Squid Software Foundation and contributors
4 ##
5 ## Squid software is distributed under GPLv2+ license and includes
6 ## contributions from numerous individuals and organizations.
7 ## Please see the COPYING and CONTRIBUTORS files for details.
8 ##
9
10 # Adds or adjusts the source file boilerplate, such as a Copyright statement.
11 # The boilerplate is meant to remain constant from one source file to another.
12 #
13 # The old boilerplate is assumed to be the first /* comment */ in a source
14 # file, before the first #include statement other than #include "squid.h".
15 # Common old boilerplates are removed, with copyright-related claims contained
16 # in them logged on stdout for recording in CONTRIBUTORS or some such.
17 # Copyright and (C) (but not AUTHOR-like) lines are left in sources except
18 # when we have a permission to move them to CONTRIBUTORS.
19 #
20 # The new boilerplate comment is placed at the very beginning of the file,
21 # followed by old copyright lines, "inspired by" lines, and DEBUG section
22 # comments (if any were found in the old boilerplate).
23 #
24 # The script tries hard to detect files with unusual old boilerplates. When
25 # detected, the script warns about the problem and leaves the file "as is".
26
27 use strict;
28 use warnings;
29
30 die("usage: $0 <boilerplate-file> <source-file> ...\n") unless @ARGV >= 2;
31 my ($BoilerName, @FileNames) = @ARGV;
32
33 my $CorrectBoiler = `cat $BoilerName` or
34 die("cannot load boilerplate from $BoilerName: $!, stopped");
35 $CorrectBoiler = &trimL(&trimR($CorrectBoiler)) . "\n\n";
36
37 # the first /* comment */
38 my $reComment = qr{
39 /\*.*?\*/
40 }xs;
41
42 # Debugging section inside a boilerplate comment.
43 my $reDebug = qr{
44 ^[\s*]*(DEBUG:.*?)$
45 }mx;
46
47 # Same as $reDebug, but does not match empty DEBUG: statements.
48 my $reDebugFull = qr{
49 ^[\s*]*(DEBUG:[^\S\n]*\S.*?)\s*$
50 }mx;
51
52 # Copyright-related claims inside a boilerplate comment
53 my $reClaims = qr{
54 (
55 (?:
56 AUTHOR\b(?:.|\n)*?\*[/\s]*$| # all authors until an "empty" line
57 ORIGINAL\s+AUTHOR\b| # or not the latest author
58 COPYRIGHT\b(?!\sfile)| # or copyright (except "COPYRIGHT file")
59 Portions\scopyright| # or partial copyright
60 (?<!Squid.is.Copyrighted.)\(C\)\s| # or (C) (except "Squid is ...")
61 Based.upon.original.+code.by\s*\n| # or this common pearl
62 Modified\sby\s| # or this
63 BASED\sON:\s # or this
64 )
65 .*? # and the claim content itself
66 )$
67 }xmi;
68
69 # removes common claim prefixes to minimize claim noise
70 my $reClaimPrefix = qr{
71 (?:ORIGINAL\s)?AUTHOR:?|
72 based\son\s|
73 based\supon\s|
74 Portions\s
75 }xi;
76
77 # We have persmission to move these frequent claims to CONTRIBUTORS.
78 my $reClaimsOkToMove = qr{
79 Robert.Collins|<robertc\@squid-cache.org>|<rbtcollins\@hotmail.com>|
80
81 Duane.Wessels|
82
83 Francesco.Chemolli|<kinkie\@squid-cache.org>|<kinkie\@kame.usr.dsi.unimi.it>|
84
85 Amos.Jeffries|<amosjeffries\@squid-cache.org>|<squid3\@treenet.co.nz>|
86 Treehouse.Networks.Ltd.|
87 GPL.version.2,..C.2007-2013|
88
89 Henrik.Nordstrom|<henrik\@henriknordstrom.net>|
90 MARA.Systems.AB|
91
92 Guido.Serassio|<serassio\@squid-cache.org>|<guido.serassio\@acmeconsulting.it>|
93 }xi;
94
95 # inspirations are not copyright claims but should be preserved
96 my $reInspiration = qr/^[\s*]*(inspired by previous work.*?)$/mi;
97
98 # The most common GPL text, with some address variations.
99 my $strGpl =
100 "This program is free software; you can redistribute it and/or modify".
101 "([^*]|[*][^/])+". # not a /* comment */ closure
102 "Foundation, Inc., [^\\n]+MA\\s+[-\\d]+, USA\\.";
103 my $reGpl = qr{$strGpl}s;
104
105 # Two most common Squid (C) statements.
106 my $strSqCopyStart1 =
107 "SQUID Web Proxy Cache\\s+http://www.squid-cache.org/";
108 my $strSqCopyStart2 =
109 "SQUID Internet Object Cache\\s+http://squid.nlanr.net/Squid/";
110 my $strSqCopyEnd =
111 "([^*]|[*][^/])+".
112 "numerous individuals".
113 "([^*]|[*][^/])+".
114 "file for full details.";
115 my $reSquidCopy = qr{($strSqCopyStart1|$strSqCopyStart2)$strSqCopyEnd}s;
116
117
118 my $FileName; # for Warn()ings
119 my %ReportedClaims; # to minimize noise in claims reporting
120 $| = 1; # report claims ASAP (but on STDOUT)
121
122 # process each file in-place; do not touch files on known failures
123 foreach my $fname (@FileNames) {
124
125 $FileName = $fname;
126 my $code = &readFile($fname) or next;
127 my $virginCode = $code;
128
129 &WarnQuiet("Correct boilerplate already present, skipping:", $code), next if
130 $code =~ /\Q$CorrectBoiler\E/s;
131
132 my $boiler;
133
134 if ($code =~ m/$reComment/) {
135 my $beforeComment = $`;
136 my $comment = $&;
137
138 # Is the matched comment a boilerplate?
139 if ($comment !~ m/\n/) {
140 # A single line comment is not a boilerplate.
141 } elsif ($beforeComment =~ m/^\s*\#\s*include\s+(?!"squid.h")/m) {
142 # A comment after include is not a boilerplate,
143 # but we make an exception for #include "squid.h" common in lib/
144 } elsif ($comment =~ m@^/\*\*\s@){
145 # A Doxygen comment is not a boilerplate.
146 } elsif ($comment =~ m/internal declarations|stub file|unit test/i) {
147 # These relatively common comments are not boilerplates.
148 } elsif (&digestable($comment)) {
149 # Something we can safely replace.
150 $boiler = $comment;
151 } else {
152 &Warn("Unrecognized boilerplate, skipping:", $comment);
153 next;
154 }
155 }
156
157 my $extras = ''; # DEBUG section, inspired by ..., etc.
158
159 if (defined $boiler) {
160 my $copyClaims = ''; # formatted Copyright claims extraced from sources
161 my $preserveClaims = 0; # whether to preserve them or not
162
163 if (my @rawClaims = ($boiler =~ m/$reClaims/g)) {
164 my @claims = map { &claimList($_) } @rawClaims;
165 my $count = 0;
166 foreach my $claim (@claims) {
167 $claim =~ s/\n+/ /gs; # streamline multiline claims
168 $claim =~ s@\*/?@ @g; # clean comment leftovers
169 $claim =~ s/$reClaimPrefix/ /g; # remove common prefixes
170 # this one is sucked in from the old standard boilerplate
171 $claim =~ s/by the Regents of the University of//;
172 $claim =~ s/\s\s+/ /gs; # clean excessive whitespace
173 $claim =~ s/^\s+|\s+$//gs; # remove excessive whitespace
174 next unless length $claim;
175
176 # preserve Copyright claims
177 if ($claim =~ m/Copyright|\(c\)/i) {
178 $copyClaims .= sprintf(" * %s\n", $claim);
179
180 # Ignore certain claims, assuming we have their permission.
181 my $c = $claim;
182 $c =~ s/^\s*(Copyright)?[:\s]*([(c)]+)?\s*([0-9,-]+)?\s*(by)?\s*//i; # prefix
183 $c =~ s/$reClaimsOkToMove/ /g;
184 $c =~ s/[,]//g; # markup leftovers
185
186 # But if one claim is preserved, all must be preserved.
187 $preserveClaims = 1 if $c =~ /\S/;
188 warn($c) if $c =~ /\S/;
189 }
190
191 next if exists $ReportedClaims{$claim};
192 print("$fname: INFO: Found new claim(s):\n") unless $count++;
193 print("Claim: $claim\n");
194 $ReportedClaims{$claim} = $fname;
195 }
196 }
197
198 if ($preserveClaims) {
199 die("Internal error: $copyClaims") unless length($copyClaims);
200 my $prefix = " * Portions of this code are copyrighted and released under GPLv2+ by:";
201 my $suffix = " * Please add new claims to the CONTRIBUTORS file instead.";
202 $extras .= sprintf("/*\n%s\n%s%s\n */\n\n",
203 $prefix, $copyClaims, $suffix);
204 }
205
206 if ($boiler =~ m/$reInspiration/) {
207 $extras .= sprintf("/* %s */\n\n", ucfirst($1));
208 }
209
210 if ($boiler =~ m/$reDebugFull/) {
211 $extras .= "/* $1 */\n\n";
212 }
213
214 $code =~ s/\s*$reComment\s*/\n\n/ or
215 die("internal error: failed to remove expected comment, stopped");
216 &digestable($&) or
217 die("internal error: unsafe comment removal, stopped");
218
219 } else { # no boilerplate found
220 #&Warn("Cannot find old boilerplate, adding new boilerplate.", $code);
221 }
222
223 # Some files have license declarations way down in the code so we may not
224 # find a boilerplate at all or find an "empty" boilerplate preceeding them.
225 my $license =
226 "Copyright|".
227 "This program is free software|".
228 "Permission to use|".
229 "Redistribution and use";
230 if ($code =~ m@/\*.*?($license).*?\*/@is) {
231 # If we replaced what we thought is an old boiler, do not use $` for
232 # context because it is based on modified $code and will often mislead.
233 my $context = defined $boiler ? $& : ($` . $&);
234 &Warn("Suspected boilerplate in an unusual location, skipping:",
235 $context);
236 next;
237 }
238
239 $code = $CorrectBoiler . $extras . &trimL($code);
240 &writeFile($fname, $code) unless $code eq $virginCode;
241 undef $FileName;
242 }
243
244 exit(0);
245
246 sub readFile() {
247 my ($fname) = @_;
248
249 if (!-f $fname) {
250 &Warn("Skipping directory or a special file.");
251 return undef();
252 }
253
254 my $code = '';
255 open(IF, "<$fname") or die("cannot open $fname: $!, stopped");
256 while (<IF>) {
257 $code .= $_;
258 }
259 close(IF);
260
261 &Warn("empty file") unless length $code;
262 return $code;
263 }
264
265 sub writeFile() {
266 my ($fname, $code) = @_;
267 open(OF, ">$fname") or die("cannot open $fname for writing: $!, stopped");
268
269 print(OF $code) or die("cannot write to $fname: $!, stopped");
270
271 close(OF) or die("cannot finish updating $fname: $!, stopped");
272 }
273
274 # split multiclaim claims into an array of single claims
275 sub claimList() {
276 my $multiClaim = shift;
277
278 $multiClaim =~ s/$reDebug//g; # may pretend to continue AUTHORs list
279 $multiClaim =~ s/$reInspiration//g; # does not affect (C) claims
280
281 # remove \n that is not used to separate two claims
282 $multiClaim =~ s/(Based.upon.original.+code.by\s*)\n/$1 /g;
283
284 return split(/\n/, $multiClaim);
285 # return grep { /\S/ } split($reClaimSplitter, $multiClaim);
286 }
287
288 # checks whether a comment contains nothing but the stuff we can either
289 # safely remove, replace, or move (e.g., DEBUG sections and copyright claims)
290 sub digestable() {
291 my $comment = shift;
292
293 # Remove common text to detect an otherwise empty boilerplate.
294 $comment =~ s/$reDebug//;
295 $comment =~ s/$reClaims//g;
296 $comment =~ s/^[\s*]*(Created on.*?)$//mig;
297 $comment =~ s/^[\s*]*(Windows support\s*)$//mig;
298 $comment =~ s/^[\s*]*(History added by .*)$//mig;
299 $comment =~ s/$reGpl//;
300 $comment =~ s/$reSquidCopy//;
301 $comment =~ s/$reInspiration//g;
302 $comment =~ s/\* Stubs for.*?$//m; # e.g., Stubs for calls to stuff defined in...
303 $comment =~ s/\$Id(:.*)?\$//g; # CVS tags
304 $comment =~ s/-{60,}//g; # decorations such as -----------...---------
305 $comment =~ s/\b\w+\.(h|c|cc|cci)\b//; # Next to last step: a file name.
306 $comment =~ s@[\s*/]@@sg; # Last step: whitespace and comment characters.
307 return !length($comment);
308 }
309
310 # removes all opening whitespace
311 sub trimL() {
312 my ($code) = @_;
313 $code =~ s/^\n[\n\s]*//s;
314 return $code;
315 }
316
317 # removes all trailing whitespace
318 sub trimR() {
319 my ($code) = @_;
320 $code =~ s/\n[\n\s]*$//s;
321 return $code;
322 }
323
324 sub Warn() {
325 my ($msg, $context) = @_;
326
327 if (defined $context) {
328 my $MaxLen = 1000;
329 $context =~ s/$reGpl/... [GPL] .../;
330 $context =~ s/$reSquidCopy/... [Standard Squid "numerous individuals" text] .../;
331 $context = substr($context, 0, $MaxLen);
332 $context = &trimR($context);
333 $context .= "\n\n";
334 } else {
335 $context = '';
336 }
337 $msg = sprintf("%s: WARNING: %s\n%s", $FileName, $msg, $context) if defined $FileName;
338 warn($msg);
339 }
340
341 sub WarnQuiet() {
342 my ($msg, $context) = @_;
343
344 $msg = sprintf("%s: WARNING: %s\n", $FileName, $msg) if defined $FileName;
345 warn($msg);
346 }