]>
Commit | Line | Data |
---|---|---|
a53382b4 | 1 | #!/usr/bin/perl -w |
a151895d | 2 | # |
f70aedc4 | 3 | ## Copyright (C) 1996-2021 The Squid Software Foundation and contributors |
a151895d AJ |
4 | ## |
5 | ## Squid software is distributed under GPLv2+ license and includes | |
6 | ## contributions from numerous individuals and organizations. | |
7 | ## Please see the COPYING and CONTRIBUTORS files for details. | |
8 | ## | |
a53382b4 | 9 | |
0910221a TSSF |
10 | # Adds or adjusts the source file boilerplate, such as a Copyright statement. |
11 | # The boilerplate is meant to remain constant from one source file to another. | |
a53382b4 | 12 | # |
47f28373 | 13 | # The old boilerplate is assumed to be the first /* comment */ in a source |
0910221a | 14 | # file, before the first #include statement other than #include "squid.h". |
51a9ab24 AR |
15 | # Common old boilerplates are removed, with copyright-related claims contained |
16 | # in them logged on stdout for recording in CONTRIBUTORS or some such. | |
17 | # Copyright and (C) (but not AUTHOR-like) lines are left in sources except | |
18 | # when we have a permission to move them to CONTRIBUTORS. | |
0910221a | 19 | # |
51a9ab24 AR |
20 | # The new boilerplate comment is placed at the very beginning of the file, |
21 | # followed by old copyright lines, "inspired by" lines, and DEBUG section | |
22 | # comments (if any were found in the old boilerplate). | |
0910221a TSSF |
23 | # |
24 | # The script tries hard to detect files with unusual old boilerplates. When | |
25 | # detected, the script warns about the problem and leaves the file "as is". | |
a53382b4 TSSF |
26 | |
27 | use strict; | |
28 | use warnings; | |
29 | ||
30 | die("usage: $0 <boilerplate-file> <source-file> ...\n") unless @ARGV >= 2; | |
31 | my ($BoilerName, @FileNames) = @ARGV; | |
32 | ||
33 | my $CorrectBoiler = `cat $BoilerName` or | |
47f28373 | 34 | die("cannot load boilerplate from $BoilerName: $!, stopped"); |
853bf523 TSSF |
35 | $CorrectBoiler = &trimL(&trimR($CorrectBoiler)) . "\n\n"; |
36 | ||
37 | # the first /* comment */ | |
38 | my $reComment = qr{ | |
47f28373 | 39 | /\*.*?\*/ |
853bf523 TSSF |
40 | }xs; |
41 | ||
6869c265 | 42 | # Debugging section inside a boilerplate comment. |
853bf523 | 43 | my $reDebug = qr{ |
47f28373 | 44 | ^[\s*]*(DEBUG:.*?)$ |
853bf523 TSSF |
45 | }mx; |
46 | ||
6869c265 TSSF |
47 | # Same as $reDebug, but does not match empty DEBUG: statements. |
48 | my $reDebugFull = qr{ | |
47f28373 | 49 | ^[\s*]*(DEBUG:[^\S\n]*\S.*?)\s*$ |
6869c265 TSSF |
50 | }mx; |
51 | ||
853bf523 TSSF |
52 | # Copyright-related claims inside a boilerplate comment |
53 | my $reClaims = qr{ | |
47f28373 FC |
54 | ( |
55 | (?: | |
56 | AUTHOR\b(?:.|\n)*?\*[/\s]*$| # all authors until an "empty" line | |
57 | ORIGINAL\s+AUTHOR\b| # or not the latest author | |
58 | COPYRIGHT\b(?!\sfile)| # or copyright (except "COPYRIGHT file") | |
59 | Portions\scopyright| # or partial copyright | |
60 | (?<!Squid.is.Copyrighted.)\(C\)\s| # or (C) (except "Squid is ...") | |
61 | Based.upon.original.+code.by\s*\n| # or this common pearl | |
62 | Modified\sby\s| # or this | |
63 | BASED\sON:\s # or this | |
64 | ) | |
65 | .*? # and the claim content itself | |
66 | )$ | |
853bf523 TSSF |
67 | }xmi; |
68 | ||
14133da1 TSSF |
69 | # removes common claim prefixes to minimize claim noise |
70 | my $reClaimPrefix = qr{ | |
47f28373 FC |
71 | (?:ORIGINAL\s)?AUTHOR:?| |
72 | based\son\s| | |
73 | based\supon\s| | |
74 | Portions\s | |
51a9ab24 AR |
75 | }xi; |
76 | ||
77 | # We have persmission to move these frequent claims to CONTRIBUTORS. | |
51a9ab24 | 78 | my $reClaimsOkToMove = qr{ |
47f28373 | 79 | Robert.Collins|<robertc\@squid-cache.org>|<rbtcollins\@hotmail.com>| |
58dce27e | 80 | |
47f28373 | 81 | Duane.Wessels| |
58dce27e | 82 | |
47f28373 | 83 | Francesco.Chemolli|<kinkie\@squid-cache.org>|<kinkie\@kame.usr.dsi.unimi.it>| |
51a9ab24 | 84 | |
47f28373 FC |
85 | Amos.Jeffries|<amosjeffries\@squid-cache.org>|<squid3\@treenet.co.nz>| |
86 | Treehouse.Networks.Ltd.| | |
87 | GPL.version.2,..C.2007-2013| | |
51a9ab24 | 88 | |
47f28373 FC |
89 | Henrik.Nordstrom|<henrik\@henriknordstrom.net>| |
90 | MARA.Systems.AB| | |
af4d1ea4 | 91 | |
47f28373 | 92 | Guido.Serassio|<serassio\@squid-cache.org>|<guido.serassio\@acmeconsulting.it>| |
14133da1 TSSF |
93 | }xi; |
94 | ||
95 | # inspirations are not copyright claims but should be preserved | |
96 | my $reInspiration = qr/^[\s*]*(inspired by previous work.*?)$/mi; | |
97 | ||
98 | # The most common GPL text, with some address variations. | |
47f28373 FC |
99 | my $strGpl = |
100 | "This program is free software; you can redistribute it and/or modify". | |
101 | "([^*]|[*][^/])+". # not a /* comment */ closure | |
102 | "Foundation, Inc., [^\\n]+MA\\s+[-\\d]+, USA\\."; | |
853bf523 TSSF |
103 | my $reGpl = qr{$strGpl}s; |
104 | ||
14133da1 TSSF |
105 | # Two most common Squid (C) statements. |
106 | my $strSqCopyStart1 = | |
47f28373 | 107 | "SQUID Web Proxy Cache\\s+http://www.squid-cache.org/"; |
14133da1 | 108 | my $strSqCopyStart2 = |
47f28373 | 109 | "SQUID Internet Object Cache\\s+http://squid.nlanr.net/Squid/"; |
14133da1 | 110 | my $strSqCopyEnd = |
47f28373 FC |
111 | "([^*]|[*][^/])+". |
112 | "numerous individuals". | |
113 | "([^*]|[*][^/])+". | |
114 | "file for full details."; | |
14133da1 TSSF |
115 | my $reSquidCopy = qr{($strSqCopyStart1|$strSqCopyStart2)$strSqCopyEnd}s; |
116 | ||
a53382b4 TSSF |
117 | |
118 | my $FileName; # for Warn()ings | |
853bf523 | 119 | my %ReportedClaims; # to minimize noise in claims reporting |
4a4459d1 | 120 | $| = 1; # report claims ASAP (but on STDOUT) |
a53382b4 TSSF |
121 | |
122 | # process each file in-place; do not touch files on known failures | |
123 | foreach my $fname (@FileNames) { | |
124 | ||
47f28373 FC |
125 | $FileName = $fname; |
126 | my $code = &readFile($fname) or next; | |
127 | my $virginCode = $code; | |
128 | ||
129 | &WarnQuiet("Correct boilerplate already present, skipping:", $code), next if | |
130 | $code =~ /\Q$CorrectBoiler\E/s; | |
131 | ||
132 | my $boiler; | |
133 | ||
134 | if ($code =~ m/$reComment/) { | |
135 | my $beforeComment = $`; | |
136 | my $comment = $&; | |
137 | ||
138 | # Is the matched comment a boilerplate? | |
139 | if ($comment !~ m/\n/) { | |
140 | # A single line comment is not a boilerplate. | |
141 | } elsif ($beforeComment =~ m/^\s*\#\s*include\s+(?!"squid.h")/m) { | |
142 | # A comment after include is not a boilerplate, | |
143 | # but we make an exception for #include "squid.h" common in lib/ | |
144 | } elsif ($comment =~ m@^/\*\*\s@){ | |
145 | # A Doxygen comment is not a boilerplate. | |
146 | } elsif ($comment =~ m/internal declarations|stub file|unit test/i) { | |
147 | # These relatively common comments are not boilerplates. | |
148 | } elsif (&digestable($comment)) { | |
149 | # Something we can safely replace. | |
150 | $boiler = $comment; | |
151 | } else { | |
152 | &Warn("Unrecognized boilerplate, skipping:", $comment); | |
153 | next; | |
154 | } | |
155 | } | |
156 | ||
157 | my $extras = ''; # DEBUG section, inspired by ..., etc. | |
158 | ||
159 | if (defined $boiler) { | |
160 | my $copyClaims = ''; # formatted Copyright claims extracted from sources | |
161 | my $preserveClaims = 0; # whether to preserve them or not | |
162 | ||
163 | if (my @rawClaims = ($boiler =~ m/$reClaims/g)) { | |
164 | my @claims = map { &claimList($_) } @rawClaims; | |
165 | my $count = 0; | |
166 | foreach my $claim (@claims) { | |
167 | $claim =~ s/\n+/ /gs; # streamline multiline claims | |
168 | $claim =~ s@\*/?@ @g; # clean comment leftovers | |
169 | $claim =~ s/$reClaimPrefix/ /g; # remove common prefixes | |
170 | # this one is sucked in from the old standard boilerplate | |
171 | $claim =~ s/by the Regents of the University of//; | |
172 | $claim =~ s/\s\s+/ /gs; # clean excessive whitespace | |
173 | $claim =~ s/^\s+|\s+$//gs; # remove excessive whitespace | |
174 | next unless length $claim; | |
175 | ||
176 | # preserve Copyright claims | |
177 | if ($claim =~ m/Copyright|\(c\)/i) { | |
178 | $copyClaims .= sprintf(" * %s\n", $claim); | |
179 | ||
180 | # Ignore certain claims, assuming we have their permission. | |
181 | my $c = $claim; | |
182 | $c =~ s/^\s*(Copyright)?[:\s]*([(c)]+)?\s*([0-9,-]+)?\s*(by)?\s*//i; # prefix | |
183 | $c =~ s/$reClaimsOkToMove/ /g; | |
184 | $c =~ s/[,]//g; # markup leftovers | |
185 | ||
186 | # But if one claim is preserved, all must be preserved. | |
187 | $preserveClaims = 1 if $c =~ /\S/; | |
188 | warn($c) if $c =~ /\S/; | |
189 | } | |
190 | ||
191 | next if exists $ReportedClaims{$claim}; | |
192 | print("$fname: INFO: Found new claim(s):\n") unless $count++; | |
193 | print("Claim: $claim\n"); | |
194 | $ReportedClaims{$claim} = $fname; | |
195 | } | |
196 | } | |
197 | ||
198 | if ($preserveClaims) { | |
199 | die("Internal error: $copyClaims") unless length($copyClaims); | |
200 | my $prefix = " * Portions of this code are copyrighted and released under GPLv2+ by:"; | |
201 | my $suffix = " * Please add new claims to the CONTRIBUTORS file instead."; | |
202 | $extras .= sprintf("/*\n%s\n%s%s\n */\n\n", | |
203 | $prefix, $copyClaims, $suffix); | |
204 | } | |
205 | ||
206 | if ($boiler =~ m/$reInspiration/) { | |
207 | $extras .= sprintf("/* %s */\n\n", ucfirst($1)); | |
208 | } | |
209 | ||
210 | if ($boiler =~ m/$reDebugFull/) { | |
211 | $extras .= "/* $1 */\n\n"; | |
212 | } | |
213 | ||
214 | $code =~ s/\s*$reComment\s*/\n\n/ or | |
215 | die("internal error: failed to remove expected comment, stopped"); | |
216 | &digestable($&) or | |
217 | die("internal error: unsafe comment removal, stopped"); | |
218 | ||
219 | } else { # no boilerplate found | |
220 | #&Warn("Cannot find old boilerplate, adding new boilerplate.", $code); | |
221 | } | |
222 | ||
223 | # Some files have license declarations way down in the code so we may not | |
224 | # find a boilerplate at all or find an "empty" boilerplate preceding them. | |
225 | my $license = | |
226 | "Copyright|". | |
227 | "This program is free software|". | |
228 | "Permission to use|". | |
229 | "Redistribution and use"; | |
230 | if ($code =~ m@/\*.*?($license).*?\*/@is) { | |
231 | # If we replaced what we thought is an old boiler, do not use $` for | |
232 | # context because it is based on modified $code and will often mislead. | |
233 | my $context = defined $boiler ? $& : ($` . $&); | |
234 | &Warn("Suspected boilerplate in an unusual location, skipping:", | |
235 | $context); | |
236 | next; | |
237 | } | |
238 | ||
239 | $code = $CorrectBoiler . $extras . &trimL($code); | |
240 | &writeFile($fname, $code) unless $code eq $virginCode; | |
241 | undef $FileName; | |
a53382b4 TSSF |
242 | } |
243 | ||
244 | exit(0); | |
245 | ||
246 | sub readFile() { | |
47f28373 FC |
247 | my ($fname) = @_; |
248 | ||
249 | if (!-f $fname) { | |
250 | &Warn("Skipping directory or a special file."); | |
251 | return undef(); | |
252 | } | |
253 | ||
254 | my $code = ''; | |
255 | open(IF, "<$fname") or die("cannot open $fname: $!, stopped"); | |
256 | while (<IF>) { | |
257 | $code .= $_; | |
258 | } | |
259 | close(IF); | |
260 | ||
261 | &Warn("empty file") unless length $code; | |
262 | return $code; | |
a53382b4 TSSF |
263 | } |
264 | ||
00c328dd | 265 | sub writeFile() { |
47f28373 FC |
266 | my ($fname, $code) = @_; |
267 | open(OF, ">$fname") or die("cannot open $fname for writing: $!, stopped"); | |
a53382b4 | 268 | |
47f28373 | 269 | print(OF $code) or die("cannot write to $fname: $!, stopped"); |
a53382b4 | 270 | |
47f28373 | 271 | close(OF) or die("cannot finish updating $fname: $!, stopped"); |
a53382b4 TSSF |
272 | } |
273 | ||
14133da1 TSSF |
274 | # split multiclaim claims into an array of single claims |
275 | sub claimList() { | |
47f28373 | 276 | my $multiClaim = shift; |
14133da1 | 277 | |
47f28373 FC |
278 | $multiClaim =~ s/$reDebug//g; # may pretend to continue AUTHORs list |
279 | $multiClaim =~ s/$reInspiration//g; # does not affect (C) claims | |
14133da1 | 280 | |
47f28373 FC |
281 | # remove \n that is not used to separate two claims |
282 | $multiClaim =~ s/(Based.upon.original.+code.by\s*)\n/$1 /g; | |
14133da1 | 283 | |
47f28373 FC |
284 | return split(/\n/, $multiClaim); |
285 | # return grep { /\S/ } split($reClaimSplitter, $multiClaim); | |
14133da1 TSSF |
286 | } |
287 | ||
288 | # checks whether a comment contains nothing but the stuff we can either | |
289 | # safely remove, replace, or move (e.g., DEBUG sections and copyright claims) | |
290 | sub digestable() { | |
47f28373 FC |
291 | my $comment = shift; |
292 | ||
293 | # Remove common text to detect an otherwise empty boilerplate. | |
294 | $comment =~ s/$reDebug//; | |
295 | $comment =~ s/$reClaims//g; | |
296 | $comment =~ s/^[\s*]*(Created on.*?)$//mig; | |
297 | $comment =~ s/^[\s*]*(Windows support\s*)$//mig; | |
298 | $comment =~ s/^[\s*]*(History added by .*)$//mig; | |
299 | $comment =~ s/$reGpl//; | |
300 | $comment =~ s/$reSquidCopy//; | |
301 | $comment =~ s/$reInspiration//g; | |
302 | $comment =~ s/\* Stubs for.*?$//m; # e.g., Stubs for calls to stuff defined in... | |
303 | $comment =~ s/\$Id(:.*)?\$//g; # CVS tags | |
304 | $comment =~ s/-{60,}//g; # decorations such as -----------...--------- | |
305 | $comment =~ s/\b\w+\.(h|c|cc|cci)\b//; # Next to last step: a file name. | |
306 | $comment =~ s@[\s*/]@@sg; # Last step: whitespace and comment characters. | |
307 | return !length($comment); | |
14133da1 TSSF |
308 | } |
309 | ||
a53382b4 | 310 | # removes all opening whitespace |
00c328dd | 311 | sub trimL() { |
47f28373 FC |
312 | my ($code) = @_; |
313 | $code =~ s/^\n[\n\s]*//s; | |
314 | return $code; | |
a53382b4 TSSF |
315 | } |
316 | ||
317 | # removes all trailing whitespace | |
00c328dd | 318 | sub trimR() { |
47f28373 FC |
319 | my ($code) = @_; |
320 | $code =~ s/\n[\n\s]*$//s; | |
321 | return $code; | |
a53382b4 TSSF |
322 | } |
323 | ||
00c328dd | 324 | sub Warn() { |
47f28373 FC |
325 | my ($msg, $context) = @_; |
326 | ||
327 | if (defined $context) { | |
328 | my $MaxLen = 1000; | |
329 | $context =~ s/$reGpl/... [GPL] .../; | |
330 | $context =~ s/$reSquidCopy/... [Standard Squid "numerous individuals" text] .../; | |
331 | $context = substr($context, 0, $MaxLen); | |
332 | $context = &trimR($context); | |
333 | $context .= "\n\n"; | |
334 | } else { | |
335 | $context = ''; | |
336 | } | |
337 | $msg = sprintf("%s: WARNING: %s\n%s", $FileName, $msg, $context) if defined $FileName; | |
338 | warn($msg); | |
a53382b4 | 339 | } |
bc2321d2 AJ |
340 | |
341 | sub WarnQuiet() { | |
47f28373 | 342 | my ($msg, $context) = @_; |
bc2321d2 | 343 | |
47f28373 FC |
344 | $msg = sprintf("%s: WARNING: %s\n", $FileName, $msg) if defined $FileName; |
345 | warn($msg); | |
bc2321d2 | 346 | } |