]>
Commit | Line | Data |
---|---|---|
b4f25b07 ES |
1 | #!/usr/bin/env perl |
2 | # | |
3 | # Copyright (c) 2021-2022 Eric Sunshine <sunshine@sunshineco.com> | |
4 | # | |
5 | # This tool scans shell scripts for test definitions and checks those tests for | |
6 | # problems, such as broken &&-chains, which might hide bugs in the tests | |
7 | # themselves or in behaviors being exercised by the tests. | |
8 | # | |
9 | # Input arguments are pathnames of shell scripts containing test definitions, | |
10 | # or globs referencing a collection of scripts. For each problem discovered, | |
11 | # the pathname of the script containing the test is printed along with the test | |
12 | # name and the test body with a `?!FOO?!` annotation at the location of each | |
13 | # detected problem, where "FOO" is a tag such as "AMP" which indicates a broken | |
14 | # &&-chain. Returns zero if no problems are discovered, otherwise non-zero. | |
15 | ||
16 | use warnings; | |
17 | use strict; | |
29fb2ec3 | 18 | use Config; |
b4f25b07 ES |
19 | use File::Glob; |
20 | use Getopt::Long; | |
21 | ||
29fb2ec3 | 22 | my $jobs = -1; |
b4f25b07 ES |
23 | my $show_stats; |
24 | my $emit_all; | |
25 | ||
7d480473 ES |
26 | # Lexer tokenizes POSIX shell scripts. It is roughly modeled after section 2.3 |
27 | # "Token Recognition" of POSIX chapter 2 "Shell Command Language". Although | |
28 | # similar to lexical analyzers for other languages, this one differs in a few | |
29 | # substantial ways due to quirks of the shell command language. | |
30 | # | |
31 | # For instance, in many languages, newline is just whitespace like space or | |
32 | # TAB, but in shell a newline is a command separator, thus a distinct lexical | |
33 | # token. A newline is significant and returned as a distinct token even at the | |
34 | # end of a shell comment. | |
35 | # | |
36 | # In other languages, `1+2` would typically be scanned as three tokens | |
37 | # (`1`, `+`, and `2`), but in shell it is a single token. However, the similar | |
38 | # `1 + 2`, which embeds whitespace, is scanned as three tokens in shell, as well. | |
39 | # In shell, several characters with special meaning lose that meaning when not | |
40 | # surrounded by whitespace. For instance, the negation operator `!` is special | |
41 | # when standing alone surrounded by whitespace; whereas in `foo!uucp` it is | |
42 | # just a plain character in the longer token "foo!uucp". In many other | |
43 | # languages, `"string"/foo:'string'` might be scanned as five tokens ("string", | |
44 | # `/`, `foo`, `:`, and 'string'), but in shell, it is just a single token. | |
45 | # | |
46 | # The lexical analyzer for the shell command language is also somewhat unusual | |
47 | # in that it recursively invokes the parser to handle the body of `$(...)` | |
48 | # expressions which can contain arbitrary shell code. Such expressions may be | |
49 | # encountered both inside and outside of double-quoted strings. | |
50 | # | |
51 | # The lexical analyzer is responsible for consuming shell here-doc bodies which | |
52 | # extend from the line following a `<<TAG` operator until a line consisting | |
53 | # solely of `TAG`. Here-doc consumption begins when a newline is encountered. | |
54 | # It is legal for multiple here-doc `<<TAG` operators to be present on a single | |
55 | # line, in which case their bodies must be present one following the next, and | |
56 | # are consumed in the (left-to-right) order the `<<TAG` operators appear on the | |
57 | # line. A special complication is that the bodies of all here-docs must be | |
58 | # consumed when the newline is encountered even if the parse context depth has | |
59 | # changed. For instance, in `cat <<A && x=$(cat <<B &&\n`, bodies of here-docs | |
60 | # "A" and "B" must be consumed even though "A" was introduced outside the | |
61 | # recursive parse context in which "B" was introduced and in which the newline | |
62 | # is encountered. | |
63 | package Lexer; | |
64 | ||
# Create a lexer bound to $parser which tokenizes the text referenced by $s.
# The list of pending here-doc tags starts out empty.
sub new {
	my ($class, $parser, $s) = @_;
	my %state = (
		parser => $parser,
		buff => $s,
		heretags => [],
	);
	return bless(\%state, $class);
}
73 | ||
# Scan the tag following a `<<` operator, remember it so that the here-doc
# body can be consumed at the next newline, and return the operator token
# (`<<TAG` or `<<-TAG`). A tag introduced by `<<-` is queued with a leading
# TAB as a marker.
sub scan_heredoc_tag {
	my ($self) = @_;
	my $buff = $self->{buff};
	$$buff =~ /\G(-?)/gc;
	my $dash = $1;
	my $tag = $self->scan_token();
	# quoting characters are not part of the terminator line
	$tag =~ s/['"\\]//g;
	my $pending = $dash ? "\t$tag" : "$tag";
	push(@{$self->{heretags}}, $pending);
	return "<<$dash$tag";
}
83 | ||
# Given operator character $c (already consumed), look one character ahead
# and return the two-character operator if the pair forms one; otherwise
# return $c alone and leave the lookahead character unconsumed. `<<` is
# handed off to scan_heredoc_tag().
sub scan_op {
	my ($self, $c) = @_;
	my $buff = $self->{buff};
	my %two_char_op = map { $_ => 1 } ('&&', '||', '>>', ';;', '<&', '>&', '<>', '>|');
	if ($$buff =~ /\G(.)/sgc) {
		my $pair = $c . $1;
		return scan_heredoc_tag($self) if $pair eq '<<';
		return $pair if $two_char_op{$pair};
		# not an operator pair; give the lookahead character back
		pos($$buff)--;
	}
	return $c;
}
94 | ||
# Scan the remainder of a single-quoted string (the opening quote has already
# been consumed) and return it with the opening quote restored. If the
# closing quote is missing, everything up to end-of-input is returned.
sub scan_sqstring {
	my ($self) = @_;
	my $buff = $self->{buff};
	$$buff =~ /\G([^']*'|.*\z)/sgc;
	my $rest = $1;
	return "'" . $rest;
}
100 | ||
# Scan the remainder of a double-quoted string (the opening quote has already
# been consumed) and return it with the opening quote restored. `$`
# expansions are delegated to scan_dollar(); a backslash-newline pair is
# dropped (line splice); a backslash is retained only when it precedes a
# character other than $, `, ", or \.
sub scan_dqstring {
	my ($self) = @_;
	my $buff = $self->{buff};
	my $str = '"';
	for (;;) {
		# slurp up non-special characters
		if ($$buff =~ /\G([^"\$\\]+)/gc) {
			$str .= $1;
		}
		# handle special characters
		last unless $$buff =~ /\G(.)/sgc;
		my $ch = $1;
		if ($ch eq '"') {
			$str .= '"';
			last;
		}
		elsif ($ch eq '$') {
			$str .= '$' . $self->scan_dollar();
		}
		elsif ($ch eq '\\') {
			if ($$buff =~ /\G(.)/sgc) {
				my $esc = $1;
				next if $esc eq "\n"; # line splice
				# backslash escapes only $, `, ", \ in dq-string
				$str .= '\\' unless $esc =~ /^[\$`"\\]$/;
				$str .= $esc;
			}
			else {
				# lone backslash at end-of-input
				$str .= '\\';
				last;
			}
		}
		else {
			die("internal error scanning dq-string '$ch'\n");
		}
	}
	return $str;
}
126 | ||
# Scan text delimited by nested opener $c1 and closer $c2 (one opener has
# already been consumed) and return it with that opener restored. Scanning
# stops when the nesting depth returns to zero or input is exhausted.
sub scan_balanced {
	my ($self, $c1, $c2) = @_;
	my $buff = $self->{buff};
	my $text = $c1;
	my $depth = 1;
	while ($$buff =~ /\G([^\Q$c1$c2\E]*(?:[\Q$c1$c2\E]|\z))/gc) {
		$text .= $1;
		if ($text =~ /\Q$c1\E$/) {
			$depth++;
			next;
		}
		$depth--;
		last if $depth == 0;
	}
	return $text;
}
140 | ||
# Parse the body of a `$(...)` expression by handing control back to the
# parser, then discard the token following the body (the closing ")").
# Returns the tokens of the body.
sub scan_subst {
	my ($self) = @_;
	my $parser = $self->{parser};
	my @body = $parser->parse(qr/^\)$/);
	$parser->next_token(); # closing ")"
	return @body;
}
147 | ||
# Scan whatever follows a `$` (already consumed) and return its text without
# the `$`: an arithmetic expansion, a command substitution (re-joined with
# single spaces), a `${...}` expansion, a variable name, or a one-character
# special parameter. Returns an empty string when none of these follows.
sub scan_dollar {
	my ($self) = @_;
	my $buff = $self->{buff};
	if ($$buff =~ /\G\((?=\()/gc) { # $((...))
		return $self->scan_balanced('(', ')');
	}
	if ($$buff =~ /\G\(/gc) { # $(...)
		return '(' . join(' ', $self->scan_subst()) . ')';
	}
	if ($$buff =~ /\G\{/gc) { # ${...}
		return $self->scan_balanced('{', '}');
	}
	if ($$buff =~ /\G(\w+)/gc) { # $var
		return $1;
	}
	if ($$buff =~ /\G([@*#?$!0-9-])/gc) { # $*, $1, $$, etc.
		return $1;
	}
	return '';
}
158 | ||
# Consume the bodies of all pending here-docs, in the order their tags were
# queued by scan_heredoc_tag(). For each tag, the buffer position is advanced
# past the first line consisting solely of the tag (optionally preceded by
# whitespace when the tag was queued with the leading-TAB marker). If no such
# line exists, the position is left unchanged for that tag.
sub swallow_heredocs {
	my $self = shift @_;
	my $b = $self->{buff};
	my $tags = $self->{heretags};
	# test definedness rather than truth so that a tag such as "0" does not
	# end the loop early and leave the remaining here-docs unconsumed
	while (defined(my $tag = shift @$tags)) {
		my $indent = $tag =~ s/^\t// ? '\\s*' : '';
		$$b =~ /(?:\G|\n)$indent\Q$tag\E(?:\n|\z)/gc;
	}
}
168 | ||
# Return the next token from the buffer, or undef at end-of-input. Spaces and
# TABs separate tokens; a comment is returned as a newline token; reaching a
# newline triggers consumption of pending here-doc bodies; quoted strings and
# `$` expansions become part of the surrounding token; backslash-newline is a
# line splice.
sub scan_token {
	my ($self) = @_;
	my $buff = $self->{buff};
	my $tok = '';
RESTART:
	$$buff =~ /\G[ \t]+/gc; # skip whitespace (but not newline)
	if ($$buff =~ /\G#[^\n]*(?:\n|\z)/gc) { # comment
		return "\n";
	}
	while (1) {
		# slurp up non-special characters
		if ($$buff =~ /\G([^\\;&|<>(){}'"\$\s]+)/gc) {
			$tok .= $1;
		}
		# handle special characters
		last unless $$buff =~ /\G(.)/sgc;
		my $ch = $1;
		last if $ch =~ /^[ \t]$/; # whitespace ends token
		if (length($tok) && $ch =~ /^[;&|<>(){}\n]$/) {
			# operator or newline ends the token in progress and is
			# left in the buffer for the next call
			pos($$buff)--;
			last;
		}
		if ($ch eq "'") {
			$tok .= $self->scan_sqstring();
			next;
		}
		if ($ch eq '"') {
			$tok .= $self->scan_dqstring();
			next;
		}
		if ($ch eq '$') {
			$tok .= $ch . $self->scan_dollar();
			next;
		}
		if ($ch eq "\n") {
			$self->swallow_heredocs();
			$tok = $ch;
			last;
		}
		if ($ch =~ /^[;&|<>]$/) {
			$tok = $self->scan_op($ch);
			last;
		}
		if ($ch =~ /^[(){}]$/) {
			$tok = $ch;
			last;
		}
		if ($ch eq '\\') {
			if ($$buff =~ /\G(.)/sgc) {
				$ch = $1;
			}
			else {
				# lone backslash at end-of-input
				$tok .= '\\';
				last;
			}
			if ($ch eq "\n") { # line splice
				next if length($tok);
				goto RESTART;
			}
			$tok .= '\\' . $ch;
			next;
		}
		die("internal error scanning character '$ch'\n");
	}
	return length($tok) ? $tok : undef;
}
202 | ||
65945541 ES |
203 | # ShellParser parses POSIX shell scripts (with minor extensions for Bash). It |
204 | # is a recursive descent parser very roughly modeled after section 2.10 "Shell | |
205 | # Grammar" of POSIX chapter 2 "Shell Command Language". | |
206 | package ShellParser; | |
207 | ||
# Create a parser for the text referenced by $s. The parser starts with an
# empty pushed-back token buffer, an empty stack of stop conditions, and an
# empty list of output messages, and owns a Lexer over the same text.
sub new {
	my ($class, $s) = @_;
	my %state = (
		buff => [],
		stop => [],
		output => [],
	);
	my $self = bless(\%state, $class);
	$self->{lexer} = Lexer->new($self, $s);
	return $self;
}
218 | ||
# Return the next token: the most recently pushed-back token if there is one,
# otherwise a fresh token from the lexer.
sub next_token {
	my ($self) = @_;
	my $pushed_back = $self->{buff};
	if (@$pushed_back) {
		return pop(@$pushed_back);
	}
	return $self->{lexer}->scan_token();
}
224 | ||
# Push the given tokens back so that next_token() returns them again before
# consulting the lexer (the last one pushed is returned first).
sub untoken {
	my ($self, @tokens) = @_;
	return push(@{$self->{buff}}, @tokens);
}
229 | ||
# Return the next token without consuming it, or undef at end-of-input.
sub peek {
	my ($self) = @_;
	my $token = $self->next_token();
	if (defined($token)) {
		$self->untoken($token);
		return $token;
	}
	return undef;
}
237 | ||
# Return true if parsing should stop at $token: either $token is undef
# (end-of-input) or it matches the innermost stop condition, i.e. the regex
# most recently pushed onto the `stop` stack. An undefined stop condition
# never matches.
sub stop_at {
	my ($self, $token) = @_;
	return 1 unless defined($token);
	my $stops = $self->{stop};
	# NOTE: deliberately not written as `my $stop = ... if @$stops;` since
	# the behavior of `my` with a statement modifier is undefined
	my $stop = @$stops ? $$stops[-1] : undef;
	return defined($stop) && $token =~ $stop;
}
244 | ||
# Consume the next token and return it if it is exactly $expect. Otherwise
# record an "?!ERR?!" message in the output list, push the unexpected token
# (if any) back, and return an empty list.
sub expect {
	my ($self, $expect) = @_;
	my $token = $self->next_token();
	my $have_token = defined($token);
	return $token if $have_token && $token eq $expect;
	my $found = $have_token ? $token : "<end-of-input>";
	push(@{$self->{output}}, "?!ERR?! expected '$expect' but found '" . $found . "'\n");
	$self->untoken($token) if $have_token;
	return ();
}
253 | ||
# Consume and return any run of newline tokens at the current position.
sub optional_newlines {
	my ($self) = @_;
	my @newlines;
	for (;;) {
		my $next = $self->peek();
		last unless $next && $next eq "\n";
		push(@newlines, $self->next_token());
	}
	return @newlines;
}
263 | ||
# Parse the remainder of a `{ ... }` group (the opening brace has already
# been consumed): the body followed by the closing brace.
sub parse_group {
	my ($self) = @_;
	my @body = $self->parse(qr/^}$/);
	return (@body, $self->expect('}'));
}
269 | ||
# Parse the remainder of a `( ... )` subshell (the opening parenthesis has
# already been consumed): the body followed by the closing parenthesis.
sub parse_subshell {
	my ($self) = @_;
	my @body = $self->parse(qr/^\)$/);
	return (@body, $self->expect(')'));
}
275 | ||
# Collect the tokens of a `case` item pattern, up to and including the
# terminating ")" (or up to end-of-input if there is none).
sub parse_case_pattern {
	my ($self) = @_;
	my @pattern;
	for (;;) {
		my $token = $self->next_token();
		last unless defined($token);
		push(@pattern, $token);
		last if $token eq ')';
	}
	return @pattern;
}
285 | ||
# Parse the remainder of a `case` statement (the `case` keyword has already
# been consumed): subject, `in`, then zero or more items each consisting of
# a pattern, a body, and -- except possibly for the last item -- `;;`, and
# finally `esac`.
sub parse_case {
	my ($self) = @_;
	# true while there is another token and it is not the closing `esac`
	my $more_items = sub {
		my $next = $self->peek();
		return defined($next) && $next ne 'esac';
	};
	my @tokens = ($self->next_token()); # subject
	push(@tokens, $self->optional_newlines());
	push(@tokens, $self->expect('in'));
	push(@tokens, $self->optional_newlines());
	while ($more_items->()) {
		push(@tokens, $self->parse_case_pattern());
		push(@tokens, $self->optional_newlines());
		push(@tokens, $self->parse(qr/^(?:;;|esac)$/)); # item body
		last unless $more_items->();
		push(@tokens, $self->expect(';;'));
		push(@tokens, $self->optional_newlines());
	}
	push(@tokens, $self->expect('esac'));
	return @tokens;
}
310 | ||
# Parse the remainder of a `for` loop (the `for` keyword has already been
# consumed): variable, optional `in`, items, `do`, loop body, `done`.
sub parse_for {
	my ($self) = @_;
	my @tokens = ($self->next_token()); # variable
	push(@tokens, $self->optional_newlines());
	my $next = $self->peek();
	if (defined($next) && $next eq 'in') {
		push(@tokens, $self->expect('in'));
		push(@tokens, $self->optional_newlines());
	}
	push(@tokens, $self->parse(qr/^do$/)); # items
	push(@tokens, $self->expect('do'));
	push(@tokens, $self->optional_newlines());
	push(@tokens, $self->parse_loop_body());
	push(@tokens, $self->expect('done'));
	return @tokens;
}
331 | ||
# Parse the remainder of an `if` statement (the `if` keyword has already been
# consumed): a condition/`then`/body sequence, repeated for each `elif`,
# followed by an optional `else` body and the closing `fi`.
sub parse_if {
	my ($self) = @_;
	# true if the upcoming token is exactly the given keyword
	my $next_is = sub {
		my ($keyword) = @_;
		my $next = $self->peek();
		return defined($next) && $next eq $keyword;
	};
	my @tokens;
	for (;;) {
		push(@tokens, $self->parse(qr/^then$/)); # if/elif condition
		push(@tokens, $self->expect('then'));
		push(@tokens, $self->optional_newlines());
		push(@tokens, $self->parse(qr/^(?:elif|else|fi)$/)); # if/elif body
		last unless $next_is->('elif');
		push(@tokens, $self->expect('elif'));
	}
	if ($next_is->('else')) {
		push(@tokens, $self->expect('else'));
		push(@tokens, $self->optional_newlines());
		push(@tokens, $self->parse(qr/^fi$/)); # else body
	}
	push(@tokens, $self->expect('fi'));
	return @tokens;
}
355 | ||
# Parse the body of a loop, stopping ahead of the `done` keyword.
sub parse_loop_body {
	my ($self) = @_;
	my $until_done = qr/^done$/;
	return $self->parse($until_done);
}
360 | ||
# Parse the remainder of a `while` or `until` loop (the keyword has already
# been consumed): condition, `do`, loop body, `done`.
sub parse_loop {
	my ($self) = @_;
	my @tokens = $self->parse(qr/^do$/); # condition
	push(@tokens, $self->expect('do'));
	push(@tokens, $self->optional_newlines());
	push(@tokens, $self->parse_loop_body());
	return (@tokens, $self->expect('done'));
}
369 | ||
# Parse the remainder of a function definition (the function name has already
# been consumed): `(`, `)`, optional newlines, and the command which forms
# the function body.
sub parse_func {
	my ($self) = @_;
	my @tokens = $self->expect('(');
	push(@tokens, $self->expect(')'));
	push(@tokens, $self->optional_newlines());
	return (@tokens, $self->parse_cmd()); # body
}
377 | ||
# Collect the tokens of a Bash array initializer `( ... )`, from the opening
# parenthesis up to and including the closing one (or up to end-of-input if
# it is missing).
sub parse_bash_array_assignment {
	my ($self) = @_;
	my @tokens = $self->expect('(');
	for (;;) {
		my $token = $self->next_token();
		last unless defined($token);
		push(@tokens, $token);
		last if $token eq ')';
	}
	return @tokens;
}
387 | ||
# Dispatch table mapping the token which introduces a compound command to
# the method which parses the remainder of that command.
my %compound = (
	'{' => \&parse_group,
	'(' => \&parse_subshell,
	'case' => \&parse_case,
	'for' => \&parse_for,
	'if' => \&parse_if,
	# `until` and `while` loops share a single parser
	(map { $_ => \&parse_loop } ('until', 'while')));
396 | ||
# Parse a single command and return its tokens (an empty list at
# end-of-input). A lone newline is returned as a command by itself. Compound
# commands are dispatched through %compound; `name (` introduces a function
# definition unless the name ends with `=`, in which case it is taken to be a
# Bash array assignment. The command extends through the next command
# separator (which is included) or up to the active stop condition, and an
# immediately following newline is absorbed as well.
sub parse_cmd {
	my ($self) = @_;
	my $cmd = $self->next_token();
	return () unless defined($cmd);
	return $cmd if $cmd eq "\n";

	my @tokens = ($cmd);
	if ($cmd eq '!') {
		push(@tokens, $self->parse_cmd());
		return @tokens;
	}
	my $handler = $compound{$cmd};
	if ($handler) {
		push(@tokens, $self->$handler());
	}
	else {
		my $next = $self->peek();
		if (defined($next) && $next eq '(') {
			if ($cmd !~ /\w=$/) {
				push(@tokens, $self->parse_func());
				return @tokens;
			}
			# array assignment is folded into the command word itself
			$tokens[-1] .= join(' ', $self->parse_bash_array_assignment());
		}
	}

	for (;;) {
		my $token = $self->next_token();
		last unless defined($token);
		if ($self->stop_at($token)) {
			$self->untoken($token);
			last;
		}
		push(@tokens, $token);
		last if $token =~ /^(?:[;&\n|]|&&|\|\|)$/;
	}
	if ($tokens[-1] ne "\n") {
		my $next = $self->peek();
		push(@tokens, $self->next_token()) if defined($next) && $next eq "\n";
	}
	return @tokens;
}
426 | ||
# Append the tokens of command $cmd (array ref) to the token list $tokens
# (array ref). This is a hook which subclasses can override to inspect
# commands as they are gathered.
sub accumulate {
	my ($self, $tokens, $cmd) = @_;
	return push(@{$tokens}, @{$cmd});
}
431 | ||
# Parse a sequence of commands and return all of their tokens. Parsing ends
# at end-of-input or just ahead of a token matching the regex $stop, which
# may be undef to impose no stop condition. $stop is in effect only for the
# duration of this call.
sub parse {
	my ($self, $stop) = @_;
	my @tokens;
	push(@{$self->{stop}}, $stop);
	unless ($self->stop_at($self->peek())) {
		while (my @cmd = $self->parse_cmd()) {
			$self->accumulate(\@tokens, \@cmd);
			last if $self->stop_at($self->peek());
		}
	}
	pop(@{$self->{stop}});
	return @tokens;
}
445 | ||
6d932e92 ES |
446 | # TestParser is a subclass of ShellParser which, beyond parsing shell script |
447 | # code, is also imbued with semantic knowledge of test construction, and checks | |
448 | # tests for common problems (such as broken &&-chains) which might hide bugs in | |
449 | # the tests themselves or in behaviors being exercised by the tests. As such, | |
450 | # TestParser is only called upon to parse test bodies, not the top-level | |
451 | # scripts in which the tests are defined. | |
452 | package TestParser; | |
453 | ||
454 | use base 'ShellParser'; | |
455 | ||
# Starting at index $n (default: the last index) of the token list $tokens
# (array ref) and moving backward, return the index of the first token which
# is not a newline, or -1 if there is none.
sub find_non_nl {
	my ($tokens, $n) = @_;
	$n = $#{$tokens} unless defined($n);
	while ($n >= 0) {
		last if $tokens->[$n] ne "\n";
		$n--;
	}
	return $n;
}
463 | ||
# Return 1 if the token list $tokens (array ref), ignoring trailing newlines,
# ends with the sequence described by $needles (array ref), otherwise undef.
# Each needle is a regex which the corresponding token must match, except
# that a "\n" needle skips over any run of newline tokens.
sub ends_with {
	my ($tokens, $needles) = @_;
	my $idx = find_non_nl($tokens);
	my @wanted = reverse(@$needles);
	foreach my $needle (@wanted) {
		return undef if $idx < 0;
		if ($needle eq "\n") {
			$idx = find_non_nl($tokens, $idx);
		}
		elsif ($tokens->[$idx] =~ $needle) {
			$idx--;
		}
		else {
			return undef;
		}
	}
	return 1;
}
475 | ||
# Append command $cmd to $tokens as the superclass does, but first insert a
# "?!AMP?!" annotation after the last non-newline token gathered so far if
# the previous command is not chained to this one by "&&", "||", or "|".
# Nothing is flagged when no tokens have been gathered yet or when $cmd is
# just a newline.
sub accumulate {
	my ($self, $tokens, $cmd) = @_;
	my $lone_newline = @$cmd == 1 && $$cmd[0] eq "\n";
	# did previous command end with "&&", "||", "|"?
	if (@$tokens && !$lone_newline && !ends_with($tokens, [qr/^(?:&&|\|\||\|)$/])) {
		# flag missing "&&" at end of previous command
		my $last = find_non_nl($tokens);
		splice(@$tokens, $last + 1, 0, '?!AMP?!') if $last >= 0;
	}
	$self->SUPER::accumulate($tokens, $cmd);
}
491 | ||
d99ebd6d ES |
492 | # ScriptParser is a subclass of ShellParser which identifies individual test |
493 | # definitions within test scripts, and passes each test body through TestParser | |
494 | # to identify possible problems. ScriptParser detects test definitions not only | |
495 | # at the top-level of test scripts but also within compound commands such as | |
496 | # loops and function definitions. | |
b4f25b07 ES |
497 | package ScriptParser; |
498 | ||
d99ebd6d ES |
499 | use base 'ShellParser'; |
500 | ||
b4f25b07 ES |
# Construct the parser as the superclass does and start the count of tests
# seen at zero.
sub new {
	my ($class, @args) = @_;
	my $self = $class->SUPER::new(@args);
	$self->{ntests} = 0;
	return $self;
}
507 | ||
d99ebd6d ES |
508 | # extract the raw content of a token, which may be a single string or a |
509 | # composition of multiple strings and non-string character runs; for instance, | |
510 | # `"test body"` unwraps to `test body`; `word"a b"42'c d'` to `worda b42c d` | |
# Operates on its argument, or on $_ when called without arguments.
sub unwrap {
	my $token = @_ ? $_[0] : $_;
	# simple case: 'sqstring' or "dqstring"
	if ($token =~ s/^'([^']*)'$/$1/) {
		return $token;
	}
	if ($token =~ s/^"([^"]*)"$/$1/) {
		return $token;
	}

	# composite case
	my $raw = '';
	my $open_quote; # quote character of the string being traversed, if any
	while (1) {
		# slurp up non-special characters
		if ($token =~ /\G([^\\'"]*)/gc) {
			$raw .= $1;
		}
		# handle special characters
		last unless $token =~ /\G(.)/sgc;
		my $ch = $1;
		if (defined($open_quote)) {
			if ($ch eq $open_quote) {
				$open_quote = undef;
				next;
			}
		}
		elsif ($ch eq "'" || $ch eq '"') {
			$open_quote = $ch;
			next;
		}
		if ($ch eq '\\') {
			last unless $token =~ /\G(.)/sgc;
			$ch = $1;
			$raw .= '\\' if $ch eq "\n"; # preserve line splice
		}
		$raw .= $ch;
	}
	return $raw;
}
536 | ||
# Check one test definition, given the title and body tokens as they appear
# in the script. The unwrapped body is parsed by TestParser; if any token
# carries a `?!FOO?!` annotation (or --emit-all is in effect), the annotated
# body is appended to this parser's output under a "# chainlint: <title>"
# heading. Each call counts as one test.
sub check_test {
	my ($self, @args) = @_;
	my ($title, $body) = map { unwrap($_) } @args;
	$self->{ntests}++;
	my $parser = TestParser->new(\$body);
	my @tokens = $parser->parse();
	return unless $emit_all || grep { /\?![^?]+\?!/ } @tokens;
	my $checked = join(' ', @tokens);
	# tidy up artifacts of joining tokens with spaces
	for ($checked) {
		s/^\n//;
		s/^ //mg;
		s/ $//mg;
	}
	$checked .= "\n" if $checked !~ /\n$/;
	push(@{$self->{output}}, "# chainlint: $title\n$checked");
}
551 | ||
# Parse a command as the superclass does and, if it is an invocation of
# test_expect_success or test_expect_failure, check the test it defines.
# Trailing command separators are ignored when counting arguments: with
# exactly two arguments they are title and body; with more, the first is
# taken to be a prerequisite and the next two are title and body.
sub parse_cmd {
	my ($self) = @_;
	my @tokens = $self->SUPER::parse_cmd();
	if (@tokens && $tokens[0] =~ /^test_expect_(?:success|failure)$/) {
		my $last = $#tokens;
		while ($last >= 0 && $tokens[$last] =~ /^(?:[;&\n|]|&&|\|\|)$/) {
			$last--;
		}
		if ($last == 2) { # title body
			$self->check_test($tokens[1], $tokens[2]);
		}
		elsif ($last > 2) { # prereq title body
			$self->check_test($tokens[2], $tokens[3]);
		}
	}
	return @tokens;
}
562 | ||
563 | # main contains high-level functionality for processing command-line switches, | |
564 | # feeding input test scripts to ScriptParser, and reporting results. | |
565 | package main; | |
566 | ||
# $getnow returns an opaque timestamp; $interval returns the number of
# seconds elapsed since such a timestamp. Time::HiRes is used when it can be
# loaded; otherwise time() is used.
my ($getnow, $interval);
if (eval {require Time::HiRes; Time::HiRes->import(); 1;}) {
	$getnow = sub { return [Time::HiRes::gettimeofday()]; };
	$interval = sub { return Time::HiRes::tv_interval(shift); };
}
else {
	$getnow = sub { return time(); };
	$interval = sub { return time() - shift; };
}
573 | ||
29fb2ec3 ES |
# Return the number of processor cores as a positive integer, falling back to
# 1 when the count cannot be determined. The result is used as the number of
# worker threads, so it must never be zero, empty, or non-numeric.
sub ncores {
	# accept only a positive integer (surrounding whitespace, such as the
	# newline at the end of command output, is ignored)
	my $positive = sub {
		my $n = shift;
		return defined($n) && $n =~ /^\s*(\d+)\s*$/ && $1 > 0 ? $1 + 0 : undef;
	};
	my $n;
	# Windows
	if (exists($ENV{NUMBER_OF_PROCESSORS})) {
		$n = $positive->($ENV{NUMBER_OF_PROCESSORS});
		return $n if defined($n);
	}
	# Linux / MSYS2 / Cygwin / WSL
	if (-r '/proc/cpuinfo' && open(my $fh, '<', '/proc/cpuinfo')) {
		my $count = grep(/^processor\s*:/, <$fh>);
		close($fh);
		# no recognizable "processor" lines; try the other probes
		return $count if $count > 0;
	}
	# macOS & BSD
	if ($^O =~ /(?:^darwin$|bsd)/) {
		$n = $positive->(scalar(qx/sysctl -n hw.ncpu/));
		return $n if defined($n);
	}
	return 1;
}
583 | ||
b4f25b07 ES |
# Print per-worker and overall statistics to STDERR. $start_time is a
# timestamp obtained from $getnow; $stats is an array ref of
# [worker-id, nscripts, ntests, nerrs] records as returned by check_script().
sub show_stats {
	my ($start_time, $stats) = @_;
	my $walltime = $interval->($start_time);
	my ($usertime) = times();
	my %total = (workers => 0, scripts => 0, tests => 0, errs => 0);
	foreach my $record (@$stats) {
		my ($worker, $nscripts, $ntests, $nerrs) = @$record;
		print(STDERR "worker $worker: $nscripts scripts, $ntests tests, $nerrs errors\n");
		$total{workers} += 1;
		$total{scripts} += $nscripts;
		$total{tests} += $ntests;
		$total{errs} += $nerrs;
	}
	printf(STDERR "total: %d workers, %d scripts, %d tests, %d errors, %.2fs/%.2fs (wall/user)\n", @total{qw(workers scripts tests errs)}, $walltime, $usertime);
}
599 | ||
# Worker loop: check each script obtained from $next_script until it returns
# undef. $id identifies the worker; $next_script is a code ref returning the
# next pathname (or undef when there are no more); $emit is a code ref which
# receives text to be reported. A script which cannot be opened is reported
# via $emit and skipped. Returns [$id, nscripts, ntests, nerrs] where nerrs
# is the number of `?!FOO?!` annotations in the parser output.
sub check_script {
	my ($id, $next_script, $emit) = @_;
	my ($nscripts, $ntests, $nerrs) = (0, 0, 0);
	# test definedness rather than truth so that a script named "0" does not
	# end the loop and cause the remaining scripts to be silently skipped
	while (defined(my $path = $next_script->())) {
		$nscripts++;
		my $fh;
		unless (open($fh, "<", $path)) {
			$emit->("?!ERR?! $path: $!\n");
			next;
		}
		my $script = do { local $/; <$fh> };
		close($fh);
		# slurping can yield undef (e.g. on read failure); parse as empty
		$script = '' unless defined($script);
		my $parser = ScriptParser->new(\$script);
		1 while $parser->parse_cmd();
		if (@{$parser->{output}}) {
			my $report = join('', @{$parser->{output}});
			$emit->("# chainlint: $path\n" . $report);
			$nerrs += () = $report =~ /\?![^?]+\?!/g;
		}
		$ntests += $parser->{ntests};
	}
	return [$id, $nscripts, $ntests, $nerrs];
}
623 | ||
# Return the process exit code for the given worker statistics (array ref of
# [worker-id, nscripts, ntests, nerrs] records): 1 if any worker counted at
# least one error, otherwise 0.
sub exit_code {
	my ($stats) = @_;
	my @with_errors = grep { $_->[3] } @$stats;
	return @with_errors ? 1 : 0;
}
632 | ||
# process command-line switches; a job count below 1 means "one per core"
Getopt::Long::Configure('bundling');
GetOptions(
	"emit-all!" => \$emit_all,
	"jobs|j=i" => \$jobs,
	"stats|show-stats!" => \$show_stats) or die("option error\n");
if ($jobs < 1) {
	$jobs = ncores();
}

my $start_time = $getnow->();
my @stats;

# remaining arguments are pathnames or globs; with nothing to check, report
# (empty) statistics if requested and exit successfully
my @scripts = map { File::Glob::bsd_glob($_) } @ARGV;
if (!@scripts) {
	show_stats($start_time, \@stats) if $show_stats;
	exit;
}
649 | ||
29fb2ec3 ES |
# if Perl lacks thread support or the thread modules cannot be loaded, check
# all scripts sequentially in this process and exit
my $have_threads = $Config{useithreads} && eval {
	require threads;
	threads->import();
	require Thread::Queue;
	Thread::Queue->import();
	1;
};
if (!$have_threads) {
	my $take_script = sub { return shift(@scripts); };
	my $print_now = sub { print(@_); };
	push(@stats, check_script(1, $take_script, $print_now));
	show_stats($start_time, \@stats) if $show_stats;
	exit(exit_code(\@stats));
}
659 | ||
# queue feeding script pathnames to the workers, and queue carrying their
# output to the monitor thread
my $script_queue = Thread::Queue->new();
my $output_queue = Thread::Queue->new();

# fetch the next script pathname for a worker
sub next_script {
	return $script_queue->dequeue();
}

# hand output from a worker to the monitor
sub emit {
	$output_queue->enqueue(@_);
}

# print queued output until the output queue yields nothing more
sub monitor {
	for (;;) {
		my $chunk = $output_queue->dequeue();
		last unless $chunk;
		print($chunk);
	}
}
671 | ||
# start the monitor and the requested number of workers
my $mon = threads->create({'context' => 'void'}, \&monitor);
foreach my $id (1..$jobs) {
	threads->create({'context' => 'list'}, \&check_script, $id, \&next_script, \&emit);
}

# hand out the work and signal that no more is coming
$script_queue->enqueue(@scripts);
$script_queue->end();

# collect statistics from every worker (but not the monitor)
foreach my $thread (threads->list()) {
	next if $thread == $mon;
	push(@stats, $thread->join());
}

# all workers are done, so no more output can arrive
$output_queue->end();
$mon->join();

show_stats($start_time, \@stats) if $show_stats;
exit(exit_code(\@stats));