]>
Commit | Line | Data |
---|---|---|
b4f25b07 ES |
1 | #!/usr/bin/env perl |
2 | # | |
3 | # Copyright (c) 2021-2022 Eric Sunshine <sunshine@sunshineco.com> | |
4 | # | |
5 | # This tool scans shell scripts for test definitions and checks those tests for | |
6 | # problems, such as broken &&-chains, which might hide bugs in the tests | |
7 | # themselves or in behaviors being exercised by the tests. | |
8 | # | |
9 | # Input arguments are pathnames of shell scripts containing test definitions, | |
10 | # or globs referencing a collection of scripts. For each problem discovered, | |
11 | # the pathname of the script containing the test is printed along with the test | |
12 | # name and the test body with a `?!FOO?!` annotation at the location of each | |
13 | # detected problem, where "FOO" is a tag such as "AMP" which indicates a broken | |
14 | # &&-chain. Returns zero if no problems are discovered, otherwise non-zero. | |
15 | ||
16 | use warnings; | |
17 | use strict; | |
29fb2ec3 | 18 | use Config; |
b4f25b07 ES |
19 | use File::Glob; |
20 | use Getopt::Long; | |
21 | ||
# Command-line settings shared by all packages in this file; they are filled in
# by the GetOptions() call in package main.
my $jobs = -1;                  # --jobs; a value below 1 is replaced by ncores()
my ($show_stats, $emit_all);    # --stats and --emit-all switches
25 | ||
7d480473 ES |
26 | # Lexer tokenizes POSIX shell scripts. It is roughly modeled after section 2.3 |
27 | # "Token Recognition" of POSIX chapter 2 "Shell Command Language". Although | |
28 | # similar to lexical analyzers for other languages, this one differs in a few | |
29 | # substantial ways due to quirks of the shell command language. | |
30 | # | |
31 | # For instance, in many languages, newline is just whitespace like space or | |
32 | # TAB, but in shell a newline is a command separator, thus a distinct lexical | |
33 | # token. A newline is significant and returned as a distinct token even at the | |
34 | # end of a shell comment. | |
35 | # | |
36 | # In other languages, `1+2` would typically be scanned as three tokens | |
37 | # (`1`, `+`, and `2`), but in shell it is a single token. However, the similar | |
38 | # `1 + 2`, which embeds whitespace, is scanned as three tokens in shell, as well. | |
39 | # In shell, several characters with special meaning lose that meaning when not | |
40 | # surrounded by whitespace. For instance, the negation operator `!` is special | |
41 | # when standing alone surrounded by whitespace; whereas in `foo!uucp` it is | |
42 | # just a plain character in the longer token "foo!uucp". In many other | |
43 | # languages, `"string"/foo:'string'` might be scanned as five tokens ("string", | |
44 | # `/`, `foo`, `:`, and 'string'), but in shell, it is just a single token. | |
45 | # | |
46 | # The lexical analyzer for the shell command language is also somewhat unusual | |
47 | # in that it recursively invokes the parser to handle the body of `$(...)` | |
48 | # expressions which can contain arbitrary shell code. Such expressions may be | |
49 | # encountered both inside and outside of double-quoted strings. | |
50 | # | |
51 | # The lexical analyzer is responsible for consuming shell here-doc bodies which | |
52 | # extend from the line following a `<<TAG` operator until a line consisting | |
53 | # solely of `TAG`. Here-doc consumption begins when a newline is encountered. | |
54 | # It is legal for multiple here-doc `<<TAG` operators to be present on a single | |
55 | # line, in which case their bodies must be present one following the next, and | |
56 | # are consumed in the (left-to-right) order the `<<TAG` operators appear on the | |
57 | # line. A special complication is that the bodies of all here-docs must be | |
58 | # consumed when the newline is encountered even if the parse context depth has | |
59 | # changed. For instance, in `cat <<A && x=$(cat <<B &&\n`, bodies of here-docs | |
60 | # "A" and "B" must be consumed even though "A" was introduced outside the | |
61 | # recursive parse context in which "B" was introduced and in which the newline | |
62 | # is encountered. | |
63 | package Lexer; | |
64 | ||
# Construct a Lexer.
#   $parser - parser object called back by scan_subst() to parse `$(...)` bodies
#   $s      - reference to the string holding the shell text to tokenize
# The lexer starts out with no pending here-doc tags.
sub new {
    my ($class, $parser, $s) = @_;
    my %fields = (
        parser => $parser,
        buff => $s,
        heretags => [],
    );
    return bless(\%fields, $class);
}
73 | ||
# Scan the tag which follows a `<<` operator (the `<<` itself has already been
# consumed by scan_op()). The tag, stripped of quotes and backslashes, is
# queued in `heretags` so that swallow_heredocs() can skip the here-doc body
# later; a tag introduced by `<<-` is queued with a leading TAB as a marker.
# Returns the complete operator token, e.g. `<<EOF` or `<<-EOF`.
sub scan_heredoc_tag {
    my ($self) = @_;
    my $buf = $self->{buff};
    $$buf =~ /\G(-?)/gc;
    my $dash = $1;
    my $tag = $self->scan_token();
    $tag =~ s/['"\\]//g;
    my $pending = $dash ? "\t$tag" : "$tag";
    push(@{$self->{heretags}}, $pending);
    return "<<$dash$tag";
}
83 | ||
# Scan an operator whose first character $c has already been consumed. If $c
# plus the next input character form `<<`, the here-doc tag is scanned too; if
# they form one of the recognized two-character operators, that operator is
# returned; otherwise the extra character is given back and $c is returned.
sub scan_op {
    my ($self, $c) = @_;
    my $buf = $self->{buff};
    # end of input: nothing can follow $c
    return $c unless $$buf =~ /\G(.)/sgc;
    my $pair = $c . $1;
    if ($pair eq '<<') {
        return scan_heredoc_tag($self);
    }
    if ($pair =~ /^(?:&&|\|\||>>|;;|<&|>&|<>|>\|)$/) {
        return $pair;
    }
    # not a two-character operator; un-read the lookahead character
    pos($$buf)--;
    return $c;
}
94 | ||
# Scan the remainder of a single-quoted string whose opening quote has already
# been consumed. Consumes up to and including the closing quote, or to end of
# input if the string is unterminated, and returns the text with the opening
# quote restored.
sub scan_sqstring {
    my ($self) = @_;
    my $buf = $self->{buff};
    $$buf =~ /\G([^']*'|.*\z)/sgc;
    my $rest = $1;
    return "'" . $rest;
}
100 | ||
# Scan the remainder of a double-quoted string whose opening quote has already
# been consumed. `$...` expressions are delegated to scan_dollar(). A backslash
# is dropped when it precedes one of the characters it can escape inside a
# dq-string ($, `, ", \), a backslash-newline pair is removed entirely (line
# splice), and any other backslash is kept. Scanning stops at the closing
# quote or at end of input. Returns the text with the opening quote restored.
sub scan_dqstring {
    my ($self) = @_;
    my $buf = $self->{buff};
    my $str = '"';
    while (1) {
        # run of ordinary characters
        if ($$buf =~ /\G([^"\$\\]+)/gc) {
            $str .= $1;
        }
        # next special character, if any
        last unless $$buf =~ /\G(.)/sgc;
        my $ch = $1;
        if ($ch eq '"') {
            $str .= '"';
            last;
        }
        if ($ch eq '$') {
            $str .= '$' . $self->scan_dollar();
            next;
        }
        if ($ch eq '\\') {
            unless ($$buf =~ /\G(.)/sgc) {
                # trailing backslash at end of input
                $str .= '\\';
                last;
            }
            $ch = $1;
            next if $ch eq "\n"; # line splice
            $str .= '\\' unless $ch =~ /^[\$`"\\]$/;
            $str .= $ch;
            next;
        }
        die("internal error scanning dq-string '$ch'\n");
    }
    return $str;
}
126 | ||
# Scan text delimited by the opening character $c1 and closing character $c2,
# honoring nesting. The opening delimiter has already been consumed. Returns
# the scanned text including both outer delimiters; if input ends first,
# whatever was scanned is returned.
sub scan_balanced {
    my ($self, $c1, $c2) = @_;
    my $buf = $self->{buff};
    my $text = $c1;
    my $open = 1; # number of currently unclosed $c1
    while ($$buf =~ /\G([^\Q$c1$c2\E]*(?:[\Q$c1$c2\E]|\z))/gc) {
        $text .= $1;
        if ($text =~ /\Q$c1\E$/) {
            $open++;
            next;
        }
        $open--;
        last if $open == 0;
    }
    return $text;
}
140 | ||
# Scan the body of a `$(...)` command substitution by asking the parser to
# parse up to the closing ")", then discard that ")" token. Returns the tokens
# of the body.
sub scan_subst {
    my ($self) = @_;
    my $parser = $self->{parser};
    my @body = $parser->parse(qr/^\)$/);
    $parser->next_token(); # closing ")"
    return @body;
}
147 | ||
# Scan whatever follows a `$` which has already been consumed: an arithmetic
# expansion `$((...))`, a command substitution `$(...)`, a braced expansion
# `${...}`, a named variable, or a one-character special parameter. Returns the
# scanned text without the leading `$`, or an empty string if nothing
# recognizable follows.
sub scan_dollar {
    my ($self) = @_;
    my $buf = $self->{buff};
    if ($$buf =~ /\G\((?=\()/gc) { # $((...))
        return $self->scan_balanced('(', ')');
    }
    if ($$buf =~ /\G\(/gc) { # $(...)
        return '(' . join(' ', $self->scan_subst()) . ')';
    }
    if ($$buf =~ /\G\{/gc) { # ${...}
        return $self->scan_balanced('{', '}');
    }
    return $1 if $$buf =~ /\G(\w+)/gc; # $var
    return $1 if $$buf =~ /\G([@*#?$!0-9-])/gc; # $*, $1, $$, etc.
    return '';
}
158 | ||
# Consume the bodies of all pending here-docs, in the order their `<<TAG`
# operators were queued by scan_heredoc_tag(). For each tag, input is skipped
# up to and including the line consisting solely of the tag; a tag queued with
# a leading TAB (from `<<-TAG`) may be preceded by whitespace on that line. If
# no terminating line is found, the input position is left unchanged.
#
# The loop tests definedness rather than truth so that a tag which is false in
# Perl (such as `0`, or the empty tag of `<<''`) is processed like any other
# instead of ending the loop early and stranding the remaining tags.
sub swallow_heredocs {
    my $self = shift @_;
    my $b = $self->{buff};
    my $tags = $self->{heretags};
    while (defined(my $tag = shift @$tags)) {
        my $indent = $tag =~ s/^\t// ? '\\s*' : '';
        $$b =~ /(?:\G|\n)$indent\Q$tag\E(?:\n|\z)/gc;
    }
}
168 | ||
# Scan and return the next token, or undef at end of input. Spaces and TABs
# separate tokens; a newline is a token in its own right, and a comment is
# returned as the newline which ends it. Quoted strings and `$` expressions
# are folded into the token being accumulated. When a newline token is
# scanned, the bodies of any pending here-docs are consumed.
sub scan_token {
    my ($self) = @_;
    my $buf = $self->{buff};
    my $token = '';
RESTART:
    $$buf =~ /\G[ \t]+/gc; # skip whitespace (but not newline)
    return "\n" if $$buf =~ /\G#[^\n]*(?:\n|\z)/gc; # comment
    while (1) {
        # run of ordinary characters
        $token .= $1 if $$buf =~ /\G([^\\;&|<>(){}'"\$\s]+)/gc;
        # next special character, if any
        last unless $$buf =~ /\G(.)/sgc;
        my $ch = $1;
        last if $ch =~ /^[ \t]$/; # whitespace ends token
        if (length($token) && $ch =~ /^[;&|<>(){}\n]$/) {
            # operator or newline ends the token in progress; leave it for
            # the next call
            pos($$buf)--;
            last;
        }
        if ($ch eq "'") {
            $token .= $self->scan_sqstring();
            next;
        }
        if ($ch eq '"') {
            $token .= $self->scan_dqstring();
            next;
        }
        if ($ch eq '$') {
            $token .= $ch . $self->scan_dollar();
            next;
        }
        if ($ch eq "\n") {
            $self->swallow_heredocs();
            $token = $ch;
            last;
        }
        if ($ch =~ /^[;&|<>]$/) {
            $token = $self->scan_op($ch);
            last;
        }
        if ($ch =~ /^[(){}]$/) {
            $token = $ch;
            last;
        }
        if ($ch eq '\\') {
            unless ($$buf =~ /\G(.)/sgc) {
                # trailing backslash at end of input
                $token .= '\\';
                last;
            }
            $ch = $1;
            if ($ch eq "\n") { # line splice
                next if length($token);
                # nothing accumulated yet: start over on the next line
                goto RESTART;
            }
            $token .= '\\' . $ch;
            next;
        }
        die("internal error scanning character '$ch'\n");
    }
    return length($token) ? $token : undef;
}
202 | ||
65945541 ES |
203 | # ShellParser parses POSIX shell scripts (with minor extensions for Bash). It |
204 | # is a recursive descent parser very roughly modeled after section 2.10 "Shell | |
205 | # Grammar" of POSIX chapter 2 "Shell Command Language". | |
206 | package ShellParser; | |
207 | ||
# Construct a parser for the shell text referenced by $s. The object holds a
# stack of pushed-back tokens (`buff`), a stack of stop patterns (`stop`), the
# collected diagnostics (`output`), and a Lexer bound to this parser.
sub new {
    my ($class, $s) = @_;
    my $self = bless({}, $class);
    $self->{buff} = [];
    $self->{stop} = [];
    $self->{output} = [];
    $self->{lexer} = Lexer->new($self, $s);
    return $self;
}
218 | ||
# Return the next token: the most recently pushed-back token if there is one,
# otherwise a fresh token from the lexer (undef at end of input).
sub next_token {
    my ($self) = @_;
    my $pushed_back = $self->{buff};
    if (@$pushed_back) {
        return pop(@$pushed_back);
    }
    return $self->{lexer}->scan_token();
}
224 | ||
# Push the given tokens back onto the parser's token stack; next_token() hands
# them out again, last one first.
sub untoken {
    my ($self, @tokens) = @_;
    push(@{$self->{buff}}, @tokens);
}
229 | ||
# Return the next token without consuming it, or undef at end of input.
sub peek {
    my ($self) = @_;
    my $token = $self->next_token();
    if (defined($token)) {
        $self->untoken($token);
    }
    return $token;
}
237 | ||
# Return true if parsing should stop at $token: either input is exhausted
# ($token is undef) or $token matches the innermost stop pattern, i.e. the one
# most recently pushed onto the `stop` stack by parse(). An undefined stop
# pattern (parse() called without one) never matches.
sub stop_at {
    my ($self, $token) = @_;
    return 1 unless defined($token);
    my $stack = $self->{stop};
    # Declared unconditionally: the behaviour of `my $x = ... if COND;` is
    # documented by perlsyn as undefined.
    my $stop = @$stack ? $$stack[-1] : undef;
    return defined($stop) && $token =~ $stop;
}
244 | ||
# Consume the next token and return it if it is exactly $expect. Otherwise
# record an `?!ERR?!` diagnostic in `output`, push the unexpected token (if
# any) back, and return an empty list.
sub expect {
    my ($self, $expect) = @_;
    my $token = $self->next_token();
    my $have_token = defined($token);
    return $token if $have_token && $token eq $expect;
    my $found = $have_token ? $token : "<end-of-input>";
    push(@{$self->{output}}, "?!ERR?! expected '$expect' but found '" . $found . "'\n");
    $self->untoken($token) if $have_token;
    return ();
}
253 | ||
# Consume and return a (possibly empty) run of consecutive newline tokens.
sub optional_newlines {
    my ($self) = @_;
    my @newlines;
    while (1) {
        my $next = $self->peek();
        last unless $next && $next eq "\n";
        push(@newlines, $self->next_token());
    }
    return @newlines;
}
263 | ||
# Parse the remainder of a `{ ... }` group whose opening brace has already been
# consumed: the body followed by the closing "}".
sub parse_group {
    my ($self) = @_;
    my @body = $self->parse(qr/^}$/);
    my @close = $self->expect('}');
    return (@body, @close);
}
269 | ||
# Parse the remainder of a `( ... )` subshell whose opening parenthesis has
# already been consumed: the body followed by the closing ")".
sub parse_subshell {
    my ($self) = @_;
    my @body = $self->parse(qr/^\)$/);
    my @close = $self->expect(')');
    return (@body, @close);
}
275 | ||
# Collect the tokens of a `case` item pattern, up to and including the ")"
# which ends it (or up to end of input if no ")" is found).
sub parse_case_pattern {
    my ($self) = @_;
    my @pattern;
    while (1) {
        my $token = $self->next_token();
        last unless defined($token);
        push(@pattern, $token);
        last if $token eq ')';
    }
    return @pattern;
}
285 | ||
# Parse the remainder of a `case` statement whose `case` keyword has already
# been consumed: the subject, `in`, each item (pattern, body and `;;`
# separator), and the closing `esac`. Returns all tokens in source order.
sub parse_case {
    my ($self) = @_;
    my @tokens = ($self->next_token()); # subject
    push(@tokens, $self->optional_newlines());
    push(@tokens, $self->expect('in'));
    push(@tokens, $self->optional_newlines());
    while (1) {
        my $token = $self->peek();
        last if !defined($token) || $token eq 'esac';
        push(@tokens, $self->parse_case_pattern());
        push(@tokens, $self->optional_newlines());
        push(@tokens, $self->parse(qr/^(?:;;|esac)$/)); # item body
        # the final item need not be terminated by ";;"
        $token = $self->peek();
        last if !defined($token) || $token eq 'esac';
        push(@tokens, $self->expect(';;'));
        push(@tokens, $self->optional_newlines());
    }
    push(@tokens, $self->expect('esac'));
    return @tokens;
}
310 | ||
# Parse the remainder of a `for` loop whose `for` keyword has already been
# consumed: the loop variable, an optional `in`, the items, `do`, the loop
# body, and `done`. Returns all tokens in source order.
sub parse_for {
    my ($self) = @_;
    my @tokens = ($self->next_token()); # variable
    push(@tokens, $self->optional_newlines());
    my $next = $self->peek();
    if (defined($next) && $next eq 'in') {
        push(@tokens, $self->expect('in'));
        push(@tokens, $self->optional_newlines());
    }
    push(@tokens, $self->parse(qr/^do$/)); # items
    push(@tokens, $self->expect('do'));
    push(@tokens, $self->optional_newlines());
    push(@tokens, $self->parse_loop_body());
    push(@tokens, $self->expect('done'));
    return @tokens;
}
331 | ||
# Parse the remainder of an `if` statement whose `if` keyword has already been
# consumed: the condition/`then`/body of the `if` and of each `elif`, an
# optional `else` branch, and the closing `fi`. Returns all tokens in source
# order.
sub parse_if {
    my ($self) = @_;
    my @tokens;
    my $next;
    while (1) {
        push(@tokens, $self->parse(qr/^then$/)); # if/elif condition
        push(@tokens, $self->expect('then'));
        push(@tokens, $self->optional_newlines());
        push(@tokens, $self->parse(qr/^(?:elif|else|fi)$/)); # if/elif body
        $next = $self->peek();
        last unless defined($next) && $next eq 'elif';
        push(@tokens, $self->expect('elif'));
    }
    $next = $self->peek();
    if (defined($next) && $next eq 'else') {
        push(@tokens, $self->expect('else'));
        push(@tokens, $self->optional_newlines());
        push(@tokens, $self->parse(qr/^fi$/)); # else body
    }
    push(@tokens, $self->expect('fi'));
    return @tokens;
}
355 | ||
# Parse the body of a loop, stopping (without consuming) at `done`. Kept as a
# separate method from the loop parsers which call it.
sub parse_loop_body {
    my ($self) = @_;
    my @body = $self->parse(qr/^done$/);
    return @body;
}
360 | ||
# Parse the remainder of a `while` or `until` loop whose keyword has already
# been consumed: the condition, `do`, the loop body, and `done`. Returns all
# tokens in source order.
sub parse_loop {
    my ($self) = @_;
    my @tokens = $self->parse(qr/^do$/); # condition
    push(@tokens, $self->expect('do'));
    push(@tokens, $self->optional_newlines());
    push(@tokens, $self->parse_loop_body());
    push(@tokens, $self->expect('done'));
    return @tokens;
}
369 | ||
# Parse the remainder of a function definition whose name has already been
# consumed: "(", ")", optional newlines, and the command forming the body.
sub parse_func {
    my ($self) = @_;
    my @tokens = $self->expect('(');
    push(@tokens, $self->expect(')'));
    push(@tokens, $self->optional_newlines());
    push(@tokens, $self->parse_cmd()); # body
    return @tokens;
}
377 | ||
# Collect the tokens of the parenthesized value list of a Bash array
# assignment (`name=(...)`): the opening "(" and everything up to and
# including the closing ")" (or up to end of input).
sub parse_bash_array_assignment {
    my ($self) = @_;
    my @tokens = $self->expect('(');
    while (1) {
        my $token = $self->next_token();
        last unless defined($token);
        push(@tokens, $token);
        last if $token eq ')';
    }
    return @tokens;
}
387 | ||
# Dispatch table consulted by parse_cmd(): maps the token which introduces a
# compound command to the method which parses the rest of that command.
my %compound = (
    'case' => \&parse_case,
    'for' => \&parse_for,
    'if' => \&parse_if,
    'until' => \&parse_loop,
    'while' => \&parse_loop,
    '(' => \&parse_subshell,
    '{' => \&parse_group);
396 | ||
# Parse a single command and return its tokens; returns an empty list at end
# of input, and a lone newline token for an empty line. Negated commands
# (`!`), compound commands, function definitions and Bash array assignments
# are recognized. The returned list includes the separator which ends the
# command (`;`, `&`, `|`, `&&`, `||`, newline) when there is one, plus a newline
# which immediately follows a non-newline separator.
sub parse_cmd {
    my ($self) = @_;
    my $cmd = $self->next_token();
    return () unless defined($cmd);
    return $cmd if $cmd eq "\n";

    my @tokens = ($cmd);
    if ($cmd eq '!') {
        push(@tokens, $self->parse_cmd());
        return @tokens;
    }
    if (my $handler = $compound{$cmd}) {
        push(@tokens, $self->$handler());
    } else {
        my $next = $self->peek();
        if (defined($next) && $next eq '(') {
            # "name (" starts a function definition unless the word ends in
            # "=", in which case it is a Bash array assignment
            if ($cmd !~ /\w=$/) {
                push(@tokens, $self->parse_func());
                return @tokens;
            }
            $tokens[-1] .= join(' ', $self->parse_bash_array_assignment());
        }
    }

    # remaining words of the command, through its terminating separator
    while (1) {
        my $token = $self->next_token();
        last unless defined($token);
        if ($self->stop_at($token)) {
            $self->untoken($token);
            last;
        }
        push(@tokens, $token);
        last if $token =~ /^(?:[;&\n|]|&&|\|\|)$/;
    }
    # also take a newline which directly follows the command
    if ($tokens[-1] ne "\n") {
        my $next = $self->peek();
        if (defined($next) && $next eq "\n") {
            push(@tokens, $self->next_token());
        }
    }
    return @tokens;
}
426 | ||
# Append the tokens of the just-parsed command $cmd (array ref) to the running
# token list $tokens (array ref). Subclasses override this to inspect the
# command boundary.
sub accumulate {
    my ($self, $tokens, $cmd) = @_;
    return push(@{$tokens}, @{$cmd});
}
431 | ||
# Parse a sequence of commands until end of input or until the next token
# matches the pattern $stop (which may be omitted). The stop token itself is
# not consumed. $stop is in effect only for the duration of this call.
# Returns the tokens of all commands parsed.
sub parse {
    my ($self, $stop) = @_;
    my $stops = $self->{stop};
    push(@$stops, $stop);
    my @tokens;
    unless ($self->stop_at($self->peek())) {
        while (my @cmd = $self->parse_cmd()) {
            $self->accumulate(\@tokens, \@cmd);
            last if $self->stop_at($self->peek());
        }
    }
    pop(@$stops);
    return @tokens;
}
445 | ||
6d932e92 ES |
446 | # TestParser is a subclass of ShellParser which, beyond parsing shell script |
447 | # code, is also imbued with semantic knowledge of test construction, and checks | |
448 | # tests for common problems (such as broken &&-chains) which might hide bugs in | |
449 | # the tests themselves or in behaviors being exercised by the tests. As such, | |
450 | # TestParser is only called upon to parse test bodies, not the top-level | |
451 | # scripts in which the tests are defined. | |
452 | package TestParser; | |
453 | ||
454 | use base 'ShellParser'; | |
455 | ||
# Return the index of the last token in @$tokens, at or before index $n
# (default: the final index), which is not a newline; -1 if there is none.
sub find_non_nl {
    my ($tokens, $n) = @_;
    $n = $#$tokens unless defined($n);
    while ($n >= 0 && $$tokens[$n] eq "\n") {
        $n--;
    }
    return $n;
}
463 | ||
# Return 1 if the token list @$tokens ends with tokens matching the patterns
# in @$needles, in order, ignoring trailing newline tokens; undef otherwise. A
# needle which is the string "\n" stands for any run of newline tokens at that
# position.
sub ends_with {
    my ($tokens, $needles) = @_;
    my $n = find_non_nl($tokens);
    # compare from the last needle backwards
    for (my $i = $#$needles; $i >= 0; $i--) {
        my $needle = $$needles[$i];
        return undef if $n < 0;
        if ($needle eq "\n") {
            $n = find_non_nl($tokens, $n);
            next;
        }
        return undef unless $$tokens[$n] =~ $needle;
        $n--;
    }
    return 1;
}
475 | ||
35ebb1e3 ES |
# Return 1 if the token list @$tokens ends with any of the needle sequences in
# @$endings (see ends_with()); undef otherwise. Sequences with more
# non-newline needles than there are tokens are skipped outright.
sub match_ending {
    my ($tokens, $endings) = @_;
    for my $needles (@$endings) {
        my $required = grep { $_ ne "\n" } @$needles;
        next if @$tokens < $required;
        return 1 if ends_with($tokens, $needles);
    }
    return undef;
}
484 | ||
# Token sequences which, when found at the end of a command, mean the command
# does not need to be followed by "&&": a trailing "&&", "||" or "|"; or an
# `exit`/`return` with a status, or a bare `exit`/`return`/`continue`, each
# optionally followed by ";".
my @safe_endings = do {
    my $exit_or_return = qr/^(?:exit|return)$/;
    my $status = qr/^(?:\d+|\$\?)$/;
    my $bare_exit = qr/^(?:exit|return|continue)$/;
    my $semicolon = qr/^;$/;
    ([qr/^(?:&&|\|\||\|)$/],
     [$exit_or_return, $status],
     [$exit_or_return, $status, $semicolon],
     [$bare_exit],
     [$bare_exit, $semicolon]);
};
491 | ||
6d932e92 ES |
# Append the just-parsed command $cmd to $tokens, first inserting a `?!AMP?!`
# annotation after the previous command if that command neither ends with one
# of @safe_endings ("&&", "|", "|| return" or similar) nor is followed merely
# by a blank line.
sub accumulate {
    my ($self, $tokens, $cmd) = @_;
    my $blank_line = @$cmd == 1 && $$cmd[0] eq "\n";
    if (@$tokens && !$blank_line && !match_ending($tokens, \@safe_endings)) {
        # flag missing "&&" at end of previous command
        my $n = find_non_nl($tokens);
        splice(@$tokens, $n + 1, 0, '?!AMP?!') unless $n < 0;
    }
    $self->SUPER::accumulate($tokens, $cmd);
}
507 | ||
d99ebd6d ES |
508 | # ScriptParser is a subclass of ShellParser which identifies individual test |
509 | # definitions within test scripts, and passes each test body through TestParser | |
510 | # to identify possible problems. ScriptParser detects test definitions not only | |
511 | # at the top-level of test scripts but also within compound commands such as | |
512 | # loops and function definitions. | |
b4f25b07 ES |
513 | package ScriptParser; |
514 | ||
d99ebd6d ES |
515 | use base 'ShellParser'; |
516 | ||
b4f25b07 ES |
# Construct a ScriptParser; arguments are passed through to the superclass
# constructor. The count of tests seen (`ntests`) starts at zero.
sub new {
    my ($class, @args) = @_;
    my $self = $class->SUPER::new(@args);
    $self->{ntests} = 0;
    return $self;
}
523 | ||
d99ebd6d ES |
524 | # extract the raw content of a token, which may be a single string or a |
525 | # composition of multiple strings and non-string character runs; for instance, | |
526 | # `"test body"` unwraps to `test body`; `word"a b"42'c d'` to `worda b42c d` | |
# Return the raw content of a token (the argument, or $_ when called without
# one). A token which is a single quoted string simply loses its quotes. In a
# composite token, quote characters are removed and each backslash is dropped
# in favor of the character it precedes, except that a backslash-newline pair
# is kept intact.
sub unwrap {
    my $token = @_ ? shift @_ : $_;
    # simple case: 'sqstring' or "dqstring"
    return $token if $token =~ s/^'([^']*)'$/$1/;
    return $token if $token =~ s/^"([^"]*)"$/$1/;

    # composite case
    my ($text, $quote); # $quote: the quote character currently open, if any
    while (1) {
        # run of ordinary characters
        $text .= $1 if $token =~ /\G([^\\'"]*)/gc;
        # next special character, if any
        last unless $token =~ /\G(.)/sgc;
        my $ch = $1;
        if (defined($quote)) {
            if ($ch eq $quote) { # closing quote
                $quote = undef;
                next;
            }
        } elsif ($ch =~ /^['"]$/) { # opening quote
            $quote = $ch;
            next;
        }
        if ($ch eq '\\') {
            last unless $token =~ /\G(.)/sgc;
            $ch = $1;
            $text .= '\\' if $ch eq "\n"; # preserve line splice
        }
        $text .= $ch;
    }
    return $text;
}
552 | ||
# Check one test definition. The arguments are the title and body tokens of
# the test; both are unwrapped, the body is parsed by a TestParser, and, if any
# `?!...?!` annotation was produced (or --emit-all is in effect), the annotated
# body is appended to `output` under a "# chainlint: <title>" header. Every
# call increments `ntests`.
sub check_test {
    my ($self, @args) = @_;
    my ($title, $body) = map { unwrap($_) } @args;
    $self->{ntests}++;
    my $parser = TestParser->new(\$body);
    my @tokens = $parser->parse();
    my $flagged = grep(/\?![^?]+\?!/, @tokens);
    return unless $emit_all || $flagged;
    # tidy the space-joined tokens: drop a leading newline and the spaces
    # which the join left at the start and end of each line
    my $checked = join(' ', @tokens);
    $checked =~ s/^\n//;
    $checked =~ s/^ //mg;
    $checked =~ s/ $//mg;
    $checked .= "\n" unless $checked =~ /\n$/;
    push(@{$self->{output}}, "# chainlint: $title\n$checked");
}
567 | ||
# Parse a command as the superclass does and, if it is an invocation of
# test_expect_success or test_expect_failure, pass the test's title and body
# to check_test(). Trailing separator tokens are ignored when deciding whether
# the invocation has the form "title body" or "prereq title body". Returns the
# command's tokens unchanged.
sub parse_cmd {
    my ($self) = @_;
    my @tokens = $self->SUPER::parse_cmd();
    return @tokens unless @tokens && $tokens[0] =~ /^test_expect_(?:success|failure)$/;
    my $last = $#tokens;
    $last-- while $last >= 0 && $tokens[$last] =~ /^(?:[;&\n|]|&&|\|\|)$/;
    if ($last == 2) { # title body
        $self->check_test($tokens[1], $tokens[2]);
    } elsif ($last > 2) { # prereq title body
        $self->check_test($tokens[2], $tokens[3]);
    }
    return @tokens;
}
578 | ||
579 | # main contains high-level functionality for processing command-line switches, | |
580 | # feeding input test scripts to ScriptParser, and reporting results. | |
581 | package main; | |
582 | ||
# Timing helpers: $getnow returns an opaque "now" value and $interval returns
# the seconds elapsed since such a value. Time::HiRes is used when it can be
# loaded; otherwise the one-second resolution of time() has to do.
my ($getnow, $interval);
if (eval {require Time::HiRes; Time::HiRes->import(); 1;}) {
    $getnow = sub { return [Time::HiRes::gettimeofday()]; };
    $interval = sub { return Time::HiRes::tv_interval(shift); };
} else {
    $getnow = sub { return time(); };
    $interval = sub { return time() - shift; };
}
589 | ||
29fb2ec3 ES |
# Return the number of CPU cores to use as the default worker count. The
# result is always an integer of at least 1: a count which is missing, zero or
# non-numeric (for instance because /proc/cpuinfo has no "processor" lines, or
# `sysctl` fails) falls back to 1 rather than leading to zero worker threads.
sub ncores {
    # Windows
    if (exists($ENV{NUMBER_OF_PROCESSORS})) {
        my $ncpu = $ENV{NUMBER_OF_PROCESSORS};
        return $1 if defined($ncpu) && $ncpu =~ /^\s*(\d+)\s*$/ && $1 > 0;
        return 1;
    }
    # Linux / MSYS2 / Cygwin / WSL
    if (open(my $fh, '<', '/proc/cpuinfo')) {
        my $count = grep(/^processor\s*:/, <$fh>);
        close($fh);
        return $count > 0 ? $count : 1;
    }
    # macOS & BSD
    if ($^O =~ /(?:^darwin$|bsd)/) {
        my $ncpu = qx/sysctl -n hw.ncpu/;
        # the match also strips the newline which ends the command's output
        return $1 if defined($ncpu) && $ncpu =~ /^\s*(\d+)/ && $1 > 0;
    }
    return 1;
}
599 | ||
b4f25b07 ES |
# Print per-worker and overall statistics to STDERR.
#   $start_time - value obtained from $getnow when the run began
#   $stats      - array ref of [worker, nscripts, ntests, nerrs] records
sub show_stats {
    my ($start_time, $stats) = @_;
    my $walltime = $interval->($start_time);
    my ($usertime) = times();
    my %total = (workers => 0, scripts => 0, tests => 0, errs => 0);
    for my $record (@$stats) {
        my ($worker, $nscripts, $ntests, $nerrs) = @$record;
        print(STDERR "worker $worker: $nscripts scripts, $ntests tests, $nerrs errors\n");
        $total{workers}++;
        $total{scripts} += $nscripts;
        $total{tests} += $ntests;
        $total{errs} += $nerrs;
    }
    printf(STDERR "total: %d workers, %d scripts, %d tests, %d errors, %.2fs/%.2fs (wall/user)\n", $total{workers}, $total{scripts}, $total{tests}, $total{errs}, $walltime, $usertime);
}
615 | ||
# Worker body: check every script handed out by $next_script.
#   $id          - worker identifier, echoed in the result
#   $next_script - code ref returning the next script path, or undef when
#                  there are no more
#   $emit        - code ref called with each chunk of output text
# Returns [$id, number of scripts, number of tests, number of errors].
#
# The loop tests definedness so that a script whose path is false in Perl
# (such as "0") is processed rather than ending the loop. A script which cannot
# be opened is reported with `?!ERR?!` and counted as an error so that the
# failure is reflected in the statistics and the exit status.
sub check_script {
    my ($id, $next_script, $emit) = @_;
    my ($nscripts, $ntests, $nerrs) = (0, 0, 0);
    while (defined(my $path = $next_script->())) {
        $nscripts++;
        my $fh;
        unless (open($fh, "<", $path)) {
            $emit->("?!ERR?! $path: $!\n");
            $nerrs++;
            next;
        }
        my $script = do { local $/; <$fh> };
        close($fh);
        my $parser = ScriptParser->new(\$script);
        1 while $parser->parse_cmd();
        if (@{$parser->{output}}) {
            my $report = join('', @{$parser->{output}});
            $emit->("# chainlint: $path\n" . $report);
            # every ?!FOO?! annotation in the report is one error
            $nerrs += () = $report =~ /\?![^?]+\?!/g;
        }
        $ntests += $parser->{ntests};
    }
    return [$id, $nscripts, $ntests, $nerrs];
}
639 | ||
# Compute the process exit code from the per-worker statistics records
# ([worker, nscripts, ntests, nerrs]): 1 if any worker saw errors, else 0.
sub exit_code {
    my ($stats) = @_;
    my $workers_with_errors = grep { $$_[3] } @$stats;
    return $workers_with_errors ? 1 : 0;
}
648 | ||
# Process command-line switches; what remains in @ARGV are script pathnames
# and globs.
Getopt::Long::Configure(qw{bundling});
GetOptions(
    "emit-all!" => \$emit_all,
    "jobs|j=i" => \$jobs,
    "stats|show-stats!" => \$show_stats) or die("option error\n");
if ($jobs < 1) {
    $jobs = ncores();
}

my $start_time = $getnow->();
my @stats;

# expand each argument as a glob; with nothing to check, finish right away
my @scripts = map { File::Glob::bsd_glob($_) } @ARGV;
if (!@scripts) {
    show_stats($start_time, \@stats) if $show_stats;
    exit;
}
665 | ||
29fb2ec3 ES |
# Without thread support (Perl built without ithreads, or the thread modules
# fail to load), check all scripts sequentially in this process and exit.
my $have_threads = $Config{useithreads} && eval {
    require threads; threads->import();
    require Thread::Queue; Thread::Queue->import();
    1;
};
if (!$have_threads) {
    my $take_script = sub { shift(@scripts); };
    my $print_now = sub { print(@_); };
    push(@stats, check_script(1, $take_script, $print_now));
    show_stats($start_time, \@stats) if $show_stats;
    exit(exit_code(\@stats));
}
675 | ||
# Queues connecting the main thread, the workers and the output monitor:
# script pathnames flow to the workers, output text flows to the monitor.
my $script_queue = Thread::Queue->new();
my $output_queue = Thread::Queue->new();

# Hand the next script pathname to a worker.
sub next_script {
    return $script_queue->dequeue();
}

# Queue output text for printing by the monitor thread.
sub emit {
    $output_queue->enqueue(@_);
}
681 | ||
# Body of the monitor thread: print each chunk taken from the output queue
# until dequeue() yields a false value.
sub monitor {
    while (1) {
        my $chunk = $output_queue->dequeue();
        last unless $chunk;
        print($chunk);
    }
}
687 | ||
# Start the output monitor and $jobs workers, feed the workers every script,
# then collect each worker's statistics record.
my $mon = threads->create({'context' => 'void'}, \&monitor);
for my $id (1..$jobs) {
    threads->create({'context' => 'list'}, \&check_script, $id, \&next_script, \&emit);
}

$script_queue->enqueue(@scripts);
$script_queue->end();

for my $thread (threads->list()) {
    next if $thread == $mon;
    push(@stats, $thread->join());
}

# all workers are done, so no more output can arrive
$output_queue->end();
$mon->join();

show_stats($start_time, \@stats) if $show_stats;
exit(exit_code(\@stats));