]>
Commit | Line | Data |
---|---|---|
b4f25b07 ES |
1 | #!/usr/bin/env perl |
2 | # | |
3 | # Copyright (c) 2021-2022 Eric Sunshine <sunshine@sunshineco.com> | |
4 | # | |
5 | # This tool scans shell scripts for test definitions and checks those tests for | |
6 | # problems, such as broken &&-chains, which might hide bugs in the tests | |
7 | # themselves or in behaviors being exercised by the tests. | |
8 | # | |
9 | # Input arguments are pathnames of shell scripts containing test definitions, | |
10 | # or globs referencing a collection of scripts. For each problem discovered, | |
11 | # the pathname of the script containing the test is printed along with the test | |
12 | # name and the test body with a `?!FOO?!` annotation at the location of each | |
13 | # detected problem, where "FOO" is a tag such as "AMP" which indicates a broken | |
14 | # &&-chain. Returns zero if no problems are discovered, otherwise non-zero. | |
15 | ||
16 | use warnings; | |
17 | use strict; | |
29fb2ec3 | 18 | use Config; |
b4f25b07 ES |
19 | use File::Glob; |
20 | use Getopt::Long; | |
21 | ||
29fb2ec3 | 22 | my $jobs = -1; |
b4f25b07 ES |
23 | my $show_stats; |
24 | my $emit_all; | |
25 | ||
7d480473 ES |
26 | # Lexer tokenizes POSIX shell scripts. It is roughly modeled after section 2.3 |
27 | # "Token Recognition" of POSIX chapter 2 "Shell Command Language". Although | |
28 | # similar to lexical analyzers for other languages, this one differs in a few | |
29 | # substantial ways due to quirks of the shell command language. | |
30 | # | |
31 | # For instance, in many languages, newline is just whitespace like space or | |
32 | # TAB, but in shell a newline is a command separator, thus a distinct lexical | |
33 | # token. A newline is significant and returned as a distinct token even at the | |
34 | # end of a shell comment. | |
35 | # | |
36 | # In other languages, `1+2` would typically be scanned as three tokens | |
37 | # (`1`, `+`, and `2`), but in shell it is a single token. However, the similar | |
38 | # `1 + 2`, which embeds whitepace, is scanned as three token in shell, as well. | |
39 | # In shell, several characters with special meaning lose that meaning when not | |
40 | # surrounded by whitespace. For instance, the negation operator `!` is special | |
41 | # when standing alone surrounded by whitespace; whereas in `foo!uucp` it is | |
42 | # just a plain character in the longer token "foo!uucp". In many other | |
43 | # languages, `"string"/foo:'string'` might be scanned as five tokens ("string", | |
44 | # `/`, `foo`, `:`, and 'string'), but in shell, it is just a single token. | |
45 | # | |
46 | # The lexical analyzer for the shell command language is also somewhat unusual | |
47 | # in that it recursively invokes the parser to handle the body of `$(...)` | |
48 | # expressions which can contain arbitrary shell code. Such expressions may be | |
49 | # encountered both inside and outside of double-quoted strings. | |
50 | # | |
51 | # The lexical analyzer is responsible for consuming shell here-doc bodies which | |
52 | # extend from the line following a `<<TAG` operator until a line consisting | |
53 | # solely of `TAG`. Here-doc consumption begins when a newline is encountered. | |
54 | # It is legal for multiple here-doc `<<TAG` operators to be present on a single | |
55 | # line, in which case their bodies must be present one following the next, and | |
56 | # are consumed in the (left-to-right) order the `<<TAG` operators appear on the | |
57 | # line. A special complication is that the bodies of all here-docs must be | |
58 | # consumed when the newline is encountered even if the parse context depth has | |
59 | # changed. For instance, in `cat <<A && x=$(cat <<B &&\n`, bodies of here-docs | |
60 | # "A" and "B" must be consumed even though "A" was introduced outside the | |
61 | # recursive parse context in which "B" was introduced and in which the newline | |
62 | # is encountered. | |
63 | package Lexer; | |
64 | ||
65 | sub new { | |
66 | my ($class, $parser, $s) = @_; | |
67 | bless { | |
68 | parser => $parser, | |
69 | buff => $s, | |
bf42f0a0 | 70 | lineno => 1, |
7d480473 ES |
71 | heretags => [] |
72 | } => $class; | |
73 | } | |
74 | ||
75 | sub scan_heredoc_tag { | |
76 | my $self = shift @_; | |
77 | ${$self->{buff}} =~ /\G(-?)/gc; | |
78 | my $indented = $1; | |
5f0321a9 ES |
79 | my $token = $self->scan_token(); |
80 | return "<<$indented" unless $token; | |
81 | my $tag = $token->[0]; | |
7d480473 | 82 | $tag =~ s/['"\\]//g; |
2b61c8dc ES |
83 | $$token[0] = $indented ? "\t$tag" : "$tag"; |
84 | push(@{$self->{heretags}}, $token); | |
7d480473 ES |
85 | return "<<$indented$tag"; |
86 | } | |
87 | ||
88 | sub scan_op { | |
89 | my ($self, $c) = @_; | |
90 | my $b = $self->{buff}; | |
91 | return $c unless $$b =~ /\G(.)/sgc; | |
92 | my $cc = $c . $1; | |
93 | return scan_heredoc_tag($self) if $cc eq '<<'; | |
94 | return $cc if $cc =~ /^(?:&&|\|\||>>|;;|<&|>&|<>|>\|)$/; | |
95 | pos($$b)--; | |
96 | return $c; | |
97 | } | |
98 | ||
99 | sub scan_sqstring { | |
100 | my $self = shift @_; | |
101 | ${$self->{buff}} =~ /\G([^']*'|.*\z)/sgc; | |
bf42f0a0 ES |
102 | my $s = $1; |
103 | $self->{lineno} += () = $s =~ /\n/sg; | |
104 | return "'" . $s; | |
7d480473 ES |
105 | } |
106 | ||
107 | sub scan_dqstring { | |
108 | my $self = shift @_; | |
109 | my $b = $self->{buff}; | |
110 | my $s = '"'; | |
111 | while (1) { | |
112 | # slurp up non-special characters | |
113 | $s .= $1 if $$b =~ /\G([^"\$\\]+)/gc; | |
114 | # handle special characters | |
115 | last unless $$b =~ /\G(.)/sgc; | |
116 | my $c = $1; | |
117 | $s .= '"', last if $c eq '"'; | |
118 | $s .= '$' . $self->scan_dollar(), next if $c eq '$'; | |
119 | if ($c eq '\\') { | |
120 | $s .= '\\', last unless $$b =~ /\G(.)/sgc; | |
121 | $c = $1; | |
bf42f0a0 | 122 | $self->{lineno}++, next if $c eq "\n"; # line splice |
7d480473 ES |
123 | # backslash escapes only $, `, ", \ in dq-string |
124 | $s .= '\\' unless $c =~ /^[\$`"\\]$/; | |
125 | $s .= $c; | |
126 | next; | |
127 | } | |
128 | die("internal error scanning dq-string '$c'\n"); | |
129 | } | |
bf42f0a0 | 130 | $self->{lineno} += () = $s =~ /\n/sg; |
7d480473 ES |
131 | return $s; |
132 | } | |
133 | ||
134 | sub scan_balanced { | |
135 | my ($self, $c1, $c2) = @_; | |
136 | my $b = $self->{buff}; | |
137 | my $depth = 1; | |
138 | my $s = $c1; | |
139 | while ($$b =~ /\G([^\Q$c1$c2\E]*(?:[\Q$c1$c2\E]|\z))/gc) { | |
140 | $s .= $1; | |
141 | $depth++, next if $s =~ /\Q$c1\E$/; | |
142 | $depth--; | |
143 | last if $depth == 0; | |
144 | } | |
bf42f0a0 | 145 | $self->{lineno} += () = $s =~ /\n/sg; |
7d480473 ES |
146 | return $s; |
147 | } | |
148 | ||
149 | sub scan_subst { | |
150 | my $self = shift @_; | |
151 | my @tokens = $self->{parser}->parse(qr/^\)$/); | |
152 | $self->{parser}->next_token(); # closing ")" | |
153 | return @tokens; | |
154 | } | |
155 | ||
156 | sub scan_dollar { | |
157 | my $self = shift @_; | |
158 | my $b = $self->{buff}; | |
159 | return $self->scan_balanced('(', ')') if $$b =~ /\G\((?=\()/gc; # $((...)) | |
5f0321a9 | 160 | return '(' . join(' ', map {$_->[0]} $self->scan_subst()) . ')' if $$b =~ /\G\(/gc; # $(...) |
7d480473 ES |
161 | return $self->scan_balanced('{', '}') if $$b =~ /\G\{/gc; # ${...} |
162 | return $1 if $$b =~ /\G(\w+)/gc; # $var | |
163 | return $1 if $$b =~ /\G([@*#?$!0-9-])/gc; # $*, $1, $$, etc. | |
164 | return ''; | |
165 | } | |
166 | ||
167 | sub swallow_heredocs { | |
168 | my $self = shift @_; | |
169 | my $b = $self->{buff}; | |
170 | my $tags = $self->{heretags}; | |
171 | while (my $tag = shift @$tags) { | |
bf42f0a0 | 172 | my $start = pos($$b); |
2b61c8dc ES |
173 | my $indent = $$tag[0] =~ s/^\t// ? '\\s*' : ''; |
174 | $$b =~ /(?:\G|\n)$indent\Q$$tag[0]\E(?:\n|\z)/gc; | |
175 | if (pos($$b) > $start) { | |
176 | my $body = substr($$b, $start, pos($$b) - $start); | |
177 | $self->{lineno} += () = $body =~ /\n/sg; | |
178 | next; | |
179 | } | |
180 | push(@{$self->{parser}->{problems}}, ['UNCLOSED-HEREDOC', $tag]); | |
181 | $$b =~ /(?:\G|\n).*\z/gc; # consume rest of input | |
bf42f0a0 ES |
182 | my $body = substr($$b, $start, pos($$b) - $start); |
183 | $self->{lineno} += () = $body =~ /\n/sg; | |
2b61c8dc | 184 | last; |
7d480473 ES |
185 | } |
186 | } | |
187 | ||
188 | sub scan_token { | |
189 | my $self = shift @_; | |
190 | my $b = $self->{buff}; | |
191 | my $token = ''; | |
bf42f0a0 | 192 | my ($start, $startln); |
7d480473 | 193 | RESTART: |
bf42f0a0 | 194 | $startln = $self->{lineno}; |
7d480473 | 195 | $$b =~ /\G[ \t]+/gc; # skip whitespace (but not newline) |
5f0321a9 | 196 | $start = pos($$b) || 0; |
bf42f0a0 | 197 | $self->{lineno}++, return ["\n", $start, pos($$b), $startln, $startln] if $$b =~ /\G#[^\n]*(?:\n|\z)/gc; # comment |
7d480473 ES |
198 | while (1) { |
199 | # slurp up non-special characters | |
200 | $token .= $1 if $$b =~ /\G([^\\;&|<>(){}'"\$\s]+)/gc; | |
201 | # handle special characters | |
202 | last unless $$b =~ /\G(.)/sgc; | |
203 | my $c = $1; | |
ca748f51 | 204 | pos($$b)--, last if $c =~ /^[ \t]$/; # whitespace ends token |
7d480473 ES |
205 | pos($$b)--, last if length($token) && $c =~ /^[;&|<>(){}\n]$/; |
206 | $token .= $self->scan_sqstring(), next if $c eq "'"; | |
207 | $token .= $self->scan_dqstring(), next if $c eq '"'; | |
208 | $token .= $c . $self->scan_dollar(), next if $c eq '$'; | |
bf42f0a0 | 209 | $self->{lineno}++, $self->swallow_heredocs(), $token = $c, last if $c eq "\n"; |
7d480473 ES |
210 | $token = $self->scan_op($c), last if $c =~ /^[;&|<>]$/; |
211 | $token = $c, last if $c =~ /^[(){}]$/; | |
212 | if ($c eq '\\') { | |
213 | $token .= '\\', last unless $$b =~ /\G(.)/sgc; | |
214 | $c = $1; | |
bf42f0a0 ES |
215 | $self->{lineno}++, next if $c eq "\n" && length($token); # line splice |
216 | $self->{lineno}++, goto RESTART if $c eq "\n"; # line splice | |
7d480473 ES |
217 | $token .= '\\' . $c; |
218 | next; | |
219 | } | |
220 | die("internal error scanning character '$c'\n"); | |
221 | } | |
bf42f0a0 | 222 | return length($token) ? [$token, $start, pos($$b), $startln, $self->{lineno}] : undef; |
7d480473 ES |
223 | } |
224 | ||
65945541 ES |
225 | # ShellParser parses POSIX shell scripts (with minor extensions for Bash). It |
226 | # is a recursive descent parser very roughly modeled after section 2.10 "Shell | |
227 | # Grammar" of POSIX chapter 2 "Shell Command Language". | |
228 | package ShellParser; | |
229 | ||
230 | sub new { | |
231 | my ($class, $s) = @_; | |
232 | my $self = bless { | |
233 | buff => [], | |
234 | stop => [], | |
235 | output => [] | |
236 | } => $class; | |
237 | $self->{lexer} = Lexer->new($self, $s); | |
238 | return $self; | |
239 | } | |
240 | ||
241 | sub next_token { | |
242 | my $self = shift @_; | |
243 | return pop(@{$self->{buff}}) if @{$self->{buff}}; | |
244 | return $self->{lexer}->scan_token(); | |
245 | } | |
246 | ||
247 | sub untoken { | |
248 | my $self = shift @_; | |
249 | push(@{$self->{buff}}, @_); | |
250 | } | |
251 | ||
252 | sub peek { | |
253 | my $self = shift @_; | |
254 | my $token = $self->next_token(); | |
255 | return undef unless defined($token); | |
256 | $self->untoken($token); | |
257 | return $token; | |
258 | } | |
259 | ||
260 | sub stop_at { | |
261 | my ($self, $token) = @_; | |
262 | return 1 unless defined($token); | |
263 | my $stop = ${$self->{stop}}[-1] if @{$self->{stop}}; | |
5f0321a9 | 264 | return defined($stop) && $token->[0] =~ $stop; |
65945541 ES |
265 | } |
266 | ||
267 | sub expect { | |
268 | my ($self, $expect) = @_; | |
269 | my $token = $self->next_token(); | |
5f0321a9 ES |
270 | return $token if defined($token) && $token->[0] eq $expect; |
271 | push(@{$self->{output}}, "?!ERR?! expected '$expect' but found '" . (defined($token) ? $token->[0] : "<end-of-input>") . "'\n"); | |
65945541 ES |
272 | $self->untoken($token) if defined($token); |
273 | return (); | |
274 | } | |
275 | ||
276 | sub optional_newlines { | |
277 | my $self = shift @_; | |
278 | my @tokens; | |
279 | while (my $token = $self->peek()) { | |
5f0321a9 | 280 | last unless $token->[0] eq "\n"; |
65945541 ES |
281 | push(@tokens, $self->next_token()); |
282 | } | |
283 | return @tokens; | |
284 | } | |
285 | ||
286 | sub parse_group { | |
287 | my $self = shift @_; | |
288 | return ($self->parse(qr/^}$/), | |
289 | $self->expect('}')); | |
290 | } | |
291 | ||
292 | sub parse_subshell { | |
293 | my $self = shift @_; | |
294 | return ($self->parse(qr/^\)$/), | |
295 | $self->expect(')')); | |
296 | } | |
297 | ||
298 | sub parse_case_pattern { | |
299 | my $self = shift @_; | |
300 | my @tokens; | |
301 | while (defined(my $token = $self->next_token())) { | |
302 | push(@tokens, $token); | |
5f0321a9 | 303 | last if $token->[0] eq ')'; |
65945541 ES |
304 | } |
305 | return @tokens; | |
306 | } | |
307 | ||
308 | sub parse_case { | |
309 | my $self = shift @_; | |
310 | my @tokens; | |
311 | push(@tokens, | |
312 | $self->next_token(), # subject | |
313 | $self->optional_newlines(), | |
314 | $self->expect('in'), | |
315 | $self->optional_newlines()); | |
316 | while (1) { | |
317 | my $token = $self->peek(); | |
5f0321a9 | 318 | last unless defined($token) && $token->[0] ne 'esac'; |
65945541 ES |
319 | push(@tokens, |
320 | $self->parse_case_pattern(), | |
321 | $self->optional_newlines(), | |
322 | $self->parse(qr/^(?:;;|esac)$/)); # item body | |
323 | $token = $self->peek(); | |
5f0321a9 | 324 | last unless defined($token) && $token->[0] ne 'esac'; |
65945541 ES |
325 | push(@tokens, |
326 | $self->expect(';;'), | |
327 | $self->optional_newlines()); | |
328 | } | |
329 | push(@tokens, $self->expect('esac')); | |
330 | return @tokens; | |
331 | } | |
332 | ||
333 | sub parse_for { | |
334 | my $self = shift @_; | |
335 | my @tokens; | |
336 | push(@tokens, | |
337 | $self->next_token(), # variable | |
338 | $self->optional_newlines()); | |
339 | my $token = $self->peek(); | |
5f0321a9 | 340 | if (defined($token) && $token->[0] eq 'in') { |
65945541 ES |
341 | push(@tokens, |
342 | $self->expect('in'), | |
343 | $self->optional_newlines()); | |
344 | } | |
345 | push(@tokens, | |
346 | $self->parse(qr/^do$/), # items | |
347 | $self->expect('do'), | |
348 | $self->optional_newlines(), | |
349 | $self->parse_loop_body(), | |
350 | $self->expect('done')); | |
351 | return @tokens; | |
352 | } | |
353 | ||
354 | sub parse_if { | |
355 | my $self = shift @_; | |
356 | my @tokens; | |
357 | while (1) { | |
358 | push(@tokens, | |
359 | $self->parse(qr/^then$/), # if/elif condition | |
360 | $self->expect('then'), | |
361 | $self->optional_newlines(), | |
362 | $self->parse(qr/^(?:elif|else|fi)$/)); # if/elif body | |
363 | my $token = $self->peek(); | |
5f0321a9 | 364 | last unless defined($token) && $token->[0] eq 'elif'; |
65945541 ES |
365 | push(@tokens, $self->expect('elif')); |
366 | } | |
367 | my $token = $self->peek(); | |
5f0321a9 | 368 | if (defined($token) && $token->[0] eq 'else') { |
65945541 ES |
369 | push(@tokens, |
370 | $self->expect('else'), | |
371 | $self->optional_newlines(), | |
372 | $self->parse(qr/^fi$/)); # else body | |
373 | } | |
374 | push(@tokens, $self->expect('fi')); | |
375 | return @tokens; | |
376 | } | |
377 | ||
378 | sub parse_loop_body { | |
379 | my $self = shift @_; | |
380 | return $self->parse(qr/^done$/); | |
381 | } | |
382 | ||
383 | sub parse_loop { | |
384 | my $self = shift @_; | |
385 | return ($self->parse(qr/^do$/), # condition | |
386 | $self->expect('do'), | |
387 | $self->optional_newlines(), | |
388 | $self->parse_loop_body(), | |
389 | $self->expect('done')); | |
390 | } | |
391 | ||
392 | sub parse_func { | |
393 | my $self = shift @_; | |
394 | return ($self->expect('('), | |
395 | $self->expect(')'), | |
396 | $self->optional_newlines(), | |
397 | $self->parse_cmd()); # body | |
398 | } | |
399 | ||
400 | sub parse_bash_array_assignment { | |
401 | my $self = shift @_; | |
402 | my @tokens = $self->expect('('); | |
403 | while (defined(my $token = $self->next_token())) { | |
404 | push(@tokens, $token); | |
5f0321a9 | 405 | last if $token->[0] eq ')'; |
65945541 ES |
406 | } |
407 | return @tokens; | |
408 | } | |
409 | ||
410 | my %compound = ( | |
411 | '{' => \&parse_group, | |
412 | '(' => \&parse_subshell, | |
413 | 'case' => \&parse_case, | |
414 | 'for' => \&parse_for, | |
415 | 'if' => \&parse_if, | |
416 | 'until' => \&parse_loop, | |
417 | 'while' => \&parse_loop); | |
418 | ||
419 | sub parse_cmd { | |
420 | my $self = shift @_; | |
421 | my $cmd = $self->next_token(); | |
422 | return () unless defined($cmd); | |
5f0321a9 | 423 | return $cmd if $cmd->[0] eq "\n"; |
65945541 ES |
424 | |
425 | my $token; | |
426 | my @tokens = $cmd; | |
5f0321a9 | 427 | if ($cmd->[0] eq '!') { |
65945541 ES |
428 | push(@tokens, $self->parse_cmd()); |
429 | return @tokens; | |
5f0321a9 | 430 | } elsif (my $f = $compound{$cmd->[0]}) { |
65945541 | 431 | push(@tokens, $self->$f()); |
5f0321a9 ES |
432 | } elsif (defined($token = $self->peek()) && $token->[0] eq '(') { |
433 | if ($cmd->[0] !~ /\w=$/) { | |
65945541 ES |
434 | push(@tokens, $self->parse_func()); |
435 | return @tokens; | |
436 | } | |
5f0321a9 ES |
437 | my @array = $self->parse_bash_array_assignment(); |
438 | $tokens[-1]->[0] .= join(' ', map {$_->[0]} @array); | |
439 | $tokens[-1]->[2] = $array[$#array][2] if @array; | |
65945541 ES |
440 | } |
441 | ||
442 | while (defined(my $token = $self->next_token())) { | |
443 | $self->untoken($token), last if $self->stop_at($token); | |
444 | push(@tokens, $token); | |
5f0321a9 | 445 | last if $token->[0] =~ /^(?:[;&\n|]|&&|\|\|)$/; |
65945541 | 446 | } |
5f0321a9 | 447 | push(@tokens, $self->next_token()) if $tokens[-1]->[0] ne "\n" && defined($token = $self->peek()) && $token->[0] eq "\n"; |
65945541 ES |
448 | return @tokens; |
449 | } | |
450 | ||
451 | sub accumulate { | |
452 | my ($self, $tokens, $cmd) = @_; | |
453 | push(@$tokens, @$cmd); | |
454 | } | |
455 | ||
456 | sub parse { | |
457 | my ($self, $stop) = @_; | |
458 | push(@{$self->{stop}}, $stop); | |
459 | goto DONE if $self->stop_at($self->peek()); | |
460 | my @tokens; | |
461 | while (my @cmd = $self->parse_cmd()) { | |
462 | $self->accumulate(\@tokens, \@cmd); | |
463 | last if $self->stop_at($self->peek()); | |
464 | } | |
465 | DONE: | |
466 | pop(@{$self->{stop}}); | |
467 | return @tokens; | |
468 | } | |
469 | ||
6d932e92 ES |
470 | # TestParser is a subclass of ShellParser which, beyond parsing shell script |
471 | # code, is also imbued with semantic knowledge of test construction, and checks | |
472 | # tests for common problems (such as broken &&-chains) which might hide bugs in | |
473 | # the tests themselves or in behaviors being exercised by the tests. As such, | |
474 | # TestParser is only called upon to parse test bodies, not the top-level | |
475 | # scripts in which the tests are defined. | |
476 | package TestParser; | |
477 | ||
478 | use base 'ShellParser'; | |
479 | ||
73c768da ES |
480 | sub new { |
481 | my $class = shift @_; | |
482 | my $self = $class->SUPER::new(@_); | |
483 | $self->{problems} = []; | |
484 | return $self; | |
485 | } | |
486 | ||
6d932e92 ES |
487 | sub find_non_nl { |
488 | my $tokens = shift @_; | |
489 | my $n = shift @_; | |
490 | $n = $#$tokens if !defined($n); | |
5f0321a9 | 491 | $n-- while $n >= 0 && $$tokens[$n]->[0] eq "\n"; |
6d932e92 ES |
492 | return $n; |
493 | } | |
494 | ||
495 | sub ends_with { | |
496 | my ($tokens, $needles) = @_; | |
497 | my $n = find_non_nl($tokens); | |
498 | for my $needle (reverse(@$needles)) { | |
499 | return undef if $n < 0; | |
500 | $n = find_non_nl($tokens, $n), next if $needle eq "\n"; | |
5f0321a9 | 501 | return undef if $$tokens[$n]->[0] !~ $needle; |
6d932e92 ES |
502 | $n--; |
503 | } | |
504 | return 1; | |
505 | } | |
506 | ||
35ebb1e3 ES |
507 | sub match_ending { |
508 | my ($tokens, $endings) = @_; | |
509 | for my $needles (@$endings) { | |
510 | next if @$tokens < scalar(grep {$_ ne "\n"} @$needles); | |
511 | return 1 if ends_with($tokens, $needles); | |
512 | } | |
513 | return undef; | |
514 | } | |
515 | ||
fd4094c3 ES |
516 | sub parse_loop_body { |
517 | my $self = shift @_; | |
518 | my @tokens = $self->SUPER::parse_loop_body(@_); | |
519 | # did loop signal failure via "|| return" or "|| exit"? | |
5f0321a9 | 520 | return @tokens if !@tokens || grep {$_->[0] =~ /^(?:return|exit|\$\?)$/} @tokens; |
ae0c55ab ES |
521 | # did loop upstream of a pipe signal failure via "|| echo 'impossible |
522 | # text'" as the final command in the loop body? | |
523 | return @tokens if ends_with(\@tokens, [qr/^\|\|$/, "\n", qr/^echo$/, qr/^.+$/]); | |
fd4094c3 ES |
524 | # flag missing "return/exit" handling explicit failure in loop body |
525 | my $n = find_non_nl(\@tokens); | |
73c768da | 526 | push(@{$self->{problems}}, ['LOOP', $tokens[$n]]); |
fd4094c3 ES |
527 | return @tokens; |
528 | } | |
529 | ||
35ebb1e3 | 530 | my @safe_endings = ( |
aabc3258 | 531 | [qr/^(?:&&|\|\||\||&)$/], |
35ebb1e3 ES |
532 | [qr/^(?:exit|return)$/, qr/^(?:\d+|\$\?)$/], |
533 | [qr/^(?:exit|return)$/, qr/^(?:\d+|\$\?)$/, qr/^;$/], | |
534 | [qr/^(?:exit|return|continue)$/], | |
535 | [qr/^(?:exit|return|continue)$/, qr/^;$/]); | |
536 | ||
6d932e92 ES |
537 | sub accumulate { |
538 | my ($self, $tokens, $cmd) = @_; | |
73c768da | 539 | my $problems = $self->{problems}; |
c90d81f8 ES |
540 | |
541 | # no previous command to check for missing "&&" | |
6d932e92 | 542 | goto DONE unless @$tokens; |
c90d81f8 ES |
543 | |
544 | # new command is empty line; can't yet check if previous is missing "&&" | |
5f0321a9 | 545 | goto DONE if @$cmd == 1 && $$cmd[0]->[0] eq "\n"; |
6d932e92 | 546 | |
35ebb1e3 ES |
547 | # did previous command end with "&&", "|", "|| return" or similar? |
548 | goto DONE if match_ending($tokens, \@safe_endings); | |
6d932e92 | 549 | |
a8f30ee0 ES |
550 | # if this command handles "$?" specially, then okay for previous |
551 | # command to be missing "&&" | |
552 | for my $token (@$cmd) { | |
5f0321a9 | 553 | goto DONE if $token->[0] =~ /\$\?/; |
a8f30ee0 ES |
554 | } |
555 | ||
832c68b3 ES |
556 | # if this command is "false", "return 1", or "exit 1" (which signal |
557 | # failure explicitly), then okay for all preceding commands to be | |
558 | # missing "&&" | |
5f0321a9 | 559 | if ($$cmd[0]->[0] =~ /^(?:false|return|exit)$/) { |
73c768da | 560 | @$problems = grep {$_->[0] ne 'AMP'} @$problems; |
832c68b3 ES |
561 | goto DONE; |
562 | } | |
563 | ||
6d932e92 ES |
564 | # flag missing "&&" at end of previous command |
565 | my $n = find_non_nl($tokens); | |
73c768da | 566 | push(@$problems, ['AMP', $tokens->[$n]]) unless $n < 0; |
6d932e92 ES |
567 | |
568 | DONE: | |
569 | $self->SUPER::accumulate($tokens, $cmd); | |
570 | } | |
571 | ||
d99ebd6d ES |
572 | # ScriptParser is a subclass of ShellParser which identifies individual test |
573 | # definitions within test scripts, and passes each test body through TestParser | |
574 | # to identify possible problems. ShellParser detects test definitions not only | |
575 | # at the top-level of test scripts but also within compound commands such as | |
576 | # loops and function definitions. | |
b4f25b07 ES |
577 | package ScriptParser; |
578 | ||
d99ebd6d ES |
579 | use base 'ShellParser'; |
580 | ||
b4f25b07 ES |
581 | sub new { |
582 | my $class = shift @_; | |
d99ebd6d | 583 | my $self = $class->SUPER::new(@_); |
b4f25b07 ES |
584 | $self->{ntests} = 0; |
585 | return $self; | |
586 | } | |
587 | ||
d99ebd6d ES |
588 | # extract the raw content of a token, which may be a single string or a |
589 | # composition of multiple strings and non-string character runs; for instance, | |
590 | # `"test body"` unwraps to `test body`; `word"a b"42'c d'` to `worda b42c d` | |
591 | sub unwrap { | |
5f0321a9 | 592 | my $token = (@_ ? shift @_ : $_)->[0]; |
d99ebd6d ES |
593 | # simple case: 'sqstring' or "dqstring" |
594 | return $token if $token =~ s/^'([^']*)'$/$1/; | |
595 | return $token if $token =~ s/^"([^"]*)"$/$1/; | |
596 | ||
597 | # composite case | |
598 | my ($s, $q, $escaped); | |
599 | while (1) { | |
600 | # slurp up non-special characters | |
601 | $s .= $1 if $token =~ /\G([^\\'"]*)/gc; | |
602 | # handle special characters | |
603 | last unless $token =~ /\G(.)/sgc; | |
604 | my $c = $1; | |
605 | $q = undef, next if defined($q) && $c eq $q; | |
606 | $q = $c, next if !defined($q) && $c =~ /^['"]$/; | |
607 | if ($c eq '\\') { | |
608 | last unless $token =~ /\G(.)/sgc; | |
609 | $c = $1; | |
610 | $s .= '\\' if $c eq "\n"; # preserve line splice | |
611 | } | |
612 | $s .= $c; | |
613 | } | |
614 | return $s | |
615 | } | |
616 | ||
617 | sub check_test { | |
618 | my $self = shift @_; | |
619 | my ($title, $body) = map(unwrap, @_); | |
620 | $self->{ntests}++; | |
621 | my $parser = TestParser->new(\$body); | |
622 | my @tokens = $parser->parse(); | |
73c768da ES |
623 | my $problems = $parser->{problems}; |
624 | return unless $emit_all || @$problems; | |
7c04aa73 | 625 | my $c = main::fd_colors(1); |
48d69d8f | 626 | my $lineno = $_[1]->[3]; |
73c768da ES |
627 | my $start = 0; |
628 | my $checked = ''; | |
629 | for (sort {$a->[1]->[2] <=> $b->[1]->[2]} @$problems) { | |
630 | my ($label, $token) = @$_; | |
631 | my $pos = $token->[2]; | |
632 | $checked .= substr($body, $start, $pos - $start) . " ?!$label?! "; | |
633 | $start = $pos; | |
634 | } | |
635 | $checked .= substr($body, $start); | |
48d69d8f ES |
636 | $checked =~ s/^/$lineno++ . ' '/mge; |
637 | $checked =~ s/^\d+ \n//; | |
73c768da ES |
638 | $checked =~ s/(\s) \?!/$1?!/mg; |
639 | $checked =~ s/\?! (\s)/?!$1/mg; | |
7c04aa73 | 640 | $checked =~ s/(\?![^?]+\?!)/$c->{rev}$c->{red}$1$c->{reset}/mg; |
48d69d8f | 641 | $checked =~ s/^\d+/$c->{dim}$&$c->{reset}/mg; |
d99ebd6d | 642 | $checked .= "\n" unless $checked =~ /\n$/; |
7c04aa73 | 643 | push(@{$self->{output}}, "$c->{blue}# chainlint: $title$c->{reset}\n$checked"); |
d99ebd6d ES |
644 | } |
645 | ||
b4f25b07 | 646 | sub parse_cmd { |
d99ebd6d ES |
647 | my $self = shift @_; |
648 | my @tokens = $self->SUPER::parse_cmd(); | |
5f0321a9 | 649 | return @tokens unless @tokens && $tokens[0]->[0] =~ /^test_expect_(?:success|failure)$/; |
d99ebd6d | 650 | my $n = $#tokens; |
5f0321a9 | 651 | $n-- while $n >= 0 && $tokens[$n]->[0] =~ /^(?:[;&\n|]|&&|\|\|)$/; |
d99ebd6d ES |
652 | $self->check_test($tokens[1], $tokens[2]) if $n == 2; # title body |
653 | $self->check_test($tokens[2], $tokens[3]) if $n > 2; # prereq title body | |
654 | return @tokens; | |
b4f25b07 ES |
655 | } |
656 | ||
657 | # main contains high-level functionality for processing command-line switches, | |
658 | # feeding input test scripts to ScriptParser, and reporting results. | |
659 | package main; | |
660 | ||
661 | my $getnow = sub { return time(); }; | |
662 | my $interval = sub { return time() - shift; }; | |
663 | if (eval {require Time::HiRes; Time::HiRes->import(); 1;}) { | |
664 | $getnow = sub { return [Time::HiRes::gettimeofday()]; }; | |
665 | $interval = sub { return Time::HiRes::tv_interval(shift); }; | |
666 | } | |
667 | ||
7c04aa73 ES |
668 | # Restore TERM if test framework set it to "dumb" so 'tput' will work; do this |
669 | # outside of get_colors() since under 'ithreads' all threads use %ENV of main | |
670 | # thread and ignore %ENV changes in subthreads. | |
671 | $ENV{TERM} = $ENV{USER_TERM} if $ENV{USER_TERM}; | |
672 | ||
48d69d8f | 673 | my @NOCOLORS = (bold => '', rev => '', dim => '', reset => '', blue => '', green => '', red => ''); |
7c04aa73 ES |
674 | my %COLORS = (); |
675 | sub get_colors { | |
676 | return \%COLORS if %COLORS; | |
5451877f | 677 | if (exists($ENV{NO_COLOR})) { |
7c04aa73 ES |
678 | %COLORS = @NOCOLORS; |
679 | return \%COLORS; | |
680 | } | |
5451877f ES |
681 | if ($ENV{TERM} =~ /xterm|xterm-\d+color|xterm-new|xterm-direct|nsterm|nsterm-\d+color|nsterm-direct/) { |
682 | %COLORS = (bold => "\e[1m", | |
683 | rev => "\e[7m", | |
48d69d8f | 684 | dim => "\e[2m", |
5451877f ES |
685 | reset => "\e[0m", |
686 | blue => "\e[34m", | |
687 | green => "\e[32m", | |
688 | red => "\e[31m"); | |
689 | return \%COLORS; | |
690 | } | |
691 | if (system("tput sgr0 >/dev/null 2>&1") == 0 && | |
692 | system("tput bold >/dev/null 2>&1") == 0 && | |
693 | system("tput rev >/dev/null 2>&1") == 0 && | |
48d69d8f | 694 | system("tput dim >/dev/null 2>&1") == 0 && |
5451877f ES |
695 | system("tput setaf 1 >/dev/null 2>&1") == 0) { |
696 | %COLORS = (bold => `tput bold`, | |
697 | rev => `tput rev`, | |
48d69d8f | 698 | dim => `tput dim`, |
5451877f ES |
699 | reset => `tput sgr0`, |
700 | blue => `tput setaf 4`, | |
701 | green => `tput setaf 2`, | |
702 | red => `tput setaf 1`); | |
703 | return \%COLORS; | |
704 | } | |
705 | %COLORS = @NOCOLORS; | |
7c04aa73 ES |
706 | return \%COLORS; |
707 | } | |
708 | ||
709 | my %FD_COLORS = (); | |
710 | sub fd_colors { | |
711 | my $fd = shift; | |
712 | return $FD_COLORS{$fd} if exists($FD_COLORS{$fd}); | |
713 | $FD_COLORS{$fd} = -t $fd ? get_colors() : {@NOCOLORS}; | |
714 | return $FD_COLORS{$fd}; | |
715 | } | |
716 | ||
29fb2ec3 ES |
717 | sub ncores { |
718 | # Windows | |
719 | return $ENV{NUMBER_OF_PROCESSORS} if exists($ENV{NUMBER_OF_PROCESSORS}); | |
720 | # Linux / MSYS2 / Cygwin / WSL | |
1f51b77f | 721 | do { local @ARGV='/proc/cpuinfo'; return scalar(grep(/^processor[\s\d]*:/, <>)); } if -r '/proc/cpuinfo'; |
29fb2ec3 ES |
722 | # macOS & BSD |
723 | return qx/sysctl -n hw.ncpu/ if $^O =~ /(?:^darwin$|bsd)/; | |
724 | return 1; | |
725 | } | |
726 | ||
b4f25b07 ES |
727 | sub show_stats { |
728 | my ($start_time, $stats) = @_; | |
729 | my $walltime = $interval->($start_time); | |
730 | my ($usertime) = times(); | |
731 | my ($total_workers, $total_scripts, $total_tests, $total_errs) = (0, 0, 0, 0); | |
7c04aa73 ES |
732 | my $c = fd_colors(2); |
733 | print(STDERR $c->{green}); | |
b4f25b07 ES |
734 | for (@$stats) { |
735 | my ($worker, $nscripts, $ntests, $nerrs) = @$_; | |
736 | print(STDERR "worker $worker: $nscripts scripts, $ntests tests, $nerrs errors\n"); | |
737 | $total_workers++; | |
738 | $total_scripts += $nscripts; | |
739 | $total_tests += $ntests; | |
740 | $total_errs += $nerrs; | |
741 | } | |
7c04aa73 | 742 | printf(STDERR "total: %d workers, %d scripts, %d tests, %d errors, %.2fs/%.2fs (wall/user)$c->{reset}\n", $total_workers, $total_scripts, $total_tests, $total_errs, $walltime, $usertime); |
b4f25b07 ES |
743 | } |
744 | ||
745 | sub check_script { | |
746 | my ($id, $next_script, $emit) = @_; | |
747 | my ($nscripts, $ntests, $nerrs) = (0, 0, 0); | |
748 | while (my $path = $next_script->()) { | |
749 | $nscripts++; | |
750 | my $fh; | |
751 | unless (open($fh, "<", $path)) { | |
752 | $emit->("?!ERR?! $path: $!\n"); | |
753 | next; | |
754 | } | |
755 | my $s = do { local $/; <$fh> }; | |
756 | close($fh); | |
757 | my $parser = ScriptParser->new(\$s); | |
758 | 1 while $parser->parse_cmd(); | |
759 | if (@{$parser->{output}}) { | |
7c04aa73 | 760 | my $c = fd_colors(1); |
b4f25b07 | 761 | my $s = join('', @{$parser->{output}}); |
7c04aa73 | 762 | $emit->("$c->{bold}$c->{blue}# chainlint: $path$c->{reset}\n" . $s); |
b4f25b07 ES |
763 | $nerrs += () = $s =~ /\?![^?]+\?!/g; |
764 | } | |
765 | $ntests += $parser->{ntests}; | |
766 | } | |
767 | return [$id, $nscripts, $ntests, $nerrs]; | |
768 | } | |
769 | ||
770 | sub exit_code { | |
771 | my $stats = shift @_; | |
772 | for (@$stats) { | |
773 | my ($worker, $nscripts, $ntests, $nerrs) = @$_; | |
774 | return 1 if $nerrs; | |
775 | } | |
776 | return 0; | |
777 | } | |
778 | ||
779 | Getopt::Long::Configure(qw{bundling}); | |
780 | GetOptions( | |
781 | "emit-all!" => \$emit_all, | |
29fb2ec3 | 782 | "jobs|j=i" => \$jobs, |
b4f25b07 | 783 | "stats|show-stats!" => \$show_stats) or die("option error\n"); |
29fb2ec3 | 784 | $jobs = ncores() if $jobs < 1; |
b4f25b07 ES |
785 | |
786 | my $start_time = $getnow->(); | |
787 | my @stats; | |
788 | ||
789 | my @scripts; | |
790 | push(@scripts, File::Glob::bsd_glob($_)) for (@ARGV); | |
791 | unless (@scripts) { | |
792 | show_stats($start_time, \@stats) if $show_stats; | |
793 | exit; | |
794 | } | |
795 | ||
29fb2ec3 ES |
796 | unless ($Config{useithreads} && eval { |
797 | require threads; threads->import(); | |
798 | require Thread::Queue; Thread::Queue->import(); | |
799 | 1; | |
800 | }) { | |
801 | push(@stats, check_script(1, sub { shift(@scripts); }, sub { print(@_); })); | |
802 | show_stats($start_time, \@stats) if $show_stats; | |
803 | exit(exit_code(\@stats)); | |
804 | } | |
805 | ||
806 | my $script_queue = Thread::Queue->new(); | |
807 | my $output_queue = Thread::Queue->new(); | |
808 | ||
809 | sub next_script { return $script_queue->dequeue(); } | |
810 | sub emit { $output_queue->enqueue(@_); } | |
811 | ||
812 | sub monitor { | |
813 | while (my $s = $output_queue->dequeue()) { | |
814 | print($s); | |
815 | } | |
816 | } | |
817 | ||
818 | my $mon = threads->create({'context' => 'void'}, \&monitor); | |
819 | threads->create({'context' => 'list'}, \&check_script, $_, \&next_script, \&emit) for 1..$jobs; | |
820 | ||
821 | $script_queue->enqueue(@scripts); | |
822 | $script_queue->end(); | |
823 | ||
824 | for (threads->list()) { | |
825 | push(@stats, $_->join()) unless $_ == $mon; | |
826 | } | |
827 | ||
828 | $output_queue->end(); | |
829 | $mon->join(); | |
830 | ||
b4f25b07 ES |
831 | show_stats($start_time, \@stats) if $show_stats; |
832 | exit(exit_code(\@stats)); |