/* pathexp.c -- The shell interface to the globbing library. */

/* Copyright (C) 1995-2020 Free Software Foundation, Inc.

   This file is part of GNU Bash, the Bourne Again SHell.

   Bash is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   Bash is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with Bash.  If not, see <http://www.gnu.org/licenses/>.
*/
20 | ||
21 | #include "config.h" | |
22 | ||
23 | #include "bashtypes.h" | |
24 | #include <stdio.h> | |
25 | ||
26 | #if defined (HAVE_UNISTD_H) | |
27 | # include <unistd.h> | |
28 | #endif | |
29 | ||
30 | #include "bashansi.h" | |
31 | ||
32 | #include "shell.h" | |
33 | #include "pathexp.h" | |
34 | #include "flags.h" | |
35 | ||
36 | #include "shmbutil.h" | |
37 | #include "bashintl.h" | |
38 | ||
39 | #include <glob/strmatch.h> | |
40 | ||
/* Forward declarations for file-local helpers that are defined later in
   this file. */
static int glob_name_is_acceptable PARAMS((const char *));
static void ignore_globbed_names PARAMS((char **, sh_ignore_func_t *));
static char *split_ignorespec PARAMS((char *, int *));
44 | ||
45 | #include <glob/glob.h> | |
46 | ||
/* Control whether * matches .files in globbing.  setup_glob_ignore() sets
   this to 1 while GLOBIGNORE holds patterns and back to 0 when GLOBIGNORE
   is unset; shell_glob_filename() derives noglob_dot_filenames from it. */
int glob_dot_filenames;

/* Control whether the extended globbing features are enabled.  Initialized
   from EXTGLOB_DEFAULT. */
int extended_glob = EXTGLOB_DEFAULT;

/* Control enabling special handling of `**'.  When nonzero,
   shell_glob_filename() passes GX_GLOBSTAR to glob_filename(). */
int glob_star = 0;
55 | ||
56 | /* Return nonzero if STRING has any unquoted special globbing chars in it. | |
57 | This is supposed to be called when pathname expansion is performed, so | |
58 | it implements the rules in Posix 2.13.3, specifically that an unquoted | |
59 | slash cannot appear in a bracket expression. */ | |
60 | int | |
61 | unquoted_glob_pattern_p (string) | |
62 | register char *string; | |
63 | { | |
64 | register int c; | |
65 | char *send; | |
66 | int open, bsquote; | |
67 | ||
68 | DECLARE_MBSTATE; | |
69 | ||
70 | open = bsquote = 0; | |
71 | send = string + strlen (string); | |
72 | ||
73 | while (c = *string++) | |
74 | { | |
75 | switch (c) | |
76 | { | |
77 | case '?': | |
78 | case '*': | |
79 | return (1); | |
80 | ||
81 | case '[': | |
82 | open++; | |
83 | continue; | |
84 | ||
85 | case ']': | |
86 | if (open) /* XXX - if --open == 0? */ | |
87 | return (1); | |
88 | continue; | |
89 | ||
90 | case '/': | |
91 | if (open) | |
92 | open = 0; | |
93 | ||
94 | case '+': | |
95 | case '@': | |
96 | case '!': | |
97 | if (*string == '(') /*)*/ | |
98 | return (1); | |
99 | continue; | |
100 | ||
101 | /* A pattern can't end with a backslash, but a backslash in the pattern | |
102 | can be special to the matching engine, so we note it in case we | |
103 | need it later. */ | |
104 | case '\\': | |
105 | if (*string != '\0' && *string != '/') | |
106 | { | |
107 | bsquote = 1; | |
108 | string++; | |
109 | continue; | |
110 | } | |
111 | else if (open && *string == '/') | |
112 | { | |
113 | string++; /* quoted slashes in bracket expressions are ok */ | |
114 | continue; | |
115 | } | |
116 | else if (*string == 0) | |
117 | return (0); | |
118 | ||
119 | case CTLESC: | |
120 | if (*string++ == '\0') | |
121 | return (0); | |
122 | } | |
123 | ||
124 | /* Advance one fewer byte than an entire multibyte character to | |
125 | account for the auto-increment in the loop above. */ | |
126 | #ifdef HANDLE_MULTIBYTE | |
127 | string--; | |
128 | ADVANCE_CHAR_P (string, send - string); | |
129 | string++; | |
130 | #else | |
131 | ADVANCE_CHAR_P (string, send - string); | |
132 | #endif | |
133 | } | |
134 | ||
135 | #if 0 | |
136 | return (bsquote ? 2 : 0); | |
137 | #else | |
138 | return (0); | |
139 | #endif | |
140 | } | |
141 | ||
/* Return 1 if C is one of the characters that is `special' in a POSIX ERE
   and so needs to be quoted to match itself; return 0 for anything else
   (including 0). */
static inline int
ere_char (c)
     int c;
{
  /* The ERE metacharacters recognized here. */
  static const char ere_specials[] = ".[\\()*+?{|^$";
  const char *p;

  for (p = ere_specials; *p != '\0'; p++)
    {
      if (c == *p)
        return 1;
    }
  return 0;
}
168 | ||
/* This is only used to determine whether to backslash-quote a character.
   Return 1 if the character at S is `*', `[', `]', `?' or a backslash, or
   if it is `+', `@' or `!' and the next character is `('; otherwise
   return 0. */
int
glob_char_p (s)
     const char *s;
{
  int ch;

  ch = *s;

  if (ch == '*' || ch == '[' || ch == ']' || ch == '?' || ch == '\\')
    return 1;

  /* Extended-pattern introducers only count when followed by `('. */
  if ((ch == '+' || ch == '@' || ch == '!') && s[1] == '(')     /*)*/
    return 1;

  return 0;
}
191 | ||
192 | /* PATHNAME can contain characters prefixed by CTLESC; this indicates | |
193 | that the character is to be quoted. We quote it here in the style | |
194 | that the glob library recognizes. If flags includes QGLOB_CVTNULL, | |
195 | we change quoted null strings (pathname[0] == CTLNUL) into empty | |
196 | strings (pathname[0] == 0). If this is called after quote removal | |
197 | is performed, (flags & QGLOB_CVTNULL) should be 0; if called when quote | |
198 | removal has not been done (for example, before attempting to match a | |
199 | pattern while executing a case statement), flags should include | |
200 | QGLOB_CVTNULL. If flags includes QGLOB_CTLESC, we need to remove CTLESC | |
201 | quoting CTLESC or CTLNUL (as if dequote_string were called). If flags | |
202 | includes QGLOB_FILENAME, appropriate quoting to match a filename should be | |
203 | performed. QGLOB_REGEXP means we're quoting for a Posix ERE (for | |
204 | [[ string =~ pat ]]) and that requires some special handling. */ | |
205 | char * | |
206 | quote_string_for_globbing (pathname, qflags) | |
207 | const char *pathname; | |
208 | int qflags; | |
209 | { | |
210 | char *temp; | |
211 | register int i, j; | |
212 | int cclass, collsym, equiv, c, last_was_backslash; | |
213 | int savei, savej; | |
214 | ||
215 | temp = (char *)xmalloc (2 * strlen (pathname) + 1); | |
216 | ||
217 | if ((qflags & QGLOB_CVTNULL) && QUOTED_NULL (pathname)) | |
218 | { | |
219 | temp[0] = '\0'; | |
220 | return temp; | |
221 | } | |
222 | ||
223 | cclass = collsym = equiv = last_was_backslash = 0; | |
224 | for (i = j = 0; pathname[i]; i++) | |
225 | { | |
226 | /* Fix for CTLESC at the end of the string? */ | |
227 | if (pathname[i] == CTLESC && pathname[i+1] == '\0') | |
228 | { | |
229 | temp[j++] = pathname[i++]; | |
230 | break; | |
231 | } | |
232 | /* If we are parsing regexp, turn CTLESC CTLESC into CTLESC. It's not an | |
233 | ERE special character, so we should just be able to pass it through. */ | |
234 | else if ((qflags & (QGLOB_REGEXP|QGLOB_CTLESC)) && pathname[i] == CTLESC && (pathname[i+1] == CTLESC || pathname[i+1] == CTLNUL)) | |
235 | { | |
236 | i++; | |
237 | temp[j++] = pathname[i]; | |
238 | continue; | |
239 | } | |
240 | else if (pathname[i] == CTLESC) | |
241 | { | |
242 | convert_to_backslash: | |
243 | if ((qflags & QGLOB_FILENAME) && pathname[i+1] == '/') | |
244 | continue; | |
245 | /* What to do if preceding char is backslash? */ | |
246 | if (pathname[i+1] != CTLESC && (qflags & QGLOB_REGEXP) && ere_char (pathname[i+1]) == 0) | |
247 | continue; | |
248 | temp[j++] = '\\'; | |
249 | i++; | |
250 | if (pathname[i] == '\0') | |
251 | break; | |
252 | } | |
253 | else if ((qflags & QGLOB_REGEXP) && (i == 0 || pathname[i-1] != CTLESC) && pathname[i] == '[') /*]*/ | |
254 | { | |
255 | temp[j++] = pathname[i++]; /* open bracket */ | |
256 | savej = j; | |
257 | savei = i; | |
258 | c = pathname[i++]; /* c == char after open bracket */ | |
259 | if (c == '^') /* ignore pattern negation */ | |
260 | { | |
261 | temp[j++] = c; | |
262 | c = pathname[i++]; | |
263 | } | |
264 | if (c == ']') /* ignore right bracket if first char */ | |
265 | { | |
266 | temp[j++] = c; | |
267 | c = pathname[i++]; | |
268 | } | |
269 | do | |
270 | { | |
271 | if (c == 0) | |
272 | goto endpat; | |
273 | else if (c == CTLESC) | |
274 | { | |
275 | /* skip c, check for EOS, let assignment at end of loop */ | |
276 | /* pathname[i] == backslash-escaped character */ | |
277 | if (pathname[i] == 0) | |
278 | goto endpat; | |
279 | temp[j++] = pathname[i++]; | |
280 | } | |
281 | else if (c == '[' && pathname[i] == ':') | |
282 | { | |
283 | temp[j++] = c; | |
284 | temp[j++] = pathname[i++]; | |
285 | cclass = 1; | |
286 | } | |
287 | else if (cclass && c == ':' && pathname[i] == ']') | |
288 | { | |
289 | temp[j++] = c; | |
290 | temp[j++] = pathname[i++]; | |
291 | cclass = 0; | |
292 | } | |
293 | else if (c == '[' && pathname[i] == '=') | |
294 | { | |
295 | temp[j++] = c; | |
296 | temp[j++] = pathname[i++]; | |
297 | if (pathname[i] == ']') | |
298 | temp[j++] = pathname[i++]; /* right brack can be in equiv */ | |
299 | equiv = 1; | |
300 | } | |
301 | else if (equiv && c == '=' && pathname[i] == ']') | |
302 | { | |
303 | temp[j++] = c; | |
304 | temp[j++] = pathname[i++]; | |
305 | equiv = 0; | |
306 | } | |
307 | else if (c == '[' && pathname[i] == '.') | |
308 | { | |
309 | temp[j++] = c; | |
310 | temp[j++] = pathname[i++]; | |
311 | if (pathname[i] == ']') | |
312 | temp[j++] = pathname[i++]; /* right brack can be in collsym */ | |
313 | collsym = 1; | |
314 | } | |
315 | else if (collsym && c == '.' && pathname[i] == ']') | |
316 | { | |
317 | temp[j++] = c; | |
318 | temp[j++] = pathname[i++]; | |
319 | collsym = 0; | |
320 | } | |
321 | else | |
322 | temp[j++] = c; | |
323 | } | |
324 | while (((c = pathname[i++]) != ']') && c != 0); | |
325 | ||
326 | /* If we don't find the closing bracket before we hit the end of | |
327 | the string, rescan string without treating it as a bracket | |
328 | expression (has implications for backslash and special ERE | |
329 | chars) */ | |
330 | if (c == 0) | |
331 | { | |
332 | i = savei - 1; /* -1 for autoincrement above */ | |
333 | j = savej; | |
334 | continue; | |
335 | } | |
336 | ||
337 | temp[j++] = c; /* closing right bracket */ | |
338 | i--; /* increment will happen above in loop */ | |
339 | continue; /* skip double assignment below */ | |
340 | } | |
341 | else if (pathname[i] == '\\' && (qflags & QGLOB_REGEXP) == 0) | |
342 | { | |
343 | /* XXX - if not quoting regexp, use backslash as quote char. Should | |
344 | We just pass it through without treating it as special? That is | |
345 | what ksh93 seems to do. */ | |
346 | ||
347 | /* If we want to pass through backslash unaltered, comment out these | |
348 | lines. */ | |
349 | temp[j++] = '\\'; | |
350 | ||
351 | i++; | |
352 | if (pathname[i] == '\0') | |
353 | break; | |
354 | /* If we are turning CTLESC CTLESC into CTLESC, we need to do that | |
355 | even when the first CTLESC is preceded by a backslash. */ | |
356 | if ((qflags & QGLOB_CTLESC) && pathname[i] == CTLESC && (pathname[i+1] == CTLESC || pathname[i+1] == CTLNUL)) | |
357 | i++; /* skip over the CTLESC */ | |
358 | else if ((qflags & QGLOB_CTLESC) && pathname[i] == CTLESC) | |
359 | /* A little more general: if there is an unquoted backslash in the | |
360 | pattern and we are handling quoted characters in the pattern, | |
361 | convert the CTLESC to backslash and add the next character on | |
362 | the theory that the backslash will quote the next character | |
363 | but it would be inconsistent not to replace the CTLESC with | |
364 | another backslash here. We can't tell at this point whether the | |
365 | CTLESC comes from a backslash or other form of quoting in the | |
366 | original pattern. */ | |
367 | goto convert_to_backslash; | |
368 | } | |
369 | else if (pathname[i] == '\\' && (qflags & QGLOB_REGEXP)) | |
370 | last_was_backslash = 1; | |
371 | temp[j++] = pathname[i]; | |
372 | } | |
373 | endpat: | |
374 | temp[j] = '\0'; | |
375 | ||
376 | return (temp); | |
377 | } | |
378 | ||
379 | char * | |
380 | quote_globbing_chars (string) | |
381 | const char *string; | |
382 | { | |
383 | size_t slen; | |
384 | char *temp, *t; | |
385 | const char *s, *send; | |
386 | DECLARE_MBSTATE; | |
387 | ||
388 | slen = strlen (string); | |
389 | send = string + slen; | |
390 | ||
391 | temp = (char *)xmalloc (slen * 2 + 1); | |
392 | for (t = temp, s = string; *s; ) | |
393 | { | |
394 | if (glob_char_p (s)) | |
395 | *t++ = '\\'; | |
396 | ||
397 | /* Copy a single (possibly multibyte) character from s to t, | |
398 | incrementing both. */ | |
399 | COPY_CHAR_P (t, s, send); | |
400 | } | |
401 | *t = '\0'; | |
402 | return temp; | |
403 | } | |
404 | ||
405 | /* Call the glob library to do globbing on PATHNAME. */ | |
406 | char ** | |
407 | shell_glob_filename (pathname, qflags) | |
408 | const char *pathname; | |
409 | int qflags; | |
410 | { | |
411 | char *temp, **results; | |
412 | int gflags, quoted_pattern; | |
413 | ||
414 | noglob_dot_filenames = glob_dot_filenames == 0; | |
415 | ||
416 | temp = quote_string_for_globbing (pathname, QGLOB_FILENAME|qflags); | |
417 | gflags = glob_star ? GX_GLOBSTAR : 0; | |
418 | results = glob_filename (temp, gflags); | |
419 | free (temp); | |
420 | ||
421 | if (results && ((GLOB_FAILED (results)) == 0)) | |
422 | { | |
423 | if (should_ignore_glob_matches ()) | |
424 | ignore_glob_matches (results); | |
425 | if (results && results[0]) | |
426 | strvec_sort (results, 1); /* posix sort */ | |
427 | else | |
428 | { | |
429 | FREE (results); | |
430 | results = (char **)&glob_error_return; | |
431 | } | |
432 | } | |
433 | ||
434 | return (results); | |
435 | } | |
436 | ||
/* Stuff for GLOBIGNORE. */

/* Ignore-list state for the GLOBIGNORE variable.  The initializers are
   positional; judging by how setup_ignore_patterns() uses the structure
   they presumably correspond to varname, ignores, num_ignores,
   last_ignoreval and item_func, in that order -- confirm against the
   declaration of struct ignorevar. */
static struct ignorevar globignore =
{
  "GLOBIGNORE",
  (struct ign *)0,
  0,
  (char *)0,
  (sh_iv_item_func_t *)0,
};
447 | ||
448 | /* Set up to ignore some glob matches because the value of GLOBIGNORE | |
449 | has changed. If GLOBIGNORE is being unset, we also need to disable | |
450 | the globbing of filenames beginning with a `.'. */ | |
451 | void | |
452 | setup_glob_ignore (name) | |
453 | char *name; | |
454 | { | |
455 | char *v; | |
456 | ||
457 | v = get_string_value (name); | |
458 | setup_ignore_patterns (&globignore); | |
459 | ||
460 | if (globignore.num_ignores) | |
461 | glob_dot_filenames = 1; | |
462 | else if (v == 0) | |
463 | glob_dot_filenames = 0; | |
464 | } | |
465 | ||
466 | int | |
467 | should_ignore_glob_matches () | |
468 | { | |
469 | return globignore.num_ignores; | |
470 | } | |
471 | ||
472 | /* Return 0 if NAME matches a pattern in the globignore.ignores list. */ | |
473 | static int | |
474 | glob_name_is_acceptable (name) | |
475 | const char *name; | |
476 | { | |
477 | struct ign *p; | |
478 | char *n; | |
479 | int flags; | |
480 | ||
481 | /* . and .. are never matched. We extend this to the terminal component of a | |
482 | pathname. */ | |
483 | n = strrchr (name, '/'); | |
484 | if (n == 0 || n[1] == 0) | |
485 | n = (char *)name; | |
486 | else | |
487 | n++; | |
488 | ||
489 | if (n[0] == '.' && (n[1] == '\0' || (n[1] == '.' && n[2] == '\0'))) | |
490 | return (0); | |
491 | ||
492 | flags = FNM_PATHNAME | FNMATCH_EXTFLAG | FNMATCH_NOCASEGLOB; | |
493 | for (p = globignore.ignores; p->val; p++) | |
494 | { | |
495 | if (strmatch (p->val, (char *)name, flags) != FNM_NOMATCH) | |
496 | return (0); | |
497 | } | |
498 | return (1); | |
499 | } | |
500 | ||
501 | /* Internal function to test whether filenames in NAMES should be | |
502 | ignored. NAME_FUNC is a pointer to a function to call with each | |
503 | name. It returns non-zero if the name is acceptable to the particular | |
504 | ignore function which called _ignore_names; zero if the name should | |
505 | be removed from NAMES. */ | |
506 | ||
507 | static void | |
508 | ignore_globbed_names (names, name_func) | |
509 | char **names; | |
510 | sh_ignore_func_t *name_func; | |
511 | { | |
512 | char **newnames; | |
513 | int n, i; | |
514 | ||
515 | for (i = 0; names[i]; i++) | |
516 | ; | |
517 | newnames = strvec_create (i + 1); | |
518 | ||
519 | for (n = i = 0; names[i]; i++) | |
520 | { | |
521 | if ((*name_func) (names[i])) | |
522 | newnames[n++] = names[i]; | |
523 | else | |
524 | free (names[i]); | |
525 | } | |
526 | ||
527 | newnames[n] = (char *)NULL; | |
528 | ||
529 | if (n == 0) | |
530 | { | |
531 | names[0] = (char *)NULL; | |
532 | free (newnames); | |
533 | return; | |
534 | } | |
535 | ||
536 | /* Copy the acceptable names from NEWNAMES back to NAMES and set the | |
537 | new array end. */ | |
538 | for (n = 0; newnames[n]; n++) | |
539 | names[n] = newnames[n]; | |
540 | names[n] = (char *)NULL; | |
541 | free (newnames); | |
542 | } | |
543 | ||
544 | void | |
545 | ignore_glob_matches (names) | |
546 | char **names; | |
547 | { | |
548 | if (globignore.num_ignores == 0) | |
549 | return; | |
550 | ||
551 | ignore_globbed_names (names, glob_name_is_acceptable); | |
552 | } | |
553 | ||
554 | static char * | |
555 | split_ignorespec (s, ip) | |
556 | char *s; | |
557 | int *ip; | |
558 | { | |
559 | char *t; | |
560 | int n, i; | |
561 | ||
562 | if (s == 0) | |
563 | return 0; | |
564 | ||
565 | i = *ip; | |
566 | if (s[i] == 0) | |
567 | return 0; | |
568 | ||
569 | n = skip_to_delim (s, i, ":", SD_NOJMP|SD_EXTGLOB|SD_GLOB); | |
570 | t = substring (s, i, n); | |
571 | ||
572 | if (s[n] == ':') | |
573 | n++; | |
574 | *ip = n; | |
575 | return t; | |
576 | } | |
577 | ||
578 | void | |
579 | setup_ignore_patterns (ivp) | |
580 | struct ignorevar *ivp; | |
581 | { | |
582 | int numitems, maxitems, ptr; | |
583 | char *colon_bit, *this_ignoreval; | |
584 | struct ign *p; | |
585 | ||
586 | this_ignoreval = get_string_value (ivp->varname); | |
587 | ||
588 | /* If nothing has changed then just exit now. */ | |
589 | if ((this_ignoreval && ivp->last_ignoreval && STREQ (this_ignoreval, ivp->last_ignoreval)) || | |
590 | (!this_ignoreval && !ivp->last_ignoreval)) | |
591 | return; | |
592 | ||
593 | /* Oops. The ignore variable has changed. Re-parse it. */ | |
594 | ivp->num_ignores = 0; | |
595 | ||
596 | if (ivp->ignores) | |
597 | { | |
598 | for (p = ivp->ignores; p->val; p++) | |
599 | free(p->val); | |
600 | free (ivp->ignores); | |
601 | ivp->ignores = (struct ign *)NULL; | |
602 | } | |
603 | ||
604 | if (ivp->last_ignoreval) | |
605 | { | |
606 | free (ivp->last_ignoreval); | |
607 | ivp->last_ignoreval = (char *)NULL; | |
608 | } | |
609 | ||
610 | if (this_ignoreval == 0 || *this_ignoreval == '\0') | |
611 | return; | |
612 | ||
613 | ivp->last_ignoreval = savestring (this_ignoreval); | |
614 | ||
615 | numitems = maxitems = ptr = 0; | |
616 | ||
617 | #if 0 | |
618 | while (colon_bit = extract_colon_unit (this_ignoreval, &ptr)) | |
619 | #else | |
620 | while (colon_bit = split_ignorespec (this_ignoreval, &ptr)) | |
621 | #endif | |
622 | { | |
623 | if (numitems + 1 >= maxitems) | |
624 | { | |
625 | maxitems += 10; | |
626 | ivp->ignores = (struct ign *)xrealloc (ivp->ignores, maxitems * sizeof (struct ign)); | |
627 | } | |
628 | ivp->ignores[numitems].val = colon_bit; | |
629 | ivp->ignores[numitems].len = strlen (colon_bit); | |
630 | ivp->ignores[numitems].flags = 0; | |
631 | if (ivp->item_func) | |
632 | (*ivp->item_func) (&ivp->ignores[numitems]); | |
633 | numitems++; | |
634 | } | |
635 | ivp->ignores[numitems].val = (char *)NULL; | |
636 | ivp->num_ignores = numitems; | |
637 | } |