/* Source: git.ipfire.org Git - thirdparty/bash.git / blob - pathexp.c
   (bash-5.1 beta release) */
1 /* pathexp.c -- The shell interface to the globbing library. */
2
3 /* Copyright (C) 1995-2020 Free Software Foundation, Inc.
4
5 This file is part of GNU Bash, the Bourne Again SHell.
6
7 Bash is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
11
12 Bash is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with Bash. If not, see <http://www.gnu.org/licenses/>.
19 */
20
21 #include "config.h"
22
23 #include "bashtypes.h"
24 #include <stdio.h>
25
26 #if defined (HAVE_UNISTD_H)
27 # include <unistd.h>
28 #endif
29
30 #include "bashansi.h"
31
32 #include "shell.h"
33 #include "pathexp.h"
34 #include "flags.h"
35
36 #include "shmbutil.h"
37 #include "bashintl.h"
38
39 #include <glob/strmatch.h>
40
41 static int glob_name_is_acceptable PARAMS((const char *));
42 static void ignore_globbed_names PARAMS((char **, sh_ignore_func_t *));
43 static char *split_ignorespec PARAMS((char *, int *));
44
45 #if defined (USE_POSIX_GLOB_LIBRARY)
46 # include <glob.h>
47 typedef int posix_glob_errfunc_t PARAMS((const char *, int));
48 #else
49 # include <glob/glob.h>
50 #endif
51
/* Control whether * matches .files in globbing.  setup_glob_ignore sets
   this to 1 while GLOBIGNORE has patterns and resets it to 0 when the
   variable has no value. */
int glob_dot_filenames;

/* Control whether the extended globbing features are enabled.  Starts out
   as EXTGLOB_DEFAULT. */
int extended_glob = EXTGLOB_DEFAULT;

/* Control enabling special handling of `**'.  When nonzero,
   shell_glob_filename passes GX_GLOBSTAR to glob_filename. */
int glob_star = 0;
60
61 /* Return nonzero if STRING has any unquoted special globbing chars in it.
62 This is supposed to be called when pathname expansion is performed, so
63 it implements the rules in Posix 2.13.3, specifically that an unquoted
64 slash cannot appear in a bracket expression. */
65 int
66 unquoted_glob_pattern_p (string)
67 register char *string;
68 {
69 register int c;
70 char *send;
71 int open, bsquote;
72
73 DECLARE_MBSTATE;
74
75 open = bsquote = 0;
76 send = string + strlen (string);
77
78 while (c = *string++)
79 {
80 switch (c)
81 {
82 case '?':
83 case '*':
84 return (1);
85
86 case '[':
87 open++;
88 continue;
89
90 case ']':
91 if (open) /* XXX - if --open == 0? */
92 return (1);
93 continue;
94
95 case '/':
96 if (open)
97 open = 0;
98
99 case '+':
100 case '@':
101 case '!':
102 if (*string == '(') /*)*/
103 return (1);
104 continue;
105
106 /* A pattern can't end with a backslash, but a backslash in the pattern
107 can be special to the matching engine, so we note it in case we
108 need it later. */
109 case '\\':
110 if (*string != '\0' && *string != '/')
111 {
112 bsquote = 1;
113 string++;
114 continue;
115 }
116 else if (open && *string == '/')
117 {
118 string++; /* quoted slashes in bracket expressions are ok */
119 continue;
120 }
121 else if (*string == 0)
122 return (0);
123
124 case CTLESC:
125 if (*string++ == '\0')
126 return (0);
127 }
128
129 /* Advance one fewer byte than an entire multibyte character to
130 account for the auto-increment in the loop above. */
131 #ifdef HANDLE_MULTIBYTE
132 string--;
133 ADVANCE_CHAR_P (string, send - string);
134 string++;
135 #else
136 ADVANCE_CHAR_P (string, send - string);
137 #endif
138 }
139
140 #if 0
141 return (bsquote ? 2 : 0);
142 #else
143 return (0);
144 #endif
145 }
146
/* Return 1 if C is a character that is `special' in a POSIX ERE and needs to
   be quoted to match itself, 0 otherwise.  Note that `]' and `}' are not in
   the set. */
static inline int
ere_char (c)
     int c;
{
  switch (c)
    {
    case '.':
    case '[':
    case '\\':
    case '(':
    case ')':
    case '*':
    case '+':
    case '?':
    case '{':
    case '|':
    case '^':
    case '$':
      return 1;
    default:
      return 0;
    }
}
173
/* Return 1 if the character at S is special to the globbing code: one of
   `*', `?', `[', `]' or backslash, or one of `+', `@', `!' immediately
   followed by an open paren.  Return 0 otherwise.  Only S[0] (and S[1] for
   the paren check) is examined. */
int
glob_char_p (s)
     const char *s;
{
  int first;

  first = *s;

  if (first == '*' || first == '[' || first == ']' || first == '?' || first == '\\')
    return 1;

  if ((first == '+' || first == '@' || first == '!') && s[1] == '(')    /*)*/
    return 1;

  return 0;
}
195
196 /* PATHNAME can contain characters prefixed by CTLESC; this indicates
197 that the character is to be quoted. We quote it here in the style
198 that the glob library recognizes. If flags includes QGLOB_CVTNULL,
199 we change quoted null strings (pathname[0] == CTLNUL) into empty
200 strings (pathname[0] == 0). If this is called after quote removal
201 is performed, (flags & QGLOB_CVTNULL) should be 0; if called when quote
202 removal has not been done (for example, before attempting to match a
203 pattern while executing a case statement), flags should include
204 QGLOB_CVTNULL. If flags includes QGLOB_CTLESC, we need to remove CTLESC
205 quoting CTLESC or CTLNUL (as if dequote_string were called). If flags
206 includes QGLOB_FILENAME, appropriate quoting to match a filename should be
207 performed. QGLOB_REGEXP means we're quoting for a Posix ERE (for
208 [[ string =~ pat ]]) and that requires some special handling. */
209 char *
210 quote_string_for_globbing (pathname, qflags)
211 const char *pathname;
212 int qflags;
213 {
214 char *temp;
215 register int i, j;
216 int cclass, collsym, equiv, c, last_was_backslash;
217 int savei, savej;
218
219 temp = (char *)xmalloc (2 * strlen (pathname) + 1);
220
221 if ((qflags & QGLOB_CVTNULL) && QUOTED_NULL (pathname))
222 {
223 temp[0] = '\0';
224 return temp;
225 }
226
227 cclass = collsym = equiv = last_was_backslash = 0;
228 for (i = j = 0; pathname[i]; i++)
229 {
230 /* Fix for CTLESC at the end of the string? */
231 if (pathname[i] == CTLESC && pathname[i+1] == '\0')
232 {
233 temp[j++] = pathname[i++];
234 break;
235 }
236 /* If we are parsing regexp, turn CTLESC CTLESC into CTLESC. It's not an
237 ERE special character, so we should just be able to pass it through. */
238 else if ((qflags & (QGLOB_REGEXP|QGLOB_CTLESC)) && pathname[i] == CTLESC && (pathname[i+1] == CTLESC || pathname[i+1] == CTLNUL))
239 {
240 i++;
241 temp[j++] = pathname[i];
242 continue;
243 }
244 else if (pathname[i] == CTLESC)
245 {
246 convert_to_backslash:
247 if ((qflags & QGLOB_FILENAME) && pathname[i+1] == '/')
248 continue;
249 /* What to do if preceding char is backslash? */
250 if (pathname[i+1] != CTLESC && (qflags & QGLOB_REGEXP) && ere_char (pathname[i+1]) == 0)
251 continue;
252 temp[j++] = '\\';
253 i++;
254 if (pathname[i] == '\0')
255 break;
256 }
257 else if ((qflags & QGLOB_REGEXP) && (i == 0 || pathname[i-1] != CTLESC) && pathname[i] == '[') /*]*/
258 {
259 temp[j++] = pathname[i++]; /* open bracket */
260 savej = j;
261 savei = i;
262 c = pathname[i++]; /* c == char after open bracket */
263 if (c == '^') /* ignore pattern negation */
264 {
265 temp[j++] = c;
266 c = pathname[i++];
267 }
268 if (c == ']') /* ignore right bracket if first char */
269 {
270 temp[j++] = c;
271 c = pathname[i++];
272 }
273 do
274 {
275 if (c == 0)
276 goto endpat;
277 else if (c == CTLESC)
278 {
279 /* skip c, check for EOS, let assignment at end of loop */
280 /* pathname[i] == backslash-escaped character */
281 if (pathname[i] == 0)
282 goto endpat;
283 temp[j++] = pathname[i++];
284 }
285 else if (c == '[' && pathname[i] == ':')
286 {
287 temp[j++] = c;
288 temp[j++] = pathname[i++];
289 cclass = 1;
290 }
291 else if (cclass && c == ':' && pathname[i] == ']')
292 {
293 temp[j++] = c;
294 temp[j++] = pathname[i++];
295 cclass = 0;
296 }
297 else if (c == '[' && pathname[i] == '=')
298 {
299 temp[j++] = c;
300 temp[j++] = pathname[i++];
301 if (pathname[i] == ']')
302 temp[j++] = pathname[i++]; /* right brack can be in equiv */
303 equiv = 1;
304 }
305 else if (equiv && c == '=' && pathname[i] == ']')
306 {
307 temp[j++] = c;
308 temp[j++] = pathname[i++];
309 equiv = 0;
310 }
311 else if (c == '[' && pathname[i] == '.')
312 {
313 temp[j++] = c;
314 temp[j++] = pathname[i++];
315 if (pathname[i] == ']')
316 temp[j++] = pathname[i++]; /* right brack can be in collsym */
317 collsym = 1;
318 }
319 else if (collsym && c == '.' && pathname[i] == ']')
320 {
321 temp[j++] = c;
322 temp[j++] = pathname[i++];
323 collsym = 0;
324 }
325 else
326 temp[j++] = c;
327 }
328 while (((c = pathname[i++]) != ']') && c != 0);
329
330 /* If we don't find the closing bracket before we hit the end of
331 the string, rescan string without treating it as a bracket
332 expression (has implications for backslash and special ERE
333 chars) */
334 if (c == 0)
335 {
336 i = savei - 1; /* -1 for autoincrement above */
337 j = savej;
338 continue;
339 }
340
341 temp[j++] = c; /* closing right bracket */
342 i--; /* increment will happen above in loop */
343 continue; /* skip double assignment below */
344 }
345 else if (pathname[i] == '\\' && (qflags & QGLOB_REGEXP) == 0)
346 {
347 /* XXX - if not quoting regexp, use backslash as quote char. Should
348 We just pass it through without treating it as special? That is
349 what ksh93 seems to do. */
350
351 /* If we want to pass through backslash unaltered, comment out these
352 lines. */
353 temp[j++] = '\\';
354
355 i++;
356 if (pathname[i] == '\0')
357 break;
358 /* If we are turning CTLESC CTLESC into CTLESC, we need to do that
359 even when the first CTLESC is preceded by a backslash. */
360 if ((qflags & QGLOB_CTLESC) && pathname[i] == CTLESC && (pathname[i+1] == CTLESC || pathname[i+1] == CTLNUL))
361 i++; /* skip over the CTLESC */
362 else if ((qflags & QGLOB_CTLESC) && pathname[i] == CTLESC)
363 /* A little more general: if there is an unquoted backslash in the
364 pattern and we are handling quoted characters in the pattern,
365 convert the CTLESC to backslash and add the next character on
366 the theory that the backslash will quote the next character
367 but it would be inconsistent not to replace the CTLESC with
368 another backslash here. We can't tell at this point whether the
369 CTLESC comes from a backslash or other form of quoting in the
370 original pattern. */
371 goto convert_to_backslash;
372 }
373 else if (pathname[i] == '\\' && (qflags & QGLOB_REGEXP))
374 last_was_backslash = 1;
375 temp[j++] = pathname[i];
376 }
377 endpat:
378 temp[j] = '\0';
379
380 return (temp);
381 }
382
383 char *
384 quote_globbing_chars (string)
385 const char *string;
386 {
387 size_t slen;
388 char *temp, *t;
389 const char *s, *send;
390 DECLARE_MBSTATE;
391
392 slen = strlen (string);
393 send = string + slen;
394
395 temp = (char *)xmalloc (slen * 2 + 1);
396 for (t = temp, s = string; *s; )
397 {
398 if (glob_char_p (s))
399 *t++ = '\\';
400
401 /* Copy a single (possibly multibyte) character from s to t,
402 incrementing both. */
403 COPY_CHAR_P (t, s, send);
404 }
405 *t = '\0';
406 return temp;
407 }
408
409 /* Call the glob library to do globbing on PATHNAME. */
410 char **
411 shell_glob_filename (pathname, qflags)
412 const char *pathname;
413 int qflags;
414 {
415 #if defined (USE_POSIX_GLOB_LIBRARY)
416 register int i;
417 char *temp, **results;
418 glob_t filenames;
419 int glob_flags;
420
421 temp = quote_string_for_globbing (pathname, QGLOB_FILENAME|qflags);
422
423 filenames.gl_offs = 0;
424
425 # if defined (GLOB_PERIOD)
426 glob_flags = glob_dot_filenames ? GLOB_PERIOD : 0;
427 # else
428 glob_flags = 0;
429 # endif /* !GLOB_PERIOD */
430
431 glob_flags |= (GLOB_ERR | GLOB_DOOFFS);
432
433 i = glob (temp, glob_flags, (posix_glob_errfunc_t *)NULL, &filenames);
434
435 free (temp);
436
437 if (i == GLOB_NOSPACE || i == GLOB_ABORTED)
438 return ((char **)NULL);
439 else if (i == GLOB_NOMATCH)
440 filenames.gl_pathv = (char **)NULL;
441 else if (i != 0) /* other error codes not in POSIX.2 */
442 filenames.gl_pathv = (char **)NULL;
443
444 results = filenames.gl_pathv;
445
446 if (results && ((GLOB_FAILED (results)) == 0))
447 {
448 if (should_ignore_glob_matches ())
449 ignore_glob_matches (results);
450 if (results && results[0])
451 strvec_sort (results, 1); /* posix sort */
452 else
453 {
454 FREE (results);
455 results = (char **)NULL;
456 }
457 }
458
459 return (results);
460
461 #else /* !USE_POSIX_GLOB_LIBRARY */
462
463 char *temp, **results;
464 int gflags, quoted_pattern;
465
466 noglob_dot_filenames = glob_dot_filenames == 0;
467
468 temp = quote_string_for_globbing (pathname, QGLOB_FILENAME|qflags);
469 gflags = glob_star ? GX_GLOBSTAR : 0;
470 results = glob_filename (temp, gflags);
471 free (temp);
472
473 if (results && ((GLOB_FAILED (results)) == 0))
474 {
475 if (should_ignore_glob_matches ())
476 ignore_glob_matches (results);
477 if (results && results[0])
478 strvec_sort (results, 1); /* posix sort */
479 else
480 {
481 FREE (results);
482 results = (char **)&glob_error_return;
483 }
484 }
485
486 return (results);
487 #endif /* !USE_POSIX_GLOB_LIBRARY */
488 }
489
490 /* Stuff for GLOBIGNORE. */
491
492 static struct ignorevar globignore =
493 {
494 "GLOBIGNORE",
495 (struct ign *)0,
496 0,
497 (char *)0,
498 (sh_iv_item_func_t *)0,
499 };
500
501 /* Set up to ignore some glob matches because the value of GLOBIGNORE
502 has changed. If GLOBIGNORE is being unset, we also need to disable
503 the globbing of filenames beginning with a `.'. */
504 void
505 setup_glob_ignore (name)
506 char *name;
507 {
508 char *v;
509
510 v = get_string_value (name);
511 setup_ignore_patterns (&globignore);
512
513 if (globignore.num_ignores)
514 glob_dot_filenames = 1;
515 else if (v == 0)
516 glob_dot_filenames = 0;
517 }
518
519 int
520 should_ignore_glob_matches ()
521 {
522 return globignore.num_ignores;
523 }
524
525 /* Return 0 if NAME matches a pattern in the globignore.ignores list. */
526 static int
527 glob_name_is_acceptable (name)
528 const char *name;
529 {
530 struct ign *p;
531 char *n;
532 int flags;
533
534 /* . and .. are never matched. We extend this to the terminal component of a
535 pathname. */
536 n = strrchr (name, '/');
537 if (n == 0 || n[1] == 0)
538 n = (char *)name;
539 else
540 n++;
541
542 if (n[0] == '.' && (n[1] == '\0' || (n[1] == '.' && n[2] == '\0')))
543 return (0);
544
545 flags = FNM_PATHNAME | FNMATCH_EXTFLAG | FNMATCH_NOCASEGLOB;
546 for (p = globignore.ignores; p->val; p++)
547 {
548 if (strmatch (p->val, (char *)name, flags) != FNM_NOMATCH)
549 return (0);
550 }
551 return (1);
552 }
553
554 /* Internal function to test whether filenames in NAMES should be
555 ignored. NAME_FUNC is a pointer to a function to call with each
556 name. It returns non-zero if the name is acceptable to the particular
557 ignore function which called _ignore_names; zero if the name should
558 be removed from NAMES. */
559
560 static void
561 ignore_globbed_names (names, name_func)
562 char **names;
563 sh_ignore_func_t *name_func;
564 {
565 char **newnames;
566 int n, i;
567
568 for (i = 0; names[i]; i++)
569 ;
570 newnames = strvec_create (i + 1);
571
572 for (n = i = 0; names[i]; i++)
573 {
574 if ((*name_func) (names[i]))
575 newnames[n++] = names[i];
576 else
577 free (names[i]);
578 }
579
580 newnames[n] = (char *)NULL;
581
582 if (n == 0)
583 {
584 names[0] = (char *)NULL;
585 free (newnames);
586 return;
587 }
588
589 /* Copy the acceptable names from NEWNAMES back to NAMES and set the
590 new array end. */
591 for (n = 0; newnames[n]; n++)
592 names[n] = newnames[n];
593 names[n] = (char *)NULL;
594 free (newnames);
595 }
596
597 void
598 ignore_glob_matches (names)
599 char **names;
600 {
601 if (globignore.num_ignores == 0)
602 return;
603
604 ignore_globbed_names (names, glob_name_is_acceptable);
605 }
606
607 static char *
608 split_ignorespec (s, ip)
609 char *s;
610 int *ip;
611 {
612 char *t;
613 int n, i;
614
615 if (s == 0)
616 return 0;
617
618 i = *ip;
619 if (s[i] == 0)
620 return 0;
621
622 n = skip_to_delim (s, i, ":", SD_NOJMP|SD_EXTGLOB|SD_GLOB);
623 t = substring (s, i, n);
624
625 if (s[n] == ':')
626 n++;
627 *ip = n;
628 return t;
629 }
630
631 void
632 setup_ignore_patterns (ivp)
633 struct ignorevar *ivp;
634 {
635 int numitems, maxitems, ptr;
636 char *colon_bit, *this_ignoreval;
637 struct ign *p;
638
639 this_ignoreval = get_string_value (ivp->varname);
640
641 /* If nothing has changed then just exit now. */
642 if ((this_ignoreval && ivp->last_ignoreval && STREQ (this_ignoreval, ivp->last_ignoreval)) ||
643 (!this_ignoreval && !ivp->last_ignoreval))
644 return;
645
646 /* Oops. The ignore variable has changed. Re-parse it. */
647 ivp->num_ignores = 0;
648
649 if (ivp->ignores)
650 {
651 for (p = ivp->ignores; p->val; p++)
652 free(p->val);
653 free (ivp->ignores);
654 ivp->ignores = (struct ign *)NULL;
655 }
656
657 if (ivp->last_ignoreval)
658 {
659 free (ivp->last_ignoreval);
660 ivp->last_ignoreval = (char *)NULL;
661 }
662
663 if (this_ignoreval == 0 || *this_ignoreval == '\0')
664 return;
665
666 ivp->last_ignoreval = savestring (this_ignoreval);
667
668 numitems = maxitems = ptr = 0;
669
670 #if 0
671 while (colon_bit = extract_colon_unit (this_ignoreval, &ptr))
672 #else
673 while (colon_bit = split_ignorespec (this_ignoreval, &ptr))
674 #endif
675 {
676 if (numitems + 1 >= maxitems)
677 {
678 maxitems += 10;
679 ivp->ignores = (struct ign *)xrealloc (ivp->ignores, maxitems * sizeof (struct ign));
680 }
681 ivp->ignores[numitems].val = colon_bit;
682 ivp->ignores[numitems].len = strlen (colon_bit);
683 ivp->ignores[numitems].flags = 0;
684 if (ivp->item_func)
685 (*ivp->item_func) (&ivp->ignores[numitems]);
686 numitems++;
687 }
688 ivp->ignores[numitems].val = (char *)NULL;
689 ivp->num_ignores = numitems;
690 }