]> git.ipfire.org Git - thirdparty/bash.git/blob - pathexp.c
Bash-5.0 patch 4: the wait builtin without arguments only waits for known children...
[thirdparty/bash.git] / pathexp.c
1 /* pathexp.c -- The shell interface to the globbing library. */
2
3 /* Copyright (C) 1995-2014 Free Software Foundation, Inc.
4
5 This file is part of GNU Bash, the Bourne Again SHell.
6
7 Bash is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
11
12 Bash is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with Bash. If not, see <http://www.gnu.org/licenses/>.
19 */
20
21 #include "config.h"
22
23 #include "bashtypes.h"
24 #include <stdio.h>
25
26 #if defined (HAVE_UNISTD_H)
27 # include <unistd.h>
28 #endif
29
30 #include "bashansi.h"
31
32 #include "shell.h"
33 #include "pathexp.h"
34 #include "flags.h"
35
36 #include "shmbutil.h"
37 #include "bashintl.h"
38
39 #include <glob/strmatch.h>
40
41 static int glob_name_is_acceptable __P((const char *));
42 static void ignore_globbed_names __P((char **, sh_ignore_func_t *));
43 static char *split_ignorespec __P((char *, int *));
44
45 #if defined (USE_POSIX_GLOB_LIBRARY)
46 # include <glob.h>
47 typedef int posix_glob_errfunc_t __P((const char *, int));
48 #else
49 # include <glob/glob.h>
50 #endif
51
52 /* Nonzero means a glob pattern may match filenames that begin with `.'.
   In this file setup_glob_ignore sets it to 1 while the ignore list holds
   at least one pattern and to 0 when the named variable has no value;
   shell_glob_filename reads it to configure the glob library. */
53 int glob_dot_filenames;
54
55 /* Nonzero means the extended globbing features are enabled.  Starts out
   as EXTGLOB_DEFAULT. */
56 int extended_glob = EXTGLOB_DEFAULT;
57
58 /* Nonzero enables special handling of `**': shell_glob_filename then
   passes GX_GLOBSTAR to glob_filename. */
59 int glob_star = 0;
60
61 /* Return nonzero if STRING has any unquoted special globbing chars in it. */
62 int
63 unquoted_glob_pattern_p (string)
64 register char *string;
65 {
66 register int c;
67 char *send;
68 int open, bsquote;
69
70 DECLARE_MBSTATE;
71
72 open = bsquote = 0;
73 send = string + strlen (string);
74
75 while (c = *string++)
76 {
77 switch (c)
78 {
79 case '?':
80 case '*':
81 return (1);
82
83 case '[':
84 open++;
85 continue;
86
87 case ']':
88 if (open)
89 return (1);
90 continue;
91
92 case '+':
93 case '@':
94 case '!':
95 if (*string == '(') /*)*/
96 return (1);
97 continue;
98
99 /* A pattern can't end with a backslash, but a backslash in the pattern
100 can be removed by the matching engine, so we have to run it through
101 globbing. */
102 case '\\':
103 if (*string != '\0' && *string != '/')
104 {
105 bsquote = 1;
106 string++;
107 continue;
108 }
109 else if (*string == 0)
110 return (0);
111
112 case CTLESC:
113 if (*string++ == '\0')
114 return (0);
115 }
116
117 /* Advance one fewer byte than an entire multibyte character to
118 account for the auto-increment in the loop above. */
119 #ifdef HANDLE_MULTIBYTE
120 string--;
121 ADVANCE_CHAR_P (string, send - string);
122 string++;
123 #else
124 ADVANCE_CHAR_P (string, send - string);
125 #endif
126 }
127
128 return (bsquote ? 2 : 0);
129 }
130
/* Return 1 if C is one of the characters that are `special' in a POSIX
   ERE and therefore must be quoted to match themselves; 0 otherwise. */
static inline int
ere_char (c)
     int c;
{
  return (c == '.' || c == '[' || c == '\\'
          || c == '(' || c == ')'
          || c == '*' || c == '+' || c == '?'
          || c == '{' || c == '|'
          || c == '^' || c == '$');
}
157
/* Return 1 if the character at S is special to globbing: `*', `[', `]',
   `?' or backslash, or one of `+', `@', `!' when the next character is
   `('.  Return 0 otherwise. */
int
glob_char_p (s)
     const char *s;
{
  const char first = s[0];

  if (first == '*' || first == '[' || first == ']'
      || first == '?' || first == '\\')
    return 1;

  /* Extended-glob openers only count when followed by a left paren. */
  if ((first == '+' || first == '@' || first == '!') && s[1] == '(')	/*)*/
    return 1;

  return 0;
}
179
180 /* PATHNAME can contain characters prefixed by CTLESC; this indicates
181 that the character is to be quoted. We quote it here in the style
182 that the glob library recognizes. If flags includes QGLOB_CVTNULL,
183 we change quoted null strings (pathname[0] == CTLNUL) into empty
184 strings (pathname[0] == 0). If this is called after quote removal
185 is performed, (flags & QGLOB_CVTNULL) should be 0; if called when quote
186 removal has not been done (for example, before attempting to match a
187 pattern while executing a case statement), flags should include
188 QGLOB_CVTNULL. If flags includes QGLOB_CTLESC, we need to remove CTLESC
189 quoting CTLESC or CTLNUL (as if dequote_string were called). If flags
190 includes QGLOB_FILENAME, appropriate quoting to match a filename should be
191 performed. QGLOB_REGEXP means we're quoting for a Posix ERE (for
192 [[ string =~ pat ]]) and that requires some special handling. */
193 char *
194 quote_string_for_globbing (pathname, qflags)
195 const char *pathname;
196 int qflags;
197 {
198 char *temp;
199 register int i, j;
200 int cclass, collsym, equiv, c, last_was_backslash;
201 int savei, savej;
202
203 temp = (char *)xmalloc (2 * strlen (pathname) + 1);
204
205 if ((qflags & QGLOB_CVTNULL) && QUOTED_NULL (pathname))
206 {
207 temp[0] = '\0';
208 return temp;
209 }
210
211 cclass = collsym = equiv = last_was_backslash = 0;
212 for (i = j = 0; pathname[i]; i++)
213 {
214 /* Fix for CTLESC at the end of the string? */
215 if (pathname[i] == CTLESC && pathname[i+1] == '\0')
216 {
217 temp[j++] = pathname[i++];
218 break;
219 }
220 /* If we are parsing regexp, turn CTLESC CTLESC into CTLESC. It's not an
221 ERE special character, so we should just be able to pass it through. */
222 else if ((qflags & (QGLOB_REGEXP|QGLOB_CTLESC)) && pathname[i] == CTLESC && (pathname[i+1] == CTLESC || pathname[i+1] == CTLNUL))
223 {
224 i++;
225 temp[j++] = pathname[i];
226 continue;
227 }
228 else if (pathname[i] == CTLESC)
229 {
230 if ((qflags & QGLOB_FILENAME) && pathname[i+1] == '/')
231 continue;
232 /* What to do if preceding char is backslash? */
233 if (pathname[i+1] != CTLESC && (qflags & QGLOB_REGEXP) && ere_char (pathname[i+1]) == 0)
234 continue;
235 temp[j++] = '\\';
236 i++;
237 if (pathname[i] == '\0')
238 break;
239 }
240 else if ((qflags & QGLOB_REGEXP) && (i == 0 || pathname[i-1] != CTLESC) && pathname[i] == '[') /*]*/
241 {
242 temp[j++] = pathname[i++]; /* open bracket */
243 savej = j;
244 savei = i;
245 c = pathname[i++]; /* c == char after open bracket */
246 if (c == '^') /* ignore pattern negation */
247 {
248 temp[j++] = c;
249 c = pathname[i++];
250 }
251 if (c == ']') /* ignore right bracket if first char */
252 {
253 temp[j++] = c;
254 c = pathname[i++];
255 }
256 do
257 {
258 if (c == 0)
259 goto endpat;
260 else if (c == CTLESC)
261 {
262 /* skip c, check for EOS, let assignment at end of loop */
263 /* pathname[i] == backslash-escaped character */
264 if (pathname[i] == 0)
265 goto endpat;
266 temp[j++] = pathname[i++];
267 }
268 else if (c == '[' && pathname[i] == ':')
269 {
270 temp[j++] = c;
271 temp[j++] = pathname[i++];
272 cclass = 1;
273 }
274 else if (cclass && c == ':' && pathname[i] == ']')
275 {
276 temp[j++] = c;
277 temp[j++] = pathname[i++];
278 cclass = 0;
279 }
280 else if (c == '[' && pathname[i] == '=')
281 {
282 temp[j++] = c;
283 temp[j++] = pathname[i++];
284 if (pathname[i] == ']')
285 temp[j++] = pathname[i++]; /* right brack can be in equiv */
286 equiv = 1;
287 }
288 else if (equiv && c == '=' && pathname[i] == ']')
289 {
290 temp[j++] = c;
291 temp[j++] = pathname[i++];
292 equiv = 0;
293 }
294 else if (c == '[' && pathname[i] == '.')
295 {
296 temp[j++] = c;
297 temp[j++] = pathname[i++];
298 if (pathname[i] == ']')
299 temp[j++] = pathname[i++]; /* right brack can be in collsym */
300 collsym = 1;
301 }
302 else if (collsym && c == '.' && pathname[i] == ']')
303 {
304 temp[j++] = c;
305 temp[j++] = pathname[i++];
306 collsym = 0;
307 }
308 else
309 temp[j++] = c;
310 }
311 while (((c = pathname[i++]) != ']') && c != 0);
312
313 /* If we don't find the closing bracket before we hit the end of
314 the string, rescan string without treating it as a bracket
315 expression (has implications for backslash and special ERE
316 chars) */
317 if (c == 0)
318 {
319 i = savei - 1; /* -1 for autoincrement above */
320 j = savej;
321 continue;
322 }
323
324 temp[j++] = c; /* closing right bracket */
325 i--; /* increment will happen above in loop */
326 continue; /* skip double assignment below */
327 }
328 else if (pathname[i] == '\\' && (qflags & QGLOB_REGEXP) == 0)
329 {
330 /* XXX - if not quoting regexp, use backslash as quote char. Should
331 we just pass it through without treating it as special? That is
332 what ksh93 seems to do. */
333
334 /* If we want to pass through backslash unaltered, comment out these
335 lines. */
336 temp[j++] = '\\';
337
338 i++;
339 if (pathname[i] == '\0')
340 break;
341 /* If we are turning CTLESC CTLESC into CTLESC, we need to do that
342 even when the first CTLESC is preceded by a backslash. */
343 if ((qflags & QGLOB_CTLESC) && pathname[i] == CTLESC && (pathname[i+1] == CTLESC || pathname[i+1] == CTLNUL))
344 i++; /* skip over the CTLESC */
345 }
346 else if (pathname[i] == '\\' && (qflags & QGLOB_REGEXP))
347 last_was_backslash = 1;
348 temp[j++] = pathname[i];
349 }
350 endpat:
351 temp[j] = '\0';
352
353 return (temp);
354 }
355
356 char *
357 quote_globbing_chars (string)
358 const char *string;
359 {
360 size_t slen;
361 char *temp, *t;
362 const char *s, *send;
363 DECLARE_MBSTATE;
364
365 slen = strlen (string);
366 send = string + slen;
367
368 temp = (char *)xmalloc (slen * 2 + 1);
369 for (t = temp, s = string; *s; )
370 {
371 if (glob_char_p (s))
372 *t++ = '\\';
373
374 /* Copy a single (possibly multibyte) character from s to t,
375 incrementing both. */
376 COPY_CHAR_P (t, s, send);
377 }
378 *t = '\0';
379 return temp;
380 }
381
382 /* Call the glob library to do globbing on PATHNAME. */
383 char **
384 shell_glob_filename (pathname)
385 const char *pathname;
386 {
387 #if defined (USE_POSIX_GLOB_LIBRARY)
388 register int i;
389 char *temp, **results;
390 glob_t filenames;
391 int glob_flags;
392
393 temp = quote_string_for_globbing (pathname, QGLOB_FILENAME);
394
395 filenames.gl_offs = 0;
396
397 # if defined (GLOB_PERIOD)
398 glob_flags = glob_dot_filenames ? GLOB_PERIOD : 0;
399 # else
400 glob_flags = 0;
401 # endif /* !GLOB_PERIOD */
402
403 glob_flags |= (GLOB_ERR | GLOB_DOOFFS);
404
405 i = glob (temp, glob_flags, (posix_glob_errfunc_t *)NULL, &filenames);
406
407 free (temp);
408
409 if (i == GLOB_NOSPACE || i == GLOB_ABORTED)
410 return ((char **)NULL);
411 else if (i == GLOB_NOMATCH)
412 filenames.gl_pathv = (char **)NULL;
413 else if (i != 0) /* other error codes not in POSIX.2 */
414 filenames.gl_pathv = (char **)NULL;
415
416 results = filenames.gl_pathv;
417
418 if (results && ((GLOB_FAILED (results)) == 0))
419 {
420 if (should_ignore_glob_matches ())
421 ignore_glob_matches (results);
422 if (results && results[0])
423 strvec_sort (results);
424 else
425 {
426 FREE (results);
427 results = (char **)NULL;
428 }
429 }
430
431 return (results);
432
433 #else /* !USE_POSIX_GLOB_LIBRARY */
434
435 char *temp, **results;
436 int gflags;
437
438 noglob_dot_filenames = glob_dot_filenames == 0;
439
440 temp = quote_string_for_globbing (pathname, QGLOB_FILENAME);
441 gflags = glob_star ? GX_GLOBSTAR : 0;
442 results = glob_filename (temp, gflags);
443 free (temp);
444
445 if (results && ((GLOB_FAILED (results)) == 0))
446 {
447 if (should_ignore_glob_matches ())
448 ignore_glob_matches (results);
449 if (results && results[0])
450 strvec_sort (results);
451 else
452 {
453 FREE (results);
454 results = (char **)&glob_error_return;
455 }
456 }
457
458 return (results);
459 #endif /* !USE_POSIX_GLOB_LIBRARY */
460 }
461
462 /* Stuff for GLOBIGNORE. */
463
/* State for the GLOBIGNORE ignore list.  setup_ignore_patterns fills in the
   pattern array, count and cached value from the variable named here.
   NOTE(review): the field names in the comments below are matched to the
   initializers by position and type from how this file uses the struct;
   confirm against the declaration of struct ignorevar. */
464 static struct ignorevar globignore =
465 {
466 "GLOBIGNORE",	/* presumably varname: the shell variable to read */
467 (struct ign *)0,	/* presumably ignores: no parsed patterns yet */
468 0,	/* presumably num_ignores */
469 (char *)0,	/* presumably last_ignoreval: no cached value yet */
470 (sh_iv_item_func_t *)0,	/* presumably item_func: no per-item hook */
471 };
472
473 /* Set up to ignore some glob matches because the value of GLOBIGNORE
474 has changed. If GLOBIGNORE is being unset, we also need to disable
475 the globbing of filenames beginning with a `.'. */
476 void
477 setup_glob_ignore (name)
478 char *name;
479 {
480 char *v;
481
482 v = get_string_value (name);
483 setup_ignore_patterns (&globignore);
484
485 if (globignore.num_ignores)
486 glob_dot_filenames = 1;
487 else if (v == 0)
488 glob_dot_filenames = 0;
489 }
490
491 int
492 should_ignore_glob_matches ()
493 {
494 return globignore.num_ignores;
495 }
496
497 /* Return 0 if NAME matches a pattern in the globignore.ignores list. */
498 static int
499 glob_name_is_acceptable (name)
500 const char *name;
501 {
502 struct ign *p;
503 int flags;
504
505 /* . and .. are never matched */
506 if (name[0] == '.' && (name[1] == '\0' || (name[1] == '.' && name[2] == '\0')))
507 return (0);
508
509 flags = FNM_PATHNAME | FNMATCH_EXTFLAG | FNMATCH_NOCASEGLOB;
510 for (p = globignore.ignores; p->val; p++)
511 {
512 if (strmatch (p->val, (char *)name, flags) != FNM_NOMATCH)
513 return (0);
514 }
515 return (1);
516 }
517
518 /* Internal function to test whether filenames in NAMES should be
519 ignored. NAME_FUNC is a pointer to a function to call with each
520 name. It returns non-zero if the name is acceptable to the particular
521 ignore function which called _ignore_names; zero if the name should
522 be removed from NAMES. */
523
524 static void
525 ignore_globbed_names (names, name_func)
526 char **names;
527 sh_ignore_func_t *name_func;
528 {
529 char **newnames;
530 int n, i;
531
532 for (i = 0; names[i]; i++)
533 ;
534 newnames = strvec_create (i + 1);
535
536 for (n = i = 0; names[i]; i++)
537 {
538 if ((*name_func) (names[i]))
539 newnames[n++] = names[i];
540 else
541 free (names[i]);
542 }
543
544 newnames[n] = (char *)NULL;
545
546 if (n == 0)
547 {
548 names[0] = (char *)NULL;
549 free (newnames);
550 return;
551 }
552
553 /* Copy the acceptable names from NEWNAMES back to NAMES and set the
554 new array end. */
555 for (n = 0; newnames[n]; n++)
556 names[n] = newnames[n];
557 names[n] = (char *)NULL;
558 free (newnames);
559 }
560
561 void
562 ignore_glob_matches (names)
563 char **names;
564 {
565 if (globignore.num_ignores == 0)
566 return;
567
568 ignore_globbed_names (names, glob_name_is_acceptable);
569 }
570
571 static char *
572 split_ignorespec (s, ip)
573 char *s;
574 int *ip;
575 {
576 char *t;
577 int n, i;
578
579 if (s == 0)
580 return 0;
581
582 i = *ip;
583 if (s[i] == 0)
584 return 0;
585
586 n = skip_to_delim (s, i, ":", SD_NOJMP|SD_EXTGLOB|SD_GLOB);
587 t = substring (s, i, n);
588
589 if (s[n] == ':')
590 n++;
591 *ip = n;
592 return t;
593 }
594
595 void
596 setup_ignore_patterns (ivp)
597 struct ignorevar *ivp;
598 {
599 int numitems, maxitems, ptr;
600 char *colon_bit, *this_ignoreval;
601 struct ign *p;
602
603 this_ignoreval = get_string_value (ivp->varname);
604
605 /* If nothing has changed then just exit now. */
606 if ((this_ignoreval && ivp->last_ignoreval && STREQ (this_ignoreval, ivp->last_ignoreval)) ||
607 (!this_ignoreval && !ivp->last_ignoreval))
608 return;
609
610 /* Oops. The ignore variable has changed. Re-parse it. */
611 ivp->num_ignores = 0;
612
613 if (ivp->ignores)
614 {
615 for (p = ivp->ignores; p->val; p++)
616 free(p->val);
617 free (ivp->ignores);
618 ivp->ignores = (struct ign *)NULL;
619 }
620
621 if (ivp->last_ignoreval)
622 {
623 free (ivp->last_ignoreval);
624 ivp->last_ignoreval = (char *)NULL;
625 }
626
627 if (this_ignoreval == 0 || *this_ignoreval == '\0')
628 return;
629
630 ivp->last_ignoreval = savestring (this_ignoreval);
631
632 numitems = maxitems = ptr = 0;
633
634 #if 0
635 while (colon_bit = extract_colon_unit (this_ignoreval, &ptr))
636 #else
637 while (colon_bit = split_ignorespec (this_ignoreval, &ptr))
638 #endif
639 {
640 if (numitems + 1 >= maxitems)
641 {
642 maxitems += 10;
643 ivp->ignores = (struct ign *)xrealloc (ivp->ignores, maxitems * sizeof (struct ign));
644 }
645 ivp->ignores[numitems].val = colon_bit;
646 ivp->ignores[numitems].len = strlen (colon_bit);
647 ivp->ignores[numitems].flags = 0;
648 if (ivp->item_func)
649 (*ivp->item_func) (&ivp->ignores[numitems]);
650 numitems++;
651 }
652 ivp->ignores[numitems].val = (char *)NULL;
653 ivp->num_ignores = numitems;
654 }