/* pathexp.c -- The shell interface to the globbing library. */

/* Copyright (C) 1995-2020 Free Software Foundation, Inc.

   This file is part of GNU Bash, the Bourne Again SHell.

   Bash is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   Bash is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with Bash.  If not, see <http://www.gnu.org/licenses/>.
*/

21#include "config.h"
22
23#include "bashtypes.h"
24#include <stdio.h>
25
26#if defined (HAVE_UNISTD_H)
27# include <unistd.h>
28#endif
29
30#include "bashansi.h"
31
32#include "shell.h"
33#include "pathexp.h"
34#include "flags.h"
35
7117c2d2 36#include "shmbutil.h"
3185942a 37#include "bashintl.h"
7117c2d2 38
f73dda09 39#include <glob/strmatch.h>
b72432fd 40
712f80b0
CR
41static int glob_name_is_acceptable PARAMS((const char *));
42static void ignore_globbed_names PARAMS((char **, sh_ignore_func_t *));
43static char *split_ignorespec PARAMS((char *, int *));
ac50fbac 44
b72432fd
JA
45#if defined (USE_POSIX_GLOB_LIBRARY)
46# include <glob.h>
712f80b0 47typedef int posix_glob_errfunc_t PARAMS((const char *, int));
b72432fd
JA
48#else
49# include <glob/glob.h>
50#endif
ccc6cda3
JA
51
52/* Control whether * matches .files in globbing. */
53int glob_dot_filenames;
54
cce855bc 55/* Control whether the extended globbing features are enabled. */
0001803f 56int extended_glob = EXTGLOB_DEFAULT;
cce855bc 57
3185942a
JA
58/* Control enabling special handling of `**' */
59int glob_star = 0;
60
712f80b0
CR
61/* Return nonzero if STRING has any unquoted special globbing chars in it.
62 This is supposed to be called when pathname expansion is performed, so
63 it implements the rules in Posix 2.13.3, specifically that an unquoted
64 slash cannot appear in a bracket expression. */
ccc6cda3
JA
65int
66unquoted_glob_pattern_p (string)
67 register char *string;
68{
69 register int c;
7117c2d2 70 char *send;
fcf6ae7d 71 int open, bsquote;
ccc6cda3 72
7117c2d2
JA
73 DECLARE_MBSTATE;
74
fcf6ae7d 75 open = bsquote = 0;
7117c2d2
JA
76 send = string + strlen (string);
77
ccc6cda3
JA
78 while (c = *string++)
79 {
80 switch (c)
81 {
82 case '?':
83 case '*':
84 return (1);
85
86 case '[':
87 open++;
88 continue;
89
90 case ']':
712f80b0 91 if (open) /* XXX - if --open == 0? */
ccc6cda3
JA
92 return (1);
93 continue;
94
712f80b0
CR
95 case '/':
96 if (open)
97 open = 0;
98
cce855bc
JA
99 case '+':
100 case '@':
101 case '!':
102 if (*string == '(') /*)*/
103 return (1);
104 continue;
105
d233b485 106 /* A pattern can't end with a backslash, but a backslash in the pattern
712f80b0
CR
107 can be special to the matching engine, so we note it in case we
108 need it later. */
ccc6cda3 109 case '\\':
fcf6ae7d
CR
110 if (*string != '\0' && *string != '/')
111 {
112 bsquote = 1;
113 string++;
114 continue;
115 }
712f80b0
CR
116 else if (open && *string == '/')
117 {
118 string++; /* quoted slashes in bracket expressions are ok */
119 continue;
120 }
fcf6ae7d
CR
121 else if (*string == 0)
122 return (0);
d233b485
CR
123
124 case CTLESC:
ccc6cda3
JA
125 if (*string++ == '\0')
126 return (0);
127 }
7117c2d2
JA
128
129 /* Advance one fewer byte than an entire multibyte character to
130 account for the auto-increment in the loop above. */
131#ifdef HANDLE_MULTIBYTE
132 string--;
133 ADVANCE_CHAR_P (string, send - string);
134 string++;
135#else
136 ADVANCE_CHAR_P (string, send - string);
137#endif
ccc6cda3 138 }
fcf6ae7d 139
712f80b0 140#if 0
fcf6ae7d 141 return (bsquote ? 2 : 0);
712f80b0
CR
142#else
143 return (0);
144#endif
ccc6cda3
JA
145}
146
/* Return 1 if C is a character that is `special' in a POSIX ERE and needs to
   be quoted to match itself; return 0 for every other character. */
static inline int
ere_char (int c)
{
  switch (c)
    {
    case '.':
    case '[':
    case '\\':
    case '(':
    case ')':
    case '*':
    case '+':
    case '?':
    case '{':
    case '|':
    case '^':
    case '$':
      return 1;
    default:
      return 0;
    }
}

/* Return 1 if the character at the start of S is special to the globbing
   code: one of `*', `[', `]', `?' or backslash, or one of `+', `@', `!'
   when immediately followed by an open paren.  Return 0 otherwise,
   including when S is the empty string. */
int
glob_char_p (const char *s)
{
  switch (*s)
    {
    case '*':
    case '[':
    case ']':
    case '?':
    case '\\':
      return 1;
    case '+':
    case '@':
    case '!':
      /* Only special as the prefix of an extended pattern such as +(...). */
      if (s[1] == '(')  /*(*/
        return 1;
      break;
    default:
      break;
    }
  return 0;
}

ccc6cda3
JA
196/* PATHNAME can contain characters prefixed by CTLESC; this indicates
197 that the character is to be quoted. We quote it here in the style
cce855bc 198 that the glob library recognizes. If flags includes QGLOB_CVTNULL,
ccc6cda3
JA
199 we change quoted null strings (pathname[0] == CTLNUL) into empty
200 strings (pathname[0] == 0). If this is called after quote removal
cce855bc 201 is performed, (flags & QGLOB_CVTNULL) should be 0; if called when quote
ccc6cda3 202 removal has not been done (for example, before attempting to match a
cce855bc 203 pattern while executing a case statement), flags should include
d233b485
CR
204 QGLOB_CVTNULL. If flags includes QGLOB_CTLESC, we need to remove CTLESC
205 quoting CTLESC or CTLNUL (as if dequote_string were called). If flags
206 includes QGLOB_FILENAME, appropriate quoting to match a filename should be
207 performed. QGLOB_REGEXP means we're quoting for a Posix ERE (for
208 [[ string =~ pat ]]) and that requires some special handling. */
ccc6cda3 209char *
cce855bc 210quote_string_for_globbing (pathname, qflags)
28ef6c31 211 const char *pathname;
cce855bc 212 int qflags;
ccc6cda3
JA
213{
214 char *temp;
cce855bc 215 register int i, j;
d233b485 216 int cclass, collsym, equiv, c, last_was_backslash;
a0c0a00f 217 int savei, savej;
ccc6cda3 218
ac50fbac 219 temp = (char *)xmalloc (2 * strlen (pathname) + 1);
ccc6cda3 220
cce855bc 221 if ((qflags & QGLOB_CVTNULL) && QUOTED_NULL (pathname))
ccc6cda3
JA
222 {
223 temp[0] = '\0';
224 return temp;
225 }
226
d233b485 227 cclass = collsym = equiv = last_was_backslash = 0;
cce855bc 228 for (i = j = 0; pathname[i]; i++)
ccc6cda3 229 {
ac50fbac
CR
230 /* Fix for CTLESC at the end of the string? */
231 if (pathname[i] == CTLESC && pathname[i+1] == '\0')
232 {
233 temp[j++] = pathname[i++];
234 break;
235 }
236 /* If we are parsing regexp, turn CTLESC CTLESC into CTLESC. It's not an
237 ERE special character, so we should just be able to pass it through. */
d233b485 238 else if ((qflags & (QGLOB_REGEXP|QGLOB_CTLESC)) && pathname[i] == CTLESC && (pathname[i+1] == CTLESC || pathname[i+1] == CTLNUL))
ac50fbac
CR
239 {
240 i++;
241 temp[j++] = pathname[i];
242 continue;
243 }
244 else if (pathname[i] == CTLESC)
28ef6c31 245 {
712f80b0 246convert_to_backslash:
28ef6c31
JA
247 if ((qflags & QGLOB_FILENAME) && pathname[i+1] == '/')
248 continue;
ac50fbac 249 /* What to do if preceding char is backslash? */
25db9a70 250 if (pathname[i+1] != CTLESC && (qflags & QGLOB_REGEXP) && ere_char (pathname[i+1]) == 0)
f1be666c 251 continue;
cce855bc 252 temp[j++] = '\\';
7117c2d2
JA
253 i++;
254 if (pathname[i] == '\0')
255 break;
28ef6c31 256 }
ac50fbac
CR
257 else if ((qflags & QGLOB_REGEXP) && (i == 0 || pathname[i-1] != CTLESC) && pathname[i] == '[') /*]*/
258 {
ac50fbac 259 temp[j++] = pathname[i++]; /* open bracket */
a0c0a00f
CR
260 savej = j;
261 savei = i;
ac50fbac 262 c = pathname[i++]; /* c == char after open bracket */
d233b485
CR
263 if (c == '^') /* ignore pattern negation */
264 {
265 temp[j++] = c;
266 c = pathname[i++];
267 }
268 if (c == ']') /* ignore right bracket if first char */
269 {
270 temp[j++] = c;
271 c = pathname[i++];
272 }
ac50fbac
CR
273 do
274 {
275 if (c == 0)
276 goto endpat;
277 else if (c == CTLESC)
278 {
279 /* skip c, check for EOS, let assignment at end of loop */
280 /* pathname[i] == backslash-escaped character */
281 if (pathname[i] == 0)
282 goto endpat;
283 temp[j++] = pathname[i++];
284 }
285 else if (c == '[' && pathname[i] == ':')
286 {
287 temp[j++] = c;
288 temp[j++] = pathname[i++];
289 cclass = 1;
290 }
291 else if (cclass && c == ':' && pathname[i] == ']')
292 {
293 temp[j++] = c;
294 temp[j++] = pathname[i++];
295 cclass = 0;
296 }
297 else if (c == '[' && pathname[i] == '=')
298 {
299 temp[j++] = c;
300 temp[j++] = pathname[i++];
301 if (pathname[i] == ']')
302 temp[j++] = pathname[i++]; /* right brack can be in equiv */
303 equiv = 1;
304 }
305 else if (equiv && c == '=' && pathname[i] == ']')
306 {
307 temp[j++] = c;
308 temp[j++] = pathname[i++];
309 equiv = 0;
310 }
311 else if (c == '[' && pathname[i] == '.')
312 {
313 temp[j++] = c;
314 temp[j++] = pathname[i++];
315 if (pathname[i] == ']')
316 temp[j++] = pathname[i++]; /* right brack can be in collsym */
317 collsym = 1;
318 }
319 else if (collsym && c == '.' && pathname[i] == ']')
320 {
321 temp[j++] = c;
322 temp[j++] = pathname[i++];
323 collsym = 0;
324 }
325 else
326 temp[j++] = c;
327 }
a0c0a00f
CR
328 while (((c = pathname[i++]) != ']') && c != 0);
329
330 /* If we don't find the closing bracket before we hit the end of
331 the string, rescan string without treating it as a bracket
332 expression (has implications for backslash and special ERE
333 chars) */
334 if (c == 0)
335 {
336 i = savei - 1; /* -1 for autoincrement above */
337 j = savej;
338 continue;
339 }
340
ac50fbac
CR
341 temp[j++] = c; /* closing right bracket */
342 i--; /* increment will happen above in loop */
343 continue; /* skip double assignment below */
344 }
345 else if (pathname[i] == '\\' && (qflags & QGLOB_REGEXP) == 0)
3185942a 346 {
ac50fbac 347 /* XXX - if not quoting regexp, use backslash as quote char. Should
712f80b0 348 We just pass it through without treating it as special? That is
ac50fbac
CR
349 what ksh93 seems to do. */
350
351 /* If we want to pass through backslash unaltered, comment out these
352 lines. */
3185942a 353 temp[j++] = '\\';
ac50fbac 354
3185942a
JA
355 i++;
356 if (pathname[i] == '\0')
357 break;
d233b485
CR
358 /* If we are turning CTLESC CTLESC into CTLESC, we need to do that
359 even when the first CTLESC is preceded by a backslash. */
360 if ((qflags & QGLOB_CTLESC) && pathname[i] == CTLESC && (pathname[i+1] == CTLESC || pathname[i+1] == CTLNUL))
361 i++; /* skip over the CTLESC */
712f80b0
CR
362 else if ((qflags & QGLOB_CTLESC) && pathname[i] == CTLESC)
363 /* A little more general: if there is an unquoted backslash in the
364 pattern and we are handling quoted characters in the pattern,
365 convert the CTLESC to backslash and add the next character on
366 the theory that the backslash will quote the next character
367 but it would be inconsistent not to replace the CTLESC with
368 another backslash here. We can't tell at this point whether the
369 CTLESC comes from a backslash or other form of quoting in the
370 original pattern. */
371 goto convert_to_backslash;
3185942a 372 }
ac50fbac
CR
373 else if (pathname[i] == '\\' && (qflags & QGLOB_REGEXP))
374 last_was_backslash = 1;
7117c2d2 375 temp[j++] = pathname[i];
ccc6cda3 376 }
ac50fbac 377endpat:
cce855bc 378 temp[j] = '\0';
ccc6cda3
JA
379
380 return (temp);
381}
382
383char *
384quote_globbing_chars (string)
a0c0a00f 385 const char *string;
ccc6cda3 386{
7117c2d2 387 size_t slen;
a0c0a00f
CR
388 char *temp, *t;
389 const char *s, *send;
7117c2d2
JA
390 DECLARE_MBSTATE;
391
392 slen = strlen (string);
393 send = string + slen;
ccc6cda3 394
7117c2d2 395 temp = (char *)xmalloc (slen * 2 + 1);
ccc6cda3
JA
396 for (t = temp, s = string; *s; )
397 {
3185942a
JA
398 if (glob_char_p (s))
399 *t++ = '\\';
7117c2d2
JA
400
401 /* Copy a single (possibly multibyte) character from s to t,
ac50fbac 402 incrementing both. */
7117c2d2 403 COPY_CHAR_P (t, s, send);
ccc6cda3
JA
404 }
405 *t = '\0';
406 return temp;
407}
408
409/* Call the glob library to do globbing on PATHNAME. */
410char **
712f80b0 411shell_glob_filename (pathname, qflags)
28ef6c31 412 const char *pathname;
712f80b0 413 int qflags;
ccc6cda3
JA
414{
415#if defined (USE_POSIX_GLOB_LIBRARY)
416 register int i;
28ef6c31 417 char *temp, **results;
ccc6cda3
JA
418 glob_t filenames;
419 int glob_flags;
420
712f80b0 421 temp = quote_string_for_globbing (pathname, QGLOB_FILENAME|qflags);
ccc6cda3
JA
422
423 filenames.gl_offs = 0;
424
b72432fd 425# if defined (GLOB_PERIOD)
ccc6cda3 426 glob_flags = glob_dot_filenames ? GLOB_PERIOD : 0;
b72432fd
JA
427# else
428 glob_flags = 0;
429# endif /* !GLOB_PERIOD */
430
ccc6cda3
JA
431 glob_flags |= (GLOB_ERR | GLOB_DOOFFS);
432
f73dda09 433 i = glob (temp, glob_flags, (posix_glob_errfunc_t *)NULL, &filenames);
ccc6cda3
JA
434
435 free (temp);
436
28ef6c31 437 if (i == GLOB_NOSPACE || i == GLOB_ABORTED)
ccc6cda3 438 return ((char **)NULL);
b72432fd
JA
439 else if (i == GLOB_NOMATCH)
440 filenames.gl_pathv = (char **)NULL;
441 else if (i != 0) /* other error codes not in POSIX.2 */
cce855bc 442 filenames.gl_pathv = (char **)NULL;
ccc6cda3 443
bb70624e
JA
444 results = filenames.gl_pathv;
445
446 if (results && ((GLOB_FAILED (results)) == 0))
447 {
448 if (should_ignore_glob_matches ())
449 ignore_glob_matches (results);
450 if (results && results[0])
712f80b0 451 strvec_sort (results, 1); /* posix sort */
bb70624e
JA
452 else
453 {
454 FREE (results);
455 results = (char **)NULL;
456 }
457 }
458
459 return (results);
ccc6cda3
JA
460
461#else /* !USE_POSIX_GLOB_LIBRARY */
462
463 char *temp, **results;
712f80b0 464 int gflags, quoted_pattern;
ccc6cda3
JA
465
466 noglob_dot_filenames = glob_dot_filenames == 0;
467
712f80b0 468 temp = quote_string_for_globbing (pathname, QGLOB_FILENAME|qflags);
d233b485
CR
469 gflags = glob_star ? GX_GLOBSTAR : 0;
470 results = glob_filename (temp, gflags);
ccc6cda3
JA
471 free (temp);
472
473 if (results && ((GLOB_FAILED (results)) == 0))
474 {
475 if (should_ignore_glob_matches ())
476 ignore_glob_matches (results);
477 if (results && results[0])
712f80b0 478 strvec_sort (results, 1); /* posix sort */
ccc6cda3
JA
479 else
480 {
481 FREE (results);
482 results = (char **)&glob_error_return;
483 }
484 }
485
486 return (results);
487#endif /* !USE_POSIX_GLOB_LIBRARY */
488}
489
490/* Stuff for GLOBIGNORE. */
491
492static struct ignorevar globignore =
493{
494 "GLOBIGNORE",
495 (struct ign *)0,
496 0,
497 (char *)0,
f73dda09 498 (sh_iv_item_func_t *)0,
ccc6cda3
JA
499};
500
501/* Set up to ignore some glob matches because the value of GLOBIGNORE
502 has changed. If GLOBIGNORE is being unset, we also need to disable
503 the globbing of filenames beginning with a `.'. */
504void
505setup_glob_ignore (name)
506 char *name;
507{
508 char *v;
509
510 v = get_string_value (name);
511 setup_ignore_patterns (&globignore);
512
513 if (globignore.num_ignores)
514 glob_dot_filenames = 1;
515 else if (v == 0)
516 glob_dot_filenames = 0;
517}
518
519int
520should_ignore_glob_matches ()
521{
522 return globignore.num_ignores;
523}
524
525/* Return 0 if NAME matches a pattern in the globignore.ignores list. */
526static int
527glob_name_is_acceptable (name)
f73dda09 528 const char *name;
ccc6cda3
JA
529{
530 struct ign *p;
3eb0018e 531 char *n;
cce855bc 532 int flags;
ccc6cda3 533
3eb0018e
CR
534 /* . and .. are never matched. We extend this to the terminal component of a
535 pathname. */
536 n = strrchr (name, '/');
537 if (n == 0 || n[1] == 0)
538 n = (char *)name;
539 else
540 n++;
541
542 if (n[0] == '.' && (n[1] == '\0' || (n[1] == '.' && n[2] == '\0')))
ccc6cda3
JA
543 return (0);
544
a0c0a00f 545 flags = FNM_PATHNAME | FNMATCH_EXTFLAG | FNMATCH_NOCASEGLOB;
ccc6cda3
JA
546 for (p = globignore.ignores; p->val; p++)
547 {
f73dda09 548 if (strmatch (p->val, (char *)name, flags) != FNM_NOMATCH)
28ef6c31 549 return (0);
ccc6cda3
JA
550 }
551 return (1);
552}
553
554/* Internal function to test whether filenames in NAMES should be
555 ignored. NAME_FUNC is a pointer to a function to call with each
556 name. It returns non-zero if the name is acceptable to the particular
557 ignore function which called _ignore_names; zero if the name should
558 be removed from NAMES. */
559
560static void
561ignore_globbed_names (names, name_func)
562 char **names;
f73dda09 563 sh_ignore_func_t *name_func;
ccc6cda3
JA
564{
565 char **newnames;
566 int n, i;
567
568 for (i = 0; names[i]; i++)
569 ;
7117c2d2 570 newnames = strvec_create (i + 1);
ccc6cda3
JA
571
572 for (n = i = 0; names[i]; i++)
573 {
574 if ((*name_func) (names[i]))
28ef6c31 575 newnames[n++] = names[i];
ccc6cda3
JA
576 else
577 free (names[i]);
578 }
579
580 newnames[n] = (char *)NULL;
581
582 if (n == 0)
583 {
584 names[0] = (char *)NULL;
585 free (newnames);
586 return;
587 }
588
589 /* Copy the acceptable names from NEWNAMES back to NAMES and set the
590 new array end. */
591 for (n = 0; newnames[n]; n++)
592 names[n] = newnames[n];
593 names[n] = (char *)NULL;
d166f048 594 free (newnames);
ccc6cda3
JA
595}
596
597void
598ignore_glob_matches (names)
599 char **names;
600{
601 if (globignore.num_ignores == 0)
602 return;
603
604 ignore_globbed_names (names, glob_name_is_acceptable);
605}
606
495aee44
CR
607static char *
608split_ignorespec (s, ip)
609 char *s;
610 int *ip;
611{
612 char *t;
613 int n, i;
614
615 if (s == 0)
616 return 0;
617
618 i = *ip;
619 if (s[i] == 0)
620 return 0;
621
a0c0a00f 622 n = skip_to_delim (s, i, ":", SD_NOJMP|SD_EXTGLOB|SD_GLOB);
495aee44
CR
623 t = substring (s, i, n);
624
625 if (s[n] == ':')
626 n++;
627 *ip = n;
628 return t;
629}
630
ccc6cda3
JA
631void
632setup_ignore_patterns (ivp)
633 struct ignorevar *ivp;
634{
635 int numitems, maxitems, ptr;
636 char *colon_bit, *this_ignoreval;
637 struct ign *p;
638
639 this_ignoreval = get_string_value (ivp->varname);
640
641 /* If nothing has changed then just exit now. */
642 if ((this_ignoreval && ivp->last_ignoreval && STREQ (this_ignoreval, ivp->last_ignoreval)) ||
643 (!this_ignoreval && !ivp->last_ignoreval))
644 return;
645
646 /* Oops. The ignore variable has changed. Re-parse it. */
647 ivp->num_ignores = 0;
648
649 if (ivp->ignores)
650 {
651 for (p = ivp->ignores; p->val; p++)
652 free(p->val);
653 free (ivp->ignores);
654 ivp->ignores = (struct ign *)NULL;
655 }
656
657 if (ivp->last_ignoreval)
658 {
659 free (ivp->last_ignoreval);
660 ivp->last_ignoreval = (char *)NULL;
661 }
662
663 if (this_ignoreval == 0 || *this_ignoreval == '\0')
664 return;
665
666 ivp->last_ignoreval = savestring (this_ignoreval);
667
668 numitems = maxitems = ptr = 0;
669
495aee44 670#if 0
ccc6cda3 671 while (colon_bit = extract_colon_unit (this_ignoreval, &ptr))
495aee44
CR
672#else
673 while (colon_bit = split_ignorespec (this_ignoreval, &ptr))
674#endif
ccc6cda3
JA
675 {
676 if (numitems + 1 >= maxitems)
677 {
678 maxitems += 10;
679 ivp->ignores = (struct ign *)xrealloc (ivp->ignores, maxitems * sizeof (struct ign));
680 }
681 ivp->ignores[numitems].val = colon_bit;
682 ivp->ignores[numitems].len = strlen (colon_bit);
683 ivp->ignores[numitems].flags = 0;
684 if (ivp->item_func)
28ef6c31 685 (*ivp->item_func) (&ivp->ignores[numitems]);
ccc6cda3
JA
686 numitems++;
687 }
688 ivp->ignores[numitems].val = (char *)NULL;
689 ivp->num_ignores = numitems;
690}