1 /* pathexp.c -- The shell interface to the globbing library. */
3 /* Copyright (C) 1995-2023 Free Software Foundation, Inc.
5 This file is part of GNU Bash, the Bourne Again SHell.
7 Bash is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
12 Bash is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with Bash. If not, see <http://www.gnu.org/licenses/>.
23 #include "bashtypes.h"
26 #if defined (HAVE_UNISTD_H)
30 #include "posixstat.h"
31 #include "stat-time.h"
43 #include <glob/strmatch.h>
45 static int glob_name_is_acceptable (const char *);
46 static void ignore_globbed_names (char **, sh_ignore_func_t
*);
47 static char *split_ignorespec (char *, int *);
48 static void sh_sortglob (char **);
50 #include <glob/glob.h>
52 /* Control whether * matches .files in globbing. */
53 int glob_dot_filenames
;
55 /* Control whether the extended globbing features are enabled. */
56 int extended_glob
= EXTGLOB_DEFAULT
;
58 /* Control enabling special handling of `**' */
61 /* Return nonzero if STRING has any unquoted special globbing chars in it.
62 This is supposed to be called when pathname expansion is performed, so
63 it implements the rules in Posix 2.13.3, specifically that an unquoted
64 slash cannot appear in a bracket expression. */
66 unquoted_glob_pattern_p (char *string
)
75 send
= string
+ strlen (string
);
90 if (open
) /* XXX - if --open == 0? */
102 if (extended_glob
&& *string
== '(') /*)*/
107 /* Even after an unquoted backslash, CTLESC either quotes the next
108 char or escapes a CTLESC or CTLNUL. Either way, the character
109 after it is not an unquoted globbing char. */
110 if (*string
== CTLESC
)
114 if (*string
++ == '\0')
118 /* Advance one fewer byte than an entire multibyte character to
119 account for the auto-increment in the loop above. */
120 #ifdef HANDLE_MULTIBYTE
122 ADVANCE_CHAR_P (string
, send
- string
);
125 ADVANCE_CHAR_P (string
, send
- string
);
132 /* Return 1 if C is a character that is `special' in a POSIX ERE and needs to
133 be quoted to match itself. */
158 /* This is only used to determine whether to backslash-quote a character. */
160 glob_char_p (const char *s
)
164 #if defined (EXTENDED_GLOB)
167 return (s
[1] == '('); /*)*/
188 /* PATHNAME can contain characters prefixed by CTLESC; this indicates
189 that the character is to be quoted. We quote it here in the style
190 that the glob library recognizes. If flags includes QGLOB_CVTNULL,
191 we change quoted null strings (pathname[0] == CTLNUL) into empty
192 strings (pathname[0] == 0). If this is called after quote removal
193 is performed, (flags & QGLOB_CVTNULL) should be 0; if called when quote
194 removal has not been done (for example, before attempting to match a
195 pattern while executing a case statement), flags should include
196 QGLOB_CVTNULL. If flags includes QGLOB_CTLESC, we need to remove CTLESC
197 quoting CTLESC or CTLNUL (as if dequote_string were called). If flags
198 includes QGLOB_FILENAME, appropriate quoting to match a filename should be
199 performed. QGLOB_REGEXP means we're quoting for a Posix ERE (for
200 [[ string =~ pat ]]) and that requires some special handling. */
202 quote_string_for_globbing (const char *pathname
, int qflags
)
206 int cclass
, collsym
, equiv
, c
, last_was_backslash
;
210 temp
= (char *)xmalloc (2 * strlen (pathname
) + 1);
212 if ((qflags
& QGLOB_CVTNULL
) && QUOTED_NULL (pathname
))
218 cclass
= collsym
= equiv
= last_was_backslash
= 0;
219 for (i
= j
= 0; pathname
[i
]; i
++)
221 /* Fix for CTLESC at the end of the string? */
222 if (pathname
[i
] == CTLESC
&& pathname
[i
+1] == '\0')
224 temp
[j
++] = pathname
[i
++];
227 /* If we are parsing regexp, turn CTLESC CTLESC into CTLESC. It's not an
228 ERE special character, so we should just be able to pass it through. */
229 else if ((qflags
& (QGLOB_REGEXP
|QGLOB_CTLESC
)) && pathname
[i
] == CTLESC
&& (pathname
[i
+1] == CTLESC
|| pathname
[i
+1] == CTLNUL
))
232 temp
[j
++] = pathname
[i
];
235 else if (pathname
[i
] == CTLESC
)
237 convert_to_backslash
:
240 if ((qflags
& QGLOB_FILENAME
) && pathname
[i
+1] == '/')
243 /* What to do if preceding char is backslash? */
245 /* We don't have to backslash-quote non-special ERE characters if
246 we're quoting a regexp. */
247 if (cc
!= CTLESC
&& (qflags
& QGLOB_REGEXP
) && ere_char (cc
) == 0)
250 /* We don't have to backslash-quote non-special BRE characters if
251 we're quoting a glob pattern. */
252 if (cc
!= CTLESC
&& (qflags
& QGLOB_REGEXP
) == 0 && glob_char_p (pathname
+i
+1) == 0)
255 /* If we're in a multibyte locale, don't bother quoting multibyte
256 characters. It matters if we're going to convert NFD to NFC on
257 macOS, and doesn't make a difference on other systems. */
258 if (cc
!= CTLESC
&& locale_utf8locale
&& UTF8_SINGLEBYTE (cc
) == 0)
259 continue; /* probably don't need to check for UTF-8 locale */
263 if (pathname
[i
] == '\0')
266 else if ((qflags
& QGLOB_REGEXP
) && (i
== 0 || pathname
[i
-1] != CTLESC
) && pathname
[i
] == '[') /*]*/
268 temp
[j
++] = pathname
[i
++]; /* open bracket */
271 c
= pathname
[i
++]; /* c == char after open bracket */
272 if (c
== '^') /* ignore pattern negation */
277 if (c
== ']') /* ignore right bracket if first char */
286 else if (c
== CTLESC
)
288 /* skip c, check for EOS, let assignment at end of loop */
289 /* pathname[i] == backslash-escaped character */
290 if (pathname
[i
] == 0)
292 temp
[j
++] = pathname
[i
++];
294 else if (c
== '[' && pathname
[i
] == ':')
297 temp
[j
++] = pathname
[i
++];
300 else if (cclass
&& c
== ':' && pathname
[i
] == ']')
303 temp
[j
++] = pathname
[i
++];
306 else if (c
== '[' && pathname
[i
] == '=')
309 temp
[j
++] = pathname
[i
++];
310 if (pathname
[i
] == ']')
311 temp
[j
++] = pathname
[i
++]; /* right brack can be in equiv */
314 else if (equiv
&& c
== '=' && pathname
[i
] == ']')
317 temp
[j
++] = pathname
[i
++];
320 else if (c
== '[' && pathname
[i
] == '.')
323 temp
[j
++] = pathname
[i
++];
324 if (pathname
[i
] == ']')
325 temp
[j
++] = pathname
[i
++]; /* right brack can be in collsym */
328 else if (collsym
&& c
== '.' && pathname
[i
] == ']')
331 temp
[j
++] = pathname
[i
++];
337 while (((c
= pathname
[i
++]) != ']') && c
!= 0);
339 /* If we don't find the closing bracket before we hit the end of
340 the string, rescan string without treating it as a bracket
341 expression (has implications for backslash and special ERE
345 i
= savei
- 1; /* -1 for autoincrement above */
350 temp
[j
++] = c
; /* closing right bracket */
351 i
--; /* increment will happen above in loop */
352 continue; /* skip double assignment below */
354 else if (pathname
[i
] == '\\' && (qflags
& QGLOB_REGEXP
) == 0)
356 /* XXX - if not quoting regexp, use backslash as quote char. Should
357 We just pass it through without treating it as special? That is
358 what ksh93 seems to do. */
360 /* If we want to pass through backslash unaltered, comment out these
365 if (pathname
[i
] == '\0')
367 /* If we are turning CTLESC CTLESC into CTLESC, we need to do that
368 even when the first CTLESC is preceded by a backslash. */
369 if ((qflags
& QGLOB_CTLESC
) && pathname
[i
] == CTLESC
&& (pathname
[i
+1] == CTLESC
|| pathname
[i
+1] == CTLNUL
))
370 i
++; /* skip over the CTLESC */
371 else if ((qflags
& QGLOB_CTLESC
) && pathname
[i
] == CTLESC
)
372 /* A little more general: if there is an unquoted backslash in the
373 pattern and we are handling quoted characters in the pattern,
374 convert the CTLESC to backslash and add the next character on
375 the theory that the backslash will quote the next character
376 but it would be inconsistent not to replace the CTLESC with
377 another backslash here. We can't tell at this point whether the
378 CTLESC comes from a backslash or other form of quoting in the
380 goto convert_to_backslash
;
382 else if (pathname
[i
] == '\\' && (qflags
& QGLOB_REGEXP
))
383 last_was_backslash
= 1;
384 temp
[j
++] = pathname
[i
];
393 quote_globbing_chars (const char *string
)
397 const char *s
, *send
;
400 slen
= strlen (string
);
401 send
= string
+ slen
;
403 temp
= (char *)xmalloc (slen
* 2 + 1);
404 for (t
= temp
, s
= string
; *s
; )
409 /* Copy a single (possibly multibyte) character from s to t,
410 incrementing both. */
411 COPY_CHAR_P (t
, s
, send
);
417 /* Call the glob library to do globbing on PATHNAME, honoring all the shell
418 variables that control globbing. */
420 shell_glob_filename (const char *pathname
, int qflags
)
422 char *temp
, **results
;
423 int gflags
, quoted_pattern
;
425 noglob_dot_filenames
= glob_dot_filenames
== 0;
427 temp
= quote_string_for_globbing (pathname
, QGLOB_FILENAME
|qflags
);
428 gflags
= glob_star
? GX_GLOBSTAR
: 0;
429 results
= glob_filename (temp
, gflags
);
432 if (results
&& ((GLOB_FAILED (results
)) == 0))
434 if (should_ignore_glob_matches ())
435 ignore_glob_matches (results
);
436 if (results
&& results
[0])
437 sh_sortglob (results
);
441 results
= (char **)&glob_error_return
;
448 #if defined (READLINE) && defined (PROGRAMMABLE_COMPLETION)
450 noquote_glob_filename (char *pathname
)
455 noglob_dot_filenames
= glob_dot_filenames
== 0;
456 gflags
= glob_star
? GX_GLOBSTAR
: 0;
458 results
= glob_filename (pathname
, gflags
);
460 if (results
&& GLOB_FAILED (results
))
461 results
= (char **)NULL
;
463 if (results
&& results
[0])
464 sh_sortglob (results
);
470 /* Stuff for GLOBIGNORE. */
472 static struct ignorevar globignore
=
478 (sh_iv_item_func_t
*)0,
481 /* Set up to ignore some glob matches because the value of GLOBIGNORE
482 has changed. If GLOBIGNORE is being unset, we also need to disable
483 the globbing of filenames beginning with a `.'. */
485 setup_glob_ignore (const char *name
)
489 v
= get_string_value (name
);
490 setup_ignore_patterns (&globignore
);
492 if (globignore
.num_ignores
)
493 glob_dot_filenames
= 1;
495 glob_dot_filenames
= 0;
499 should_ignore_glob_matches (void)
501 return globignore
.num_ignores
;
504 /* Return 0 if NAME matches a pattern in the globignore.ignores list. */
506 glob_name_is_acceptable (const char *name
)
512 /* . and .. are never matched. We extend this to the terminal component of a
514 n
= strrchr (name
, '/');
515 if (n
== 0 || n
[1] == 0)
520 if (n
[0] == '.' && (n
[1] == '\0' || (n
[1] == '.' && n
[2] == '\0')))
523 flags
= FNM_PATHNAME
| FNMATCH_EXTFLAG
| FNMATCH_NOCASEGLOB
;
524 for (p
= globignore
.ignores
; p
->val
; p
++)
526 if (strmatch (p
->val
, (char *)name
, flags
) != FNM_NOMATCH
)
532 /* Internal function to test whether filenames in NAMES should be
533 ignored. NAME_FUNC is a pointer to a function to call with each
534 name. It returns non-zero if the name is acceptable to the particular
535 ignore function which called ignore_globbed_names; zero if the name should
536 be removed from NAMES. */
539 ignore_globbed_names (char **names
, sh_ignore_func_t
*name_func
)
544 for (i
= 0; names
[i
]; i
++)
546 newnames
= strvec_create (i
+ 1);
548 for (n
= i
= 0; names
[i
]; i
++)
550 if ((*name_func
) (names
[i
]))
551 newnames
[n
++] = names
[i
];
556 newnames
[n
] = (char *)NULL
;
560 names
[0] = (char *)NULL
;
565 /* Copy the acceptable names from NEWNAMES back to NAMES and set the
567 for (n
= 0; newnames
[n
]; n
++)
568 names
[n
] = newnames
[n
];
569 names
[n
] = (char *)NULL
;
574 ignore_glob_matches (char **names
)
576 if (globignore
.num_ignores
== 0)
579 ignore_globbed_names (names
, glob_name_is_acceptable
);
583 split_ignorespec (char *s
, int *ip
)
595 n
= skip_to_delim (s
, i
, ":", SD_NOJMP
|SD_EXTGLOB
|SD_GLOB
);
596 t
= substring (s
, i
, n
);
605 setup_ignore_patterns (struct ignorevar
*ivp
)
607 int numitems
, maxitems
, ptr
;
608 char *colon_bit
, *this_ignoreval
;
611 this_ignoreval
= get_string_value (ivp
->varname
);
613 /* If nothing has changed then just exit now. */
614 if ((this_ignoreval
&& ivp
->last_ignoreval
&& STREQ (this_ignoreval
, ivp
->last_ignoreval
)) ||
615 (!this_ignoreval
&& !ivp
->last_ignoreval
))
618 /* Oops. The ignore variable has changed. Re-parse it. */
619 ivp
->num_ignores
= 0;
623 for (p
= ivp
->ignores
; p
->val
; p
++)
626 ivp
->ignores
= (struct ign
*)NULL
;
629 if (ivp
->last_ignoreval
)
631 free (ivp
->last_ignoreval
);
632 ivp
->last_ignoreval
= (char *)NULL
;
635 if (this_ignoreval
== 0 || *this_ignoreval
== '\0')
638 ivp
->last_ignoreval
= savestring (this_ignoreval
);
640 numitems
= maxitems
= ptr
= 0;
642 while (colon_bit
= split_ignorespec (this_ignoreval
, &ptr
))
644 if (numitems
+ 1 >= maxitems
)
647 ivp
->ignores
= (struct ign
*)xrealloc (ivp
->ignores
, maxitems
* sizeof (struct ign
));
649 ivp
->ignores
[numitems
].val
= colon_bit
;
650 ivp
->ignores
[numitems
].len
= strlen (colon_bit
);
651 ivp
->ignores
[numitems
].flags
= 0;
653 (*ivp
->item_func
) (&ivp
->ignores
[numitems
]);
656 ivp
->ignores
[numitems
].val
= (char *)NULL
;
657 ivp
->num_ignores
= numitems
;
660 /* Functions to handle sorting glob results in different ways depending on
661 the value of the GLOBSORT variable. */
663 static int glob_sorttype
= SORT_NONE
;
665 static STRING_INT_ALIST sorttypes
[] = {
666 { "name", SORT_NAME
},
667 { "size", SORT_SIZE
},
668 { "mtime", SORT_MTIME
},
669 { "atime", SORT_ATIME
},
670 { "ctime", SORT_CTIME
},
671 { "blocks", SORT_BLOCKS
},
672 { "nosort", SORT_NOSORT
},
676 /* A subset of the fields in the posix stat struct -- the ones we need --
677 normalized to using struct timespec. */
680 struct timespec mtime
;
681 struct timespec atime
;
682 struct timespec ctime
;
691 static struct globstat glob_nullstat
= { -1, { -1, -1 }, { -1, -1 }, { -1, -1 }, -1 };
694 glob_findtype (char *t
)
698 type
= find_string_in_alist (t
, sorttypes
, 0);
699 return (type
== -1 ? SORT_NONE
: type
);
703 setup_globsort (const char *varname
)
708 glob_sorttype
= SORT_NONE
;
709 val
= get_string_value (varname
);
710 if (val
== 0 || *val
== 0)
714 while (*val
&& whitespace (*val
))
715 val
++; /* why not? */
717 val
++; /* allow leading `+' but ignore it */
718 else if (*val
== '-')
720 r
= SORT_REVERSE
; /* leading `-' reverses sort order */
726 /* A bare `+' means the default sort by name in ascending order; a bare
727 `-' means to sort by name in descending order. */
728 glob_sorttype
= SORT_NAME
| r
;
732 t
= glob_findtype (val
);
733 /* any other value is equivalent to the historical behavior */
734 glob_sorttype
= (t
== SORT_NONE
) ? t
: t
| r
;
738 globsort_namecmp (char **s1
, char **s2
)
740 return ((glob_sorttype
< SORT_REVERSE
) ? strvec_posixcmp (s1
, s2
) : strvec_posixcmp (s2
, s1
));
744 globsort_sizecmp (struct globsort_t
*g1
, struct globsort_t
*g2
)
746 return ((glob_sorttype
< SORT_REVERSE
) ? g1
->st
.size
- g2
->st
.size
: g2
->st
.size
- g1
->st
.size
);
750 globsort_timecmp (struct globsort_t
*g1
, struct globsort_t
*g2
)
753 struct timespec t1
, t2
;
755 t
= (glob_sorttype
< SORT_REVERSE
) ? glob_sorttype
: glob_sorttype
- SORT_REVERSE
;
761 else if (t
== SORT_ATIME
)
772 return ((glob_sorttype
< SORT_REVERSE
) ? timespec_cmp (t1
, t2
) : timespec_cmp (t2
, t1
));
776 globsort_blockscmp (struct globsort_t
*g1
, struct globsort_t
*g2
)
778 return ((glob_sorttype
< SORT_REVERSE
) ? g1
->st
.blocks
- g2
->st
.blocks
: g2
->st
.blocks
- g1
->st
.blocks
);
781 static struct globsort_t
*
782 globsort_buildarray (char **array
, size_t len
)
784 struct globsort_t
*ret
;
788 ret
= (struct globsort_t
*)xmalloc (len
* sizeof (struct globsort_t
));
790 for (i
= 0; i
< len
; i
++)
792 ret
[i
].name
= array
[i
];
793 if (stat (array
[i
], &st
) != 0)
794 ret
[i
].st
= glob_nullstat
;
797 ret
[i
].st
.size
= st
.st_size
;
798 ret
[i
].st
.mtime
= get_stat_mtime (&st
);
799 ret
[i
].st
.atime
= get_stat_atime (&st
);
800 ret
[i
].st
.ctime
= get_stat_ctime (&st
);
801 ret
[i
].st
.blocks
= st
.st_blocks
;
809 globsort_sortbyname (char **results
)
811 qsort (results
, strvec_len (results
), sizeof (char *), (QSFUNC
*)globsort_namecmp
);
815 globsort_sortarray (struct globsort_t
*garray
, size_t len
)
820 t
= (glob_sorttype
< SORT_REVERSE
) ? glob_sorttype
: glob_sorttype
- SORT_REVERSE
;
825 sortfunc
= (QSFUNC
*)globsort_sizecmp
;
830 sortfunc
= (QSFUNC
*)globsort_timecmp
;
833 sortfunc
= (QSFUNC
*)globsort_blockscmp
;
836 internal_error (_("invalid glob sort type"));
840 qsort (garray
, len
, sizeof (struct globsort_t
), sortfunc
);
844 sh_sortglob (char **results
)
847 struct globsort_t
*garray
;
849 if (glob_sorttype
== SORT_NOSORT
|| glob_sorttype
== (SORT_NOSORT
|SORT_REVERSE
))
852 if (glob_sorttype
== SORT_NONE
|| glob_sorttype
== SORT_NAME
)
853 globsort_sortbyname (results
); /* posix sort */
854 else if (glob_sorttype
== (SORT_NAME
|SORT_REVERSE
))
855 globsort_sortbyname (results
); /* posix sort reverse order */
860 rlen
= strvec_len (results
);
861 /* populate an array of name/statinfo, sort it appropriately, copy the
862 names from the sorted array back to RESULTS, and free the array */
863 garray
= globsort_buildarray (results
, rlen
);
864 globsort_sortarray (garray
, rlen
);
865 for (i
= 0; i
< rlen
; i
++)
866 results
[i
] = garray
[i
].name
;