1 /* pathexp.c -- The shell interface to the globbing library. */
/* Copyright (C) 1995-2024 Free Software Foundation, Inc.

   This file is part of GNU Bash, the Bourne Again SHell.

   Bash is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   Bash is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with Bash.  If not, see <http://www.gnu.org/licenses/>.
*/
23 #include "bashtypes.h"
26 #if defined (HAVE_UNISTD_H)
30 #include "posixstat.h"
31 #include "stat-time.h"
43 #include <glob/strmatch.h>
45 static int glob_name_is_acceptable (const char *);
46 static void ignore_globbed_names (char **, sh_ignore_func_t
*);
47 static char *split_ignorespec (char *, int *);
48 static void sh_sortglob (char **);
50 #include <glob/glob.h>
/* Control whether * matches .files in globbing.  Nonzero means they are
   matched; the glob entry points derive noglob_dot_filenames from this. */
int glob_dot_filenames;
55 /* Control whether the extended globbing features are enabled. */
56 int extended_glob
= EXTGLOB_DEFAULT
;
58 /* Control enabling special handling of `**' */
61 /* Return nonzero if STRING has any unquoted special globbing chars in it.
62 This is supposed to be called when pathname expansion is performed, so
63 it implements the rules in Posix 2.13.3, specifically that an unquoted
64 slash cannot appear in a bracket expression. */
66 unquoted_glob_pattern_p (char *string
)
75 send
= string
+ strlen (string
);
90 if (open
) /* XXX - if --open == 0? */
102 if (extended_glob
&& *string
== '(') /*)*/
107 if (*string
== CTLESC
)
110 /* If the CTLESC was quoting a CTLESC, skip it so that it's not
111 treated as a quoting character */
112 if (*string
== CTLESC
)
118 if (*string
++ == '\0')
122 /* Advance one fewer byte than an entire multibyte character to
123 account for the auto-increment in the loop above. */
124 #ifdef HANDLE_MULTIBYTE
126 ADVANCE_CHAR_P (string
, send
- string
);
129 ADVANCE_CHAR_P (string
, send
- string
);
136 /* Return 1 if C is a character that is `special' in a POSIX ERE and needs to
137 be quoted to match itself. */
162 /* This is only used to determine whether to backslash-quote a character. */
164 glob_char_p (const char *s
)
168 #if defined (EXTENDED_GLOB)
171 return (s
[1] == '('); /*)*/
/* Return nonzero if the character at S needs a backslash when quoting a
   glob pattern: anything glob_char_p reports as special, plus `%' and `#'. */
static inline int
glob_quote_char (const char *s)
{
  return (glob_char_p (s) || (*s == '%') || (*s == '#'));
}
198 /* PATHNAME can contain characters prefixed by CTLESC; this indicates
199 that the character is to be quoted. We quote it here in the style
200 that the glob library recognizes. If flags includes QGLOB_CVTNULL,
201 we change quoted null strings (pathname[0] == CTLNUL) into empty
202 strings (pathname[0] == 0). If this is called after quote removal
203 is performed, (flags & QGLOB_CVTNULL) should be 0; if called when quote
204 removal has not been done (for example, before attempting to match a
205 pattern while executing a case statement), flags should include
206 QGLOB_CVTNULL. If flags includes QGLOB_CTLESC, we need to remove CTLESC
207 quoting CTLESC or CTLNUL (as if dequote_string were called). If flags
208 includes QGLOB_FILENAME, appropriate quoting to match a filename should be
209 performed. QGLOB_REGEXP means we're quoting for a Posix ERE (for
210 [[ string =~ pat ]]) and that requires some special handling. */
212 quote_string_for_globbing (const char *pathname
, int qflags
)
216 int cclass
, collsym
, equiv
, c
, last_was_backslash
;
220 temp
= (char *)xmalloc (2 * strlen (pathname
) + 1);
222 if ((qflags
& QGLOB_CVTNULL
) && QUOTED_NULL (pathname
))
228 cclass
= collsym
= equiv
= last_was_backslash
= 0;
229 for (i
= j
= 0; pathname
[i
]; i
++)
231 /* Fix for CTLESC at the end of the string? */
232 if (pathname
[i
] == CTLESC
&& pathname
[i
+1] == '\0')
234 temp
[j
++] = pathname
[i
++];
237 /* If we are parsing regexp, turn CTLESC CTLESC into CTLESC. It's not an
238 ERE special character, so we should just be able to pass it through. */
239 else if ((qflags
& (QGLOB_REGEXP
|QGLOB_CTLESC
)) && pathname
[i
] == CTLESC
&& (pathname
[i
+1] == CTLESC
|| pathname
[i
+1] == CTLNUL
))
242 temp
[j
++] = pathname
[i
];
245 else if (pathname
[i
] == CTLESC
)
247 convert_to_backslash
:
250 if ((qflags
& QGLOB_FILENAME
) && pathname
[i
+1] == '/')
253 /* What to do if preceding char is backslash? */
255 /* We don't have to backslash-quote non-special ERE characters if
256 we're quoting a regexp. */
257 if (cc
!= CTLESC
&& (qflags
& QGLOB_REGEXP
) && ere_char (cc
) == 0)
260 /* We don't have to backslash-quote non-special BRE characters if
261 we're quoting a glob pattern. */
262 if (cc
!= CTLESC
&& (qflags
& QGLOB_REGEXP
) == 0 && glob_quote_char (pathname
+i
+1) == 0)
265 /* If we're in a multibyte locale, don't bother quoting multibyte
266 characters. It matters if we're going to convert NFD to NFC on
267 macOS, and doesn't make a difference on other systems. */
268 if (cc
!= CTLESC
&& locale_utf8locale
&& UTF8_SINGLEBYTE (cc
) == 0)
269 continue; /* probably don't need to check for UTF-8 locale */
273 if (pathname
[i
] == '\0')
276 else if ((qflags
& QGLOB_REGEXP
) && (i
== 0 || pathname
[i
-1] != CTLESC
) && pathname
[i
] == '[') /*]*/
278 temp
[j
++] = pathname
[i
++]; /* open bracket */
281 c
= pathname
[i
++]; /* c == char after open bracket */
282 if (c
== '^') /* ignore pattern negation */
287 if (c
== ']') /* ignore right bracket if first char */
296 else if (c
== CTLESC
)
298 /* skip c, check for EOS, let assignment at end of loop */
299 /* pathname[i] == backslash-escaped character */
300 if (pathname
[i
] == 0)
302 temp
[j
++] = pathname
[i
++];
304 else if (c
== '[' && pathname
[i
] == ':')
307 temp
[j
++] = pathname
[i
++];
310 else if (cclass
&& c
== ':' && pathname
[i
] == ']')
313 temp
[j
++] = pathname
[i
++];
316 else if (c
== '[' && pathname
[i
] == '=')
319 temp
[j
++] = pathname
[i
++];
320 if (pathname
[i
] == ']')
321 temp
[j
++] = pathname
[i
++]; /* right brack can be in equiv */
324 else if (equiv
&& c
== '=' && pathname
[i
] == ']')
327 temp
[j
++] = pathname
[i
++];
330 else if (c
== '[' && pathname
[i
] == '.')
333 temp
[j
++] = pathname
[i
++];
334 if (pathname
[i
] == ']')
335 temp
[j
++] = pathname
[i
++]; /* right brack can be in collsym */
338 else if (collsym
&& c
== '.' && pathname
[i
] == ']')
341 temp
[j
++] = pathname
[i
++];
347 while (((c
= pathname
[i
++]) != ']') && c
!= 0);
349 /* If we don't find the closing bracket before we hit the end of
350 the string, rescan string without treating it as a bracket
351 expression (has implications for backslash and special ERE
355 i
= savei
- 1; /* -1 for autoincrement above */
360 temp
[j
++] = c
; /* closing right bracket */
361 i
--; /* increment will happen above in loop */
362 continue; /* skip double assignment below */
364 else if (pathname
[i
] == '\\' && (qflags
& QGLOB_REGEXP
) == 0)
366 /* XXX - if not quoting regexp, use backslash as quote char. Should
367 We just pass it through without treating it as special? That is
368 what ksh93 seems to do. */
370 /* If we want to pass through backslash unaltered, comment out these
375 if (pathname
[i
] == '\0')
377 /* If we are turning CTLESC CTLESC into CTLESC, we need to do that
378 even when the first CTLESC is preceded by a backslash. */
379 if ((qflags
& QGLOB_CTLESC
) && pathname
[i
] == CTLESC
&& (pathname
[i
+1] == CTLESC
|| pathname
[i
+1] == CTLNUL
))
380 i
++; /* skip over the CTLESC */
381 else if ((qflags
& QGLOB_CTLESC
) && pathname
[i
] == CTLESC
)
382 /* A little more general: if there is an unquoted backslash in the
383 pattern and we are handling quoted characters in the pattern,
384 convert the CTLESC to backslash and add the next character on
385 the theory that the backslash will quote the next character
386 but it would be inconsistent not to replace the CTLESC with
387 another backslash here. We can't tell at this point whether the
388 CTLESC comes from a backslash or other form of quoting in the
390 goto convert_to_backslash
;
392 else if (pathname
[i
] == '\\' && (qflags
& QGLOB_REGEXP
))
393 last_was_backslash
= 1;
395 /* TAG:bash-5.4 Takaaki Konno <re_c25@yahoo.co.jp> 6/23/2025 */
396 else if (pathname
[i
] == CTLNUL
&& (qflags
& QGLOB_CVTNULL
)
397 && (qflags
& QGLOB_CTLESC
))
398 /* If we have an unescaped CTLNUL in the string, and QFLAGS says
399 we want to remove those (QGLOB_CVTNULL) but the string is quoted
400 (QGLOB_CVTNULL and QGLOB_CTLESC), we need to remove it. This can
401 happen when the pattern contains a quoted null string adjacent
402 to non-null characters, and it is not removed by quote removal. */
406 temp
[j
++] = pathname
[i
];
415 quote_globbing_chars (const char *string
)
419 const char *s
, *send
;
422 slen
= strlen (string
);
423 send
= string
+ slen
;
425 temp
= (char *)xmalloc (slen
* 2 + 1);
426 for (t
= temp
, s
= string
; *s
; )
431 /* Copy a single (possibly multibyte) character from s to t,
432 incrementing both. */
433 COPY_CHAR_P (t
, s
, send
);
439 /* Call the glob library to do globbing on PATHNAME, honoring all the shell
440 variables that control globbing. */
442 shell_glob_filename (const char *pathname
, int qflags
)
444 char *temp
, **results
;
445 int gflags
, quoted_pattern
;
447 noglob_dot_filenames
= glob_dot_filenames
== 0;
449 temp
= quote_string_for_globbing (pathname
, QGLOB_FILENAME
|qflags
);
450 gflags
= glob_star
? GX_GLOBSTAR
: 0;
451 results
= glob_filename (temp
, gflags
);
454 if (results
&& ((GLOB_FAILED (results
)) == 0))
456 if (should_ignore_glob_matches ())
457 ignore_glob_matches (results
);
458 if (results
&& results
[0])
459 sh_sortglob (results
);
463 results
= (char **)&glob_error_return
;
470 #if defined (READLINE) && defined (PROGRAMMABLE_COMPLETION)
472 noquote_glob_filename (char *pathname
)
477 noglob_dot_filenames
= glob_dot_filenames
== 0;
478 gflags
= glob_star
? GX_GLOBSTAR
: 0;
480 results
= glob_filename (pathname
, gflags
);
482 if (results
&& GLOB_FAILED (results
))
483 results
= (char **)NULL
;
485 if (results
&& results
[0])
486 sh_sortglob (results
);
492 /* Stuff for GLOBIGNORE. */
494 static struct ignorevar globignore
=
500 (sh_iv_item_func_t
*)0,
503 /* Set up to ignore some glob matches because the value of GLOBIGNORE
504 has changed. If GLOBIGNORE is being unset, we also need to disable
505 the globbing of filenames beginning with a `.'. */
507 setup_glob_ignore (const char *name
)
511 v
= get_string_value (name
);
512 setup_ignore_patterns (&globignore
);
514 if (globignore
.num_ignores
)
515 glob_dot_filenames
= 1;
517 glob_dot_filenames
= 0;
521 should_ignore_glob_matches (void)
523 return globignore
.num_ignores
;
526 /* Return 0 if NAME matches a pattern in the globignore.ignores list. */
528 glob_name_is_acceptable (const char *name
)
534 /* . and .. are never matched. We extend this to the terminal component of a
536 n
= strrchr (name
, '/');
537 if (n
== 0 || n
[1] == 0)
542 if (n
[0] == '.' && (n
[1] == '\0' || (n
[1] == '.' && n
[2] == '\0')))
545 flags
= FNM_PATHNAME
| FNMATCH_EXTFLAG
| FNMATCH_NOCASEGLOB
;
546 for (p
= globignore
.ignores
; p
->val
; p
++)
548 if (strmatch (p
->val
, (char *)name
, flags
) != FNM_NOMATCH
)
554 /* Internal function to test whether filenames in NAMES should be
555 ignored. NAME_FUNC is a pointer to a function to call with each
556 name. It returns non-zero if the name is acceptable to the particular
557 ignore function which called _ignore_names; zero if the name should
558 be removed from NAMES. */
561 ignore_globbed_names (char **names
, sh_ignore_func_t
*name_func
)
566 for (i
= 0; names
[i
]; i
++)
568 newnames
= strvec_create (i
+ 1);
570 for (n
= i
= 0; names
[i
]; i
++)
572 if ((*name_func
) (names
[i
]))
573 newnames
[n
++] = names
[i
];
578 newnames
[n
] = (char *)NULL
;
582 names
[0] = (char *)NULL
;
587 /* Copy the acceptable names from NEWNAMES back to NAMES and set the
589 for (n
= 0; newnames
[n
]; n
++)
590 names
[n
] = newnames
[n
];
591 names
[n
] = (char *)NULL
;
596 ignore_glob_matches (char **names
)
598 if (globignore
.num_ignores
== 0)
601 ignore_globbed_names (names
, glob_name_is_acceptable
);
605 split_ignorespec (char *s
, int *ip
)
617 n
= skip_to_delim (s
, i
, ":", SD_NOJMP
|SD_EXTGLOB
|SD_GLOB
);
618 t
= substring (s
, i
, n
);
627 setup_ignore_patterns (struct ignorevar
*ivp
)
629 int numitems
, maxitems
, ptr
;
630 char *colon_bit
, *this_ignoreval
;
633 this_ignoreval
= get_string_value (ivp
->varname
);
635 /* If nothing has changed then just exit now. */
636 if ((this_ignoreval
&& ivp
->last_ignoreval
&& STREQ (this_ignoreval
, ivp
->last_ignoreval
)) ||
637 (!this_ignoreval
&& !ivp
->last_ignoreval
))
640 /* Oops. The ignore variable has changed. Re-parse it. */
641 ivp
->num_ignores
= 0;
645 for (p
= ivp
->ignores
; p
->val
; p
++)
648 ivp
->ignores
= (struct ign
*)NULL
;
651 if (ivp
->last_ignoreval
)
653 free (ivp
->last_ignoreval
);
654 ivp
->last_ignoreval
= (char *)NULL
;
657 if (this_ignoreval
== 0 || *this_ignoreval
== '\0')
660 ivp
->last_ignoreval
= savestring (this_ignoreval
);
662 numitems
= maxitems
= ptr
= 0;
664 while (colon_bit
= split_ignorespec (this_ignoreval
, &ptr
))
666 if (numitems
+ 1 >= maxitems
)
669 ivp
->ignores
= (struct ign
*)xrealloc (ivp
->ignores
, maxitems
* sizeof (struct ign
));
671 ivp
->ignores
[numitems
].val
= colon_bit
;
672 ivp
->ignores
[numitems
].len
= strlen (colon_bit
);
673 ivp
->ignores
[numitems
].flags
= 0;
675 (*ivp
->item_func
) (&ivp
->ignores
[numitems
]);
678 ivp
->ignores
[numitems
].val
= (char *)NULL
;
679 ivp
->num_ignores
= numitems
;
682 /* Functions to handle sorting glob results in different ways depending on
683 the value of the GLOBSORT variable. */
685 static int glob_sorttype
= SORT_NONE
;
687 static STRING_INT_ALIST sorttypes
[] = {
688 { "name", SORT_NAME
},
689 { "size", SORT_SIZE
},
690 { "mtime", SORT_MTIME
},
691 { "atime", SORT_ATIME
},
692 { "ctime", SORT_CTIME
},
693 { "blocks", SORT_BLOCKS
},
694 { "numeric", SORT_NUMERIC
},
695 { "nosort", SORT_NOSORT
},
699 /* A subset of the fields in the posix stat struct -- the ones we need --
700 normalized to using struct timespec. */
703 struct timespec mtime
;
704 struct timespec atime
;
705 struct timespec ctime
;
714 static struct globstat glob_nullstat
= { -1, { -1, -1 }, { -1, -1 }, { -1, -1 }, -1 };
717 glob_findtype (char *t
)
721 type
= find_string_in_alist (t
, sorttypes
, 0);
722 return (type
== -1 ? SORT_NONE
: type
);
726 setup_globsort (const char *varname
)
731 glob_sorttype
= SORT_NONE
;
732 val
= get_string_value (varname
);
733 if (val
== 0 || *val
== 0)
737 while (*val
&& whitespace (*val
))
738 val
++; /* why not? */
740 val
++; /* allow leading `+' but ignore it */
741 else if (*val
== '-')
743 r
= SORT_REVERSE
; /* leading `-' reverses sort order */
749 /* A bare `+' means the default sort by name in ascending order; a bare
750 `-' means to sort by name in descending order. */
751 glob_sorttype
= SORT_NAME
| r
;
755 t
= glob_findtype (val
);
756 /* any other value is equivalent to the historical behavior */
757 glob_sorttype
= (t
== SORT_NONE
) ? t
: t
| r
;
761 globsort_namecmp (char **s1
, char **s2
)
763 return ((glob_sorttype
< SORT_REVERSE
) ? strvec_posixcmp (s1
, s2
) : strvec_posixcmp (s2
, s1
));
/* Generic transitive comparison of two numeric values for qsort: evaluates
   to -1, 0, or 1 as A is less than, equal to, or greater than B.  Each
   argument is evaluated twice, so do not pass expressions with side
   effects. */
/* #define GENCMP(a,b) ((a) < (b) ? -1 : ((a) > (b) ? 1 : 0)) */
/* A clever idea from gnulib */
#define GENCMP(a,b) (((a) > (b)) - ((a) < (b)))
772 globsort_sizecmp (struct globsort_t
*g1
, struct globsort_t
*g2
)
776 x
= (glob_sorttype
< SORT_REVERSE
) ? GENCMP(g1
->st
.size
, g2
->st
.size
) : GENCMP(g2
->st
.size
, g1
->st
.size
);
777 return (x
== 0) ? (globsort_namecmp (&g1
->name
, &g2
->name
)) : x
;
781 globsort_timecmp (struct globsort_t
*g1
, struct globsort_t
*g2
)
784 struct timespec t1
, t2
;
786 t
= (glob_sorttype
< SORT_REVERSE
) ? glob_sorttype
: glob_sorttype
- SORT_REVERSE
;
792 else if (t
== SORT_ATIME
)
803 x
= (glob_sorttype
< SORT_REVERSE
) ? timespec_cmp (t1
, t2
) : timespec_cmp (t2
, t1
);
804 return (x
== 0) ? (globsort_namecmp (&g1
->name
, &g2
->name
)) : x
;
808 globsort_blockscmp (struct globsort_t
*g1
, struct globsort_t
*g2
)
812 x
= (glob_sorttype
< SORT_REVERSE
) ? GENCMP(g1
->st
.blocks
, g2
->st
.blocks
) : GENCMP(g2
->st
.blocks
, g1
->st
.blocks
);
813 return (x
== 0) ? (globsort_namecmp (&g1
->name
, &g2
->name
)) : x
;
/* Return the result of all_digits (STRING).  When that is nonzero, also
   store STRING converted as a base-10 integer in *VAL; otherwise *VAL is
   left untouched. */
static int
gs_checknum (char *string, intmax_t *val)
{
  int v;

  v = all_digits (string);
  if (v)
    *val = strtoimax (string, (char **)NULL, 10);
  return v;
}
829 globsort_numericcmp (struct globsort_t
*g1
, struct globsort_t
*g2
)
834 /* like valid_number but doesn't allow leading/trailing whitespace or sign */
835 v1
= gs_checknum (g1
->name
, &i1
);
836 v2
= gs_checknum (g2
->name
, &i2
);
838 if (v1
&& v2
) /* both valid numbers */
839 /* Don't need to fall back to name comparison here */
840 return (glob_sorttype
< SORT_REVERSE
) ? GENCMP(i1
, i2
) : GENCMP(i2
, i1
);
841 else if (v1
== 0 && v2
== 0) /* neither valid numbers */
842 return (globsort_namecmp (&g1
->name
, &g2
->name
));
843 else if (v1
!= 0 && v2
== 0)
844 return (glob_sorttype
< SORT_REVERSE
) ? -1 : 1;
846 return (glob_sorttype
< SORT_REVERSE
) ? 1 : -1;
851 static struct globsort_t
*
852 globsort_buildarray (char **array
, size_t len
)
854 struct globsort_t
*ret
;
858 ret
= (struct globsort_t
*)xmalloc (len
* sizeof (struct globsort_t
));
860 for (i
= 0; i
< len
; i
++)
862 ret
[i
].name
= array
[i
];
863 if (stat (array
[i
], &st
) != 0)
864 ret
[i
].st
= glob_nullstat
;
867 ret
[i
].st
.size
= st
.st_size
;
868 ret
[i
].st
.mtime
= get_stat_mtime (&st
);
869 ret
[i
].st
.atime
= get_stat_atime (&st
);
870 ret
[i
].st
.ctime
= get_stat_ctime (&st
);
871 ret
[i
].st
.blocks
= st
.st_blocks
;
879 globsort_sortbyname (char **results
)
881 qsort (results
, strvec_len (results
), sizeof (char *), (QSFUNC
*)globsort_namecmp
);
885 globsort_sortarray (struct globsort_t
*garray
, size_t len
)
890 t
= (glob_sorttype
< SORT_REVERSE
) ? glob_sorttype
: glob_sorttype
- SORT_REVERSE
;
895 sortfunc
= (QSFUNC
*)globsort_sizecmp
;
900 sortfunc
= (QSFUNC
*)globsort_timecmp
;
903 sortfunc
= (QSFUNC
*)globsort_blockscmp
;
906 sortfunc
= (QSFUNC
*)globsort_numericcmp
;
909 internal_error (_("invalid glob sort type"));
913 qsort (garray
, len
, sizeof (struct globsort_t
), sortfunc
);
917 sh_sortglob (char **results
)
920 struct globsort_t
*garray
;
922 if (glob_sorttype
== SORT_NOSORT
|| glob_sorttype
== (SORT_NOSORT
|SORT_REVERSE
))
925 if (glob_sorttype
== SORT_NONE
|| glob_sorttype
== SORT_NAME
)
926 globsort_sortbyname (results
); /* posix sort */
927 else if (glob_sorttype
== (SORT_NAME
|SORT_REVERSE
))
928 globsort_sortbyname (results
); /* posix sort reverse order */
933 rlen
= strvec_len (results
);
934 /* populate an array of name/statinfo, sort it appropriately, copy the
935 names from the sorted array back to RESULTS, and free the array */
936 garray
= globsort_buildarray (results
, rlen
);
937 globsort_sortarray (garray
, rlen
);
938 for (i
= 0; i
< rlen
; i
++)
939 results
[i
] = garray
[i
].name
;