]>
git.ipfire.org Git - thirdparty/bash.git/blob - pathexp.c
1 /* pathexp.c -- The shell interface to the globbing library. */
3 /* Copyright (C) 1995-2020 Free Software Foundation, Inc.
5 This file is part of GNU Bash, the Bourne Again SHell.
7 Bash is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
12 Bash is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with Bash. If not, see <http://www.gnu.org/licenses/>. */
23 #include "bashtypes.h"
26 #if defined (HAVE_UNISTD_H)
39 #include <glob/strmatch.h>
41 static int glob_name_is_acceptable
PARAMS((const char *));
42 static void ignore_globbed_names
PARAMS((char **, sh_ignore_func_t
*));
43 static char *split_ignorespec
PARAMS((char *, int *));
45 #if defined (USE_POSIX_GLOB_LIBRARY)
47 typedef int posix_glob_errfunc_t
PARAMS((const char *, int));
49 # include <glob/glob.h>
52 /* Control whether * matches .files in globbing. */
53 int glob_dot_filenames
;
55 /* Control whether the extended globbing features are enabled. */
56 int extended_glob
= EXTGLOB_DEFAULT
;
58 /* Control enabling special handling of `**' */
61 /* Return nonzero if STRING has any unquoted special globbing chars in it.
62 This is supposed to be called when pathname expansion is performed, so
63 it implements the rules in Posix 2.13.3, specifically that an unquoted
64 slash cannot appear in a bracket expression. */
66 unquoted_glob_pattern_p (string
)
67 register char *string
;
76 send
= string
+ strlen (string
);
91 if (open
) /* XXX - if --open == 0? */
102 if (*string
== '(') /*)*/
106 /* A pattern can't end with a backslash, but a backslash in the pattern
107 can be special to the matching engine, so we note it in case we need it later. */
110 if (*string
!= '\0' && *string
!= '/')
116 else if (open
&& *string
== '/')
118 string
++; /* quoted slashes in bracket expressions are ok */
121 else if (*string
== 0)
125 if (*string
++ == '\0')
129 /* Advance one fewer byte than an entire multibyte character to
130 account for the auto-increment in the loop above. */
131 #ifdef HANDLE_MULTIBYTE
133 ADVANCE_CHAR_P (string
, send
- string
);
136 ADVANCE_CHAR_P (string
, send
- string
);
141 return (bsquote
? 2 : 0);
147 /* Return 1 if C is a character that is `special' in a POSIX ERE and needs to
148 be quoted to match itself. */
189 if (s
[1] == '(') /*(*/
196 /* PATHNAME can contain characters prefixed by CTLESC; this indicates
197 that the character is to be quoted. We quote it here in the style
198 that the glob library recognizes. If flags includes QGLOB_CVTNULL,
199 we change quoted null strings (pathname[0] == CTLNUL) into empty
200 strings (pathname[0] == 0). If this is called after quote removal
201 is performed, (flags & QGLOB_CVTNULL) should be 0; if called when quote
202 removal has not been done (for example, before attempting to match a
203 pattern while executing a case statement), flags should include
204 QGLOB_CVTNULL. If flags includes QGLOB_CTLESC, we need to remove CTLESC
205 quoting CTLESC or CTLNUL (as if dequote_string were called). If flags
206 includes QGLOB_FILENAME, appropriate quoting to match a filename should be
207 performed. QGLOB_REGEXP means we're quoting for a Posix ERE (for
208 [[ string =~ pat ]]) and that requires some special handling. */
210 quote_string_for_globbing (pathname
, qflags
)
211 const char *pathname
;
216 int cclass
, collsym
, equiv
, c
, last_was_backslash
;
219 temp
= (char *)xmalloc (2 * strlen (pathname
) + 1);
221 if ((qflags
& QGLOB_CVTNULL
) && QUOTED_NULL (pathname
))
227 cclass
= collsym
= equiv
= last_was_backslash
= 0;
228 for (i
= j
= 0; pathname
[i
]; i
++)
230 /* Fix for CTLESC at the end of the string? */
231 if (pathname
[i
] == CTLESC
&& pathname
[i
+1] == '\0')
233 temp
[j
++] = pathname
[i
++];
236 /* If we are parsing regexp, turn CTLESC CTLESC into CTLESC. It's not an
237 ERE special character, so we should just be able to pass it through. */
238 else if ((qflags
& (QGLOB_REGEXP
|QGLOB_CTLESC
)) && pathname
[i
] == CTLESC
&& (pathname
[i
+1] == CTLESC
|| pathname
[i
+1] == CTLNUL
))
241 temp
[j
++] = pathname
[i
];
244 else if (pathname
[i
] == CTLESC
)
246 convert_to_backslash
:
247 if ((qflags
& QGLOB_FILENAME
) && pathname
[i
+1] == '/')
249 /* What to do if preceding char is backslash? */
250 if (pathname
[i
+1] != CTLESC
&& (qflags
& QGLOB_REGEXP
) && ere_char (pathname
[i
+1]) == 0)
254 if (pathname
[i
] == '\0')
257 else if ((qflags
& QGLOB_REGEXP
) && (i
== 0 || pathname
[i
-1] != CTLESC
) && pathname
[i
] == '[') /*]*/
259 temp
[j
++] = pathname
[i
++]; /* open bracket */
262 c
= pathname
[i
++]; /* c == char after open bracket */
263 if (c
== '^') /* ignore pattern negation */
268 if (c
== ']') /* ignore right bracket if first char */
277 else if (c
== CTLESC
)
279 /* skip c, check for EOS, let assignment at end of loop */
280 /* pathname[i] == backslash-escaped character */
281 if (pathname
[i
] == 0)
283 temp
[j
++] = pathname
[i
++];
285 else if (c
== '[' && pathname
[i
] == ':')
288 temp
[j
++] = pathname
[i
++];
291 else if (cclass
&& c
== ':' && pathname
[i
] == ']')
294 temp
[j
++] = pathname
[i
++];
297 else if (c
== '[' && pathname
[i
] == '=')
300 temp
[j
++] = pathname
[i
++];
301 if (pathname
[i
] == ']')
302 temp
[j
++] = pathname
[i
++]; /* right brack can be in equiv */
305 else if (equiv
&& c
== '=' && pathname
[i
] == ']')
308 temp
[j
++] = pathname
[i
++];
311 else if (c
== '[' && pathname
[i
] == '.')
314 temp
[j
++] = pathname
[i
++];
315 if (pathname
[i
] == ']')
316 temp
[j
++] = pathname
[i
++]; /* right brack can be in collsym */
319 else if (collsym
&& c
== '.' && pathname
[i
] == ']')
322 temp
[j
++] = pathname
[i
++];
328 while (((c
= pathname
[i
++]) != ']') && c
!= 0);
330 /* If we don't find the closing bracket before we hit the end of
331 the string, rescan string without treating it as a bracket
332 expression (has implications for backslash and special ERE chars). */
336 i
= savei
- 1; /* -1 for autoincrement above */
341 temp
[j
++] = c
; /* closing right bracket */
342 i
--; /* increment will happen above in loop */
343 continue; /* skip double assignment below */
345 else if (pathname
[i
] == '\\' && (qflags
& QGLOB_REGEXP
) == 0)
347 /* XXX - if not quoting regexp, use backslash as quote char. Should
348 we just pass it through without treating it as special? That is
349 what ksh93 seems to do. */
351 /* If we want to pass through backslash unaltered, comment out these lines. */
356 if (pathname
[i
] == '\0')
358 /* If we are turning CTLESC CTLESC into CTLESC, we need to do that
359 even when the first CTLESC is preceded by a backslash. */
360 if ((qflags
& QGLOB_CTLESC
) && pathname
[i
] == CTLESC
&& (pathname
[i
+1] == CTLESC
|| pathname
[i
+1] == CTLNUL
))
361 i
++; /* skip over the CTLESC */
362 else if ((qflags
& QGLOB_CTLESC
) && pathname
[i
] == CTLESC
)
363 /* A little more general: if there is an unquoted backslash in the
364 pattern and we are handling quoted characters in the pattern,
365 convert the CTLESC to backslash and add the next character on
366 the theory that the backslash will quote the next character
367 but it would be inconsistent not to replace the CTLESC with
368 another backslash here. We can't tell at this point whether the
369 CTLESC comes from a backslash or other form of quoting in the original pattern. */
371 goto convert_to_backslash
;
373 else if (pathname
[i
] == '\\' && (qflags
& QGLOB_REGEXP
))
374 last_was_backslash
= 1;
375 temp
[j
++] = pathname
[i
];
384 quote_globbing_chars (string
)
389 const char *s
, *send
;
392 slen
= strlen (string
);
393 send
= string
+ slen
;
395 temp
= (char *)xmalloc (slen
* 2 + 1);
396 for (t
= temp
, s
= string
; *s
; )
401 /* Copy a single (possibly multibyte) character from s to t,
402 incrementing both. */
403 COPY_CHAR_P (t
, s
, send
);
409 /* Call the glob library to do globbing on PATHNAME. */
411 shell_glob_filename (pathname
, qflags
)
412 const char *pathname
;
415 #if defined (USE_POSIX_GLOB_LIBRARY)
417 char *temp
, **results
;
421 temp
= quote_string_for_globbing (pathname
, QGLOB_FILENAME
|qflags
);
423 filenames
.gl_offs
= 0;
425 # if defined (GLOB_PERIOD)
426 glob_flags
= glob_dot_filenames
? GLOB_PERIOD
: 0;
429 # endif /* !GLOB_PERIOD */
431 glob_flags
|= (GLOB_ERR
| GLOB_DOOFFS
);
433 i
= glob (temp
, glob_flags
, (posix_glob_errfunc_t
*)NULL
, &filenames
);
437 if (i
== GLOB_NOSPACE
|| i
== GLOB_ABORTED
)
438 return ((char **)NULL
);
439 else if (i
== GLOB_NOMATCH
)
440 filenames
.gl_pathv
= (char **)NULL
;
441 else if (i
!= 0) /* other error codes not in POSIX.2 */
442 filenames
.gl_pathv
= (char **)NULL
;
444 results
= filenames
.gl_pathv
;
446 if (results
&& ((GLOB_FAILED (results
)) == 0))
448 if (should_ignore_glob_matches ())
449 ignore_glob_matches (results
);
450 if (results
&& results
[0])
451 strvec_sort (results
, 1); /* posix sort */
455 results
= (char **)NULL
;
461 #else /* !USE_POSIX_GLOB_LIBRARY */
463 char *temp
, **results
;
464 int gflags
, quoted_pattern
;
466 noglob_dot_filenames
= glob_dot_filenames
== 0;
468 temp
= quote_string_for_globbing (pathname
, QGLOB_FILENAME
|qflags
);
469 gflags
= glob_star
? GX_GLOBSTAR
: 0;
470 results
= glob_filename (temp
, gflags
);
473 if (results
&& ((GLOB_FAILED (results
)) == 0))
475 if (should_ignore_glob_matches ())
476 ignore_glob_matches (results
);
477 if (results
&& results
[0])
478 strvec_sort (results
, 1); /* posix sort */
482 results
= (char **)&glob_error_return
;
487 #endif /* !USE_POSIX_GLOB_LIBRARY */
490 /* Stuff for GLOBIGNORE. */
492 static struct ignorevar globignore
=
498 (sh_iv_item_func_t
*)0,
501 /* Set up to ignore some glob matches because the value of GLOBIGNORE
502 has changed. If GLOBIGNORE is being unset, we also need to disable
503 the globbing of filenames beginning with a `.'. */
505 setup_glob_ignore (name
)
510 v
= get_string_value (name
);
511 setup_ignore_patterns (&globignore
);
513 if (globignore
.num_ignores
)
514 glob_dot_filenames
= 1;
516 glob_dot_filenames
= 0;
520 should_ignore_glob_matches ()
522 return globignore
.num_ignores
;
525 /* Return 0 if NAME matches a pattern in the globignore.ignores list. */
527 glob_name_is_acceptable (name
)
534 /* . and .. are never matched. We extend this to the terminal component of a pathname. */
536 n
= strrchr (name
, '/');
537 if (n
== 0 || n
[1] == 0)
542 if (n
[0] == '.' && (n
[1] == '\0' || (n
[1] == '.' && n
[2] == '\0')))
545 flags
= FNM_PATHNAME
| FNMATCH_EXTFLAG
| FNMATCH_NOCASEGLOB
;
546 for (p
= globignore
.ignores
; p
->val
; p
++)
548 if (strmatch (p
->val
, (char *)name
, flags
) != FNM_NOMATCH
)
554 /* Internal function to test whether filenames in NAMES should be
555 ignored. NAME_FUNC is a pointer to a function to call with each
556 name. It returns non-zero if the name is acceptable to the particular
557 ignore function which called _ignore_names; zero if the name should
558 be removed from NAMES. */
561 ignore_globbed_names (names
, name_func
)
563 sh_ignore_func_t
*name_func
;
568 for (i
= 0; names
[i
]; i
++)
570 newnames
= strvec_create (i
+ 1);
572 for (n
= i
= 0; names
[i
]; i
++)
574 if ((*name_func
) (names
[i
]))
575 newnames
[n
++] = names
[i
];
580 newnames
[n
] = (char *)NULL
;
584 names
[0] = (char *)NULL
;
589 /* Copy the acceptable names from NEWNAMES back to NAMES and set the new array end. */
591 for (n
= 0; newnames
[n
]; n
++)
592 names
[n
] = newnames
[n
];
593 names
[n
] = (char *)NULL
;
598 ignore_glob_matches (names
)
601 if (globignore
.num_ignores
== 0)
604 ignore_globbed_names (names
, glob_name_is_acceptable
);
608 split_ignorespec (s
, ip
)
622 n
= skip_to_delim (s
, i
, ":", SD_NOJMP
|SD_EXTGLOB
|SD_GLOB
);
623 t
= substring (s
, i
, n
);
632 setup_ignore_patterns (ivp
)
633 struct ignorevar
*ivp
;
635 int numitems
, maxitems
, ptr
;
636 char *colon_bit
, *this_ignoreval
;
639 this_ignoreval
= get_string_value (ivp
->varname
);
641 /* If nothing has changed then just exit now. */
642 if ((this_ignoreval
&& ivp
->last_ignoreval
&& STREQ (this_ignoreval
, ivp
->last_ignoreval
)) ||
643 (!this_ignoreval
&& !ivp
->last_ignoreval
))
646 /* Oops. The ignore variable has changed. Re-parse it. */
647 ivp
->num_ignores
= 0;
651 for (p
= ivp
->ignores
; p
->val
; p
++)
654 ivp
->ignores
= (struct ign
*)NULL
;
657 if (ivp
->last_ignoreval
)
659 free (ivp
->last_ignoreval
);
660 ivp
->last_ignoreval
= (char *)NULL
;
663 if (this_ignoreval
== 0 || *this_ignoreval
== '\0')
666 ivp
->last_ignoreval
= savestring (this_ignoreval
);
668 numitems
= maxitems
= ptr
= 0;
671 while (colon_bit
= extract_colon_unit (this_ignoreval
, &ptr
))
673 while (colon_bit
= split_ignorespec (this_ignoreval
, &ptr
))
676 if (numitems
+ 1 >= maxitems
)
679 ivp
->ignores
= (struct ign
*)xrealloc (ivp
->ignores
, maxitems
* sizeof (struct ign
));
681 ivp
->ignores
[numitems
].val
= colon_bit
;
682 ivp
->ignores
[numitems
].len
= strlen (colon_bit
);
683 ivp
->ignores
[numitems
].flags
= 0;
685 (*ivp
->item_func
) (&ivp
->ignores
[numitems
]);
688 ivp
->ignores
[numitems
].val
= (char *)NULL
;
689 ivp
->num_ignores
= numitems
;