]> git.ipfire.org Git - thirdparty/bash.git/blame - pathexp.c
Bash-5.2 patch 26: fix typo when specifying readline's custom color prefix
[thirdparty/bash.git] / pathexp.c
CommitLineData
ccc6cda3
JA
1/* pathexp.c -- The shell interface to the globbing library. */
2
8868edaf 3/* Copyright (C) 1995-2020 Free Software Foundation, Inc.
ccc6cda3
JA
4
5 This file is part of GNU Bash, the Bourne Again SHell.
6
3185942a
JA
7 Bash is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
ccc6cda3 11
3185942a
JA
12 Bash is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
ccc6cda3 16
3185942a
JA
17 You should have received a copy of the GNU General Public License
18 along with Bash. If not, see <http://www.gnu.org/licenses/>.
19*/
ccc6cda3
JA
20
21#include "config.h"
22
23#include "bashtypes.h"
24#include <stdio.h>
25
26#if defined (HAVE_UNISTD_H)
27# include <unistd.h>
28#endif
29
30#include "bashansi.h"
31
32#include "shell.h"
33#include "pathexp.h"
34#include "flags.h"
35
7117c2d2 36#include "shmbutil.h"
3185942a 37#include "bashintl.h"
7117c2d2 38
f73dda09 39#include <glob/strmatch.h>
b72432fd 40
8868edaf
CR
41static int glob_name_is_acceptable PARAMS((const char *));
42static void ignore_globbed_names PARAMS((char **, sh_ignore_func_t *));
43static char *split_ignorespec PARAMS((char *, int *));
ac50fbac 44
74091dd4 45#include <glob/glob.h>
ccc6cda3
JA
46
47/* Control whether * matches .files in globbing. */
48int glob_dot_filenames;
49
cce855bc 50/* Control whether the extended globbing features are enabled. */
0001803f 51int extended_glob = EXTGLOB_DEFAULT;
cce855bc 52
3185942a
JA
53/* Control enabling special handling of `**' */
54int glob_star = 0;
55
8868edaf
CR
56/* Return nonzero if STRING has any unquoted special globbing chars in it.
57 This is supposed to be called when pathname expansion is performed, so
58 it implements the rules in Posix 2.13.3, specifically that an unquoted
59 slash cannot appear in a bracket expression. */
ccc6cda3
JA
60int
61unquoted_glob_pattern_p (string)
62 register char *string;
63{
64 register int c;
7117c2d2 65 char *send;
fcf6ae7d 66 int open, bsquote;
ccc6cda3 67
7117c2d2
JA
68 DECLARE_MBSTATE;
69
fcf6ae7d 70 open = bsquote = 0;
7117c2d2
JA
71 send = string + strlen (string);
72
ccc6cda3
JA
73 while (c = *string++)
74 {
75 switch (c)
76 {
77 case '?':
78 case '*':
79 return (1);
80
81 case '[':
82 open++;
83 continue;
84
85 case ']':
8868edaf 86 if (open) /* XXX - if --open == 0? */
ccc6cda3
JA
87 return (1);
88 continue;
89
8868edaf
CR
90 case '/':
91 if (open)
92 open = 0;
93
cce855bc
JA
94 case '+':
95 case '@':
96 case '!':
97 if (*string == '(') /*)*/
98 return (1);
99 continue;
100
d233b485 101 /* A pattern can't end with a backslash, but a backslash in the pattern
8868edaf
CR
102 can be special to the matching engine, so we note it in case we
103 need it later. */
ccc6cda3 104 case '\\':
fcf6ae7d
CR
105 if (*string != '\0' && *string != '/')
106 {
107 bsquote = 1;
108 string++;
109 continue;
110 }
8868edaf
CR
111 else if (open && *string == '/')
112 {
113 string++; /* quoted slashes in bracket expressions are ok */
114 continue;
115 }
fcf6ae7d
CR
116 else if (*string == 0)
117 return (0);
d233b485
CR
118
119 case CTLESC:
ccc6cda3
JA
120 if (*string++ == '\0')
121 return (0);
122 }
7117c2d2
JA
123
124 /* Advance one fewer byte than an entire multibyte character to
125 account for the auto-increment in the loop above. */
126#ifdef HANDLE_MULTIBYTE
127 string--;
128 ADVANCE_CHAR_P (string, send - string);
129 string++;
130#else
131 ADVANCE_CHAR_P (string, send - string);
132#endif
ccc6cda3 133 }
fcf6ae7d 134
8868edaf 135#if 0
fcf6ae7d 136 return (bsquote ? 2 : 0);
8868edaf
CR
137#else
138 return (0);
139#endif
ccc6cda3
JA
140}
141
f1be666c
JA
/* Return 1 if C is a character that is `special' in a POSIX ERE and needs to
   be quoted to match itself; return 0 for every other character. */
static inline int
ere_char (int c)
{
  switch (c)
    {
    case '.':
    case '[':
    case '\\':
    case '(':
    case ')':
    case '*':
    case '+':
    case '?':
    case '{':
    case '|':
    case '^':
    case '$':
      return 1;
    default:
      return 0;
    }
}
168
/* Return 1 if the character at S is special to the globbing code, else 0.
   `*', `[', `]', `?' and backslash always count; `+', `@' and `!' count
   only when immediately followed by an open paren.
   This is only used to determine whether to backslash-quote a character. */
int
glob_char_p (const char *s)
{
  switch (*s)
    {
    case '*':
    case '[':
    case ']':
    case '?':
    case '\\':
      return 1;
    case '+':
    case '@':
    case '!':
      /* Only the start of an extended-glob group is special. */
      return (s[1] == '(') ? 1 : 0;     /*)*/
    default:
      return 0;
    }
}
191
ccc6cda3
JA
192/* PATHNAME can contain characters prefixed by CTLESC; this indicates
193 that the character is to be quoted. We quote it here in the style
cce855bc 194 that the glob library recognizes. If flags includes QGLOB_CVTNULL,
ccc6cda3
JA
195 we change quoted null strings (pathname[0] == CTLNUL) into empty
196 strings (pathname[0] == 0). If this is called after quote removal
cce855bc 197 is performed, (flags & QGLOB_CVTNULL) should be 0; if called when quote
ccc6cda3 198 removal has not been done (for example, before attempting to match a
cce855bc 199 pattern while executing a case statement), flags should include
d233b485
CR
200 QGLOB_CVTNULL. If flags includes QGLOB_CTLESC, we need to remove CTLESC
201 quoting CTLESC or CTLNUL (as if dequote_string were called). If flags
202 includes QGLOB_FILENAME, appropriate quoting to match a filename should be
203 performed. QGLOB_REGEXP means we're quoting for a Posix ERE (for
204 [[ string =~ pat ]]) and that requires some special handling. */
ccc6cda3 205char *
cce855bc 206quote_string_for_globbing (pathname, qflags)
28ef6c31 207 const char *pathname;
cce855bc 208 int qflags;
ccc6cda3
JA
209{
210 char *temp;
cce855bc 211 register int i, j;
d233b485 212 int cclass, collsym, equiv, c, last_was_backslash;
a0c0a00f 213 int savei, savej;
ccc6cda3 214
ac50fbac 215 temp = (char *)xmalloc (2 * strlen (pathname) + 1);
ccc6cda3 216
cce855bc 217 if ((qflags & QGLOB_CVTNULL) && QUOTED_NULL (pathname))
ccc6cda3
JA
218 {
219 temp[0] = '\0';
220 return temp;
221 }
222
d233b485 223 cclass = collsym = equiv = last_was_backslash = 0;
cce855bc 224 for (i = j = 0; pathname[i]; i++)
ccc6cda3 225 {
ac50fbac
CR
226 /* Fix for CTLESC at the end of the string? */
227 if (pathname[i] == CTLESC && pathname[i+1] == '\0')
228 {
229 temp[j++] = pathname[i++];
230 break;
231 }
232 /* If we are parsing regexp, turn CTLESC CTLESC into CTLESC. It's not an
233 ERE special character, so we should just be able to pass it through. */
d233b485 234 else if ((qflags & (QGLOB_REGEXP|QGLOB_CTLESC)) && pathname[i] == CTLESC && (pathname[i+1] == CTLESC || pathname[i+1] == CTLNUL))
ac50fbac
CR
235 {
236 i++;
237 temp[j++] = pathname[i];
238 continue;
239 }
240 else if (pathname[i] == CTLESC)
28ef6c31 241 {
8868edaf 242convert_to_backslash:
28ef6c31
JA
243 if ((qflags & QGLOB_FILENAME) && pathname[i+1] == '/')
244 continue;
ac50fbac 245 /* What to do if preceding char is backslash? */
25db9a70 246 if (pathname[i+1] != CTLESC && (qflags & QGLOB_REGEXP) && ere_char (pathname[i+1]) == 0)
f1be666c 247 continue;
cce855bc 248 temp[j++] = '\\';
7117c2d2
JA
249 i++;
250 if (pathname[i] == '\0')
251 break;
28ef6c31 252 }
ac50fbac
CR
253 else if ((qflags & QGLOB_REGEXP) && (i == 0 || pathname[i-1] != CTLESC) && pathname[i] == '[') /*]*/
254 {
ac50fbac 255 temp[j++] = pathname[i++]; /* open bracket */
a0c0a00f
CR
256 savej = j;
257 savei = i;
ac50fbac 258 c = pathname[i++]; /* c == char after open bracket */
d233b485
CR
259 if (c == '^') /* ignore pattern negation */
260 {
261 temp[j++] = c;
262 c = pathname[i++];
263 }
264 if (c == ']') /* ignore right bracket if first char */
265 {
266 temp[j++] = c;
267 c = pathname[i++];
268 }
ac50fbac
CR
269 do
270 {
271 if (c == 0)
272 goto endpat;
273 else if (c == CTLESC)
274 {
275 /* skip c, check for EOS, let assignment at end of loop */
276 /* pathname[i] == backslash-escaped character */
277 if (pathname[i] == 0)
278 goto endpat;
279 temp[j++] = pathname[i++];
280 }
281 else if (c == '[' && pathname[i] == ':')
282 {
283 temp[j++] = c;
284 temp[j++] = pathname[i++];
285 cclass = 1;
286 }
287 else if (cclass && c == ':' && pathname[i] == ']')
288 {
289 temp[j++] = c;
290 temp[j++] = pathname[i++];
291 cclass = 0;
292 }
293 else if (c == '[' && pathname[i] == '=')
294 {
295 temp[j++] = c;
296 temp[j++] = pathname[i++];
297 if (pathname[i] == ']')
298 temp[j++] = pathname[i++]; /* right brack can be in equiv */
299 equiv = 1;
300 }
301 else if (equiv && c == '=' && pathname[i] == ']')
302 {
303 temp[j++] = c;
304 temp[j++] = pathname[i++];
305 equiv = 0;
306 }
307 else if (c == '[' && pathname[i] == '.')
308 {
309 temp[j++] = c;
310 temp[j++] = pathname[i++];
311 if (pathname[i] == ']')
312 temp[j++] = pathname[i++]; /* right brack can be in collsym */
313 collsym = 1;
314 }
315 else if (collsym && c == '.' && pathname[i] == ']')
316 {
317 temp[j++] = c;
318 temp[j++] = pathname[i++];
319 collsym = 0;
320 }
321 else
322 temp[j++] = c;
323 }
a0c0a00f
CR
324 while (((c = pathname[i++]) != ']') && c != 0);
325
326 /* If we don't find the closing bracket before we hit the end of
327 the string, rescan string without treating it as a bracket
328 expression (has implications for backslash and special ERE
329 chars) */
330 if (c == 0)
331 {
332 i = savei - 1; /* -1 for autoincrement above */
333 j = savej;
334 continue;
335 }
336
ac50fbac
CR
337 temp[j++] = c; /* closing right bracket */
338 i--; /* increment will happen above in loop */
339 continue; /* skip double assignment below */
340 }
341 else if (pathname[i] == '\\' && (qflags & QGLOB_REGEXP) == 0)
3185942a 342 {
ac50fbac 343 /* XXX - if not quoting regexp, use backslash as quote char. Should
8868edaf 344 We just pass it through without treating it as special? That is
ac50fbac
CR
345 what ksh93 seems to do. */
346
347 /* If we want to pass through backslash unaltered, comment out these
348 lines. */
3185942a 349 temp[j++] = '\\';
ac50fbac 350
3185942a
JA
351 i++;
352 if (pathname[i] == '\0')
353 break;
d233b485
CR
354 /* If we are turning CTLESC CTLESC into CTLESC, we need to do that
355 even when the first CTLESC is preceded by a backslash. */
356 if ((qflags & QGLOB_CTLESC) && pathname[i] == CTLESC && (pathname[i+1] == CTLESC || pathname[i+1] == CTLNUL))
357 i++; /* skip over the CTLESC */
8868edaf
CR
358 else if ((qflags & QGLOB_CTLESC) && pathname[i] == CTLESC)
359 /* A little more general: if there is an unquoted backslash in the
360 pattern and we are handling quoted characters in the pattern,
361 convert the CTLESC to backslash and add the next character on
362 the theory that the backslash will quote the next character
363 but it would be inconsistent not to replace the CTLESC with
364 another backslash here. We can't tell at this point whether the
365 CTLESC comes from a backslash or other form of quoting in the
366 original pattern. */
367 goto convert_to_backslash;
3185942a 368 }
ac50fbac
CR
369 else if (pathname[i] == '\\' && (qflags & QGLOB_REGEXP))
370 last_was_backslash = 1;
7117c2d2 371 temp[j++] = pathname[i];
ccc6cda3 372 }
ac50fbac 373endpat:
cce855bc 374 temp[j] = '\0';
ccc6cda3
JA
375
376 return (temp);
377}
378
379char *
380quote_globbing_chars (string)
a0c0a00f 381 const char *string;
ccc6cda3 382{
7117c2d2 383 size_t slen;
a0c0a00f
CR
384 char *temp, *t;
385 const char *s, *send;
7117c2d2
JA
386 DECLARE_MBSTATE;
387
388 slen = strlen (string);
389 send = string + slen;
ccc6cda3 390
7117c2d2 391 temp = (char *)xmalloc (slen * 2 + 1);
ccc6cda3
JA
392 for (t = temp, s = string; *s; )
393 {
3185942a
JA
394 if (glob_char_p (s))
395 *t++ = '\\';
7117c2d2
JA
396
397 /* Copy a single (possibly multibyte) character from s to t,
ac50fbac 398 incrementing both. */
7117c2d2 399 COPY_CHAR_P (t, s, send);
ccc6cda3
JA
400 }
401 *t = '\0';
402 return temp;
403}
404
405/* Call the glob library to do globbing on PATHNAME. */
406char **
8868edaf 407shell_glob_filename (pathname, qflags)
28ef6c31 408 const char *pathname;
8868edaf 409 int qflags;
ccc6cda3 410{
ccc6cda3 411 char *temp, **results;
8868edaf 412 int gflags, quoted_pattern;
ccc6cda3
JA
413
414 noglob_dot_filenames = glob_dot_filenames == 0;
415
8868edaf 416 temp = quote_string_for_globbing (pathname, QGLOB_FILENAME|qflags);
d233b485
CR
417 gflags = glob_star ? GX_GLOBSTAR : 0;
418 results = glob_filename (temp, gflags);
ccc6cda3
JA
419 free (temp);
420
421 if (results && ((GLOB_FAILED (results)) == 0))
422 {
423 if (should_ignore_glob_matches ())
424 ignore_glob_matches (results);
425 if (results && results[0])
8868edaf 426 strvec_sort (results, 1); /* posix sort */
ccc6cda3
JA
427 else
428 {
429 FREE (results);
430 results = (char **)&glob_error_return;
431 }
432 }
433
434 return (results);
ccc6cda3
JA
435}
436
437/* Stuff for GLOBIGNORE. */
438
439static struct ignorevar globignore =
440{
441 "GLOBIGNORE",
442 (struct ign *)0,
443 0,
444 (char *)0,
f73dda09 445 (sh_iv_item_func_t *)0,
ccc6cda3
JA
446};
447
448/* Set up to ignore some glob matches because the value of GLOBIGNORE
449 has changed. If GLOBIGNORE is being unset, we also need to disable
450 the globbing of filenames beginning with a `.'. */
451void
452setup_glob_ignore (name)
453 char *name;
454{
455 char *v;
456
457 v = get_string_value (name);
458 setup_ignore_patterns (&globignore);
459
460 if (globignore.num_ignores)
461 glob_dot_filenames = 1;
462 else if (v == 0)
463 glob_dot_filenames = 0;
464}
465
466int
467should_ignore_glob_matches ()
468{
469 return globignore.num_ignores;
470}
471
472/* Return 0 if NAME matches a pattern in the globignore.ignores list. */
473static int
474glob_name_is_acceptable (name)
f73dda09 475 const char *name;
ccc6cda3
JA
476{
477 struct ign *p;
8868edaf 478 char *n;
cce855bc 479 int flags;
ccc6cda3 480
8868edaf
CR
481 /* . and .. are never matched. We extend this to the terminal component of a
482 pathname. */
483 n = strrchr (name, '/');
484 if (n == 0 || n[1] == 0)
485 n = (char *)name;
486 else
487 n++;
488
489 if (n[0] == '.' && (n[1] == '\0' || (n[1] == '.' && n[2] == '\0')))
ccc6cda3
JA
490 return (0);
491
a0c0a00f 492 flags = FNM_PATHNAME | FNMATCH_EXTFLAG | FNMATCH_NOCASEGLOB;
ccc6cda3
JA
493 for (p = globignore.ignores; p->val; p++)
494 {
f73dda09 495 if (strmatch (p->val, (char *)name, flags) != FNM_NOMATCH)
28ef6c31 496 return (0);
ccc6cda3
JA
497 }
498 return (1);
499}
500
501/* Internal function to test whether filenames in NAMES should be
502 ignored. NAME_FUNC is a pointer to a function to call with each
503 name. It returns non-zero if the name is acceptable to the particular
504 ignore function which called _ignore_names; zero if the name should
505 be removed from NAMES. */
506
507static void
508ignore_globbed_names (names, name_func)
509 char **names;
f73dda09 510 sh_ignore_func_t *name_func;
ccc6cda3
JA
511{
512 char **newnames;
513 int n, i;
514
515 for (i = 0; names[i]; i++)
516 ;
7117c2d2 517 newnames = strvec_create (i + 1);
ccc6cda3
JA
518
519 for (n = i = 0; names[i]; i++)
520 {
521 if ((*name_func) (names[i]))
28ef6c31 522 newnames[n++] = names[i];
ccc6cda3
JA
523 else
524 free (names[i]);
525 }
526
527 newnames[n] = (char *)NULL;
528
529 if (n == 0)
530 {
531 names[0] = (char *)NULL;
532 free (newnames);
533 return;
534 }
535
536 /* Copy the acceptable names from NEWNAMES back to NAMES and set the
537 new array end. */
538 for (n = 0; newnames[n]; n++)
539 names[n] = newnames[n];
540 names[n] = (char *)NULL;
d166f048 541 free (newnames);
ccc6cda3
JA
542}
543
544void
545ignore_glob_matches (names)
546 char **names;
547{
548 if (globignore.num_ignores == 0)
549 return;
550
551 ignore_globbed_names (names, glob_name_is_acceptable);
552}
553
495aee44
CR
554static char *
555split_ignorespec (s, ip)
556 char *s;
557 int *ip;
558{
559 char *t;
560 int n, i;
561
562 if (s == 0)
563 return 0;
564
565 i = *ip;
566 if (s[i] == 0)
567 return 0;
568
a0c0a00f 569 n = skip_to_delim (s, i, ":", SD_NOJMP|SD_EXTGLOB|SD_GLOB);
495aee44
CR
570 t = substring (s, i, n);
571
572 if (s[n] == ':')
573 n++;
574 *ip = n;
575 return t;
576}
577
ccc6cda3
JA
578void
579setup_ignore_patterns (ivp)
580 struct ignorevar *ivp;
581{
582 int numitems, maxitems, ptr;
583 char *colon_bit, *this_ignoreval;
584 struct ign *p;
585
586 this_ignoreval = get_string_value (ivp->varname);
587
588 /* If nothing has changed then just exit now. */
589 if ((this_ignoreval && ivp->last_ignoreval && STREQ (this_ignoreval, ivp->last_ignoreval)) ||
590 (!this_ignoreval && !ivp->last_ignoreval))
591 return;
592
593 /* Oops. The ignore variable has changed. Re-parse it. */
594 ivp->num_ignores = 0;
595
596 if (ivp->ignores)
597 {
598 for (p = ivp->ignores; p->val; p++)
599 free(p->val);
600 free (ivp->ignores);
601 ivp->ignores = (struct ign *)NULL;
602 }
603
604 if (ivp->last_ignoreval)
605 {
606 free (ivp->last_ignoreval);
607 ivp->last_ignoreval = (char *)NULL;
608 }
609
610 if (this_ignoreval == 0 || *this_ignoreval == '\0')
611 return;
612
613 ivp->last_ignoreval = savestring (this_ignoreval);
614
615 numitems = maxitems = ptr = 0;
616
495aee44 617#if 0
ccc6cda3 618 while (colon_bit = extract_colon_unit (this_ignoreval, &ptr))
495aee44
CR
619#else
620 while (colon_bit = split_ignorespec (this_ignoreval, &ptr))
621#endif
ccc6cda3
JA
622 {
623 if (numitems + 1 >= maxitems)
624 {
625 maxitems += 10;
626 ivp->ignores = (struct ign *)xrealloc (ivp->ignores, maxitems * sizeof (struct ign));
627 }
628 ivp->ignores[numitems].val = colon_bit;
629 ivp->ignores[numitems].len = strlen (colon_bit);
630 ivp->ignores[numitems].flags = 0;
631 if (ivp->item_func)
28ef6c31 632 (*ivp->item_func) (&ivp->ignores[numitems]);
ccc6cda3
JA
633 numitems++;
634 }
635 ivp->ignores[numitems].val = (char *)NULL;
636 ivp->num_ignores = numitems;
637}