]> git.ipfire.org Git - thirdparty/bash.git/blame - pathexp.c
fixes for HAVE_SELECT/HAVE_PSELECT; change some warning messages for nameref loops...
[thirdparty/bash.git] / pathexp.c
CommitLineData
ccc6cda3
JA
1/* pathexp.c -- The shell interface to the globbing library. */
2
ab309487 3/* Copyright (C) 1995-2020 Free Software Foundation, Inc.
ccc6cda3
JA
4
5 This file is part of GNU Bash, the Bourne Again SHell.
6
2e4498b3
CR
7 Bash is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
ccc6cda3 11
2e4498b3
CR
12 Bash is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
ccc6cda3 16
2e4498b3
CR
17 You should have received a copy of the GNU General Public License
18 along with Bash. If not, see <http://www.gnu.org/licenses/>.
19*/
ccc6cda3
JA
20
21#include "config.h"
22
23#include "bashtypes.h"
24#include <stdio.h>
25
26#if defined (HAVE_UNISTD_H)
27# include <unistd.h>
28#endif
29
30#include "bashansi.h"
31
32#include "shell.h"
33#include "pathexp.h"
34#include "flags.h"
35
7117c2d2 36#include "shmbutil.h"
547ef914 37#include "bashintl.h"
7117c2d2 38
f73dda09 39#include <glob/strmatch.h>
b72432fd 40
ab309487
CR
41static int glob_name_is_acceptable PARAMS((const char *));
42static void ignore_globbed_names PARAMS((char **, sh_ignore_func_t *));
43static char *split_ignorespec PARAMS((char *, int *));
084c952b 44
7a8455e4 45#include <glob/glob.h>
ccc6cda3
JA
46
47/* Control whether * matches .files in globbing. */
48int glob_dot_filenames;
49
cce855bc 50/* Control whether the extended globbing features are enabled. */
691aebcb 51int extended_glob = EXTGLOB_DEFAULT;
cce855bc 52
4ac1ff98
CR
53/* Control enabling special handling of `**' */
54int glob_star = 0;
55
951bdaad
CR
56/* Return nonzero if STRING has any unquoted special globbing chars in it.
57 This is supposed to be called when pathname expansion is performed, so
58 it implements the rules in Posix 2.13.3, specifically that an unquoted
59 slash cannot appear in a bracket expression. */
ccc6cda3
JA
60int
61unquoted_glob_pattern_p (string)
62 register char *string;
63{
64 register int c;
7117c2d2 65 char *send;
8a9718cf 66 int open, bsquote;
ccc6cda3 67
7117c2d2
JA
68 DECLARE_MBSTATE;
69
8a9718cf 70 open = bsquote = 0;
7117c2d2
JA
71 send = string + strlen (string);
72
ccc6cda3
JA
73 while (c = *string++)
74 {
75 switch (c)
76 {
77 case '?':
78 case '*':
79 return (1);
80
81 case '[':
82 open++;
83 continue;
84
85 case ']':
951bdaad 86 if (open) /* XXX - if --open == 0? */
ccc6cda3
JA
87 return (1);
88 continue;
89
951bdaad
CR
90 case '/':
91 if (open)
92 open = 0;
93
cce855bc
JA
94 case '+':
95 case '@':
96 case '!':
97 if (*string == '(') /*)*/
98 return (1);
99 continue;
100
2afeb2af 101 /* A pattern can't end with a backslash, but a backslash in the pattern
f7ec6b1a
CR
102 can be special to the matching engine, so we note it in case we
103 need it later. */
ccc6cda3 104 case '\\':
8a9718cf
CR
105 if (*string != '\0' && *string != '/')
106 {
107 bsquote = 1;
108 string++;
109 continue;
110 }
951bdaad
CR
111 else if (open && *string == '/')
112 {
113 string++; /* quoted slashes in bracket expressions are ok */
114 continue;
115 }
8a9718cf
CR
116 else if (*string == 0)
117 return (0);
2afeb2af
CR
118
119 case CTLESC:
ccc6cda3
JA
120 if (*string++ == '\0')
121 return (0);
122 }
7117c2d2
JA
123
124 /* Advance one fewer byte than an entire multibyte character to
125 account for the auto-increment in the loop above. */
126#ifdef HANDLE_MULTIBYTE
127 string--;
128 ADVANCE_CHAR_P (string, send - string);
129 string++;
130#else
131 ADVANCE_CHAR_P (string, send - string);
132#endif
ccc6cda3 133 }
8a9718cf 134
f7ec6b1a
CR
135#if 0
136 return (bsquote ? 2 : 0);
137#else
138 return (0);
139#endif
ccc6cda3
JA
140}
141
d3ad40de
CR
/* Return 1 if C is a character that is `special' in a POSIX ERE and needs to
   be quoted to match itself, 0 otherwise.  The special set tested here is
   . [ \ ( ) * + ? { | ^ $ */
static inline int
ere_char (c)
     int c;
{
  switch (c)
    {
    case '.':
    case '[':
    case '\\':
    case '(':
    case ')':
    case '*':
    case '+':
    case '?':
    case '{':
    case '|':
    case '^':
    case '$':
      return 1;
    default:
      break;
    }

  /* Single fall-out return; nothing after it is unreachable. */
  return 0;
}
168
/* Return 1 if the character at S is special to the globbing code: one of
   * [ ] ? \ or one of + @ ! immediately followed by an open paren.
   Return 0 otherwise.  This is only used to determine whether to
   backslash-quote a character. */
int
glob_char_p (s)
     const char *s;
{
  int first;

  first = *s;

  if (first == '*' || first == '[' || first == ']' || first == '?' || first == '\\')
    return 1;

  /* s[1] is only examined when s[0] is not the terminating NUL. */
  if ((first == '+' || first == '@' || first == '!') && s[1] == '(')   /*)*/
    return 1;

  return 0;
}
191
ccc6cda3
JA
192/* PATHNAME can contain characters prefixed by CTLESC; this indicates
193 that the character is to be quoted. We quote it here in the style
cce855bc 194 that the glob library recognizes. If flags includes QGLOB_CVTNULL,
ccc6cda3
JA
195 we change quoted null strings (pathname[0] == CTLNUL) into empty
196 strings (pathname[0] == 0). If this is called after quote removal
cce855bc 197 is performed, (flags & QGLOB_CVTNULL) should be 0; if called when quote
ccc6cda3 198 removal has not been done (for example, before attempting to match a
cce855bc 199 pattern while executing a case statement), flags should include
2afeb2af
CR
200 QGLOB_CVTNULL. If flags includes QGLOB_CTLESC, we need to remove CTLESC
201 quoting CTLESC or CTLNUL (as if dequote_string were called). If flags
202 includes QGLOB_FILENAME, appropriate quoting to match a filename should be
203 performed. QGLOB_REGEXP means we're quoting for a Posix ERE (for
204 [[ string =~ pat ]]) and that requires some special handling. */
ccc6cda3 205char *
cce855bc 206quote_string_for_globbing (pathname, qflags)
28ef6c31 207 const char *pathname;
cce855bc 208 int qflags;
ccc6cda3
JA
209{
210 char *temp;
cce855bc 211 register int i, j;
2e412574 212 int cclass, collsym, equiv, c, last_was_backslash;
2171061f 213 int savei, savej;
ccc6cda3 214
c61bfbfd 215 temp = (char *)xmalloc (2 * strlen (pathname) + 1);
ccc6cda3 216
cce855bc 217 if ((qflags & QGLOB_CVTNULL) && QUOTED_NULL (pathname))
ccc6cda3
JA
218 {
219 temp[0] = '\0';
220 return temp;
221 }
222
2e412574 223 cclass = collsym = equiv = last_was_backslash = 0;
cce855bc 224 for (i = j = 0; pathname[i]; i++)
ccc6cda3 225 {
4a2c75c6
CR
226 /* Fix for CTLESC at the end of the string? */
227 if (pathname[i] == CTLESC && pathname[i+1] == '\0')
228 {
229 temp[j++] = pathname[i++];
230 break;
231 }
5f0df7f9
CR
232 /* If we are parsing regexp, turn CTLESC CTLESC into CTLESC. It's not an
233 ERE special character, so we should just be able to pass it through. */
6078dd9a 234 else if ((qflags & (QGLOB_REGEXP|QGLOB_CTLESC)) && pathname[i] == CTLESC && (pathname[i+1] == CTLESC || pathname[i+1] == CTLNUL))
5f0df7f9
CR
235 {
236 i++;
237 temp[j++] = pathname[i];
238 continue;
239 }
4a2c75c6 240 else if (pathname[i] == CTLESC)
28ef6c31 241 {
aa99ef52 242convert_to_backslash:
28ef6c31
JA
243 if ((qflags & QGLOB_FILENAME) && pathname[i+1] == '/')
244 continue;
5f0df7f9 245 /* What to do if preceding char is backslash? */
bfd181e7 246 if (pathname[i+1] != CTLESC && (qflags & QGLOB_REGEXP) && ere_char (pathname[i+1]) == 0)
d3ad40de 247 continue;
cce855bc 248 temp[j++] = '\\';
7117c2d2
JA
249 i++;
250 if (pathname[i] == '\0')
251 break;
28ef6c31 252 }
084c952b
CR
253 else if ((qflags & QGLOB_REGEXP) && (i == 0 || pathname[i-1] != CTLESC) && pathname[i] == '[') /*]*/
254 {
084c952b 255 temp[j++] = pathname[i++]; /* open bracket */
2171061f
CR
256 savej = j;
257 savei = i;
084c952b 258 c = pathname[i++]; /* c == char after open bracket */
553a7d66
CR
259 if (c == '^') /* ignore pattern negation */
260 {
261 temp[j++] = c;
262 c = pathname[i++];
263 }
264 if (c == ']') /* ignore right bracket if first char */
265 {
266 temp[j++] = c;
267 c = pathname[i++];
268 }
084c952b
CR
269 do
270 {
271 if (c == 0)
272 goto endpat;
273 else if (c == CTLESC)
274 {
275 /* skip c, check for EOS, let assignment at end of loop */
276 /* pathname[i] == backslash-escaped character */
277 if (pathname[i] == 0)
278 goto endpat;
279 temp[j++] = pathname[i++];
280 }
281 else if (c == '[' && pathname[i] == ':')
282 {
283 temp[j++] = c;
284 temp[j++] = pathname[i++];
285 cclass = 1;
286 }
287 else if (cclass && c == ':' && pathname[i] == ']')
288 {
289 temp[j++] = c;
290 temp[j++] = pathname[i++];
291 cclass = 0;
292 }
293 else if (c == '[' && pathname[i] == '=')
294 {
295 temp[j++] = c;
296 temp[j++] = pathname[i++];
297 if (pathname[i] == ']')
298 temp[j++] = pathname[i++]; /* right brack can be in equiv */
299 equiv = 1;
300 }
301 else if (equiv && c == '=' && pathname[i] == ']')
302 {
303 temp[j++] = c;
304 temp[j++] = pathname[i++];
305 equiv = 0;
306 }
307 else if (c == '[' && pathname[i] == '.')
308 {
309 temp[j++] = c;
310 temp[j++] = pathname[i++];
311 if (pathname[i] == ']')
312 temp[j++] = pathname[i++]; /* right brack can be in collsym */
313 collsym = 1;
314 }
315 else if (collsym && c == '.' && pathname[i] == ']')
316 {
317 temp[j++] = c;
318 temp[j++] = pathname[i++];
319 collsym = 0;
320 }
321 else
322 temp[j++] = c;
323 }
2171061f
CR
324 while (((c = pathname[i++]) != ']') && c != 0);
325
326 /* If we don't find the closing bracket before we hit the end of
327 the string, rescan string without treating it as a bracket
328 expression (has implications for backslash and special ERE
329 chars) */
330 if (c == 0)
331 {
332 i = savei - 1; /* -1 for autoincrement above */
333 j = savej;
334 continue;
335 }
336
084c952b
CR
337 temp[j++] = c; /* closing right bracket */
338 i--; /* increment will happen above in loop */
339 continue; /* skip double assignment below */
340 }
5f0df7f9 341 else if (pathname[i] == '\\' && (qflags & QGLOB_REGEXP) == 0)
6a2e7e1f 342 {
4a2c75c6 343 /* XXX - if not quoting regexp, use backslash as quote char. Should
f7ec6b1a 344 We just pass it through without treating it as special? That is
4a2c75c6 345 what ksh93 seems to do. */
5f0df7f9
CR
346
347 /* If we want to pass through backslash unaltered, comment out these
348 lines. */
349 temp[j++] = '\\';
350
351 i++;
352 if (pathname[i] == '\0')
353 break;
1a5fa30b
CR
354 /* If we are turning CTLESC CTLESC into CTLESC, we need to do that
355 even when the first CTLESC is preceded by a backslash. */
6078dd9a
CR
356 if ((qflags & QGLOB_CTLESC) && pathname[i] == CTLESC && (pathname[i+1] == CTLESC || pathname[i+1] == CTLNUL))
357 i++; /* skip over the CTLESC */
aa99ef52
CR
358 else if ((qflags & QGLOB_CTLESC) && pathname[i] == CTLESC)
359 /* A little more general: if there is an unquoted backslash in the
360 pattern and we are handling quoted characters in the pattern,
361 convert the CTLESC to backslash and add the next character on
362 the theory that the backslash will quote the next character
363 but it would be inconsistent not to replace the CTLESC with
364 another backslash here. We can't tell at this point whether the
365 CTLESC comes from a backslash or other form of quoting in the
366 original pattern. */
367 goto convert_to_backslash;
6a2e7e1f 368 }
5f0df7f9
CR
369 else if (pathname[i] == '\\' && (qflags & QGLOB_REGEXP))
370 last_was_backslash = 1;
7117c2d2 371 temp[j++] = pathname[i];
ccc6cda3 372 }
084c952b 373endpat:
cce855bc 374 temp[j] = '\0';
ccc6cda3
JA
375
376 return (temp);
377}
378
379char *
380quote_globbing_chars (string)
8f50a023 381 const char *string;
ccc6cda3 382{
7117c2d2 383 size_t slen;
8f50a023
CR
384 char *temp, *t;
385 const char *s, *send;
7117c2d2
JA
386 DECLARE_MBSTATE;
387
388 slen = strlen (string);
389 send = string + slen;
ccc6cda3 390
7117c2d2 391 temp = (char *)xmalloc (slen * 2 + 1);
ccc6cda3
JA
392 for (t = temp, s = string; *s; )
393 {
4ac1ff98
CR
394 if (glob_char_p (s))
395 *t++ = '\\';
7117c2d2
JA
396
397 /* Copy a single (possibly multibyte) character from s to t,
084c952b 398 incrementing both. */
7117c2d2 399 COPY_CHAR_P (t, s, send);
ccc6cda3
JA
400 }
401 *t = '\0';
402 return temp;
403}
404
405/* Call the glob library to do globbing on PATHNAME. */
406char **
f7ec6b1a 407shell_glob_filename (pathname, qflags)
28ef6c31 408 const char *pathname;
f7ec6b1a 409 int qflags;
ccc6cda3 410{
ccc6cda3 411 char *temp, **results;
48492ffa 412 int gflags, quoted_pattern;
ccc6cda3
JA
413
414 noglob_dot_filenames = glob_dot_filenames == 0;
415
f7ec6b1a 416 temp = quote_string_for_globbing (pathname, QGLOB_FILENAME|qflags);
5af34ee8
CR
417 gflags = glob_star ? GX_GLOBSTAR : 0;
418 results = glob_filename (temp, gflags);
ccc6cda3
JA
419 free (temp);
420
421 if (results && ((GLOB_FAILED (results)) == 0))
422 {
423 if (should_ignore_glob_matches ())
424 ignore_glob_matches (results);
425 if (results && results[0])
94206228 426 strvec_sort (results, 1); /* posix sort */
ccc6cda3
JA
427 else
428 {
429 FREE (results);
430 results = (char **)&glob_error_return;
431 }
432 }
433
434 return (results);
ccc6cda3
JA
435}
436
437/* Stuff for GLOBIGNORE. */
438
439static struct ignorevar globignore =
440{
441 "GLOBIGNORE",
442 (struct ign *)0,
443 0,
444 (char *)0,
f73dda09 445 (sh_iv_item_func_t *)0,
ccc6cda3
JA
446};
447
448/* Set up to ignore some glob matches because the value of GLOBIGNORE
449 has changed. If GLOBIGNORE is being unset, we also need to disable
450 the globbing of filenames beginning with a `.'. */
451void
452setup_glob_ignore (name)
453 char *name;
454{
455 char *v;
456
457 v = get_string_value (name);
458 setup_ignore_patterns (&globignore);
459
460 if (globignore.num_ignores)
461 glob_dot_filenames = 1;
462 else if (v == 0)
463 glob_dot_filenames = 0;
464}
465
466int
467should_ignore_glob_matches ()
468{
469 return globignore.num_ignores;
470}
471
472/* Return 0 if NAME matches a pattern in the globignore.ignores list. */
473static int
474glob_name_is_acceptable (name)
f73dda09 475 const char *name;
ccc6cda3
JA
476{
477 struct ign *p;
d37a4722 478 char *n;
cce855bc 479 int flags;
ccc6cda3 480
d37a4722
CR
481 /* . and .. are never matched. We extend this to the terminal component of a
482 pathname. */
483 n = strrchr (name, '/');
484 if (n == 0 || n[1] == 0)
485 n = (char *)name;
486 else
487 n++;
488
489 if (n[0] == '.' && (n[1] == '\0' || (n[1] == '.' && n[2] == '\0')))
ccc6cda3
JA
490 return (0);
491
0a233f3e 492 flags = FNM_PATHNAME | FNMATCH_EXTFLAG | FNMATCH_NOCASEGLOB;
ccc6cda3
JA
493 for (p = globignore.ignores; p->val; p++)
494 {
f73dda09 495 if (strmatch (p->val, (char *)name, flags) != FNM_NOMATCH)
28ef6c31 496 return (0);
ccc6cda3
JA
497 }
498 return (1);
499}
500
501/* Internal function to test whether filenames in NAMES should be
502 ignored. NAME_FUNC is a pointer to a function to call with each
503 name. It returns non-zero if the name is acceptable to the particular
504 ignore function which called _ignore_names; zero if the name should
505 be removed from NAMES. */
506
507static void
508ignore_globbed_names (names, name_func)
509 char **names;
f73dda09 510 sh_ignore_func_t *name_func;
ccc6cda3
JA
511{
512 char **newnames;
513 int n, i;
514
515 for (i = 0; names[i]; i++)
516 ;
7117c2d2 517 newnames = strvec_create (i + 1);
ccc6cda3
JA
518
519 for (n = i = 0; names[i]; i++)
520 {
521 if ((*name_func) (names[i]))
28ef6c31 522 newnames[n++] = names[i];
ccc6cda3
JA
523 else
524 free (names[i]);
525 }
526
527 newnames[n] = (char *)NULL;
528
529 if (n == 0)
530 {
531 names[0] = (char *)NULL;
532 free (newnames);
533 return;
534 }
535
536 /* Copy the acceptable names from NEWNAMES back to NAMES and set the
537 new array end. */
538 for (n = 0; newnames[n]; n++)
539 names[n] = newnames[n];
540 names[n] = (char *)NULL;
d166f048 541 free (newnames);
ccc6cda3
JA
542}
543
544void
545ignore_glob_matches (names)
546 char **names;
547{
548 if (globignore.num_ignores == 0)
549 return;
550
551 ignore_globbed_names (names, glob_name_is_acceptable);
552}
553
5f8cde23
CR
554static char *
555split_ignorespec (s, ip)
556 char *s;
557 int *ip;
558{
559 char *t;
560 int n, i;
561
562 if (s == 0)
563 return 0;
564
565 i = *ip;
566 if (s[i] == 0)
567 return 0;
568
fbbc416f 569 n = skip_to_delim (s, i, ":", SD_NOJMP|SD_EXTGLOB|SD_GLOB);
5f8cde23
CR
570 t = substring (s, i, n);
571
572 if (s[n] == ':')
573 n++;
574 *ip = n;
575 return t;
576}
577
ccc6cda3
JA
578void
579setup_ignore_patterns (ivp)
580 struct ignorevar *ivp;
581{
582 int numitems, maxitems, ptr;
583 char *colon_bit, *this_ignoreval;
584 struct ign *p;
585
586 this_ignoreval = get_string_value (ivp->varname);
587
588 /* If nothing has changed then just exit now. */
589 if ((this_ignoreval && ivp->last_ignoreval && STREQ (this_ignoreval, ivp->last_ignoreval)) ||
590 (!this_ignoreval && !ivp->last_ignoreval))
591 return;
592
593 /* Oops. The ignore variable has changed. Re-parse it. */
594 ivp->num_ignores = 0;
595
596 if (ivp->ignores)
597 {
598 for (p = ivp->ignores; p->val; p++)
599 free(p->val);
600 free (ivp->ignores);
601 ivp->ignores = (struct ign *)NULL;
602 }
603
604 if (ivp->last_ignoreval)
605 {
606 free (ivp->last_ignoreval);
607 ivp->last_ignoreval = (char *)NULL;
608 }
609
610 if (this_ignoreval == 0 || *this_ignoreval == '\0')
611 return;
612
613 ivp->last_ignoreval = savestring (this_ignoreval);
614
615 numitems = maxitems = ptr = 0;
616
5f8cde23 617#if 0
ccc6cda3 618 while (colon_bit = extract_colon_unit (this_ignoreval, &ptr))
5f8cde23
CR
619#else
620 while (colon_bit = split_ignorespec (this_ignoreval, &ptr))
621#endif
ccc6cda3
JA
622 {
623 if (numitems + 1 >= maxitems)
624 {
625 maxitems += 10;
626 ivp->ignores = (struct ign *)xrealloc (ivp->ignores, maxitems * sizeof (struct ign));
627 }
628 ivp->ignores[numitems].val = colon_bit;
629 ivp->ignores[numitems].len = strlen (colon_bit);
630 ivp->ignores[numitems].flags = 0;
631 if (ivp->item_func)
28ef6c31 632 (*ivp->item_func) (&ivp->ignores[numitems]);
ccc6cda3
JA
633 numitems++;
634 }
635 ivp->ignores[numitems].val = (char *)NULL;
636 ivp->num_ignores = numitems;
637}