]> git.ipfire.org Git - thirdparty/bash.git/blame_incremental - pathexp.c
Bash-5.2 patch 26: fix typo when specifying readline's custom color prefix
[thirdparty/bash.git] / pathexp.c
... / ...
CommitLineData
1/* pathexp.c -- The shell interface to the globbing library. */
2
3/* Copyright (C) 1995-2020 Free Software Foundation, Inc.
4
5 This file is part of GNU Bash, the Bourne Again SHell.
6
7 Bash is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
11
12 Bash is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with Bash. If not, see <http://www.gnu.org/licenses/>.
19*/
20
21#include "config.h"
22
23#include "bashtypes.h"
24#include <stdio.h>
25
26#if defined (HAVE_UNISTD_H)
27# include <unistd.h>
28#endif
29
30#include "bashansi.h"
31
32#include "shell.h"
33#include "pathexp.h"
34#include "flags.h"
35
36#include "shmbutil.h"
37#include "bashintl.h"
38
39#include <glob/strmatch.h>
40
41static int glob_name_is_acceptable PARAMS((const char *));
42static void ignore_globbed_names PARAMS((char **, sh_ignore_func_t *));
43static char *split_ignorespec PARAMS((char *, int *));
44
45#include <glob/glob.h>
46
47/* Control whether * matches .files in globbing. */
48int glob_dot_filenames;
49
50/* Control whether the extended globbing features are enabled. */
51int extended_glob = EXTGLOB_DEFAULT;
52
53/* Control enabling special handling of `**' */
54int glob_star = 0;
55
56/* Return nonzero if STRING has any unquoted special globbing chars in it.
57 This is supposed to be called when pathname expansion is performed, so
58 it implements the rules in Posix 2.13.3, specifically that an unquoted
59 slash cannot appear in a bracket expression. */
60int
61unquoted_glob_pattern_p (string)
62 register char *string;
63{
64 register int c;
65 char *send;
66 int open, bsquote;
67
68 DECLARE_MBSTATE;
69
70 open = bsquote = 0;
71 send = string + strlen (string);
72
73 while (c = *string++)
74 {
75 switch (c)
76 {
77 case '?':
78 case '*':
79 return (1);
80
81 case '[':
82 open++;
83 continue;
84
85 case ']':
86 if (open) /* XXX - if --open == 0? */
87 return (1);
88 continue;
89
90 case '/':
91 if (open)
92 open = 0;
93
94 case '+':
95 case '@':
96 case '!':
97 if (*string == '(') /*)*/
98 return (1);
99 continue;
100
101 /* A pattern can't end with a backslash, but a backslash in the pattern
102 can be special to the matching engine, so we note it in case we
103 need it later. */
104 case '\\':
105 if (*string != '\0' && *string != '/')
106 {
107 bsquote = 1;
108 string++;
109 continue;
110 }
111 else if (open && *string == '/')
112 {
113 string++; /* quoted slashes in bracket expressions are ok */
114 continue;
115 }
116 else if (*string == 0)
117 return (0);
118
119 case CTLESC:
120 if (*string++ == '\0')
121 return (0);
122 }
123
124 /* Advance one fewer byte than an entire multibyte character to
125 account for the auto-increment in the loop above. */
126#ifdef HANDLE_MULTIBYTE
127 string--;
128 ADVANCE_CHAR_P (string, send - string);
129 string++;
130#else
131 ADVANCE_CHAR_P (string, send - string);
132#endif
133 }
134
135#if 0
136 return (bsquote ? 2 : 0);
137#else
138 return (0);
139#endif
140}
141
/* Return 1 if C is a character that is `special' in a POSIX ERE and needs to
   be quoted to match itself; return 0 for every other value of C.  Every
   path through the switch returns, so nothing follows it. */
static inline int
ere_char (c)
     int c;
{
  switch (c)
    {
    case '.':
    case '[':
    case '\\':
    case '(':
    case ')':
    case '*':
    case '+':
    case '?':
    case '{':
    case '|':
    case '^':
    case '$':
      return 1;
    default:
      return 0;
    }
}
168
/* Return 1 if the character at S is one that should be backslash-quoted
   to keep it from acting as a glob special, 0 otherwise.  `+', `@' and `!'
   only count when they are immediately followed by an open paren.  This
   is only used to determine whether to backslash-quote a character. */
int
glob_char_p (s)
     const char *s;
{
  const int first = *s;

  /* Characters that are always special. */
  if (first == '*' || first == '[' || first == ']' || first == '?' || first == '\\')
    return 1;

  /* Extended-glob prefixes are special only in front of `('. */
  if ((first == '+' || first == '@' || first == '!') && s[1] == '(')	/*)*/
    return 1;

  return 0;
}
191
192/* PATHNAME can contain characters prefixed by CTLESC; this indicates
193 that the character is to be quoted. We quote it here in the style
194 that the glob library recognizes. If flags includes QGLOB_CVTNULL,
195 we change quoted null strings (pathname[0] == CTLNUL) into empty
196 strings (pathname[0] == 0). If this is called after quote removal
197 is performed, (flags & QGLOB_CVTNULL) should be 0; if called when quote
198 removal has not been done (for example, before attempting to match a
199 pattern while executing a case statement), flags should include
200 QGLOB_CVTNULL. If flags includes QGLOB_CTLESC, we need to remove CTLESC
201 quoting CTLESC or CTLNUL (as if dequote_string were called). If flags
202 includes QGLOB_FILENAME, appropriate quoting to match a filename should be
203 performed. QGLOB_REGEXP means we're quoting for a Posix ERE (for
204 [[ string =~ pat ]]) and that requires some special handling. */
205char *
206quote_string_for_globbing (pathname, qflags)
207 const char *pathname;
208 int qflags;
209{
210 char *temp;
211 register int i, j;
212 int cclass, collsym, equiv, c, last_was_backslash;
213 int savei, savej;
214
215 temp = (char *)xmalloc (2 * strlen (pathname) + 1);
216
217 if ((qflags & QGLOB_CVTNULL) && QUOTED_NULL (pathname))
218 {
219 temp[0] = '\0';
220 return temp;
221 }
222
223 cclass = collsym = equiv = last_was_backslash = 0;
224 for (i = j = 0; pathname[i]; i++)
225 {
226 /* Fix for CTLESC at the end of the string? */
227 if (pathname[i] == CTLESC && pathname[i+1] == '\0')
228 {
229 temp[j++] = pathname[i++];
230 break;
231 }
232 /* If we are parsing regexp, turn CTLESC CTLESC into CTLESC. It's not an
233 ERE special character, so we should just be able to pass it through. */
234 else if ((qflags & (QGLOB_REGEXP|QGLOB_CTLESC)) && pathname[i] == CTLESC && (pathname[i+1] == CTLESC || pathname[i+1] == CTLNUL))
235 {
236 i++;
237 temp[j++] = pathname[i];
238 continue;
239 }
240 else if (pathname[i] == CTLESC)
241 {
242convert_to_backslash:
243 if ((qflags & QGLOB_FILENAME) && pathname[i+1] == '/')
244 continue;
245 /* What to do if preceding char is backslash? */
246 if (pathname[i+1] != CTLESC && (qflags & QGLOB_REGEXP) && ere_char (pathname[i+1]) == 0)
247 continue;
248 temp[j++] = '\\';
249 i++;
250 if (pathname[i] == '\0')
251 break;
252 }
253 else if ((qflags & QGLOB_REGEXP) && (i == 0 || pathname[i-1] != CTLESC) && pathname[i] == '[') /*]*/
254 {
255 temp[j++] = pathname[i++]; /* open bracket */
256 savej = j;
257 savei = i;
258 c = pathname[i++]; /* c == char after open bracket */
259 if (c == '^') /* ignore pattern negation */
260 {
261 temp[j++] = c;
262 c = pathname[i++];
263 }
264 if (c == ']') /* ignore right bracket if first char */
265 {
266 temp[j++] = c;
267 c = pathname[i++];
268 }
269 do
270 {
271 if (c == 0)
272 goto endpat;
273 else if (c == CTLESC)
274 {
275 /* skip c, check for EOS, let assignment at end of loop */
276 /* pathname[i] == backslash-escaped character */
277 if (pathname[i] == 0)
278 goto endpat;
279 temp[j++] = pathname[i++];
280 }
281 else if (c == '[' && pathname[i] == ':')
282 {
283 temp[j++] = c;
284 temp[j++] = pathname[i++];
285 cclass = 1;
286 }
287 else if (cclass && c == ':' && pathname[i] == ']')
288 {
289 temp[j++] = c;
290 temp[j++] = pathname[i++];
291 cclass = 0;
292 }
293 else if (c == '[' && pathname[i] == '=')
294 {
295 temp[j++] = c;
296 temp[j++] = pathname[i++];
297 if (pathname[i] == ']')
298 temp[j++] = pathname[i++]; /* right brack can be in equiv */
299 equiv = 1;
300 }
301 else if (equiv && c == '=' && pathname[i] == ']')
302 {
303 temp[j++] = c;
304 temp[j++] = pathname[i++];
305 equiv = 0;
306 }
307 else if (c == '[' && pathname[i] == '.')
308 {
309 temp[j++] = c;
310 temp[j++] = pathname[i++];
311 if (pathname[i] == ']')
312 temp[j++] = pathname[i++]; /* right brack can be in collsym */
313 collsym = 1;
314 }
315 else if (collsym && c == '.' && pathname[i] == ']')
316 {
317 temp[j++] = c;
318 temp[j++] = pathname[i++];
319 collsym = 0;
320 }
321 else
322 temp[j++] = c;
323 }
324 while (((c = pathname[i++]) != ']') && c != 0);
325
326 /* If we don't find the closing bracket before we hit the end of
327 the string, rescan string without treating it as a bracket
328 expression (has implications for backslash and special ERE
329 chars) */
330 if (c == 0)
331 {
332 i = savei - 1; /* -1 for autoincrement above */
333 j = savej;
334 continue;
335 }
336
337 temp[j++] = c; /* closing right bracket */
338 i--; /* increment will happen above in loop */
339 continue; /* skip double assignment below */
340 }
341 else if (pathname[i] == '\\' && (qflags & QGLOB_REGEXP) == 0)
342 {
343 /* XXX - if not quoting regexp, use backslash as quote char. Should
344 We just pass it through without treating it as special? That is
345 what ksh93 seems to do. */
346
347 /* If we want to pass through backslash unaltered, comment out these
348 lines. */
349 temp[j++] = '\\';
350
351 i++;
352 if (pathname[i] == '\0')
353 break;
354 /* If we are turning CTLESC CTLESC into CTLESC, we need to do that
355 even when the first CTLESC is preceded by a backslash. */
356 if ((qflags & QGLOB_CTLESC) && pathname[i] == CTLESC && (pathname[i+1] == CTLESC || pathname[i+1] == CTLNUL))
357 i++; /* skip over the CTLESC */
358 else if ((qflags & QGLOB_CTLESC) && pathname[i] == CTLESC)
359 /* A little more general: if there is an unquoted backslash in the
360 pattern and we are handling quoted characters in the pattern,
361 convert the CTLESC to backslash and add the next character on
362 the theory that the backslash will quote the next character
363 but it would be inconsistent not to replace the CTLESC with
364 another backslash here. We can't tell at this point whether the
365 CTLESC comes from a backslash or other form of quoting in the
366 original pattern. */
367 goto convert_to_backslash;
368 }
369 else if (pathname[i] == '\\' && (qflags & QGLOB_REGEXP))
370 last_was_backslash = 1;
371 temp[j++] = pathname[i];
372 }
373endpat:
374 temp[j] = '\0';
375
376 return (temp);
377}
378
379char *
380quote_globbing_chars (string)
381 const char *string;
382{
383 size_t slen;
384 char *temp, *t;
385 const char *s, *send;
386 DECLARE_MBSTATE;
387
388 slen = strlen (string);
389 send = string + slen;
390
391 temp = (char *)xmalloc (slen * 2 + 1);
392 for (t = temp, s = string; *s; )
393 {
394 if (glob_char_p (s))
395 *t++ = '\\';
396
397 /* Copy a single (possibly multibyte) character from s to t,
398 incrementing both. */
399 COPY_CHAR_P (t, s, send);
400 }
401 *t = '\0';
402 return temp;
403}
404
405/* Call the glob library to do globbing on PATHNAME. */
406char **
407shell_glob_filename (pathname, qflags)
408 const char *pathname;
409 int qflags;
410{
411 char *temp, **results;
412 int gflags, quoted_pattern;
413
414 noglob_dot_filenames = glob_dot_filenames == 0;
415
416 temp = quote_string_for_globbing (pathname, QGLOB_FILENAME|qflags);
417 gflags = glob_star ? GX_GLOBSTAR : 0;
418 results = glob_filename (temp, gflags);
419 free (temp);
420
421 if (results && ((GLOB_FAILED (results)) == 0))
422 {
423 if (should_ignore_glob_matches ())
424 ignore_glob_matches (results);
425 if (results && results[0])
426 strvec_sort (results, 1); /* posix sort */
427 else
428 {
429 FREE (results);
430 results = (char **)&glob_error_return;
431 }
432 }
433
434 return (results);
435}
436
437/* Stuff for GLOBIGNORE. */
438
439static struct ignorevar globignore =
440{
441 "GLOBIGNORE",
442 (struct ign *)0,
443 0,
444 (char *)0,
445 (sh_iv_item_func_t *)0,
446};
447
448/* Set up to ignore some glob matches because the value of GLOBIGNORE
449 has changed. If GLOBIGNORE is being unset, we also need to disable
450 the globbing of filenames beginning with a `.'. */
451void
452setup_glob_ignore (name)
453 char *name;
454{
455 char *v;
456
457 v = get_string_value (name);
458 setup_ignore_patterns (&globignore);
459
460 if (globignore.num_ignores)
461 glob_dot_filenames = 1;
462 else if (v == 0)
463 glob_dot_filenames = 0;
464}
465
466int
467should_ignore_glob_matches ()
468{
469 return globignore.num_ignores;
470}
471
472/* Return 0 if NAME matches a pattern in the globignore.ignores list. */
473static int
474glob_name_is_acceptable (name)
475 const char *name;
476{
477 struct ign *p;
478 char *n;
479 int flags;
480
481 /* . and .. are never matched. We extend this to the terminal component of a
482 pathname. */
483 n = strrchr (name, '/');
484 if (n == 0 || n[1] == 0)
485 n = (char *)name;
486 else
487 n++;
488
489 if (n[0] == '.' && (n[1] == '\0' || (n[1] == '.' && n[2] == '\0')))
490 return (0);
491
492 flags = FNM_PATHNAME | FNMATCH_EXTFLAG | FNMATCH_NOCASEGLOB;
493 for (p = globignore.ignores; p->val; p++)
494 {
495 if (strmatch (p->val, (char *)name, flags) != FNM_NOMATCH)
496 return (0);
497 }
498 return (1);
499}
500
501/* Internal function to test whether filenames in NAMES should be
502 ignored. NAME_FUNC is a pointer to a function to call with each
503 name. It returns non-zero if the name is acceptable to the particular
504 ignore function which called _ignore_names; zero if the name should
505 be removed from NAMES. */
506
507static void
508ignore_globbed_names (names, name_func)
509 char **names;
510 sh_ignore_func_t *name_func;
511{
512 char **newnames;
513 int n, i;
514
515 for (i = 0; names[i]; i++)
516 ;
517 newnames = strvec_create (i + 1);
518
519 for (n = i = 0; names[i]; i++)
520 {
521 if ((*name_func) (names[i]))
522 newnames[n++] = names[i];
523 else
524 free (names[i]);
525 }
526
527 newnames[n] = (char *)NULL;
528
529 if (n == 0)
530 {
531 names[0] = (char *)NULL;
532 free (newnames);
533 return;
534 }
535
536 /* Copy the acceptable names from NEWNAMES back to NAMES and set the
537 new array end. */
538 for (n = 0; newnames[n]; n++)
539 names[n] = newnames[n];
540 names[n] = (char *)NULL;
541 free (newnames);
542}
543
544void
545ignore_glob_matches (names)
546 char **names;
547{
548 if (globignore.num_ignores == 0)
549 return;
550
551 ignore_globbed_names (names, glob_name_is_acceptable);
552}
553
554static char *
555split_ignorespec (s, ip)
556 char *s;
557 int *ip;
558{
559 char *t;
560 int n, i;
561
562 if (s == 0)
563 return 0;
564
565 i = *ip;
566 if (s[i] == 0)
567 return 0;
568
569 n = skip_to_delim (s, i, ":", SD_NOJMP|SD_EXTGLOB|SD_GLOB);
570 t = substring (s, i, n);
571
572 if (s[n] == ':')
573 n++;
574 *ip = n;
575 return t;
576}
577
578void
579setup_ignore_patterns (ivp)
580 struct ignorevar *ivp;
581{
582 int numitems, maxitems, ptr;
583 char *colon_bit, *this_ignoreval;
584 struct ign *p;
585
586 this_ignoreval = get_string_value (ivp->varname);
587
588 /* If nothing has changed then just exit now. */
589 if ((this_ignoreval && ivp->last_ignoreval && STREQ (this_ignoreval, ivp->last_ignoreval)) ||
590 (!this_ignoreval && !ivp->last_ignoreval))
591 return;
592
593 /* Oops. The ignore variable has changed. Re-parse it. */
594 ivp->num_ignores = 0;
595
596 if (ivp->ignores)
597 {
598 for (p = ivp->ignores; p->val; p++)
599 free(p->val);
600 free (ivp->ignores);
601 ivp->ignores = (struct ign *)NULL;
602 }
603
604 if (ivp->last_ignoreval)
605 {
606 free (ivp->last_ignoreval);
607 ivp->last_ignoreval = (char *)NULL;
608 }
609
610 if (this_ignoreval == 0 || *this_ignoreval == '\0')
611 return;
612
613 ivp->last_ignoreval = savestring (this_ignoreval);
614
615 numitems = maxitems = ptr = 0;
616
617#if 0
618 while (colon_bit = extract_colon_unit (this_ignoreval, &ptr))
619#else
620 while (colon_bit = split_ignorespec (this_ignoreval, &ptr))
621#endif
622 {
623 if (numitems + 1 >= maxitems)
624 {
625 maxitems += 10;
626 ivp->ignores = (struct ign *)xrealloc (ivp->ignores, maxitems * sizeof (struct ign));
627 }
628 ivp->ignores[numitems].val = colon_bit;
629 ivp->ignores[numitems].len = strlen (colon_bit);
630 ivp->ignores[numitems].flags = 0;
631 if (ivp->item_func)
632 (*ivp->item_func) (&ivp->ignores[numitems]);
633 numitems++;
634 }
635 ivp->ignores[numitems].val = (char *)NULL;
636 ivp->num_ignores = numitems;
637}