]> git.ipfire.org Git - thirdparty/bash.git/blame - pathexp.c
Bash-5.0 patch 11: fix quoted null character removal in operands of conditional ...
[thirdparty/bash.git] / pathexp.c
CommitLineData
ccc6cda3
JA
1/* pathexp.c -- The shell interface to the globbing library. */
2
ac50fbac 3/* Copyright (C) 1995-2014 Free Software Foundation, Inc.
ccc6cda3
JA
4
5 This file is part of GNU Bash, the Bourne Again SHell.
6
3185942a
JA
7 Bash is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
ccc6cda3 11
3185942a
JA
12 Bash is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
ccc6cda3 16
3185942a
JA
17 You should have received a copy of the GNU General Public License
18 along with Bash. If not, see <http://www.gnu.org/licenses/>.
19*/
ccc6cda3
JA
20
21#include "config.h"
22
23#include "bashtypes.h"
24#include <stdio.h>
25
26#if defined (HAVE_UNISTD_H)
27# include <unistd.h>
28#endif
29
30#include "bashansi.h"
31
32#include "shell.h"
33#include "pathexp.h"
34#include "flags.h"
35
7117c2d2 36#include "shmbutil.h"
3185942a 37#include "bashintl.h"
7117c2d2 38
f73dda09 39#include <glob/strmatch.h>
b72432fd 40
7117c2d2
JA
41static int glob_name_is_acceptable __P((const char *));
42static void ignore_globbed_names __P((char **, sh_ignore_func_t *));
495aee44 43static char *split_ignorespec __P((char *, int *));
ac50fbac 44
b72432fd
JA
45#if defined (USE_POSIX_GLOB_LIBRARY)
46# include <glob.h>
f73dda09 47typedef int posix_glob_errfunc_t __P((const char *, int));
b72432fd
JA
48#else
49# include <glob/glob.h>
50#endif
ccc6cda3
JA
51
52/* Control whether * matches .files in globbing. */
53int glob_dot_filenames;
54
cce855bc 55/* Control whether the extended globbing features are enabled. */
0001803f 56int extended_glob = EXTGLOB_DEFAULT;
cce855bc 57
3185942a
JA
58/* Control enabling special handling of `**' */
59int glob_star = 0;
60
ccc6cda3
JA
61/* Return nonzero if STRING has any unquoted special globbing chars in it. */
62int
63unquoted_glob_pattern_p (string)
64 register char *string;
65{
66 register int c;
7117c2d2 67 char *send;
fcf6ae7d 68 int open, bsquote;
ccc6cda3 69
7117c2d2
JA
70 DECLARE_MBSTATE;
71
fcf6ae7d 72 open = bsquote = 0;
7117c2d2
JA
73 send = string + strlen (string);
74
ccc6cda3
JA
75 while (c = *string++)
76 {
77 switch (c)
78 {
79 case '?':
80 case '*':
81 return (1);
82
83 case '[':
84 open++;
85 continue;
86
87 case ']':
88 if (open)
89 return (1);
90 continue;
91
cce855bc
JA
92 case '+':
93 case '@':
94 case '!':
95 if (*string == '(') /*)*/
96 return (1);
97 continue;
98
d233b485
CR
99 /* A pattern can't end with a backslash, but a backslash in the pattern
100 can be removed by the matching engine, so we have to run it through
101 globbing. */
ccc6cda3 102 case '\\':
fcf6ae7d
CR
103 if (*string != '\0' && *string != '/')
104 {
105 bsquote = 1;
106 string++;
107 continue;
108 }
109 else if (*string == 0)
110 return (0);
d233b485
CR
111
112 case CTLESC:
ccc6cda3
JA
113 if (*string++ == '\0')
114 return (0);
115 }
7117c2d2
JA
116
117 /* Advance one fewer byte than an entire multibyte character to
118 account for the auto-increment in the loop above. */
119#ifdef HANDLE_MULTIBYTE
120 string--;
121 ADVANCE_CHAR_P (string, send - string);
122 string++;
123#else
124 ADVANCE_CHAR_P (string, send - string);
125#endif
ccc6cda3 126 }
fcf6ae7d
CR
127
128 return (bsquote ? 2 : 0);
ccc6cda3
JA
129}
130
/* Tell whether C is one of the characters that is `special' in a POSIX
   ERE and therefore has to be quoted in order to match itself.
   Returns 1 for such a character and 0 for anything else. */
static inline int
ere_char (c)
     int c;
{
  return (c == '.' || c == '[' || c == '\\' ||
          c == '(' || c == ')' ||
          c == '*' || c == '+' || c == '?' ||
          c == '{' || c == '|' ||
          c == '^' || c == '$');
}
157
/* Return 1 if the character S points at is special to the globbing code:
   `*', `[', `]', `?' or backslash unconditionally, and `+', `@' or `!'
   only when the next character is an open paren.  Return 0 otherwise. */
int
glob_char_p (s)
     const char *s;
{
  int first;

  first = *s;

  if (first == '*' || first == '[' || first == ']' ||
      first == '?' || first == '\\')
    return 1;

  /* s[1] is only examined once s[0] is known not to be the terminator. */
  if ((first == '+' || first == '@' || first == '!') && s[1] == '(')	/*)*/
    return 1;

  return 0;
}
179
ccc6cda3
JA
180/* PATHNAME can contain characters prefixed by CTLESC; this indicates
181 that the character is to be quoted. We quote it here in the style
cce855bc 182 that the glob library recognizes. If flags includes QGLOB_CVTNULL,
ccc6cda3
JA
183 we change quoted null strings (pathname[0] == CTLNUL) into empty
184 strings (pathname[0] == 0). If this is called after quote removal
cce855bc 185 is performed, (flags & QGLOB_CVTNULL) should be 0; if called when quote
ccc6cda3 186 removal has not been done (for example, before attempting to match a
cce855bc 187 pattern while executing a case statement), flags should include
d233b485
CR
188 QGLOB_CVTNULL. If flags includes QGLOB_CTLESC, we need to remove CTLESC
189 quoting CTLESC or CTLNUL (as if dequote_string were called). If flags
190 includes QGLOB_FILENAME, appropriate quoting to match a filename should be
191 performed. QGLOB_REGEXP means we're quoting for a Posix ERE (for
192 [[ string =~ pat ]]) and that requires some special handling. */
ccc6cda3 193char *
cce855bc 194quote_string_for_globbing (pathname, qflags)
28ef6c31 195 const char *pathname;
cce855bc 196 int qflags;
ccc6cda3
JA
197{
198 char *temp;
cce855bc 199 register int i, j;
d233b485 200 int cclass, collsym, equiv, c, last_was_backslash;
a0c0a00f 201 int savei, savej;
ccc6cda3 202
ac50fbac 203 temp = (char *)xmalloc (2 * strlen (pathname) + 1);
ccc6cda3 204
cce855bc 205 if ((qflags & QGLOB_CVTNULL) && QUOTED_NULL (pathname))
ccc6cda3
JA
206 {
207 temp[0] = '\0';
208 return temp;
209 }
210
d233b485 211 cclass = collsym = equiv = last_was_backslash = 0;
cce855bc 212 for (i = j = 0; pathname[i]; i++)
ccc6cda3 213 {
ac50fbac
CR
214 /* Fix for CTLESC at the end of the string? */
215 if (pathname[i] == CTLESC && pathname[i+1] == '\0')
216 {
217 temp[j++] = pathname[i++];
218 break;
219 }
220 /* If we are parsing regexp, turn CTLESC CTLESC into CTLESC. It's not an
221 ERE special character, so we should just be able to pass it through. */
d233b485 222 else if ((qflags & (QGLOB_REGEXP|QGLOB_CTLESC)) && pathname[i] == CTLESC && (pathname[i+1] == CTLESC || pathname[i+1] == CTLNUL))
ac50fbac
CR
223 {
224 i++;
225 temp[j++] = pathname[i];
226 continue;
227 }
228 else if (pathname[i] == CTLESC)
28ef6c31
JA
229 {
230 if ((qflags & QGLOB_FILENAME) && pathname[i+1] == '/')
231 continue;
ac50fbac 232 /* What to do if preceding char is backslash? */
25db9a70 233 if (pathname[i+1] != CTLESC && (qflags & QGLOB_REGEXP) && ere_char (pathname[i+1]) == 0)
f1be666c 234 continue;
cce855bc 235 temp[j++] = '\\';
7117c2d2
JA
236 i++;
237 if (pathname[i] == '\0')
238 break;
28ef6c31 239 }
ac50fbac
CR
240 else if ((qflags & QGLOB_REGEXP) && (i == 0 || pathname[i-1] != CTLESC) && pathname[i] == '[') /*]*/
241 {
ac50fbac 242 temp[j++] = pathname[i++]; /* open bracket */
a0c0a00f
CR
243 savej = j;
244 savei = i;
ac50fbac 245 c = pathname[i++]; /* c == char after open bracket */
d233b485
CR
246 if (c == '^') /* ignore pattern negation */
247 {
248 temp[j++] = c;
249 c = pathname[i++];
250 }
251 if (c == ']') /* ignore right bracket if first char */
252 {
253 temp[j++] = c;
254 c = pathname[i++];
255 }
ac50fbac
CR
256 do
257 {
258 if (c == 0)
259 goto endpat;
260 else if (c == CTLESC)
261 {
262 /* skip c, check for EOS, let assignment at end of loop */
263 /* pathname[i] == backslash-escaped character */
264 if (pathname[i] == 0)
265 goto endpat;
266 temp[j++] = pathname[i++];
267 }
268 else if (c == '[' && pathname[i] == ':')
269 {
270 temp[j++] = c;
271 temp[j++] = pathname[i++];
272 cclass = 1;
273 }
274 else if (cclass && c == ':' && pathname[i] == ']')
275 {
276 temp[j++] = c;
277 temp[j++] = pathname[i++];
278 cclass = 0;
279 }
280 else if (c == '[' && pathname[i] == '=')
281 {
282 temp[j++] = c;
283 temp[j++] = pathname[i++];
284 if (pathname[i] == ']')
285 temp[j++] = pathname[i++]; /* right brack can be in equiv */
286 equiv = 1;
287 }
288 else if (equiv && c == '=' && pathname[i] == ']')
289 {
290 temp[j++] = c;
291 temp[j++] = pathname[i++];
292 equiv = 0;
293 }
294 else if (c == '[' && pathname[i] == '.')
295 {
296 temp[j++] = c;
297 temp[j++] = pathname[i++];
298 if (pathname[i] == ']')
299 temp[j++] = pathname[i++]; /* right brack can be in collsym */
300 collsym = 1;
301 }
302 else if (collsym && c == '.' && pathname[i] == ']')
303 {
304 temp[j++] = c;
305 temp[j++] = pathname[i++];
306 collsym = 0;
307 }
308 else
309 temp[j++] = c;
310 }
a0c0a00f
CR
311 while (((c = pathname[i++]) != ']') && c != 0);
312
313 /* If we don't find the closing bracket before we hit the end of
314 the string, rescan string without treating it as a bracket
315 expression (has implications for backslash and special ERE
316 chars) */
317 if (c == 0)
318 {
319 i = savei - 1; /* -1 for autoincrement above */
320 j = savej;
321 continue;
322 }
323
ac50fbac
CR
324 temp[j++] = c; /* closing right bracket */
325 i--; /* increment will happen above in loop */
326 continue; /* skip double assignment below */
327 }
328 else if (pathname[i] == '\\' && (qflags & QGLOB_REGEXP) == 0)
3185942a 329 {
ac50fbac
CR
330 /* XXX - if not quoting regexp, use backslash as quote char. Should
331 we just pass it through without treating it as special? That is
332 what ksh93 seems to do. */
333
334 /* If we want to pass through backslash unaltered, comment out these
335 lines. */
3185942a 336 temp[j++] = '\\';
ac50fbac 337
3185942a
JA
338 i++;
339 if (pathname[i] == '\0')
340 break;
d233b485
CR
341 /* If we are turning CTLESC CTLESC into CTLESC, we need to do that
342 even when the first CTLESC is preceded by a backslash. */
343 if ((qflags & QGLOB_CTLESC) && pathname[i] == CTLESC && (pathname[i+1] == CTLESC || pathname[i+1] == CTLNUL))
344 i++; /* skip over the CTLESC */
3185942a 345 }
ac50fbac
CR
346 else if (pathname[i] == '\\' && (qflags & QGLOB_REGEXP))
347 last_was_backslash = 1;
7117c2d2 348 temp[j++] = pathname[i];
ccc6cda3 349 }
ac50fbac 350endpat:
cce855bc 351 temp[j] = '\0';
ccc6cda3
JA
352
353 return (temp);
354}
355
356char *
357quote_globbing_chars (string)
a0c0a00f 358 const char *string;
ccc6cda3 359{
7117c2d2 360 size_t slen;
a0c0a00f
CR
361 char *temp, *t;
362 const char *s, *send;
7117c2d2
JA
363 DECLARE_MBSTATE;
364
365 slen = strlen (string);
366 send = string + slen;
ccc6cda3 367
7117c2d2 368 temp = (char *)xmalloc (slen * 2 + 1);
ccc6cda3
JA
369 for (t = temp, s = string; *s; )
370 {
3185942a
JA
371 if (glob_char_p (s))
372 *t++ = '\\';
7117c2d2
JA
373
374 /* Copy a single (possibly multibyte) character from s to t,
ac50fbac 375 incrementing both. */
7117c2d2 376 COPY_CHAR_P (t, s, send);
ccc6cda3
JA
377 }
378 *t = '\0';
379 return temp;
380}
381
382/* Call the glob library to do globbing on PATHNAME. */
383char **
384shell_glob_filename (pathname)
28ef6c31 385 const char *pathname;
ccc6cda3
JA
386{
387#if defined (USE_POSIX_GLOB_LIBRARY)
388 register int i;
28ef6c31 389 char *temp, **results;
ccc6cda3
JA
390 glob_t filenames;
391 int glob_flags;
392
cce855bc 393 temp = quote_string_for_globbing (pathname, QGLOB_FILENAME);
ccc6cda3
JA
394
395 filenames.gl_offs = 0;
396
b72432fd 397# if defined (GLOB_PERIOD)
ccc6cda3 398 glob_flags = glob_dot_filenames ? GLOB_PERIOD : 0;
b72432fd
JA
399# else
400 glob_flags = 0;
401# endif /* !GLOB_PERIOD */
402
ccc6cda3
JA
403 glob_flags |= (GLOB_ERR | GLOB_DOOFFS);
404
f73dda09 405 i = glob (temp, glob_flags, (posix_glob_errfunc_t *)NULL, &filenames);
ccc6cda3
JA
406
407 free (temp);
408
28ef6c31 409 if (i == GLOB_NOSPACE || i == GLOB_ABORTED)
ccc6cda3 410 return ((char **)NULL);
b72432fd
JA
411 else if (i == GLOB_NOMATCH)
412 filenames.gl_pathv = (char **)NULL;
413 else if (i != 0) /* other error codes not in POSIX.2 */
cce855bc 414 filenames.gl_pathv = (char **)NULL;
ccc6cda3 415
bb70624e
JA
416 results = filenames.gl_pathv;
417
418 if (results && ((GLOB_FAILED (results)) == 0))
419 {
420 if (should_ignore_glob_matches ())
421 ignore_glob_matches (results);
422 if (results && results[0])
7117c2d2 423 strvec_sort (results);
bb70624e
JA
424 else
425 {
426 FREE (results);
427 results = (char **)NULL;
428 }
429 }
430
431 return (results);
ccc6cda3
JA
432
433#else /* !USE_POSIX_GLOB_LIBRARY */
434
435 char *temp, **results;
d233b485 436 int gflags;
ccc6cda3
JA
437
438 noglob_dot_filenames = glob_dot_filenames == 0;
439
cce855bc 440 temp = quote_string_for_globbing (pathname, QGLOB_FILENAME);
d233b485
CR
441 gflags = glob_star ? GX_GLOBSTAR : 0;
442 results = glob_filename (temp, gflags);
ccc6cda3
JA
443 free (temp);
444
445 if (results && ((GLOB_FAILED (results)) == 0))
446 {
447 if (should_ignore_glob_matches ())
448 ignore_glob_matches (results);
449 if (results && results[0])
7117c2d2 450 strvec_sort (results);
ccc6cda3
JA
451 else
452 {
453 FREE (results);
454 results = (char **)&glob_error_return;
455 }
456 }
457
458 return (results);
459#endif /* !USE_POSIX_GLOB_LIBRARY */
460}
461
462/* Stuff for GLOBIGNORE. */
463
464static struct ignorevar globignore =
465{
466 "GLOBIGNORE",
467 (struct ign *)0,
468 0,
469 (char *)0,
f73dda09 470 (sh_iv_item_func_t *)0,
ccc6cda3
JA
471};
472
473/* Set up to ignore some glob matches because the value of GLOBIGNORE
474 has changed. If GLOBIGNORE is being unset, we also need to disable
475 the globbing of filenames beginning with a `.'. */
476void
477setup_glob_ignore (name)
478 char *name;
479{
480 char *v;
481
482 v = get_string_value (name);
483 setup_ignore_patterns (&globignore);
484
485 if (globignore.num_ignores)
486 glob_dot_filenames = 1;
487 else if (v == 0)
488 glob_dot_filenames = 0;
489}
490
491int
492should_ignore_glob_matches ()
493{
494 return globignore.num_ignores;
495}
496
497/* Return 0 if NAME matches a pattern in the globignore.ignores list. */
498static int
499glob_name_is_acceptable (name)
f73dda09 500 const char *name;
ccc6cda3
JA
501{
502 struct ign *p;
cce855bc 503 int flags;
ccc6cda3
JA
504
505 /* . and .. are never matched */
506 if (name[0] == '.' && (name[1] == '\0' || (name[1] == '.' && name[2] == '\0')))
507 return (0);
508
a0c0a00f 509 flags = FNM_PATHNAME | FNMATCH_EXTFLAG | FNMATCH_NOCASEGLOB;
ccc6cda3
JA
510 for (p = globignore.ignores; p->val; p++)
511 {
f73dda09 512 if (strmatch (p->val, (char *)name, flags) != FNM_NOMATCH)
28ef6c31 513 return (0);
ccc6cda3
JA
514 }
515 return (1);
516}
517
518/* Internal function to test whether filenames in NAMES should be
519 ignored. NAME_FUNC is a pointer to a function to call with each
520 name. It returns non-zero if the name is acceptable to the particular
521 ignore function which called _ignore_names; zero if the name should
522 be removed from NAMES. */
523
524static void
525ignore_globbed_names (names, name_func)
526 char **names;
f73dda09 527 sh_ignore_func_t *name_func;
ccc6cda3
JA
528{
529 char **newnames;
530 int n, i;
531
532 for (i = 0; names[i]; i++)
533 ;
7117c2d2 534 newnames = strvec_create (i + 1);
ccc6cda3
JA
535
536 for (n = i = 0; names[i]; i++)
537 {
538 if ((*name_func) (names[i]))
28ef6c31 539 newnames[n++] = names[i];
ccc6cda3
JA
540 else
541 free (names[i]);
542 }
543
544 newnames[n] = (char *)NULL;
545
546 if (n == 0)
547 {
548 names[0] = (char *)NULL;
549 free (newnames);
550 return;
551 }
552
553 /* Copy the acceptable names from NEWNAMES back to NAMES and set the
554 new array end. */
555 for (n = 0; newnames[n]; n++)
556 names[n] = newnames[n];
557 names[n] = (char *)NULL;
d166f048 558 free (newnames);
ccc6cda3
JA
559}
560
561void
562ignore_glob_matches (names)
563 char **names;
564{
565 if (globignore.num_ignores == 0)
566 return;
567
568 ignore_globbed_names (names, glob_name_is_acceptable);
569}
570
495aee44
CR
571static char *
572split_ignorespec (s, ip)
573 char *s;
574 int *ip;
575{
576 char *t;
577 int n, i;
578
579 if (s == 0)
580 return 0;
581
582 i = *ip;
583 if (s[i] == 0)
584 return 0;
585
a0c0a00f 586 n = skip_to_delim (s, i, ":", SD_NOJMP|SD_EXTGLOB|SD_GLOB);
495aee44
CR
587 t = substring (s, i, n);
588
589 if (s[n] == ':')
590 n++;
591 *ip = n;
592 return t;
593}
594
ccc6cda3
JA
595void
596setup_ignore_patterns (ivp)
597 struct ignorevar *ivp;
598{
599 int numitems, maxitems, ptr;
600 char *colon_bit, *this_ignoreval;
601 struct ign *p;
602
603 this_ignoreval = get_string_value (ivp->varname);
604
605 /* If nothing has changed then just exit now. */
606 if ((this_ignoreval && ivp->last_ignoreval && STREQ (this_ignoreval, ivp->last_ignoreval)) ||
607 (!this_ignoreval && !ivp->last_ignoreval))
608 return;
609
610 /* Oops. The ignore variable has changed. Re-parse it. */
611 ivp->num_ignores = 0;
612
613 if (ivp->ignores)
614 {
615 for (p = ivp->ignores; p->val; p++)
616 free(p->val);
617 free (ivp->ignores);
618 ivp->ignores = (struct ign *)NULL;
619 }
620
621 if (ivp->last_ignoreval)
622 {
623 free (ivp->last_ignoreval);
624 ivp->last_ignoreval = (char *)NULL;
625 }
626
627 if (this_ignoreval == 0 || *this_ignoreval == '\0')
628 return;
629
630 ivp->last_ignoreval = savestring (this_ignoreval);
631
632 numitems = maxitems = ptr = 0;
633
495aee44 634#if 0
ccc6cda3 635 while (colon_bit = extract_colon_unit (this_ignoreval, &ptr))
495aee44
CR
636#else
637 while (colon_bit = split_ignorespec (this_ignoreval, &ptr))
638#endif
ccc6cda3
JA
639 {
640 if (numitems + 1 >= maxitems)
641 {
642 maxitems += 10;
643 ivp->ignores = (struct ign *)xrealloc (ivp->ignores, maxitems * sizeof (struct ign));
644 }
645 ivp->ignores[numitems].val = colon_bit;
646 ivp->ignores[numitems].len = strlen (colon_bit);
647 ivp->ignores[numitems].flags = 0;
648 if (ivp->item_func)
28ef6c31 649 (*ivp->item_func) (&ivp->ignores[numitems]);
ccc6cda3
JA
650 numitems++;
651 }
652 ivp->ignores[numitems].val = (char *)NULL;
653 ivp->num_ignores = numitems;
654}