]> git.ipfire.org Git - thirdparty/bash.git/blame - pathexp.c
Bash-5.0 patch 1: fix pathname expansion of directory names containing backslashes
[thirdparty/bash.git] / pathexp.c
CommitLineData
ccc6cda3
JA
1/* pathexp.c -- The shell interface to the globbing library. */
2
ac50fbac 3/* Copyright (C) 1995-2014 Free Software Foundation, Inc.
ccc6cda3
JA
4
5 This file is part of GNU Bash, the Bourne Again SHell.
6
3185942a
JA
7 Bash is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
ccc6cda3 11
3185942a
JA
12 Bash is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
ccc6cda3 16
3185942a
JA
17 You should have received a copy of the GNU General Public License
18 along with Bash. If not, see <http://www.gnu.org/licenses/>.
19*/
ccc6cda3
JA
20
21#include "config.h"
22
23#include "bashtypes.h"
24#include <stdio.h>
25
26#if defined (HAVE_UNISTD_H)
27# include <unistd.h>
28#endif
29
30#include "bashansi.h"
31
32#include "shell.h"
33#include "pathexp.h"
34#include "flags.h"
35
7117c2d2 36#include "shmbutil.h"
3185942a 37#include "bashintl.h"
7117c2d2 38
f73dda09 39#include <glob/strmatch.h>
b72432fd 40
7117c2d2
JA
41static int glob_name_is_acceptable __P((const char *));
42static void ignore_globbed_names __P((char **, sh_ignore_func_t *));
495aee44 43static char *split_ignorespec __P((char *, int *));
ac50fbac 44
b72432fd
JA
45#if defined (USE_POSIX_GLOB_LIBRARY)
46# include <glob.h>
f73dda09 47typedef int posix_glob_errfunc_t __P((const char *, int));
b72432fd
JA
48#else
49# include <glob/glob.h>
50#endif
ccc6cda3
JA
51
/* Control whether * matches .files in globbing.  setup_glob_ignore sets
   this to 1 while GLOBIGNORE holds at least one pattern and back to 0 when
   the variable is unset. */
int glob_dot_filenames;

/* Control whether the extended globbing features are enabled. */
int extended_glob = EXTGLOB_DEFAULT;

/* Control enabling special handling of `**'.  When non-zero,
   shell_glob_filename passes GX_GLOBSTAR to glob_filename. */
int glob_star = 0;
60
ccc6cda3
JA
61/* Return nonzero if STRING has any unquoted special globbing chars in it. */
62int
63unquoted_glob_pattern_p (string)
64 register char *string;
65{
66 register int c;
7117c2d2 67 char *send;
ccc6cda3
JA
68 int open;
69
7117c2d2
JA
70 DECLARE_MBSTATE;
71
ccc6cda3 72 open = 0;
7117c2d2
JA
73 send = string + strlen (string);
74
ccc6cda3
JA
75 while (c = *string++)
76 {
77 switch (c)
78 {
79 case '?':
80 case '*':
81 return (1);
82
83 case '[':
84 open++;
85 continue;
86
87 case ']':
88 if (open)
89 return (1);
90 continue;
91
cce855bc
JA
92 case '+':
93 case '@':
94 case '!':
95 if (*string == '(') /*)*/
96 return (1);
97 continue;
98
d233b485
CR
99 /* A pattern can't end with a backslash, but a backslash in the pattern
100 can be removed by the matching engine, so we have to run it through
101 globbing. */
ccc6cda3 102 case '\\':
d233b485
CR
103 return (*string != 0);
104
105 case CTLESC:
ccc6cda3
JA
106 if (*string++ == '\0')
107 return (0);
108 }
7117c2d2
JA
109
110 /* Advance one fewer byte than an entire multibyte character to
111 account for the auto-increment in the loop above. */
112#ifdef HANDLE_MULTIBYTE
113 string--;
114 ADVANCE_CHAR_P (string, send - string);
115 string++;
116#else
117 ADVANCE_CHAR_P (string, send - string);
118#endif
ccc6cda3
JA
119 }
120 return (0);
121}
122
f1be666c
JA
/* Tell whether C is one of the characters that are `special' in a POSIX
   ERE and therefore must be quoted to match itself.  Returns 1 for
   . [ \ ( ) * + ? { | ^ $ and 0 for every other value. */
static inline int
ere_char (c)
     int c;
{
  int special;

  switch (c)
    {
    case '.': case '[': case '\\':
    case '(': case ')':
    case '*': case '+': case '?':
    case '{': case '|':
    case '^': case '$':
      special = 1;
      break;
    default:
      special = 0;
      break;
    }

  return (special);
}
149
3185942a
JA
/* Return 1 if the character at S needs a backslash to be taken literally
   by the globbing code: `*', `[', `]', `?' and `\' always qualify, and
   `+', `@' and `!' qualify only when the next character is `('.  Return 0
   otherwise. */
int
glob_char_p (s)
     const char *s;
{
  char first = *s;

  if (first == '*' || first == '[' || first == ']' || first == '?' || first == '\\')
    return 1;

  /* Extended-glob prefixes only count when they open a group. */
  if ((first == '+' || first == '@' || first == '!') && s[1] == '(')	/*)*/
    return 1;

  return 0;
}
171
ccc6cda3
JA
172/* PATHNAME can contain characters prefixed by CTLESC; this indicates
173 that the character is to be quoted. We quote it here in the style
cce855bc 174 that the glob library recognizes. If flags includes QGLOB_CVTNULL,
ccc6cda3
JA
175 we change quoted null strings (pathname[0] == CTLNUL) into empty
176 strings (pathname[0] == 0). If this is called after quote removal
cce855bc 177 is performed, (flags & QGLOB_CVTNULL) should be 0; if called when quote
ccc6cda3 178 removal has not been done (for example, before attempting to match a
cce855bc 179 pattern while executing a case statement), flags should include
d233b485
CR
180 QGLOB_CVTNULL. If flags includes QGLOB_CTLESC, we need to remove CTLESC
181 quoting CTLESC or CTLNUL (as if dequote_string were called). If flags
182 includes QGLOB_FILENAME, appropriate quoting to match a filename should be
183 performed. QGLOB_REGEXP means we're quoting for a Posix ERE (for
184 [[ string =~ pat ]]) and that requires some special handling. */
ccc6cda3 185char *
cce855bc 186quote_string_for_globbing (pathname, qflags)
28ef6c31 187 const char *pathname;
cce855bc 188 int qflags;
ccc6cda3
JA
189{
190 char *temp;
cce855bc 191 register int i, j;
d233b485 192 int cclass, collsym, equiv, c, last_was_backslash;
a0c0a00f 193 int savei, savej;
ccc6cda3 194
ac50fbac 195 temp = (char *)xmalloc (2 * strlen (pathname) + 1);
ccc6cda3 196
cce855bc 197 if ((qflags & QGLOB_CVTNULL) && QUOTED_NULL (pathname))
ccc6cda3
JA
198 {
199 temp[0] = '\0';
200 return temp;
201 }
202
d233b485 203 cclass = collsym = equiv = last_was_backslash = 0;
cce855bc 204 for (i = j = 0; pathname[i]; i++)
ccc6cda3 205 {
ac50fbac
CR
206 /* Fix for CTLESC at the end of the string? */
207 if (pathname[i] == CTLESC && pathname[i+1] == '\0')
208 {
209 temp[j++] = pathname[i++];
210 break;
211 }
212 /* If we are parsing regexp, turn CTLESC CTLESC into CTLESC. It's not an
213 ERE special character, so we should just be able to pass it through. */
d233b485 214 else if ((qflags & (QGLOB_REGEXP|QGLOB_CTLESC)) && pathname[i] == CTLESC && (pathname[i+1] == CTLESC || pathname[i+1] == CTLNUL))
ac50fbac
CR
215 {
216 i++;
217 temp[j++] = pathname[i];
218 continue;
219 }
220 else if (pathname[i] == CTLESC)
28ef6c31
JA
221 {
222 if ((qflags & QGLOB_FILENAME) && pathname[i+1] == '/')
223 continue;
ac50fbac 224 /* What to do if preceding char is backslash? */
25db9a70 225 if (pathname[i+1] != CTLESC && (qflags & QGLOB_REGEXP) && ere_char (pathname[i+1]) == 0)
f1be666c 226 continue;
cce855bc 227 temp[j++] = '\\';
7117c2d2
JA
228 i++;
229 if (pathname[i] == '\0')
230 break;
28ef6c31 231 }
ac50fbac
CR
232 else if ((qflags & QGLOB_REGEXP) && (i == 0 || pathname[i-1] != CTLESC) && pathname[i] == '[') /*]*/
233 {
ac50fbac 234 temp[j++] = pathname[i++]; /* open bracket */
a0c0a00f
CR
235 savej = j;
236 savei = i;
ac50fbac 237 c = pathname[i++]; /* c == char after open bracket */
d233b485
CR
238 if (c == '^') /* ignore pattern negation */
239 {
240 temp[j++] = c;
241 c = pathname[i++];
242 }
243 if (c == ']') /* ignore right bracket if first char */
244 {
245 temp[j++] = c;
246 c = pathname[i++];
247 }
ac50fbac
CR
248 do
249 {
250 if (c == 0)
251 goto endpat;
252 else if (c == CTLESC)
253 {
254 /* skip c, check for EOS, let assignment at end of loop */
255 /* pathname[i] == backslash-escaped character */
256 if (pathname[i] == 0)
257 goto endpat;
258 temp[j++] = pathname[i++];
259 }
260 else if (c == '[' && pathname[i] == ':')
261 {
262 temp[j++] = c;
263 temp[j++] = pathname[i++];
264 cclass = 1;
265 }
266 else if (cclass && c == ':' && pathname[i] == ']')
267 {
268 temp[j++] = c;
269 temp[j++] = pathname[i++];
270 cclass = 0;
271 }
272 else if (c == '[' && pathname[i] == '=')
273 {
274 temp[j++] = c;
275 temp[j++] = pathname[i++];
276 if (pathname[i] == ']')
277 temp[j++] = pathname[i++]; /* right brack can be in equiv */
278 equiv = 1;
279 }
280 else if (equiv && c == '=' && pathname[i] == ']')
281 {
282 temp[j++] = c;
283 temp[j++] = pathname[i++];
284 equiv = 0;
285 }
286 else if (c == '[' && pathname[i] == '.')
287 {
288 temp[j++] = c;
289 temp[j++] = pathname[i++];
290 if (pathname[i] == ']')
291 temp[j++] = pathname[i++]; /* right brack can be in collsym */
292 collsym = 1;
293 }
294 else if (collsym && c == '.' && pathname[i] == ']')
295 {
296 temp[j++] = c;
297 temp[j++] = pathname[i++];
298 collsym = 0;
299 }
300 else
301 temp[j++] = c;
302 }
a0c0a00f
CR
303 while (((c = pathname[i++]) != ']') && c != 0);
304
305 /* If we don't find the closing bracket before we hit the end of
306 the string, rescan string without treating it as a bracket
307 expression (has implications for backslash and special ERE
308 chars) */
309 if (c == 0)
310 {
311 i = savei - 1; /* -1 for autoincrement above */
312 j = savej;
313 continue;
314 }
315
ac50fbac
CR
316 temp[j++] = c; /* closing right bracket */
317 i--; /* increment will happen above in loop */
318 continue; /* skip double assignment below */
319 }
320 else if (pathname[i] == '\\' && (qflags & QGLOB_REGEXP) == 0)
3185942a 321 {
ac50fbac
CR
322 /* XXX - if not quoting regexp, use backslash as quote char. Should
323 we just pass it through without treating it as special? That is
324 what ksh93 seems to do. */
325
326 /* If we want to pass through backslash unaltered, comment out these
327 lines. */
3185942a 328 temp[j++] = '\\';
ac50fbac 329
3185942a
JA
330 i++;
331 if (pathname[i] == '\0')
332 break;
d233b485
CR
333 /* If we are turning CTLESC CTLESC into CTLESC, we need to do that
334 even when the first CTLESC is preceded by a backslash. */
335 if ((qflags & QGLOB_CTLESC) && pathname[i] == CTLESC && (pathname[i+1] == CTLESC || pathname[i+1] == CTLNUL))
336 i++; /* skip over the CTLESC */
3185942a 337 }
ac50fbac
CR
338 else if (pathname[i] == '\\' && (qflags & QGLOB_REGEXP))
339 last_was_backslash = 1;
7117c2d2 340 temp[j++] = pathname[i];
ccc6cda3 341 }
ac50fbac 342endpat:
cce855bc 343 temp[j] = '\0';
ccc6cda3
JA
344
345 return (temp);
346}
347
348char *
349quote_globbing_chars (string)
a0c0a00f 350 const char *string;
ccc6cda3 351{
7117c2d2 352 size_t slen;
a0c0a00f
CR
353 char *temp, *t;
354 const char *s, *send;
7117c2d2
JA
355 DECLARE_MBSTATE;
356
357 slen = strlen (string);
358 send = string + slen;
ccc6cda3 359
7117c2d2 360 temp = (char *)xmalloc (slen * 2 + 1);
ccc6cda3
JA
361 for (t = temp, s = string; *s; )
362 {
3185942a
JA
363 if (glob_char_p (s))
364 *t++ = '\\';
7117c2d2
JA
365
366 /* Copy a single (possibly multibyte) character from s to t,
ac50fbac 367 incrementing both. */
7117c2d2 368 COPY_CHAR_P (t, s, send);
ccc6cda3
JA
369 }
370 *t = '\0';
371 return temp;
372}
373
374/* Call the glob library to do globbing on PATHNAME. */
375char **
376shell_glob_filename (pathname)
28ef6c31 377 const char *pathname;
ccc6cda3
JA
378{
379#if defined (USE_POSIX_GLOB_LIBRARY)
380 register int i;
28ef6c31 381 char *temp, **results;
ccc6cda3
JA
382 glob_t filenames;
383 int glob_flags;
384
cce855bc 385 temp = quote_string_for_globbing (pathname, QGLOB_FILENAME);
ccc6cda3
JA
386
387 filenames.gl_offs = 0;
388
b72432fd 389# if defined (GLOB_PERIOD)
ccc6cda3 390 glob_flags = glob_dot_filenames ? GLOB_PERIOD : 0;
b72432fd
JA
391# else
392 glob_flags = 0;
393# endif /* !GLOB_PERIOD */
394
ccc6cda3
JA
395 glob_flags |= (GLOB_ERR | GLOB_DOOFFS);
396
f73dda09 397 i = glob (temp, glob_flags, (posix_glob_errfunc_t *)NULL, &filenames);
ccc6cda3
JA
398
399 free (temp);
400
28ef6c31 401 if (i == GLOB_NOSPACE || i == GLOB_ABORTED)
ccc6cda3 402 return ((char **)NULL);
b72432fd
JA
403 else if (i == GLOB_NOMATCH)
404 filenames.gl_pathv = (char **)NULL;
405 else if (i != 0) /* other error codes not in POSIX.2 */
cce855bc 406 filenames.gl_pathv = (char **)NULL;
ccc6cda3 407
bb70624e
JA
408 results = filenames.gl_pathv;
409
410 if (results && ((GLOB_FAILED (results)) == 0))
411 {
412 if (should_ignore_glob_matches ())
413 ignore_glob_matches (results);
414 if (results && results[0])
7117c2d2 415 strvec_sort (results);
bb70624e
JA
416 else
417 {
418 FREE (results);
419 results = (char **)NULL;
420 }
421 }
422
423 return (results);
ccc6cda3
JA
424
425#else /* !USE_POSIX_GLOB_LIBRARY */
426
427 char *temp, **results;
d233b485 428 int gflags;
ccc6cda3
JA
429
430 noglob_dot_filenames = glob_dot_filenames == 0;
431
cce855bc 432 temp = quote_string_for_globbing (pathname, QGLOB_FILENAME);
d233b485
CR
433 gflags = glob_star ? GX_GLOBSTAR : 0;
434 results = glob_filename (temp, gflags);
ccc6cda3
JA
435 free (temp);
436
437 if (results && ((GLOB_FAILED (results)) == 0))
438 {
439 if (should_ignore_glob_matches ())
440 ignore_glob_matches (results);
441 if (results && results[0])
7117c2d2 442 strvec_sort (results);
ccc6cda3
JA
443 else
444 {
445 FREE (results);
446 results = (char **)&glob_error_return;
447 }
448 }
449
450 return (results);
451#endif /* !USE_POSIX_GLOB_LIBRARY */
452}
453
/* Stuff for GLOBIGNORE. */

/* Ignore-pattern state for the GLOBIGNORE variable.  It starts out with no
   parsed patterns, a zero pattern count, no remembered value and no
   per-item callback; setup_ignore_patterns fills it in. */
static struct ignorevar globignore =
{
  "GLOBIGNORE",
  (struct ign *)0,
  0,
  (char *)0,
  (sh_iv_item_func_t *)0,
};
464
465/* Set up to ignore some glob matches because the value of GLOBIGNORE
466 has changed. If GLOBIGNORE is being unset, we also need to disable
467 the globbing of filenames beginning with a `.'. */
468void
469setup_glob_ignore (name)
470 char *name;
471{
472 char *v;
473
474 v = get_string_value (name);
475 setup_ignore_patterns (&globignore);
476
477 if (globignore.num_ignores)
478 glob_dot_filenames = 1;
479 else if (v == 0)
480 glob_dot_filenames = 0;
481}
482
483int
484should_ignore_glob_matches ()
485{
486 return globignore.num_ignores;
487}
488
489/* Return 0 if NAME matches a pattern in the globignore.ignores list. */
490static int
491glob_name_is_acceptable (name)
f73dda09 492 const char *name;
ccc6cda3
JA
493{
494 struct ign *p;
cce855bc 495 int flags;
ccc6cda3
JA
496
497 /* . and .. are never matched */
498 if (name[0] == '.' && (name[1] == '\0' || (name[1] == '.' && name[2] == '\0')))
499 return (0);
500
a0c0a00f 501 flags = FNM_PATHNAME | FNMATCH_EXTFLAG | FNMATCH_NOCASEGLOB;
ccc6cda3
JA
502 for (p = globignore.ignores; p->val; p++)
503 {
f73dda09 504 if (strmatch (p->val, (char *)name, flags) != FNM_NOMATCH)
28ef6c31 505 return (0);
ccc6cda3
JA
506 }
507 return (1);
508}
509
510/* Internal function to test whether filenames in NAMES should be
511 ignored. NAME_FUNC is a pointer to a function to call with each
512 name. It returns non-zero if the name is acceptable to the particular
513 ignore function which called _ignore_names; zero if the name should
514 be removed from NAMES. */
515
516static void
517ignore_globbed_names (names, name_func)
518 char **names;
f73dda09 519 sh_ignore_func_t *name_func;
ccc6cda3
JA
520{
521 char **newnames;
522 int n, i;
523
524 for (i = 0; names[i]; i++)
525 ;
7117c2d2 526 newnames = strvec_create (i + 1);
ccc6cda3
JA
527
528 for (n = i = 0; names[i]; i++)
529 {
530 if ((*name_func) (names[i]))
28ef6c31 531 newnames[n++] = names[i];
ccc6cda3
JA
532 else
533 free (names[i]);
534 }
535
536 newnames[n] = (char *)NULL;
537
538 if (n == 0)
539 {
540 names[0] = (char *)NULL;
541 free (newnames);
542 return;
543 }
544
545 /* Copy the acceptable names from NEWNAMES back to NAMES and set the
546 new array end. */
547 for (n = 0; newnames[n]; n++)
548 names[n] = newnames[n];
549 names[n] = (char *)NULL;
d166f048 550 free (newnames);
ccc6cda3
JA
551}
552
553void
554ignore_glob_matches (names)
555 char **names;
556{
557 if (globignore.num_ignores == 0)
558 return;
559
560 ignore_globbed_names (names, glob_name_is_acceptable);
561}
562
495aee44
CR
563static char *
564split_ignorespec (s, ip)
565 char *s;
566 int *ip;
567{
568 char *t;
569 int n, i;
570
571 if (s == 0)
572 return 0;
573
574 i = *ip;
575 if (s[i] == 0)
576 return 0;
577
a0c0a00f 578 n = skip_to_delim (s, i, ":", SD_NOJMP|SD_EXTGLOB|SD_GLOB);
495aee44
CR
579 t = substring (s, i, n);
580
581 if (s[n] == ':')
582 n++;
583 *ip = n;
584 return t;
585}
586
ccc6cda3
JA
587void
588setup_ignore_patterns (ivp)
589 struct ignorevar *ivp;
590{
591 int numitems, maxitems, ptr;
592 char *colon_bit, *this_ignoreval;
593 struct ign *p;
594
595 this_ignoreval = get_string_value (ivp->varname);
596
597 /* If nothing has changed then just exit now. */
598 if ((this_ignoreval && ivp->last_ignoreval && STREQ (this_ignoreval, ivp->last_ignoreval)) ||
599 (!this_ignoreval && !ivp->last_ignoreval))
600 return;
601
602 /* Oops. The ignore variable has changed. Re-parse it. */
603 ivp->num_ignores = 0;
604
605 if (ivp->ignores)
606 {
607 for (p = ivp->ignores; p->val; p++)
608 free(p->val);
609 free (ivp->ignores);
610 ivp->ignores = (struct ign *)NULL;
611 }
612
613 if (ivp->last_ignoreval)
614 {
615 free (ivp->last_ignoreval);
616 ivp->last_ignoreval = (char *)NULL;
617 }
618
619 if (this_ignoreval == 0 || *this_ignoreval == '\0')
620 return;
621
622 ivp->last_ignoreval = savestring (this_ignoreval);
623
624 numitems = maxitems = ptr = 0;
625
495aee44 626#if 0
ccc6cda3 627 while (colon_bit = extract_colon_unit (this_ignoreval, &ptr))
495aee44
CR
628#else
629 while (colon_bit = split_ignorespec (this_ignoreval, &ptr))
630#endif
ccc6cda3
JA
631 {
632 if (numitems + 1 >= maxitems)
633 {
634 maxitems += 10;
635 ivp->ignores = (struct ign *)xrealloc (ivp->ignores, maxitems * sizeof (struct ign));
636 }
637 ivp->ignores[numitems].val = colon_bit;
638 ivp->ignores[numitems].len = strlen (colon_bit);
639 ivp->ignores[numitems].flags = 0;
640 if (ivp->item_func)
28ef6c31 641 (*ivp->item_func) (&ivp->ignores[numitems]);
ccc6cda3
JA
642 numitems++;
643 }
644 ivp->ignores[numitems].val = (char *)NULL;
645 ivp->num_ignores = numitems;
646}