]> git.ipfire.org Git - thirdparty/bash.git/blame_incremental - pathexp.c
bash-5.0-rc1 release
[thirdparty/bash.git] / pathexp.c
... / ...
CommitLineData
1/* pathexp.c -- The shell interface to the globbing library. */
2
3/* Copyright (C) 1995-2014 Free Software Foundation, Inc.
4
5 This file is part of GNU Bash, the Bourne Again SHell.
6
7 Bash is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
11
12 Bash is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with Bash. If not, see <http://www.gnu.org/licenses/>.
19*/
20
21#include "config.h"
22
23#include "bashtypes.h"
24#include <stdio.h>
25
26#if defined (HAVE_UNISTD_H)
27# include <unistd.h>
28#endif
29
30#include "bashansi.h"
31
32#include "shell.h"
33#include "pathexp.h"
34#include "flags.h"
35
36#include "shmbutil.h"
37#include "bashintl.h"
38
39#include <glob/strmatch.h>
40
/* Forward declarations for the file-local helpers defined later in this
   file. */
static int glob_name_is_acceptable __P((const char *));
static void ignore_globbed_names __P((char **, sh_ignore_func_t *));
static char *split_ignorespec __P((char *, int *));
44
45#if defined (USE_POSIX_GLOB_LIBRARY)
46# include <glob.h>
47typedef int posix_glob_errfunc_t __P((const char *, int));
48#else
49# include <glob/glob.h>
50#endif
51
/* Control whether * matches .files in globbing.  setup_glob_ignore ()
   sets this to 1 whenever GLOBIGNORE supplies at least one pattern and
   resets it to 0 when the variable has no value at all. */
int glob_dot_filenames;

/* Control whether the extended globbing features are enabled. */
int extended_glob = EXTGLOB_DEFAULT;

/* Control enabling special handling of `**'.  When nonzero,
   shell_glob_filename () passes GX_GLOBSTAR to glob_filename (). */
int glob_star = 0;
60
61/* Return nonzero if STRING has any unquoted special globbing chars in it. */
62int
63unquoted_glob_pattern_p (string)
64 register char *string;
65{
66 register int c;
67 char *send;
68 int open;
69
70 DECLARE_MBSTATE;
71
72 open = 0;
73 send = string + strlen (string);
74
75 while (c = *string++)
76 {
77 switch (c)
78 {
79 case '?':
80 case '*':
81 return (1);
82
83 case '[':
84 open++;
85 continue;
86
87 case ']':
88 if (open)
89 return (1);
90 continue;
91
92 case '+':
93 case '@':
94 case '!':
95 if (*string == '(') /*)*/
96 return (1);
97 continue;
98
99 /* A pattern can't end with a backslash, but a backslash in the pattern
100 can be removed by the matching engine, so we have to run it through
101 globbing. */
102 case '\\':
103 return (*string != 0);
104
105 case CTLESC:
106 if (*string++ == '\0')
107 return (0);
108 }
109
110 /* Advance one fewer byte than an entire multibyte character to
111 account for the auto-increment in the loop above. */
112#ifdef HANDLE_MULTIBYTE
113 string--;
114 ADVANCE_CHAR_P (string, send - string);
115 string++;
116#else
117 ADVANCE_CHAR_P (string, send - string);
118#endif
119 }
120 return (0);
121}
122
/* Return 1 if C is a character that is `special' in a POSIX ERE and needs to
   be quoted to match itself; return 0 for every other character.  Note that
   `]' and `}' are not in the set: only the opening forms are treated as
   special here. */
static inline int
ere_char (c)
     int c;
{
  switch (c)
    {
    case '.':
    case '[':
    case '\\':
    case '(':
    case ')':
    case '*':
    case '+':
    case '?':
    case '{':
    case '|':
    case '^':
    case '$':
      return 1;
    default:
      return 0;
    }
}
149
/* Return 1 if the character at S is one the globbing code treats
   specially, 0 otherwise.  `*', `[', `]', `?' and backslash always
   qualify; `+', `@' and `!' qualify only when the next character is an
   open paren.  Only S[0] and, for those three, S[1] are examined. */
int
glob_char_p (s)
     const char *s;
{
  int first;

  first = *s;

  if (first == '*' || first == '[' || first == ']'
      || first == '?' || first == '\\')
    return 1;

  if ((first == '+' || first == '@' || first == '!') && s[1] == '(')  /*)*/
    return 1;

  return 0;
}
171
172/* PATHNAME can contain characters prefixed by CTLESC; this indicates
173 that the character is to be quoted. We quote it here in the style
174 that the glob library recognizes. If flags includes QGLOB_CVTNULL,
175 we change quoted null strings (pathname[0] == CTLNUL) into empty
176 strings (pathname[0] == 0). If this is called after quote removal
177 is performed, (flags & QGLOB_CVTNULL) should be 0; if called when quote
178 removal has not been done (for example, before attempting to match a
179 pattern while executing a case statement), flags should include
180 QGLOB_CVTNULL. If flags includes QGLOB_CTLESC, we need to remove CTLESC
181 quoting CTLESC or CTLNUL (as if dequote_string were called). If flags
182 includes QGLOB_FILENAME, appropriate quoting to match a filename should be
183 performed. QGLOB_REGEXP means we're quoting for a Posix ERE (for
184 [[ string =~ pat ]]) and that requires some special handling. */
185char *
186quote_string_for_globbing (pathname, qflags)
187 const char *pathname;
188 int qflags;
189{
190 char *temp;
191 register int i, j;
192 int cclass, collsym, equiv, c, last_was_backslash;
193 int savei, savej;
194
195 temp = (char *)xmalloc (2 * strlen (pathname) + 1);
196
197 if ((qflags & QGLOB_CVTNULL) && QUOTED_NULL (pathname))
198 {
199 temp[0] = '\0';
200 return temp;
201 }
202
203 cclass = collsym = equiv = last_was_backslash = 0;
204 for (i = j = 0; pathname[i]; i++)
205 {
206 /* Fix for CTLESC at the end of the string? */
207 if (pathname[i] == CTLESC && pathname[i+1] == '\0')
208 {
209 temp[j++] = pathname[i++];
210 break;
211 }
212 /* If we are parsing regexp, turn CTLESC CTLESC into CTLESC. It's not an
213 ERE special character, so we should just be able to pass it through. */
214 else if ((qflags & (QGLOB_REGEXP|QGLOB_CTLESC)) && pathname[i] == CTLESC && (pathname[i+1] == CTLESC || pathname[i+1] == CTLNUL))
215 {
216 i++;
217 temp[j++] = pathname[i];
218 continue;
219 }
220 else if (pathname[i] == CTLESC)
221 {
222 if ((qflags & QGLOB_FILENAME) && pathname[i+1] == '/')
223 continue;
224 /* What to do if preceding char is backslash? */
225 if (pathname[i+1] != CTLESC && (qflags & QGLOB_REGEXP) && ere_char (pathname[i+1]) == 0)
226 continue;
227 temp[j++] = '\\';
228 i++;
229 if (pathname[i] == '\0')
230 break;
231 }
232 else if ((qflags & QGLOB_REGEXP) && (i == 0 || pathname[i-1] != CTLESC) && pathname[i] == '[') /*]*/
233 {
234 temp[j++] = pathname[i++]; /* open bracket */
235 savej = j;
236 savei = i;
237 c = pathname[i++]; /* c == char after open bracket */
238 if (c == '^') /* ignore pattern negation */
239 {
240 temp[j++] = c;
241 c = pathname[i++];
242 }
243 if (c == ']') /* ignore right bracket if first char */
244 {
245 temp[j++] = c;
246 c = pathname[i++];
247 }
248 do
249 {
250 if (c == 0)
251 goto endpat;
252 else if (c == CTLESC)
253 {
254 /* skip c, check for EOS, let assignment at end of loop */
255 /* pathname[i] == backslash-escaped character */
256 if (pathname[i] == 0)
257 goto endpat;
258 temp[j++] = pathname[i++];
259 }
260 else if (c == '[' && pathname[i] == ':')
261 {
262 temp[j++] = c;
263 temp[j++] = pathname[i++];
264 cclass = 1;
265 }
266 else if (cclass && c == ':' && pathname[i] == ']')
267 {
268 temp[j++] = c;
269 temp[j++] = pathname[i++];
270 cclass = 0;
271 }
272 else if (c == '[' && pathname[i] == '=')
273 {
274 temp[j++] = c;
275 temp[j++] = pathname[i++];
276 if (pathname[i] == ']')
277 temp[j++] = pathname[i++]; /* right brack can be in equiv */
278 equiv = 1;
279 }
280 else if (equiv && c == '=' && pathname[i] == ']')
281 {
282 temp[j++] = c;
283 temp[j++] = pathname[i++];
284 equiv = 0;
285 }
286 else if (c == '[' && pathname[i] == '.')
287 {
288 temp[j++] = c;
289 temp[j++] = pathname[i++];
290 if (pathname[i] == ']')
291 temp[j++] = pathname[i++]; /* right brack can be in collsym */
292 collsym = 1;
293 }
294 else if (collsym && c == '.' && pathname[i] == ']')
295 {
296 temp[j++] = c;
297 temp[j++] = pathname[i++];
298 collsym = 0;
299 }
300 else
301 temp[j++] = c;
302 }
303 while (((c = pathname[i++]) != ']') && c != 0);
304
305 /* If we don't find the closing bracket before we hit the end of
306 the string, rescan string without treating it as a bracket
307 expression (has implications for backslash and special ERE
308 chars) */
309 if (c == 0)
310 {
311 i = savei - 1; /* -1 for autoincrement above */
312 j = savej;
313 continue;
314 }
315
316 temp[j++] = c; /* closing right bracket */
317 i--; /* increment will happen above in loop */
318 continue; /* skip double assignment below */
319 }
320 else if (pathname[i] == '\\' && (qflags & QGLOB_REGEXP) == 0)
321 {
322 /* XXX - if not quoting regexp, use backslash as quote char. Should
323 we just pass it through without treating it as special? That is
324 what ksh93 seems to do. */
325
326 /* If we want to pass through backslash unaltered, comment out these
327 lines. */
328 temp[j++] = '\\';
329
330 i++;
331 if (pathname[i] == '\0')
332 break;
333 /* If we are turning CTLESC CTLESC into CTLESC, we need to do that
334 even when the first CTLESC is preceded by a backslash. */
335 if ((qflags & QGLOB_CTLESC) && pathname[i] == CTLESC && (pathname[i+1] == CTLESC || pathname[i+1] == CTLNUL))
336 i++; /* skip over the CTLESC */
337 }
338 else if (pathname[i] == '\\' && (qflags & QGLOB_REGEXP))
339 last_was_backslash = 1;
340 temp[j++] = pathname[i];
341 }
342endpat:
343 temp[j] = '\0';
344
345 return (temp);
346}
347
348char *
349quote_globbing_chars (string)
350 const char *string;
351{
352 size_t slen;
353 char *temp, *t;
354 const char *s, *send;
355 DECLARE_MBSTATE;
356
357 slen = strlen (string);
358 send = string + slen;
359
360 temp = (char *)xmalloc (slen * 2 + 1);
361 for (t = temp, s = string; *s; )
362 {
363 if (glob_char_p (s))
364 *t++ = '\\';
365
366 /* Copy a single (possibly multibyte) character from s to t,
367 incrementing both. */
368 COPY_CHAR_P (t, s, send);
369 }
370 *t = '\0';
371 return temp;
372}
373
374/* Call the glob library to do globbing on PATHNAME. */
375char **
376shell_glob_filename (pathname)
377 const char *pathname;
378{
379#if defined (USE_POSIX_GLOB_LIBRARY)
380 register int i;
381 char *temp, **results;
382 glob_t filenames;
383 int glob_flags;
384
385 temp = quote_string_for_globbing (pathname, QGLOB_FILENAME);
386
387 filenames.gl_offs = 0;
388
389# if defined (GLOB_PERIOD)
390 glob_flags = glob_dot_filenames ? GLOB_PERIOD : 0;
391# else
392 glob_flags = 0;
393# endif /* !GLOB_PERIOD */
394
395 glob_flags |= (GLOB_ERR | GLOB_DOOFFS);
396
397 i = glob (temp, glob_flags, (posix_glob_errfunc_t *)NULL, &filenames);
398
399 free (temp);
400
401 if (i == GLOB_NOSPACE || i == GLOB_ABORTED)
402 return ((char **)NULL);
403 else if (i == GLOB_NOMATCH)
404 filenames.gl_pathv = (char **)NULL;
405 else if (i != 0) /* other error codes not in POSIX.2 */
406 filenames.gl_pathv = (char **)NULL;
407
408 results = filenames.gl_pathv;
409
410 if (results && ((GLOB_FAILED (results)) == 0))
411 {
412 if (should_ignore_glob_matches ())
413 ignore_glob_matches (results);
414 if (results && results[0])
415 strvec_sort (results);
416 else
417 {
418 FREE (results);
419 results = (char **)NULL;
420 }
421 }
422
423 return (results);
424
425#else /* !USE_POSIX_GLOB_LIBRARY */
426
427 char *temp, **results;
428 int gflags;
429
430 noglob_dot_filenames = glob_dot_filenames == 0;
431
432 temp = quote_string_for_globbing (pathname, QGLOB_FILENAME);
433 gflags = glob_star ? GX_GLOBSTAR : 0;
434 results = glob_filename (temp, gflags);
435 free (temp);
436
437 if (results && ((GLOB_FAILED (results)) == 0))
438 {
439 if (should_ignore_glob_matches ())
440 ignore_glob_matches (results);
441 if (results && results[0])
442 strvec_sort (results);
443 else
444 {
445 FREE (results);
446 results = (char **)&glob_error_return;
447 }
448 }
449
450 return (results);
451#endif /* !USE_POSIX_GLOB_LIBRARY */
452}
453
454/* Stuff for GLOBIGNORE. */
455
/* Parsed state for the GLOBIGNORE variable.  Only the variable name is
   set statically; the remaining members start out null/zero and are
   filled in by setup_ignore_patterns ().
   NOTE(review): the initializers presumably correspond, in order, to
   varname, ignores, num_ignores, last_ignoreval and item_func -- confirm
   against the declaration of struct ignorevar. */
static struct ignorevar globignore =
{
  "GLOBIGNORE",
  (struct ign *)0,
  0,
  (char *)0,
  (sh_iv_item_func_t *)0,
};
464
465/* Set up to ignore some glob matches because the value of GLOBIGNORE
466 has changed. If GLOBIGNORE is being unset, we also need to disable
467 the globbing of filenames beginning with a `.'. */
468void
469setup_glob_ignore (name)
470 char *name;
471{
472 char *v;
473
474 v = get_string_value (name);
475 setup_ignore_patterns (&globignore);
476
477 if (globignore.num_ignores)
478 glob_dot_filenames = 1;
479 else if (v == 0)
480 glob_dot_filenames = 0;
481}
482
483int
484should_ignore_glob_matches ()
485{
486 return globignore.num_ignores;
487}
488
489/* Return 0 if NAME matches a pattern in the globignore.ignores list. */
490static int
491glob_name_is_acceptable (name)
492 const char *name;
493{
494 struct ign *p;
495 int flags;
496
497 /* . and .. are never matched */
498 if (name[0] == '.' && (name[1] == '\0' || (name[1] == '.' && name[2] == '\0')))
499 return (0);
500
501 flags = FNM_PATHNAME | FNMATCH_EXTFLAG | FNMATCH_NOCASEGLOB;
502 for (p = globignore.ignores; p->val; p++)
503 {
504 if (strmatch (p->val, (char *)name, flags) != FNM_NOMATCH)
505 return (0);
506 }
507 return (1);
508}
509
510/* Internal function to test whether filenames in NAMES should be
511 ignored. NAME_FUNC is a pointer to a function to call with each
512 name. It returns non-zero if the name is acceptable to the particular
513 ignore function which called _ignore_names; zero if the name should
514 be removed from NAMES. */
515
516static void
517ignore_globbed_names (names, name_func)
518 char **names;
519 sh_ignore_func_t *name_func;
520{
521 char **newnames;
522 int n, i;
523
524 for (i = 0; names[i]; i++)
525 ;
526 newnames = strvec_create (i + 1);
527
528 for (n = i = 0; names[i]; i++)
529 {
530 if ((*name_func) (names[i]))
531 newnames[n++] = names[i];
532 else
533 free (names[i]);
534 }
535
536 newnames[n] = (char *)NULL;
537
538 if (n == 0)
539 {
540 names[0] = (char *)NULL;
541 free (newnames);
542 return;
543 }
544
545 /* Copy the acceptable names from NEWNAMES back to NAMES and set the
546 new array end. */
547 for (n = 0; newnames[n]; n++)
548 names[n] = newnames[n];
549 names[n] = (char *)NULL;
550 free (newnames);
551}
552
553void
554ignore_glob_matches (names)
555 char **names;
556{
557 if (globignore.num_ignores == 0)
558 return;
559
560 ignore_globbed_names (names, glob_name_is_acceptable);
561}
562
563static char *
564split_ignorespec (s, ip)
565 char *s;
566 int *ip;
567{
568 char *t;
569 int n, i;
570
571 if (s == 0)
572 return 0;
573
574 i = *ip;
575 if (s[i] == 0)
576 return 0;
577
578 n = skip_to_delim (s, i, ":", SD_NOJMP|SD_EXTGLOB|SD_GLOB);
579 t = substring (s, i, n);
580
581 if (s[n] == ':')
582 n++;
583 *ip = n;
584 return t;
585}
586
587void
588setup_ignore_patterns (ivp)
589 struct ignorevar *ivp;
590{
591 int numitems, maxitems, ptr;
592 char *colon_bit, *this_ignoreval;
593 struct ign *p;
594
595 this_ignoreval = get_string_value (ivp->varname);
596
597 /* If nothing has changed then just exit now. */
598 if ((this_ignoreval && ivp->last_ignoreval && STREQ (this_ignoreval, ivp->last_ignoreval)) ||
599 (!this_ignoreval && !ivp->last_ignoreval))
600 return;
601
602 /* Oops. The ignore variable has changed. Re-parse it. */
603 ivp->num_ignores = 0;
604
605 if (ivp->ignores)
606 {
607 for (p = ivp->ignores; p->val; p++)
608 free(p->val);
609 free (ivp->ignores);
610 ivp->ignores = (struct ign *)NULL;
611 }
612
613 if (ivp->last_ignoreval)
614 {
615 free (ivp->last_ignoreval);
616 ivp->last_ignoreval = (char *)NULL;
617 }
618
619 if (this_ignoreval == 0 || *this_ignoreval == '\0')
620 return;
621
622 ivp->last_ignoreval = savestring (this_ignoreval);
623
624 numitems = maxitems = ptr = 0;
625
626#if 0
627 while (colon_bit = extract_colon_unit (this_ignoreval, &ptr))
628#else
629 while (colon_bit = split_ignorespec (this_ignoreval, &ptr))
630#endif
631 {
632 if (numitems + 1 >= maxitems)
633 {
634 maxitems += 10;
635 ivp->ignores = (struct ign *)xrealloc (ivp->ignores, maxitems * sizeof (struct ign));
636 }
637 ivp->ignores[numitems].val = colon_bit;
638 ivp->ignores[numitems].len = strlen (colon_bit);
639 ivp->ignores[numitems].flags = 0;
640 if (ivp->item_func)
641 (*ivp->item_func) (&ivp->ignores[numitems]);
642 numitems++;
643 }
644 ivp->ignores[numitems].val = (char *)NULL;
645 ivp->num_ignores = numitems;
646}