]>
Commit | Line | Data |
---|---|---|
1 | /* pathexp.c -- The shell interface to the globbing library. */ | |
2 | ||
3 | /* Copyright (C) 1995-2014 Free Software Foundation, Inc. | |
4 | ||
5 | This file is part of GNU Bash, the Bourne Again SHell. | |
6 | ||
7 | Bash is free software: you can redistribute it and/or modify | |
8 | it under the terms of the GNU General Public License as published by | |
9 | the Free Software Foundation, either version 3 of the License, or | |
10 | (at your option) any later version. | |
11 | ||
12 | Bash is distributed in the hope that it will be useful, | |
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | GNU General Public License for more details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License | |
18 | along with Bash. If not, see <http://www.gnu.org/licenses/>. | |
19 | */ | |
20 | ||
21 | #include "config.h" | |
22 | ||
23 | #include "bashtypes.h" | |
24 | #include <stdio.h> | |
25 | ||
26 | #if defined (HAVE_UNISTD_H) | |
27 | # include <unistd.h> | |
28 | #endif | |
29 | ||
30 | #include "bashansi.h" | |
31 | ||
32 | #include "shell.h" | |
33 | #include "pathexp.h" | |
34 | #include "flags.h" | |
35 | ||
36 | #include "shmbutil.h" | |
37 | #include "bashintl.h" | |
38 | ||
39 | #include <glob/strmatch.h> | |
40 | ||
41 | static int glob_name_is_acceptable __P((const char *)); | |
42 | static void ignore_globbed_names __P((char **, sh_ignore_func_t *)); | |
43 | static char *split_ignorespec __P((char *, int *)); | |
44 | ||
45 | #if defined (USE_POSIX_GLOB_LIBRARY) | |
46 | # include <glob.h> | |
47 | typedef int posix_glob_errfunc_t __P((const char *, int)); | |
48 | #else | |
49 | # include <glob/glob.h> | |
50 | #endif | |
51 | ||
/* Control whether * matches .files in globbing.  setup_glob_ignore () turns
   this on when GLOBIGNORE supplies at least one pattern and off when the
   variable is unset; shell_glob_filename () consults it on every call. */
int glob_dot_filenames;

/* Control whether the extended globbing features are enabled.  The initial
   value is EXTGLOB_DEFAULT. */
int extended_glob = EXTGLOB_DEFAULT;

/* Control enabling special handling of `**'.  When nonzero,
   shell_glob_filename () passes GX_GLOBSTAR to glob_filename (). */
int glob_star = 0;
60 | ||
61 | /* Return nonzero if STRING has any unquoted special globbing chars in it. */ | |
62 | int | |
63 | unquoted_glob_pattern_p (string) | |
64 | register char *string; | |
65 | { | |
66 | register int c; | |
67 | char *send; | |
68 | int open; | |
69 | ||
70 | DECLARE_MBSTATE; | |
71 | ||
72 | open = 0; | |
73 | send = string + strlen (string); | |
74 | ||
75 | while (c = *string++) | |
76 | { | |
77 | switch (c) | |
78 | { | |
79 | case '?': | |
80 | case '*': | |
81 | return (1); | |
82 | ||
83 | case '[': | |
84 | open++; | |
85 | continue; | |
86 | ||
87 | case ']': | |
88 | if (open) | |
89 | return (1); | |
90 | continue; | |
91 | ||
92 | case '+': | |
93 | case '@': | |
94 | case '!': | |
95 | if (*string == '(') /*)*/ | |
96 | return (1); | |
97 | continue; | |
98 | ||
99 | /* A pattern can't end with a backslash, but a backslash in the pattern | |
100 | can be removed by the matching engine, so we have to run it through | |
101 | globbing. */ | |
102 | case '\\': | |
103 | return (*string != 0); | |
104 | ||
105 | case CTLESC: | |
106 | if (*string++ == '\0') | |
107 | return (0); | |
108 | } | |
109 | ||
110 | /* Advance one fewer byte than an entire multibyte character to | |
111 | account for the auto-increment in the loop above. */ | |
112 | #ifdef HANDLE_MULTIBYTE | |
113 | string--; | |
114 | ADVANCE_CHAR_P (string, send - string); | |
115 | string++; | |
116 | #else | |
117 | ADVANCE_CHAR_P (string, send - string); | |
118 | #endif | |
119 | } | |
120 | return (0); | |
121 | } | |
122 | ||
/* Return 1 if C is one of the characters this file treats as `special' in
   a POSIX ERE, meaning it must be quoted to match itself; return 0 for
   every other value.  The set is:  . [ \ ( ) * + ? { | ^ $  */
static inline int
ere_char (c)
     int c;
{
  return (c == '.' || c == '[' || c == '\\' ||
          c == '(' || c == ')' ||
          c == '*' || c == '+' || c == '?' ||
          c == '{' || c == '|' ||
          c == '^' || c == '$');
}
149 | ||
/* Return 1 if the character at the start of S is a globbing special
   character, 0 otherwise.  `*', `[', `]', `?' and backslash always count;
   `+', `@' and `!' count only when the next character is `('. */
int
glob_char_p (s)
     const char *s;
{
  int first;

  first = *s;

  if (first == '*' || first == '[' || first == ']' ||
      first == '?' || first == '\\')
    return 1;

  /* Extended-glob introducers need the open paren right after them. */
  if ((first == '+' || first == '@' || first == '!') && s[1] == '(')   /*)*/
    return 1;

  return 0;
}
171 | ||
/* PATHNAME can contain characters prefixed by CTLESC; this indicates
   that the character is to be quoted.  We quote it here in the style
   that the glob library recognizes.  If flags includes QGLOB_CVTNULL,
   we change quoted null strings (pathname[0] == CTLNUL) into empty
   strings (pathname[0] == 0).  If this is called after quote removal
   is performed, (flags & QGLOB_CVTNULL) should be 0; if called when quote
   removal has not been done (for example, before attempting to match a
   pattern while executing a case statement), flags should include
   QGLOB_CVTNULL.  If flags includes QGLOB_CTLESC, we need to remove CTLESC
   quoting CTLESC or CTLNUL (as if dequote_string were called).  If flags
   includes QGLOB_FILENAME, appropriate quoting to match a filename should be
   performed.  QGLOB_REGEXP means we're quoting for a Posix ERE (for
   [[ string =~ pat ]]) and that requires some special handling.

   The result is a new string obtained from xmalloc (); PATHNAME itself is
   not modified.  shell_glob_filename () in this file frees the result
   after using it. */
char *
quote_string_for_globbing (pathname, qflags)
     const char *pathname;
     int qflags;
{
  char *temp;           /* output buffer */
  register int i, j;    /* i indexes PATHNAME, j indexes TEMP */
  /* cclass, collsym and equiv record whether we are inside [: :], [. .]
     or [= =] within a bracket expression.  last_was_backslash is assigned
     below but never read in this function. */
  int cclass, collsym, equiv, c, last_was_backslash;
  int savei, savej;     /* positions to rewind to if a `[' is never closed */

  /* The buffer is sized at twice the input length plus the terminator. */
  temp = (char *)xmalloc (2 * strlen (pathname) + 1);

  /* A quoted null string becomes the empty string when requested. */
  if ((qflags & QGLOB_CVTNULL) && QUOTED_NULL (pathname))
    {
      temp[0] = '\0';
      return temp;
    }

  cclass = collsym = equiv = last_was_backslash = 0;
  for (i = j = 0; pathname[i]; i++)
    {
      /* Fix for CTLESC at the end of the string? */
      /* A trailing CTLESC has nothing to quote; copy it and stop. */
      if (pathname[i] == CTLESC && pathname[i+1] == '\0')
        {
          temp[j++] = pathname[i++];
          break;
        }
      /* If we are parsing regexp, turn CTLESC CTLESC into CTLESC. It's not an
         ERE special character, so we should just be able to pass it through. */
      /* The same is done for CTLESC CTLNUL, and also when QGLOB_CTLESC is
         set: the leading CTLESC is dropped and the second byte is copied. */
      else if ((qflags & (QGLOB_REGEXP|QGLOB_CTLESC)) && pathname[i] == CTLESC && (pathname[i+1] == CTLESC || pathname[i+1] == CTLNUL))
        {
          i++;
          temp[j++] = pathname[i];
          continue;
        }
      else if (pathname[i] == CTLESC)
        {
          /* When quoting for filenames, a quoted slash loses its CTLESC;
             the `/' itself is copied on the next iteration. */
          if ((qflags & QGLOB_FILENAME) && pathname[i+1] == '/')
            continue;
          /* What to do if preceding char is backslash? */
          /* For an ERE, a quoted character that is not ERE-special needs no
             backslash, so the CTLESC is simply dropped. */
          if (pathname[i+1] != CTLESC && (qflags & QGLOB_REGEXP) && ere_char (pathname[i+1]) == 0)
            continue;
          /* Otherwise replace the CTLESC with a backslash; the quoted
             character is copied by the assignment at the bottom of the
             loop. */
          temp[j++] = '\\';
          i++;
          if (pathname[i] == '\0')
            break;
        }
      /* An unquoted `[' in an ERE: try to copy a whole bracket expression
         through, removing CTLESC from the characters inside it. */
      else if ((qflags & QGLOB_REGEXP) && (i == 0 || pathname[i-1] != CTLESC) && pathname[i] == '[')   /*]*/
        {
          temp[j++] = pathname[i++];    /* open bracket */
          /* Remember where we are in both strings so we can rescan from
             just after the `[' if no closing bracket turns up. */
          savej = j;
          savei = i;
          c = pathname[i++];    /* c == char after open bracket */
          if (c == '^')         /* ignore pattern negation */
            {
              temp[j++] = c;
              c = pathname[i++];
            }
          if (c == ']')         /* ignore right bracket if first char */
            {
              temp[j++] = c;
              c = pathname[i++];
            }
          /* Invariant inside the loop: c is the character being examined
             and pathname[i] is the character after it. */
          do
            {
              /* c can only be 0 here on the first pass, since the loop
                 condition below stops on 0; keep what has been copied so
                 far and finish. */
              if (c == 0)
                goto endpat;
              else if (c == CTLESC)
                {
                  /* skip c, check for EOS, let assignment at end of loop */
                  /* pathname[i] == backslash-escaped character */
                  if (pathname[i] == 0)
                    goto endpat;
                  temp[j++] = pathname[i++];
                }
              else if (c == '[' && pathname[i] == ':')
                {
                  /* start of a character class, [: */
                  temp[j++] = c;
                  temp[j++] = pathname[i++];
                  cclass = 1;
                }
              else if (cclass && c == ':' && pathname[i] == ']')
                {
                  /* end of a character class, :] */
                  temp[j++] = c;
                  temp[j++] = pathname[i++];
                  cclass = 0;
                }
              else if (c == '[' && pathname[i] == '=')
                {
                  /* start of an equivalence class, [= */
                  temp[j++] = c;
                  temp[j++] = pathname[i++];
                  if (pathname[i] == ']')
                    temp[j++] = pathname[i++];  /* right brack can be in equiv */
                  equiv = 1;
                }
              else if (equiv && c == '=' && pathname[i] == ']')
                {
                  /* end of an equivalence class, =] */
                  temp[j++] = c;
                  temp[j++] = pathname[i++];
                  equiv = 0;
                }
              else if (c == '[' && pathname[i] == '.')
                {
                  /* start of a collating symbol, [. */
                  temp[j++] = c;
                  temp[j++] = pathname[i++];
                  if (pathname[i] == ']')
                    temp[j++] = pathname[i++];  /* right brack can be in collsym */
                  collsym = 1;
                }
              else if (collsym && c == '.' && pathname[i] == ']')
                {
                  /* end of a collating symbol, .] */
                  temp[j++] = c;
                  temp[j++] = pathname[i++];
                  collsym = 0;
                }
              else
                temp[j++] = c;
            }
          while (((c = pathname[i++]) != ']') && c != 0);

          /* If we don't find the closing bracket before we hit the end of
             the string, rescan string without treating it as a bracket
             expression (has implications for backslash and special ERE
             chars) */
          if (c == 0)
            {
              i = savei - 1;    /* -1 for autoincrement above */
              j = savej;        /* discard output written after the `[' */
              continue;
            }

          temp[j++] = c;        /* closing right bracket */
          i--;                  /* increment will happen above in loop */
          continue;             /* skip double assignment below */
        }
      else if (pathname[i] == '\\' && (qflags & QGLOB_REGEXP) == 0)
        {
          /* XXX - if not quoting regexp, use backslash as quote char. Should
             we just pass it through without treating it as special? That is
             what ksh93 seems to do. */

          /* If we want to pass through backslash unaltered, comment out these
             lines. */
          temp[j++] = '\\';

          i++;
          if (pathname[i] == '\0')
            break;
          /* If we are turning CTLESC CTLESC into CTLESC, we need to do that
             even when the first CTLESC is preceded by a backslash. */
          if ((qflags & QGLOB_CTLESC) && pathname[i] == CTLESC && (pathname[i+1] == CTLESC || pathname[i+1] == CTLNUL))
            i++;        /* skip over the CTLESC */
        }
      else if (pathname[i] == '\\' && (qflags & QGLOB_REGEXP))
        last_was_backslash = 1;
      /* Common tail: copy the current character.  Reached for ordinary
         characters and after the CTLESC and backslash branches above have
         advanced i to the character they quote. */
      temp[j++] = pathname[i];
    }
endpat:
  temp[j] = '\0';

  return (temp);
}
347 | ||
348 | char * | |
349 | quote_globbing_chars (string) | |
350 | const char *string; | |
351 | { | |
352 | size_t slen; | |
353 | char *temp, *t; | |
354 | const char *s, *send; | |
355 | DECLARE_MBSTATE; | |
356 | ||
357 | slen = strlen (string); | |
358 | send = string + slen; | |
359 | ||
360 | temp = (char *)xmalloc (slen * 2 + 1); | |
361 | for (t = temp, s = string; *s; ) | |
362 | { | |
363 | if (glob_char_p (s)) | |
364 | *t++ = '\\'; | |
365 | ||
366 | /* Copy a single (possibly multibyte) character from s to t, | |
367 | incrementing both. */ | |
368 | COPY_CHAR_P (t, s, send); | |
369 | } | |
370 | *t = '\0'; | |
371 | return temp; | |
372 | } | |
373 | ||
374 | /* Call the glob library to do globbing on PATHNAME. */ | |
375 | char ** | |
376 | shell_glob_filename (pathname) | |
377 | const char *pathname; | |
378 | { | |
379 | #if defined (USE_POSIX_GLOB_LIBRARY) | |
380 | register int i; | |
381 | char *temp, **results; | |
382 | glob_t filenames; | |
383 | int glob_flags; | |
384 | ||
385 | temp = quote_string_for_globbing (pathname, QGLOB_FILENAME); | |
386 | ||
387 | filenames.gl_offs = 0; | |
388 | ||
389 | # if defined (GLOB_PERIOD) | |
390 | glob_flags = glob_dot_filenames ? GLOB_PERIOD : 0; | |
391 | # else | |
392 | glob_flags = 0; | |
393 | # endif /* !GLOB_PERIOD */ | |
394 | ||
395 | glob_flags |= (GLOB_ERR | GLOB_DOOFFS); | |
396 | ||
397 | i = glob (temp, glob_flags, (posix_glob_errfunc_t *)NULL, &filenames); | |
398 | ||
399 | free (temp); | |
400 | ||
401 | if (i == GLOB_NOSPACE || i == GLOB_ABORTED) | |
402 | return ((char **)NULL); | |
403 | else if (i == GLOB_NOMATCH) | |
404 | filenames.gl_pathv = (char **)NULL; | |
405 | else if (i != 0) /* other error codes not in POSIX.2 */ | |
406 | filenames.gl_pathv = (char **)NULL; | |
407 | ||
408 | results = filenames.gl_pathv; | |
409 | ||
410 | if (results && ((GLOB_FAILED (results)) == 0)) | |
411 | { | |
412 | if (should_ignore_glob_matches ()) | |
413 | ignore_glob_matches (results); | |
414 | if (results && results[0]) | |
415 | strvec_sort (results); | |
416 | else | |
417 | { | |
418 | FREE (results); | |
419 | results = (char **)NULL; | |
420 | } | |
421 | } | |
422 | ||
423 | return (results); | |
424 | ||
425 | #else /* !USE_POSIX_GLOB_LIBRARY */ | |
426 | ||
427 | char *temp, **results; | |
428 | int gflags; | |
429 | ||
430 | noglob_dot_filenames = glob_dot_filenames == 0; | |
431 | ||
432 | temp = quote_string_for_globbing (pathname, QGLOB_FILENAME); | |
433 | gflags = glob_star ? GX_GLOBSTAR : 0; | |
434 | results = glob_filename (temp, gflags); | |
435 | free (temp); | |
436 | ||
437 | if (results && ((GLOB_FAILED (results)) == 0)) | |
438 | { | |
439 | if (should_ignore_glob_matches ()) | |
440 | ignore_glob_matches (results); | |
441 | if (results && results[0]) | |
442 | strvec_sort (results); | |
443 | else | |
444 | { | |
445 | FREE (results); | |
446 | results = (char **)&glob_error_return; | |
447 | } | |
448 | } | |
449 | ||
450 | return (results); | |
451 | #endif /* !USE_POSIX_GLOB_LIBRARY */ | |
452 | } | |
453 | ||
454 | /* Stuff for GLOBIGNORE. */ | |
455 | ||
456 | static struct ignorevar globignore = | |
457 | { | |
458 | "GLOBIGNORE", | |
459 | (struct ign *)0, | |
460 | 0, | |
461 | (char *)0, | |
462 | (sh_iv_item_func_t *)0, | |
463 | }; | |
464 | ||
465 | /* Set up to ignore some glob matches because the value of GLOBIGNORE | |
466 | has changed. If GLOBIGNORE is being unset, we also need to disable | |
467 | the globbing of filenames beginning with a `.'. */ | |
468 | void | |
469 | setup_glob_ignore (name) | |
470 | char *name; | |
471 | { | |
472 | char *v; | |
473 | ||
474 | v = get_string_value (name); | |
475 | setup_ignore_patterns (&globignore); | |
476 | ||
477 | if (globignore.num_ignores) | |
478 | glob_dot_filenames = 1; | |
479 | else if (v == 0) | |
480 | glob_dot_filenames = 0; | |
481 | } | |
482 | ||
483 | int | |
484 | should_ignore_glob_matches () | |
485 | { | |
486 | return globignore.num_ignores; | |
487 | } | |
488 | ||
489 | /* Return 0 if NAME matches a pattern in the globignore.ignores list. */ | |
490 | static int | |
491 | glob_name_is_acceptable (name) | |
492 | const char *name; | |
493 | { | |
494 | struct ign *p; | |
495 | int flags; | |
496 | ||
497 | /* . and .. are never matched */ | |
498 | if (name[0] == '.' && (name[1] == '\0' || (name[1] == '.' && name[2] == '\0'))) | |
499 | return (0); | |
500 | ||
501 | flags = FNM_PATHNAME | FNMATCH_EXTFLAG | FNMATCH_NOCASEGLOB; | |
502 | for (p = globignore.ignores; p->val; p++) | |
503 | { | |
504 | if (strmatch (p->val, (char *)name, flags) != FNM_NOMATCH) | |
505 | return (0); | |
506 | } | |
507 | return (1); | |
508 | } | |
509 | ||
510 | /* Internal function to test whether filenames in NAMES should be | |
511 | ignored. NAME_FUNC is a pointer to a function to call with each | |
512 | name. It returns non-zero if the name is acceptable to the particular | |
513 | ignore function which called _ignore_names; zero if the name should | |
514 | be removed from NAMES. */ | |
515 | ||
516 | static void | |
517 | ignore_globbed_names (names, name_func) | |
518 | char **names; | |
519 | sh_ignore_func_t *name_func; | |
520 | { | |
521 | char **newnames; | |
522 | int n, i; | |
523 | ||
524 | for (i = 0; names[i]; i++) | |
525 | ; | |
526 | newnames = strvec_create (i + 1); | |
527 | ||
528 | for (n = i = 0; names[i]; i++) | |
529 | { | |
530 | if ((*name_func) (names[i])) | |
531 | newnames[n++] = names[i]; | |
532 | else | |
533 | free (names[i]); | |
534 | } | |
535 | ||
536 | newnames[n] = (char *)NULL; | |
537 | ||
538 | if (n == 0) | |
539 | { | |
540 | names[0] = (char *)NULL; | |
541 | free (newnames); | |
542 | return; | |
543 | } | |
544 | ||
545 | /* Copy the acceptable names from NEWNAMES back to NAMES and set the | |
546 | new array end. */ | |
547 | for (n = 0; newnames[n]; n++) | |
548 | names[n] = newnames[n]; | |
549 | names[n] = (char *)NULL; | |
550 | free (newnames); | |
551 | } | |
552 | ||
553 | void | |
554 | ignore_glob_matches (names) | |
555 | char **names; | |
556 | { | |
557 | if (globignore.num_ignores == 0) | |
558 | return; | |
559 | ||
560 | ignore_globbed_names (names, glob_name_is_acceptable); | |
561 | } | |
562 | ||
563 | static char * | |
564 | split_ignorespec (s, ip) | |
565 | char *s; | |
566 | int *ip; | |
567 | { | |
568 | char *t; | |
569 | int n, i; | |
570 | ||
571 | if (s == 0) | |
572 | return 0; | |
573 | ||
574 | i = *ip; | |
575 | if (s[i] == 0) | |
576 | return 0; | |
577 | ||
578 | n = skip_to_delim (s, i, ":", SD_NOJMP|SD_EXTGLOB|SD_GLOB); | |
579 | t = substring (s, i, n); | |
580 | ||
581 | if (s[n] == ':') | |
582 | n++; | |
583 | *ip = n; | |
584 | return t; | |
585 | } | |
586 | ||
587 | void | |
588 | setup_ignore_patterns (ivp) | |
589 | struct ignorevar *ivp; | |
590 | { | |
591 | int numitems, maxitems, ptr; | |
592 | char *colon_bit, *this_ignoreval; | |
593 | struct ign *p; | |
594 | ||
595 | this_ignoreval = get_string_value (ivp->varname); | |
596 | ||
597 | /* If nothing has changed then just exit now. */ | |
598 | if ((this_ignoreval && ivp->last_ignoreval && STREQ (this_ignoreval, ivp->last_ignoreval)) || | |
599 | (!this_ignoreval && !ivp->last_ignoreval)) | |
600 | return; | |
601 | ||
602 | /* Oops. The ignore variable has changed. Re-parse it. */ | |
603 | ivp->num_ignores = 0; | |
604 | ||
605 | if (ivp->ignores) | |
606 | { | |
607 | for (p = ivp->ignores; p->val; p++) | |
608 | free(p->val); | |
609 | free (ivp->ignores); | |
610 | ivp->ignores = (struct ign *)NULL; | |
611 | } | |
612 | ||
613 | if (ivp->last_ignoreval) | |
614 | { | |
615 | free (ivp->last_ignoreval); | |
616 | ivp->last_ignoreval = (char *)NULL; | |
617 | } | |
618 | ||
619 | if (this_ignoreval == 0 || *this_ignoreval == '\0') | |
620 | return; | |
621 | ||
622 | ivp->last_ignoreval = savestring (this_ignoreval); | |
623 | ||
624 | numitems = maxitems = ptr = 0; | |
625 | ||
626 | #if 0 | |
627 | while (colon_bit = extract_colon_unit (this_ignoreval, &ptr)) | |
628 | #else | |
629 | while (colon_bit = split_ignorespec (this_ignoreval, &ptr)) | |
630 | #endif | |
631 | { | |
632 | if (numitems + 1 >= maxitems) | |
633 | { | |
634 | maxitems += 10; | |
635 | ivp->ignores = (struct ign *)xrealloc (ivp->ignores, maxitems * sizeof (struct ign)); | |
636 | } | |
637 | ivp->ignores[numitems].val = colon_bit; | |
638 | ivp->ignores[numitems].len = strlen (colon_bit); | |
639 | ivp->ignores[numitems].flags = 0; | |
640 | if (ivp->item_func) | |
641 | (*ivp->item_func) (&ivp->ignores[numitems]); | |
642 | numitems++; | |
643 | } | |
644 | ivp->ignores[numitems].val = (char *)NULL; | |
645 | ivp->num_ignores = numitems; | |
646 | } |