]>
Commit | Line | Data |
---|---|---|
1 | /* pathexp.c -- The shell interface to the globbing library. */ | |
2 | ||
3 | /* Copyright (C) 1995-2014 Free Software Foundation, Inc. | |
4 | ||
5 | This file is part of GNU Bash, the Bourne Again SHell. | |
6 | ||
7 | Bash is free software: you can redistribute it and/or modify | |
8 | it under the terms of the GNU General Public License as published by | |
9 | the Free Software Foundation, either version 3 of the License, or | |
10 | (at your option) any later version. | |
11 | ||
12 | Bash is distributed in the hope that it will be useful, | |
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | GNU General Public License for more details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License | |
18 | along with Bash. If not, see <http://www.gnu.org/licenses/>. | |
19 | */ | |
20 | ||
21 | #include "config.h" | |
22 | ||
23 | #include "bashtypes.h" | |
24 | #include <stdio.h> | |
25 | ||
26 | #if defined (HAVE_UNISTD_H) | |
27 | # include <unistd.h> | |
28 | #endif | |
29 | ||
30 | #include "bashansi.h" | |
31 | ||
32 | #include "shell.h" | |
33 | #include "pathexp.h" | |
34 | #include "flags.h" | |
35 | ||
36 | #include "shmbutil.h" | |
37 | #include "bashintl.h" | |
38 | ||
39 | #include <glob/strmatch.h> | |
40 | ||
41 | static int glob_name_is_acceptable __P((const char *)); | |
42 | static void ignore_globbed_names __P((char **, sh_ignore_func_t *)); | |
43 | static char *split_ignorespec __P((char *, int *)); | |
44 | ||
45 | #if defined (USE_POSIX_GLOB_LIBRARY) | |
46 | # include <glob.h> | |
47 | typedef int posix_glob_errfunc_t __P((const char *, int)); | |
48 | #else | |
49 | # include <glob/glob.h> | |
50 | #endif | |
51 | ||
52 | /* Control whether * matches .files in globbing.  setup_glob_ignore in this file sets it to 1 when GLOBIGNORE yields ignore patterns and clears it when the variable is unset. */ | |
53 | int glob_dot_filenames; | |
54 | ||
55 | /* Control whether the extended globbing features are enabled.  The initial value is EXTGLOB_DEFAULT. */ | |
56 | int extended_glob = EXTGLOB_DEFAULT; | |
57 | ||
58 | /* Control enabling special handling of `**'.  When nonzero, shell_glob_filename passes GX_GLOBSTAR to glob_filename. */ | |
59 | int glob_star = 0; | |
60 | ||
61 | /* Return nonzero if STRING has any unquoted special globbing chars in it. */ | |
62 | int | |
63 | unquoted_glob_pattern_p (string) | |
64 | register char *string; | |
65 | { | |
66 | register int c; | |
67 | char *send; | |
68 | int open; | |
69 | ||
70 | DECLARE_MBSTATE; | |
71 | ||
72 | open = 0; | |
73 | send = string + strlen (string); | |
74 | ||
75 | while (c = *string++) | |
76 | { | |
77 | switch (c) | |
78 | { | |
79 | case '?': | |
80 | case '*': | |
81 | return (1); | |
82 | ||
83 | case '[': | |
84 | open++; | |
85 | continue; | |
86 | ||
87 | case ']': | |
88 | if (open) | |
89 | return (1); | |
90 | continue; | |
91 | ||
92 | case '+': | |
93 | case '@': | |
94 | case '!': | |
95 | if (*string == '(') /*)*/ | |
96 | return (1); | |
97 | continue; | |
98 | ||
99 | case CTLESC: | |
100 | case '\\': | |
101 | if (*string++ == '\0') | |
102 | return (0); | |
103 | } | |
104 | ||
105 | /* Advance one fewer byte than an entire multibyte character to | |
106 | account for the auto-increment in the loop above. */ | |
107 | #ifdef HANDLE_MULTIBYTE | |
108 | string--; | |
109 | ADVANCE_CHAR_P (string, send - string); | |
110 | string++; | |
111 | #else | |
112 | ADVANCE_CHAR_P (string, send - string); | |
113 | #endif | |
114 | } | |
115 | return (0); | |
116 | } | |
117 | ||
/* Return 1 if C is a character that is `special' in a POSIX ERE and needs to
   be quoted to match itself, 0 for every other value.  The characters
   recognized are . [ \ ( ) * + ? { | ^ and $. */
static inline int
ere_char (c)
     int c;
{
  switch (c)
    {
    case '.': case '[': case '\\':
    case '(': case ')':
    case '*': case '+': case '?':
    case '{': case '|':
    case '^': case '$':
      return 1;
    default:
      break;
    }
  return 0;
}
144 | ||
/* Return 1 if the character S points at is special to the globbing code:
   one of `*', `[', `]', `?' or backslash, or one of `+', `@', `!' when the
   next character is `('.  Return 0 otherwise. */
int
glob_char_p (s)
     const char *s;
{
  const char first = *s;

  if (first == '*' || first == '[' || first == ']' || first == '?' || first == '\\')
    return 1;

  /* The extended-pattern prefixes only count when they open a group. */
  if ((first == '+' || first == '@' || first == '!') && s[1] == '(')    /*)*/
    return 1;

  return 0;
}
166 | ||
167 | /* PATHNAME can contain characters prefixed by CTLESC; this indicates | |
168 | that the character is to be quoted. We quote it here in the style | |
169 | that the glob library recognizes. If flags includes QGLOB_CVTNULL, | |
170 | we change quoted null strings (pathname[0] == CTLNUL) into empty | |
171 | strings (pathname[0] == 0). If this is called after quote removal | |
172 | is performed, (flags & QGLOB_CVTNULL) should be 0; if called when quote | |
173 | removal has not been done (for example, before attempting to match a | |
174 | pattern while executing a case statement), flags should include | |
175 | QGLOB_CVTNULL. If flags includes QGLOB_FILENAME, appropriate quoting | |
176 | to match a filename should be performed. QGLOB_REGEXP means we're | |
177 | quoting for a Posix ERE (for [[ string =~ pat ]]) and that requires | |
178 | some special handling. */ | |
179 | char * | |
180 | quote_string_for_globbing (pathname, qflags) | |
181 | const char *pathname; | |
182 | int qflags; | |
183 | { | |
184 | char *temp; | |
185 | register int i, j; | |
186 | int brack, cclass, collsym, equiv, c, last_was_backslash; | |
187 | int savei, savej; | |
188 | ||
189 | temp = (char *)xmalloc (2 * strlen (pathname) + 1); | |
190 | ||
191 | if ((qflags & QGLOB_CVTNULL) && QUOTED_NULL (pathname)) | |
192 | { | |
193 | temp[0] = '\0'; | |
194 | return temp; | |
195 | } | |
196 | ||
197 | brack = cclass = collsym = equiv = last_was_backslash = 0; | |
198 | for (i = j = 0; pathname[i]; i++) | |
199 | { | |
200 | /* Fix for CTLESC at the end of the string? */ | |
201 | if (pathname[i] == CTLESC && pathname[i+1] == '\0') | |
202 | { | |
203 | temp[j++] = pathname[i++]; | |
204 | break; | |
205 | } | |
206 | /* If we are parsing regexp, turn CTLESC CTLESC into CTLESC. It's not an | |
207 | ERE special character, so we should just be able to pass it through. */ | |
208 | else if ((qflags & QGLOB_REGEXP) && pathname[i] == CTLESC && pathname[i+1] == CTLESC) | |
209 | { | |
210 | i++; | |
211 | temp[j++] = pathname[i]; | |
212 | continue; | |
213 | } | |
214 | else if (pathname[i] == CTLESC) | |
215 | { | |
216 | if ((qflags & QGLOB_FILENAME) && pathname[i+1] == '/') | |
217 | continue; | |
218 | /* What to do if preceding char is backslash? */ | |
219 | if (pathname[i+1] != CTLESC && (qflags & QGLOB_REGEXP) && ere_char (pathname[i+1]) == 0) | |
220 | continue; | |
221 | temp[j++] = '\\'; | |
222 | i++; | |
223 | if (pathname[i] == '\0') | |
224 | break; | |
225 | } | |
226 | else if ((qflags & QGLOB_REGEXP) && (i == 0 || pathname[i-1] != CTLESC) && pathname[i] == '[') /*]*/ | |
227 | { | |
228 | brack = 1; | |
229 | temp[j++] = pathname[i++]; /* open bracket */ | |
230 | savej = j; | |
231 | savei = i; | |
232 | c = pathname[i++]; /* c == char after open bracket */ | |
233 | do | |
234 | { | |
235 | if (c == 0) | |
236 | goto endpat; | |
237 | else if (c == CTLESC) | |
238 | { | |
239 | /* skip c, check for EOS, let assignment at end of loop */ | |
240 | /* pathname[i] == backslash-escaped character */ | |
241 | if (pathname[i] == 0) | |
242 | goto endpat; | |
243 | temp[j++] = pathname[i++]; | |
244 | } | |
245 | else if (c == '[' && pathname[i] == ':') | |
246 | { | |
247 | temp[j++] = c; | |
248 | temp[j++] = pathname[i++]; | |
249 | cclass = 1; | |
250 | } | |
251 | else if (cclass && c == ':' && pathname[i] == ']') | |
252 | { | |
253 | temp[j++] = c; | |
254 | temp[j++] = pathname[i++]; | |
255 | cclass = 0; | |
256 | } | |
257 | else if (c == '[' && pathname[i] == '=') | |
258 | { | |
259 | temp[j++] = c; | |
260 | temp[j++] = pathname[i++]; | |
261 | if (pathname[i] == ']') | |
262 | temp[j++] = pathname[i++]; /* right brack can be in equiv */ | |
263 | equiv = 1; | |
264 | } | |
265 | else if (equiv && c == '=' && pathname[i] == ']') | |
266 | { | |
267 | temp[j++] = c; | |
268 | temp[j++] = pathname[i++]; | |
269 | equiv = 0; | |
270 | } | |
271 | else if (c == '[' && pathname[i] == '.') | |
272 | { | |
273 | temp[j++] = c; | |
274 | temp[j++] = pathname[i++]; | |
275 | if (pathname[i] == ']') | |
276 | temp[j++] = pathname[i++]; /* right brack can be in collsym */ | |
277 | collsym = 1; | |
278 | } | |
279 | else if (collsym && c == '.' && pathname[i] == ']') | |
280 | { | |
281 | temp[j++] = c; | |
282 | temp[j++] = pathname[i++]; | |
283 | collsym = 0; | |
284 | } | |
285 | else | |
286 | temp[j++] = c; | |
287 | } | |
288 | while (((c = pathname[i++]) != ']') && c != 0); | |
289 | ||
290 | /* If we don't find the closing bracket before we hit the end of | |
291 | the string, rescan string without treating it as a bracket | |
292 | expression (has implications for backslash and special ERE | |
293 | chars) */ | |
294 | if (c == 0) | |
295 | { | |
296 | i = savei - 1; /* -1 for autoincrement above */ | |
297 | j = savej; | |
298 | continue; | |
299 | } | |
300 | ||
301 | temp[j++] = c; /* closing right bracket */ | |
302 | i--; /* increment will happen above in loop */ | |
303 | continue; /* skip double assignment below */ | |
304 | } | |
305 | else if (pathname[i] == '\\' && (qflags & QGLOB_REGEXP) == 0) | |
306 | { | |
307 | /* XXX - if not quoting regexp, use backslash as quote char. Should | |
308 | we just pass it through without treating it as special? That is | |
309 | what ksh93 seems to do. */ | |
310 | ||
311 | /* If we want to pass through backslash unaltered, comment out these | |
312 | lines. */ | |
313 | temp[j++] = '\\'; | |
314 | ||
315 | i++; | |
316 | if (pathname[i] == '\0') | |
317 | break; | |
318 | } | |
319 | else if (pathname[i] == '\\' && (qflags & QGLOB_REGEXP)) | |
320 | last_was_backslash = 1; | |
321 | temp[j++] = pathname[i]; | |
322 | } | |
323 | endpat: | |
324 | temp[j] = '\0'; | |
325 | ||
326 | return (temp); | |
327 | } | |
328 | ||
329 | char * | |
330 | quote_globbing_chars (string) | |
331 | const char *string; | |
332 | { | |
333 | size_t slen; | |
334 | char *temp, *t; | |
335 | const char *s, *send; | |
336 | DECLARE_MBSTATE; | |
337 | ||
338 | slen = strlen (string); | |
339 | send = string + slen; | |
340 | ||
341 | temp = (char *)xmalloc (slen * 2 + 1); | |
342 | for (t = temp, s = string; *s; ) | |
343 | { | |
344 | if (glob_char_p (s)) | |
345 | *t++ = '\\'; | |
346 | ||
347 | /* Copy a single (possibly multibyte) character from s to t, | |
348 | incrementing both. */ | |
349 | COPY_CHAR_P (t, s, send); | |
350 | } | |
351 | *t = '\0'; | |
352 | return temp; | |
353 | } | |
354 | ||
355 | /* Call the glob library to do globbing on PATHNAME. */ | |
356 | char ** | |
357 | shell_glob_filename (pathname) | |
358 | const char *pathname; | |
359 | { | |
360 | #if defined (USE_POSIX_GLOB_LIBRARY) | |
361 | register int i; | |
362 | char *temp, **results; | |
363 | glob_t filenames; | |
364 | int glob_flags; | |
365 | ||
366 | temp = quote_string_for_globbing (pathname, QGLOB_FILENAME); | |
367 | ||
368 | filenames.gl_offs = 0; | |
369 | ||
370 | # if defined (GLOB_PERIOD) | |
371 | glob_flags = glob_dot_filenames ? GLOB_PERIOD : 0; | |
372 | # else | |
373 | glob_flags = 0; | |
374 | # endif /* !GLOB_PERIOD */ | |
375 | ||
376 | glob_flags |= (GLOB_ERR | GLOB_DOOFFS); | |
377 | ||
378 | i = glob (temp, glob_flags, (posix_glob_errfunc_t *)NULL, &filenames); | |
379 | ||
380 | free (temp); | |
381 | ||
382 | if (i == GLOB_NOSPACE || i == GLOB_ABORTED) | |
383 | return ((char **)NULL); | |
384 | else if (i == GLOB_NOMATCH) | |
385 | filenames.gl_pathv = (char **)NULL; | |
386 | else if (i != 0) /* other error codes not in POSIX.2 */ | |
387 | filenames.gl_pathv = (char **)NULL; | |
388 | ||
389 | results = filenames.gl_pathv; | |
390 | ||
391 | if (results && ((GLOB_FAILED (results)) == 0)) | |
392 | { | |
393 | if (should_ignore_glob_matches ()) | |
394 | ignore_glob_matches (results); | |
395 | if (results && results[0]) | |
396 | strvec_sort (results); | |
397 | else | |
398 | { | |
399 | FREE (results); | |
400 | results = (char **)NULL; | |
401 | } | |
402 | } | |
403 | ||
404 | return (results); | |
405 | ||
406 | #else /* !USE_POSIX_GLOB_LIBRARY */ | |
407 | ||
408 | char *temp, **results; | |
409 | ||
410 | noglob_dot_filenames = glob_dot_filenames == 0; | |
411 | ||
412 | temp = quote_string_for_globbing (pathname, QGLOB_FILENAME); | |
413 | results = glob_filename (temp, glob_star ? GX_GLOBSTAR : 0); | |
414 | free (temp); | |
415 | ||
416 | if (results && ((GLOB_FAILED (results)) == 0)) | |
417 | { | |
418 | if (should_ignore_glob_matches ()) | |
419 | ignore_glob_matches (results); | |
420 | if (results && results[0]) | |
421 | strvec_sort (results); | |
422 | else | |
423 | { | |
424 | FREE (results); | |
425 | results = (char **)&glob_error_return; | |
426 | } | |
427 | } | |
428 | ||
429 | return (results); | |
430 | #endif /* !USE_POSIX_GLOB_LIBRARY */ | |
431 | } | |
432 | ||
433 | /* Stuff for GLOBIGNORE. */ | |
434 | ||
435 | static struct ignorevar globignore = | |
436 | { | |
437 | "GLOBIGNORE", | |
438 | (struct ign *)0, | |
439 | 0, | |
440 | (char *)0, | |
441 | (sh_iv_item_func_t *)0, | |
442 | }; | |
443 | ||
444 | /* Set up to ignore some glob matches because the value of GLOBIGNORE | |
445 | has changed. If GLOBIGNORE is being unset, we also need to disable | |
446 | the globbing of filenames beginning with a `.'. */ | |
447 | void | |
448 | setup_glob_ignore (name) | |
449 | char *name; | |
450 | { | |
451 | char *v; | |
452 | ||
453 | v = get_string_value (name); | |
454 | setup_ignore_patterns (&globignore); | |
455 | ||
456 | if (globignore.num_ignores) | |
457 | glob_dot_filenames = 1; | |
458 | else if (v == 0) | |
459 | glob_dot_filenames = 0; | |
460 | } | |
461 | ||
462 | int | |
463 | should_ignore_glob_matches () | |
464 | { | |
465 | return globignore.num_ignores; | |
466 | } | |
467 | ||
468 | /* Return 0 if NAME matches a pattern in the globignore.ignores list. */ | |
469 | static int | |
470 | glob_name_is_acceptable (name) | |
471 | const char *name; | |
472 | { | |
473 | struct ign *p; | |
474 | int flags; | |
475 | ||
476 | /* . and .. are never matched */ | |
477 | if (name[0] == '.' && (name[1] == '\0' || (name[1] == '.' && name[2] == '\0'))) | |
478 | return (0); | |
479 | ||
480 | flags = FNM_PATHNAME | FNMATCH_EXTFLAG | FNMATCH_NOCASEGLOB; | |
481 | for (p = globignore.ignores; p->val; p++) | |
482 | { | |
483 | if (strmatch (p->val, (char *)name, flags) != FNM_NOMATCH) | |
484 | return (0); | |
485 | } | |
486 | return (1); | |
487 | } | |
488 | ||
489 | /* Internal function to test whether filenames in NAMES should be | |
490 | ignored. NAME_FUNC is a pointer to a function to call with each | |
491 | name. It returns non-zero if the name is acceptable to the particular | |
492 | ignore function which called _ignore_names; zero if the name should | |
493 | be removed from NAMES. */ | |
494 | ||
495 | static void | |
496 | ignore_globbed_names (names, name_func) | |
497 | char **names; | |
498 | sh_ignore_func_t *name_func; | |
499 | { | |
500 | char **newnames; | |
501 | int n, i; | |
502 | ||
503 | for (i = 0; names[i]; i++) | |
504 | ; | |
505 | newnames = strvec_create (i + 1); | |
506 | ||
507 | for (n = i = 0; names[i]; i++) | |
508 | { | |
509 | if ((*name_func) (names[i])) | |
510 | newnames[n++] = names[i]; | |
511 | else | |
512 | free (names[i]); | |
513 | } | |
514 | ||
515 | newnames[n] = (char *)NULL; | |
516 | ||
517 | if (n == 0) | |
518 | { | |
519 | names[0] = (char *)NULL; | |
520 | free (newnames); | |
521 | return; | |
522 | } | |
523 | ||
524 | /* Copy the acceptable names from NEWNAMES back to NAMES and set the | |
525 | new array end. */ | |
526 | for (n = 0; newnames[n]; n++) | |
527 | names[n] = newnames[n]; | |
528 | names[n] = (char *)NULL; | |
529 | free (newnames); | |
530 | } | |
531 | ||
532 | void | |
533 | ignore_glob_matches (names) | |
534 | char **names; | |
535 | { | |
536 | if (globignore.num_ignores == 0) | |
537 | return; | |
538 | ||
539 | ignore_globbed_names (names, glob_name_is_acceptable); | |
540 | } | |
541 | ||
542 | static char * | |
543 | split_ignorespec (s, ip) | |
544 | char *s; | |
545 | int *ip; | |
546 | { | |
547 | char *t; | |
548 | int n, i; | |
549 | ||
550 | if (s == 0) | |
551 | return 0; | |
552 | ||
553 | i = *ip; | |
554 | if (s[i] == 0) | |
555 | return 0; | |
556 | ||
557 | n = skip_to_delim (s, i, ":", SD_NOJMP|SD_EXTGLOB|SD_GLOB); | |
558 | t = substring (s, i, n); | |
559 | ||
560 | if (s[n] == ':') | |
561 | n++; | |
562 | *ip = n; | |
563 | return t; | |
564 | } | |
565 | ||
566 | void | |
567 | setup_ignore_patterns (ivp) | |
568 | struct ignorevar *ivp; | |
569 | { | |
570 | int numitems, maxitems, ptr; | |
571 | char *colon_bit, *this_ignoreval; | |
572 | struct ign *p; | |
573 | ||
574 | this_ignoreval = get_string_value (ivp->varname); | |
575 | ||
576 | /* If nothing has changed then just exit now. */ | |
577 | if ((this_ignoreval && ivp->last_ignoreval && STREQ (this_ignoreval, ivp->last_ignoreval)) || | |
578 | (!this_ignoreval && !ivp->last_ignoreval)) | |
579 | return; | |
580 | ||
581 | /* Oops. The ignore variable has changed. Re-parse it. */ | |
582 | ivp->num_ignores = 0; | |
583 | ||
584 | if (ivp->ignores) | |
585 | { | |
586 | for (p = ivp->ignores; p->val; p++) | |
587 | free(p->val); | |
588 | free (ivp->ignores); | |
589 | ivp->ignores = (struct ign *)NULL; | |
590 | } | |
591 | ||
592 | if (ivp->last_ignoreval) | |
593 | { | |
594 | free (ivp->last_ignoreval); | |
595 | ivp->last_ignoreval = (char *)NULL; | |
596 | } | |
597 | ||
598 | if (this_ignoreval == 0 || *this_ignoreval == '\0') | |
599 | return; | |
600 | ||
601 | ivp->last_ignoreval = savestring (this_ignoreval); | |
602 | ||
603 | numitems = maxitems = ptr = 0; | |
604 | ||
605 | #if 0 | |
606 | while (colon_bit = extract_colon_unit (this_ignoreval, &ptr)) | |
607 | #else | |
608 | while (colon_bit = split_ignorespec (this_ignoreval, &ptr)) | |
609 | #endif | |
610 | { | |
611 | if (numitems + 1 >= maxitems) | |
612 | { | |
613 | maxitems += 10; | |
614 | ivp->ignores = (struct ign *)xrealloc (ivp->ignores, maxitems * sizeof (struct ign)); | |
615 | } | |
616 | ivp->ignores[numitems].val = colon_bit; | |
617 | ivp->ignores[numitems].len = strlen (colon_bit); | |
618 | ivp->ignores[numitems].flags = 0; | |
619 | if (ivp->item_func) | |
620 | (*ivp->item_func) (&ivp->ignores[numitems]); | |
621 | numitems++; | |
622 | } | |
623 | ivp->ignores[numitems].val = (char *)NULL; | |
624 | ivp->num_ignores = numitems; | |
625 | } |