]> git.ipfire.org Git - thirdparty/bash.git/blob - lib/glob/sm_loop.c
final set of ANSI C changes
[thirdparty/bash.git] / lib / glob / sm_loop.c
1 /* Copyright (C) 1991-2023 Free Software Foundation, Inc.
2
3 This file is part of GNU Bash, the Bourne Again SHell.
4
5 Bash is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
9
10 Bash is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with Bash. If not, see <http://www.gnu.org/licenses/>.
17 */
18
/* Both variables are defined outside this file.  GMATCH polls them on every
   pass through its main loop and gives up with FNM_NOMATCH as soon as either
   one is nonzero. */
extern int interrupt_state;
extern int terminating_signal;
20
/* A (pattern position, string position) pair.  GMATCH fills one in through
   its ENDS argument when it reaches a `*' wildcard, so that the caller can
   resume matching from that point. */
struct STRUCT
{
  CHAR *pattern;	/* where the `*' was found in the pattern */
  CHAR *string;		/* string position reached at that time */
};
26
27 int FCT (CHAR *, CHAR *, int);
28
29 static int GMATCH (CHAR *, CHAR *, CHAR *, CHAR *, struct STRUCT *, int);
30 static inline CHAR *PARSE_SUBBRACKET (CHAR *, int);
31 static CHAR *BRACKMATCH (CHAR *, U_CHAR, int);
32 static int EXTMATCH (INT, CHAR *, CHAR *, CHAR *, CHAR *, int);
33
34 extern void DEQUOTE_PATHNAME (CHAR *);
35
36 /*static*/ CHAR *PATSCAN (CHAR *, CHAR *, INT, int);
37
38 int
39 FCT (CHAR *pattern, CHAR *string, int flags)
40 {
41 CHAR *se, *pe;
42
43 if (string == 0 || pattern == 0)
44 return FNM_NOMATCH;
45
46 se = string + STRLEN ((XCHAR *)string);
47 pe = pattern + STRLEN ((XCHAR *)pattern);
48
49 return (GMATCH (string, se, pattern, pe, (struct STRUCT *)NULL, flags));
50 }
51
52 /* Match STRING against the filename pattern PATTERN, returning zero if
53 it matches, FNM_NOMATCH if not. */
54 static int
55 GMATCH (CHAR *string, CHAR *se, CHAR *pattern, CHAR *pe, struct STRUCT *ends, int flags)
56 {
57 CHAR *p, *n; /* pattern, string */
58 INT c; /* current pattern character - XXX U_CHAR? */
59 INT sc; /* current string character - XXX U_CHAR? */
60
61 p = pattern;
62 n = string;
63
64 if (string == 0 || pattern == 0)
65 return FNM_NOMATCH;
66
67 #if DEBUG_MATCHING
68 fprintf(stderr, "gmatch: string = %s; se = %s\n", string, se);
69 fprintf(stderr, "gmatch: pattern = %s; pe = %s\n", pattern, pe);
70 #endif
71
72 while (p < pe)
73 {
74 c = *p++;
75 c = FOLD (c);
76
77 sc = n < se ? *n : '\0';
78
79 if (interrupt_state || terminating_signal)
80 return FNM_NOMATCH;
81
82 #ifdef EXTENDED_GLOB
83 /* EXTMATCH () will handle recursively calling GMATCH, so we can
84 just return what EXTMATCH() returns. */
85 if ((flags & FNM_EXTMATCH) && *p == L('(') &&
86 (c == L('+') || c == L('*') || c == L('?') || c == L('@') || c == L('!'))) /* ) */
87 {
88 int lflags;
89 /* If we're not matching the start of the string, we're not
90 concerned about the special cases for matching `.' */
91 lflags = (n == string) ? flags : (flags & ~(FNM_PERIOD|FNM_DOTDOT));
92 return (EXTMATCH (c, n, se, p, pe, lflags));
93 }
94 #endif /* EXTENDED_GLOB */
95
96 switch (c)
97 {
98 case L('?'): /* Match single character */
99 if (sc == '\0')
100 return FNM_NOMATCH;
101 else if ((flags & FNM_PATHNAME) && sc == L('/'))
102 /* If we are matching a pathname, `?' can never match a `/'. */
103 return FNM_NOMATCH;
104 else if ((flags & FNM_PERIOD) && sc == L('.') &&
105 (n == string || ((flags & FNM_PATHNAME) && n[-1] == L('/'))))
106 /* `?' cannot match a `.' if it is the first character of the
107 string or if it is the first character following a slash and
108 we are matching a pathname. */
109 return FNM_NOMATCH;
110
111 /* `?' cannot match `.' or `..' if it is the first character of the
112 string or if it is the first character following a slash and
113 we are matching a pathname. */
114 if ((flags & FNM_DOTDOT) &&
115 ((n == string && SDOT_OR_DOTDOT(n)) ||
116 ((flags & FNM_PATHNAME) && n[-1] == L('/') && PDOT_OR_DOTDOT(n))))
117 return FNM_NOMATCH;
118
119 break;
120
121 case L('\\'): /* backslash escape removes special meaning */
122 if (p == pe && sc == '\\' && (n+1 == se))
123 break;
124
125 if (p == pe)
126 return FNM_NOMATCH;
127
128 if ((flags & FNM_NOESCAPE) == 0)
129 {
130 c = *p++;
131 /* A trailing `\' cannot match. */
132 if (p > pe)
133 return FNM_NOMATCH;
134 c = FOLD (c);
135 }
136 if (FOLD (sc) != (U_CHAR)c)
137 return FNM_NOMATCH;
138 break;
139
140 case L('*'): /* Match zero or more characters */
141 /* See below for the reason for using this. It avoids backtracking
142 back to a previous `*'. Picked up from glibc. */
143 if (ends != NULL)
144 {
145 ends->pattern = p - 1;
146 ends->string = n;
147 return (0);
148 }
149
150 if ((flags & FNM_PERIOD) && sc == L('.') &&
151 (n == string || ((flags & FNM_PATHNAME) && n[-1] == L('/'))))
152 /* `*' cannot match a `.' if it is the first character of the
153 string or if it is the first character following a slash and
154 we are matching a pathname. */
155 return FNM_NOMATCH;
156
157 /* `*' cannot match `.' or `..' if it is the first character of the
158 string or if it is the first character following a slash and
159 we are matching a pathname. */
160 if ((flags & FNM_DOTDOT) &&
161 ((n == string && SDOT_OR_DOTDOT(n)) ||
162 ((flags & FNM_PATHNAME) && n[-1] == L('/') && PDOT_OR_DOTDOT(n))))
163 return FNM_NOMATCH;
164
165 if (p == pe)
166 return 0;
167
168 /* Collapse multiple consecutive `*' and `?', but make sure that
169 one character of the string is consumed for each `?'. */
170 for (c = *p++; (c == L('?') || c == L('*')); c = *p++)
171 {
172 if ((flags & FNM_PATHNAME) && sc == L('/'))
173 /* A slash does not match a wildcard under FNM_PATHNAME. */
174 return FNM_NOMATCH;
175 #ifdef EXTENDED_GLOB
176 else if ((flags & FNM_EXTMATCH) && c == L('?') && *p == L('(')) /* ) */
177 {
178 CHAR *newn;
179
180 /* We can match 0 or 1 times. If we match, return success */
181 if (EXTMATCH (c, n, se, p, pe, flags) == 0)
182 return (0);
183
184 /* We didn't match the extended glob pattern, but
185 that's OK, since we can match 0 or 1 occurrences.
186 We need to skip the glob pattern and see if we
187 match the rest of the string. */
188 newn = PATSCAN (p + 1, pe, 0, flags);
189 /* If NEWN is 0, we have an ill-formed pattern. */
190 p = newn ? newn : pe;
191 }
192 #endif
193 else if (c == L('?'))
194 {
195 if (sc == L('\0'))
196 return FNM_NOMATCH;
197 /* One character of the string is consumed in matching
198 this ? wildcard, so *??? won't match if there are
199 fewer than three characters. */
200 n++;
201 sc = n < se ? *n : '\0';
202 }
203
204 #ifdef EXTENDED_GLOB
205 /* Handle ******(patlist) */
206 if ((flags & FNM_EXTMATCH) && c == L('*') && *p == L('(')) /*)*/
207 {
208 CHAR *newn;
209 /* We need to check whether or not the extended glob
210 pattern matches the remainder of the string.
211 If it does, we match the entire pattern. */
212 for (newn = n; newn < se; ++newn)
213 {
214 if (EXTMATCH (c, newn, se, p, pe, flags) == 0)
215 return (0);
216 }
217 /* We didn't match the extended glob pattern, but
218 that's OK, since we can match 0 or more occurrences.
219 We need to skip the glob pattern and see if we
220 match the rest of the string. */
221 newn = PATSCAN (p + 1, pe, 0, flags);
222 /* If NEWN is 0, we have an ill-formed pattern. */
223 p = newn ? newn : pe;
224 }
225 #endif
226 if (p == pe)
227 break;
228 }
229
230 /* The wildcards are the last element of the pattern. The name
231 cannot match completely if we are looking for a pathname and
232 it contains another slash, unless FNM_LEADING_DIR is set. */
233 if (c == L('\0'))
234 {
235 int r = (flags & FNM_PATHNAME) == 0 ? 0 : FNM_NOMATCH;
236 if (flags & FNM_PATHNAME)
237 {
238 if (flags & FNM_LEADING_DIR)
239 r = 0;
240 else if (MEMCHR (n, L('/'), se - n) == NULL)
241 r = 0;
242 }
243 return r;
244 }
245
246 /* If we've hit the end of the pattern and the last character of
247 the pattern was handled by the loop above, we've succeeded.
248 Otherwise, we need to match that last character. */
249 if (p == pe && (c == L('?') || c == L('*')))
250 return (0);
251
252 /* If we've hit the end of the string and the rest of the pattern
253 is something that matches the empty string, we can succeed. */
254 #if defined (EXTENDED_GLOB)
255 if (n == se && ((flags & FNM_EXTMATCH) && (c == L('!') || c == L('?')) && *p == L('(')))
256 {
257 --p;
258 if (EXTMATCH (c, n, se, p, pe, flags) == 0)
259 return (c == L('!') ? FNM_NOMATCH : 0);
260 return (c == L('!') ? 0 : FNM_NOMATCH);
261 }
262 #endif
263
264 /* If we stop at a slash in the pattern and we are looking for a
265 pathname ([star]/foo), then consume enough of the string to stop
266 at any slash and then try to match the rest of the pattern. If
267 the string doesn't contain a slash, fail */
268 if (c == L('/') && (flags & FNM_PATHNAME))
269 {
270 while (n < se && *n != L('/'))
271 ++n;
272 if (n < se && *n == L('/') && (GMATCH (n+1, se, p, pe, NULL, flags) == 0))
273 return 0;
274 return FNM_NOMATCH; /* XXX */
275 }
276
277 /* General case, use recursion. */
278 {
279 U_CHAR c1;
280 const CHAR *endp;
281 struct STRUCT end;
282
283 end.pattern = NULL;
284 endp = MEMCHR (n, (flags & FNM_PATHNAME) ? L('/') : L('\0'), se - n);
285 if (endp == 0)
286 endp = se;
287
288 c1 = ((flags & FNM_NOESCAPE) == 0 && c == L('\\')) ? *p : c;
289 c1 = FOLD (c1);
290 for (--p; n < endp; ++n)
291 {
292 /* Only call strmatch if the first character indicates a
293 possible match. We can check the first character if
294 we're not doing an extended glob match. */
295 if ((flags & FNM_EXTMATCH) == 0 && c != L('[') && FOLD (*n) != c1) /*]*/
296 continue;
297
298 /* If we're doing an extended glob match and the pattern is not
299 one of the extended glob patterns, we can check the first
300 character. */
301 if ((flags & FNM_EXTMATCH) && p[1] != L('(') && /*)*/
302 STRCHR (L("?*+@!"), *p) == 0 && c != L('[') && FOLD (*n) != c1) /*]*/
303 continue;
304
305 /* Otherwise, we just recurse. */
306 if (GMATCH (n, se, p, pe, &end, flags & ~(FNM_PERIOD|FNM_DOTDOT)) == 0)
307 {
308 if (end.pattern == NULL)
309 return (0);
310 break;
311 }
312 }
313 /* This is a clever idea from glibc, used to avoid backtracking
314 to a `*' that appears earlier in the pattern. We get away
315 without saving se and pe because they are always the same,
316 even in the recursive calls to gmatch */
317 if (end.pattern != NULL)
318 {
319 p = end.pattern;
320 n = end.string;
321 continue;
322 }
323
324 return FNM_NOMATCH;
325 }
326
327 case L('['):
328 {
329 if (sc == L('\0') || n == se)
330 return FNM_NOMATCH;
331
332 /* A character class cannot match a `.' if it is the first
333 character of the string or if it is the first character
334 following a slash and we are matching a pathname. */
335 if ((flags & FNM_PERIOD) && sc == L('.') &&
336 (n == string || ((flags & FNM_PATHNAME) && n[-1] == L('/'))))
337 return (FNM_NOMATCH);
338
339 /* If we are matching pathnames, we can't match a slash with a
340 bracket expression. */
341 if (sc == L('/') && (flags & FNM_PATHNAME))
342 return (FNM_NOMATCH);
343
344 /* `?' cannot match `.' or `..' if it is the first character of the
345 string or if it is the first character following a slash and
346 we are matching a pathname. */
347 if ((flags & FNM_DOTDOT) &&
348 ((n == string && SDOT_OR_DOTDOT(n)) ||
349 ((flags & FNM_PATHNAME) && n[-1] == L('/') && PDOT_OR_DOTDOT(n))))
350 return FNM_NOMATCH;
351
352 p = BRACKMATCH (p, sc, flags);
353 if (p == 0)
354 return FNM_NOMATCH;
355 }
356 break;
357
358 default:
359 if ((U_CHAR)c != FOLD (sc))
360 return (FNM_NOMATCH);
361 }
362
363 ++n;
364 }
365
366 if (n == se)
367 return (0);
368
369 if ((flags & FNM_LEADING_DIR) && *n == L('/'))
370 /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */
371 return 0;
372
373 return (FNM_NOMATCH);
374 }
375
/* Nonzero when C is a `/' and FNM_PATHNAME is set.  Expands to a reference
   to a variable named `flags', which must be in scope at the point of use.
   The argument is parenthesized so that an expression argument (for example
   one using ?:) is compared as a whole. */
#define SLASH_PATHNAME(c)	((c) == L('/') && (flags & FNM_PATHNAME))
377
378 /* Parse a special bracket expression symbol ([.sym.], [=char=], [:cclass:]),
379 starting at P, and return the position of the terminating .], =], or :].
380 P points to the character after the opening bracket. Returns NULL if the
381 symbol isn't correctly terminated. */
382 static inline CHAR *
383 PARSE_SUBBRACKET (CHAR *p, int flags)
384 {
385 CHAR type; /* the type of special bracket expression symbol */
386
387 type = *p;
388
389 /* POSIX allows a right bracket to appear in a collating symbol. */
390 while (*++p != L('\0') && SLASH_PATHNAME (*p) == 0 && (type != L('.') && *p == L(']')) == 0)
391 if (*p == type && p[1] == L(']'))
392 return p;
393 return NULL;
394 }
395
396 /* Use prototype definition here because of type promotion. */
397 static CHAR *
398 BRACKMATCH (CHAR *p, U_CHAR test, int flags)
399 {
400 register CHAR cstart, cend, c;
401 register int not; /* Nonzero if the sense of the character class is inverted. */
402 int forcecoll, isrange;
403 INT pc;
404 CHAR *savep;
405 CHAR *close;
406 U_CHAR orig_test;
407
408 orig_test = test;
409 test = FOLD (orig_test);
410
411 savep = p;
412
413 /* POSIX.2 3.13.1 says that an exclamation mark (`!') shall replace the
414 circumflex (`^') in its role in a `nonmatching list'. A bracket
415 expression starting with an unquoted circumflex character produces
416 unspecified results. This implementation treats the two identically. */
417 if (not = (*p == L('!') || *p == L('^')))
418 ++p;
419
420 c = *p++;
421 for (;;)
422 {
423 /* Initialize cstart and cend in case `-' is the last
424 character of the pattern. */
425 cstart = cend = c;
426 forcecoll = 0;
427
428 /* POSIX.2 equivalence class: [=c=]. See POSIX.2 2.8.3.2. Find
429 the end of the equivalence class, move the pattern pointer past
430 it, and check for equivalence. */
431 if (c == L('[') && *p == L('=') && (close = PARSE_SUBBRACKET (p, flags)) != NULL)
432 {
433 p++;
434 pc = COLLSYM (p, close - p);
435 pc = FOLD (pc);
436 p = close + 2;
437
438 if (COLLEQUIV (test, pc))
439 {
440 /*[*/ /* Move past the closing `]', since the first thing we do at
441 the `matched:' label is back p up one. */
442 p++;
443 goto matched;
444 }
445 else
446 {
447 c = *p++;
448 if (c == L('\0'))
449 return ((test == L('[')) ? savep : (CHAR *)0); /*]*/
450 else if (c == L('/') && (flags & FNM_PATHNAME))
451 return ((test == L('[')) ? savep : (CHAR *)0); /*]*/
452 else if (c == L(']'))
453 break;
454 c = FOLD (c);
455 continue;
456 }
457 }
458
459 /* POSIX.2 character class expression. See POSIX.2 2.8.3.2. */
460 if (c == L('[') && *p == L(':') && (close = PARSE_SUBBRACKET (p, flags)) != NULL)
461 {
462 CHAR *ccname;
463
464 pc = 0; /* make sure invalid char classes don't match. */
465
466 ccname = (CHAR *)malloc ((close - p) * sizeof (CHAR));
467 if (ccname)
468 {
469 bcopy (p + 1, ccname, (close - p - 1) * sizeof (CHAR));
470 *(ccname + (close - p - 1)) = L('\0');
471 /* As a result of a POSIX discussion, char class names are
472 allowed to be quoted (?) */
473 DEQUOTE_PATHNAME (ccname);
474 pc = IS_CCLASS (orig_test, (XCHAR *)ccname);
475 if (pc == -1)
476 {
477 /* CCNAME is not a valid character class in the current
478 locale. In addition to noting no match (pc = 0), we have
479 a choice about what to do with the invalid charclass.
480 Posix leaves the behavior unspecified, but we're going
481 to skip over the charclass and keep going instead of
482 testing ORIG_TEST against each character in the class
483 string. If we don't want to do that, take out the update
484 of P. */
485 pc = 0;
486 }
487 }
488 free (ccname);
489
490 p = close + 2;
491
492 if (pc)
493 {
494 /*[*/ /* Move past the closing `]', since the first thing we do at
495 the `matched:' label is back p up one. */
496 p++;
497 goto matched;
498 }
499 else
500 {
501 /* continue the loop here, since this expression can't be
502 the first part of a range expression. */
503 c = *p++;
504 if (c == L('\0'))
505 return ((test == L('[')) ? savep : (CHAR *)0);
506 else if (c == L('/') && (flags & FNM_PATHNAME))
507 return ((test == L('[')) ? savep : (CHAR *)0); /*]*/
508 else if (c == L(']'))
509 break;
510 c = FOLD (c);
511 continue;
512 }
513 }
514
515 /* POSIX.2 collating symbols. See POSIX.2 2.8.3.2. Find the end of
516 the symbol name, make sure it is terminated by `.]', translate
517 the name to a character using the external table, and do the
518 comparison. */
519 if (c == L('[') && *p == L('.') && (close = PARSE_SUBBRACKET (p, flags)) != NULL)
520 {
521 p++;
522 cstart = COLLSYM (p, close - p);
523 p = close + 2;
524 forcecoll = 1;
525 }
526
527 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
528 {
529 if (*p == '\0')
530 return ((test == L('[')) ? savep : (CHAR *)0);
531 else if (*p == L('/') && (flags & FNM_PATHNAME))
532 return ((test == L('[')) ? savep : (CHAR *)0);
533 cstart = cend = *p++;
534 }
535
536 cstart = cend = FOLD (cstart);
537 isrange = 0;
538
539 /* POSIX.2 2.8.3.1.2 says: `An expression containing a `[' that
540 is not preceded by a backslash and is not part of a bracket
541 expression produces undefined results.' This implementation
542 treats the `[' as just a character to be matched if there is
543 not a closing `]'. */
544 if (c == L('\0'))
545 return ((test == L('[')) ? savep : (CHAR *)0);
546
547 /* POSIX.2 2.13.3 says: `If a <slash> character is found following an
548 unescaped <left-square-bracket> character before a corresponding
549 <right-square-bracket> is found, the open bracket shall be treated
550 as an ordinary character.' If we find a slash in a bracket
551 expression and the flags indicate we're supposed to be treating the
552 string like a pathname, we have to treat the `[' as just a character
553 to be matched. */
554 if (c == L('/') && (flags & FNM_PATHNAME))
555 return ((test == L('[')) ? savep : (CHAR *)0);
556
557 c = *p++;
558 c = FOLD (c);
559
560 if (c == L('\0'))
561 return ((test == L('[')) ? savep : (CHAR *)0);
562 else if (c == L('/') && (flags & FNM_PATHNAME))
563 return ((test == L('[')) ? savep : (CHAR *)0);
564
565 /* This introduces a range, unless the `-' is the last
566 character of the class. Find the end of the range
567 and move past it. */
568 if (c == L('-') && *p != L(']'))
569 {
570 cend = *p++;
571 if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
572 cend = *p++;
573 if (cend == L('\0'))
574 return ((test == L('[')) ? savep : (CHAR *)0);
575 else if (cend == L('/') && (flags & FNM_PATHNAME))
576 return ((test == L('[')) ? savep : (CHAR *)0);
577 if (cend == L('[') && *p == L('.') && (close = PARSE_SUBBRACKET (p, flags)) != NULL)
578 {
579 p++;
580 cend = COLLSYM (p, close - p);
581 p = close + 2;
582 forcecoll = 1;
583 }
584 cend = FOLD (cend);
585
586 c = *p++;
587
588 /* POSIX.2 2.8.3.2: ``The ending range point shall collate
589 equal to or higher than the starting range point; otherwise
590 the expression shall be treated as invalid.'' Note that this
591 applies to only the range expression; the rest of the bracket
592 expression is still checked for matches. */
593 if (RANGECMP (cstart, cend, forcecoll) > 0)
594 {
595 if (c == L(']'))
596 break;
597 c = FOLD (c);
598 continue;
599 }
600 isrange = 1;
601 }
602
603 if (isrange == 0 && test == cstart)
604 goto matched;
605 if (isrange && RANGECMP (test, cstart, forcecoll) >= 0 && RANGECMP (test, cend, forcecoll) <= 0)
606 goto matched;
607
608 if (c == L(']'))
609 break;
610 }
611 /* No match. */
612 return (!not ? (CHAR *)0 : p);
613
614 matched:
615 /* Skip the rest of the [...] that already matched. */
616 c = *--p;
617 while (1)
618 {
619 /* A `[' without a matching `]' is just another character to match. */
620 if (c == L('\0'))
621 return ((test == L('[')) ? savep : (CHAR *)0);
622 else if (c == L('/') && (flags & FNM_PATHNAME))
623 return ((test == L('[')) ? savep : (CHAR *)0);
624
625 c = *p++;
626 if (c == L('[') && (*p == L('=') || *p == L(':') || *p == L('.')))
627 {
628 if ((close = PARSE_SUBBRACKET (p, flags)) != NULL)
629 p = close + 2;
630 }
631 /* Left bracket loses its special meaning inside a bracket expression.
632 It is only valid when followed by a `.', `=', or `:', which we check
633 for above. Technically the right bracket can appear in a collating
634 symbol, so we check for that as well. The right brackets terminating
635 collating symbols, equivalence classes, or character classes are
636 processed by PARSE_SUBBRACKET. Otherwise, a right bracket terminates
637 the bracket expression. */
638 else if (c == L(']'))
639 break;
640 else if (!(flags & FNM_NOESCAPE) && c == L('\\'))
641 {
642 if (*p == '\0')
643 return ((test == L('[')) ? savep : (CHAR *)0);
644 /* We don't allow backslash to quote slash if we're matching pathnames */
645 else if (*p == L('/') && (flags & FNM_PATHNAME))
646 return ((test == L('[')) ? savep : (CHAR *)0);
647 /* Posix issue 8 leaves this unspecified for the shell. */
648 ++p;
649 }
650 }
651 return (not ? (CHAR *)0 : p);
652 }
653
654 #if defined (EXTENDED_GLOB)
/* ksh-like extended pattern matching:

	[?*+@!](pat-list)

   where pat-list is a list of one or more patterns separated by `|'.
   Operation is as follows:

	?(patlist)	match zero or one of the given patterns
	*(patlist)	match zero or more of the given patterns
	+(patlist)	match one or more of the given patterns
	@(patlist)	match exactly one of the given patterns
	!(patlist)	match anything except one of the given patterns
*/
668
669 /* Scan a pattern starting at STRING and ending at END, keeping track of
670 embedded () and []. If DELIM is 0, we scan until a matching `)'
671 because we're scanning a `patlist'. Otherwise, we scan until we see
672 DELIM. In all cases, we never scan past END. The return value is the
673 first character after the matching DELIM or NULL if the pattern is
674 empty or invalid. */
675 /*static*/ CHAR *
676 PATSCAN (CHAR *string, CHAR *end, INT delim, int flags)
677 {
678 int pnest, bnest, skip;
679 CHAR *s, c, *bfirst, *t;
680
681 pnest = bnest = skip = 0;
682 bfirst = NULL;
683
684 if (string == end)
685 return (NULL);
686
687 for (s = string; c = *s; s++)
688 {
689 if (s >= end)
690 return (s);
691 if (skip)
692 {
693 skip = 0;
694 continue;
695 }
696 switch (c)
697 {
698 case L('\\'):
699 if ((flags & FNM_NOESCAPE) == 0)
700 skip = 1;
701 break;
702
703 case L('\0'):
704 return ((CHAR *)NULL);
705
706 /* `[' is not special inside a bracket expression, but it may
707 introduce one of the special POSIX bracket expressions
708 ([.SYM.], [=c=], [: ... :]) that needs special handling. */
709 case L('['):
710 if (bnest == 0)
711 {
712 bfirst = s + 1;
713 if (*bfirst == L('!') || *bfirst == L('^'))
714 bfirst++;
715 bnest++;
716 }
717 else if (s[1] == L(':') || s[1] == L('.') || s[1] == L('='))
718 {
719 t = PARSE_SUBBRACKET (s + 1, flags);
720 if (t)
721 s = t + 2 - 1; /* -1 to cancel s++ in loop above */
722 }
723 break;
724
725 /* `]' is not special if it's the first char (after a leading `!'
726 or `^') in a bracket expression or if it's part of one of the
727 special POSIX bracket expressions ([.SYM.], [=c=], [: ... :]) */
728 case L(']'):
729 if (bnest)
730 {
731 if (s != bfirst)
732 {
733 bnest--;
734 bfirst = 0;
735 }
736 }
737 break;
738
739 case L('('):
740 if (bnest == 0)
741 pnest++;
742 break;
743
744 case L(')'):
745 if (bnest == 0 && pnest-- <= 0)
746 return ++s;
747 break;
748
749 case L('|'):
750 if (bnest == 0 && pnest == 0 && delim == L('|'))
751 return ++s;
752 break;
753 }
754 }
755
756 return (NULL);
757 }
758
759 /* Return 0 if dequoted pattern matches S in the current locale. */
760 static int
761 STRCOMPARE (CHAR *p, CHAR *pe, CHAR *s, CHAR *se)
762 {
763 int ret;
764 CHAR c1, c2;
765 int l1, l2;
766
767 l1 = pe - p;
768 l2 = se - s;
769
770 if (l1 != l2)
771 return (FNM_NOMATCH); /* unequal lengths, can't be identical */
772
773 c1 = *pe;
774 c2 = *se;
775
776 if (c1 != 0)
777 *pe = '\0';
778 if (c2 != 0)
779 *se = '\0';
780
781 #if HAVE_MULTIBYTE || defined (HAVE_STRCOLL)
782 ret = STRCOLL ((XCHAR *)p, (XCHAR *)s);
783 #else
784 ret = STRCMP ((XCHAR *)p, (XCHAR *)s);
785 #endif
786
787 if (c1 != 0)
788 *pe = c1;
789 if (c2 != 0)
790 *se = c2;
791
792 return (ret == 0 ? ret : FNM_NOMATCH);
793 }
794
795 /* Match a ksh extended pattern specifier. Return FNM_NOMATCH on failure or
796 0 on success. This is handed the entire rest of the pattern and string
797 the first time an extended pattern specifier is encountered, so it calls
798 gmatch recursively. */
799 static int
800 EXTMATCH (INT xc, CHAR *s, CHAR *se, CHAR *p, CHAR *pe, int flags)
801 {
802 CHAR *prest; /* pointer to rest of pattern */
803 CHAR *psub; /* pointer to sub-pattern */
804 CHAR *pnext; /* pointer to next sub-pattern */
805 CHAR *srest; /* pointer to rest of string */
806 int m1, m2, xflags; /* xflags = flags passed to recursive matches */
807
808 #if DEBUG_MATCHING
809 fprintf(stderr, "extmatch: xc = %c\n", xc);
810 fprintf(stderr, "extmatch: s = %s; se = %s\n", s, se);
811 fprintf(stderr, "extmatch: p = %s; pe = %s\n", p, pe);
812 fprintf(stderr, "extmatch: flags = %d\n", flags);
813 #endif
814
815 prest = PATSCAN (p + (*p == L('(')), pe, 0, flags); /* ) */
816 if (prest == 0)
817 /* If PREST is 0, we failed to scan a valid pattern. In this
818 case, we just want to compare the two as strings. */
819 return (STRCOMPARE (p - 1, pe, s, se));
820
821 glob_recursion_depth++;
822
823 switch (xc)
824 {
825 case L('+'): /* match one or more occurrences */
826 case L('*'): /* match zero or more occurrences */
827 /* If we can get away with no matches, don't even bother. Just
828 call GMATCH on the rest of the pattern and return success if
829 it succeeds. */
830 if (xc == L('*') && (GMATCH (s, se, prest, pe, NULL, flags) == 0))
831 return 0;
832
833 /* OK, we have to do this the hard way. First, we make sure one of
834 the subpatterns matches, then we try to match the rest of the
835 string. */
836 for (psub = p + 1; ; psub = pnext)
837 {
838 pnext = PATSCAN (psub, pe, L('|'), flags);
839 for (srest = s; srest <= se; srest++)
840 {
841 /* Match this substring (S -> SREST) against this
842 subpattern (psub -> pnext - 1) */
843 m1 = GMATCH (s, srest, psub, pnext - 1, NULL, flags) == 0;
844 /* OK, we matched a subpattern, so make sure the rest of the
845 string matches the rest of the pattern. Also handle
846 multiple matches of the pattern. */
847 if (m1)
848 {
849 /* if srest > s, we are not at start of string */
850 xflags = (srest > s) ? (flags & ~(FNM_PERIOD|FNM_DOTDOT)) : flags;
851 m2 = (GMATCH (srest, se, prest, pe, NULL, xflags) == 0) ||
852 (s != srest && GMATCH (srest, se, p - 1, pe, NULL, xflags) == 0);
853 }
854 if (m1 && m2)
855 return (0);
856 }
857 if (pnext == prest)
858 break;
859 }
860 return (FNM_NOMATCH);
861
862 case L('?'): /* match zero or one of the patterns */
863 case L('@'): /* match one (or more) of the patterns */
864 /* If we can get away with no matches, don't even bother. Just
865 call gmatch on the rest of the pattern and return success if
866 it succeeds. */
867 if (xc == L('?') && (GMATCH (s, se, prest, pe, NULL, flags) == 0))
868 return 0;
869
870 /* OK, we have to do this the hard way. First, we see if one of
871 the subpatterns matches, then, if it does, we try to match the
872 rest of the string. */
873 for (psub = p + 1; ; psub = pnext)
874 {
875 pnext = PATSCAN (psub, pe, L('|'), flags);
876 srest = (prest == pe) ? se : s;
877 for ( ; srest <= se; srest++)
878 {
879 /* if srest > s, we are not at start of string */
880 xflags = (srest > s) ? (flags & ~(FNM_PERIOD|FNM_DOTDOT)) : flags;
881 if (GMATCH (s, srest, psub, pnext - 1, NULL, flags) == 0 &&
882 GMATCH (srest, se, prest, pe, NULL, xflags) == 0)
883 return (0);
884 }
885 if (pnext == prest)
886 break;
887 }
888 return (FNM_NOMATCH);
889
890 case '!': /* match anything *except* one of the patterns */
891 for (srest = s; srest <= se; srest++)
892 {
893 m1 = 0;
894 for (psub = p + 1; ; psub = pnext)
895 {
896 pnext = PATSCAN (psub, pe, L('|'), flags);
897 /* If one of the patterns matches, just bail immediately. */
898 if (m1 = (GMATCH (s, srest, psub, pnext - 1, NULL, flags) == 0))
899 break;
900 if (pnext == prest)
901 break;
902 }
903
904 /* If nothing matched, but the string starts with a period and we
905 need to match periods explicitly, don't return this as a match,
906 even for negation. */
907 if (m1 == 0 && (flags & FNM_PERIOD) && *s == '.')
908 return (FNM_NOMATCH);
909
910 if (m1 == 0 && (flags & FNM_DOTDOT) &&
911 (SDOT_OR_DOTDOT (s) ||
912 ((flags & FNM_PATHNAME) && s[-1] == L('/') && PDOT_OR_DOTDOT(s))))
913 return (FNM_NOMATCH);
914
915 /* if srest > s, we are not at start of string */
916 xflags = (srest > s) ? (flags & ~(FNM_PERIOD|FNM_DOTDOT)) : flags;
917 if (m1 == 0 && GMATCH (srest, se, prest, pe, NULL, xflags) == 0)
918 return (0);
919 }
920 return (FNM_NOMATCH);
921 }
922
923 return (FNM_NOMATCH);
924 }
925 #endif /* EXTENDED_GLOB */
926
/* Remove the macros this file was parameterized with -- presumably so that
   an including file can define them differently and include this file
   again (confirm against the includer).
   NOTE(review): SLASH_PATHNAME, which is defined in this file, is not in
   this list. */
#undef IS_CCLASS
#undef FOLD
#undef CHAR
#undef U_CHAR
#undef XCHAR
#undef INT
#undef INVALID
#undef FCT
#undef GMATCH
#undef COLLSYM
#undef PARSE_SUBBRACKET
#undef PATSCAN
#undef STRCOMPARE
#undef EXTMATCH
#undef DEQUOTE_PATHNAME
#undef STRUCT
#undef BRACKMATCH
#undef STRCHR
#undef STRCOLL
#undef STRLEN
#undef STRCMP
#undef MEMCHR
#undef COLLEQUIV
#undef RANGECMP
#undef ISDIRSEP
#undef PATHSEP
#undef PDOT_OR_DOTDOT
#undef SDOT_OR_DOTDOT
#undef L