]>
git.ipfire.org Git - thirdparty/bash.git/blob - lib/glob/sm_loop.c
1 /* Copyright (C) 1991-2011 Free Software Foundation, Inc.
3 This file is part of GNU Bash, the Bourne Again SHell.
5 Bash is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
10 Bash is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with Bash. If not, see <http://www.gnu.org/licenses/>.
19 int FCT
__P((CHAR
*, CHAR
*, int));
21 static int GMATCH
__P((CHAR
*, CHAR
*, CHAR
*, CHAR
*, int));
22 static CHAR
*PARSE_COLLSYM
__P((CHAR
*, INT
*));
23 static CHAR
*BRACKMATCH
__P((CHAR
*, U_CHAR
, int));
24 static int EXTMATCH
__P((INT
, CHAR
*, CHAR
*, CHAR
*, CHAR
*, int));
26 /*static*/ CHAR
*PATSCAN
__P((CHAR
*, CHAR
*, INT
));
29 FCT (pattern
, string
, flags
)
36 if (string
== 0 || pattern
== 0)
39 se
= string
+ STRLEN ((XCHAR
*)string
);
40 pe
= pattern
+ STRLEN ((XCHAR
*)pattern
);
42 return (GMATCH (string
, se
, pattern
, pe
, flags
));
45 /* Match STRING against the filename pattern PATTERN, returning zero if
46 it matches, FNM_NOMATCH if not. */
48 GMATCH (string
, se
, pattern
, pe
, flags
)
53 CHAR
*p
, *n
; /* pattern, string */
54 INT c
; /* current pattern character - XXX U_CHAR? */
55 INT sc
; /* current string character - XXX U_CHAR? */
60 if (string
== 0 || pattern
== 0)
64 fprintf(stderr
, "gmatch: string = %s; se = %s\n", string
, se
);
65 fprintf(stderr
, "gmatch: pattern = %s; pe = %s\n", pattern
, pe
);
73 sc
= n
< se
? *n
: '\0';
76 /* EXTMATCH () will handle recursively calling GMATCH, so we can
77 just return what EXTMATCH() returns. */
78 if ((flags
& FNM_EXTMATCH
) && *p
== L('(') &&
79 (c
== L('+') || c
== L('*') || c
== L('?') || c
== L('@') || c
== L('!'))) /* ) */
82 /* If we're not matching the start of the string, we're not
83 concerned about the special cases for matching `.' */
84 lflags
= (n
== string
) ? flags
: (flags
& ~FNM_PERIOD
);
85 return (EXTMATCH (c
, n
, se
, p
, pe
, lflags
));
87 #endif /* EXTENDED_GLOB */
91 case L('?'): /* Match single character */
94 else if ((flags
& FNM_PATHNAME
) && sc
== L('/'))
95 /* If we are matching a pathname, `?' can never match a `/'. */
97 else if ((flags
& FNM_PERIOD
) && sc
== L('.') &&
98 (n
== string
|| ((flags
& FNM_PATHNAME
) && n
[-1] == L('/'))))
99 /* `?' cannot match a `.' if it is the first character of the
100 string or if it is the first character following a slash and
101 we are matching a pathname. */
105 case L('\\'): /* backslash escape removes special meaning */
109 if ((flags
& FNM_NOESCAPE
) == 0)
112 /* A trailing `\' cannot match. */
117 if (FOLD (sc
) != (U_CHAR
)c
)
121 case '*': /* Match zero or more characters */
122 if ((flags
& FNM_PERIOD
) && sc
== L('.') &&
123 (n
== string
|| ((flags
& FNM_PATHNAME
) && n
[-1] == L('/'))))
124 /* `*' cannot match a `.' if it is the first character of the
125 string or if it is the first character following a slash and
126 we are matching a pathname. */
132 /* Collapse multiple consecutive `*' and `?', but make sure that
133 one character of the string is consumed for each `?'. */
134 for (c
= *p
++; (c
== L('?') || c
== L('*')); c
= *p
++)
136 if ((flags
& FNM_PATHNAME
) && sc
== L('/'))
137 /* A slash does not match a wildcard under FNM_PATHNAME. */
140 else if ((flags
& FNM_EXTMATCH
) && c
== L('?') && *p
== L('(')) /* ) */
145 for (newn
= n
; newn
< se
; ++newn
)
147 if (EXTMATCH (c
, newn
, se
, p
, pe
, flags
) == 0)
151 /* We can match 0 or 1 times. If we match, return success */
152 if (EXTMATCH (c
, n
, se
, p
, pe
, flags
) == 0)
156 /* We didn't match the extended glob pattern, but
157 that's OK, since we can match 0 or 1 occurrences.
158 We need to skip the glob pattern and see if we
159 match the rest of the string. */
160 newn
= PATSCAN (p
+ 1, pe
, 0);
161 /* If NEWN is 0, we have an ill-formed pattern. */
162 p
= newn
? newn
: pe
;
165 else if (c
== L('?'))
169 /* One character of the string is consumed in matching
170 this ? wildcard, so *??? won't match if there are
171 fewer than three characters. */
173 sc
= n
< se
? *n
: '\0';
177 /* Handle ******(patlist) */
178 if ((flags
& FNM_EXTMATCH
) && c
== L('*') && *p
== L('(')) /*)*/
181 /* We need to check whether or not the extended glob
182 pattern matches the remainder of the string.
183 If it does, we match the entire pattern. */
184 for (newn
= n
; newn
< se
; ++newn
)
186 if (EXTMATCH (c
, newn
, se
, p
, pe
, flags
) == 0)
189 /* We didn't match the extended glob pattern, but
190 that's OK, since we can match 0 or more occurrences.
191 We need to skip the glob pattern and see if we
192 match the rest of the string. */
193 newn
= PATSCAN (p
+ 1, pe
, 0);
194 /* If NEWN is 0, we have an ill-formed pattern. */
195 p
= newn
? newn
: pe
;
202 /* The wildcards are the last element of the pattern. The name
203 cannot match completely if we are looking for a pathname and
204 it contains another slash, unless FNM_LEADING_DIR is set. */
207 int r
= (flags
& FNM_PATHNAME
) == 0 ? 0 : FNM_NOMATCH
;
208 if (flags
& FNM_PATHNAME
)
210 if (flags
& FNM_LEADING_DIR
)
212 else if (MEMCHR (n
, L('/'), se
- n
) == NULL
)
218 /* If we've hit the end of the pattern and the last character of
219 the pattern was handled by the loop above, we've succeeded.
220 Otherwise, we need to match that last character. */
221 if (p
== pe
&& (c
== L('?') || c
== L('*')))
224 /* If we've hit the end of the string and the rest of the pattern
225 is something that matches the empty string, we can succeed. */
226 #if defined (EXTENDED_GLOB)
227 if (n
== se
&& ((flags
& FNM_EXTMATCH
) && (c
== L('!') || c
== L('?')) && *p
== L('(')))
230 if (EXTMATCH (c
, n
, se
, p
, pe
, flags
) == 0)
231 return (c
== L('!') ? FNM_NOMATCH
: 0);
232 return (c
== L('!') ? 0 : FNM_NOMATCH
);
236 /* If we stop at a slash in the pattern and we are looking for a
237 pathname ([star]/foo), then consume enough of the string to stop
238 at any slash and then try to match the rest of the pattern. If
239 the string doesn't contain a slash, fail */
240 if (c
== L('/') && (flags
& FNM_PATHNAME
))
242 while (n
< se
&& *n
!= L('/'))
244 if (n
< se
&& *n
== L('/') && (GMATCH (n
+1, se
, p
, pe
, flags
) == 0))
246 return FNM_NOMATCH
; /* XXX */
249 /* General case, use recursion. */
254 endp
= MEMCHR (n
, (flags
& FNM_PATHNAME
) ? L('/') : L('\0'), se
- n
);
257 c1
= ((flags
& FNM_NOESCAPE
) == 0 && c
== L('\\')) ? *p
: c
;
259 for (--p
; n
< endp
; ++n
)
261 /* Only call strmatch if the first character indicates a
262 possible match. We can check the first character if
263 we're not doing an extended glob match. */
264 if ((flags
& FNM_EXTMATCH
) == 0 && c
!= L('[') && FOLD (*n
) != c1
) /*]*/
267 /* If we're doing an extended glob match and the pattern is not
268 one of the extended glob patterns, we can check the first character. */
270 if ((flags
& FNM_EXTMATCH
) && p
[1] != L('(') && /*)*/
271 STRCHR (L("?*+@!"), *p
) == 0 && c
!= L('[') && FOLD (*n
) != c1
) /*]*/
274 /* Otherwise, we just recurse. */
275 if (GMATCH (n
, se
, p
, pe
, flags
& ~FNM_PERIOD
) == 0)
283 if (sc
== L('\0') || n
== se
)
286 /* A character class cannot match a `.' if it is the first
287 character of the string or if it is the first character
288 following a slash and we are matching a pathname. */
289 if ((flags
& FNM_PERIOD
) && sc
== L('.') &&
290 (n
== string
|| ((flags
& FNM_PATHNAME
) && n
[-1] == L('/'))))
291 return (FNM_NOMATCH
);
293 p
= BRACKMATCH (p
, sc
, flags
);
300 if ((U_CHAR
)c
!= FOLD (sc
))
301 return (FNM_NOMATCH
);
310 if ((flags
& FNM_LEADING_DIR
) && *n
== L('/'))
311 /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */
314 return (FNM_NOMATCH
);
317 /* Parse a bracket expression collating symbol ([.sym.]) starting at P, find
318 the value of the symbol, and move P past the collating symbol expression.
319 The value is returned in *VP, if VP is not null. */
321 PARSE_COLLSYM (p
, vp
)
328 p
++; /* move past the `.' */
330 for (pc
= 0; p
[pc
]; pc
++)
331 if (p
[pc
] == L('.') && p
[pc
+1] == L(']'))
339 val
= COLLSYM (p
, pc
);
345 /* Use prototype definition here because of type promotion. */
347 #if defined (PROTOTYPES)
348 BRACKMATCH (CHAR
*p
, U_CHAR test
, int flags
)
350 BRACKMATCH (p
, test
, flags
)
356 register CHAR cstart
, cend
, c
;
357 register int not; /* Nonzero if the sense of the character class is inverted. */
358 int brcnt
, forcecoll
;
366 /* POSIX.2 3.13.1 says that an exclamation mark (`!') shall replace the
367 circumflex (`^') in its role in a `nonmatching list'. A bracket
368 expression starting with an unquoted circumflex character produces
369 unspecified results. This implementation treats the two identically. */
370 if (not = (*p
== L('!') || *p
== L('^')))
376 /* Initialize cstart and cend in case `-' is the last
377 character of the pattern. */
381 /* POSIX.2 equivalence class: [=c=]. See POSIX.2 2.8.3.2. Find
382 the end of the equivalence class, move the pattern pointer past
383 it, and check for equivalence. XXX - this handles only
384 single-character equivalence classes, which is wrong, or at least incomplete. */
386 if (c
== L('[') && *p
== L('=') && p
[2] == L('=') && p
[3] == L(']'))
390 if (COLLEQUIV (test
, pc
))
392 /*[*/ /* Move past the closing `]', since the first thing we do at
393 the `matched:' label is back p up one. */
401 return ((test
== L('[')) ? savep
: (CHAR
*)0); /*]*/
407 /* POSIX.2 character class expression. See POSIX.2 2.8.3.2. */
408 if (c
== L('[') && *p
== L(':'))
410 CHAR
*close
, *ccname
;
412 pc
= 0; /* make sure invalid char classes don't match. */
413 /* Find end of character class name */
414 for (close
= p
+ 1; *close
!= '\0'; close
++)
415 if (*close
== L(':') && *(close
+1) == L(']'))
418 if (*close
!= L('\0'))
420 ccname
= (CHAR
*)malloc ((close
- p
) * sizeof (CHAR
));
425 bcopy (p
+ 1, ccname
, (close
- p
- 1) * sizeof (CHAR
));
426 *(ccname
+ (close
- p
- 1)) = L('\0');
427 pc
= IS_CCLASS (test
, (XCHAR
*)ccname
);
439 /*[*/ /* Move past the closing `]', since the first thing we do at
440 the `matched:' label is back p up one. */
446 /* continue the loop here, since this expression can't be
447 the first part of a range expression. */
450 return ((test
== L('[')) ? savep
: (CHAR
*)0);
451 else if (c
== L(']'))
458 /* POSIX.2 collating symbols. See POSIX.2 2.8.3.2. Find the end of
459 the symbol name, make sure it is terminated by `.]', translate
460 the name to a character using the external table, and do the comparison. */
462 if (c
== L('[') && *p
== L('.'))
464 p
= PARSE_COLLSYM (p
, &pc
);
465 /* An invalid collating symbol cannot be the first point of a
466 range. If it is, we set cstart to one greater than `test',
467 so any comparisons later will fail. */
468 cstart
= (pc
== INVALID
) ? test
+ 1 : pc
;
472 if (!(flags
& FNM_NOESCAPE
) && c
== L('\\'))
476 cstart
= cend
= *p
++;
479 cstart
= cend
= FOLD (cstart
);
481 /* POSIX.2 2.8.3.1.2 says: `An expression containing a `[' that
482 is not preceded by a backslash and is not part of a bracket
483 expression produces undefined results.' This implementation
484 treats the `[' as just a character to be matched if there is
485 not a closing `]'. */
487 return ((test
== L('[')) ? savep
: (CHAR
*)0);
493 return ((test
== L('[')) ? savep
: (CHAR
*)0);
495 if ((flags
& FNM_PATHNAME
) && c
== L('/'))
496 /* [/] can never match when matching a pathname. */
499 /* This introduces a range, unless the `-' is the last
500 character of the class. Find the end of the range and move past it. */
502 if (c
== L('-') && *p
!= L(']'))
505 if (!(flags
& FNM_NOESCAPE
) && cend
== L('\\'))
509 if (cend
== L('[') && *p
== L('.'))
511 p
= PARSE_COLLSYM (p
, &pc
);
512 /* An invalid collating symbol cannot be the second part of a
513 range expression. If we get one, we set cend to one fewer
514 than the test character to make sure the range test fails. */
515 cend
= (pc
== INVALID
) ? test
- 1 : pc
;
522 /* POSIX.2 2.8.3.2: ``The ending range point shall collate
523 equal to or higher than the starting range point; otherwise
524 the expression shall be treated as invalid.'' Note that this
525 applies to only the range expression; the rest of the bracket
526 expression is still checked for matches. */
527 if (RANGECMP (cstart
, cend
, forcecoll
) > 0)
536 if (RANGECMP (test
, cstart
, forcecoll
) >= 0 && RANGECMP (test
, cend
, forcecoll
) <= 0)
543 return (!not ? (CHAR
*)0 : p
);
546 /* Skip the rest of the [...] that already matched. */
551 /* A `[' without a matching `]' is just another character to match. */
553 return ((test
== L('[')) ? savep
: (CHAR
*)0);
556 if (c
== L('[') && (*p
== L('=') || *p
== L(':') || *p
== L('.')))
558 else if (c
== L(']'))
560 else if (!(flags
& FNM_NOESCAPE
) && c
== L('\\'))
564 /* XXX 1003.2d11 is unclear if this is right. */
568 return (not ? (CHAR
*)0 : p
);
571 #if defined (EXTENDED_GLOB)
572 /* ksh-like extended pattern matching:
576 where pat-list is a list of one or more patterns separated by `|'. Operation is as follows:
579 ?(patlist) match zero or one of the given patterns
580 *(patlist) match zero or more of the given patterns
581 +(patlist) match one or more of the given patterns
582 @(patlist) match exactly one of the given patterns
583 !(patlist) match anything except one of the given patterns
586 /* Scan a pattern starting at STRING and ending at END, keeping track of
587 embedded () and []. If DELIM is 0, we scan until a matching `)'
588 because we're scanning a `patlist'. Otherwise, we scan until we see
589 DELIM. In all cases, we never scan past END. The return value is the
590 first character after the matching DELIM or NULL if the pattern is empty or invalid. */
593 PATSCAN (string
, end
, delim
)
597 int pnest
, bnest
, skip
;
601 pnest
= bnest
= skip
= 0;
608 for (s
= string
; c
= *s
; s
++)
624 return ((CHAR
*)NULL
);
626 /* `[' is not special inside a bracket expression, but it may
627 introduce one of the special POSIX bracket expressions
628 ([.SYM.], [=c=], [: ... :]) that needs special handling. */
633 if (*bfirst
== L('!') || *bfirst
== L('^'))
637 else if (s
[1] == L(':') || s
[1] == L('.') || s
[1] == L('='))
641 /* `]' is not special if it's the first char (after a leading `!'
642 or `^') in a bracket expression or if it's part of one of the
643 special POSIX bracket expressions ([.SYM.], [=c=], [: ... :]) */
647 if (cchar
&& s
[-1] == cchar
)
649 else if (s
!= bfirst
)
663 if (bnest
== 0 && pnest
-- <= 0)
668 if (bnest
== 0 && pnest
== 0 && delim
== L('|'))
677 /* Return 0 if dequoted pattern matches S in the current locale. */
679 STRCOMPARE (p
, pe
, s
, se
)
680 CHAR
*p
, *pe
, *s
, *se
;
690 return (FNM_NOMATCH
); /* unequal lengths, can't be identical */
700 #if HAVE_MULTIBYTE || defined (HAVE_STRCOLL)
701 ret
= STRCOLL ((XCHAR
*)p
, (XCHAR
*)s
);
703 ret
= STRCMP ((XCHAR
*)p
, (XCHAR
*)s
);
711 return (ret
== 0 ? ret
: FNM_NOMATCH
);
714 /* Match a ksh extended pattern specifier. Return FNM_NOMATCH on failure or
715 0 on success. This is handed the entire rest of the pattern and string
716 the first time an extended pattern specifier is encountered, so it calls
717 gmatch recursively. */
719 EXTMATCH (xc
, s
, se
, p
, pe
, flags
)
720 INT xc
; /* select which operation */
725 CHAR
*prest
; /* pointer to rest of pattern */
726 CHAR
*psub
; /* pointer to sub-pattern */
727 CHAR
*pnext
; /* pointer to next sub-pattern */
728 CHAR
*srest
; /* pointer to rest of string */
729 int m1
, m2
, xflags
; /* xflags = flags passed to recursive matches */
732 fprintf(stderr
, "extmatch: xc = %c\n", xc
);
733 fprintf(stderr
, "extmatch: s = %s; se = %s\n", s
, se
);
734 fprintf(stderr
, "extmatch: p = %s; pe = %s\n", p
, pe
);
735 fprintf(stderr
, "extmatch: flags = %d\n", flags
);
738 prest
= PATSCAN (p
+ (*p
== L('(')), pe
, 0); /* ) */
740 /* If PREST is 0, we failed to scan a valid pattern. In this
741 case, we just want to compare the two as strings. */
742 return (STRCOMPARE (p
- 1, pe
, s
, se
));
746 case L('+'): /* match one or more occurrences */
747 case L('*'): /* match zero or more occurrences */
748 /* If we can get away with no matches, don't even bother. Just
749 call GMATCH on the rest of the pattern and return success if it succeeds. */
751 if (xc
== L('*') && (GMATCH (s
, se
, prest
, pe
, flags
) == 0))
754 /* OK, we have to do this the hard way. First, we make sure one of
755 the subpatterns matches, then we try to match the rest of the string. */
757 for (psub
= p
+ 1; ; psub
= pnext
)
759 pnext
= PATSCAN (psub
, pe
, L('|'));
760 for (srest
= s
; srest
<= se
; srest
++)
762 /* Match this substring (S -> SREST) against this
763 subpattern (psub -> pnext - 1) */
764 m1
= GMATCH (s
, srest
, psub
, pnext
- 1, flags
) == 0;
765 /* OK, we matched a subpattern, so make sure the rest of the
766 string matches the rest of the pattern. Also handle
767 multiple matches of the pattern. */
770 /* if srest > s, we are not at start of string */
771 xflags
= (srest
> s
) ? (flags
& ~FNM_PERIOD
) : flags
;
772 m2
= (GMATCH (srest
, se
, prest
, pe
, xflags
) == 0) ||
773 (s
!= srest
&& GMATCH (srest
, se
, p
- 1, pe
, xflags
) == 0);
781 return (FNM_NOMATCH
);
783 case L('?'): /* match zero or one of the patterns */
784 case L('@'): /* match one (or more) of the patterns */
785 /* If we can get away with no matches, don't even bother. Just
786 call gmatch on the rest of the pattern and return success if it succeeds. */
788 if (xc
== L('?') && (GMATCH (s
, se
, prest
, pe
, flags
) == 0))
791 /* OK, we have to do this the hard way. First, we see if one of
792 the subpatterns matches, then, if it does, we try to match the
793 rest of the string. */
794 for (psub
= p
+ 1; ; psub
= pnext
)
796 pnext
= PATSCAN (psub
, pe
, L('|'));
797 srest
= (prest
== pe
) ? se
: s
;
798 for ( ; srest
<= se
; srest
++)
800 /* if srest > s, we are not at start of string */
801 xflags
= (srest
> s
) ? (flags
& ~FNM_PERIOD
) : flags
;
802 if (GMATCH (s
, srest
, psub
, pnext
- 1, flags
) == 0 &&
803 GMATCH (srest
, se
, prest
, pe
, xflags
) == 0)
809 return (FNM_NOMATCH
);
811 case '!': /* match anything *except* one of the patterns */
812 for (srest
= s
; srest
<= se
; srest
++)
815 for (psub
= p
+ 1; ; psub
= pnext
)
817 pnext
= PATSCAN (psub
, pe
, L('|'));
818 /* If one of the patterns matches, just bail immediately. */
819 if (m1
= (GMATCH (s
, srest
, psub
, pnext
- 1, flags
) == 0))
824 /* if srest > s, we are not at start of string */
825 xflags
= (srest
> s
) ? (flags
& ~FNM_PERIOD
) : flags
;
826 if (m1
== 0 && GMATCH (srest
, se
, prest
, pe
, xflags
) == 0)
829 return (FNM_NOMATCH
);
832 return (FNM_NOMATCH
);
834 #endif /* EXTENDED_GLOB */