]>
git.ipfire.org Git - thirdparty/bash.git/blob - lib/glob/fnmatch.c
b032a6904b35cd4b7148c49292092e311a2b6934
1 /* fnmatch.c -- ksh-like extended pattern matching for the shell and filename
4 /* Copyright (C) 1991, 1997 Free Software Foundation, Inc.
6 This file is part of GNU Bash, the Bourne Again SHell.
8 Bash is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 2, or (at your option) any later
13 Bash is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License along
19 with Bash; see the file COPYING. If not, write to the Free Software
20 Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
28 #if defined (HAVE_STRING_H)
32 #endif /* HAVE_STRING_H */
35 static char *brackmatch ();
37 static int extmatch ();
38 static char *patscan ();
41 #if !defined (isascii)
42 # define isascii(c) ((unsigned int)(c) <= 0177)
45 /* Note that these evaluate C many times. */
48 # define isblank(c) ((c) == ' ' || (c) == '\t')
52 # define isgraph(c) ((c) != ' ' && isprint((c)))
56 # define isxdigit(c) (((c) >= '0' && (c) <= '9') || ((c) >= 'a' && (c) <= 'f') || ((c) >= 'A' && (c) <= 'F'))
59 /* The result of FOLD is an `unsigned char' */
60 # define FOLD(c) ((flags & FNM_CASEFOLD) && isupper ((unsigned char)c) \
61 ? tolower ((unsigned char)c) \
65 #define STREQ(a, b) ((a)[0] == (b)[0] && strcmp(a, b) == 0)
66 #define STREQN(a, b, n) ((a)[0] == (b)[0] && strncmp(a, b, n) == 0)
69 #if defined (HAVE_STRCOLL)
70 static int rangecmp (c1
, c2
)
73 static char s1
[2] = { ' ', '\0' };
74 static char s2
[2] = { ' ', '\0' };
77 /* Eight bits only. Period. */
87 if ((ret
= strcoll (s1
, s2
)) != 0)
91 #else /* !HAVE_STRCOLL */
92 # define rangecmp(c1, c2) ((c1) - (c2))
93 #endif /* !HAVE_STRCOLL */
95 #if defined (HAVE_STRCOLL)
96 static int collequiv (c1
, c2
)
99 return (rangecmp (c1
, c2
) == 0);
102 # define collequiv(c1, c2) ((c1) == (c2))
110 register struct _collsym
*csp
;
112 for (csp
= posix_collsyms
; csp
->name
; csp
++)
114 if (STREQN(csp
->name
, s
, len
) && csp
->name
[len
] == '\0')
123 fnmatch (pattern
, string
, flags
)
130 if (string
== 0 || pattern
== 0)
133 se
= string
+ strlen (string
);
134 pe
= pattern
+ strlen (pattern
);
136 return (gmatch (string
, se
, pattern
, pe
, flags
));
139 /* Match STRING against the filename pattern PATTERN, returning zero if
140 it matches, FNM_NOMATCH if not. */
142 gmatch (string
, se
, pattern
, pe
, flags
)
147 register char *p
, *n
; /* pattern, string */
148 register char c
; /* current pattern character */
149 register char sc
; /* current string character */
154 if (string
== 0 || pattern
== 0)
162 sc
= n
< se
? *n
: '\0';
165 /* extmatch () will handle recursively calling gmatch, so we can
166 just return what extmatch() returns. */
167 if ((flags
& FNM_EXTMATCH
) && *p
== '(' &&
168 (c
== '+' || c
== '*' || c
== '?' || c
== '@' || c
== '!')) /* ) */
171 /* If we're not matching the start of the string, we're not
172 concerned about the special cases for matching `.' */
173 lflags
= (n
== string
) ? flags
: (flags
& ~FNM_PERIOD
);
174 return (extmatch (c
, n
, se
, p
, pe
, lflags
));
180 case '?': /* Match single character */
183 else if ((flags
& FNM_PATHNAME
) && sc
== '/')
184 /* If we are matching a pathname, `?' can never match a `/'. */
186 else if ((flags
& FNM_PERIOD
) && sc
== '.' &&
187 (n
== string
|| ((flags
& FNM_PATHNAME
) && n
[-1] == '/')))
188 /* `?' cannot match a `.' if it is the first character of the
189 string or if it is the first character following a slash and
190 we are matching a pathname. */
194 case '\\': /* backslash escape removes special meaning */
198 if ((flags
& FNM_NOESCAPE
) == 0)
201 /* A trailing `\' cannot match. */
206 if (FOLD (sc
) != (unsigned char)c
)
210 case '*': /* Match zero or more characters */
214 if ((flags
& FNM_PERIOD
) && sc
== '.' &&
215 (n
== string
|| ((flags
& FNM_PATHNAME
) && n
[-1] == '/')))
216 /* `*' cannot match a `.' if it is the first character of the
217 string or if it is the first character following a slash and
218 we are matching a pathname. */
221 /* Collapse multiple consecutive `*' and `?', but make sure that
222 one character of the string is consumed for each `?'. */
223 for (c
= *p
++; (c
== '?' || c
== '*'); c
= *p
++)
225 if ((flags
& FNM_PATHNAME
) && sc
== '/')
226 /* A slash does not match a wildcard under FNM_PATHNAME. */
232 /* One character of the string is consumed in matching
233 this ? wildcard, so *??? won't match if there are
234 fewer than three characters. */
236 sc
= n
< se
? *n
: '\0';
240 /* Handle ******(patlist) */
241 if ((flags
& FNM_EXTMATCH
) && c
== '*' && *p
== '(') /*)*/
244 /* We need to check whether or not the extended glob
245 pattern matches the remainder of the string.
246 If it does, we match the entire pattern. */
247 for (newn
= n
; newn
< se
; ++newn
)
249 if (extmatch (c
, newn
, se
, p
, pe
, flags
) == 0)
252 /* We didn't match the extended glob pattern, but
253 that's OK, since we can match 0 or more occurrences.
254 We need to skip the glob pattern and see if we
255 match the rest of the string. */
256 newn
= patscan (p
, pe
, 0);
264 /* If we've hit the end of the pattern and the last character of
265 the pattern was handled by the loop above, we've succeeded.
266 Otherwise, we need to match that last character. */
267 if (p
== pe
&& (c
== '?' || c
== '*'))
270 /* General case, use recursion. */
274 c1
= (unsigned char)((flags
& FNM_NOESCAPE
) == 0 && c
== '\\') ? *p
: c
;
276 for (--p
; n
< se
; ++n
)
278 /* Only recurse into gmatch if the first character indicates a
279 possible match. We can check the first character if
280 we're not doing an extended glob match. */
281 if ((flags
& FNM_EXTMATCH
) == 0 && c
!= '[' && FOLD (*n
) != c1
)
284 /* If we're doing an extended glob match and the pattern is not
285 one of the extended glob patterns, we can check the first
287 if ((flags
& FNM_EXTMATCH
) && p
[1] != '(' && /*)*/
288 strchr ("?*+@!", *p
) == 0 && c
!= '[' && FOLD (*n
) != c1
)
291 /* Otherwise, we just recurse. */
292 if (gmatch (n
, se
, p
, pe
, flags
& ~FNM_PERIOD
) == 0)
300 if (sc
== '\0' || n
== se
)
303 /* A character class cannot match a `.' if it is the first
304 character of the string or if it is the first character
305 following a slash and we are matching a pathname. */
306 if ((flags
& FNM_PERIOD
) && sc
== '.' &&
307 (n
== string
|| ((flags
& FNM_PATHNAME
) && n
[-1] == '/')))
308 return (FNM_NOMATCH
);
310 p
= brackmatch (p
, sc
, flags
);
317 if ((unsigned char)c
!= FOLD (sc
))
318 return (FNM_NOMATCH
);
327 if ((flags
& FNM_LEADING_DIR
) && *n
== '/')
328 /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */
331 return (FNM_NOMATCH
);
334 /* Parse a bracket expression collating symbol ([.sym.]) starting at P, find
335 the value of the symbol, and move P past the collating symbol expression.
336 The value is returned in *VP, if VP is not null. */
338 parse_collsym (p
, vp
)
345 p
++; /* move past the `.' */
347 for (pc
= 0; p
[pc
]; pc
++)
348 if (p
[pc
] == '.' && p
[pc
+1] == ']')
350 val
= collsym (p
, pc
);
357 brackmatch (p
, test
, flags
)
362 register char cstart
, cend
, c
;
363 register int not; /* Nonzero if the sense of the character class is inverted. */
371 /* POSIX.2 3.13.1 says that an exclamation mark (`!') shall replace the
372 circumflex (`^') in its role in a `nonmatching list'. A bracket
373 expression starting with an unquoted circumflex character produces
374 unspecified results. This implementation treats the two identically. */
375 if (not = (*p
== '!' || *p
== '^'))
381 /* Initialize cstart and cend in case `-' is the last
382 character of the pattern. */
385 /* POSIX.2 equivalence class: [=c=]. See POSIX.2 2.8.3.2. Find
386 the end of the equivalence class, move the pattern pointer past
387 it, and check for equivalence. XXX - this handles only
388 single-character equivalence classes, which is wrong, or at
390 if (c
== '[' && *p
== '=' && p
[2] == '=' && p
[3] == ']')
394 if (collequiv (test
, pc
))
400 return ((test
== '[') ? savep
: (char *)0);
406 /* POSIX.2 character class expression. See POSIX.2 2.8.3.2. */
407 if (c
== '[' && *p
== ':')
409 pc
= 0; /* make sure invalid char classes don't match. */
410 if (STREQN (p
+1, "alnum:]", 7))
411 { pc
= isalnum (test
); p
+= 8; }
412 else if (STREQN (p
+1, "alpha:]", 7))
413 { pc
= isalpha (test
); p
+= 8; }
414 else if (STREQN (p
+1, "blank:]", 7))
415 { pc
= isblank (test
); p
+= 8; }
416 else if (STREQN (p
+1, "cntrl:]", 7))
417 { pc
= iscntrl (test
); p
+= 8; }
418 else if (STREQN (p
+1, "digit:]", 7))
419 { pc
= isdigit (test
); p
+= 8; }
420 else if (STREQN (p
+1, "graph:]", 7))
421 { pc
= isgraph (test
); p
+= 8; }
422 else if (STREQN (p
+1, "lower:]", 7))
423 { pc
= islower (test
); p
+= 8; }
424 else if (STREQN (p
+1, "print:]", 7))
425 { pc
= isprint (test
); p
+= 8; }
426 else if (STREQN (p
+1, "punct:]", 7))
427 { pc
= ispunct (test
); p
+= 8; }
428 else if (STREQN (p
+1, "space:]", 7))
429 { pc
= isspace (test
); p
+= 8; }
430 else if (STREQN (p
+1, "upper:]", 7))
431 { pc
= isupper (test
); p
+= 8; }
432 else if (STREQN (p
+1, "xdigit:]", 8))
433 { pc
= isxdigit (test
); p
+= 9; }
434 else if (STREQN (p
+1, "ascii:]", 7))
435 { pc
= isascii (test
); p
+= 8; }
440 /* continue the loop here, since this expression can't be
441 the first part of a range expression. */
444 return ((test
== '[') ? savep
: (char *)0);
452 /* POSIX.2 collating symbols. See POSIX.2 2.8.3.2. Find the end of
453 the symbol name, make sure it is terminated by `.]', translate
454 the name to a character using the external table, and do the
456 if (c
== '[' && *p
== '.')
458 p
= parse_collsym (p
, &pc
);
459 /* An invalid collating symbol cannot be the first point of a
460 range. If it is, we set cstart to one greater than `test',
461 so any comparisons later will fail. */
462 cstart
= (pc
== -1) ? test
+ 1 : pc
;
465 if (!(flags
& FNM_NOESCAPE
) && c
== '\\')
469 cstart
= cend
= *p
++;
472 cstart
= cend
= FOLD (cstart
);
474 /* POSIX.2 2.8.3.1.2 says: `An expression containing a `[' that
475 is not preceded by a backslash and is not part of a bracket
476 expression produces undefined results.' This implementation
477 treats the `[' as just a character to be matched if there is
478 not a closing `]'. */
480 return ((test
== '[') ? savep
: (char *)0);
485 if ((flags
& FNM_PATHNAME
) && c
== '/')
486 /* [/] can never match when matching a pathname. */
489 /* This introduces a range, unless the `-' is the last
490 character of the class. Find the end of the range
492 if (c
== '-' && *p
!= ']')
495 if (!(flags
& FNM_NOESCAPE
) && cend
== '\\')
499 if (cend
== '[' && *p
== '.')
501 p
= parse_collsym (p
, &pc
);
502 /* An invalid collating symbol cannot be the second part of a
503 range expression. If we get one, we set cend to one fewer
504 than the test character to make sure the range test fails. */
505 cend
= (pc
== -1) ? test
- 1 : pc
;
511 /* POSIX.2 2.8.3.2: ``The ending range point shall collate
512 equal to or higher than the starting range point; otherwise
513 the expression shall be treated as invalid.'' Note that this
514 applies to only the range expression; the rest of the bracket
515 expression is still checked for matches. */
516 if (rangecmp (cstart
, cend
) > 0)
525 if (rangecmp (test
, cstart
) >= 0 && rangecmp (test
, cend
) <= 0)
532 return (!not ? (char *)0 : p
);
535 /* Skip the rest of the [...] that already matched. */
536 brcnt
= (c
!= ']') + (c
== '[' && (*p
== '=' || *p
== ':' || *p
== '.'));
539 /* A `[' without a matching `]' is just another character to match. */
541 return ((test
== '[') ? savep
: (char *)0);
544 if (c
== '[' && (*p
== '=' || *p
== ':' || *p
== '.'))
548 else if (!(flags
& FNM_NOESCAPE
) && c
== '\\')
552 /* XXX 1003.2d11 is unclear if this is right. */
556 return (not ? (char *)0 : p
);
559 #if defined (EXTENDED_GLOB)
560 /* ksh-like extended pattern matching:
564 where pat-list is a list of one or more patterns separated by `|'. Operation
567 ?(patlist) match zero or one of the given patterns
568 *(patlist) match zero or more of the given patterns
569 +(patlist) match one or more of the given patterns
570 @(patlist) match exactly one of the given patterns
571 !(patlist) match anything except one of the given patterns
574 /* Scan a pattern starting at STRING and ending at END, keeping track of
575 embedded () and []. If DELIM is 0, we scan until a matching `)'
576 because we're scanning a `patlist'. Otherwise, we scan until we see
577 DELIM. In all cases, we never scan past END. The return value is the
578 first character after the matching DELIM. */
580 patscan (string
, end
, delim
)
588 for (s
= string
; c
= *s
; s
++)
612 if (bnest
== 0 && pnest
== 0 && delim
== '|')
620 /* Return 0 if dequoted pattern matches S in the current locale. */
622 strcompare (p
, pe
, s
, se
)
623 char *p
, *pe
, *s
, *se
;
632 #if defined (HAVE_STRCOLL)
633 ret
= strcoll (p
, s
);
641 return (ret
== 0 ? ret
: FNM_NOMATCH
);
644 /* Match a ksh extended pattern specifier. Return FNM_NOMATCH on failure or
645 0 on success. This is handed the entire rest of the pattern and string
646 the first time an extended pattern specifier is encountered, so it calls
647 gmatch recursively. */
649 extmatch (xc
, s
, se
, p
, pe
, flags
)
650 int xc
; /* select which operation */
655 char *prest
; /* pointer to rest of pattern */
656 char *psub
; /* pointer to sub-pattern */
657 char *pnext
; /* pointer to next sub-pattern */
658 char *srest
; /* pointer to rest of string */
663 case '+': /* match one or more occurrences */
664 case '*': /* match zero or more occurrences */
665 prest
= patscan (p
, pe
, 0);
667 /* If PREST is 0, we failed to scan a valid pattern. In this
668 case, we just want to compare the two as strings. */
669 return (strcompare (p
- 1, pe
, s
, se
));
671 /* If we can get away with no matches, don't even bother. Just
672 call gmatch on the rest of the pattern and return success if
674 if (xc
== '*' && (gmatch (s
, se
, prest
, pe
, flags
) == 0))
677 /* OK, we have to do this the hard way. First, we make sure one of
678 the subpatterns matches, then we try to match the rest of the
680 for (psub
= p
+ 1; ; psub
= pnext
)
682 pnext
= patscan (psub
, pe
, '|');
683 for (srest
= s
; srest
<= se
; srest
++)
685 /* Match this substring (S -> SREST) against this
686 subpattern (psub -> pnext - 1) */
687 m1
= gmatch (s
, srest
, psub
, pnext
- 1, flags
) == 0;
688 /* OK, we matched a subpattern, so make sure the rest of the
689 string matches the rest of the pattern. Also handle
690 multiple matches of the pattern. */
692 m2
= (gmatch (srest
, se
, prest
, pe
, flags
) == 0) ||
693 (s
!= srest
&& gmatch (srest
, se
, p
- 1, pe
, flags
) == 0);
700 return (FNM_NOMATCH
);
702 case '?': /* match zero or one of the patterns */
703 case '@': /* match exactly one of the patterns */
704 prest
= patscan (p
, pe
, 0);
706 return (strcompare (p
- 1, pe
, s
, se
));
708 /* If we can get away with no matches, don't even bother. Just
709 call gmatch on the rest of the pattern and return success if
711 if (xc
== '?' && (gmatch (s
, se
, prest
, pe
, flags
) == 0))
714 /* OK, we have to do this the hard way. First, we see if one of
715 the subpatterns matches, then, if it does, we try to match the
716 rest of the string. */
717 for (psub
= p
+ 1; ; psub
= pnext
)
719 pnext
= patscan (psub
, pe
, '|');
720 srest
= (prest
== pe
) ? se
: s
;
721 for ( ; srest
<= se
; srest
++)
723 if (gmatch (s
, srest
, psub
, pnext
- 1, flags
) == 0 &&
724 gmatch (srest
, se
, prest
, pe
, flags
) == 0)
730 return (FNM_NOMATCH
);
732 case '!': /* match anything *except* one of the patterns */
733 prest
= patscan (p
, pe
, 0);
735 return (strcompare (p
- 1, pe
, s
, se
));
737 for (srest
= s
; srest
<= se
; srest
++)
740 for (psub
= p
+ 1; ; psub
= pnext
)
742 pnext
= patscan (psub
, pe
, '|');
743 /* If one of the patterns matches, just bail immediately. */
744 if (m1
= (gmatch (s
, srest
, psub
, pnext
- 1, flags
) == 0))
749 if (m1
== 0 && gmatch (srest
, se
, prest
, pe
, flags
) == 0)
752 return (FNM_NOMATCH
);
755 return (FNM_NOMATCH
);
757 #endif /* EXTENDED_GLOB */
769 if (fnmatch (pat
, string
, 0) == 0)
771 printf ("%s matches %s\n", string
, pat
);
776 printf ("%s does not match %s\n", string
, pat
);