]>
git.ipfire.org Git - thirdparty/bash.git/blob - lib/glob/fnmatch.c
1 /* fnmatch.c -- ksh-like extended pattern matching for the shell and filename
4 /* Copyright (C) 1991, 1997 Free Software Foundation, Inc.
6 This file is part of GNU Bash, the Bourne Again SHell.
8 Bash is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 2, or (at your option) any later
13 Bash is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License along
19 with Bash; see the file COPYING. If not, write to the Free Software
20 Foundation, 59 Temple Place, Suite 330, Boston, MA 02111 USA. */
24 #include <stdio.h> /* for debugging */
30 #if defined (HAVE_STRING_H)
34 #endif /* HAVE_STRING_H */
37 static char *brackmatch ();
39 static int extmatch ();
40 static char *patscan ();
43 #if !defined (isascii)
44 # define isascii(c) ((unsigned int)(c) <= 0177)
47 /* Note that these evaluate C many times. */
50 # define isblank(c) ((c) == ' ' || (c) == '\t')
54 # define isgraph(c) ((c) != ' ' && isprint((c)))
58 # define isxdigit(c) (((c) >= '0' && (c) <= '9') || ((c) >= 'a' && (c) <= 'f') || ((c) >= 'A' && (c) <= 'F'))
61 /* The result of FOLD is an `unsigned char' */
62 # define FOLD(c) ((flags & FNM_CASEFOLD) && isupper ((unsigned char)c) \
63 ? tolower ((unsigned char)c) \
67 #define STREQ(a, b) ((a)[0] == (b)[0] && strcmp(a, b) == 0)
68 #define STREQN(a, b, n) ((a)[0] == (b)[0] && strncmp(a, b, n) == 0)
71 /* We don't use strcoll(3) for range comparisons in bracket expressions,
72 even if we have it, since it can have unwanted side effects in locales
73 other than POSIX or US. For instance, in the de locale, [A-Z] matches
74 all characters. So, for ranges we use ASCII collation, and for
75 collating symbol equivalence we use strcoll(). The casts to int are
76 to handle tests that use unsigned chars. */
78 #define rangecmp(c1, c2) ((int)(c1) - (int)(c2))
80 #if defined (HAVE_STRCOLL)
81 /* Helper function for collating symbol equivalence. */
82 static int rangecmp2 (c1
, c2
)
85 static char s1
[2] = { ' ', '\0' };
86 static char s2
[2] = { ' ', '\0' };
89 /* Eight bits only. Period. */
99 if ((ret
= strcoll (s1
, s2
)) != 0)
103 #else /* !HAVE_STRCOLL */
104 # define rangecmp2(c1, c2) ((int)(c1) - (int)(c2))
105 #endif /* !HAVE_STRCOLL */
107 #if defined (HAVE_STRCOLL)
108 static int collequiv (c1
, c2
)
111 return (rangecmp2 (c1
, c2
) == 0);
114 # define collequiv(c1, c2) ((c1) == (c2))
122 register struct _collsym
*csp
;
124 for (csp
= posix_collsyms
; csp
->name
; csp
++)
126 if (STREQN(csp
->name
, s
, len
) && csp
->name
[len
] == '\0')
135 fnmatch (pattern
, string
, flags
)
142 if (string
== 0 || pattern
== 0)
145 se
= string
+ strlen (string
);
146 pe
= pattern
+ strlen (pattern
);
148 return (gmatch (string
, se
, pattern
, pe
, flags
));
151 /* Match STRING against the filename pattern PATTERN, returning zero if
152 it matches, FNM_NOMATCH if not. */
154 gmatch (string
, se
, pattern
, pe
, flags
)
159 register char *p
, *n
; /* pattern, string */
160 register char c
; /* current pattern character */
161 register char sc
; /* current string character */
166 if (string
== 0 || pattern
== 0)
174 sc
= n
< se
? *n
: '\0';
177 /* extmatch () will handle recursively calling gmatch, so we can
178 just return what extmatch() returns. */
179 if ((flags
& FNM_EXTMATCH
) && *p
== '(' &&
180 (c
== '+' || c
== '*' || c
== '?' || c
== '@' || c
== '!')) /* ) */
183 /* If we're not matching the start of the string, we're not
184 concerned about the special cases for matching `.' */
185 lflags
= (n
== string
) ? flags
: (flags
& ~FNM_PERIOD
);
186 return (extmatch (c
, n
, se
, p
, pe
, lflags
));
192 case '?': /* Match single character */
195 else if ((flags
& FNM_PATHNAME
) && sc
== '/')
196 /* If we are matching a pathname, `?' can never match a `/'. */
198 else if ((flags
& FNM_PERIOD
) && sc
== '.' &&
199 (n
== string
|| ((flags
& FNM_PATHNAME
) && n
[-1] == '/')))
200 /* `?' cannot match a `.' if it is the first character of the
201 string or if it is the first character following a slash and
202 we are matching a pathname. */
206 case '\\': /* backslash escape removes special meaning */
210 if ((flags
& FNM_NOESCAPE
) == 0)
213 /* A trailing `\' cannot match. */
218 if (FOLD (sc
) != (unsigned char)c
)
222 case '*': /* Match zero or more characters */
226 if ((flags
& FNM_PERIOD
) && sc
== '.' &&
227 (n
== string
|| ((flags
& FNM_PATHNAME
) && n
[-1] == '/')))
228 /* `*' cannot match a `.' if it is the first character of the
229 string or if it is the first character following a slash and
230 we are matching a pathname. */
233 /* Collapse multiple consecutive `*' and `?', but make sure that
234 one character of the string is consumed for each `?'. */
235 for (c
= *p
++; (c
== '?' || c
== '*'); c
= *p
++)
237 if ((flags
& FNM_PATHNAME
) && sc
== '/')
238 /* A slash does not match a wildcard under FNM_PATHNAME. */
244 /* One character of the string is consumed in matching
245 this ? wildcard, so *??? won't match if there are
246 fewer than three characters. */
248 sc
= n
< se
? *n
: '\0';
252 /* Handle ******(patlist) */
253 if ((flags
& FNM_EXTMATCH
) && c
== '*' && *p
== '(') /*)*/
256 /* We need to check whether or not the extended glob
257 pattern matches the remainder of the string.
258 If it does, we match the entire pattern. */
259 for (newn
= n
; newn
< se
; ++newn
)
261 if (extmatch (c
, newn
, se
, p
, pe
, flags
) == 0)
264 /* We didn't match the extended glob pattern, but
265 that's OK, since we can match 0 or more occurrences.
266 We need to skip the glob pattern and see if we
267 match the rest of the string. */
268 newn
= patscan (p
+ 1, pe
, 0);
276 /* If we've hit the end of the pattern and the last character of
277 the pattern was handled by the loop above, we've succeeded.
278 Otherwise, we need to match that last character. */
279 if (p
== pe
&& (c
== '?' || c
== '*'))
282 /* General case, use recursion. */
286 c1
= (unsigned char)((flags
& FNM_NOESCAPE
) == 0 && c
== '\\') ? *p
: c
;
288 for (--p
; n
< se
; ++n
)
290 /* Only call fnmatch if the first character indicates a
291 possible match. We can check the first character if
292 we're not doing an extended glob match. */
293 if ((flags
& FNM_EXTMATCH
) == 0 && c
!= '[' && FOLD (*n
) != c1
)
296 /* If we're doing an extended glob match and the pattern is not
297 one of the extended glob patterns, we can check the first
299 if ((flags
& FNM_EXTMATCH
) && p
[1] != '(' && /*)*/
300 strchr ("?*+@!", *p
) == 0 && c
!= '[' && FOLD (*n
) != c1
)
303 /* Otherwise, we just recurse. */
304 if (gmatch (n
, se
, p
, pe
, flags
& ~FNM_PERIOD
) == 0)
312 if (sc
== '\0' || n
== se
)
315 /* A character class cannot match a `.' if it is the first
316 character of the string or if it is the first character
317 following a slash and we are matching a pathname. */
318 if ((flags
& FNM_PERIOD
) && sc
== '.' &&
319 (n
== string
|| ((flags
& FNM_PATHNAME
) && n
[-1] == '/')))
320 return (FNM_NOMATCH
);
322 p
= brackmatch (p
, sc
, flags
);
329 if ((unsigned char)c
!= FOLD (sc
))
330 return (FNM_NOMATCH
);
339 if ((flags
& FNM_LEADING_DIR
) && *n
== '/')
340 /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */
343 return (FNM_NOMATCH
);
346 /* Parse a bracket expression collating symbol ([.sym.]) starting at P, find
347 the value of the symbol, and move P past the collating symbol expression.
348 The value is returned in *VP, if VP is not null. */
350 parse_collsym (p
, vp
)
357 p
++; /* move past the `.' */
359 for (pc
= 0; p
[pc
]; pc
++)
360 if (p
[pc
] == '.' && p
[pc
+1] == ']')
362 val
= collsym (p
, pc
);
369 brackmatch (p
, test
, flags
)
374 register char cstart
, cend
, c
;
375 register int not; /* Nonzero if the sense of the character class is inverted. */
383 /* POSIX.2 3.13.1 says that an exclamation mark (`!') shall replace the
384 circumflex (`^') in its role in a `nonmatching list'. A bracket
385 expression starting with an unquoted circumflex character produces
386 unspecified results. This implementation treats the two identically. */
387 if (not = (*p
== '!' || *p
== '^'))
393 /* Initialize cstart and cend in case `-' is the last
394 character of the pattern. */
397 /* POSIX.2 equivalence class: [=c=]. See POSIX.2 2.8.3.2. Find
398 the end of the equivalence class, move the pattern pointer past
399 it, and check for equivalence. XXX - this handles only
400 single-character equivalence classes, which is wrong, or at
402 if (c
== '[' && *p
== '=' && p
[2] == '=' && p
[3] == ']')
406 if (collequiv (test
, pc
))
412 return ((test
== '[') ? savep
: (char *)0);
418 /* POSIX.2 character class expression. See POSIX.2 2.8.3.2. */
419 if (c
== '[' && *p
== ':')
421 pc
= 0; /* make sure invalid char classes don't match. */
422 if (STREQN (p
+1, "alnum:]", 7))
423 { pc
= isalnum (test
); p
+= 8; }
424 else if (STREQN (p
+1, "alpha:]", 7))
425 { pc
= isalpha (test
); p
+= 8; }
426 else if (STREQN (p
+1, "blank:]", 7))
427 { pc
= isblank (test
); p
+= 8; }
428 else if (STREQN (p
+1, "cntrl:]", 7))
429 { pc
= iscntrl (test
); p
+= 8; }
430 else if (STREQN (p
+1, "digit:]", 7))
431 { pc
= isdigit (test
); p
+= 8; }
432 else if (STREQN (p
+1, "graph:]", 7))
433 { pc
= isgraph (test
); p
+= 8; }
434 else if (STREQN (p
+1, "lower:]", 7))
435 { pc
= islower (test
); p
+= 8; }
436 else if (STREQN (p
+1, "print:]", 7))
437 { pc
= isprint (test
); p
+= 8; }
438 else if (STREQN (p
+1, "punct:]", 7))
439 { pc
= ispunct (test
); p
+= 8; }
440 else if (STREQN (p
+1, "space:]", 7))
441 { pc
= isspace (test
); p
+= 8; }
442 else if (STREQN (p
+1, "upper:]", 7))
443 { pc
= isupper (test
); p
+= 8; }
444 else if (STREQN (p
+1, "xdigit:]", 8))
445 { pc
= isxdigit (test
); p
+= 9; }
446 else if (STREQN (p
+1, "ascii:]", 7))
447 { pc
= isascii (test
); p
+= 8; }
452 /* continue the loop here, since this expression can't be
453 the first part of a range expression. */
456 return ((test
== '[') ? savep
: (char *)0);
464 /* POSIX.2 collating symbols. See POSIX.2 2.8.3.2. Find the end of
465 the symbol name, make sure it is terminated by `.]', translate
466 the name to a character using the external table, and do the
468 if (c
== '[' && *p
== '.')
470 p
= parse_collsym (p
, &pc
);
471 /* An invalid collating symbol cannot be the first point of a
472 range. If it is, we set cstart to one greater than `test',
473 so any comparisons later will fail. */
474 cstart
= (pc
== -1) ? test
+ 1 : pc
;
477 if (!(flags
& FNM_NOESCAPE
) && c
== '\\')
481 cstart
= cend
= *p
++;
484 cstart
= cend
= FOLD (cstart
);
486 /* POSIX.2 2.8.3.1.2 says: `An expression containing a `[' that
487 is not preceded by a backslash and is not part of a bracket
488 expression produces undefined results.' This implementation
489 treats the `[' as just a character to be matched if there is
490 not a closing `]'. */
492 return ((test
== '[') ? savep
: (char *)0);
497 if ((flags
& FNM_PATHNAME
) && c
== '/')
498 /* [/] can never match when matching a pathname. */
501 /* This introduces a range, unless the `-' is the last
502 character of the class. Find the end of the range
504 if (c
== '-' && *p
!= ']')
507 if (!(flags
& FNM_NOESCAPE
) && cend
== '\\')
511 if (cend
== '[' && *p
== '.')
513 p
= parse_collsym (p
, &pc
);
514 /* An invalid collating symbol cannot be the second part of a
515 range expression. If we get one, we set cend to one fewer
516 than the test character to make sure the range test fails. */
517 cend
= (pc
== -1) ? test
- 1 : pc
;
523 /* POSIX.2 2.8.3.2: ``The ending range point shall collate
524 equal to or higher than the starting range point; otherwise
525 the expression shall be treated as invalid.'' Note that this
526 applies to only the range expression; the rest of the bracket
527 expression is still checked for matches. */
528 if (rangecmp (cstart
, cend
) > 0)
537 if (rangecmp (test
, cstart
) >= 0 && rangecmp (test
, cend
) <= 0)
544 return (!not ? (char *)0 : p
);
547 /* Skip the rest of the [...] that already matched. */
548 brcnt
= (c
!= ']') + (c
== '[' && (*p
== '=' || *p
== ':' || *p
== '.'));
551 /* A `[' without a matching `]' is just another character to match. */
553 return ((test
== '[') ? savep
: (char *)0);
556 if (c
== '[' && (*p
== '=' || *p
== ':' || *p
== '.'))
560 else if (!(flags
& FNM_NOESCAPE
) && c
== '\\')
564 /* XXX 1003.2d11 is unclear if this is right. */
568 return (not ? (char *)0 : p
);
571 #if defined (EXTENDED_GLOB)
572 /* ksh-like extended pattern matching:
576 where pat-list is a list of one or more patterns separated by `|'. Operation
579 ?(patlist) match zero or one of the given patterns
580 *(patlist) match zero or more of the given patterns
581 +(patlist) match one or more of the given patterns
582 @(patlist) match exactly one of the given patterns
583 !(patlist) match anything except one of the given patterns
586 /* Scan a pattern starting at STRING and ending at END, keeping track of
587 embedded () and []. If DELIM is 0, we scan until a matching `)'
588 because we're scanning a `patlist'. Otherwise, we scan until we see
589 DELIM. In all cases, we never scan past END. The return value is the
590 first character after the matching DELIM. */
592 patscan (string
, end
, delim
)
600 for (s
= string
; c
= *s
; s
++)
626 if (bnest
== 0 && pnest
-- <= 0)
631 if (bnest
== 0 && pnest
== 0 && delim
== '|')
640 /* Return 0 if dequoted pattern matches S in the current locale. */
642 strcompare (p
, pe
, s
, se
)
643 char *p
, *pe
, *s
, *se
;
652 #if defined (HAVE_STRCOLL)
653 ret
= strcoll (p
, s
);
661 return (ret
== 0 ? ret
: FNM_NOMATCH
);
664 /* Match a ksh extended pattern specifier. Return FNM_NOMATCH on failure or
665 0 on success. This is handed the entire rest of the pattern and string
666 the first time an extended pattern specifier is encountered, so it calls
667 gmatch recursively. */
669 extmatch (xc
, s
, se
, p
, pe
, flags
)
670 int xc
; /* select which operation */
675 char *prest
; /* pointer to rest of pattern */
676 char *psub
; /* pointer to sub-pattern */
677 char *pnext
; /* pointer to next sub-pattern */
678 char *srest
; /* pointer to rest of string */
682 fprintf(stderr
, "extmatch: xc = %c\n", xc
);
683 fprintf(stderr
, "extmatch: s = %s; se = %s\n", s
, se
);
684 fprintf(stderr
, "extmatch: p = %s; pe = %s\n", p
, pe
);
687 prest
= patscan (p
+ (*p
== '('), pe
, 0); /* ) */
689 /* If PREST is 0, we failed to scan a valid pattern. In this
690 case, we just want to compare the two as strings. */
691 return (strcompare (p
- 1, pe
, s
, se
));
695 case '+': /* match one or more occurrences */
696 case '*': /* match zero or more occurrences */
697 /* If we can get away with no matches, don't even bother. Just
698 call gmatch on the rest of the pattern and return success if
700 if (xc
== '*' && (gmatch (s
, se
, prest
, pe
, flags
) == 0))
703 /* OK, we have to do this the hard way. First, we make sure one of
704 the subpatterns matches, then we try to match the rest of the
706 for (psub
= p
+ 1; ; psub
= pnext
)
708 pnext
= patscan (psub
, pe
, '|');
709 for (srest
= s
; srest
<= se
; srest
++)
711 /* Match this substring (S -> SREST) against this
712 subpattern (psub -> pnext - 1) */
713 m1
= gmatch (s
, srest
, psub
, pnext
- 1, flags
) == 0;
714 /* OK, we matched a subpattern, so make sure the rest of the
715 string matches the rest of the pattern. Also handle
716 multiple matches of the pattern. */
718 m2
= (gmatch (srest
, se
, prest
, pe
, flags
) == 0) ||
719 (s
!= srest
&& gmatch (srest
, se
, p
- 1, pe
, flags
) == 0);
726 return (FNM_NOMATCH
);
728 case '?': /* match zero or one of the patterns */
729 case '@': /* match exactly one of the patterns */
730 /* If we can get away with no matches, don't even bother. Just
731 call gmatch on the rest of the pattern and return success if
733 if (xc
== '?' && (gmatch (s
, se
, prest
, pe
, flags
) == 0))
736 /* OK, we have to do this the hard way. First, we see if one of
737 the subpatterns matches, then, if it does, we try to match the
738 rest of the string. */
739 for (psub
= p
+ 1; ; psub
= pnext
)
741 pnext
= patscan (psub
, pe
, '|');
742 srest
= (prest
== pe
) ? se
: s
;
743 for ( ; srest
<= se
; srest
++)
745 if (gmatch (s
, srest
, psub
, pnext
- 1, flags
) == 0 &&
746 gmatch (srest
, se
, prest
, pe
, flags
) == 0)
752 return (FNM_NOMATCH
);
754 case '!': /* match anything *except* one of the patterns */
755 for (srest
= s
; srest
<= se
; srest
++)
758 for (psub
= p
+ 1; ; psub
= pnext
)
760 pnext
= patscan (psub
, pe
, '|');
761 /* If one of the patterns matches, just bail immediately. */
762 if (m1
= (gmatch (s
, srest
, psub
, pnext
- 1, flags
) == 0))
767 if (m1
== 0 && gmatch (srest
, se
, prest
, pe
, flags
) == 0)
770 return (FNM_NOMATCH
);
773 return (FNM_NOMATCH
);
775 #endif /* EXTENDED_GLOB */
787 if (fnmatch (pat
, string
, 0) == 0)
789 printf ("%s matches %s\n", string
, pat
);
794 printf ("%s does not match %s\n", string
, pat
);