]>
git.ipfire.org Git - thirdparty/bash.git/blob - lib/glob/strmatch.c
1 /* strmatch.c -- ksh-like extended pattern matching for the shell and filename
4 /* Copyright (C) 1991, 1997 Free Software Foundation, Inc.
6 This file is part of GNU Bash, the Bourne Again SHell.
8 Bash is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 2, or (at your option) any later
13 Bash is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License along
19 with Bash; see the file COPYING. If not, write to the Free Software
20 Foundation, 59 Temple Place, Suite 330, Boston, MA 02111 USA. */
24 #include <stdio.h> /* for debugging */
28 #include <chartypes.h>
30 #if defined (HAVE_STRING_H)
34 #endif /* HAVE_STRING_H */
37 static char *brackmatch ();
39 static int extmatch ();
40 static char *patscan ();
43 #if !defined (isascii) && !defined (HAVE_ISASCII)
44 # define isascii(c) ((unsigned int)(c) <= 0177)
47 /* The result of FOLD is an `unsigned char' */
48 # define FOLD(c) ((flags & FNM_CASEFOLD) \
49 ? TOLOWER ((unsigned char)c) \
53 #define STREQ(a, b) ((a)[0] == (b)[0] && strcmp(a, b) == 0)
54 #define STREQN(a, b, n) ((a)[0] == (b)[0] && strncmp(a, b, n) == 0)
57 /* We use strcoll(3) for range comparisons in bracket expressions,
58 even though it can have unwanted side effects in locales
59 other than POSIX or US. For instance, in the de locale, [A-Z] matches
62 #if defined (HAVE_STRCOLL)
63 /* Helper function for collating symbol equivalence. */
64 static int rangecmp (c1
, c2
)
67 static char s1
[2] = { ' ', '\0' };
68 static char s2
[2] = { ' ', '\0' };
71 /* Eight bits only. Period. */
81 if ((ret
= strcoll (s1
, s2
)) != 0)
85 #else /* !HAVE_STRCOLL */
86 # define rangecmp(c1, c2) ((int)(c1) - (int)(c2))
87 #endif /* !HAVE_STRCOLL */
89 #if defined (HAVE_STRCOLL)
90 static int collequiv (c1
, c2
)
93 return (rangecmp (c1
, c2
) == 0);
96 # define collequiv(c1, c2) ((c1) == (c2))
104 register struct _collsym
*csp
;
106 for (csp
= posix_collsyms
; csp
->name
; csp
++)
108 if (STREQN(csp
->name
, s
, len
) && csp
->name
[len
] == '\0')
116 #ifdef HAVE_LIBC_FNM_EXTMATCH
118 strmatch (pattern
, string
, flags
)
125 if (string
== 0 || pattern
== 0)
128 return (fnmatch (pattern
, string
, flags
));
130 #else /* !HAVE_LIBC_FNM_EXTMATCH */
132 strmatch (pattern
, string
, flags
)
139 if (string
== 0 || pattern
== 0)
142 se
= string
+ strlen (string
);
143 pe
= pattern
+ strlen (pattern
);
145 return (gmatch (string
, se
, pattern
, pe
, flags
));
147 #endif /* !HAVE_LIBC_FNM_EXTMATCH */
149 /* Match STRING against the filename pattern PATTERN, returning zero if
150 it matches, FNM_NOMATCH if not. */
152 gmatch (string
, se
, pattern
, pe
, flags
)
157 register char *p
, *n
; /* pattern, string */
158 register char c
; /* current pattern character */
159 register char sc
; /* current string character */
164 if (string
== 0 || pattern
== 0)
168 fprintf(stderr
, "gmatch: string = %s; se = %s\n", string
, se
);
169 fprintf(stderr
, "gmatch: pattern = %s; pe = %s\n", pattern
, pe
);
177 sc
= n
< se
? *n
: '\0';
180 /* extmatch () will handle recursively calling gmatch, so we can
181 just return what extmatch() returns. */
182 if ((flags
& FNM_EXTMATCH
) && *p
== '(' &&
183 (c
== '+' || c
== '*' || c
== '?' || c
== '@' || c
== '!')) /* ) */
186 /* If we're not matching the start of the string, we're not
187 concerned about the special cases for matching `.' */
188 lflags
= (n
== string
) ? flags
: (flags
& ~FNM_PERIOD
);
189 return (extmatch (c
, n
, se
, p
, pe
, lflags
));
195 case '?': /* Match single character */
198 else if ((flags
& FNM_PATHNAME
) && sc
== '/')
199 /* If we are matching a pathname, `?' can never match a `/'. */
201 else if ((flags
& FNM_PERIOD
) && sc
== '.' &&
202 (n
== string
|| ((flags
& FNM_PATHNAME
) && n
[-1] == '/')))
203 /* `?' cannot match a `.' if it is the first character of the
204 string or if it is the first character following a slash and
205 we are matching a pathname. */
209 case '\\': /* backslash escape removes special meaning */
213 if ((flags
& FNM_NOESCAPE
) == 0)
216 /* A trailing `\' cannot match. */
221 if (FOLD (sc
) != (unsigned char)c
)
225 case '*': /* Match zero or more characters */
229 if ((flags
& FNM_PERIOD
) && sc
== '.' &&
230 (n
== string
|| ((flags
& FNM_PATHNAME
) && n
[-1] == '/')))
231 /* `*' cannot match a `.' if it is the first character of the
232 string or if it is the first character following a slash and
233 we are matching a pathname. */
236 /* Collapse multiple consecutive `*' and `?', but make sure that
237 one character of the string is consumed for each `?'. */
238 for (c
= *p
++; (c
== '?' || c
== '*'); c
= *p
++)
240 if ((flags
& FNM_PATHNAME
) && sc
== '/')
241 /* A slash does not match a wildcard under FNM_PATHNAME. */
247 /* One character of the string is consumed in matching
248 this ? wildcard, so *??? won't match if there are
249 fewer than three characters. */
251 sc
= n
< se
? *n
: '\0';
255 /* Handle ******(patlist) */
256 if ((flags
& FNM_EXTMATCH
) && c
== '*' && *p
== '(') /*)*/
259 /* We need to check whether or not the extended glob
260 pattern matches the remainder of the string.
261 If it does, we match the entire pattern. */
262 for (newn
= n
; newn
< se
; ++newn
)
264 if (extmatch (c
, newn
, se
, p
, pe
, flags
) == 0)
267 /* We didn't match the extended glob pattern, but
268 that's OK, since we can match 0 or more occurrences.
269 We need to skip the glob pattern and see if we
270 match the rest of the string. */
271 newn
= patscan (p
+ 1, pe
, 0);
272 /* If NEWN is 0, we have an ill-formed pattern. */
273 p
= newn
? newn
: pe
;
280 /* If we've hit the end of the pattern and the last character of
281 the pattern was handled by the loop above, we've succeeded.
282 Otherwise, we need to match that last character. */
283 if (p
== pe
&& (c
== '?' || c
== '*'))
286 /* General case, use recursion. */
290 c1
= (unsigned char)((flags
& FNM_NOESCAPE
) == 0 && c
== '\\') ? *p
: c
;
292 for (--p
; n
< se
; ++n
)
294 /* Only call strmatch if the first character indicates a
295 possible match. We can check the first character if
296 we're not doing an extended glob match. */
297 if ((flags
& FNM_EXTMATCH
) == 0 && c
!= '[' && FOLD (*n
) != c1
) /*]*/
300 /* If we're doing an extended glob match and the pattern is not
301 one of the extended glob patterns, we can check the first
303 if ((flags
& FNM_EXTMATCH
) && p
[1] != '(' && /*)*/
304 strchr ("?*+@!", *p
) == 0 && c
!= '[' && FOLD (*n
) != c1
) /*]*/
307 /* Otherwise, we just recurse. */
308 if (gmatch (n
, se
, p
, pe
, flags
& ~FNM_PERIOD
) == 0)
316 if (sc
== '\0' || n
== se
)
319 /* A character class cannot match a `.' if it is the first
320 character of the string or if it is the first character
321 following a slash and we are matching a pathname. */
322 if ((flags
& FNM_PERIOD
) && sc
== '.' &&
323 (n
== string
|| ((flags
& FNM_PATHNAME
) && n
[-1] == '/')))
324 return (FNM_NOMATCH
);
326 p
= brackmatch (p
, sc
, flags
);
333 if ((unsigned char)c
!= FOLD (sc
))
334 return (FNM_NOMATCH
);
343 if ((flags
& FNM_LEADING_DIR
) && *n
== '/')
344 /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */
347 return (FNM_NOMATCH
);
350 /* Parse a bracket expression collating symbol ([.sym.]) starting at P, find
351 the value of the symbol, and move P past the collating symbol expression.
352 The value is returned in *VP, if VP is not null. */
354 parse_collsym (p
, vp
)
361 p
++; /* move past the `.' */
363 for (pc
= 0; p
[pc
]; pc
++)
364 if (p
[pc
] == '.' && p
[pc
+1] == ']')
366 val
= collsym (p
, pc
);
373 brackmatch (p
, test
, flags
)
378 register char cstart
, cend
, c
;
379 register int not; /* Nonzero if the sense of the character class is inverted. */
387 /* POSIX.2 3.13.1 says that an exclamation mark (`!') shall replace the
388 circumflex (`^') in its role in a `nonmatching list'. A bracket
389 expression starting with an unquoted circumflex character produces
390 unspecified results. This implementation treats the two identically. */
391 if (not = (*p
== '!' || *p
== '^'))
397 /* Initialize cstart and cend in case `-' is the last
398 character of the pattern. */
401 /* POSIX.2 equivalence class: [=c=]. See POSIX.2 2.8.3.2. Find
402 the end of the equivalence class, move the pattern pointer past
403 it, and check for equivalence. XXX - this handles only
404 single-character equivalence classes, which is wrong, or at
406 if (c
== '[' && *p
== '=' && p
[2] == '=' && p
[3] == ']')
410 if (collequiv (test
, pc
))
412 /*[*/ /* Move past the closing `]', since the first thing we do at
413 the `matched:' label is back p up one. */
421 return ((test
== '[') ? savep
: (char *)0); /*]*/
427 /* POSIX.2 character class expression. See POSIX.2 2.8.3.2. */
428 if (c
== '[' && *p
== ':') /*]*/
430 pc
= 0; /* make sure invalid char classes don't match. */
431 if (STREQN (p
+1, "alnum:]", 7))
432 { pc
= ISALNUM (test
); p
+= 8; }
433 else if (STREQN (p
+1, "alpha:]", 7))
434 { pc
= ISALPHA (test
); p
+= 8; }
435 else if (STREQN (p
+1, "blank:]", 7))
436 { pc
= ISBLANK (test
); p
+= 8; }
437 else if (STREQN (p
+1, "cntrl:]", 7))
438 { pc
= ISCNTRL (test
); p
+= 8; }
439 else if (STREQN (p
+1, "digit:]", 7))
440 { pc
= ISDIGIT (test
); p
+= 8; }
441 else if (STREQN (p
+1, "graph:]", 7))
442 { pc
= ISGRAPH (test
); p
+= 8; }
443 else if (STREQN (p
+1, "lower:]", 7))
444 { pc
= ISLOWER (test
); p
+= 8; }
445 else if (STREQN (p
+1, "print:]", 7))
446 { pc
= ISPRINT (test
); p
+= 8; }
447 else if (STREQN (p
+1, "punct:]", 7))
448 { pc
= ISPUNCT (test
); p
+= 8; }
449 else if (STREQN (p
+1, "space:]", 7))
450 { pc
= ISSPACE (test
); p
+= 8; }
451 else if (STREQN (p
+1, "upper:]", 7))
452 { pc
= ISUPPER (test
); p
+= 8; }
453 else if (STREQN (p
+1, "xdigit:]", 8))
454 { pc
= ISXDIGIT (test
); p
+= 9; }
455 else if (STREQN (p
+1, "ascii:]", 7))
456 { pc
= isascii (test
); p
+= 8; }
459 /*[*/ /* Move past the closing `]', since the first thing we do at
460 the `matched:' label is back p up one. */
466 /* continue the loop here, since this expression can't be
467 the first part of a range expression. */
470 return ((test
== '[') ? savep
: (char *)0);
478 /* POSIX.2 collating symbols. See POSIX.2 2.8.3.2. Find the end of
479 the symbol name, make sure it is terminated by `.]', translate
480 the name to a character using the external table, and do the
482 if (c
== '[' && *p
== '.')
484 p
= parse_collsym (p
, &pc
);
485 /* An invalid collating symbol cannot be the first point of a
486 range. If it is, we set cstart to one greater than `test',
487 so any comparisons later will fail. */
488 cstart
= (pc
== -1) ? test
+ 1 : pc
;
491 if (!(flags
& FNM_NOESCAPE
) && c
== '\\')
495 cstart
= cend
= *p
++;
498 cstart
= cend
= FOLD (cstart
);
500 /* POSIX.2 2.8.3.1.2 says: `An expression containing a `[' that
501 is not preceded by a backslash and is not part of a bracket
502 expression produces undefined results.' This implementation
503 treats the `[' as just a character to be matched if there is
504 not a closing `]'. */
506 return ((test
== '[') ? savep
: (char *)0);
511 if ((flags
& FNM_PATHNAME
) && c
== '/')
512 /* [/] can never match when matching a pathname. */
515 /* This introduces a range, unless the `-' is the last
516 character of the class. Find the end of the range
518 if (c
== '-' && *p
!= ']')
521 if (!(flags
& FNM_NOESCAPE
) && cend
== '\\')
525 if (cend
== '[' && *p
== '.')
527 p
= parse_collsym (p
, &pc
);
528 /* An invalid collating symbol cannot be the second part of a
529 range expression. If we get one, we set cend to one fewer
530 than the test character to make sure the range test fails. */
531 cend
= (pc
== -1) ? test
- 1 : pc
;
537 /* POSIX.2 2.8.3.2: ``The ending range point shall collate
538 equal to or higher than the starting range point; otherwise
539 the expression shall be treated as invalid.'' Note that this
540 applies to only the range expression; the rest of the bracket
541 expression is still checked for matches. */
542 if (rangecmp (cstart
, cend
) > 0)
551 if (rangecmp (test
, cstart
) >= 0 && rangecmp (test
, cend
) <= 0)
558 return (!not ? (char *)0 : p
);
561 /* Skip the rest of the [...] that already matched. */
563 brcnt
= (c
!= ']') + (c
== '[' && (*p
== '=' || *p
== ':' || *p
== '.'));
570 /* A `[' without a matching `]' is just another character to match. */
572 return ((test
== '[') ? savep
: (char *)0);
575 if (c
== '[' && (*p
== '=' || *p
== ':' || *p
== '.'))
579 else if (!(flags
& FNM_NOESCAPE
) && c
== '\\')
583 /* XXX 1003.2d11 is unclear if this is right. */
587 return (not ? (char *)0 : p
);
590 #if defined (EXTENDED_GLOB)
591 /* ksh-like extended pattern matching:
595 where pat-list is a list of one or patterns separated by `|'. Operation
598 ?(patlist) match zero or one of the given patterns
599 *(patlist) match zero or more of the given patterns
600 +(patlist) match one or more of the given patterns
601 @(patlist) match exactly one of the given patterns
602 !(patlist) match anything except one of the given patterns
605 /* Scan a pattern starting at STRING and ending at END, keeping track of
606 embedded () and []. If DELIM is 0, we scan until a matching `)'
607 because we're scanning a `patlist'. Otherwise, we scan until we see
608 DELIM. In all cases, we never scan past END. The return value is the
609 first character after the matching DELIM. */
611 patscan (string
, end
, delim
)
615 int pnest
, bnest
, cchar
;
618 pnest
= bnest
= cchar
= 0;
620 for (s
= string
; c
= *s
; s
++)
629 /* `[' is not special inside a bracket expression, but it may
630 introduce one of the special POSIX bracket expressions
631 ([.SYM.], [=c=], [: ... :]) that needs special handling. */
636 if (*bfirst
== '!' || *bfirst
== '^')
640 else if (s
[1] == ':' || s
[1] == '.' || s
[1] == '=')
644 /* `]' is not special if it's the first char (after a leading `!'
645 or `^') in a bracket expression or if it's part of one of the
646 special POSIX bracket expressions ([.SYM.], [=c=], [: ... :]) */
650 if (cchar
&& s
[-1] == cchar
)
652 else if (s
!= bfirst
)
672 if (bnest
== 0 && pnest
-- <= 0)
678 if (bnest
== 0 && pnest
== 0 && delim
== '|')
687 /* Return 0 if dequoted pattern matches S in the current locale. */
689 strcompare (p
, pe
, s
, se
)
690 char *p
, *pe
, *s
, *se
;
699 #if defined (HAVE_STRCOLL)
700 ret
= strcoll (p
, s
);
708 return (ret
== 0 ? ret
: FNM_NOMATCH
);
711 /* Match a ksh extended pattern specifier. Return FNM_NOMATCH on failure or
712 0 on success. This is handed the entire rest of the pattern and string
713 the first time an extended pattern specifier is encountered, so it calls
714 gmatch recursively. */
716 extmatch (xc
, s
, se
, p
, pe
, flags
)
717 int xc
; /* select which operation */
722 char *prest
; /* pointer to rest of pattern */
723 char *psub
; /* pointer to sub-pattern */
724 char *pnext
; /* pointer to next sub-pattern */
725 char *srest
; /* pointer to rest of string */
729 fprintf(stderr
, "extmatch: xc = %c\n", xc
);
730 fprintf(stderr
, "extmatch: s = %s; se = %s\n", s
, se
);
731 fprintf(stderr
, "extmatch: p = %s; pe = %s\n", p
, pe
);
734 prest
= patscan (p
+ (*p
== '('), pe
, 0); /* ) */
736 /* If PREST is 0, we failed to scan a valid pattern. In this
737 case, we just want to compare the two as strings. */
738 return (strcompare (p
- 1, pe
, s
, se
));
742 case '+': /* match one or more occurrences */
743 case '*': /* match zero or more occurrences */
744 /* If we can get away with no matches, don't even bother. Just
745 call gmatch on the rest of the pattern and return success if
747 if (xc
== '*' && (gmatch (s
, se
, prest
, pe
, flags
) == 0))
750 /* OK, we have to do this the hard way. First, we make sure one of
751 the subpatterns matches, then we try to match the rest of the
753 for (psub
= p
+ 1; ; psub
= pnext
)
755 pnext
= patscan (psub
, pe
, '|');
756 for (srest
= s
; srest
<= se
; srest
++)
758 /* Match this substring (S -> SREST) against this
759 subpattern (psub -> pnext - 1) */
760 m1
= gmatch (s
, srest
, psub
, pnext
- 1, flags
) == 0;
761 /* OK, we matched a subpattern, so make sure the rest of the
762 string matches the rest of the pattern. Also handle
763 multiple matches of the pattern. */
765 m2
= (gmatch (srest
, se
, prest
, pe
, flags
) == 0) ||
766 (s
!= srest
&& gmatch (srest
, se
, p
- 1, pe
, flags
) == 0);
773 return (FNM_NOMATCH
);
775 case '?': /* match zero or one of the patterns */
776 case '@': /* match exactly one of the patterns */
777 /* If we can get away with no matches, don't even bother. Just
778 call gmatch on the rest of the pattern and return success if
780 if (xc
== '?' && (gmatch (s
, se
, prest
, pe
, flags
) == 0))
783 /* OK, we have to do this the hard way. First, we see if one of
784 the subpatterns matches, then, if it does, we try to match the
785 rest of the string. */
786 for (psub
= p
+ 1; ; psub
= pnext
)
788 pnext
= patscan (psub
, pe
, '|');
789 srest
= (prest
== pe
) ? se
: s
;
790 for ( ; srest
<= se
; srest
++)
792 if (gmatch (s
, srest
, psub
, pnext
- 1, flags
) == 0 &&
793 gmatch (srest
, se
, prest
, pe
, flags
) == 0)
799 return (FNM_NOMATCH
);
801 case '!': /* match anything *except* one of the patterns */
802 for (srest
= s
; srest
<= se
; srest
++)
805 for (psub
= p
+ 1; ; psub
= pnext
)
807 pnext
= patscan (psub
, pe
, '|');
808 /* If one of the patterns matches, just bail immediately. */
809 if (m1
= (gmatch (s
, srest
, psub
, pnext
- 1, flags
) == 0))
814 if (m1
== 0 && gmatch (srest
, se
, prest
, pe
, flags
) == 0)
817 return (FNM_NOMATCH
);
820 return (FNM_NOMATCH
);
822 #endif /* EXTENDED_GLOB */
834 if (strmatch (pat
, string
, 0) == 0)
836 printf ("%s matches %s\n", string
, pat
);
841 printf ("%s does not match %s\n", string
, pat
);