]>
Commit | Line | Data |
---|---|---|
cce855bc JA |
1 | /* fnmatch.c -- ksh-like extended pattern matching for the shell and filename |
2 | globbing. */ | |
3 | ||
4 | /* Copyright (C) 1991, 1997 Free Software Foundation, Inc. | |
5 | ||
6 | This file is part of GNU Bash, the Bourne Again SHell. | |
7 | ||
8 | Bash is free software; you can redistribute it and/or modify it under | |
9 | the terms of the GNU General Public License as published by the Free | |
10 | Software Foundation; either version 2, or (at your option) any later | |
11 | version. | |
28ef6c31 | 12 | |
cce855bc JA |
13 | Bash is distributed in the hope that it will be useful, but WITHOUT ANY |
14 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
15 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
16 | for more details. | |
28ef6c31 | 17 | |
cce855bc JA |
18 | You should have received a copy of the GNU General Public License along |
19 | with Bash; see the file COPYING. If not, write to the Free Software | |
bb70624e | 20 | Foundation, 59 Temple Place, Suite 330, Boston, MA 02111 USA. */ |
cce855bc JA |
21 | |
22 | #include <config.h> | |
bb70624e JA |
23 | |
24 | #include <stdio.h> /* for debugging */ | |
28ef6c31 | 25 | |
cce855bc JA |
26 | #include "fnmatch.h" |
27 | #include "collsyms.h" | |
28 | #include <ctype.h> | |
726f6388 | 29 | |
b72432fd JA |
30 | #if defined (HAVE_STRING_H) |
31 | # include <string.h> | |
32 | #else | |
33 | # include <strings.h> | |
34 | #endif /* HAVE_STRING_H */ | |
35 | ||
cce855bc JA |
36 | static int gmatch (); |
37 | static char *brackmatch (); | |
38 | #ifdef EXTENDED_GLOB | |
39 | static int extmatch (); | |
b72432fd | 40 | static char *patscan (); |
cce855bc JA |
41 | #endif |
42 | ||
43 | #if !defined (isascii) | |
44 | # define isascii(c) ((unsigned int)(c) <= 0177) | |
45 | #endif | |
726f6388 | 46 | |
cce855bc | 47 | /* Note that these evaluate C many times. */ |
726f6388 | 48 | |
cce855bc JA |
49 | #ifndef isblank |
50 | # define isblank(c) ((c) == ' ' || (c) == '\t') | |
51 | #endif | |
726f6388 | 52 | |
cce855bc JA |
53 | #ifndef isgraph |
54 | # define isgraph(c) ((c) != ' ' && isprint((c))) | |
726f6388 JA |
55 | #endif |
56 | ||
cce855bc JA |
57 | #ifndef isxdigit |
58 | # define isxdigit(c) (((c) >= '0' && (c) <= '9') || ((c) >= 'a' && (c) <= 'f') || ((c) >= 'A' && (c) <= 'F')) | |
59 | #endif | |
60 | ||
bc4cd23c JA |
61 | /* The result of FOLD is an `unsigned char' */ |
62 | # define FOLD(c) ((flags & FNM_CASEFOLD) && isupper ((unsigned char)c) \ | |
63 | ? tolower ((unsigned char)c) \ | |
64 | : ((unsigned char)c)) | |
cce855bc JA |
65 | |
66 | #ifndef STREQ | |
67 | #define STREQ(a, b) ((a)[0] == (b)[0] && strcmp(a, b) == 0) | |
68 | #define STREQN(a, b, n) ((a)[0] == (b)[0] && strncmp(a, b, n) == 0) | |
69 | #endif | |
70 | ||
28ef6c31 JA |
71 | /* We use strcoll(3) for range comparisons in bracket expressions, |
72 | even though it can have unwanted side effects in locales | |
bb70624e | 73 | other than POSIX or US. For instance, in the de locale, [A-Z] matches |
28ef6c31 | 74 | all characters. */ |
bb70624e | 75 | |
cce855bc | 76 | #if defined (HAVE_STRCOLL) |
bb70624e | 77 | /* Helper function for collating symbol equivalence. */ |
28ef6c31 | 78 | static int rangecmp (c1, c2) |
cce855bc JA |
79 | int c1, c2; |
80 | { | |
81 | static char s1[2] = { ' ', '\0' }; | |
82 | static char s2[2] = { ' ', '\0' }; | |
83 | int ret; | |
84 | ||
85 | /* Eight bits only. Period. */ | |
86 | c1 &= 0xFF; | |
87 | c2 &= 0xFF; | |
88 | ||
89 | if (c1 == c2) | |
90 | return (0); | |
91 | ||
92 | s1[0] = c1; | |
93 | s2[0] = c2; | |
94 | ||
95 | if ((ret = strcoll (s1, s2)) != 0) | |
96 | return ret; | |
97 | return (c1 - c2); | |
98 | } | |
99 | #else /* !HAVE_STRCOLL */ | |
28ef6c31 | 100 | # define rangecmp(c1, c2) ((int)(c1) - (int)(c2)) |
cce855bc JA |
101 | #endif /* !HAVE_STRCOLL */ |
102 | ||
103 | #if defined (HAVE_STRCOLL) | |
104 | static int collequiv (c1, c2) | |
105 | int c1, c2; | |
106 | { | |
28ef6c31 | 107 | return (rangecmp (c1, c2) == 0); |
cce855bc JA |
108 | } |
109 | #else | |
110 | # define collequiv(c1, c2) ((c1) == (c2)) | |
111 | #endif | |
112 | ||
113 | static int | |
114 | collsym (s, len) | |
115 | char *s; | |
116 | int len; | |
117 | { | |
118 | register struct _collsym *csp; | |
119 | ||
120 | for (csp = posix_collsyms; csp->name; csp++) | |
121 | { | |
122 | if (STREQN(csp->name, s, len) && csp->name[len] == '\0') | |
28ef6c31 | 123 | return (csp->code); |
cce855bc JA |
124 | } |
125 | if (len == 1) | |
126 | return s[0]; | |
127 | return -1; | |
128 | } | |
129 | ||
726f6388 JA |
130 | int |
131 | fnmatch (pattern, string, flags) | |
132 | char *pattern; | |
133 | char *string; | |
134 | int flags; | |
135 | { | |
cce855bc | 136 | char *se, *pe; |
726f6388 | 137 | |
cce855bc JA |
138 | if (string == 0 || pattern == 0) |
139 | return FNM_NOMATCH; | |
140 | ||
141 | se = string + strlen (string); | |
142 | pe = pattern + strlen (pattern); | |
143 | ||
144 | return (gmatch (string, se, pattern, pe, flags)); | |
145 | } | |
146 | ||
147 | /* Match STRING against the filename pattern PATTERN, returning zero if | |
148 | it matches, FNM_NOMATCH if not. */ | |
149 | static int | |
150 | gmatch (string, se, pattern, pe, flags) | |
151 | char *string, *se; | |
152 | char *pattern, *pe; | |
153 | int flags; | |
154 | { | |
155 | register char *p, *n; /* pattern, string */ | |
156 | register char c; /* current pattern character */ | |
157 | register char sc; /* current string character */ | |
726f6388 | 158 | |
cce855bc JA |
159 | p = pattern; |
160 | n = string; | |
161 | ||
162 | if (string == 0 || pattern == 0) | |
163 | return FNM_NOMATCH; | |
164 | ||
28ef6c31 JA |
165 | #if DEBUG_MATCHING |
166 | fprintf(stderr, "gmatch: string = %s; se = %s\n", string, se); | |
167 | fprintf(stderr, "gmatch: pattern = %s; pe = %s\n", pattern, pe); | |
168 | #endif | |
169 | ||
cce855bc | 170 | while (p < pe) |
726f6388 | 171 | { |
cce855bc JA |
172 | c = *p++; |
173 | c = FOLD (c); | |
174 | ||
175 | sc = n < se ? *n : '\0'; | |
176 | ||
177 | #ifdef EXTENDED_GLOB | |
b72432fd JA |
178 | /* extmatch () will handle recursively calling gmatch, so we can |
179 | just return what extmatch() returns. */ | |
cce855bc JA |
180 | if ((flags & FNM_EXTMATCH) && *p == '(' && |
181 | (c == '+' || c == '*' || c == '?' || c == '@' || c == '!')) /* ) */ | |
b72432fd JA |
182 | { |
183 | int lflags; | |
184 | /* If we're not matching the start of the string, we're not | |
185 | concerned about the special cases for matching `.' */ | |
186 | lflags = (n == string) ? flags : (flags & ~FNM_PERIOD); | |
187 | return (extmatch (c, n, se, p, pe, lflags)); | |
188 | } | |
cce855bc JA |
189 | #endif |
190 | ||
726f6388 JA |
191 | switch (c) |
192 | { | |
cce855bc JA |
193 | case '?': /* Match single character */ |
194 | if (sc == '\0') | |
195 | return FNM_NOMATCH; | |
196 | else if ((flags & FNM_PATHNAME) && sc == '/') | |
d166f048 | 197 | /* If we are matching a pathname, `?' can never match a `/'. */ |
cce855bc JA |
198 | return FNM_NOMATCH; |
199 | else if ((flags & FNM_PERIOD) && sc == '.' && | |
726f6388 | 200 | (n == string || ((flags & FNM_PATHNAME) && n[-1] == '/'))) |
d166f048 JA |
201 | /* `?' cannot match a `.' if it is the first character of the |
202 | string or if it is the first character following a slash and | |
203 | we are matching a pathname. */ | |
cce855bc | 204 | return FNM_NOMATCH; |
726f6388 JA |
205 | break; |
206 | ||
cce855bc JA |
207 | case '\\': /* backslash escape removes special meaning */ |
208 | if (p == pe) | |
209 | return FNM_NOMATCH; | |
210 | ||
211 | if ((flags & FNM_NOESCAPE) == 0) | |
d166f048 JA |
212 | { |
213 | c = *p++; | |
cce855bc JA |
214 | /* A trailing `\' cannot match. */ |
215 | if (p > pe) | |
216 | return FNM_NOMATCH; | |
217 | c = FOLD (c); | |
d166f048 | 218 | } |
bc4cd23c | 219 | if (FOLD (sc) != (unsigned char)c) |
cce855bc | 220 | return FNM_NOMATCH; |
726f6388 JA |
221 | break; |
222 | ||
cce855bc JA |
223 | case '*': /* Match zero or more characters */ |
224 | if (p == pe) | |
225 | return 0; | |
226 | ||
227 | if ((flags & FNM_PERIOD) && sc == '.' && | |
726f6388 | 228 | (n == string || ((flags & FNM_PATHNAME) && n[-1] == '/'))) |
d166f048 JA |
229 | /* `*' cannot match a `.' if it is the first character of the |
230 | string or if it is the first character following a slash and | |
231 | we are matching a pathname. */ | |
cce855bc | 232 | return FNM_NOMATCH; |
726f6388 | 233 | |
d166f048 JA |
234 | /* Collapse multiple consecutive, `*' and `?', but make sure that |
235 | one character of the string is consumed for each `?'. */ | |
cce855bc | 236 | for (c = *p++; (c == '?' || c == '*'); c = *p++) |
ccc6cda3 | 237 | { |
cce855bc | 238 | if ((flags & FNM_PATHNAME) && sc == '/') |
d166f048 | 239 | /* A slash does not match a wildcard under FNM_PATHNAME. */ |
cce855bc | 240 | return FNM_NOMATCH; |
d166f048 JA |
241 | else if (c == '?') |
242 | { | |
cce855bc JA |
243 | if (sc == '\0') |
244 | return FNM_NOMATCH; | |
d166f048 JA |
245 | /* One character of the string is consumed in matching |
246 | this ? wildcard, so *??? won't match if there are | |
247 | fewer than three characters. */ | |
248 | n++; | |
cce855bc | 249 | sc = n < se ? *n : '\0'; |
d166f048 | 250 | } |
cce855bc JA |
251 | |
252 | #ifdef EXTENDED_GLOB | |
253 | /* Handle ******(patlist) */ | |
254 | if ((flags & FNM_EXTMATCH) && c == '*' && *p == '(') /*)*/ | |
b72432fd JA |
255 | { |
256 | char *newn; | |
257 | /* We need to check whether or not the extended glob | |
258 | pattern matches the remainder of the string. | |
259 | If it does, we match the entire pattern. */ | |
260 | for (newn = n; newn < se; ++newn) | |
261 | { | |
262 | if (extmatch (c, newn, se, p, pe, flags) == 0) | |
263 | return (0); | |
264 | } | |
265 | /* We didn't match the extended glob pattern, but | |
266 | that's OK, since we can match 0 or more occurrences. | |
267 | We need to skip the glob pattern and see if we | |
268 | match the rest of the string. */ | |
bb70624e | 269 | newn = patscan (p + 1, pe, 0); |
b72432fd JA |
270 | p = newn; |
271 | } | |
cce855bc JA |
272 | #endif |
273 | if (p == pe) | |
28ef6c31 | 274 | break; |
ccc6cda3 | 275 | } |
726f6388 | 276 | |
cce855bc JA |
277 | /* If we've hit the end of the pattern and the last character of |
278 | the pattern was handled by the loop above, we've succeeded. | |
279 | Otherwise, we need to match that last character. */ | |
280 | if (p == pe && (c == '?' || c == '*')) | |
726f6388 JA |
281 | return (0); |
282 | ||
d166f048 | 283 | /* General case, use recursion. */ |
726f6388 | 284 | { |
bc4cd23c | 285 | unsigned char c1; |
cce855bc | 286 | |
bc4cd23c | 287 | c1 = (unsigned char)((flags & FNM_NOESCAPE) == 0 && c == '\\') ? *p : c; |
cce855bc JA |
288 | c1 = FOLD (c1); |
289 | for (--p; n < se; ++n) | |
b72432fd JA |
290 | { |
291 | /* Only call fnmatch if the first character indicates a | |
292 | possible match. We can check the first character if | |
293 | we're not doing an extended glob match. */ | |
28ef6c31 | 294 | if ((flags & FNM_EXTMATCH) == 0 && c != '[' && FOLD (*n) != c1) /*]*/ |
b72432fd JA |
295 | continue; |
296 | ||
297 | /* If we're doing an extended glob match and the pattern is not | |
298 | one of the extended glob patterns, we can check the first | |
299 | character. */ | |
300 | if ((flags & FNM_EXTMATCH) && p[1] != '(' && /*)*/ | |
28ef6c31 | 301 | strchr ("?*+@!", *p) == 0 && c != '[' && FOLD (*n) != c1) /*]*/ |
b72432fd JA |
302 | continue; |
303 | ||
304 | /* Otherwise, we just recurse. */ | |
305 | if (gmatch (n, se, p, pe, flags & ~FNM_PERIOD) == 0) | |
306 | return (0); | |
307 | } | |
cce855bc | 308 | return FNM_NOMATCH; |
726f6388 JA |
309 | } |
310 | ||
311 | case '[': | |
312 | { | |
cce855bc JA |
313 | if (sc == '\0' || n == se) |
314 | return FNM_NOMATCH; | |
726f6388 | 315 | |
d166f048 JA |
316 | /* A character class cannot match a `.' if it is the first |
317 | character of the string or if it is the first character | |
318 | following a slash and we are matching a pathname. */ | |
cce855bc | 319 | if ((flags & FNM_PERIOD) && sc == '.' && |
726f6388 JA |
320 | (n == string || ((flags & FNM_PATHNAME) && n[-1] == '/'))) |
321 | return (FNM_NOMATCH); | |
322 | ||
cce855bc JA |
323 | p = brackmatch (p, sc, flags); |
324 | if (p == 0) | |
325 | return FNM_NOMATCH; | |
726f6388 JA |
326 | } |
327 | break; | |
328 | ||
329 | default: | |
bc4cd23c | 330 | if ((unsigned char)c != FOLD (sc)) |
726f6388 JA |
331 | return (FNM_NOMATCH); |
332 | } | |
333 | ||
334 | ++n; | |
335 | } | |
336 | ||
cce855bc | 337 | if (n == se) |
726f6388 JA |
338 | return (0); |
339 | ||
cce855bc JA |
340 | if ((flags & FNM_LEADING_DIR) && *n == '/') |
341 | /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */ | |
342 | return 0; | |
28ef6c31 | 343 | |
cce855bc JA |
344 | return (FNM_NOMATCH); |
345 | } | |
346 | ||
347 | /* Parse a bracket expression collating symbol ([.sym.]) starting at P, find | |
348 | the value of the symbol, and move P past the collating symbol expression. | |
349 | The value is returned in *VP, if VP is not null. */ | |
350 | static char * | |
351 | parse_collsym (p, vp) | |
352 | char *p; | |
353 | int *vp; | |
354 | { | |
355 | register int pc; | |
356 | int val; | |
357 | ||
358 | p++; /* move past the `.' */ | |
28ef6c31 | 359 | |
cce855bc JA |
360 | for (pc = 0; p[pc]; pc++) |
361 | if (p[pc] == '.' && p[pc+1] == ']') | |
362 | break; | |
363 | val = collsym (p, pc); | |
364 | if (vp) | |
365 | *vp = val; | |
366 | return (p + pc + 2); | |
367 | } | |
368 | ||
369 | static char * | |
370 | brackmatch (p, test, flags) | |
371 | char *p; | |
372 | unsigned char test; | |
373 | int flags; | |
374 | { | |
375 | register char cstart, cend, c; | |
376 | register int not; /* Nonzero if the sense of the character class is inverted. */ | |
377 | int pc, brcnt; | |
378 | char *savep; | |
379 | ||
380 | test = FOLD (test); | |
381 | ||
382 | savep = p; | |
383 | ||
384 | /* POSIX.2 3.13.1 says that an exclamation mark (`!') shall replace the | |
385 | circumflex (`^') in its role in a `nonmatching list'. A bracket | |
28ef6c31 | 386 | expression starting with an unquoted circumflex character produces |
cce855bc JA |
387 | unspecified results. This implementation treats the two identically. */ |
388 | if (not = (*p == '!' || *p == '^')) | |
389 | ++p; | |
390 | ||
391 | c = *p++; | |
392 | for (;;) | |
393 | { | |
394 | /* Initialize cstart and cend in case `-' is the last | |
395 | character of the pattern. */ | |
396 | cstart = cend = c; | |
397 | ||
398 | /* POSIX.2 equivalence class: [=c=]. See POSIX.2 2.8.3.2. Find | |
399 | the end of the equivalence class, move the pattern pointer past | |
400 | it, and check for equivalence. XXX - this handles only | |
401 | single-character equivalence classes, which is wrong, or at | |
402 | least incomplete. */ | |
403 | if (c == '[' && *p == '=' && p[2] == '=' && p[3] == ']') | |
404 | { | |
405 | pc = FOLD (p[1]); | |
406 | p += 4; | |
407 | if (collequiv (test, pc)) | |
28ef6c31 JA |
408 | { |
409 | /*[*/ /* Move past the closing `]', since the first thing we do at | |
410 | the `matched:' label is back p up one. */ | |
411 | p++; | |
412 | goto matched; | |
413 | } | |
cce855bc JA |
414 | else |
415 | { | |
416 | c = *p++; | |
417 | if (c == '\0') | |
28ef6c31 | 418 | return ((test == '[') ? savep : (char *)0); /*]*/ |
cce855bc JA |
419 | c = FOLD (c); |
420 | continue; | |
421 | } | |
28ef6c31 | 422 | } |
cce855bc JA |
423 | |
424 | /* POSIX.2 character class expression. See POSIX.2 2.8.3.2. */ | |
28ef6c31 | 425 | if (c == '[' && *p == ':') /*]*/ |
cce855bc JA |
426 | { |
427 | pc = 0; /* make sure invalid char classes don't match. */ | |
428 | if (STREQN (p+1, "alnum:]", 7)) | |
429 | { pc = isalnum (test); p += 8; } | |
430 | else if (STREQN (p+1, "alpha:]", 7)) | |
431 | { pc = isalpha (test); p += 8; } | |
432 | else if (STREQN (p+1, "blank:]", 7)) | |
433 | { pc = isblank (test); p += 8; } | |
434 | else if (STREQN (p+1, "cntrl:]", 7)) | |
435 | { pc = iscntrl (test); p += 8; } | |
436 | else if (STREQN (p+1, "digit:]", 7)) | |
437 | { pc = isdigit (test); p += 8; } | |
438 | else if (STREQN (p+1, "graph:]", 7)) | |
439 | { pc = isgraph (test); p += 8; } | |
440 | else if (STREQN (p+1, "lower:]", 7)) | |
bc4cd23c | 441 | { pc = islower (test); p += 8; } |
cce855bc JA |
442 | else if (STREQN (p+1, "print:]", 7)) |
443 | { pc = isprint (test); p += 8; } | |
444 | else if (STREQN (p+1, "punct:]", 7)) | |
445 | { pc = ispunct (test); p += 8; } | |
446 | else if (STREQN (p+1, "space:]", 7)) | |
447 | { pc = isspace (test); p += 8; } | |
448 | else if (STREQN (p+1, "upper:]", 7)) | |
bc4cd23c | 449 | { pc = isupper (test); p += 8; } |
cce855bc JA |
450 | else if (STREQN (p+1, "xdigit:]", 8)) |
451 | { pc = isxdigit (test); p += 9; } | |
452 | else if (STREQN (p+1, "ascii:]", 7)) | |
453 | { pc = isascii (test); p += 8; } | |
454 | if (pc) | |
28ef6c31 JA |
455 | { |
456 | /*[*/ /* Move past the closing `]', since the first thing we do at | |
457 | the `matched:' label is back p up one. */ | |
458 | p++; | |
cce855bc | 459 | goto matched; |
28ef6c31 | 460 | } |
cce855bc JA |
461 | else |
462 | { | |
463 | /* continue the loop here, since this expression can't be | |
464 | the first part of a range expression. */ | |
465 | c = *p++; | |
466 | if (c == '\0') | |
467 | return ((test == '[') ? savep : (char *)0); | |
468 | else if (c == ']') | |
28ef6c31 | 469 | break; |
cce855bc JA |
470 | c = FOLD (c); |
471 | continue; | |
472 | } | |
473 | } | |
474 | ||
475 | /* POSIX.2 collating symbols. See POSIX.2 2.8.3.2. Find the end of | |
476 | the symbol name, make sure it is terminated by `.]', translate | |
477 | the name to a character using the external table, and do the | |
478 | comparison. */ | |
479 | if (c == '[' && *p == '.') | |
480 | { | |
481 | p = parse_collsym (p, &pc); | |
482 | /* An invalid collating symbol cannot be the first point of a | |
483 | range. If it is, we set cstart to one greater than `test', | |
484 | so any comparisons later will fail. */ | |
485 | cstart = (pc == -1) ? test + 1 : pc; | |
486 | } | |
487 | ||
488 | if (!(flags & FNM_NOESCAPE) && c == '\\') | |
489 | { | |
490 | if (*p == '\0') | |
491 | return (char *)0; | |
492 | cstart = cend = *p++; | |
493 | } | |
494 | ||
495 | cstart = cend = FOLD (cstart); | |
496 | ||
497 | /* POSIX.2 2.8.3.1.2 says: `An expression containing a `[' that | |
498 | is not preceded by a backslash and is not part of a bracket | |
499 | expression produces undefined results.' This implementation | |
500 | treats the `[' as just a character to be matched if there is | |
501 | not a closing `]'. */ | |
502 | if (c == '\0') | |
503 | return ((test == '[') ? savep : (char *)0); | |
504 | ||
505 | c = *p++; | |
506 | c = FOLD (c); | |
507 | ||
508 | if ((flags & FNM_PATHNAME) && c == '/') | |
509 | /* [/] can never match when matching a pathname. */ | |
510 | return (char *)0; | |
511 | ||
512 | /* This introduces a range, unless the `-' is the last | |
513 | character of the class. Find the end of the range | |
514 | and move past it. */ | |
515 | if (c == '-' && *p != ']') | |
516 | { | |
517 | cend = *p++; | |
518 | if (!(flags & FNM_NOESCAPE) && cend == '\\') | |
519 | cend = *p++; | |
520 | if (cend == '\0') | |
521 | return (char *)0; | |
522 | if (cend == '[' && *p == '.') | |
523 | { | |
524 | p = parse_collsym (p, &pc); | |
525 | /* An invalid collating symbol cannot be the second part of a | |
526 | range expression. If we get one, we set cend to one fewer | |
527 | than the test character to make sure the range test fails. */ | |
528 | cend = (pc == -1) ? test - 1 : pc; | |
529 | } | |
530 | cend = FOLD (cend); | |
531 | ||
532 | c = *p++; | |
533 | ||
534 | /* POSIX.2 2.8.3.2: ``The ending range point shall collate | |
535 | equal to or higher than the starting range point; otherwise | |
536 | the expression shall be treated as invalid.'' Note that this | |
537 | applies to only the range expression; the rest of the bracket | |
538 | expression is still checked for matches. */ | |
539 | if (rangecmp (cstart, cend) > 0) | |
540 | { | |
541 | if (c == ']') | |
28ef6c31 | 542 | break; |
cce855bc JA |
543 | c = FOLD (c); |
544 | continue; | |
545 | } | |
546 | } | |
547 | ||
548 | if (rangecmp (test, cstart) >= 0 && rangecmp (test, cend) <= 0) | |
549 | goto matched; | |
550 | ||
551 | if (c == ']') | |
552 | break; | |
553 | } | |
554 | /* No match. */ | |
555 | return (!not ? (char *)0 : p); | |
556 | ||
557 | matched: | |
558 | /* Skip the rest of the [...] that already matched. */ | |
28ef6c31 | 559 | #if 0 |
cce855bc | 560 | brcnt = (c != ']') + (c == '[' && (*p == '=' || *p == ':' || *p == '.')); |
28ef6c31 JA |
561 | #else |
562 | c = *--p; | |
563 | brcnt = 1; | |
564 | #endif | |
cce855bc JA |
565 | while (brcnt > 0) |
566 | { | |
567 | /* A `[' without a matching `]' is just another character to match. */ | |
568 | if (c == '\0') | |
569 | return ((test == '[') ? savep : (char *)0); | |
570 | ||
571 | c = *p++; | |
572 | if (c == '[' && (*p == '=' || *p == ':' || *p == '.')) | |
28ef6c31 | 573 | brcnt++; |
cce855bc | 574 | else if (c == ']') |
28ef6c31 | 575 | brcnt--; |
cce855bc JA |
576 | else if (!(flags & FNM_NOESCAPE) && c == '\\') |
577 | { | |
578 | if (*p == '\0') | |
579 | return (char *)0; | |
580 | /* XXX 1003.2d11 is unclear if this is right. */ | |
581 | ++p; | |
582 | } | |
583 | } | |
584 | return (not ? (char *)0 : p); | |
585 | } | |
586 | ||
587 | #if defined (EXTENDED_GLOB) | |
588 | /* ksh-like extended pattern matching: | |
589 | ||
590 | [?*+@!](pat-list) | |
591 | ||
592 | where pat-list is a list of one or patterns separated by `|'. Operation | |
593 | is as follows: | |
594 | ||
595 | ?(patlist) match zero or one of the given patterns | |
596 | *(patlist) match zero or more of the given patterns | |
597 | +(patlist) match one or more of the given patterns | |
598 | @(patlist) match exactly one of the given patterns | |
599 | !(patlist) match anything except one of the given patterns | |
600 | */ | |
601 | ||
602 | /* Scan a pattern starting at STRING and ending at END, keeping track of | |
603 | embedded () and []. If DELIM is 0, we scan until a matching `)' | |
604 | because we're scanning a `patlist'. Otherwise, we scan until we see | |
605 | DELIM. In all cases, we never scan past END. The return value is the | |
606 | first character after the matching DELIM. */ | |
607 | static char * | |
608 | patscan (string, end, delim) | |
609 | char *string, *end; | |
610 | int delim; | |
611 | { | |
28ef6c31 JA |
612 | int pnest, bnest, cchar; |
613 | char *s, c, *bfirst; | |
cce855bc | 614 | |
28ef6c31 JA |
615 | pnest = bnest = cchar = 0; |
616 | bfirst = 0; | |
cce855bc JA |
617 | for (s = string; c = *s; s++) |
618 | { | |
bb70624e | 619 | if (s >= end) |
28ef6c31 | 620 | return (s); |
cce855bc JA |
621 | switch (c) |
622 | { | |
623 | case '\0': | |
624 | return ((char *)0); | |
28ef6c31 JA |
625 | |
626 | /* `[' is not special inside a bracket expression, but it may | |
627 | introduce one of the special POSIX bracket expressions | |
628 | ([.SYM.], [=c=], [: ... :]) that needs special handling. */ | |
cce855bc | 629 | case '[': |
28ef6c31 JA |
630 | if (bnest == 0) |
631 | { | |
632 | bfirst = s + 1; | |
633 | if (*bfirst == '!' || *bfirst == '^') | |
634 | bfirst++; | |
635 | bnest++; | |
636 | } | |
637 | else if (s[1] == ':' || s[1] == '.' || s[1] == '=') | |
638 | cchar = s[1]; | |
cce855bc | 639 | break; |
28ef6c31 JA |
640 | |
641 | /* `]' is not special if it's the first char (after a leading `!' | |
642 | or `^') in a bracket expression or if it's part of one of the | |
643 | special POSIX bracket expressions ([.SYM.], [=c=], [: ... :]) */ | |
cce855bc JA |
644 | case ']': |
645 | if (bnest) | |
28ef6c31 JA |
646 | { |
647 | if (cchar && s[-1] == cchar) | |
648 | cchar = 0; | |
649 | else if (s != bfirst) | |
650 | { | |
651 | bnest--; | |
652 | bfirst = 0; | |
653 | } | |
654 | } | |
cce855bc | 655 | break; |
28ef6c31 | 656 | |
cce855bc JA |
657 | case '(': |
658 | if (bnest == 0) | |
659 | pnest++; | |
660 | break; | |
28ef6c31 | 661 | |
cce855bc | 662 | case ')': |
bb70624e | 663 | #if 0 |
cce855bc JA |
664 | if (bnest == 0) |
665 | pnest--; | |
666 | if (pnest <= 0) | |
667 | return ++s; | |
bb70624e JA |
668 | #else |
669 | if (bnest == 0 && pnest-- <= 0) | |
670 | return ++s; | |
671 | #endif | |
cce855bc | 672 | break; |
28ef6c31 | 673 | |
cce855bc JA |
674 | case '|': |
675 | if (bnest == 0 && pnest == 0 && delim == '|') | |
676 | return ++s; | |
677 | break; | |
678 | } | |
679 | } | |
bb70624e | 680 | |
cce855bc JA |
681 | return (char *)0; |
682 | } | |
683 | ||
684 | /* Return 0 if dequoted pattern matches S in the current locale. */ | |
685 | static int | |
686 | strcompare (p, pe, s, se) | |
687 | char *p, *pe, *s, *se; | |
688 | { | |
689 | int ret; | |
690 | char c1, c2; | |
691 | ||
692 | c1 = *pe; | |
693 | c2 = *se; | |
694 | ||
695 | *pe = *se = '\0'; | |
696 | #if defined (HAVE_STRCOLL) | |
697 | ret = strcoll (p, s); | |
698 | #else | |
699 | ret = strcmp (p, s); | |
700 | #endif | |
701 | ||
702 | *pe = c1; | |
703 | *se = c2; | |
704 | ||
705 | return (ret == 0 ? ret : FNM_NOMATCH); | |
706 | } | |
707 | ||
708 | /* Match a ksh extended pattern specifier. Return FNM_NOMATCH on failure or | |
709 | 0 on success. This is handed the entire rest of the pattern and string | |
710 | the first time an extended pattern specifier is encountered, so it calls | |
711 | gmatch recursively. */ | |
712 | static int | |
713 | extmatch (xc, s, se, p, pe, flags) | |
714 | int xc; /* select which operation */ | |
715 | char *s, *se; | |
716 | char *p, *pe; | |
717 | int flags; | |
718 | { | |
719 | char *prest; /* pointer to rest of pattern */ | |
720 | char *psub; /* pointer to sub-pattern */ | |
721 | char *pnext; /* pointer to next sub-pattern */ | |
722 | char *srest; /* pointer to rest of string */ | |
723 | int m1, m2; | |
724 | ||
28ef6c31 | 725 | #if DEBUG_MATCHING |
bb70624e JA |
726 | fprintf(stderr, "extmatch: xc = %c\n", xc); |
727 | fprintf(stderr, "extmatch: s = %s; se = %s\n", s, se); | |
728 | fprintf(stderr, "extmatch: p = %s; pe = %s\n", p, pe); | |
729 | #endif | |
730 | ||
731 | prest = patscan (p + (*p == '('), pe, 0); /* ) */ | |
732 | if (prest == 0) | |
733 | /* If PREST is 0, we failed to scan a valid pattern. In this | |
734 | case, we just want to compare the two as strings. */ | |
735 | return (strcompare (p - 1, pe, s, se)); | |
736 | ||
cce855bc JA |
737 | switch (xc) |
738 | { | |
739 | case '+': /* match one or more occurrences */ | |
740 | case '*': /* match zero or more occurrences */ | |
cce855bc JA |
741 | /* If we can get away with no matches, don't even bother. Just |
742 | call gmatch on the rest of the pattern and return success if | |
743 | it succeeds. */ | |
744 | if (xc == '*' && (gmatch (s, se, prest, pe, flags) == 0)) | |
745 | return 0; | |
746 | ||
747 | /* OK, we have to do this the hard way. First, we make sure one of | |
28ef6c31 JA |
748 | the subpatterns matches, then we try to match the rest of the |
749 | string. */ | |
cce855bc JA |
750 | for (psub = p + 1; ; psub = pnext) |
751 | { | |
752 | pnext = patscan (psub, pe, '|'); | |
753 | for (srest = s; srest <= se; srest++) | |
754 | { | |
755 | /* Match this substring (S -> SREST) against this | |
756 | subpattern (psub -> pnext - 1) */ | |
757 | m1 = gmatch (s, srest, psub, pnext - 1, flags) == 0; | |
758 | /* OK, we matched a subpattern, so make sure the rest of the | |
759 | string matches the rest of the pattern. Also handle | |
760 | multiple matches of the pattern. */ | |
761 | if (m1) | |
762 | m2 = (gmatch (srest, se, prest, pe, flags) == 0) || | |
763 | (s != srest && gmatch (srest, se, p - 1, pe, flags) == 0); | |
764 | if (m1 && m2) | |
28ef6c31 | 765 | return (0); |
cce855bc JA |
766 | } |
767 | if (pnext == prest) | |
768 | break; | |
769 | } | |
770 | return (FNM_NOMATCH); | |
771 | ||
772 | case '?': /* match zero or one of the patterns */ | |
773 | case '@': /* match exactly one of the patterns */ | |
cce855bc JA |
774 | /* If we can get away with no matches, don't even bother. Just |
775 | call gmatch on the rest of the pattern and return success if | |
776 | it succeeds. */ | |
777 | if (xc == '?' && (gmatch (s, se, prest, pe, flags) == 0)) | |
778 | return 0; | |
779 | ||
780 | /* OK, we have to do this the hard way. First, we see if one of | |
781 | the subpatterns matches, then, if it does, we try to match the | |
782 | rest of the string. */ | |
783 | for (psub = p + 1; ; psub = pnext) | |
784 | { | |
785 | pnext = patscan (psub, pe, '|'); | |
786 | srest = (prest == pe) ? se : s; | |
787 | for ( ; srest <= se; srest++) | |
788 | { | |
789 | if (gmatch (s, srest, psub, pnext - 1, flags) == 0 && | |
790 | gmatch (srest, se, prest, pe, flags) == 0) | |
791 | return (0); | |
792 | } | |
793 | if (pnext == prest) | |
794 | break; | |
795 | } | |
796 | return (FNM_NOMATCH); | |
797 | ||
798 | case '!': /* match anything *except* one of the patterns */ | |
cce855bc JA |
799 | for (srest = s; srest <= se; srest++) |
800 | { | |
801 | m1 = 0; | |
802 | for (psub = p + 1; ; psub = pnext) | |
803 | { | |
804 | pnext = patscan (psub, pe, '|'); | |
805 | /* If one of the patterns matches, just bail immediately. */ | |
806 | if (m1 = (gmatch (s, srest, psub, pnext - 1, flags) == 0)) | |
807 | break; | |
808 | if (pnext == prest) | |
809 | break; | |
810 | } | |
811 | if (m1 == 0 && gmatch (srest, se, prest, pe, flags) == 0) | |
28ef6c31 | 812 | return (0); |
cce855bc JA |
813 | } |
814 | return (FNM_NOMATCH); | |
815 | } | |
816 | ||
726f6388 JA |
817 | return (FNM_NOMATCH); |
818 | } | |
cce855bc JA |
819 | #endif /* EXTENDED_GLOB */ |
820 | ||
821 | #ifdef TEST | |
822 | main (c, v) | |
823 | int c; | |
824 | char **v; | |
825 | { | |
826 | char *string, *pat; | |
827 | ||
828 | string = v[1]; | |
829 | pat = v[2]; | |
830 | ||
831 | if (fnmatch (pat, string, 0) == 0) | |
832 | { | |
833 | printf ("%s matches %s\n", string, pat); | |
834 | exit (0); | |
835 | } | |
836 | else | |
837 | { | |
838 | printf ("%s does not match %s\n", string, pat); | |
839 | exit (1); | |
840 | } | |
841 | } | |
842 | #endif |