]>
Commit | Line | Data |
---|---|---|
f73dda09 | 1 | /* strmatch.c -- ksh-like extended pattern matching for the shell and filename |
cce855bc JA |
2 | globbing. */ |
3 | ||
4 | /* Copyright (C) 1991, 1997 Free Software Foundation, Inc. | |
5 | ||
6 | This file is part of GNU Bash, the Bourne Again SHell. | |
7 | ||
8 | Bash is free software; you can redistribute it and/or modify it under | |
9 | the terms of the GNU General Public License as published by the Free | |
10 | Software Foundation; either version 2, or (at your option) any later | |
11 | version. | |
28ef6c31 | 12 | |
cce855bc JA |
13 | Bash is distributed in the hope that it will be useful, but WITHOUT ANY |
14 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
15 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
16 | for more details. | |
28ef6c31 | 17 | |
cce855bc JA |
18 | You should have received a copy of the GNU General Public License along |
19 | with Bash; see the file COPYING. If not, write to the Free Software | |
bb70624e | 20 | Foundation, 59 Temple Place, Suite 330, Boston, MA 02111 USA. */ |
cce855bc JA |
21 | |
22 | #include <config.h> | |
bb70624e JA |
23 | |
24 | #include <stdio.h> /* for debugging */ | |
28ef6c31 | 25 | |
f73dda09 | 26 | #include "strmatch.h" |
cce855bc | 27 | #include "collsyms.h" |
f73dda09 | 28 | #include <chartypes.h> |
726f6388 | 29 | |
b72432fd JA |
30 | #if defined (HAVE_STRING_H) |
31 | # include <string.h> | |
32 | #else | |
33 | # include <strings.h> | |
34 | #endif /* HAVE_STRING_H */ | |
35 | ||
/* Forward declarations for the matchers defined later in this file.  They
   are old-style (unprototyped) declarations, matching the K&R definitions. */
static int gmatch ();
static char *brackmatch ();
#ifdef EXTENDED_GLOB
static int extmatch ();
static char *patscan ();
#endif

/* Fallback for systems without isascii: true for values 0..0177. */
#if !defined (isascii) && !defined (HAVE_ISASCII)
#  define isascii(c)	((unsigned int)(c) <= 0177)
#endif

/* Fold C to lower case when FNM_CASEFOLD is set in the variable `flags'
   that must be in scope at the point of use.  The result of FOLD is an
   `unsigned char'.  The argument is parenthesized before the cast so that
   an expression argument is converted as a whole. */
#define FOLD(c) ((flags & FNM_CASEFOLD) \
	? TOLOWER ((unsigned char)(c)) \
	: ((unsigned char)(c)))

/* String equality helpers: compare the first characters before paying for
   the library call.  STREQN compares at most N characters. */
#ifndef STREQ
#define STREQ(a, b) ((a)[0] == (b)[0] && strcmp(a, b) == 0)
#define STREQN(a, b, n) ((a)[0] == (b)[0] && strncmp(a, b, n) == 0)
#endif
56 | ||
/* Range comparisons in bracket expressions go through strcoll(3) when it
   is available, even though that can have unwanted side effects in locales
   other than POSIX or US.  For instance, in the de locale, [A-Z] matches
   all characters. */

#if defined (HAVE_STRCOLL)
/* Order the characters C1 and C2 for range and equivalence tests.  Returns
   zero when they are the same byte, otherwise the strcoll result for the
   two one-character strings, falling back to the byte difference when
   strcoll reports a tie. */
static int
rangecmp (c1, c2)
     int c1, c2;
{
  static char lhs[2] = { ' ', '\0' };
  static char rhs[2] = { ' ', '\0' };
  int order;

  /* Eight bits only.  Period. */
  c1 &= 0xFF;
  c2 &= 0xFF;

  if (c1 != c2)
    {
      lhs[0] = c1;
      rhs[0] = c2;

      order = strcoll (lhs, rhs);
      if (order == 0)
	order = c1 - c2;
      return (order);
    }

  return (0);
}
#else /* !HAVE_STRCOLL */
#  define rangecmp(c1, c2)	((int)(c1) - (int)(c2))
#endif /* !HAVE_STRCOLL */
88 | ||
/* Nonzero when C1 and C2 belong to the same equivalence class.  With
   strcoll available that means rangecmp reports no difference; without
   it, the characters must be identical. */
#if defined (HAVE_STRCOLL)
static int
collequiv (c1, c2)
     int c1, c2;
{
  if (rangecmp (c1, c2) != 0)
    return (0);
  return (1);
}
#else
#  define collequiv(c1, c2)	((c1) == (c2))
#endif
98 | ||
99 | static int | |
100 | collsym (s, len) | |
101 | char *s; | |
102 | int len; | |
103 | { | |
104 | register struct _collsym *csp; | |
105 | ||
106 | for (csp = posix_collsyms; csp->name; csp++) | |
107 | { | |
108 | if (STREQN(csp->name, s, len) && csp->name[len] == '\0') | |
28ef6c31 | 109 | return (csp->code); |
cce855bc JA |
110 | } |
111 | if (len == 1) | |
112 | return s[0]; | |
113 | return -1; | |
114 | } | |
115 | ||
/* Match STRING against PATTERN under the FNM_* bits in FLAGS.  A null
   STRING or PATTERN never matches (FNM_NOMATCH).  When the C library's
   fnmatch is used (HAVE_LIBC_FNM_EXTMATCH) its return value is passed
   through unchanged; otherwise the result comes from gmatch, which returns
   0 on a match and FNM_NOMATCH on failure. */
#ifdef HAVE_LIBC_FNM_EXTMATCH
int
strmatch (pattern, string, flags)
     char *pattern;
     char *string;
     int flags;
{
  if (string == 0 || pattern == 0)
    return FNM_NOMATCH;

  return (fnmatch (pattern, string, flags));
}
#else /* !HAVE_LIBC_FNM_EXTMATCH */
int
strmatch (pattern, string, flags)
     char *pattern;
     char *string;
     int flags;
{
  char *se, *pe;

  if (string == 0 || pattern == 0)
    return FNM_NOMATCH;

  /* gmatch works on [start, end) ranges; both ends are the terminating
     NUL here. */
  se = string + strlen (string);
  pe = pattern + strlen (pattern);

  return (gmatch (string, se, pattern, pe, flags));
}
#endif /* !HAVE_LIBC_FNM_EXTMATCH */
cce855bc JA |
148 | |
/* gmatch -- the core pattern matcher.
   STRING..SE bounds the text to match and PATTERN..PE bounds the pattern;
   SE and PE point one past the last character of each range.  FLAGS is a
   mask of the FNM_* bits tested below (FNM_CASEFOLD through FOLD,
   FNM_EXTMATCH, FNM_PATHNAME, FNM_PERIOD, FNM_NOESCAPE, FNM_LEADING_DIR).
   Returns 0 on a match and FNM_NOMATCH otherwise; a null STRING or PATTERN
   never matches. */
149 | /* Match STRING against the filename pattern PATTERN, returning zero if | |
150 | it matches, FNM_NOMATCH if not. */ | |
151 | static int | |
152 | gmatch (string, se, pattern, pe, flags) | |
153 | char *string, *se; | |
154 | char *pattern, *pe; | |
155 | int flags; | |
156 | { | |
157 | register char *p, *n; /* pattern, string */ | |
158 | register char c; /* current pattern character */ | |
159 | register char sc; /* current string character */ | |
726f6388 | 160 | |
cce855bc JA |
161 | p = pattern; |
162 | n = string; | |
163 | ||
164 | if (string == 0 || pattern == 0) | |
165 | return FNM_NOMATCH; | |
166 | ||
28ef6c31 JA |
167 | #if DEBUG_MATCHING |
168 | fprintf(stderr, "gmatch: string = %s; se = %s\n", string, se); | |
169 | fprintf(stderr, "gmatch: pattern = %s; pe = %s\n", pattern, pe); | |
170 | #endif | |
171 | ||
cce855bc | 172 | while (p < pe) |
726f6388 | 173 | { |
cce855bc JA |
174 | c = *p++; |
175 | c = FOLD (c); | |
176 | ||
/* Past the end of the string SC reads as NUL, which the cases below use
   as the "string exhausted" test. */
177 | sc = n < se ? *n : '\0'; | |
178 | ||
179 | #ifdef EXTENDED_GLOB | |
b72432fd JA |
180 | /* extmatch () will handle recursively calling gmatch, so we can |
181 | just return what extmatch() returns. */ | |
cce855bc JA |
182 | if ((flags & FNM_EXTMATCH) && *p == '(' && |
183 | (c == '+' || c == '*' || c == '?' || c == '@' || c == '!')) /* ) */ | |
b72432fd JA |
184 | { |
185 | int lflags; | |
186 | /* If we're not matching the start of the string, we're not | |
187 | concerned about the special cases for matching `.' */ | |
188 | lflags = (n == string) ? flags : (flags & ~FNM_PERIOD); | |
189 | return (extmatch (c, n, se, p, pe, lflags)); | |
190 | } | |
cce855bc JA |
191 | #endif |
192 | ||
726f6388 JA |
193 | switch (c) |
194 | { | |
cce855bc JA |
195 | case '?': /* Match single character */ |
196 | if (sc == '\0') | |
197 | return FNM_NOMATCH; | |
198 | else if ((flags & FNM_PATHNAME) && sc == '/') | |
d166f048 | 199 | /* If we are matching a pathname, `?' can never match a `/'. */ |
cce855bc JA |
200 | return FNM_NOMATCH; |
201 | else if ((flags & FNM_PERIOD) && sc == '.' && | |
726f6388 | 202 | (n == string || ((flags & FNM_PATHNAME) && n[-1] == '/'))) |
d166f048 JA |
203 | /* `?' cannot match a `.' if it is the first character of the |
204 | string or if it is the first character following a slash and | |
205 | we are matching a pathname. */ | |
cce855bc | 206 | return FNM_NOMATCH; |
726f6388 JA |
207 | break; |
208 | ||
cce855bc JA |
209 | case '\\': /* backslash escape removes special meaning */ |
210 | if (p == pe) | |
211 | return FNM_NOMATCH; | |
212 | ||
213 | if ((flags & FNM_NOESCAPE) == 0) | |
d166f048 JA |
214 | { |
215 | c = *p++; | |
cce855bc JA |
216 | /* A trailing `\' cannot match. */ |
217 | if (p > pe) | |
218 | return FNM_NOMATCH; | |
219 | c = FOLD (c); | |
d166f048 | 220 | } |
bc4cd23c | 221 | if (FOLD (sc) != (unsigned char)c) |
cce855bc | 222 | return FNM_NOMATCH; |
726f6388 JA |
223 | break; |
224 | ||
cce855bc JA |
225 | case '*': /* Match zero or more characters */ |
226 | if (p == pe) | |
227 | return 0; | |
228 | ||
229 | if ((flags & FNM_PERIOD) && sc == '.' && | |
726f6388 | 230 | (n == string || ((flags & FNM_PATHNAME) && n[-1] == '/'))) |
d166f048 JA |
231 | /* `*' cannot match a `.' if it is the first character of the |
232 | string or if it is the first character following a slash and | |
233 | we are matching a pathname. */ | |
cce855bc | 234 | return FNM_NOMATCH; |
726f6388 | 235 | |
f73dda09 | 236 | /* Collapse multiple consecutive `*' and `?', but make sure that |
d166f048 | 237 | one character of the string is consumed for each `?'. */ |
cce855bc | 238 | for (c = *p++; (c == '?' || c == '*'); c = *p++) |
ccc6cda3 | 239 | { |
cce855bc | 240 | if ((flags & FNM_PATHNAME) && sc == '/') |
d166f048 | 241 | /* A slash does not match a wildcard under FNM_PATHNAME. */ |
cce855bc | 242 | return FNM_NOMATCH; |
d166f048 JA |
243 | else if (c == '?') |
244 | { | |
cce855bc JA |
245 | if (sc == '\0') |
246 | return FNM_NOMATCH; | |
d166f048 JA |
247 | /* One character of the string is consumed in matching |
248 | this ? wildcard, so *??? won't match if there are | |
249 | fewer than three characters. */ | |
250 | n++; | |
cce855bc | 251 | sc = n < se ? *n : '\0'; |
d166f048 | 252 | } |
cce855bc JA |
253 | |
254 | #ifdef EXTENDED_GLOB | |
255 | /* Handle ******(patlist) */ | |
256 | if ((flags & FNM_EXTMATCH) && c == '*' && *p == '(') /*)*/ | |
b72432fd JA |
257 | { |
258 | char *newn; | |
259 | /* We need to check whether or not the extended glob | |
260 | pattern matches the remainder of the string. | |
261 | If it does, we match the entire pattern. */ | |
262 | for (newn = n; newn < se; ++newn) | |
263 | { | |
264 | if (extmatch (c, newn, se, p, pe, flags) == 0) | |
265 | return (0); | |
266 | } | |
267 | /* We didn't match the extended glob pattern, but | |
268 | that's OK, since we can match 0 or more occurrences. | |
269 | We need to skip the glob pattern and see if we | |
270 | match the rest of the string. */ | |
bb70624e | 271 | newn = patscan (p + 1, pe, 0); |
f73dda09 JA |
272 | /* If NEWN is 0, we have an ill-formed pattern. */ |
273 | p = newn ? newn : pe; | |
b72432fd | 274 | } |
cce855bc JA |
275 | #endif |
276 | if (p == pe) | |
28ef6c31 | 277 | break; |
ccc6cda3 | 278 | } |
726f6388 | 279 | |
cce855bc JA |
280 | /* If we've hit the end of the pattern and the last character of |
281 | the pattern was handled by the loop above, we've succeeded. | |
282 | Otherwise, we need to match that last character. */ | |
283 | if (p == pe && (c == '?' || c == '*')) | |
726f6388 JA |
284 | return (0); |
285 | ||
d166f048 | 286 | /* General case, use recursion. */ |
726f6388 | 287 | { |
bc4cd23c | 288 | unsigned char c1; |
cce855bc | 289 | |
/* NOTE(review): the cast below binds to the parenthesized condition, not
   to the character that is selected; the assignment to C1 still narrows
   the selected value to `unsigned char'. */
bc4cd23c | 290 | c1 = (unsigned char)((flags & FNM_NOESCAPE) == 0 && c == '\\') ? *p : c;
cce855bc JA |
291 | c1 = FOLD (c1); |
/* Back P up to the first pattern character that follows the wildcards,
   then try the rest of the pattern at every remaining string position. */
292 | for (--p; n < se; ++n) | |
b72432fd | 293 | { |
f73dda09 | 294 | /* Only call strmatch if the first character indicates a |
b72432fd JA |
295 | possible match. We can check the first character if |
296 | we're not doing an extended glob match. */ | |
28ef6c31 | 297 | if ((flags & FNM_EXTMATCH) == 0 && c != '[' && FOLD (*n) != c1) /*]*/ |
b72432fd JA |
298 | continue; |
299 | ||
300 | /* If we're doing an extended glob match and the pattern is not | |
301 | one of the extended glob patterns, we can check the first | |
302 | character. */ | |
303 | if ((flags & FNM_EXTMATCH) && p[1] != '(' && /*)*/ | |
28ef6c31 | 304 | strchr ("?*+@!", *p) == 0 && c != '[' && FOLD (*n) != c1) /*]*/ |
b72432fd JA |
305 | continue; |
306 | ||
307 | /* Otherwise, we just recurse. */ | |
308 | if (gmatch (n, se, p, pe, flags & ~FNM_PERIOD) == 0) | |
309 | return (0); | |
310 | } | |
cce855bc | 311 | return FNM_NOMATCH; |
726f6388 JA |
312 | } |
313 | ||
314 | case '[': | |
315 | { | |
cce855bc JA |
316 | if (sc == '\0' || n == se) |
317 | return FNM_NOMATCH; | |
726f6388 | 318 | |
d166f048 JA |
319 | /* A character class cannot match a `.' if it is the first |
320 | character of the string or if it is the first character | |
321 | following a slash and we are matching a pathname. */ | |
cce855bc | 322 | if ((flags & FNM_PERIOD) && sc == '.' && |
726f6388 JA |
323 | (n == string || ((flags & FNM_PATHNAME) && n[-1] == '/'))) |
324 | return (FNM_NOMATCH); | |
325 | ||
cce855bc JA |
326 | p = brackmatch (p, sc, flags); |
327 | if (p == 0) | |
328 | return FNM_NOMATCH; | |
726f6388 JA |
329 | } |
330 | break; | |
331 | ||
332 | default: | |
bc4cd23c | 333 | if ((unsigned char)c != FOLD (sc)) |
726f6388 JA |
334 | return (FNM_NOMATCH); |
335 | } | |
336 | ||
/* Every case that reaches this point matched exactly one string
   character. */
337 | ++n; | |
338 | } | |
339 | ||
/* The pattern is used up: the match succeeds only if the string is used
   up as well, or FNM_LEADING_DIR allows a trailing `/...' component. */
cce855bc | 340 | if (n == se) |
726f6388 JA |
341 | return (0); |
342 | ||
cce855bc JA |
343 | if ((flags & FNM_LEADING_DIR) && *n == '/') |
344 | /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */ | |
345 | return 0; | |
28ef6c31 | 346 | |
cce855bc JA |
347 | return (FNM_NOMATCH); |
348 | } | |
349 | ||
/* Parse a bracket expression collating symbol ([.sym.]).  On entry P points
   at the `.' that follows the opening `['.  The value of the symbol, as
   computed by collsym, is stored in *VP if VP is not null, and the return
   value points just past the closing `.]'.

   If the pattern ends before a closing `.]' is found, the symbol is
   invalid: *VP is set to -1 and the return value points at the terminating
   NUL, so that the caller never reads beyond the end of the pattern. */
static char *
parse_collsym (p, vp)
     char *p;
     int *vp;
{
  register int pc;
  int val;

  p++;				/* move past the `.' */

  /* PC becomes the length of the symbol name. */
  for (pc = 0; p[pc]; pc++)
    if (p[pc] == '.' && p[pc+1] == ']')
      break;

  if (p[pc] == '\0')
    {
      /* Unterminated symbol: report it as invalid and stop at the NUL
	 instead of skipping two characters that are not there. */
      if (vp)
	*vp = -1;
      return (p + pc);
    }

  val = collsym (p, pc);
  if (vp)
    *vp = val;
  return (p + pc + 2);
}
371 | ||
372 | static char * | |
373 | brackmatch (p, test, flags) | |
374 | char *p; | |
375 | unsigned char test; | |
376 | int flags; | |
377 | { | |
378 | register char cstart, cend, c; | |
379 | register int not; /* Nonzero if the sense of the character class is inverted. */ | |
380 | int pc, brcnt; | |
381 | char *savep; | |
382 | ||
383 | test = FOLD (test); | |
384 | ||
385 | savep = p; | |
386 | ||
387 | /* POSIX.2 3.13.1 says that an exclamation mark (`!') shall replace the | |
388 | circumflex (`^') in its role in a `nonmatching list'. A bracket | |
28ef6c31 | 389 | expression starting with an unquoted circumflex character produces |
cce855bc JA |
390 | unspecified results. This implementation treats the two identically. */ |
391 | if (not = (*p == '!' || *p == '^')) | |
392 | ++p; | |
393 | ||
394 | c = *p++; | |
395 | for (;;) | |
396 | { | |
397 | /* Initialize cstart and cend in case `-' is the last | |
398 | character of the pattern. */ | |
399 | cstart = cend = c; | |
400 | ||
401 | /* POSIX.2 equivalence class: [=c=]. See POSIX.2 2.8.3.2. Find | |
402 | the end of the equivalence class, move the pattern pointer past | |
403 | it, and check for equivalence. XXX - this handles only | |
404 | single-character equivalence classes, which is wrong, or at | |
405 | least incomplete. */ | |
406 | if (c == '[' && *p == '=' && p[2] == '=' && p[3] == ']') | |
407 | { | |
408 | pc = FOLD (p[1]); | |
409 | p += 4; | |
410 | if (collequiv (test, pc)) | |
28ef6c31 JA |
411 | { |
412 | /*[*/ /* Move past the closing `]', since the first thing we do at | |
413 | the `matched:' label is back p up one. */ | |
414 | p++; | |
415 | goto matched; | |
416 | } | |
cce855bc JA |
417 | else |
418 | { | |
419 | c = *p++; | |
420 | if (c == '\0') | |
28ef6c31 | 421 | return ((test == '[') ? savep : (char *)0); /*]*/ |
cce855bc JA |
422 | c = FOLD (c); |
423 | continue; | |
424 | } | |
28ef6c31 | 425 | } |
cce855bc JA |
426 | |
427 | /* POSIX.2 character class expression. See POSIX.2 2.8.3.2. */ | |
28ef6c31 | 428 | if (c == '[' && *p == ':') /*]*/ |
cce855bc JA |
429 | { |
430 | pc = 0; /* make sure invalid char classes don't match. */ | |
431 | if (STREQN (p+1, "alnum:]", 7)) | |
f73dda09 | 432 | { pc = ISALNUM (test); p += 8; } |
cce855bc | 433 | else if (STREQN (p+1, "alpha:]", 7)) |
f73dda09 | 434 | { pc = ISALPHA (test); p += 8; } |
cce855bc | 435 | else if (STREQN (p+1, "blank:]", 7)) |
f73dda09 | 436 | { pc = ISBLANK (test); p += 8; } |
cce855bc | 437 | else if (STREQN (p+1, "cntrl:]", 7)) |
f73dda09 | 438 | { pc = ISCNTRL (test); p += 8; } |
cce855bc | 439 | else if (STREQN (p+1, "digit:]", 7)) |
f73dda09 | 440 | { pc = ISDIGIT (test); p += 8; } |
cce855bc | 441 | else if (STREQN (p+1, "graph:]", 7)) |
f73dda09 | 442 | { pc = ISGRAPH (test); p += 8; } |
cce855bc | 443 | else if (STREQN (p+1, "lower:]", 7)) |
f73dda09 | 444 | { pc = ISLOWER (test); p += 8; } |
cce855bc | 445 | else if (STREQN (p+1, "print:]", 7)) |
f73dda09 | 446 | { pc = ISPRINT (test); p += 8; } |
cce855bc | 447 | else if (STREQN (p+1, "punct:]", 7)) |
f73dda09 | 448 | { pc = ISPUNCT (test); p += 8; } |
cce855bc | 449 | else if (STREQN (p+1, "space:]", 7)) |
f73dda09 | 450 | { pc = ISSPACE (test); p += 8; } |
cce855bc | 451 | else if (STREQN (p+1, "upper:]", 7)) |
f73dda09 | 452 | { pc = ISUPPER (test); p += 8; } |
cce855bc | 453 | else if (STREQN (p+1, "xdigit:]", 8)) |
f73dda09 | 454 | { pc = ISXDIGIT (test); p += 9; } |
cce855bc JA |
455 | else if (STREQN (p+1, "ascii:]", 7)) |
456 | { pc = isascii (test); p += 8; } | |
457 | if (pc) | |
28ef6c31 JA |
458 | { |
459 | /*[*/ /* Move past the closing `]', since the first thing we do at | |
460 | the `matched:' label is back p up one. */ | |
461 | p++; | |
cce855bc | 462 | goto matched; |
28ef6c31 | 463 | } |
cce855bc JA |
464 | else |
465 | { | |
466 | /* continue the loop here, since this expression can't be | |
467 | the first part of a range expression. */ | |
468 | c = *p++; | |
469 | if (c == '\0') | |
470 | return ((test == '[') ? savep : (char *)0); | |
471 | else if (c == ']') | |
28ef6c31 | 472 | break; |
cce855bc JA |
473 | c = FOLD (c); |
474 | continue; | |
475 | } | |
476 | } | |
477 | ||
478 | /* POSIX.2 collating symbols. See POSIX.2 2.8.3.2. Find the end of | |
479 | the symbol name, make sure it is terminated by `.]', translate | |
480 | the name to a character using the external table, and do the | |
481 | comparison. */ | |
482 | if (c == '[' && *p == '.') | |
483 | { | |
484 | p = parse_collsym (p, &pc); | |
485 | /* An invalid collating symbol cannot be the first point of a | |
486 | range. If it is, we set cstart to one greater than `test', | |
487 | so any comparisons later will fail. */ | |
488 | cstart = (pc == -1) ? test + 1 : pc; | |
489 | } | |
490 | ||
491 | if (!(flags & FNM_NOESCAPE) && c == '\\') | |
492 | { | |
493 | if (*p == '\0') | |
494 | return (char *)0; | |
495 | cstart = cend = *p++; | |
496 | } | |
497 | ||
498 | cstart = cend = FOLD (cstart); | |
499 | ||
500 | /* POSIX.2 2.8.3.1.2 says: `An expression containing a `[' that | |
501 | is not preceded by a backslash and is not part of a bracket | |
502 | expression produces undefined results.' This implementation | |
503 | treats the `[' as just a character to be matched if there is | |
504 | not a closing `]'. */ | |
505 | if (c == '\0') | |
506 | return ((test == '[') ? savep : (char *)0); | |
507 | ||
508 | c = *p++; | |
509 | c = FOLD (c); | |
510 | ||
511 | if ((flags & FNM_PATHNAME) && c == '/') | |
512 | /* [/] can never match when matching a pathname. */ | |
513 | return (char *)0; | |
514 | ||
515 | /* This introduces a range, unless the `-' is the last | |
516 | character of the class. Find the end of the range | |
517 | and move past it. */ | |
518 | if (c == '-' && *p != ']') | |
519 | { | |
520 | cend = *p++; | |
521 | if (!(flags & FNM_NOESCAPE) && cend == '\\') | |
522 | cend = *p++; | |
523 | if (cend == '\0') | |
524 | return (char *)0; | |
525 | if (cend == '[' && *p == '.') | |
526 | { | |
527 | p = parse_collsym (p, &pc); | |
528 | /* An invalid collating symbol cannot be the second part of a | |
529 | range expression. If we get one, we set cend to one fewer | |
530 | than the test character to make sure the range test fails. */ | |
531 | cend = (pc == -1) ? test - 1 : pc; | |
532 | } | |
533 | cend = FOLD (cend); | |
534 | ||
535 | c = *p++; | |
536 | ||
537 | /* POSIX.2 2.8.3.2: ``The ending range point shall collate | |
538 | equal to or higher than the starting range point; otherwise | |
539 | the expression shall be treated as invalid.'' Note that this | |
540 | applies to only the range expression; the rest of the bracket | |
541 | expression is still checked for matches. */ | |
542 | if (rangecmp (cstart, cend) > 0) | |
543 | { | |
544 | if (c == ']') | |
28ef6c31 | 545 | break; |
cce855bc JA |
546 | c = FOLD (c); |
547 | continue; | |
548 | } | |
549 | } | |
550 | ||
551 | if (rangecmp (test, cstart) >= 0 && rangecmp (test, cend) <= 0) | |
552 | goto matched; | |
553 | ||
554 | if (c == ']') | |
555 | break; | |
556 | } | |
557 | /* No match. */ | |
558 | return (!not ? (char *)0 : p); | |
559 | ||
560 | matched: | |
561 | /* Skip the rest of the [...] that already matched. */ | |
28ef6c31 | 562 | #if 0 |
cce855bc | 563 | brcnt = (c != ']') + (c == '[' && (*p == '=' || *p == ':' || *p == '.')); |
28ef6c31 JA |
564 | #else |
565 | c = *--p; | |
566 | brcnt = 1; | |
567 | #endif | |
cce855bc JA |
568 | while (brcnt > 0) |
569 | { | |
570 | /* A `[' without a matching `]' is just another character to match. */ | |
571 | if (c == '\0') | |
572 | return ((test == '[') ? savep : (char *)0); | |
573 | ||
574 | c = *p++; | |
575 | if (c == '[' && (*p == '=' || *p == ':' || *p == '.')) | |
28ef6c31 | 576 | brcnt++; |
cce855bc | 577 | else if (c == ']') |
28ef6c31 | 578 | brcnt--; |
cce855bc JA |
579 | else if (!(flags & FNM_NOESCAPE) && c == '\\') |
580 | { | |
581 | if (*p == '\0') | |
582 | return (char *)0; | |
583 | /* XXX 1003.2d11 is unclear if this is right. */ | |
584 | ++p; | |
585 | } | |
586 | } | |
587 | return (not ? (char *)0 : p); | |
588 | } | |
589 | ||
590 | #if defined (EXTENDED_GLOB) | |
591 | /* ksh-like extended pattern matching: | |
592 | ||
593 | [?*+@!](pat-list) | |
594 | ||
595 | where pat-list is a list of one or more patterns separated by `|'. Operation | |
596 | is as follows: | |
597 | ||
598 | ?(patlist) match zero or one of the given patterns | |
599 | *(patlist) match zero or more of the given patterns | |
600 | +(patlist) match one or more of the given patterns | |
601 | @(patlist) match exactly one of the given patterns | |
602 | !(patlist) match anything except one of the given patterns | |
603 | */ | |
604 | ||
/* Find the end of a pattern that starts at STRING, never looking beyond
   END and honouring nested () and [].  With DELIM equal to 0 the scan is
   over a `patlist' and stops at the `)' that closes it; with DELIM equal
   to `|' it also stops at a top-level `|'.  The return value points at the
   character after the delimiter that stopped the scan, is the scan
   position itself when END is reached first, and is a null pointer when
   the pattern runs out before a delimiter is seen. */
static char *
patscan (string, end, delim)
     char *string, *end;
     int delim;
{
  char *cur;		/* current scan position */
  char *bracket_first;	/* first ordinary position inside the current [...] */
  int paren_depth;	/* unclosed `(' seen outside brackets */
  int in_bracket;	/* nonzero while inside a bracket expression */
  int posix_delim;	/* `:', `.' or `=' of an open [: :], [. .] or [= =] */

  paren_depth = 0;
  in_bracket = 0;
  posix_delim = 0;
  bracket_first = (char *)0;

  for (cur = string; *cur != '\0'; cur++)
    {
      if (cur >= end)
	return (cur);

      if (*cur == '[')
	{
	  /* `[' is not special inside a bracket expression, but it may
	     introduce one of the special POSIX bracket expressions
	     ([.SYM.], [=c=], [: ... :]) that needs special handling. */
	  if (in_bracket)
	    {
	      if (cur[1] == ':' || cur[1] == '.' || cur[1] == '=')
		posix_delim = cur[1];
	    }
	  else
	    {
	      bracket_first = cur + 1;
	      if (*bracket_first == '!' || *bracket_first == '^')
		bracket_first++;
	      in_bracket = 1;
	    }
	}
      else if (*cur == ']')
	{
	  /* `]' is not special if it's the first char (after a leading `!'
	     or `^') in a bracket expression or if it's part of one of the
	     special POSIX bracket expressions ([.SYM.], [=c=], [: ... :]) */
	  if (in_bracket == 0)
	    continue;
	  if (posix_delim != 0 && cur[-1] == posix_delim)
	    posix_delim = 0;
	  else if (cur != bracket_first)
	    {
	      in_bracket = 0;
	      bracket_first = (char *)0;
	    }
	}
      else if (*cur == '(')
	{
	  if (in_bracket == 0)
	    paren_depth++;
	}
      else if (*cur == ')')
	{
	  if (in_bracket == 0)
	    {
	      /* An unbalanced `)' is the one that closes the list. */
	      if (paren_depth <= 0)
		return (cur + 1);
	      paren_depth--;
	    }
	}
      else if (*cur == '|')
	{
	  if (delim == '|' && in_bracket == 0 && paren_depth == 0)
	    return (cur + 1);
	}
    }

  return ((char *)0);
}
686 | ||
/* Compare the pattern text P..PE with the string text S..SE as plain
   strings in the current locale (strcoll when available, strcmp
   otherwise).  Returns 0 when they compare equal and FNM_NOMATCH when they
   do not.  Both ranges are temporarily NUL-terminated in place, so the
   characters at PE and SE must be writable; they are restored before
   returning. */
static int
strcompare (p, pe, s, se)
     char *p, *pe, *s, *se;
{
  char saved_pat, saved_str;
  int differ;

  saved_pat = *pe;
  saved_str = *se;

  *se = '\0';
  *pe = '\0';

#if defined (HAVE_STRCOLL)
  differ = (strcoll (p, s) != 0);
#else
  differ = (strcmp (p, s) != 0);
#endif

  *pe = saved_pat;
  *se = saved_str;

  if (differ)
    return (FNM_NOMATCH);
  return (0);
}
710 | ||
/* extmatch -- match one extended-glob operator and everything after it.
   XC is the operator character (`+', `*', `?', `@' or `!').  S..SE bounds
   the string still to be matched.  P points at the `(' that opens the
   pattern list (so P - 1 is the operator character and P + 1 the first
   sub-pattern) and PE is the end of the whole pattern.  FLAGS is passed
   through to gmatch.  Returns 0 on a match and FNM_NOMATCH otherwise. */
711 | /* Match a ksh extended pattern specifier. Return FNM_NOMATCH on failure or | |
712 | 0 on success. This is handed the entire rest of the pattern and string | |
713 | the first time an extended pattern specifier is encountered, so it calls | |
714 | gmatch recursively. */ | |
715 | static int | |
716 | extmatch (xc, s, se, p, pe, flags) | |
717 | int xc; /* select which operation */ | |
718 | char *s, *se; | |
719 | char *p, *pe; | |
720 | int flags; | |
721 | { | |
722 | char *prest; /* pointer to rest of pattern */ | |
723 | char *psub; /* pointer to sub-pattern */ | |
724 | char *pnext; /* pointer to next sub-pattern */ | |
725 | char *srest; /* pointer to rest of string */ | |
726 | int m1, m2; | |
727 | ||
28ef6c31 | 728 | #if DEBUG_MATCHING |
bb70624e JA |
729 | fprintf(stderr, "extmatch: xc = %c\n", xc); |
730 | fprintf(stderr, "extmatch: s = %s; se = %s\n", s, se); | |
731 | fprintf(stderr, "extmatch: p = %s; pe = %s\n", p, pe); | |
732 | #endif | |
733 | ||
/* PREST is the first pattern character after the `)' that closes the
   pattern list. */
734 | prest = patscan (p + (*p == '('), pe, 0); /* ) */ | |
735 | if (prest == 0) | |
736 | /* If PREST is 0, we failed to scan a valid pattern. In this | |
737 | case, we just want to compare the two as strings. */ | |
738 | return (strcompare (p - 1, pe, s, se)); | |
739 | ||
cce855bc JA |
740 | switch (xc) |
741 | { | |
742 | case '+': /* match one or more occurrences */ | |
743 | case '*': /* match zero or more occurrences */ | |
cce855bc JA |
744 | /* If we can get away with no matches, don't even bother. Just |
745 | call gmatch on the rest of the pattern and return success if | |
746 | it succeeds. */ | |
747 | if (xc == '*' && (gmatch (s, se, prest, pe, flags) == 0)) | |
748 | return 0; | |
749 | ||
750 | /* OK, we have to do this the hard way. First, we make sure one of | |
28ef6c31 JA |
751 | the subpatterns matches, then we try to match the rest of the |
752 | string. */ | |
cce855bc JA |
753 | for (psub = p + 1; ; psub = pnext) |
754 | { | |
755 | pnext = patscan (psub, pe, '|'); | |
756 | for (srest = s; srest <= se; srest++) | |
757 | { | |
758 | /* Match this substring (S -> SREST) against this | |
759 | subpattern (psub -> pnext - 1) */ | |
760 | m1 = gmatch (s, srest, psub, pnext - 1, flags) == 0; | |
761 | /* OK, we matched a subpattern, so make sure the rest of the | |
762 | string matches the rest of the pattern. Also handle | |
763 | multiple matches of the pattern. */ | |
764 | if (m1) | |
765 | m2 = (gmatch (srest, se, prest, pe, flags) == 0) || | |
766 | (s != srest && gmatch (srest, se, p - 1, pe, flags) == 0); | |
767 | if (m1 && m2) | |
28ef6c31 | 768 | return (0); |
cce855bc JA |
769 | } |
/* The scan for `|' stopped at the closing `)': that was the last
   alternative in the list. */
770 | if (pnext == prest) | |
771 | break; | |
772 | } | |
773 | return (FNM_NOMATCH); | |
774 | ||
775 | case '?': /* match zero or one of the patterns */ | |
776 | case '@': /* match exactly one of the patterns */ | |
cce855bc JA |
777 | /* If we can get away with no matches, don't even bother. Just |
778 | call gmatch on the rest of the pattern and return success if | |
779 | it succeeds. */ | |
780 | if (xc == '?' && (gmatch (s, se, prest, pe, flags) == 0)) | |
781 | return 0; | |
782 | ||
783 | /* OK, we have to do this the hard way. First, we see if one of | |
784 | the subpatterns matches, then, if it does, we try to match the | |
785 | rest of the string. */ | |
786 | for (psub = p + 1; ; psub = pnext) | |
787 | { | |
788 | pnext = patscan (psub, pe, '|'); | |
/* When nothing follows the pattern list, only a sub-pattern match that
   reaches SE can succeed, so that is the only split point tried. */
789 | srest = (prest == pe) ? se : s; | |
790 | for ( ; srest <= se; srest++) | |
791 | { | |
792 | if (gmatch (s, srest, psub, pnext - 1, flags) == 0 && | |
793 | gmatch (srest, se, prest, pe, flags) == 0) | |
794 | return (0); | |
795 | } | |
796 | if (pnext == prest) | |
797 | break; | |
798 | } | |
799 | return (FNM_NOMATCH); | |
800 | ||
801 | case '!': /* match anything *except* one of the patterns */ | |
cce855bc JA |
802 | for (srest = s; srest <= se; srest++) |
803 | { | |
804 | m1 = 0; | |
805 | for (psub = p + 1; ; psub = pnext) | |
806 | { | |
807 | pnext = patscan (psub, pe, '|'); | |
808 | /* If one of the patterns matches, just bail immediately. */ | |
809 | if (m1 = (gmatch (s, srest, psub, pnext - 1, flags) == 0)) | |
810 | break; | |
811 | if (pnext == prest) | |
812 | break; | |
813 | } | |
814 | if (m1 == 0 && gmatch (srest, se, prest, pe, flags) == 0) | |
28ef6c31 | 815 | return (0); |
cce855bc JA |
816 | } |
817 | return (FNM_NOMATCH); | |
818 | } | |
819 | ||
726f6388 JA |
820 | return (FNM_NOMATCH); |
821 | } | |
cce855bc JA |
822 | #endif /* EXTENDED_GLOB */ |
823 | ||
#ifdef TEST
/* Stand-alone test driver, built only when TEST is defined.
   Usage: PROGRAM STRING PATTERN.  Prints whether STRING matches PATTERN
   (matched with no flags) and returns 0 on a match, 1 on no match, and 2
   when too few arguments are given. */
int
main (c, v)
     int c;
     char **v;
{
  char *string, *pat;

  /* Without both arguments there is nothing to match, and passing a null
     pointer to printf's %s would be undefined. */
  if (c < 3)
    {
      fprintf (stderr, "usage: %s string pattern\n",
	       (c > 0 && v[0]) ? v[0] : "strmatch");
      return (2);
    }

  string = v[1];
  pat = v[2];

  if (strmatch (pat, string, 0) == 0)
    {
      printf ("%s matches %s\n", string, pat);
      return (0);
    }

  printf ("%s does not match %s\n", string, pat);
  return (1);
}
#endif