]> git.ipfire.org Git - thirdparty/bash.git/blame - lib/glob/strmatch.c
Imported from ../bash-2.05a.tar.gz.
[thirdparty/bash.git] / lib / glob / strmatch.c
CommitLineData
/* strmatch.c -- ksh-like extended pattern matching for the shell and filename
		globbing. */

/* Copyright (C) 1991, 1997 Free Software Foundation, Inc.

   This file is part of GNU Bash, the Bourne Again SHell.

   Bash is free software; you can redistribute it and/or modify it under
   the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 2, or (at your option) any later
   version.

   Bash is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   for more details.

   You should have received a copy of the GNU General Public License along
   with Bash; see the file COPYING.  If not, write to the Free Software
   Foundation, 59 Temple Place, Suite 330, Boston, MA 02111 USA. */
cce855bc
JA
21
22#include <config.h>
bb70624e
JA
23
24#include <stdio.h> /* for debugging */
28ef6c31 25
f73dda09 26#include "strmatch.h"
cce855bc 27#include "collsyms.h"
f73dda09 28#include <chartypes.h>
726f6388 29
b72432fd
JA
30#if defined (HAVE_STRING_H)
31# include <string.h>
32#else
33# include <strings.h>
34#endif /* HAVE_STRING_H */
35
cce855bc
JA
/* Forward declarations for matchers that are called before they are
   defined in this file.  They are deliberately unprototyped, to agree with
   the old-style (K&R) definitions below. */
static int gmatch ();
static char *brackmatch ();
#ifdef EXTENDED_GLOB
static int extmatch ();
static char *patscan ();
#endif
42
/* Fallback used when neither a macro named isascii nor HAVE_ISASCII is
   defined: nonzero when C, converted to unsigned int, is no larger than
   octal 0177 (i.e. fits in seven bits). */
#if !defined (isascii) && !defined (HAVE_ISASCII)
#  define isascii(c)	((unsigned int)(c) <= 0177)
#endif
726f6388 46
/* FOLD (c) converts C to `unsigned char' and, when FNM_CASEFOLD is set in
   the variable `flags', passes it through TOLOWER.  A variable named
   `flags' must be in scope wherever the macro is expanded.  The argument
   is parenthesized inside the cast so that an expression argument such as
   `x + 1' is converted as a whole rather than only its first operand. */
#define FOLD(c) ((flags & FNM_CASEFOLD) \
	? TOLOWER ((unsigned char)(c)) \
	: ((unsigned char)(c)))
cce855bc
JA
51
/* String equality helpers, defined only if STREQ is not already defined.
   The first characters are compared before calling strcmp/strncmp as a
   cheap early rejection.  Both macros evaluate A and B more than once, so
   the arguments must be free of side effects.  Note that STREQN is false
   when the first characters differ even if N is 0. */
#ifndef STREQ
#define STREQ(a, b) ((a)[0] == (b)[0] && strcmp(a, b) == 0)
#define STREQN(a, b, n) ((a)[0] == (b)[0] && strncmp(a, b, n) == 0)
#endif
56
28ef6c31
JA
/* We use strcoll(3) for range comparisons in bracket expressions,
   even though it can have unwanted side effects in locales
   other than POSIX or US.  For instance, in the de locale, [A-Z] matches
   all characters. */

#if defined (HAVE_STRCOLL)
/* Compare the characters C1 and C2 for use in bracket-expression ranges.
   Both are first reduced to their low eight bits.  Returns 0 only when the
   two reduced values are identical; otherwise returns the result of
   strcoll on the two one-character strings, or, if strcoll reports a tie,
   the difference of the reduced values.  A negative result means C1
   sorts before C2. */
static int
rangecmp (c1, c2)
     int c1, c2;
{
  char s1[2], s2[2];
  int ret;

  /* Eight bits only.  Period. */
  c1 &= 0xFF;
  c2 &= 0xFF;

  if (c1 == c2)
    return (0);

  s1[0] = c1;
  s1[1] = '\0';
  s2[0] = c2;
  s2[1] = '\0';

  if ((ret = strcoll (s1, s2)) != 0)
    return ret;
  return (c1 - c2);
}
#else /* !HAVE_STRCOLL */
/* Without strcoll, compare by byte value.  The operands are masked to
   eight bits, as in the function above, so that a (possibly negative)
   `char' and an `unsigned char' holding the same byte compare equal. */
#  define rangecmp(c1, c2)	(((int)(c1) & 0xFF) - ((int)(c2) & 0xFF))
#endif /* !HAVE_STRCOLL */
88
#if defined (HAVE_STRCOLL)
/* Helper for equivalence classes ([=c=]): return nonzero if C1 and C2 are
   considered equivalent, i.e. if rangecmp reports them equal. */
static int
collequiv (c1, c2)
     int c1, c2;
{
  return (rangecmp (c1, c2) == 0);
}
#else
/* Without strcoll, two characters are equivalent only if they are equal. */
#  define collequiv(c1, c2)	((c1) == (c2))
#endif
98
99static int
100collsym (s, len)
101 char *s;
102 int len;
103{
104 register struct _collsym *csp;
105
106 for (csp = posix_collsyms; csp->name; csp++)
107 {
108 if (STREQN(csp->name, s, len) && csp->name[len] == '\0')
28ef6c31 109 return (csp->code);
cce855bc
JA
110 }
111 if (len == 1)
112 return s[0];
113 return -1;
114}
115
f73dda09
JA
116#ifdef HAVE_LIBC_FNM_EXTMATCH
117int
118strmatch (pattern, string, flags)
119 char *pattern;
120 char *string;
121 int flags;
122{
123 char *se, *pe;
124
125 if (string == 0 || pattern == 0)
126 return FNM_NOMATCH;
127
128 return (fnmatch (pattern, string, flags));
129}
130#else /* !HAVE_LIBC_FNM_EXTMATCH */
726f6388 131int
f73dda09 132strmatch (pattern, string, flags)
726f6388
JA
133 char *pattern;
134 char *string;
135 int flags;
136{
cce855bc 137 char *se, *pe;
726f6388 138
cce855bc
JA
139 if (string == 0 || pattern == 0)
140 return FNM_NOMATCH;
141
142 se = string + strlen (string);
143 pe = pattern + strlen (pattern);
144
145 return (gmatch (string, se, pattern, pe, flags));
146}
f73dda09 147#endif /* !HAVE_LIBC_FNM_EXTMATCH */
cce855bc
JA
148
149/* Match STRING against the filename pattern PATTERN, returning zero if
150 it matches, FNM_NOMATCH if not. */
151static int
152gmatch (string, se, pattern, pe, flags)
153 char *string, *se;
154 char *pattern, *pe;
155 int flags;
156{
157 register char *p, *n; /* pattern, string */
158 register char c; /* current pattern character */
159 register char sc; /* current string character */
726f6388 160
cce855bc
JA
161 p = pattern;
162 n = string;
163
164 if (string == 0 || pattern == 0)
165 return FNM_NOMATCH;
166
28ef6c31
JA
167#if DEBUG_MATCHING
168fprintf(stderr, "gmatch: string = %s; se = %s\n", string, se);
169fprintf(stderr, "gmatch: pattern = %s; pe = %s\n", pattern, pe);
170#endif
171
cce855bc 172 while (p < pe)
726f6388 173 {
cce855bc
JA
174 c = *p++;
175 c = FOLD (c);
176
177 sc = n < se ? *n : '\0';
178
179#ifdef EXTENDED_GLOB
b72432fd
JA
180 /* extmatch () will handle recursively calling gmatch, so we can
181 just return what extmatch() returns. */
cce855bc
JA
182 if ((flags & FNM_EXTMATCH) && *p == '(' &&
183 (c == '+' || c == '*' || c == '?' || c == '@' || c == '!')) /* ) */
b72432fd
JA
184 {
185 int lflags;
186 /* If we're not matching the start of the string, we're not
187 concerned about the special cases for matching `.' */
188 lflags = (n == string) ? flags : (flags & ~FNM_PERIOD);
189 return (extmatch (c, n, se, p, pe, lflags));
190 }
cce855bc
JA
191#endif
192
726f6388
JA
193 switch (c)
194 {
cce855bc
JA
195 case '?': /* Match single character */
196 if (sc == '\0')
197 return FNM_NOMATCH;
198 else if ((flags & FNM_PATHNAME) && sc == '/')
d166f048 199 /* If we are matching a pathname, `?' can never match a `/'. */
cce855bc
JA
200 return FNM_NOMATCH;
201 else if ((flags & FNM_PERIOD) && sc == '.' &&
726f6388 202 (n == string || ((flags & FNM_PATHNAME) && n[-1] == '/')))
d166f048
JA
203 /* `?' cannot match a `.' if it is the first character of the
204 string or if it is the first character following a slash and
205 we are matching a pathname. */
cce855bc 206 return FNM_NOMATCH;
726f6388
JA
207 break;
208
cce855bc
JA
209 case '\\': /* backslash escape removes special meaning */
210 if (p == pe)
211 return FNM_NOMATCH;
212
213 if ((flags & FNM_NOESCAPE) == 0)
d166f048
JA
214 {
215 c = *p++;
cce855bc
JA
216 /* A trailing `\' cannot match. */
217 if (p > pe)
218 return FNM_NOMATCH;
219 c = FOLD (c);
d166f048 220 }
bc4cd23c 221 if (FOLD (sc) != (unsigned char)c)
cce855bc 222 return FNM_NOMATCH;
726f6388
JA
223 break;
224
cce855bc
JA
225 case '*': /* Match zero or more characters */
226 if (p == pe)
227 return 0;
228
229 if ((flags & FNM_PERIOD) && sc == '.' &&
726f6388 230 (n == string || ((flags & FNM_PATHNAME) && n[-1] == '/')))
d166f048
JA
231 /* `*' cannot match a `.' if it is the first character of the
232 string or if it is the first character following a slash and
233 we are matching a pathname. */
cce855bc 234 return FNM_NOMATCH;
726f6388 235
f73dda09 236 /* Collapse multiple consecutive `*' and `?', but make sure that
d166f048 237 one character of the string is consumed for each `?'. */
cce855bc 238 for (c = *p++; (c == '?' || c == '*'); c = *p++)
ccc6cda3 239 {
cce855bc 240 if ((flags & FNM_PATHNAME) && sc == '/')
d166f048 241 /* A slash does not match a wildcard under FNM_PATHNAME. */
cce855bc 242 return FNM_NOMATCH;
d166f048
JA
243 else if (c == '?')
244 {
cce855bc
JA
245 if (sc == '\0')
246 return FNM_NOMATCH;
d166f048
JA
247 /* One character of the string is consumed in matching
248 this ? wildcard, so *??? won't match if there are
249 fewer than three characters. */
250 n++;
cce855bc 251 sc = n < se ? *n : '\0';
d166f048 252 }
cce855bc
JA
253
254#ifdef EXTENDED_GLOB
255 /* Handle ******(patlist) */
256 if ((flags & FNM_EXTMATCH) && c == '*' && *p == '(') /*)*/
b72432fd
JA
257 {
258 char *newn;
259 /* We need to check whether or not the extended glob
260 pattern matches the remainder of the string.
261 If it does, we match the entire pattern. */
262 for (newn = n; newn < se; ++newn)
263 {
264 if (extmatch (c, newn, se, p, pe, flags) == 0)
265 return (0);
266 }
267 /* We didn't match the extended glob pattern, but
268 that's OK, since we can match 0 or more occurrences.
269 We need to skip the glob pattern and see if we
270 match the rest of the string. */
bb70624e 271 newn = patscan (p + 1, pe, 0);
f73dda09
JA
272 /* If NEWN is 0, we have an ill-formed pattern. */
273 p = newn ? newn : pe;
b72432fd 274 }
cce855bc
JA
275#endif
276 if (p == pe)
28ef6c31 277 break;
ccc6cda3 278 }
726f6388 279
cce855bc
JA
280 /* If we've hit the end of the pattern and the last character of
281 the pattern was handled by the loop above, we've succeeded.
282 Otherwise, we need to match that last character. */
283 if (p == pe && (c == '?' || c == '*'))
726f6388
JA
284 return (0);
285
d166f048 286 /* General case, use recursion. */
726f6388 287 {
bc4cd23c 288 unsigned char c1;
cce855bc 289
bc4cd23c 290 c1 = (unsigned char)((flags & FNM_NOESCAPE) == 0 && c == '\\') ? *p : c;
cce855bc
JA
291 c1 = FOLD (c1);
292 for (--p; n < se; ++n)
b72432fd 293 {
f73dda09 294 /* Only call strmatch if the first character indicates a
b72432fd
JA
295 possible match. We can check the first character if
296 we're not doing an extended glob match. */
28ef6c31 297 if ((flags & FNM_EXTMATCH) == 0 && c != '[' && FOLD (*n) != c1) /*]*/
b72432fd
JA
298 continue;
299
300 /* If we're doing an extended glob match and the pattern is not
301 one of the extended glob patterns, we can check the first
302 character. */
303 if ((flags & FNM_EXTMATCH) && p[1] != '(' && /*)*/
28ef6c31 304 strchr ("?*+@!", *p) == 0 && c != '[' && FOLD (*n) != c1) /*]*/
b72432fd
JA
305 continue;
306
307 /* Otherwise, we just recurse. */
308 if (gmatch (n, se, p, pe, flags & ~FNM_PERIOD) == 0)
309 return (0);
310 }
cce855bc 311 return FNM_NOMATCH;
726f6388
JA
312 }
313
314 case '[':
315 {
cce855bc
JA
316 if (sc == '\0' || n == se)
317 return FNM_NOMATCH;
726f6388 318
d166f048
JA
319 /* A character class cannot match a `.' if it is the first
320 character of the string or if it is the first character
321 following a slash and we are matching a pathname. */
cce855bc 322 if ((flags & FNM_PERIOD) && sc == '.' &&
726f6388
JA
323 (n == string || ((flags & FNM_PATHNAME) && n[-1] == '/')))
324 return (FNM_NOMATCH);
325
cce855bc
JA
326 p = brackmatch (p, sc, flags);
327 if (p == 0)
328 return FNM_NOMATCH;
726f6388
JA
329 }
330 break;
331
332 default:
bc4cd23c 333 if ((unsigned char)c != FOLD (sc))
726f6388
JA
334 return (FNM_NOMATCH);
335 }
336
337 ++n;
338 }
339
cce855bc 340 if (n == se)
726f6388
JA
341 return (0);
342
cce855bc
JA
343 if ((flags & FNM_LEADING_DIR) && *n == '/')
344 /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */
345 return 0;
28ef6c31 346
cce855bc
JA
347 return (FNM_NOMATCH);
348}
349
/* Parse a bracket expression collating symbol ([.sym.]) starting at P, find
   the value of the symbol, and move P past the collating symbol expression.
   On entry P points at the `.' that follows the `['.  The value is returned
   in *VP, if VP is not null; it is -1 if the symbol is not recognized.
   If the pattern ends before the closing `.]' is found, the symbol is
   treated as invalid (-1) and the returned pointer addresses the
   terminating NUL, so callers never step past the end of the pattern. */
static char *
parse_collsym (p, vp)
     char *p;
     int *vp;
{
  register int pc;
  int val;

  p++;				/* move past the `.' */

  for (pc = 0; p[pc]; pc++)
    if (p[pc] == '.' && p[pc+1] == ']')
      break;

  if (p[pc] == '\0')
    {
      /* Unterminated collating symbol. */
      if (vp)
	*vp = -1;
      return (p + pc);
    }

  val = collsym (p, pc);
  if (vp)
    *vp = val;
  return (p + pc + 2);
}
371
372static char *
373brackmatch (p, test, flags)
374 char *p;
375 unsigned char test;
376 int flags;
377{
378 register char cstart, cend, c;
379 register int not; /* Nonzero if the sense of the character class is inverted. */
380 int pc, brcnt;
381 char *savep;
382
383 test = FOLD (test);
384
385 savep = p;
386
387 /* POSIX.2 3.13.1 says that an exclamation mark (`!') shall replace the
388 circumflex (`^') in its role in a `nonmatching list'. A bracket
28ef6c31 389 expression starting with an unquoted circumflex character produces
cce855bc
JA
390 unspecified results. This implementation treats the two identically. */
391 if (not = (*p == '!' || *p == '^'))
392 ++p;
393
394 c = *p++;
395 for (;;)
396 {
397 /* Initialize cstart and cend in case `-' is the last
398 character of the pattern. */
399 cstart = cend = c;
400
401 /* POSIX.2 equivalence class: [=c=]. See POSIX.2 2.8.3.2. Find
402 the end of the equivalence class, move the pattern pointer past
403 it, and check for equivalence. XXX - this handles only
404 single-character equivalence classes, which is wrong, or at
405 least incomplete. */
406 if (c == '[' && *p == '=' && p[2] == '=' && p[3] == ']')
407 {
408 pc = FOLD (p[1]);
409 p += 4;
410 if (collequiv (test, pc))
28ef6c31
JA
411 {
412/*[*/ /* Move past the closing `]', since the first thing we do at
413 the `matched:' label is back p up one. */
414 p++;
415 goto matched;
416 }
cce855bc
JA
417 else
418 {
419 c = *p++;
420 if (c == '\0')
28ef6c31 421 return ((test == '[') ? savep : (char *)0); /*]*/
cce855bc
JA
422 c = FOLD (c);
423 continue;
424 }
28ef6c31 425 }
cce855bc
JA
426
427 /* POSIX.2 character class expression. See POSIX.2 2.8.3.2. */
28ef6c31 428 if (c == '[' && *p == ':') /*]*/
cce855bc
JA
429 {
430 pc = 0; /* make sure invalid char classes don't match. */
431 if (STREQN (p+1, "alnum:]", 7))
f73dda09 432 { pc = ISALNUM (test); p += 8; }
cce855bc 433 else if (STREQN (p+1, "alpha:]", 7))
f73dda09 434 { pc = ISALPHA (test); p += 8; }
cce855bc 435 else if (STREQN (p+1, "blank:]", 7))
f73dda09 436 { pc = ISBLANK (test); p += 8; }
cce855bc 437 else if (STREQN (p+1, "cntrl:]", 7))
f73dda09 438 { pc = ISCNTRL (test); p += 8; }
cce855bc 439 else if (STREQN (p+1, "digit:]", 7))
f73dda09 440 { pc = ISDIGIT (test); p += 8; }
cce855bc 441 else if (STREQN (p+1, "graph:]", 7))
f73dda09 442 { pc = ISGRAPH (test); p += 8; }
cce855bc 443 else if (STREQN (p+1, "lower:]", 7))
f73dda09 444 { pc = ISLOWER (test); p += 8; }
cce855bc 445 else if (STREQN (p+1, "print:]", 7))
f73dda09 446 { pc = ISPRINT (test); p += 8; }
cce855bc 447 else if (STREQN (p+1, "punct:]", 7))
f73dda09 448 { pc = ISPUNCT (test); p += 8; }
cce855bc 449 else if (STREQN (p+1, "space:]", 7))
f73dda09 450 { pc = ISSPACE (test); p += 8; }
cce855bc 451 else if (STREQN (p+1, "upper:]", 7))
f73dda09 452 { pc = ISUPPER (test); p += 8; }
cce855bc 453 else if (STREQN (p+1, "xdigit:]", 8))
f73dda09 454 { pc = ISXDIGIT (test); p += 9; }
cce855bc
JA
455 else if (STREQN (p+1, "ascii:]", 7))
456 { pc = isascii (test); p += 8; }
457 if (pc)
28ef6c31
JA
458 {
459/*[*/ /* Move past the closing `]', since the first thing we do at
460 the `matched:' label is back p up one. */
461 p++;
cce855bc 462 goto matched;
28ef6c31 463 }
cce855bc
JA
464 else
465 {
466 /* continue the loop here, since this expression can't be
467 the first part of a range expression. */
468 c = *p++;
469 if (c == '\0')
470 return ((test == '[') ? savep : (char *)0);
471 else if (c == ']')
28ef6c31 472 break;
cce855bc
JA
473 c = FOLD (c);
474 continue;
475 }
476 }
477
478 /* POSIX.2 collating symbols. See POSIX.2 2.8.3.2. Find the end of
479 the symbol name, make sure it is terminated by `.]', translate
480 the name to a character using the external table, and do the
481 comparison. */
482 if (c == '[' && *p == '.')
483 {
484 p = parse_collsym (p, &pc);
485 /* An invalid collating symbol cannot be the first point of a
486 range. If it is, we set cstart to one greater than `test',
487 so any comparisons later will fail. */
488 cstart = (pc == -1) ? test + 1 : pc;
489 }
490
491 if (!(flags & FNM_NOESCAPE) && c == '\\')
492 {
493 if (*p == '\0')
494 return (char *)0;
495 cstart = cend = *p++;
496 }
497
498 cstart = cend = FOLD (cstart);
499
500 /* POSIX.2 2.8.3.1.2 says: `An expression containing a `[' that
501 is not preceded by a backslash and is not part of a bracket
502 expression produces undefined results.' This implementation
503 treats the `[' as just a character to be matched if there is
504 not a closing `]'. */
505 if (c == '\0')
506 return ((test == '[') ? savep : (char *)0);
507
508 c = *p++;
509 c = FOLD (c);
510
511 if ((flags & FNM_PATHNAME) && c == '/')
512 /* [/] can never match when matching a pathname. */
513 return (char *)0;
514
515 /* This introduces a range, unless the `-' is the last
516 character of the class. Find the end of the range
517 and move past it. */
518 if (c == '-' && *p != ']')
519 {
520 cend = *p++;
521 if (!(flags & FNM_NOESCAPE) && cend == '\\')
522 cend = *p++;
523 if (cend == '\0')
524 return (char *)0;
525 if (cend == '[' && *p == '.')
526 {
527 p = parse_collsym (p, &pc);
528 /* An invalid collating symbol cannot be the second part of a
529 range expression. If we get one, we set cend to one fewer
530 than the test character to make sure the range test fails. */
531 cend = (pc == -1) ? test - 1 : pc;
532 }
533 cend = FOLD (cend);
534
535 c = *p++;
536
537 /* POSIX.2 2.8.3.2: ``The ending range point shall collate
538 equal to or higher than the starting range point; otherwise
539 the expression shall be treated as invalid.'' Note that this
540 applies to only the range expression; the rest of the bracket
541 expression is still checked for matches. */
542 if (rangecmp (cstart, cend) > 0)
543 {
544 if (c == ']')
28ef6c31 545 break;
cce855bc
JA
546 c = FOLD (c);
547 continue;
548 }
549 }
550
551 if (rangecmp (test, cstart) >= 0 && rangecmp (test, cend) <= 0)
552 goto matched;
553
554 if (c == ']')
555 break;
556 }
557 /* No match. */
558 return (!not ? (char *)0 : p);
559
560matched:
561 /* Skip the rest of the [...] that already matched. */
28ef6c31 562#if 0
cce855bc 563 brcnt = (c != ']') + (c == '[' && (*p == '=' || *p == ':' || *p == '.'));
28ef6c31
JA
564#else
565 c = *--p;
566 brcnt = 1;
567#endif
cce855bc
JA
568 while (brcnt > 0)
569 {
570 /* A `[' without a matching `]' is just another character to match. */
571 if (c == '\0')
572 return ((test == '[') ? savep : (char *)0);
573
574 c = *p++;
575 if (c == '[' && (*p == '=' || *p == ':' || *p == '.'))
28ef6c31 576 brcnt++;
cce855bc 577 else if (c == ']')
28ef6c31 578 brcnt--;
cce855bc
JA
579 else if (!(flags & FNM_NOESCAPE) && c == '\\')
580 {
581 if (*p == '\0')
582 return (char *)0;
583 /* XXX 1003.2d11 is unclear if this is right. */
584 ++p;
585 }
586 }
587 return (not ? (char *)0 : p);
588}
589
590#if defined (EXTENDED_GLOB)
/* ksh-like extended pattern matching:

	[?*+@!](pat-list)

   where pat-list is a list of one or more patterns separated by `|'.
   Operation is as follows:

	?(patlist)	match zero or one of the given patterns
	*(patlist)	match zero or more of the given patterns
	+(patlist)	match one or more of the given patterns
	@(patlist)	match exactly one of the given patterns
	!(patlist)	match anything except one of the given patterns
*/
604
/* Scan a pattern starting at STRING and ending at END, keeping track of
   embedded () and [].  If DELIM is 0, we scan until a matching `)'
   because we're scanning a `patlist'.  Otherwise, we scan until we see
   DELIM (only `|' is recognized as a delimiter) or the matching `)',
   whichever comes first.  In all cases, we never scan past END.
   The return value is the first character after the matching delimiter.
   If END is reached first, the current position (at or beyond END) is
   returned; if a NUL is reached first, a null pointer is returned. */
static char *
patscan (string, end, delim)
     char *string, *end;
     int delim;
{
  int pnest, bnest, cchar;
  char *s, c, *bfirst;

  pnest = bnest = cchar = 0;
  bfirst = 0;
  for (s = string; (c = *s) != '\0'; s++)
    {
      if (s >= end)
	return (s);
      switch (c)
	{
	/* `[' is not special inside a bracket expression, but it may
	   introduce one of the special POSIX bracket expressions
	   ([.SYM.], [=c=], [: ... :]) that needs special handling. */
	case '[':
	  if (bnest == 0)
	    {
	      /* Remember where the first member of the bracket expression
		 is, so a `]' there is not taken as the terminator. */
	      bfirst = s + 1;
	      if (*bfirst == '!' || *bfirst == '^')
		bfirst++;
	      bnest++;
	    }
	  else if (s[1] == ':' || s[1] == '.' || s[1] == '=')
	    cchar = s[1];
	  break;

	/* `]' is not special if it's the first char (after a leading `!'
	   or `^') in a bracket expression or if it's part of one of the
	   special POSIX bracket expressions ([.SYM.], [=c=], [: ... :]) */
	case ']':
	  if (bnest)
	    {
	      if (cchar && s[-1] == cchar)
		cchar = 0;
	      else if (s != bfirst)
		{
		  bnest--;
		  bfirst = 0;
		}
	    }
	  break;

	case '(':
	  if (bnest == 0)
	    pnest++;
	  break;

	case ')':
	  /* Outside brackets, a `)' with no open nested `(' ends the scan;
	     otherwise it closes one nesting level. */
	  if (bnest == 0 && pnest-- <= 0)
	    return ++s;
	  break;

	case '|':
	  if (bnest == 0 && pnest == 0 && delim == '|')
	    return ++s;
	  break;
	}
    }

  return (char *)0;
}
686
/* Return 0 if the pattern text [P, PE) is identical to the string
   [S, SE) -- compared with strcoll if available, strcmp otherwise --
   and FNM_NOMATCH if not.  The two ranges are NUL-terminated in place for
   the duration of the comparison and restored afterwards.  Nothing is
   written when an end pointer already addresses a NUL, so strings that are
   compared up to their own terminators are never modified. */
static int
strcompare (p, pe, s, se)
     char *p, *pe, *s, *se;
{
  int ret;
  char c1, c2;

  c1 = *pe;
  c2 = *se;

  if (c1 != '\0')
    *pe = '\0';
  if (c2 != '\0')
    *se = '\0';

#if defined (HAVE_STRCOLL)
  ret = strcoll (p, s);
#else
  ret = strcmp (p, s);
#endif

  if (c1 != '\0')
    *pe = c1;
  if (c2 != '\0')
    *se = c2;

  return (ret == 0 ? ret : FNM_NOMATCH);
}
710
711/* Match a ksh extended pattern specifier. Return FNM_NOMATCH on failure or
712 0 on success. This is handed the entire rest of the pattern and string
713 the first time an extended pattern specifier is encountered, so it calls
714 gmatch recursively. */
715static int
716extmatch (xc, s, se, p, pe, flags)
717 int xc; /* select which operation */
718 char *s, *se;
719 char *p, *pe;
720 int flags;
721{
722 char *prest; /* pointer to rest of pattern */
723 char *psub; /* pointer to sub-pattern */
724 char *pnext; /* pointer to next sub-pattern */
725 char *srest; /* pointer to rest of string */
726 int m1, m2;
727
28ef6c31 728#if DEBUG_MATCHING
bb70624e
JA
729fprintf(stderr, "extmatch: xc = %c\n", xc);
730fprintf(stderr, "extmatch: s = %s; se = %s\n", s, se);
731fprintf(stderr, "extmatch: p = %s; pe = %s\n", p, pe);
732#endif
733
734 prest = patscan (p + (*p == '('), pe, 0); /* ) */
735 if (prest == 0)
736 /* If PREST is 0, we failed to scan a valid pattern. In this
737 case, we just want to compare the two as strings. */
738 return (strcompare (p - 1, pe, s, se));
739
cce855bc
JA
740 switch (xc)
741 {
742 case '+': /* match one or more occurrences */
743 case '*': /* match zero or more occurrences */
cce855bc
JA
744 /* If we can get away with no matches, don't even bother. Just
745 call gmatch on the rest of the pattern and return success if
746 it succeeds. */
747 if (xc == '*' && (gmatch (s, se, prest, pe, flags) == 0))
748 return 0;
749
750 /* OK, we have to do this the hard way. First, we make sure one of
28ef6c31
JA
751 the subpatterns matches, then we try to match the rest of the
752 string. */
cce855bc
JA
753 for (psub = p + 1; ; psub = pnext)
754 {
755 pnext = patscan (psub, pe, '|');
756 for (srest = s; srest <= se; srest++)
757 {
758 /* Match this substring (S -> SREST) against this
759 subpattern (psub -> pnext - 1) */
760 m1 = gmatch (s, srest, psub, pnext - 1, flags) == 0;
761 /* OK, we matched a subpattern, so make sure the rest of the
762 string matches the rest of the pattern. Also handle
763 multiple matches of the pattern. */
764 if (m1)
765 m2 = (gmatch (srest, se, prest, pe, flags) == 0) ||
766 (s != srest && gmatch (srest, se, p - 1, pe, flags) == 0);
767 if (m1 && m2)
28ef6c31 768 return (0);
cce855bc
JA
769 }
770 if (pnext == prest)
771 break;
772 }
773 return (FNM_NOMATCH);
774
775 case '?': /* match zero or one of the patterns */
776 case '@': /* match exactly one of the patterns */
cce855bc
JA
777 /* If we can get away with no matches, don't even bother. Just
778 call gmatch on the rest of the pattern and return success if
779 it succeeds. */
780 if (xc == '?' && (gmatch (s, se, prest, pe, flags) == 0))
781 return 0;
782
783 /* OK, we have to do this the hard way. First, we see if one of
784 the subpatterns matches, then, if it does, we try to match the
785 rest of the string. */
786 for (psub = p + 1; ; psub = pnext)
787 {
788 pnext = patscan (psub, pe, '|');
789 srest = (prest == pe) ? se : s;
790 for ( ; srest <= se; srest++)
791 {
792 if (gmatch (s, srest, psub, pnext - 1, flags) == 0 &&
793 gmatch (srest, se, prest, pe, flags) == 0)
794 return (0);
795 }
796 if (pnext == prest)
797 break;
798 }
799 return (FNM_NOMATCH);
800
801 case '!': /* match anything *except* one of the patterns */
cce855bc
JA
802 for (srest = s; srest <= se; srest++)
803 {
804 m1 = 0;
805 for (psub = p + 1; ; psub = pnext)
806 {
807 pnext = patscan (psub, pe, '|');
808 /* If one of the patterns matches, just bail immediately. */
809 if (m1 = (gmatch (s, srest, psub, pnext - 1, flags) == 0))
810 break;
811 if (pnext == prest)
812 break;
813 }
814 if (m1 == 0 && gmatch (srest, se, prest, pe, flags) == 0)
28ef6c31 815 return (0);
cce855bc
JA
816 }
817 return (FNM_NOMATCH);
818 }
819
726f6388
JA
820 return (FNM_NOMATCH);
821}
cce855bc
JA
822#endif /* EXTENDED_GLOB */
823
#ifdef TEST
/* Stand-alone test driver, built only when TEST is defined.  Takes a
   string and a pattern as its first and second arguments, reports whether
   strmatch (with no flags) says they match, and returns 0 on a match, 1 on
   no match, or 2 if too few arguments were supplied. */
int
main (c, v)
     int c;
     char **v;
{
  char *string, *pat;

  if (c < 3)
    {
      fprintf (stderr, "usage: %s string pattern\n", c > 0 ? v[0] : "strmatch");
      return (2);
    }

  string = v[1];
  pat = v[2];

  if (strmatch (pat, string, 0) == 0)
    {
      printf ("%s matches %s\n", string, pat);
      return (0);
    }
  else
    {
      printf ("%s does not match %s\n", string, pat);
      return (1);
    }
}
#endif