]> git.ipfire.org Git - thirdparty/glibc.git/blame - posix/fnmatch_loop.c
2.5-18.1
[thirdparty/glibc.git] / posix / fnmatch_loop.c
CommitLineData
0ecb606c
JJ
1/* Copyright (C) 1991,1992,1993,1996,1997,1998,1999,2000,2001,2003,2004,2005
2 Free Software Foundation, Inc.
1fc82a56
UD
3 This file is part of the GNU C Library.
4
41bdb6e2
AJ
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
1fc82a56 9
41bdb6e2 10 The GNU C Library is distributed in the hope that it will be useful,
1fc82a56
UD
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
41bdb6e2 13 Lesser General Public License for more details.
1fc82a56 14
41bdb6e2
AJ
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, write to the Free
17 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
18 02111-1307 USA. */
1fc82a56
UD
19
20/* Match STRING against the filename pattern PATTERN, returning zero if
21 it matches, nonzero if not. */
22static int FCT (const CHAR *pattern, const CHAR *string,
955994e1
UD
23 const CHAR *string_end, int no_leading_period, int flags)
24 internal_function;
25static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
26 const CHAR *string_end, int no_leading_period, int flags)
27 internal_function;
28static const CHAR *END (const CHAR *patternp) internal_function;
1fc82a56
UD
29
30static int
31internal_function
955994e1 32FCT (pattern, string, string_end, no_leading_period, flags)
1fc82a56
UD
33 const CHAR *pattern;
34 const CHAR *string;
955994e1 35 const CHAR *string_end;
1fc82a56
UD
36 int no_leading_period;
37 int flags;
38{
39 register const CHAR *p = pattern, *n = string;
40 register UCHAR c;
acb5ee2e 41#ifdef _LIBC
4c7d276e
UD
42# if WIDE_CHAR_VERSION
43 const char *collseq = (const char *)
44 _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
45# else
acb5ee2e 46 const UCHAR *collseq = (const UCHAR *)
4c7d276e 47 _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
acb5ee2e
UD
48# endif
49#endif
1fc82a56
UD
50
51 while ((c = *p++) != L('\0'))
52 {
955994e1 53 int new_no_leading_period = 0;
1fc82a56
UD
54 c = FOLD (c);
55
56 switch (c)
57 {
58 case L('?'):
955994e1
UD
59 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
60 {
61 int res;
62
63 res = EXT (c, p, n, string_end, no_leading_period,
64 flags);
65 if (res != -1)
66 return res;
67 }
68
69 if (n == string_end)
1fc82a56
UD
70 return FNM_NOMATCH;
71 else if (*n == L('/') && (flags & FNM_FILE_NAME))
72 return FNM_NOMATCH;
955994e1 73 else if (*n == L('.') && no_leading_period)
1fc82a56
UD
74 return FNM_NOMATCH;
75 break;
76
77 case L('\\'):
78 if (!(flags & FNM_NOESCAPE))
79 {
80 c = *p++;
81 if (c == L('\0'))
82 /* Trailing \ loses. */
83 return FNM_NOMATCH;
84 c = FOLD (c);
85 }
955994e1 86 if (n == string_end || FOLD ((UCHAR) *n) != c)
1fc82a56
UD
87 return FNM_NOMATCH;
88 break;
89
90 case L('*'):
955994e1
UD
91 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
92 {
93 int res;
94
95 res = EXT (c, p, n, string_end, no_leading_period,
96 flags);
97 if (res != -1)
98 return res;
99 }
100
101 if (n != string_end && *n == L('.') && no_leading_period)
1fc82a56
UD
102 return FNM_NOMATCH;
103
104 for (c = *p++; c == L('?') || c == L('*'); c = *p++)
105 {
955994e1
UD
106 if (*p == L('(') && (flags & FNM_EXTMATCH) != 0)
107 {
108 const CHAR *endp = END (p);
109 if (endp != p)
110 {
111 /* This is a pattern. Skip over it. */
112 p = endp;
113 continue;
114 }
115 }
116
117 if (c == L('?'))
1fc82a56
UD
118 {
119 /* A ? needs to match one character. */
955994e1 120 if (n == string_end)
1fc82a56
UD
121 /* There isn't another character; no match. */
122 return FNM_NOMATCH;
955994e1
UD
123 else if (*n == L('/')
124 && __builtin_expect (flags & FNM_FILE_NAME, 0))
125 /* A slash does not match a wildcard under
126 FNM_FILE_NAME. */
127 return FNM_NOMATCH;
1fc82a56
UD
128 else
129 /* One character of the string is consumed in matching
130 this ? wildcard, so *??? won't match if there are
131 less than three characters. */
132 ++n;
133 }
134 }
135
136 if (c == L('\0'))
137 /* The wildcard(s) is/are the last element of the pattern.
138 If the name is a file name and contains another slash
a4607c1f
UD
139 this means it cannot match, unless the FNM_LEADING_DIR
140 flag is set. */
47924894
UD
141 {
142 int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
143
144 if (flags & FNM_FILE_NAME)
145 {
47924894 146 if (flags & FNM_LEADING_DIR)
a4607c1f 147 result = 0;
47924894
UD
148 else
149 {
955994e1 150 if (MEMCHR (n, L('/'), string_end - n) == NULL)
47924894
UD
151 result = 0;
152 }
153 }
154
155 return result;
156 }
1fc82a56
UD
157 else
158 {
159 const CHAR *endp;
160
955994e1
UD
161 endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L('/') : L('\0'),
162 string_end - n);
163 if (endp == NULL)
164 endp = string_end;
1fc82a56 165
955994e1
UD
166 if (c == L('[')
167 || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0
228293b5
UD
168 && (c == L('@') || c == L('+') || c == L('!'))
169 && *p == L('(')))
1fc82a56
UD
170 {
171 int flags2 = ((flags & FNM_FILE_NAME)
172 ? flags : (flags & ~FNM_PERIOD));
955994e1 173 int no_leading_period2 = no_leading_period;
1fc82a56 174
955994e1
UD
175 for (--p; n < endp; ++n, no_leading_period2 = 0)
176 if (FCT (p, n, string_end, no_leading_period2, flags2)
177 == 0)
1fc82a56
UD
178 return 0;
179 }
180 else if (c == L('/') && (flags & FNM_FILE_NAME))
181 {
955994e1 182 while (n < string_end && *n != L('/'))
1fc82a56 183 ++n;
955994e1
UD
184 if (n < string_end && *n == L('/')
185 && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags)
186 == 0))
1fc82a56
UD
187 return 0;
188 }
189 else
190 {
191 int flags2 = ((flags & FNM_FILE_NAME)
192 ? flags : (flags & ~FNM_PERIOD));
955994e1 193 int no_leading_period2 = no_leading_period;
1fc82a56
UD
194
195 if (c == L('\\') && !(flags & FNM_NOESCAPE))
196 c = *p;
197 c = FOLD (c);
955994e1 198 for (--p; n < endp; ++n, no_leading_period2 = 0)
1fc82a56 199 if (FOLD ((UCHAR) *n) == c
955994e1
UD
200 && (FCT (p, n, string_end, no_leading_period2, flags2)
201 == 0))
1fc82a56
UD
202 return 0;
203 }
204 }
205
206 /* If we come here no match is possible with the wildcard. */
207 return FNM_NOMATCH;
208
209 case L('['):
210 {
83b1b6d8 211 /* Nonzero if the sense of the character class is inverted. */
1fc82a56
UD
212 register int not;
213 CHAR cold;
955994e1 214 UCHAR fn;
1fc82a56
UD
215
216 if (posixly_correct == 0)
217 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
218
955994e1 219 if (n == string_end)
1fc82a56
UD
220 return FNM_NOMATCH;
221
955994e1 222 if (*n == L('.') && no_leading_period)
1fc82a56
UD
223 return FNM_NOMATCH;
224
225 if (*n == L('/') && (flags & FNM_FILE_NAME))
226 /* `/' cannot be matched. */
227 return FNM_NOMATCH;
228
229 not = (*p == L('!') || (posixly_correct < 0 && *p == L('^')));
230 if (not)
231 ++p;
232
955994e1
UD
233 fn = FOLD ((UCHAR) *n);
234
1fc82a56
UD
235 c = *p++;
236 for (;;)
237 {
1fc82a56
UD
238 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
239 {
240 if (*p == L('\0'))
241 return FNM_NOMATCH;
242 c = FOLD ((UCHAR) *p);
243 ++p;
244
aae95a1b 245 goto normal_bracket;
1fc82a56
UD
246 }
247 else if (c == L('[') && *p == L(':'))
248 {
249 /* Leave room for the null. */
250 CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
251 size_t c1 = 0;
acb5ee2e 252#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
1fc82a56 253 wctype_t wt;
acb5ee2e 254#endif
1fc82a56
UD
255 const CHAR *startp = p;
256
257 for (;;)
258 {
259 if (c1 == CHAR_CLASS_MAX_LENGTH)
260 /* The name is too long and therefore the pattern
261 is ill-formed. */
262 return FNM_NOMATCH;
263
264 c = *++p;
265 if (c == L(':') && p[1] == L(']'))
266 {
267 p += 2;
268 break;
269 }
270 if (c < L('a') || c >= L('z'))
271 {
272 /* This cannot possibly be a character class name.
273 Match it as a normal range. */
274 p = startp;
275 c = L('[');
276 goto normal_bracket;
277 }
278 str[c1++] = c;
279 }
280 str[c1] = L('\0');
281
acb5ee2e 282#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
1fc82a56
UD
283 wt = IS_CHAR_CLASS (str);
284 if (wt == 0)
285 /* Invalid character class name. */
286 return FNM_NOMATCH;
287
7239b3a4 288# if defined _LIBC && ! WIDE_CHAR_VERSION
8fb81470
UD
289 /* The following code is glibc specific but does
290 there a good job in speeding up the code since
291 we can avoid the btowc() call. */
4c7d276e
UD
292 if (_ISCTYPE ((UCHAR) *n, wt))
293 goto matched;
8fb81470
UD
294# else
295 if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
296 goto matched;
7239b3a4 297# endif
acb5ee2e 298#else
1fc82a56
UD
299 if ((STREQ (str, L("alnum")) && ISALNUM ((UCHAR) *n))
300 || (STREQ (str, L("alpha")) && ISALPHA ((UCHAR) *n))
301 || (STREQ (str, L("blank")) && ISBLANK ((UCHAR) *n))
302 || (STREQ (str, L("cntrl")) && ISCNTRL ((UCHAR) *n))
303 || (STREQ (str, L("digit")) && ISDIGIT ((UCHAR) *n))
304 || (STREQ (str, L("graph")) && ISGRAPH ((UCHAR) *n))
305 || (STREQ (str, L("lower")) && ISLOWER ((UCHAR) *n))
306 || (STREQ (str, L("print")) && ISPRINT ((UCHAR) *n))
307 || (STREQ (str, L("punct")) && ISPUNCT ((UCHAR) *n))
308 || (STREQ (str, L("space")) && ISSPACE ((UCHAR) *n))
309 || (STREQ (str, L("upper")) && ISUPPER ((UCHAR) *n))
310 || (STREQ (str, L("xdigit")) && ISXDIGIT ((UCHAR) *n)))
311 goto matched;
acb5ee2e 312#endif
83b1b6d8 313 c = *p++;
1fc82a56 314 }
ecce00a9
UD
315#ifdef _LIBC
316 else if (c == L('[') && *p == L('='))
317 {
318 UCHAR str[1];
319 uint32_t nrules =
320 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
321 const CHAR *startp = p;
322
323 c = *++p;
324 if (c == L('\0'))
325 {
326 p = startp;
327 c = L('[');
328 goto normal_bracket;
329 }
330 str[0] = c;
331
332 c = *++p;
333 if (c != L('=') || p[1] != L(']'))
334 {
335 p = startp;
336 c = L('[');
337 goto normal_bracket;
338 }
339 p += 2;
340
341 if (nrules == 0)
342 {
343 if ((UCHAR) *n == str[0])
344 goto matched;
345 }
346 else
347 {
348 const int32_t *table;
349# if WIDE_CHAR_VERSION
350 const int32_t *weights;
351 const int32_t *extra;
352# else
353 const unsigned char *weights;
354 const unsigned char *extra;
355# endif
356 const int32_t *indirect;
357 int32_t idx;
358 const UCHAR *cp = (const UCHAR *) str;
359
360 /* This #include defines a local function! */
361# if WIDE_CHAR_VERSION
362# include <locale/weightwc.h>
363# else
364# include <locale/weight.h>
365# endif
366
367# if WIDE_CHAR_VERSION
368 table = (const int32_t *)
369 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
370 weights = (const int32_t *)
371 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
372 extra = (const int32_t *)
373 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
374 indirect = (const int32_t *)
375 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
376# else
377 table = (const int32_t *)
378 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
379 weights = (const unsigned char *)
380 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
381 extra = (const unsigned char *)
382 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
383 indirect = (const int32_t *)
384 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
385# endif
386
387 idx = findidx (&cp);
388 if (idx != 0)
389 {
390 /* We found a table entry. Now see whether the
391 character we are currently at has the same
392 equivalance class value. */
393 int len = weights[idx];
394 int32_t idx2;
395 const UCHAR *np = (const UCHAR *) n;
396
397 idx2 = findidx (&np);
ecce00a9
UD
398 if (idx2 != 0 && len == weights[idx2])
399 {
400 int cnt = 0;
401
402 while (cnt < len
403 && (weights[idx + 1 + cnt]
404 == weights[idx2 + 1 + cnt]))
405 ++cnt;
406
407 if (cnt == len)
408 goto matched;
409 }
ecce00a9
UD
410 }
411 }
412
413 c = *p++;
414 }
415#endif
1fc82a56
UD
416 else if (c == L('\0'))
417 /* [ (unterminated) loses. */
418 return FNM_NOMATCH;
419 else
420 {
f3e29a1a 421 int is_range = 0;
1fc82a56 422
f3e29a1a 423#ifdef _LIBC
c1ac11df
UD
424 int is_seqval = 0;
425
f3e29a1a
UD
426 if (c == L('[') && *p == L('.'))
427 {
428 uint32_t nrules =
429 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
430 const CHAR *startp = p;
431 size_t c1 = 0;
432
433 while (1)
434 {
435 c = *++p;
436 if (c == L('.') && p[1] == L(']'))
437 {
438 p += 2;
439 break;
440 }
441 if (c == '\0')
442 return FNM_NOMATCH;
443 ++c1;
444 }
445
446 /* We have to handling the symbols differently in
447 ranges since then the collation sequence is
448 important. */
449 is_range = *p == L('-') && p[1] != L('\0');
450
451 if (nrules == 0)
452 {
453 /* There are no names defined in the collation
454 data. Therefore we only accept the trivial
455 names consisting of the character itself. */
456 if (c1 != 1)
457 return FNM_NOMATCH;
458
459 if (!is_range && *n == startp[1])
460 goto matched;
461
462 cold = startp[1];
463 c = *p++;
464 }
465 else
466 {
467 int32_t table_size;
468 const int32_t *symb_table;
469# ifdef WIDE_CHAR_VERSION
470 char str[c1];
2e47aff5 471 unsigned int strcnt;
f3e29a1a
UD
472# else
473# define str (startp + 1)
474# endif
475 const unsigned char *extra;
476 int32_t idx;
477 int32_t elem;
478 int32_t second;
479 int32_t hash;
480
481# ifdef WIDE_CHAR_VERSION
482 /* We have to convert the name to a single-byte
483 string. This is possible since the names
484 consist of ASCII characters and the internal
485 representation is UCS4. */
486 for (strcnt = 0; strcnt < c1; ++strcnt)
487 str[strcnt] = startp[1 + strcnt];
488#endif
489
490 table_size =
491 _NL_CURRENT_WORD (LC_COLLATE,
492 _NL_COLLATE_SYMB_HASH_SIZEMB);
493 symb_table = (const int32_t *)
494 _NL_CURRENT (LC_COLLATE,
495 _NL_COLLATE_SYMB_TABLEMB);
496 extra = (const unsigned char *)
497 _NL_CURRENT (LC_COLLATE,
498 _NL_COLLATE_SYMB_EXTRAMB);
499
500 /* Locate the character in the hashing table. */
501 hash = elem_hash (str, c1);
502
503 idx = 0;
504 elem = hash % table_size;
0ecb606c 505 if (symb_table[2 * elem] != 0)
f3e29a1a 506 {
0ecb606c
JJ
507 second = hash % (table_size - 2) + 1;
508
509 do
f3e29a1a 510 {
0ecb606c
JJ
511 /* First compare the hashing value. */
512 if (symb_table[2 * elem] == hash
513 && (c1
514 == extra[symb_table[2 * elem + 1]])
515 && memcmp (str,
516 &extra[symb_table[2 * elem
517 + 1]
518 + 1], c1) == 0)
519 {
520 /* Yep, this is the entry. */
521 idx = symb_table[2 * elem + 1];
522 idx += 1 + extra[idx];
523 break;
524 }
f3e29a1a 525
0ecb606c
JJ
526 /* Next entry. */
527 elem += second;
528 }
529 while (symb_table[2 * elem] != 0);
f3e29a1a
UD
530 }
531
532 if (symb_table[2 * elem] != 0)
533 {
534 /* Compare the byte sequence but only if
535 this is not part of a range. */
536# ifdef WIDE_CHAR_VERSION
537 int32_t *wextra;
538
539 idx += 1 + extra[idx];
540 /* Adjust for the alignment. */
2f76d88d 541 idx = (idx + 3) & ~3;
f3e29a1a
UD
542
543 wextra = (int32_t *) &extra[idx + 4];
544# endif
545
546 if (! is_range)
547 {
548# ifdef WIDE_CHAR_VERSION
eb64f8cb
RM
549 for (c1 = 0;
550 (int32_t) c1 < wextra[idx];
551 ++c1)
f3e29a1a
UD
552 if (n[c1] != wextra[1 + c1])
553 break;
554
eb64f8cb 555 if ((int32_t) c1 == wextra[idx])
f3e29a1a
UD
556 goto matched;
557# else
558 for (c1 = 0; c1 < extra[idx]; ++c1)
559 if (n[c1] != extra[1 + c1])
560 break;
561
562 if (c1 == extra[idx])
563 goto matched;
564# endif
565 }
566
567 /* Get the collation sequence value. */
568 is_seqval = 1;
569# ifdef WIDE_CHAR_VERSION
570 cold = wextra[1 + wextra[idx]];
571# else
572 /* Adjust for the alignment. */
573 idx += 1 + extra[idx];
574 idx = (idx + 3) & ~4;
575 cold = *((int32_t *) &extra[idx]);
576# endif
577
578 c = *p++;
579 }
70dc5068 580 else if (c1 == 1)
f3e29a1a
UD
581 {
582 /* No valid character. Match it as a
583 single byte. */
584 if (!is_range && *n == str[0])
585 goto matched;
586
587 cold = str[0];
588 c = *p++;
589 }
590 else
591 return FNM_NOMATCH;
592 }
593 }
594 else
595# undef str
596#endif
597 {
598 c = FOLD (c);
599 normal_bracket:
600
601 /* We have to handling the symbols differently in
602 ranges since then the collation sequence is
603 important. */
be29c482
UD
604 is_range = (*p == L('-') && p[1] != L('\0')
605 && p[1] != L(']'));
f3e29a1a
UD
606
607 if (!is_range && c == fn)
608 goto matched;
609
469dcb0d
RM
610 /* This is needed if we goto normal_bracket; from
611 outside of is_seqval's scope. */
612 is_seqval = 0;
f3e29a1a
UD
613 cold = c;
614 c = *p++;
615 }
1fc82a56
UD
616
617 if (c == L('-') && *p != L(']'))
618 {
acb5ee2e
UD
619#if _LIBC
620 /* We have to find the collation sequence
621 value for C. Collation sequence is nothing
622 we can regularly access. The sequence
623 value is defined by the order in which the
624 definitions of the collation values for the
625 various characters appear in the source
626 file. A strange concept, nowhere
627 documented. */
f3e29a1a
UD
628 uint32_t fcollseq;
629 uint32_t lcollseq;
1fc82a56 630 UCHAR cend = *p++;
acb5ee2e 631
acb5ee2e
UD
632# ifdef WIDE_CHAR_VERSION
633 /* Search in the `names' array for the characters. */
25337753 634 fcollseq = __collseq_table_lookup (collseq, fn);
4c7d276e
UD
635 if (fcollseq == ~((uint32_t) 0))
636 /* XXX We don't know anything about the character
637 we are supposed to match. This means we are
638 failing. */
639 goto range_not_matched;
f3e29a1a
UD
640
641 if (is_seqval)
642 lcollseq = cold;
643 else
25337753 644 lcollseq = __collseq_table_lookup (collseq, cold);
acb5ee2e 645# else
f3e29a1a
UD
646 fcollseq = collseq[fn];
647 lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
648# endif
649
650 is_seqval = 0;
651 if (cend == L('[') && *p == L('.'))
652 {
653 uint32_t nrules =
654 _NL_CURRENT_WORD (LC_COLLATE,
655 _NL_COLLATE_NRULES);
656 const CHAR *startp = p;
657 size_t c1 = 0;
658
659 while (1)
660 {
661 c = *++p;
662 if (c == L('.') && p[1] == L(']'))
663 {
664 p += 2;
665 break;
666 }
667 if (c == '\0')
668 return FNM_NOMATCH;
669 ++c1;
670 }
671
672 if (nrules == 0)
673 {
674 /* There are no names defined in the
675 collation data. Therefore we only
676 accept the trivial names consisting
677 of the character itself. */
678 if (c1 != 1)
679 return FNM_NOMATCH;
680
681 cend = startp[1];
682 }
683 else
684 {
685 int32_t table_size;
686 const int32_t *symb_table;
687# ifdef WIDE_CHAR_VERSION
688 char str[c1];
2e47aff5 689 unsigned int strcnt;
f3e29a1a
UD
690# else
691# define str (startp + 1)
acb5ee2e 692# endif
f3e29a1a
UD
693 const unsigned char *extra;
694 int32_t idx;
695 int32_t elem;
696 int32_t second;
697 int32_t hash;
698
699# ifdef WIDE_CHAR_VERSION
700 /* We have to convert the name to a single-byte
701 string. This is possible since the names
702 consist of ASCII characters and the internal
703 representation is UCS4. */
704 for (strcnt = 0; strcnt < c1; ++strcnt)
705 str[strcnt] = startp[1 + strcnt];
82eafaf7 706# endif
f3e29a1a
UD
707
708 table_size =
709 _NL_CURRENT_WORD (LC_COLLATE,
710 _NL_COLLATE_SYMB_HASH_SIZEMB);
711 symb_table = (const int32_t *)
712 _NL_CURRENT (LC_COLLATE,
713 _NL_COLLATE_SYMB_TABLEMB);
714 extra = (const unsigned char *)
715 _NL_CURRENT (LC_COLLATE,
716 _NL_COLLATE_SYMB_EXTRAMB);
717
718 /* Locate the character in the hashing
719 table. */
720 hash = elem_hash (str, c1);
721
722 idx = 0;
723 elem = hash % table_size;
0ecb606c 724 if (symb_table[2 * elem] != 0)
f3e29a1a 725 {
0ecb606c
JJ
726 second = hash % (table_size - 2) + 1;
727
728 do
f3e29a1a 729 {
0ecb606c
JJ
730 /* First compare the hashing value. */
731 if (symb_table[2 * elem] == hash
732 && (c1
733 == extra[symb_table[2 * elem + 1]])
734 && memcmp (str,
735 &extra[symb_table[2 * elem + 1]
736 + 1], c1) == 0)
737 {
738 /* Yep, this is the entry. */
739 idx = symb_table[2 * elem + 1];
740 idx += 1 + extra[idx];
741 break;
742 }
743
744 /* Next entry. */
745 elem += second;
f3e29a1a 746 }
0ecb606c 747 while (symb_table[2 * elem] != 0);
f3e29a1a
UD
748 }
749
750 if (symb_table[2 * elem] != 0)
751 {
752 /* Compare the byte sequence but only if
753 this is not part of a range. */
754# ifdef WIDE_CHAR_VERSION
755 int32_t *wextra;
756
757 idx += 1 + extra[idx];
758 /* Adjust for the alignment. */
759 idx = (idx + 3) & ~4;
760
761 wextra = (int32_t *) &extra[idx + 4];
762# endif
763 /* Get the collation sequence value. */
764 is_seqval = 1;
765# ifdef WIDE_CHAR_VERSION
766 cend = wextra[1 + wextra[idx]];
767# else
768 /* Adjust for the alignment. */
769 idx += 1 + extra[idx];
770 idx = (idx + 3) & ~4;
771 cend = *((int32_t *) &extra[idx]);
772# endif
773 }
774 else if (symb_table[2 * elem] != 0 && c1 == 1)
775 {
776 cend = str[0];
777 c = *p++;
778 }
779 else
780 return FNM_NOMATCH;
781 }
782# undef str
783 }
784 else
785 {
786 if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
787 cend = *p++;
788 if (cend == L('\0'))
789 return FNM_NOMATCH;
790 cend = FOLD (cend);
791 }
acb5ee2e
UD
792
793 /* XXX It is not entirely clear to me how to handle
794 characters which are not mentioned in the
795 collation specification. */
796 if (
797# ifdef WIDE_CHAR_VERSION
f3e29a1a 798 lcollseq == 0xffffffff ||
acb5ee2e 799# endif
f3e29a1a 800 lcollseq <= fcollseq)
acb5ee2e
UD
801 {
802 /* We have to look at the upper bound. */
f3e29a1a 803 uint32_t hcollseq;
acb5ee2e 804
f3e29a1a
UD
805 if (is_seqval)
806 hcollseq = cend;
807 else
acb5ee2e 808 {
f3e29a1a 809# ifdef WIDE_CHAR_VERSION
4c7d276e 810 hcollseq =
25337753 811 __collseq_table_lookup (collseq, cend);
4c7d276e 812 if (hcollseq == ~((uint32_t) 0))
acb5ee2e 813 {
4c7d276e
UD
814 /* Hum, no information about the upper
815 bound. The matching succeeds if the
816 lower bound is matched exactly. */
817 if (lcollseq != fcollseq)
818 goto range_not_matched;
04ea3b0f 819
4c7d276e 820 goto matched;
acb5ee2e 821 }
acb5ee2e 822# else
f3e29a1a 823 hcollseq = collseq[cend];
acb5ee2e 824# endif
f3e29a1a 825 }
acb5ee2e 826
f3e29a1a 827 if (lcollseq <= hcollseq && fcollseq <= hcollseq)
9de4e203
UD
828 goto matched;
829 }
acb5ee2e
UD
830# ifdef WIDE_CHAR_VERSION
831 range_not_matched:
832# endif
833#else
834 /* We use a boring value comparison of the character
835 values. This is better than comparing using
836 `strcoll' since the latter would have surprising
837 and sometimes fatal consequences. */
838 UCHAR cend = *p++;
839
840 if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
841 cend = *p++;
842 if (cend == L('\0'))
843 return FNM_NOMATCH;
844
845 /* It is a range. */
bd8fbd57 846 if (cold <= fn && fn <= cend)
acb5ee2e
UD
847 goto matched;
848#endif
1fc82a56
UD
849
850 c = *p++;
851 }
852 }
853
854 if (c == L(']'))
855 break;
856 }
857
858 if (!not)
859 return FNM_NOMATCH;
860 break;
861
862 matched:
863 /* Skip the rest of the [...] that already matched. */
83b1b6d8 864 do
1fc82a56 865 {
f3e29a1a 866 ignore_next:
83b1b6d8
UD
867 c = *p++;
868
1fc82a56
UD
869 if (c == L('\0'))
870 /* [... (unterminated) loses. */
871 return FNM_NOMATCH;
872
1fc82a56
UD
873 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
874 {
875 if (*p == L('\0'))
876 return FNM_NOMATCH;
877 /* XXX 1003.2d11 is unclear if this is right. */
878 ++p;
879 }
880 else if (c == L('[') && *p == L(':'))
881 {
f3e29a1a
UD
882 int c1 = 0;
883 const CHAR *startp = p;
884
885 while (1)
886 {
887 c = *++p;
888 if (++c1 == CHAR_CLASS_MAX_LENGTH)
889 return FNM_NOMATCH;
890
891 if (*p == L(':') && p[1] == L(']'))
892 break;
893
894 if (c < L('a') || c >= L('z'))
895 {
896 p = startp;
897 goto ignore_next;
898 }
899 }
1fc82a56 900 p += 2;
f3e29a1a
UD
901 c = *p++;
902 }
903 else if (c == L('[') && *p == L('='))
904 {
905 c = *++p;
906 if (c == L('\0'))
907 return FNM_NOMATCH;
908 c = *++p;
909 if (c != L('=') || p[1] != L(']'))
910 return FNM_NOMATCH;
911 p += 2;
912 c = *p++;
913 }
914 else if (c == L('[') && *p == L('.'))
915 {
916 ++p;
917 while (1)
918 {
919 c = *++p;
920 if (c == '\0')
921 return FNM_NOMATCH;
922
923 if (*p == L('.') && p[1] == L(']'))
924 break;
925 }
926 p += 2;
927 c = *p++;
1fc82a56
UD
928 }
929 }
83b1b6d8 930 while (c != L(']'));
1fc82a56
UD
931 if (not)
932 return FNM_NOMATCH;
933 }
934 break;
935
955994e1
UD
936 case L('+'):
937 case L('@'):
938 case L('!'):
939 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
940 {
941 int res;
942
943 res = EXT (c, p, n, string_end, no_leading_period, flags);
944 if (res != -1)
945 return res;
946 }
947 goto normal_match;
948
949 case L('/'):
950 if (NO_LEADING_PERIOD (flags))
951 {
eb64f8cb 952 if (n == string_end || c != (UCHAR) *n)
955994e1
UD
953 return FNM_NOMATCH;
954
955 new_no_leading_period = 1;
956 break;
957 }
958 /* FALLTHROUGH */
1fc82a56 959 default:
955994e1
UD
960 normal_match:
961 if (n == string_end || c != FOLD ((UCHAR) *n))
1fc82a56
UD
962 return FNM_NOMATCH;
963 }
964
955994e1 965 no_leading_period = new_no_leading_period;
1fc82a56
UD
966 ++n;
967 }
968
955994e1 969 if (n == string_end)
1fc82a56
UD
970 return 0;
971
955994e1 972 if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L('/'))
1fc82a56
UD
973 /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */
974 return 0;
975
976 return FNM_NOMATCH;
ea6eb383 977}
1fc82a56 978
955994e1
UD
979
980static const CHAR *
981internal_function
982END (const CHAR *pattern)
983{
984 const CHAR *p = pattern;
985
986 while (1)
987 if (*++p == L('\0'))
988 /* This is an invalid pattern. */
989 return pattern;
990 else if (*p == L('['))
991 {
992 /* Handle brackets special. */
993 if (posixly_correct == 0)
994 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
995
996 /* Skip the not sign. We have to recognize it because of a possibly
997 following ']'. */
998 if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
999 ++p;
1000 /* A leading ']' is recognized as such. */
1001 if (*p == L(']'))
1002 ++p;
1003 /* Skip over all characters of the list. */
1004 while (*p != L(']'))
1005 if (*p++ == L('\0'))
1006 /* This is no valid pattern. */
1007 return pattern;
1008 }
1009 else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
1010 || *p == L('!')) && p[1] == L('('))
1011 p = END (p + 1);
1012 else if (*p == L(')'))
1013 break;
1014
1015 return p + 1;
1016}
1017
1018
1019static int
1020internal_function
1021EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
1022 int no_leading_period, int flags)
1023{
1024 const CHAR *startp;
1025 int level;
1026 struct patternlist
1027 {
1028 struct patternlist *next;
1029 CHAR str[0];
1030 } *list = NULL;
1031 struct patternlist **lastp = &list;
821a6bb4 1032 size_t pattern_len = STRLEN (pattern);
955994e1
UD
1033 const CHAR *p;
1034 const CHAR *rs;
1035
1036 /* Parse the pattern. Store the individual parts in the list. */
1037 level = 0;
1038 for (startp = p = pattern + 1; level >= 0; ++p)
1039 if (*p == L('\0'))
1040 /* This is an invalid pattern. */
1041 return -1;
1042 else if (*p == L('['))
1043 {
1044 /* Handle brackets special. */
1045 if (posixly_correct == 0)
1046 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1047
1048 /* Skip the not sign. We have to recognize it because of a possibly
1049 following ']'. */
1050 if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
1051 ++p;
1052 /* A leading ']' is recognized as such. */
1053 if (*p == L(']'))
1054 ++p;
1055 /* Skip over all characters of the list. */
1056 while (*p != L(']'))
1057 if (*p++ == L('\0'))
1058 /* This is no valid pattern. */
1059 return -1;
1060 }
1061 else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
1062 || *p == L('!')) && p[1] == L('('))
1063 /* Remember the nesting level. */
1064 ++level;
1065 else if (*p == L(')'))
1066 {
1067 if (level-- == 0)
1068 {
1069 /* This means we found the end of the pattern. */
1070#define NEW_PATTERN \
821a6bb4
UD
1071 struct patternlist *newp; \
1072 \
1073 if (opt == L('?') || opt == L('@')) \
1074 newp = alloca (sizeof (struct patternlist) \
1075 + (pattern_len * sizeof (CHAR))); \
1076 else \
1077 newp = alloca (sizeof (struct patternlist) \
1078 + ((p - startp + 1) * sizeof (CHAR))); \
955994e1
UD
1079 *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L('\0'); \
1080 newp->next = NULL; \
1081 *lastp = newp; \
1082 lastp = &newp->next
1083 NEW_PATTERN;
1084 }
1085 }
1086 else if (*p == L('|'))
1087 {
1088 if (level == 0)
1089 {
1090 NEW_PATTERN;
1091 startp = p + 1;
1092 }
1093 }
1094 assert (list != NULL);
1095 assert (p[-1] == L(')'));
817a51e2 1096#undef NEW_PATTERN
955994e1
UD
1097
1098 switch (opt)
1099 {
1100 case L('*'):
1101 if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1102 return 0;
1103 /* FALLTHROUGH */
1104
1105 case L('+'):
1106 do
1107 {
1108 for (rs = string; rs <= string_end; ++rs)
1109 /* First match the prefix with the current pattern with the
1110 current pattern. */
1111 if (FCT (list->str, string, rs, no_leading_period,
1112 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0
1113 /* This was successful. Now match the rest with the rest
1114 of the pattern. */
1115 && (FCT (p, rs, string_end,
1116 rs == string
1117 ? no_leading_period
1118 : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
1119 flags & FNM_FILE_NAME
1120 ? flags : flags & ~FNM_PERIOD) == 0
1121 /* This didn't work. Try the whole pattern. */
1122 || (rs != string
1123 && FCT (pattern - 1, rs, string_end,
1124 rs == string
1125 ? no_leading_period
1126 : (rs[-1] == '/' && NO_LEADING_PERIOD (flags)
1127 ? 1 : 0),
1128 flags & FNM_FILE_NAME
1129 ? flags : flags & ~FNM_PERIOD) == 0)))
1130 /* It worked. Signal success. */
1131 return 0;
1132 }
1133 while ((list = list->next) != NULL);
1134
1135 /* None of the patterns lead to a match. */
1136 return FNM_NOMATCH;
1137
1138 case L('?'):
1139 if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1140 return 0;
1141 /* FALLTHROUGH */
1142
1143 case L('@'):
1144 do
821a6bb4
UD
1145 /* I cannot believe it but `strcat' is actually acceptable
1146 here. Match the entire string with the prefix from the
1147 pattern list and the rest of the pattern following the
1148 pattern list. */
1149 if (FCT (STRCAT (list->str, p), string, string_end,
1150 no_leading_period,
1151 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1152 /* It worked. Signal success. */
1153 return 0;
955994e1
UD
1154 while ((list = list->next) != NULL);
1155
1156 /* None of the patterns lead to a match. */
1157 return FNM_NOMATCH;
1158
1159 case L('!'):
1160 for (rs = string; rs <= string_end; ++rs)
1161 {
1162 struct patternlist *runp;
1163
1164 for (runp = list; runp != NULL; runp = runp->next)
1165 if (FCT (runp->str, string, rs, no_leading_period,
1166 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1167 break;
1168
1169 /* If none of the patterns matched see whether the rest does. */
1170 if (runp == NULL
1171 && (FCT (p, rs, string_end,
1172 rs == string
1173 ? no_leading_period
1174 : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
1175 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD)
1176 == 0))
1177 /* This is successful. */
1178 return 0;
1179 }
1180
1181 /* None of the patterns together with the rest of the pattern
1182 lead to a match. */
1183 return FNM_NOMATCH;
1184
1185 default:
1186 assert (! "Invalid extended matching operator");
1187 break;
1188 }
1189
1190 return -1;
1191}
1192
1193
1fc82a56
UD
/* Drop the template parameters so that this file can be included again
   with a different set of definitions.  */

/* Character types and literal helpers.  */
#undef CHAR
#undef UCHAR
#undef INT
#undef L
#undef FOLD
#undef BTOWC

/* Names of the functions generated above.  */
#undef FCT
#undef EXT
#undef END

/* String and memory primitives.  */
#undef MEMPCPY
#undef MEMCHR
#undef STRCOLL
#undef STRLEN
#undef STRCAT