]> git.ipfire.org Git - thirdparty/glibc.git/blob - posix/fnmatch_loop.c
078b98242e800c1a8d2fbef88fc49b189dd4b093
[thirdparty/glibc.git] / posix / fnmatch_loop.c
1 /* Copyright (C) 1991-2013 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Lesser General Public
6 License as published by the Free Software Foundation; either
7 version 2.1 of the License, or (at your option) any later version.
8
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
13
14 You should have received a copy of the GNU Lesser General Public
15 License along with the GNU C Library; if not, see
16 <http://www.gnu.org/licenses/>. */
17
18 #include <stdint.h>
19
20 struct STRUCT
21 {
22 const CHAR *pattern;
23 const CHAR *string;
24 int no_leading_period;
25 };
26
27 /* Match STRING against the filename pattern PATTERN, returning zero if
28 it matches, nonzero if not. */
29 static int FCT (const CHAR *pattern, const CHAR *string,
30 const CHAR *string_end, int no_leading_period, int flags,
31 struct STRUCT *ends, size_t alloca_used)
32 internal_function;
33 static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
34 const CHAR *string_end, int no_leading_period, int flags,
35 size_t alloca_used)
36 internal_function;
37 static const CHAR *END (const CHAR *patternp) internal_function;
38
39 static int
40 internal_function
41 FCT (pattern, string, string_end, no_leading_period, flags, ends, alloca_used)
42 const CHAR *pattern;
43 const CHAR *string;
44 const CHAR *string_end;
45 int no_leading_period;
46 int flags;
47 struct STRUCT *ends;
48 size_t alloca_used;
49 {
50 const CHAR *p = pattern, *n = string;
51 UCHAR c;
52 #ifdef _LIBC
53 # if WIDE_CHAR_VERSION
54 const char *collseq = (const char *)
55 _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
56 # else
57 const UCHAR *collseq = (const UCHAR *)
58 _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
59 # endif
60 #endif
61
62 while ((c = *p++) != L('\0'))
63 {
64 int new_no_leading_period = 0;
65 c = FOLD (c);
66
67 switch (c)
68 {
69 case L('?'):
70 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
71 {
72 int res = EXT (c, p, n, string_end, no_leading_period,
73 flags, alloca_used);
74 if (res != -1)
75 return res;
76 }
77
78 if (n == string_end)
79 return FNM_NOMATCH;
80 else if (*n == L('/') && (flags & FNM_FILE_NAME))
81 return FNM_NOMATCH;
82 else if (*n == L('.') && no_leading_period)
83 return FNM_NOMATCH;
84 break;
85
86 case L('\\'):
87 if (!(flags & FNM_NOESCAPE))
88 {
89 c = *p++;
90 if (c == L('\0'))
91 /* Trailing \ loses. */
92 return FNM_NOMATCH;
93 c = FOLD (c);
94 }
95 if (n == string_end || FOLD ((UCHAR) *n) != c)
96 return FNM_NOMATCH;
97 break;
98
99 case L('*'):
100 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
101 {
102 int res = EXT (c, p, n, string_end, no_leading_period,
103 flags, alloca_used);
104 if (res != -1)
105 return res;
106 }
107 else if (ends != NULL)
108 {
109 ends->pattern = p - 1;
110 ends->string = n;
111 ends->no_leading_period = no_leading_period;
112 return 0;
113 }
114
115 if (n != string_end && *n == L('.') && no_leading_period)
116 return FNM_NOMATCH;
117
118 for (c = *p++; c == L('?') || c == L('*'); c = *p++)
119 {
120 if (*p == L('(') && (flags & FNM_EXTMATCH) != 0)
121 {
122 const CHAR *endp = END (p);
123 if (endp != p)
124 {
125 /* This is a pattern. Skip over it. */
126 p = endp;
127 continue;
128 }
129 }
130
131 if (c == L('?'))
132 {
133 /* A ? needs to match one character. */
134 if (n == string_end)
135 /* There isn't another character; no match. */
136 return FNM_NOMATCH;
137 else if (*n == L('/')
138 && __builtin_expect (flags & FNM_FILE_NAME, 0))
139 /* A slash does not match a wildcard under
140 FNM_FILE_NAME. */
141 return FNM_NOMATCH;
142 else
143 /* One character of the string is consumed in matching
144 this ? wildcard, so *??? won't match if there are
145 less than three characters. */
146 ++n;
147 }
148 }
149
150 if (c == L('\0'))
151 /* The wildcard(s) is/are the last element of the pattern.
152 If the name is a file name and contains another slash
153 this means it cannot match, unless the FNM_LEADING_DIR
154 flag is set. */
155 {
156 int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
157
158 if (flags & FNM_FILE_NAME)
159 {
160 if (flags & FNM_LEADING_DIR)
161 result = 0;
162 else
163 {
164 if (MEMCHR (n, L('/'), string_end - n) == NULL)
165 result = 0;
166 }
167 }
168
169 return result;
170 }
171 else
172 {
173 const CHAR *endp;
174 struct STRUCT end;
175
176 end.pattern = NULL;
177 endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L('/') : L('\0'),
178 string_end - n);
179 if (endp == NULL)
180 endp = string_end;
181
182 if (c == L('[')
183 || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0
184 && (c == L('@') || c == L('+') || c == L('!'))
185 && *p == L('(')))
186 {
187 int flags2 = ((flags & FNM_FILE_NAME)
188 ? flags : (flags & ~FNM_PERIOD));
189
190 for (--p; n < endp; ++n, no_leading_period = 0)
191 if (FCT (p, n, string_end, no_leading_period, flags2,
192 &end, alloca_used) == 0)
193 goto found;
194 }
195 else if (c == L('/') && (flags & FNM_FILE_NAME))
196 {
197 while (n < string_end && *n != L('/'))
198 ++n;
199 if (n < string_end && *n == L('/')
200 && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags,
201 NULL, alloca_used) == 0))
202 return 0;
203 }
204 else
205 {
206 int flags2 = ((flags & FNM_FILE_NAME)
207 ? flags : (flags & ~FNM_PERIOD));
208
209 if (c == L('\\') && !(flags & FNM_NOESCAPE))
210 c = *p;
211 c = FOLD (c);
212 for (--p; n < endp; ++n, no_leading_period = 0)
213 if (FOLD ((UCHAR) *n) == c
214 && (FCT (p, n, string_end, no_leading_period, flags2,
215 &end, alloca_used) == 0))
216 {
217 found:
218 if (end.pattern == NULL)
219 return 0;
220 break;
221 }
222 if (end.pattern != NULL)
223 {
224 p = end.pattern;
225 n = end.string;
226 no_leading_period = end.no_leading_period;
227 continue;
228 }
229 }
230 }
231
232 /* If we come here no match is possible with the wildcard. */
233 return FNM_NOMATCH;
234
235 case L('['):
236 {
237 /* Nonzero if the sense of the character class is inverted. */
238 const CHAR *p_init = p;
239 const CHAR *n_init = n;
240 int not;
241 CHAR cold;
242 UCHAR fn;
243
244 if (posixly_correct == 0)
245 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
246
247 if (n == string_end)
248 return FNM_NOMATCH;
249
250 if (*n == L('.') && no_leading_period)
251 return FNM_NOMATCH;
252
253 if (*n == L('/') && (flags & FNM_FILE_NAME))
254 /* `/' cannot be matched. */
255 return FNM_NOMATCH;
256
257 not = (*p == L('!') || (posixly_correct < 0 && *p == L('^')));
258 if (not)
259 ++p;
260
261 fn = FOLD ((UCHAR) *n);
262
263 c = *p++;
264 for (;;)
265 {
266 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
267 {
268 if (*p == L('\0'))
269 return FNM_NOMATCH;
270 c = FOLD ((UCHAR) *p);
271 ++p;
272
273 goto normal_bracket;
274 }
275 else if (c == L('[') && *p == L(':'))
276 {
277 /* Leave room for the null. */
278 CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
279 size_t c1 = 0;
280 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
281 wctype_t wt;
282 #endif
283 const CHAR *startp = p;
284
285 for (;;)
286 {
287 if (c1 == CHAR_CLASS_MAX_LENGTH)
288 /* The name is too long and therefore the pattern
289 is ill-formed. */
290 return FNM_NOMATCH;
291
292 c = *++p;
293 if (c == L(':') && p[1] == L(']'))
294 {
295 p += 2;
296 break;
297 }
298 if (c < L('a') || c >= L('z'))
299 {
300 /* This cannot possibly be a character class name.
301 Match it as a normal range. */
302 p = startp;
303 c = L('[');
304 goto normal_bracket;
305 }
306 str[c1++] = c;
307 }
308 str[c1] = L('\0');
309
310 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
311 wt = IS_CHAR_CLASS (str);
312 if (wt == 0)
313 /* Invalid character class name. */
314 return FNM_NOMATCH;
315
316 # if defined _LIBC && ! WIDE_CHAR_VERSION
317 /* The following code is glibc specific but does
318 there a good job in speeding up the code since
319 we can avoid the btowc() call. */
320 if (_ISCTYPE ((UCHAR) *n, wt))
321 goto matched;
322 # else
323 if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
324 goto matched;
325 # endif
326 #else
327 if ((STREQ (str, L("alnum")) && ISALNUM ((UCHAR) *n))
328 || (STREQ (str, L("alpha")) && ISALPHA ((UCHAR) *n))
329 || (STREQ (str, L("blank")) && ISBLANK ((UCHAR) *n))
330 || (STREQ (str, L("cntrl")) && ISCNTRL ((UCHAR) *n))
331 || (STREQ (str, L("digit")) && ISDIGIT ((UCHAR) *n))
332 || (STREQ (str, L("graph")) && ISGRAPH ((UCHAR) *n))
333 || (STREQ (str, L("lower")) && ISLOWER ((UCHAR) *n))
334 || (STREQ (str, L("print")) && ISPRINT ((UCHAR) *n))
335 || (STREQ (str, L("punct")) && ISPUNCT ((UCHAR) *n))
336 || (STREQ (str, L("space")) && ISSPACE ((UCHAR) *n))
337 || (STREQ (str, L("upper")) && ISUPPER ((UCHAR) *n))
338 || (STREQ (str, L("xdigit")) && ISXDIGIT ((UCHAR) *n)))
339 goto matched;
340 #endif
341 c = *p++;
342 }
343 #ifdef _LIBC
344 else if (c == L('[') && *p == L('='))
345 {
346 UCHAR str[1];
347 uint32_t nrules =
348 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
349 const CHAR *startp = p;
350
351 c = *++p;
352 if (c == L('\0'))
353 {
354 p = startp;
355 c = L('[');
356 goto normal_bracket;
357 }
358 str[0] = c;
359
360 c = *++p;
361 if (c != L('=') || p[1] != L(']'))
362 {
363 p = startp;
364 c = L('[');
365 goto normal_bracket;
366 }
367 p += 2;
368
369 if (nrules == 0)
370 {
371 if ((UCHAR) *n == str[0])
372 goto matched;
373 }
374 else
375 {
376 const int32_t *table;
377 # if WIDE_CHAR_VERSION
378 const int32_t *weights;
379 const int32_t *extra;
380 # else
381 const unsigned char *weights;
382 const unsigned char *extra;
383 # endif
384 const int32_t *indirect;
385 int32_t idx;
386 const UCHAR *cp = (const UCHAR *) str;
387
388 /* This #include defines a local function! */
389 # if WIDE_CHAR_VERSION
390 # include <locale/weightwc.h>
391 # else
392 # include <locale/weight.h>
393 # endif
394
395 # if WIDE_CHAR_VERSION
396 table = (const int32_t *)
397 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
398 weights = (const int32_t *)
399 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
400 extra = (const int32_t *)
401 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
402 indirect = (const int32_t *)
403 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
404 # else
405 table = (const int32_t *)
406 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
407 weights = (const unsigned char *)
408 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
409 extra = (const unsigned char *)
410 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
411 indirect = (const int32_t *)
412 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
413 # endif
414
415 idx = findidx (&cp, 1);
416 if (idx != 0)
417 {
418 /* We found a table entry. Now see whether the
419 character we are currently at has the same
420 equivalance class value. */
421 int len = weights[idx & 0xffffff];
422 int32_t idx2;
423 const UCHAR *np = (const UCHAR *) n;
424
425 idx2 = findidx (&np, string_end - n);
426 if (idx2 != 0
427 && (idx >> 24) == (idx2 >> 24)
428 && len == weights[idx2 & 0xffffff])
429 {
430 int cnt = 0;
431
432 idx &= 0xffffff;
433 idx2 &= 0xffffff;
434
435 while (cnt < len
436 && (weights[idx + 1 + cnt]
437 == weights[idx2 + 1 + cnt]))
438 ++cnt;
439
440 if (cnt == len)
441 goto matched;
442 }
443 }
444 }
445
446 c = *p++;
447 }
448 #endif
449 else if (c == L('\0'))
450 {
451 /* [ unterminated, treat as normal character. */
452 p = p_init;
453 n = n_init;
454 c = L('[');
455 goto normal_match;
456 }
457 else
458 {
459 int is_range = 0;
460
461 #ifdef _LIBC
462 int is_seqval = 0;
463
464 if (c == L('[') && *p == L('.'))
465 {
466 uint32_t nrules =
467 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
468 const CHAR *startp = p;
469 size_t c1 = 0;
470
471 while (1)
472 {
473 c = *++p;
474 if (c == L('.') && p[1] == L(']'))
475 {
476 p += 2;
477 break;
478 }
479 if (c == '\0')
480 return FNM_NOMATCH;
481 ++c1;
482 }
483
484 /* We have to handling the symbols differently in
485 ranges since then the collation sequence is
486 important. */
487 is_range = *p == L('-') && p[1] != L('\0');
488
489 if (nrules == 0)
490 {
491 /* There are no names defined in the collation
492 data. Therefore we only accept the trivial
493 names consisting of the character itself. */
494 if (c1 != 1)
495 return FNM_NOMATCH;
496
497 if (!is_range && *n == startp[1])
498 goto matched;
499
500 cold = startp[1];
501 c = *p++;
502 }
503 else
504 {
505 int32_t table_size;
506 const int32_t *symb_table;
507 # ifdef WIDE_CHAR_VERSION
508 char str[c1];
509 unsigned int strcnt;
510 # else
511 # define str (startp + 1)
512 # endif
513 const unsigned char *extra;
514 int32_t idx;
515 int32_t elem;
516 int32_t second;
517 int32_t hash;
518
519 # ifdef WIDE_CHAR_VERSION
520 /* We have to convert the name to a single-byte
521 string. This is possible since the names
522 consist of ASCII characters and the internal
523 representation is UCS4. */
524 for (strcnt = 0; strcnt < c1; ++strcnt)
525 str[strcnt] = startp[1 + strcnt];
526 #endif
527
528 table_size =
529 _NL_CURRENT_WORD (LC_COLLATE,
530 _NL_COLLATE_SYMB_HASH_SIZEMB);
531 symb_table = (const int32_t *)
532 _NL_CURRENT (LC_COLLATE,
533 _NL_COLLATE_SYMB_TABLEMB);
534 extra = (const unsigned char *)
535 _NL_CURRENT (LC_COLLATE,
536 _NL_COLLATE_SYMB_EXTRAMB);
537
538 /* Locate the character in the hashing table. */
539 hash = elem_hash (str, c1);
540
541 idx = 0;
542 elem = hash % table_size;
543 if (symb_table[2 * elem] != 0)
544 {
545 second = hash % (table_size - 2) + 1;
546
547 do
548 {
549 /* First compare the hashing value. */
550 if (symb_table[2 * elem] == hash
551 && (c1
552 == extra[symb_table[2 * elem + 1]])
553 && memcmp (str,
554 &extra[symb_table[2 * elem
555 + 1]
556 + 1], c1) == 0)
557 {
558 /* Yep, this is the entry. */
559 idx = symb_table[2 * elem + 1];
560 idx += 1 + extra[idx];
561 break;
562 }
563
564 /* Next entry. */
565 elem += second;
566 }
567 while (symb_table[2 * elem] != 0);
568 }
569
570 if (symb_table[2 * elem] != 0)
571 {
572 /* Compare the byte sequence but only if
573 this is not part of a range. */
574 # ifdef WIDE_CHAR_VERSION
575 int32_t *wextra;
576
577 idx += 1 + extra[idx];
578 /* Adjust for the alignment. */
579 idx = (idx + 3) & ~3;
580
581 wextra = (int32_t *) &extra[idx + 4];
582 # endif
583
584 if (! is_range)
585 {
586 # ifdef WIDE_CHAR_VERSION
587 for (c1 = 0;
588 (int32_t) c1 < wextra[idx];
589 ++c1)
590 if (n[c1] != wextra[1 + c1])
591 break;
592
593 if ((int32_t) c1 == wextra[idx])
594 goto matched;
595 # else
596 for (c1 = 0; c1 < extra[idx]; ++c1)
597 if (n[c1] != extra[1 + c1])
598 break;
599
600 if (c1 == extra[idx])
601 goto matched;
602 # endif
603 }
604
605 /* Get the collation sequence value. */
606 is_seqval = 1;
607 # ifdef WIDE_CHAR_VERSION
608 cold = wextra[1 + wextra[idx]];
609 # else
610 /* Adjust for the alignment. */
611 idx += 1 + extra[idx];
612 idx = (idx + 3) & ~4;
613 cold = *((int32_t *) &extra[idx]);
614 # endif
615
616 c = *p++;
617 }
618 else if (c1 == 1)
619 {
620 /* No valid character. Match it as a
621 single byte. */
622 if (!is_range && *n == str[0])
623 goto matched;
624
625 cold = str[0];
626 c = *p++;
627 }
628 else
629 return FNM_NOMATCH;
630 }
631 }
632 else
633 # undef str
634 #endif
635 {
636 c = FOLD (c);
637 normal_bracket:
638
639 /* We have to handling the symbols differently in
640 ranges since then the collation sequence is
641 important. */
642 is_range = (*p == L('-') && p[1] != L('\0')
643 && p[1] != L(']'));
644
645 if (!is_range && c == fn)
646 goto matched;
647
648 /* This is needed if we goto normal_bracket; from
649 outside of is_seqval's scope. */
650 is_seqval = 0;
651 cold = c;
652 c = *p++;
653 }
654
655 if (c == L('-') && *p != L(']'))
656 {
657 #if _LIBC
658 /* We have to find the collation sequence
659 value for C. Collation sequence is nothing
660 we can regularly access. The sequence
661 value is defined by the order in which the
662 definitions of the collation values for the
663 various characters appear in the source
664 file. A strange concept, nowhere
665 documented. */
666 uint32_t fcollseq;
667 uint32_t lcollseq;
668 UCHAR cend = *p++;
669
670 # ifdef WIDE_CHAR_VERSION
671 /* Search in the `names' array for the characters. */
672 fcollseq = __collseq_table_lookup (collseq, fn);
673 if (fcollseq == ~((uint32_t) 0))
674 /* XXX We don't know anything about the character
675 we are supposed to match. This means we are
676 failing. */
677 goto range_not_matched;
678
679 if (is_seqval)
680 lcollseq = cold;
681 else
682 lcollseq = __collseq_table_lookup (collseq, cold);
683 # else
684 fcollseq = collseq[fn];
685 lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
686 # endif
687
688 is_seqval = 0;
689 if (cend == L('[') && *p == L('.'))
690 {
691 uint32_t nrules =
692 _NL_CURRENT_WORD (LC_COLLATE,
693 _NL_COLLATE_NRULES);
694 const CHAR *startp = p;
695 size_t c1 = 0;
696
697 while (1)
698 {
699 c = *++p;
700 if (c == L('.') && p[1] == L(']'))
701 {
702 p += 2;
703 break;
704 }
705 if (c == '\0')
706 return FNM_NOMATCH;
707 ++c1;
708 }
709
710 if (nrules == 0)
711 {
712 /* There are no names defined in the
713 collation data. Therefore we only
714 accept the trivial names consisting
715 of the character itself. */
716 if (c1 != 1)
717 return FNM_NOMATCH;
718
719 cend = startp[1];
720 }
721 else
722 {
723 int32_t table_size;
724 const int32_t *symb_table;
725 # ifdef WIDE_CHAR_VERSION
726 char str[c1];
727 unsigned int strcnt;
728 # else
729 # define str (startp + 1)
730 # endif
731 const unsigned char *extra;
732 int32_t idx;
733 int32_t elem;
734 int32_t second;
735 int32_t hash;
736
737 # ifdef WIDE_CHAR_VERSION
738 /* We have to convert the name to a single-byte
739 string. This is possible since the names
740 consist of ASCII characters and the internal
741 representation is UCS4. */
742 for (strcnt = 0; strcnt < c1; ++strcnt)
743 str[strcnt] = startp[1 + strcnt];
744 # endif
745
746 table_size =
747 _NL_CURRENT_WORD (LC_COLLATE,
748 _NL_COLLATE_SYMB_HASH_SIZEMB);
749 symb_table = (const int32_t *)
750 _NL_CURRENT (LC_COLLATE,
751 _NL_COLLATE_SYMB_TABLEMB);
752 extra = (const unsigned char *)
753 _NL_CURRENT (LC_COLLATE,
754 _NL_COLLATE_SYMB_EXTRAMB);
755
756 /* Locate the character in the hashing
757 table. */
758 hash = elem_hash (str, c1);
759
760 idx = 0;
761 elem = hash % table_size;
762 if (symb_table[2 * elem] != 0)
763 {
764 second = hash % (table_size - 2) + 1;
765
766 do
767 {
768 /* First compare the hashing value. */
769 if (symb_table[2 * elem] == hash
770 && (c1
771 == extra[symb_table[2 * elem + 1]])
772 && memcmp (str,
773 &extra[symb_table[2 * elem + 1]
774 + 1], c1) == 0)
775 {
776 /* Yep, this is the entry. */
777 idx = symb_table[2 * elem + 1];
778 idx += 1 + extra[idx];
779 break;
780 }
781
782 /* Next entry. */
783 elem += second;
784 }
785 while (symb_table[2 * elem] != 0);
786 }
787
788 if (symb_table[2 * elem] != 0)
789 {
790 /* Compare the byte sequence but only if
791 this is not part of a range. */
792 # ifdef WIDE_CHAR_VERSION
793 int32_t *wextra;
794
795 idx += 1 + extra[idx];
796 /* Adjust for the alignment. */
797 idx = (idx + 3) & ~4;
798
799 wextra = (int32_t *) &extra[idx + 4];
800 # endif
801 /* Get the collation sequence value. */
802 is_seqval = 1;
803 # ifdef WIDE_CHAR_VERSION
804 cend = wextra[1 + wextra[idx]];
805 # else
806 /* Adjust for the alignment. */
807 idx += 1 + extra[idx];
808 idx = (idx + 3) & ~4;
809 cend = *((int32_t *) &extra[idx]);
810 # endif
811 }
812 else if (symb_table[2 * elem] != 0 && c1 == 1)
813 {
814 cend = str[0];
815 c = *p++;
816 }
817 else
818 return FNM_NOMATCH;
819 }
820 # undef str
821 }
822 else
823 {
824 if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
825 cend = *p++;
826 if (cend == L('\0'))
827 return FNM_NOMATCH;
828 cend = FOLD (cend);
829 }
830
831 /* XXX It is not entirely clear to me how to handle
832 characters which are not mentioned in the
833 collation specification. */
834 if (
835 # ifdef WIDE_CHAR_VERSION
836 lcollseq == 0xffffffff ||
837 # endif
838 lcollseq <= fcollseq)
839 {
840 /* We have to look at the upper bound. */
841 uint32_t hcollseq;
842
843 if (is_seqval)
844 hcollseq = cend;
845 else
846 {
847 # ifdef WIDE_CHAR_VERSION
848 hcollseq =
849 __collseq_table_lookup (collseq, cend);
850 if (hcollseq == ~((uint32_t) 0))
851 {
852 /* Hum, no information about the upper
853 bound. The matching succeeds if the
854 lower bound is matched exactly. */
855 if (lcollseq != fcollseq)
856 goto range_not_matched;
857
858 goto matched;
859 }
860 # else
861 hcollseq = collseq[cend];
862 # endif
863 }
864
865 if (lcollseq <= hcollseq && fcollseq <= hcollseq)
866 goto matched;
867 }
868 # ifdef WIDE_CHAR_VERSION
869 range_not_matched:
870 # endif
871 #else
872 /* We use a boring value comparison of the character
873 values. This is better than comparing using
874 `strcoll' since the latter would have surprising
875 and sometimes fatal consequences. */
876 UCHAR cend = *p++;
877
878 if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
879 cend = *p++;
880 if (cend == L('\0'))
881 return FNM_NOMATCH;
882
883 /* It is a range. */
884 if (cold <= fn && fn <= cend)
885 goto matched;
886 #endif
887
888 c = *p++;
889 }
890 }
891
892 if (c == L(']'))
893 break;
894 }
895
896 if (!not)
897 return FNM_NOMATCH;
898 break;
899
900 matched:
901 /* Skip the rest of the [...] that already matched. */
902 do
903 {
904 ignore_next:
905 c = *p++;
906
907 if (c == L('\0'))
908 /* [... (unterminated) loses. */
909 return FNM_NOMATCH;
910
911 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
912 {
913 if (*p == L('\0'))
914 return FNM_NOMATCH;
915 /* XXX 1003.2d11 is unclear if this is right. */
916 ++p;
917 }
918 else if (c == L('[') && *p == L(':'))
919 {
920 int c1 = 0;
921 const CHAR *startp = p;
922
923 while (1)
924 {
925 c = *++p;
926 if (++c1 == CHAR_CLASS_MAX_LENGTH)
927 return FNM_NOMATCH;
928
929 if (*p == L(':') && p[1] == L(']'))
930 break;
931
932 if (c < L('a') || c >= L('z'))
933 {
934 p = startp;
935 goto ignore_next;
936 }
937 }
938 p += 2;
939 c = *p++;
940 }
941 else if (c == L('[') && *p == L('='))
942 {
943 c = *++p;
944 if (c == L('\0'))
945 return FNM_NOMATCH;
946 c = *++p;
947 if (c != L('=') || p[1] != L(']'))
948 return FNM_NOMATCH;
949 p += 2;
950 c = *p++;
951 }
952 else if (c == L('[') && *p == L('.'))
953 {
954 ++p;
955 while (1)
956 {
957 c = *++p;
958 if (c == '\0')
959 return FNM_NOMATCH;
960
961 if (*p == L('.') && p[1] == L(']'))
962 break;
963 }
964 p += 2;
965 c = *p++;
966 }
967 }
968 while (c != L(']'));
969 if (not)
970 return FNM_NOMATCH;
971 }
972 break;
973
974 case L('+'):
975 case L('@'):
976 case L('!'):
977 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
978 {
979 int res = EXT (c, p, n, string_end, no_leading_period, flags,
980 alloca_used);
981 if (res != -1)
982 return res;
983 }
984 goto normal_match;
985
986 case L('/'):
987 if (NO_LEADING_PERIOD (flags))
988 {
989 if (n == string_end || c != (UCHAR) *n)
990 return FNM_NOMATCH;
991
992 new_no_leading_period = 1;
993 break;
994 }
995 /* FALLTHROUGH */
996 default:
997 normal_match:
998 if (n == string_end || c != FOLD ((UCHAR) *n))
999 return FNM_NOMATCH;
1000 }
1001
1002 no_leading_period = new_no_leading_period;
1003 ++n;
1004 }
1005
1006 if (n == string_end)
1007 return 0;
1008
1009 if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L('/'))
1010 /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */
1011 return 0;
1012
1013 return FNM_NOMATCH;
1014 }
1015
1016
1017 static const CHAR *
1018 internal_function
1019 END (const CHAR *pattern)
1020 {
1021 const CHAR *p = pattern;
1022
1023 while (1)
1024 if (*++p == L('\0'))
1025 /* This is an invalid pattern. */
1026 return pattern;
1027 else if (*p == L('['))
1028 {
1029 /* Handle brackets special. */
1030 if (posixly_correct == 0)
1031 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1032
1033 /* Skip the not sign. We have to recognize it because of a possibly
1034 following ']'. */
1035 if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
1036 ++p;
1037 /* A leading ']' is recognized as such. */
1038 if (*p == L(']'))
1039 ++p;
1040 /* Skip over all characters of the list. */
1041 while (*p != L(']'))
1042 if (*p++ == L('\0'))
1043 /* This is no valid pattern. */
1044 return pattern;
1045 }
1046 else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
1047 || *p == L('!')) && p[1] == L('('))
1048 p = END (p + 1);
1049 else if (*p == L(')'))
1050 break;
1051
1052 return p + 1;
1053 }
1054
1055
1056 static int
1057 internal_function
1058 EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
1059 int no_leading_period, int flags, size_t alloca_used)
1060 {
1061 const CHAR *startp;
1062 int level;
1063 struct patternlist
1064 {
1065 struct patternlist *next;
1066 CHAR malloced;
1067 CHAR str[0];
1068 } *list = NULL;
1069 struct patternlist **lastp = &list;
1070 size_t pattern_len = STRLEN (pattern);
1071 int any_malloced = 0;
1072 const CHAR *p;
1073 const CHAR *rs;
1074 int retval = 0;
1075
1076 /* Parse the pattern. Store the individual parts in the list. */
1077 level = 0;
1078 for (startp = p = pattern + 1; level >= 0; ++p)
1079 if (*p == L('\0'))
1080 {
1081 /* This is an invalid pattern. */
1082 retval = -1;
1083 goto out;
1084 }
1085 else if (*p == L('['))
1086 {
1087 /* Handle brackets special. */
1088 if (posixly_correct == 0)
1089 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1090
1091 /* Skip the not sign. We have to recognize it because of a possibly
1092 following ']'. */
1093 if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
1094 ++p;
1095 /* A leading ']' is recognized as such. */
1096 if (*p == L(']'))
1097 ++p;
1098 /* Skip over all characters of the list. */
1099 while (*p != L(']'))
1100 if (*p++ == L('\0'))
1101 {
1102 /* This is no valid pattern. */
1103 retval = -1;
1104 goto out;
1105 }
1106 }
1107 else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
1108 || *p == L('!')) && p[1] == L('('))
1109 /* Remember the nesting level. */
1110 ++level;
1111 else if (*p == L(')'))
1112 {
1113 if (level-- == 0)
1114 {
1115 /* This means we found the end of the pattern. */
1116 #define NEW_PATTERN \
1117 struct patternlist *newp; \
1118 size_t slen = (opt == L('?') || opt == L('@') \
1119 ? pattern_len : (p - startp + 1)); \
1120 slen = sizeof (struct patternlist) + (slen * sizeof (CHAR)); \
1121 int malloced = ! __libc_use_alloca (alloca_used + slen); \
1122 if (__builtin_expect (malloced, 0)) \
1123 { \
1124 newp = malloc (slen); \
1125 if (newp == NULL) \
1126 { \
1127 retval = -2; \
1128 goto out; \
1129 } \
1130 any_malloced = 1; \
1131 } \
1132 else \
1133 newp = alloca_account (slen, alloca_used); \
1134 newp->next = NULL; \
1135 newp->malloced = malloced; \
1136 *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L('\0'); \
1137 *lastp = newp; \
1138 lastp = &newp->next
1139 NEW_PATTERN;
1140 }
1141 }
1142 else if (*p == L('|'))
1143 {
1144 if (level == 0)
1145 {
1146 NEW_PATTERN;
1147 startp = p + 1;
1148 }
1149 }
1150 assert (list != NULL);
1151 assert (p[-1] == L(')'));
1152 #undef NEW_PATTERN
1153
1154 switch (opt)
1155 {
1156 case L('*'):
1157 if (FCT (p, string, string_end, no_leading_period, flags, NULL,
1158 alloca_used) == 0)
1159 goto success;
1160 /* FALLTHROUGH */
1161
1162 case L('+'):
1163 do
1164 {
1165 for (rs = string; rs <= string_end; ++rs)
1166 /* First match the prefix with the current pattern with the
1167 current pattern. */
1168 if (FCT (list->str, string, rs, no_leading_period,
1169 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1170 NULL, alloca_used) == 0
1171 /* This was successful. Now match the rest with the rest
1172 of the pattern. */
1173 && (FCT (p, rs, string_end,
1174 rs == string
1175 ? no_leading_period
1176 : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
1177 flags & FNM_FILE_NAME
1178 ? flags : flags & ~FNM_PERIOD, NULL, alloca_used) == 0
1179 /* This didn't work. Try the whole pattern. */
1180 || (rs != string
1181 && FCT (pattern - 1, rs, string_end,
1182 rs == string
1183 ? no_leading_period
1184 : (rs[-1] == '/' && NO_LEADING_PERIOD (flags)
1185 ? 1 : 0),
1186 flags & FNM_FILE_NAME
1187 ? flags : flags & ~FNM_PERIOD, NULL,
1188 alloca_used) == 0)))
1189 /* It worked. Signal success. */
1190 goto success;
1191 }
1192 while ((list = list->next) != NULL);
1193
1194 /* None of the patterns lead to a match. */
1195 retval = FNM_NOMATCH;
1196 break;
1197
1198 case L('?'):
1199 if (FCT (p, string, string_end, no_leading_period, flags, NULL,
1200 alloca_used) == 0)
1201 goto success;
1202 /* FALLTHROUGH */
1203
1204 case L('@'):
1205 do
1206 /* I cannot believe it but `strcat' is actually acceptable
1207 here. Match the entire string with the prefix from the
1208 pattern list and the rest of the pattern following the
1209 pattern list. */
1210 if (FCT (STRCAT (list->str, p), string, string_end,
1211 no_leading_period,
1212 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1213 NULL, alloca_used) == 0)
1214 /* It worked. Signal success. */
1215 goto success;
1216 while ((list = list->next) != NULL);
1217
1218 /* None of the patterns lead to a match. */
1219 retval = FNM_NOMATCH;
1220 break;
1221
1222 case L('!'):
1223 for (rs = string; rs <= string_end; ++rs)
1224 {
1225 struct patternlist *runp;
1226
1227 for (runp = list; runp != NULL; runp = runp->next)
1228 if (FCT (runp->str, string, rs, no_leading_period,
1229 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1230 NULL, alloca_used) == 0)
1231 break;
1232
1233 /* If none of the patterns matched see whether the rest does. */
1234 if (runp == NULL
1235 && (FCT (p, rs, string_end,
1236 rs == string
1237 ? no_leading_period
1238 : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
1239 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1240 NULL, alloca_used) == 0))
1241 /* This is successful. */
1242 goto success;
1243 }
1244
1245 /* None of the patterns together with the rest of the pattern
1246 lead to a match. */
1247 retval = FNM_NOMATCH;
1248 break;
1249
1250 default:
1251 assert (! "Invalid extended matching operator");
1252 retval = -1;
1253 break;
1254 }
1255
1256 success:
1257 out:
1258 if (any_malloced)
1259 while (list != NULL)
1260 {
1261 struct patternlist *old = list;
1262 list = list->next;
1263 if (old->malloced)
1264 free (old);
1265 }
1266
1267 return retval;
1268 }
1269
1270
/* Drop the macros this file relies on but does not define itself --
   presumably so that it can be included again with a different set of
   definitions (confirm against the including file).  */

/* Character types and the state structure tag.  */
#undef CHAR
#undef UCHAR
#undef INT
#undef STRUCT

/* Names of the functions defined above.  */
#undef FCT
#undef EXT
#undef END

/* Character literal, case folding and conversion helpers.  */
#undef L
#undef FOLD
#undef BTOWC

/* String and memory primitives.  */
#undef MEMCHR
#undef MEMPCPY
#undef STRCAT
#undef STRCOLL
#undef STRLEN